From 36265dde2fd70f994bc8ee2b2cdff9a398ceff1d Mon Sep 17 00:00:00 2001 From: James Booth Date: Thu, 11 Jul 2013 22:57:35 +0100 Subject: Moved functions to parser.c, moved parser to tools --- src/tools/parser.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 349 insertions(+) create mode 100644 src/tools/parser.c (limited to 'src/tools/parser.c') diff --git a/src/tools/parser.c b/src/tools/parser.c new file mode 100644 index 00000000..c3190a96 --- /dev/null +++ b/src/tools/parser.c @@ -0,0 +1,349 @@ +/* + * parser.c + * + * Copyright (C) 2012, 2013 James Booth + * + * This file is part of Profanity. + * + * Profanity is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Profanity is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Profanity. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#include <stdlib.h> +#include <string.h> + +#include <glib.h> + +/* + * Take a full line of input and return an array of strings representing + * the arguments of a command. + * If the number of arguments found is less than min, or more than max + * NULL is returned. + * + * inp - The line of input + * min - The minimum allowed number of arguments + * max - The maxmimum allowed number of arguments + * + * Returns - An NULL terminated array of strings representing the aguments + * of the command, or NULL if the validation fails. + * + * E.g. 
the following input line: + * + * /cmd arg1 arg2 + * + * Will return a pointer to the following array: + * + * { "arg1", "arg2", NULL } + * + */ +gchar ** +parse_args(const char * const inp, int min, int max) +{ + if (inp == NULL) { + return NULL; + } + + // copy and strip input of leading/trailing whitepsace + char *copy = strdup(inp); + g_strstrip(copy); + + int inp_size = strlen(copy); + gboolean in_token = FALSE; + gboolean in_quotes = FALSE; + char *token_start = &copy[0]; + int token_size = 0; + GSList *tokens = NULL; + + // add tokens to GSList + int i; + for (i = 0; i <= inp_size; i++) { + if (!in_token) { + if (copy[i] == ' ') { + continue; + } else { + in_token = TRUE; + if (copy[i] == '"') { + in_quotes = TRUE; + i++; + } + token_start = &copy[i]; + token_size++; + } + } else { + if (in_quotes) { + if ((copy[i] == '\0') || (copy[i] == '"')) { + tokens = g_slist_append(tokens, g_strndup(token_start, + token_size)); + token_size = 0; + in_token = FALSE; + in_quotes = FALSE; + } else { + token_size++; + } + } else { + if (copy[i] == ' ' || copy[i] == '\0') { + tokens = g_slist_append(tokens, g_strndup(token_start, + token_size)); + token_size = 0; + in_token = FALSE; + } else { + token_size++; + } + } + } + } + + int num = g_slist_length(tokens) - 1; + + // if num args not valid return NULL + if ((num < min) || (num > max)) { + g_slist_free_full(tokens, free); + g_free(copy); + return NULL; + + // if min allowed is 0 and 0 found, return empty char* array + } else if (min == 0 && num == 0) { + g_slist_free_full(tokens, free); + gchar **args = malloc((num + 1) * sizeof(*args)); + args[0] = NULL; + g_free(copy); + return args; + + // otherwise return args array + } else { + gchar **args = malloc((num + 1) * sizeof(*args)); + GSList *token = tokens; + token = g_slist_next(token); + int arg_count = 0; + + while (token != NULL) { + args[arg_count++] = strdup(token->data); + token = g_slist_next(token); + } + + args[arg_count] = NULL; + g_slist_free_full(tokens, free); + 
g_free(copy); + + return args; + } +} + +/* + * Take a full line of input and return an array of strings representing + * the arguments of a command. This function handles when the last parameter + * to the command is free text e.g. + * + * /msg user@host here is a message + * + * If the number of arguments found is less than min, or more than max + * NULL is returned. + * + * inp - The line of input + * min - The minimum allowed number of arguments + * max - The maxmimum allowed number of arguments + * + * Returns - An NULL terminated array of strings representing the aguments + * of the command, or NULL if the validation fails. + * + * E.g. the following input line: + * + * /cmd arg1 arg2 some free text + * + * Will return a pointer to the following array: + * + * { "arg1", "arg2", "some free text", NULL } + * + */ +gchar ** +parse_args_with_freetext(const char * const inp, int min, int max) +{ + if (inp == NULL) { + return NULL; + } + + // copy and strip input of leading/trailing whitepsace + char *copy = strdup(inp); + g_strstrip(copy); + + int inp_size = strlen(copy); + gboolean in_token = FALSE; + gboolean in_freetext = FALSE; + gboolean in_quotes = FALSE; + char *token_start = &copy[0]; + int token_size = 0; + int num_tokens = 0; + GSList *tokens = NULL; + + // add tokens to GSList + int i; + for (i = 0; i <= inp_size; i++) { + if (!in_token) { + if (copy[i] == ' ') { + continue; + } else { + in_token = TRUE; + num_tokens++; + if (num_tokens == max + 1) { + in_freetext = TRUE; + } else if (copy[i] == '"') { + in_quotes = TRUE; + i++; + } + if (copy[i] == '"') { + token_start = &copy[i+1]; + } else { + token_start = &copy[i]; + } + if (copy[i] != '"') { + token_size++; + } + } + } else { + if (in_quotes) { + if ((copy[i] == '\0') || (copy[i] == '"')) { + tokens = g_slist_append(tokens, g_strndup(token_start, + token_size)); + token_size = 0; + in_token = FALSE; + in_quotes = FALSE; + } else { + if (copy[i] != '"') { + token_size++; + } + } + } else { + if ((!in_freetext 
&& copy[i] == ' ') || copy[i] == '\0') { + tokens = g_slist_append(tokens, g_strndup(token_start, + token_size)); + token_size = 0; + in_token = FALSE; + } else { + if (copy[i] != '"') { + token_size++; + } + } + } + } + } + + int num = g_slist_length(tokens) - 1; + + // if num args not valid return NULL + if ((num < min) || (num > max)) { + g_slist_free_full(tokens, free); + free(copy); + return NULL; + + // if min allowed is 0 and 0 found, return empty char* array + } else if (min == 0 && num == 0) { + gchar **args = malloc((num + 1) * sizeof(*args)); + args[0] = NULL; + return args; + + // otherwise return args array + } else { + gchar **args = malloc((num + 1) * sizeof(*args)); + GSList *token = tokens; + token = g_slist_next(token); + int arg_count = 0; + + while (token != NULL) { + args[arg_count++] = strdup(token->data); + token = g_slist_next(token); + } + + args[arg_count] = NULL; + g_slist_free_full(tokens, free); + free(copy); + + return args; + } +} + +int +count_tokens(char *string) +{ + int num_tokens = 0; + + // if no quotes, use glib + if (g_strrstr(string, "\"") == NULL) { + gchar **tokens = g_strsplit(string, " ", 0); + num_tokens = g_strv_length(tokens); + g_strfreev(tokens); + + // else count tokens including quoted + } else { + int length = strlen(string); + int i = 0; + gboolean in_quotes = FALSE; + + // include first token + num_tokens++; + + for (i = 0; i < length; i++) { + if (string[i] == ' ') { + if (!in_quotes) { + num_tokens++; + } + } else if (string[i] == '"') { + if (in_quotes) { + in_quotes = FALSE; + } else { + in_quotes = TRUE; + } + } + } + } + + return num_tokens; +} + +char * +get_start(char *string, int tokens) +{ + char *result_str = NULL; + int num_tokens = 0; + int length = strlen(string); + int i = 0; + gboolean in_quotes = FALSE; + GString *result = g_string_new(""); + + // include first token + num_tokens++; + + for (i = 0; i < length; i++) { + if (num_tokens < tokens) { + g_string_append_c(result, string[i]); + } + if 
(string[i] == ' ') { + if (!in_quotes) { + num_tokens++; + } + } else if (string[i] == '"') { + if (in_quotes) { + in_quotes = FALSE; + } else { + in_quotes = TRUE; + } + } + } + + result_str = result->str; + g_string_free(result, FALSE); + + return result_str; +} + + -- cgit 1.4.1-2-gfad0 From 5233000498a30c61b22f68ac34ff10d60815f979 Mon Sep 17 00:00:00 2001 From: James Booth Date: Thu, 11 Jul 2013 23:03:20 +0100 Subject: Removed if clause in parser --- src/tools/parser.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/tools/parser.c') diff --git a/src/tools/parser.c b/src/tools/parser.c index c3190a96..96c56628 100644 --- a/src/tools/parser.c +++ b/src/tools/parser.c @@ -205,8 +205,6 @@ parse_args_with_freetext(const char * const inp, int min, int max) token_start = &copy[i+1]; } else { token_start = &copy[i]; - } - if (copy[i] != '"') { token_size++; } } -- cgit 1.4.1-2-gfad0 From bb550fed5547c7885e3f9c486051c742ac0b9b67 Mon Sep 17 00:00:00 2001 From: James Booth Date: Fri, 12 Jul 2013 00:46:33 +0100 Subject: Handle unicode chars in command parser --- src/tools/parser.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'src/tools/parser.c') diff --git a/src/tools/parser.c b/src/tools/parser.c index 96c56628..c8335879 100644 --- a/src/tools/parser.c +++ b/src/tools/parser.c @@ -58,7 +58,7 @@ parse_args(const char * const inp, int min, int max) char *copy = strdup(inp); g_strstrip(copy); - int inp_size = strlen(copy); + int inp_size = g_utf8_strlen(copy, -1); gboolean in_token = FALSE; gboolean in_quotes = FALSE; char *token_start = &copy[0]; @@ -67,43 +67,48 @@ parse_args(const char * const inp, int min, int max) // add tokens to GSList int i; - for (i = 0; i <= inp_size; i++) { + for (i = 0; i < inp_size; i++) { + gchar *curr_ch = g_utf8_offset_to_pointer(copy, i); + gunichar curr_uni = g_utf8_get_char(curr_ch); if (!in_token) { - if (copy[i] == ' ') { + if (curr_uni == ' ') { continue; } else { in_token = TRUE; - if (copy[i] 
== '"') { + if (curr_uni == '"') { in_quotes = TRUE; i++; } - token_start = &copy[i]; - token_size++; + token_start = curr_ch; + token_size += g_unichar_to_utf8(curr_uni, NULL); } } else { if (in_quotes) { - if ((copy[i] == '\0') || (copy[i] == '"')) { + if (curr_uni == '"') { tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); token_size = 0; in_token = FALSE; in_quotes = FALSE; } else { - token_size++; + token_size += g_unichar_to_utf8(curr_uni, NULL); } } else { - if (copy[i] == ' ' || copy[i] == '\0') { + if (curr_uni == ' ') { tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); token_size = 0; in_token = FALSE; } else { + token_size += g_unichar_to_utf8(curr_uni, NULL); token_size++; } } } } + tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); + int num = g_slist_length(tokens) - 1; // if num args not valid return NULL -- cgit 1.4.1-2-gfad0 From 51786f67a6f453ff5ac96aee08cf814eb3bcfd4d Mon Sep 17 00:00:00 2001 From: James Booth Date: Sun, 14 Jul 2013 00:14:36 +0100 Subject: Implemented parse_args with unicode compatibility --- src/tools/parser.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'src/tools/parser.c') diff --git a/src/tools/parser.c b/src/tools/parser.c index c8335879..42b1165f 100644 --- a/src/tools/parser.c +++ b/src/tools/parser.c @@ -65,11 +65,18 @@ parse_args(const char * const inp, int min, int max) int token_size = 0; GSList *tokens = NULL; + // add tokens to GSList int i; for (i = 0; i < inp_size; i++) { gchar *curr_ch = g_utf8_offset_to_pointer(copy, i); gunichar curr_uni = g_utf8_get_char(curr_ch); + + gchar *character = malloc(7); + gint num_written = 0; + num_written = g_unichar_to_utf8(curr_uni, character); + character[num_written] = '\0'; + if (!in_token) { if (curr_uni == ' ') { continue; @@ -78,9 +85,14 @@ parse_args(const char * const inp, int min, int max) if (curr_uni == '"') { in_quotes = TRUE; i++; + gchar *next_ch = 
g_utf8_next_char(curr_ch); + gunichar next_uni = g_utf8_get_char(next_ch); + token_start = next_ch; + token_size += g_unichar_to_utf8(next_uni, NULL); + } else { + token_start = curr_ch; + token_size += g_unichar_to_utf8(curr_uni, NULL); } - token_start = curr_ch; - token_size += g_unichar_to_utf8(curr_uni, NULL); } } else { if (in_quotes) { @@ -101,13 +113,14 @@ parse_args(const char * const inp, int min, int max) in_token = FALSE; } else { token_size += g_unichar_to_utf8(curr_uni, NULL); - token_size++; } } } } - tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); + if (in_token) { + tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); + } int num = g_slist_length(tokens) - 1; -- cgit 1.4.1-2-gfad0 From 4d35031cb077a3aa03620d9372747dd69229b7da Mon Sep 17 00:00:00 2001 From: James Booth Date: Sun, 14 Jul 2013 00:24:57 +0100 Subject: Implemented parse_args_with_freetext with unicode compatibility --- src/tools/parser.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'src/tools/parser.c') diff --git a/src/tools/parser.c b/src/tools/parser.c index 42b1165f..b679b9f2 100644 --- a/src/tools/parser.c +++ b/src/tools/parser.c @@ -65,18 +65,12 @@ parse_args(const char * const inp, int min, int max) int token_size = 0; GSList *tokens = NULL; - // add tokens to GSList int i; for (i = 0; i < inp_size; i++) { gchar *curr_ch = g_utf8_offset_to_pointer(copy, i); gunichar curr_uni = g_utf8_get_char(curr_ch); - gchar *character = malloc(7); - gint num_written = 0; - num_written = g_unichar_to_utf8(curr_uni, character); - character[num_written] = '\0'; - if (!in_token) { if (curr_uni == ' ') { continue; @@ -195,7 +189,7 @@ parse_args_with_freetext(const char * const inp, int min, int max) char *copy = strdup(inp); g_strstrip(copy); - int inp_size = strlen(copy); + int inp_size = g_utf8_strlen(copy, -1); gboolean in_token = FALSE; gboolean in_freetext = FALSE; gboolean in_quotes = 
FALSE; @@ -206,54 +200,66 @@ parse_args_with_freetext(const char * const inp, int min, int max) // add tokens to GSList int i; - for (i = 0; i <= inp_size; i++) { + for (i = 0; i < inp_size; i++) { + gchar *curr_ch = g_utf8_offset_to_pointer(copy, i); + gunichar curr_uni = g_utf8_get_char(curr_ch); + if (!in_token) { - if (copy[i] == ' ') { + if (curr_uni == ' ') { continue; } else { in_token = TRUE; num_tokens++; if (num_tokens == max + 1) { in_freetext = TRUE; - } else if (copy[i] == '"') { + } else if (curr_uni == '"') { in_quotes = TRUE; i++; + gchar *next_ch = g_utf8_next_char(curr_ch); + gunichar next_uni = g_utf8_get_char(next_ch); + token_start = next_ch; + token_size += g_unichar_to_utf8(next_uni, NULL); } - if (copy[i] == '"') { - token_start = &copy[i+1]; + if (curr_uni == '"') { + gchar *next_ch = g_utf8_next_char(curr_ch); + token_start = next_ch; } else { - token_start = &copy[i]; - token_size++; + token_start = curr_ch; + token_size += g_unichar_to_utf8(curr_uni, NULL); } } } else { if (in_quotes) { - if ((copy[i] == '\0') || (copy[i] == '"')) { + if (curr_uni == '"') { tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); token_size = 0; in_token = FALSE; in_quotes = FALSE; } else { - if (copy[i] != '"') { - token_size++; + if (curr_uni != '"') { + token_size += g_unichar_to_utf8(curr_uni, NULL); } } } else { - if ((!in_freetext && copy[i] == ' ') || copy[i] == '\0') { + if (!in_freetext && curr_uni == ' ') { tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); token_size = 0; in_token = FALSE; } else { - if (copy[i] != '"') { - token_size++; + if (curr_uni != '"') { + token_size += g_unichar_to_utf8(curr_uni, NULL); } } } } } + if (in_token) { + tokens = g_slist_append(tokens, g_strndup(token_start, token_size)); + } + int num = g_slist_length(tokens) - 1; // if num args not valid return NULL -- cgit 1.4.1-2-gfad0 From e7478d8cb8c55db91628a059ff9f1065bfb9cf0e Mon Sep 17 00:00:00 2001 From: James Booth Date: Sun, 14 Jul 2013 
00:46:56 +0100 Subject: Added parser tests --- src/tools/parser.c | 2 - tests/test_parser.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 2 deletions(-) (limited to 'src/tools/parser.c') diff --git a/src/tools/parser.c b/src/tools/parser.c index b679b9f2..595032c3 100644 --- a/src/tools/parser.c +++ b/src/tools/parser.c @@ -367,5 +367,3 @@ get_start(char *string, int tokens) return result_str; } - - diff --git a/tests/test_parser.c b/tests/test_parser.c index 58e3f3e7..e442baee 100644 --- a/tests/test_parser.c +++ b/tests/test_parser.c @@ -279,6 +279,124 @@ parse_cmd_freetext_with_many_quoted_and_many_spaces(void) assert_string_equals("and heres the free text", result[2]); g_strfreev(result); } + +void +count_one_token(void) +{ + char *inp = "one"; + int result = count_tokens(inp); + + assert_int_equals(1, result); +} + +void +count_one_token_quoted_no_whitespace(void) +{ + char *inp = "\"one\""; + int result = count_tokens(inp); + + assert_int_equals(1, result); +} + +void +count_one_token_quoted_with_whitespace(void) +{ + char *inp = "\"one two\""; + int result = count_tokens(inp); + + assert_int_equals(1, result); +} + +void +count_two_tokens(void) +{ + char *inp = "one two"; + int result = count_tokens(inp); + + assert_int_equals(2, result); +} + +void +count_two_tokens_first_quoted(void) +{ + char *inp = "\"one and\" two"; + int result = count_tokens(inp); + + assert_int_equals(2, result); +} + +void +count_two_tokens_second_quoted(void) +{ + char *inp = "one \"two and\""; + int result = count_tokens(inp); + + assert_int_equals(2, result); +} + +void +count_two_tokens_both_quoted(void) +{ + char *inp = "\"one and then\" \"two and\""; + int result = count_tokens(inp); + + assert_int_equals(2, result); +} + +void +get_first_of_one(void) +{ + char *inp = "one"; + char *result = get_start(inp, 2); + + assert_string_equals("one", result); +} + +void +get_first_of_two(void) +{ + char *inp = "one two"; + char *result = 
get_start(inp, 2); + + assert_string_equals("one ", result); +} + +void +get_first_two_of_three(void) +{ + char *inp = "one two three"; + char *result = get_start(inp, 3); + + assert_string_equals("one two ", result); +} + +void +get_first_two_of_three_first_quoted(void) +{ + char *inp = "\"one\" two three"; + char *result = get_start(inp, 3); + + assert_string_equals("\"one\" two ", result); +} + +void +get_first_two_of_three_second_quoted(void) +{ + char *inp = "one \"two\" three"; + char *result = get_start(inp, 3); + + assert_string_equals("one \"two\" ", result); +} + +void +get_first_two_of_three_first_and_second_quoted(void) +{ + char *inp = "\"one\" \"two\" three"; + char *result = get_start(inp, 3); + + assert_string_equals("\"one\" \"two\" ", result); +} + void register_parser_tests(void) { @@ -307,4 +425,17 @@ register_parser_tests(void) TEST(parse_cmd_freetext_with_quoted_and_space); TEST(parse_cmd_freetext_with_quoted_and_many_spaces); TEST(parse_cmd_freetext_with_many_quoted_and_many_spaces); + TEST(count_one_token); + TEST(count_one_token_quoted_no_whitespace); + TEST(count_one_token_quoted_with_whitespace); + TEST(count_two_tokens); + TEST(count_two_tokens_first_quoted); + TEST(count_two_tokens_second_quoted); + TEST(count_two_tokens_both_quoted); + TEST(get_first_of_one); + TEST(get_first_of_two); + TEST(get_first_two_of_three); + TEST(get_first_two_of_three_first_quoted); + TEST(get_first_two_of_three_second_quoted); + TEST(get_first_two_of_three_first_and_second_quoted); } -- cgit 1.4.1-2-gfad0 From 7f82dc42f593f6410e4d0058add4b91112047e63 Mon Sep 17 00:00:00 2001 From: James Booth Date: Sun, 14 Jul 2013 01:00:11 +0100 Subject: Remaining parser function unicode compatible --- src/tools/parser.c | 59 +++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) (limited to 'src/tools/parser.c') diff --git a/src/tools/parser.c b/src/tools/parser.c index 595032c3..f4cfc3d2 100644 --- a/src/tools/parser.c 
+++ b/src/tools/parser.c @@ -297,34 +297,27 @@ parse_args_with_freetext(const char * const inp, int min, int max) int count_tokens(char *string) { + int length = g_utf8_strlen(string, -1); + gboolean in_quotes = FALSE; int num_tokens = 0; + int i = 0; - // if no quotes, use glib - if (g_strrstr(string, "\"") == NULL) { - gchar **tokens = g_strsplit(string, " ", 0); - num_tokens = g_strv_length(tokens); - g_strfreev(tokens); - - // else count tokens including quoted - } else { - int length = strlen(string); - int i = 0; - gboolean in_quotes = FALSE; + // include first token + num_tokens++; - // include first token - num_tokens++; + for (i = 0; i < length; i++) { + gchar *curr_ch = g_utf8_offset_to_pointer(string, i); + gunichar curr_uni = g_utf8_get_char(curr_ch); - for (i = 0; i < length; i++) { - if (string[i] == ' ') { - if (!in_quotes) { - num_tokens++; - } - } else if (string[i] == '"') { - if (in_quotes) { - in_quotes = FALSE; - } else { - in_quotes = TRUE; - } + if (curr_uni == ' ') { + if (!in_quotes) { + num_tokens++; + } + } else if (curr_uni == '"') { + if (in_quotes) { + in_quotes = FALSE; + } else { + in_quotes = TRUE; } } } @@ -335,25 +328,31 @@ count_tokens(char *string) char * get_start(char *string, int tokens) { + GString *result = g_string_new(""); + int length = g_utf8_strlen(string, -1); + gboolean in_quotes = FALSE; char *result_str = NULL; int num_tokens = 0; - int length = strlen(string); int i = 0; - gboolean in_quotes = FALSE; - GString *result = g_string_new(""); // include first token num_tokens++; for (i = 0; i < length; i++) { + gchar *curr_ch = g_utf8_offset_to_pointer(string, i); + gunichar curr_uni = g_utf8_get_char(curr_ch); + if (num_tokens < tokens) { - g_string_append_c(result, string[i]); + gchar *uni_char = malloc(7); + int len = g_unichar_to_utf8(curr_uni, uni_char); + uni_char[len] = '\0'; + g_string_append(result, uni_char); } - if (string[i] == ' ') { + if (curr_uni == ' ') { if (!in_quotes) { num_tokens++; } - } else if 
(string[i] == '"') { + } else if (curr_uni == '"') { if (in_quotes) { in_quotes = FALSE; } else { -- cgit 1.4.1-2-gfad0