From 36265dde2fd70f994bc8ee2b2cdff9a398ceff1d Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Thu, 11 Jul 2013 22:57:35 +0100
Subject: Moved functions to parser.c, moved parser to tools

---
 src/tools/parser.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 349 insertions(+)
 create mode 100644 src/tools/parser.c

(limited to 'src/tools/parser.c')

diff --git a/src/tools/parser.c b/src/tools/parser.c
new file mode 100644
index 00000000..c3190a96
--- /dev/null
+++ b/src/tools/parser.c
@@ -0,0 +1,349 @@
+/*
+ * parser.c
+ *
+ * Copyright (C) 2012, 2013 James Booth <boothj5@gmail.com>
+ *
+ * This file is part of Profanity.
+ *
+ * Profanity is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Profanity is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Profanity.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <glib.h>
+
+/*
+ * Take a full line of input and return an array of strings representing
+ * the arguments of a command; the command token itself is not returned.
+ * A double-quoted run forms one argument.  If the number of arguments
+ * found is less than min, or more than max, NULL is returned.
+ *
+ * inp - The line of input (NULL yields NULL)
+ * min - The minimum allowed number of arguments
+ * max - The maximum allowed number of arguments
+ *
+ * Returns - A newly allocated, NULL terminated array of strings holding the
+ * arguments of the command, or NULL if the validation fails.
+ *
+ * E.g. the following input line:
+ *
+ * /cmd arg1 arg2
+ *
+ * Will return a pointer to the following array:
+ *
+ * { "arg1", "arg2", NULL }
+ *
+ */
+gchar **
+parse_args(const char * const inp, int min, int max)
+{
+    if (inp == NULL) {
+        return NULL;
+    }
+
+    // copy and strip input of leading/trailing whitespace
+    char *copy = strdup(inp);
+    g_strstrip(copy);
+
+    int inp_size = strlen(copy);
+    gboolean in_token = FALSE;
+    gboolean in_quotes = FALSE;
+    char *token_start = &copy[0];
+    int token_size = 0;
+    GSList *tokens = NULL;
+
+    // add tokens to GSList
+    int i;
+    for (i = 0; i <= inp_size; i++) {
+        if (!in_token) {
+            if (copy[i] == ' ') {
+                continue;
+            } else {
+                in_token = TRUE;
+                if (copy[i] == '"') {
+                    in_quotes = TRUE;
+                    i++;
+                }
+                token_start = &copy[i];
+                token_size++;
+            }
+        } else {
+            if (in_quotes) {
+                if ((copy[i] == '\0') || (copy[i] == '"')) {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                    in_quotes = FALSE;
+                } else {
+                    token_size++;
+                }
+            } else {
+                if (copy[i] == ' ' || copy[i] == '\0') {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                } else {
+                    token_size++;
+                }
+            }
+        }
+    }
+
+    int num = g_slist_length(tokens) - 1;
+
+    // if num args not valid return NULL
+    if ((num < min) || (num > max)) {
+        g_slist_free_full(tokens, g_free); // tokens come from g_strndup()
+        free(copy);                        // copy comes from strdup()
+        return NULL;
+
+    // if min allowed is 0 and 0 found, return empty char* array
+    } else if (min == 0 && num == 0) {
+        g_slist_free_full(tokens, g_free);
+        gchar **args = malloc((num + 1) * sizeof(*args));
+        args[0] = NULL;
+        free(copy);
+        return args;
+
+    // otherwise return args array
+    } else {
+        gchar **args = malloc((num + 1) * sizeof(*args));
+        GSList *token = tokens;
+        token = g_slist_next(token); // skip the command token itself
+        int arg_count = 0;
+
+        while (token != NULL) {
+            args[arg_count++] = strdup(token->data);
+            token = g_slist_next(token);
+        }
+
+        args[arg_count] = NULL;
+        g_slist_free_full(tokens, g_free);
+        free(copy);
+
+        return args;
+    }
+}
+
+/*
+ * Take a full line of input and return an array of strings representing
+ * the arguments of a command.  This function handles when the last parameter
+ * to the command is free text e.g.
+ *
+ * /msg user@host here is a message
+ *
+ * If the number of arguments found is less than min, or more than max
+ * NULL is returned.  The command token itself is not returned.
+ *
+ * inp - The line of input (NULL yields NULL)
+ * min - The minimum allowed number of arguments
+ * max - The maximum allowed number of arguments (argument max is free text)
+ *
+ * Returns - A newly allocated, NULL terminated array of strings holding the
+ * arguments of the command, or NULL if the validation fails.
+ *
+ * E.g. the following input line:
+ *
+ * /cmd arg1 arg2 some free text
+ *
+ * Will return a pointer to the following array:
+ *
+ * { "arg1", "arg2", "some free text", NULL }
+ *
+ */
+gchar **
+parse_args_with_freetext(const char * const inp, int min, int max)
+{
+    if (inp == NULL) {
+        return NULL;
+    }
+
+    // copy and strip input of leading/trailing whitespace
+    char *copy = strdup(inp);
+    g_strstrip(copy);
+
+    int inp_size = strlen(copy);
+    gboolean in_token = FALSE;
+    gboolean in_freetext = FALSE;
+    gboolean in_quotes = FALSE;
+    char *token_start = &copy[0];
+    int token_size = 0;
+    int num_tokens = 0;
+    GSList *tokens = NULL;
+
+    // add tokens to GSList
+    int i;
+    for (i = 0; i <= inp_size; i++) {
+        if (!in_token) {
+            if (copy[i] == ' ') {
+                continue;
+            } else {
+                in_token = TRUE;
+                num_tokens++;
+                if (num_tokens == max + 1) {
+                    in_freetext = TRUE;
+                } else if (copy[i] == '"') {
+                    in_quotes = TRUE;
+                    i++;
+                }
+                if (copy[i] == '"') {
+                    token_start = &copy[i+1];
+                } else {
+                    token_start = &copy[i];
+                }
+                if (copy[i] != '"') {
+                    token_size++;
+                }
+            }
+        } else {
+            if (in_quotes) {
+                if ((copy[i] == '\0') || (copy[i] == '"')) {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                    in_quotes = FALSE;
+                } else {
+                    if (copy[i] != '"') {
+                        token_size++;
+                    }
+                }
+            } else {
+                if ((!in_freetext && copy[i] == ' ') || copy[i] == '\0') {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                } else {
+                    if (copy[i] != '"') {
+                        token_size++;
+                    }
+                }
+            }
+        }
+    }
+
+    int num = g_slist_length(tokens) - 1;
+
+    // if num args not valid return NULL
+    if ((num < min) || (num > max)) {
+        g_slist_free_full(tokens, g_free); // tokens come from g_strndup()
+        free(copy);
+        return NULL;
+
+    // if min allowed is 0 and 0 found, return empty char* array
+    } else if (min == 0 && num == 0) {
+        g_slist_free_full(tokens, g_free); // was leaked on this path
+        free(copy);                        // was leaked on this path
+        return calloc(1, sizeof(gchar *)); // single zeroed slot: { NULL }
+
+    // otherwise return args array
+    } else {
+        gchar **args = malloc((num + 1) * sizeof(*args));
+        GSList *token = tokens;
+        token = g_slist_next(token); // skip the command token itself
+        int arg_count = 0;
+
+        while (token != NULL) {
+            args[arg_count++] = strdup(token->data);
+            token = g_slist_next(token);
+        }
+
+        args[arg_count] = NULL;
+        g_slist_free_full(tokens, g_free);
+        free(copy);
+
+        return args;
+    }
+}
+
+int
+count_tokens(char *string)
+{
+    int num_tokens = 0;
+
+    // if no quotes, use glib
+    if (g_strrstr(string, "\"") == NULL) {
+        gchar **tokens = g_strsplit(string, " ", 0);
+        num_tokens = g_strv_length(tokens);
+        g_strfreev(tokens);
+
+    // else count tokens including quoted
+    } else {
+        int length = strlen(string);
+        int i = 0;
+        gboolean in_quotes = FALSE;
+
+        // include first token
+        num_tokens++;
+
+        for (i = 0; i < length; i++) {
+            if (string[i] == ' ') {
+                if (!in_quotes) {
+                    num_tokens++;
+                }
+            } else if (string[i] == '"') {
+                if (in_quotes) {
+                    in_quotes = FALSE;
+                } else {
+                    in_quotes = TRUE;
+                }
+            }
+        }
+    }
+
+    return num_tokens;
+}
+
+char *
+get_start(char *string, int tokens)
+{
+    char *result_str = NULL;
+    int num_tokens = 0;
+    int length = strlen(string);
+    int i = 0;
+    gboolean in_quotes = FALSE;
+    GString *result = g_string_new("");
+
+    // include first token
+    num_tokens++;
+
+    for (i = 0; i < length; i++) {
+        if (num_tokens < tokens) {
+            g_string_append_c(result, string[i]);
+        }
+        if (string[i] == ' ') {
+            if (!in_quotes) {
+                num_tokens++;
+            }
+        } else if (string[i] == '"') {
+            if (in_quotes) {
+                in_quotes = FALSE;
+            } else {
+                in_quotes = TRUE;
+            }
+        }
+    }
+
+    result_str = result->str;
+    g_string_free(result, FALSE);
+
+    return result_str;
+}
+
+
-- 
cgit 1.4.1-2-gfad0


From 5233000498a30c61b22f68ac34ff10d60815f979 Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Thu, 11 Jul 2013 23:03:20 +0100
Subject: Removed if clause in parser

---
 src/tools/parser.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/tools/parser.c')

diff --git a/src/tools/parser.c b/src/tools/parser.c
index c3190a96..96c56628 100644
--- a/src/tools/parser.c
+++ b/src/tools/parser.c
@@ -205,8 +205,6 @@ parse_args_with_freetext(const char * const inp, int min, int max)
                     token_start = &copy[i+1];
                 } else {
                     token_start = &copy[i];
-                }
-                if (copy[i] != '"') {
                     token_size++;
                 }
             }
-- 
cgit 1.4.1-2-gfad0


From bb550fed5547c7885e3f9c486051c742ac0b9b67 Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Fri, 12 Jul 2013 00:46:33 +0100
Subject: Handle unicode chars in command parser

---
 src/tools/parser.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'src/tools/parser.c')

diff --git a/src/tools/parser.c b/src/tools/parser.c
index 96c56628..c8335879 100644
--- a/src/tools/parser.c
+++ b/src/tools/parser.c
@@ -58,7 +58,7 @@ parse_args(const char * const inp, int min, int max)
     char *copy = strdup(inp);
     g_strstrip(copy);
 
-    int inp_size = strlen(copy);
+    int inp_size = g_utf8_strlen(copy, -1);
     gboolean in_token = FALSE;
     gboolean in_quotes = FALSE;
     char *token_start = &copy[0];
@@ -67,43 +67,48 @@ parse_args(const char * const inp, int min, int max)
 
     // add tokens to GSList
     int i;
-    for (i = 0; i <= inp_size; i++) {
+    for (i = 0; i < inp_size; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
         if (!in_token) {
-            if (copy[i] == ' ') {
+            if (curr_uni  == ' ') {
                 continue;
             } else {
                 in_token = TRUE;
-                if (copy[i] == '"') {
+                if (curr_uni == '"') {
                     in_quotes = TRUE;
                     i++;
                 }
-                token_start = &copy[i];
-                token_size++;
+                token_start = curr_ch;
+                token_size += g_unichar_to_utf8(curr_uni, NULL);
             }
         } else {
             if (in_quotes) {
-                if ((copy[i] == '\0') || (copy[i] == '"')) {
+                if (curr_uni == '"') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                     in_quotes = FALSE;
                 } else {
-                    token_size++;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
             } else {
-                if (copy[i] == ' ' || copy[i] == '\0') {
+                if (curr_uni == ' ') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                 } else {
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                     token_size++;
                 }
             }
         }
     }
 
+    tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+
     int num = g_slist_length(tokens) - 1;
 
     // if num args not valid return NULL
-- 
cgit 1.4.1-2-gfad0


From 51786f67a6f453ff5ac96aee08cf814eb3bcfd4d Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Sun, 14 Jul 2013 00:14:36 +0100
Subject: Implemented parse_args with unicode compatibility

---
 src/tools/parser.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'src/tools/parser.c')

diff --git a/src/tools/parser.c b/src/tools/parser.c
index c8335879..42b1165f 100644
--- a/src/tools/parser.c
+++ b/src/tools/parser.c
@@ -65,11 +65,18 @@ parse_args(const char * const inp, int min, int max)
     int token_size = 0;
     GSList *tokens = NULL;
 
+
     // add tokens to GSList
     int i;
     for (i = 0; i < inp_size; i++) {
         gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
         gunichar curr_uni = g_utf8_get_char(curr_ch);
+
+        gchar *character = malloc(7);
+        gint num_written = 0;
+        num_written = g_unichar_to_utf8(curr_uni, character);
+        character[num_written] = '\0';
+
         if (!in_token) {
             if (curr_uni  == ' ') {
                 continue;
@@ -78,9 +85,14 @@ parse_args(const char * const inp, int min, int max)
                 if (curr_uni == '"') {
                     in_quotes = TRUE;
                     i++;
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    gunichar next_uni = g_utf8_get_char(next_ch);
+                    token_start = next_ch;
+                    token_size += g_unichar_to_utf8(next_uni, NULL);
+                } else {
+                    token_start = curr_ch;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
-                token_start = curr_ch;
-                token_size += g_unichar_to_utf8(curr_uni, NULL);
             }
         } else {
             if (in_quotes) {
@@ -101,13 +113,14 @@ parse_args(const char * const inp, int min, int max)
                     in_token = FALSE;
                 } else {
                     token_size += g_unichar_to_utf8(curr_uni, NULL);
-                    token_size++;
                 }
             }
         }
     }
 
-    tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    if (in_token) {
+        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    }
 
     int num = g_slist_length(tokens) - 1;
 
-- 
cgit 1.4.1-2-gfad0


From 4d35031cb077a3aa03620d9372747dd69229b7da Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Sun, 14 Jul 2013 00:24:57 +0100
Subject: Implemented parse_args_with_freetext with unicode compatibility

---
 src/tools/parser.c | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

(limited to 'src/tools/parser.c')

diff --git a/src/tools/parser.c b/src/tools/parser.c
index 42b1165f..b679b9f2 100644
--- a/src/tools/parser.c
+++ b/src/tools/parser.c
@@ -65,18 +65,12 @@ parse_args(const char * const inp, int min, int max)
     int token_size = 0;
     GSList *tokens = NULL;
 
-
     // add tokens to GSList
     int i;
     for (i = 0; i < inp_size; i++) {
         gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
         gunichar curr_uni = g_utf8_get_char(curr_ch);
 
-        gchar *character = malloc(7);
-        gint num_written = 0;
-        num_written = g_unichar_to_utf8(curr_uni, character);
-        character[num_written] = '\0';
-
         if (!in_token) {
             if (curr_uni  == ' ') {
                 continue;
@@ -195,7 +189,7 @@ parse_args_with_freetext(const char * const inp, int min, int max)
     char *copy = strdup(inp);
     g_strstrip(copy);
 
-    int inp_size = strlen(copy);
+    int inp_size = g_utf8_strlen(copy, -1);
     gboolean in_token = FALSE;
     gboolean in_freetext = FALSE;
     gboolean in_quotes = FALSE;
@@ -206,54 +200,66 @@ parse_args_with_freetext(const char * const inp, int min, int max)
 
     // add tokens to GSList
     int i;
-    for (i = 0; i <= inp_size; i++) {
+    for (i = 0; i < inp_size; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
         if (!in_token) {
-            if (copy[i] == ' ') {
+            if (curr_uni == ' ') {
                 continue;
             } else {
                 in_token = TRUE;
                 num_tokens++;
                 if (num_tokens == max + 1) {
                     in_freetext = TRUE;
-                } else if (copy[i] == '"') {
+                } else if (curr_uni == '"') {
                     in_quotes = TRUE;
                     i++;
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    gunichar next_uni = g_utf8_get_char(next_ch);
+                    token_start = next_ch;
+                    token_size += g_unichar_to_utf8(next_uni, NULL);
                 }
-                if (copy[i] == '"') {
-                    token_start = &copy[i+1];
+                if (curr_uni == '"') {
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    token_start = next_ch;
                 } else {
-                    token_start = &copy[i];
-                    token_size++;
+                    token_start = curr_ch;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
             }
         } else {
             if (in_quotes) {
-                if ((copy[i] == '\0') || (copy[i] == '"')) {
+                if (curr_uni == '"') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                     in_quotes = FALSE;
                 } else {
-                    if (copy[i] != '"') {
-                        token_size++;
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
                     }
                 }
             } else {
-                if ((!in_freetext && copy[i] == ' ') || copy[i] == '\0') {
+                if (!in_freetext && curr_uni == ' ') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                 } else {
-                    if (copy[i] != '"') {
-                        token_size++;
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
                     }
                 }
             }
         }
     }
 
+    if (in_token) {
+        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    }
+
     int num = g_slist_length(tokens) - 1;
 
     // if num args not valid return NULL
-- 
cgit 1.4.1-2-gfad0


From e7478d8cb8c55db91628a059ff9f1065bfb9cf0e Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Sun, 14 Jul 2013 00:46:56 +0100
Subject: Added parser tests

---
 src/tools/parser.c  |   2 -
 tests/test_parser.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 2 deletions(-)

(limited to 'src/tools/parser.c')

diff --git a/src/tools/parser.c b/src/tools/parser.c
index b679b9f2..595032c3 100644
--- a/src/tools/parser.c
+++ b/src/tools/parser.c
@@ -367,5 +367,3 @@ get_start(char *string, int tokens)
 
     return result_str;
 }
-
-
diff --git a/tests/test_parser.c b/tests/test_parser.c
index 58e3f3e7..e442baee 100644
--- a/tests/test_parser.c
+++ b/tests/test_parser.c
@@ -279,6 +279,124 @@ parse_cmd_freetext_with_many_quoted_and_many_spaces(void)
     assert_string_equals("and heres the free text", result[2]);
     g_strfreev(result);
 }
+
+void
+count_one_token(void)
+{
+    char *inp = "one";
+    int result = count_tokens(inp);
+
+    assert_int_equals(1, result);
+}
+
+void
+count_one_token_quoted_no_whitespace(void)
+{
+    char *inp = "\"one\"";
+    int result = count_tokens(inp);
+
+    assert_int_equals(1, result);
+}
+
+void
+count_one_token_quoted_with_whitespace(void)
+{
+    char *inp = "\"one two\"";
+    int result = count_tokens(inp);
+
+    assert_int_equals(1, result);
+}
+
+void
+count_two_tokens(void)
+{
+    char *inp = "one two";
+    int result = count_tokens(inp);
+
+    assert_int_equals(2, result);
+}
+
+void
+count_two_tokens_first_quoted(void)
+{
+    char *inp = "\"one and\" two";
+    int result = count_tokens(inp);
+
+    assert_int_equals(2, result);
+}
+
+void
+count_two_tokens_second_quoted(void)
+{
+    char *inp = "one \"two and\"";
+    int result = count_tokens(inp);
+
+    assert_int_equals(2, result);
+}
+
+void
+count_two_tokens_both_quoted(void)
+{
+    char *inp = "\"one and then\" \"two and\"";
+    int result = count_tokens(inp);
+
+    assert_int_equals(2, result);
+}
+
+void
+get_first_of_one(void)
+{
+    char *inp = "one";
+    char *result = get_start(inp, 2);
+
+    assert_string_equals("one", result);
+}
+
+void
+get_first_of_two(void)
+{
+    char *inp = "one two";
+    char *result = get_start(inp, 2);
+
+    assert_string_equals("one ", result);
+}
+
+void
+get_first_two_of_three(void)
+{
+    char *inp = "one two three";
+    char *result = get_start(inp, 3);
+
+    assert_string_equals("one two ", result);
+}
+
+void
+get_first_two_of_three_first_quoted(void)
+{
+    char *inp = "\"one\" two three";
+    char *result = get_start(inp, 3);
+
+    assert_string_equals("\"one\" two ", result);
+}
+
+void
+get_first_two_of_three_second_quoted(void)
+{
+    char *inp = "one \"two\" three";
+    char *result = get_start(inp, 3);
+
+    assert_string_equals("one \"two\" ", result);
+}
+
+void
+get_first_two_of_three_first_and_second_quoted(void)
+{
+    char *inp = "\"one\" \"two\" three";
+    char *result = get_start(inp, 3);
+
+    assert_string_equals("\"one\" \"two\" ", result);
+}
+
 void
 register_parser_tests(void)
 {
@@ -307,4 +425,17 @@ register_parser_tests(void)
     TEST(parse_cmd_freetext_with_quoted_and_space);
     TEST(parse_cmd_freetext_with_quoted_and_many_spaces);
     TEST(parse_cmd_freetext_with_many_quoted_and_many_spaces);
+    TEST(count_one_token);
+    TEST(count_one_token_quoted_no_whitespace);
+    TEST(count_one_token_quoted_with_whitespace);
+    TEST(count_two_tokens);
+    TEST(count_two_tokens_first_quoted);
+    TEST(count_two_tokens_second_quoted);
+    TEST(count_two_tokens_both_quoted);
+    TEST(get_first_of_one);
+    TEST(get_first_of_two);
+    TEST(get_first_two_of_three);
+    TEST(get_first_two_of_three_first_quoted);
+    TEST(get_first_two_of_three_second_quoted);
+    TEST(get_first_two_of_three_first_and_second_quoted);
 }
-- 
cgit 1.4.1-2-gfad0


From 7f82dc42f593f6410e4d0058add4b91112047e63 Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Sun, 14 Jul 2013 01:00:11 +0100
Subject: Remaining parser function unicode compatible

---
 src/tools/parser.c | 59 +++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

(limited to 'src/tools/parser.c')

diff --git a/src/tools/parser.c b/src/tools/parser.c
index 595032c3..f4cfc3d2 100644
--- a/src/tools/parser.c
+++ b/src/tools/parser.c
@@ -297,34 +297,27 @@ parse_args_with_freetext(const char * const inp, int min, int max)
 int
 count_tokens(char *string)
 {
+    int length = g_utf8_strlen(string, -1);
+    gboolean in_quotes = FALSE;
     int num_tokens = 0;
+    int i = 0;
 
-    // if no quotes, use glib
-    if (g_strrstr(string, "\"") == NULL) {
-        gchar **tokens = g_strsplit(string, " ", 0);
-        num_tokens = g_strv_length(tokens);
-        g_strfreev(tokens);
-
-    // else count tokens including quoted
-    } else {
-        int length = strlen(string);
-        int i = 0;
-        gboolean in_quotes = FALSE;
+    // include first token
+    num_tokens++;
 
-        // include first token
-        num_tokens++;
+    for (i = 0; i < length; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(string, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
 
-        for (i = 0; i < length; i++) {
-            if (string[i] == ' ') {
-                if (!in_quotes) {
-                    num_tokens++;
-                }
-            } else if (string[i] == '"') {
-                if (in_quotes) {
-                    in_quotes = FALSE;
-                } else {
-                    in_quotes = TRUE;
-                }
+        if (curr_uni == ' ') {
+            if (!in_quotes) {
+                num_tokens++;
+            }
+        } else if (curr_uni == '"') {
+            if (in_quotes) {
+                in_quotes = FALSE;
+            } else {
+                in_quotes = TRUE;
             }
         }
     }
@@ -335,25 +328,31 @@ count_tokens(char *string)
 char *
 get_start(char *string, int tokens)
 {
+    GString *result = g_string_new("");
+    int length = g_utf8_strlen(string, -1);
+    gboolean in_quotes = FALSE;
     char *result_str = NULL;
     int num_tokens = 0;
-    int length = strlen(string);
     int i = 0;
-    gboolean in_quotes = FALSE;
-    GString *result = g_string_new("");
 
     // include first token
     num_tokens++;
 
     for (i = 0; i < length; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(string, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
         if (num_tokens < tokens) {
-            g_string_append_c(result, string[i]);
+            gchar uni_char[7]; // up to 6 UTF-8 bytes + NUL; stack, so no leak
+            int len = g_unichar_to_utf8(curr_uni, uni_char);
+            uni_char[len] = '\0';
+            g_string_append(result, uni_char);
         }
-        if (string[i] == ' ') {
+        if (curr_uni == ' ') {
             if (!in_quotes) {
                 num_tokens++;
             }
-        } else if (string[i] == '"') {
+        } else if (curr_uni == '"') {
             if (in_quotes) {
                 in_quotes = FALSE;
             } else {
-- 
cgit 1.4.1-2-gfad0