6 files changed, 264 insertions, 109 deletions
diff --git a/Makefile.am b/Makefile.am
index f161b92a..f7c3f825 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -14,8 +14,8 @@ profanity_SOURCES = \
 	src/ui/titlebar.c src/ui/statusbar.c src/ui/inputwin.c \
 	src/ui/console.c src/ui/notifier.c src/ui/notifier.h \
 	src/command/command.h src/command/command.c src/command/history.c \
-	src/command/history.h src/command/parser.c \
-	src/command/parser.h \
+	src/command/history.h src/tools/parser.c \
+	src/tools/parser.h \
 	src/tools/autocomplete.c src/tools/autocomplete.h \
 	src/tools/history.c src/tools/history.h \
 	src/tools/tinyurl.c src/tools/tinyurl.h \
@@ -40,8 +40,8 @@ tests_testsuite_SOURCES = \
 	src/ui/titlebar.c src/ui/statusbar.c src/ui/inputwin.c \
 	src/ui/console.c src/ui/notifier.c src/ui/notifier.h \
 	src/command/command.h src/command/command.c src/command/history.c \
-	src/command/history.h src/command/parser.c \
-	src/command/parser.h \
+	src/command/history.h src/tools/parser.c \
+	src/tools/parser.h \
 	src/tools/autocomplete.c src/tools/autocomplete.h \
 	src/tools/history.c src/tools/history.h \
 	src/tools/tinyurl.c src/tools/tinyurl.h \
diff --git a/src/command/command.c b/src/command/command.c
index 6bcc9986..f15de124 100644
--- a/src/command/command.c
+++ b/src/command/command.c
@@ -30,7 +30,6 @@
 #include "chat_session.h"
 #include "command/command.h"
 #include "command/history.h"
-#include "command/parser.h"
 #include "common.h"
 #include "config/accounts.h"
 #include "config/preferences.h"
@@ -41,6 +40,7 @@
 #include "muc.h"
 #include "profanity.h"
 #include "tools/autocomplete.h"
+#include "tools/parser.h"
 #include "tools/tinyurl.h"
 #include "ui/ui.h"
 #include "xmpp/xmpp.h"
diff --git a/src/tools/autocomplete.c b/src/tools/autocomplete.c
index b53e49bd..0c56cac7 100644
--- a/src/tools/autocomplete.c
+++ b/src/tools/autocomplete.c
@@ -24,7 +24,8 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "autocomplete.h"
+#include "tools/autocomplete.h"
+#include "tools/parser.h"
 
 struct autocomplete_t {
     GSList *items;
@@ -259,80 +260,6 @@ autocomplete_param_with_ac(char *input, int *size, char *command,
     return auto_msg;
 }
 
-int
-_count_tokens(char *string)
-{
-    int num_tokens = 0;
-
-    // if no quotes, use glib
-    if (g_strrstr(string, "\"") == NULL) {
-        gchar **tokens = g_strsplit(string, " ", 0);
-        num_tokens = g_strv_length(tokens);
-        g_strfreev(tokens);
-
-    // else count tokens including quoted
-    } else {
-        int length = strlen(string);
-        int i = 0;
-        gboolean in_quotes = FALSE;
-
-        // include first token
-        num_tokens++;
-
-        for (i = 0; i < length; i++) {
-            if (string[i] == ' ') {
-                if (!in_quotes) {
-                    num_tokens++;
-                }
-            } else if (string[i] == '"') {
-                if (in_quotes) {
-                    in_quotes = FALSE;
-                } else {
-                    in_quotes = TRUE;
-                }
-            }
-        }
-    }
-
-    return num_tokens;
-}
-
-char *
-_get_start(char *string, int tokens)
-{
-    char *result_str = NULL;
-    int num_tokens = 0;
-    int length = strlen(string);
-    int i = 0;
-    gboolean in_quotes = FALSE;
-    GString *result = g_string_new("");
-
-    // include first token
-    num_tokens++;
-
-    for (i = 0; i < length; i++) {
-        if (num_tokens < tokens) {
-            g_string_append_c(result, string[i]);
-        }
-        if (string[i] == ' ') {
-            if (!in_quotes) {
-                num_tokens++;
-            }
-        } else if (string[i] == '"') {
-            if (in_quotes) {
-                in_quotes = FALSE;
-            } else {
-                in_quotes = TRUE;
-            }
-        }
-    }
-
-    result_str = result->str;
-    g_string_free(result, FALSE);
-
-    return result_str;
-}
-
 char *
 autocomplete_param_no_with_func(char *input, int *size, char *command,
     int arg_number, autocomplete_func func)
@@ -352,11 +279,11 @@ autocomplete_param_no_with_func(char *input, int *size, char *command,
         g_strstrip(inp_cpy);
 
         // count tokens properly
-        int num_tokens = _count_tokens(inp_cpy);
+        int num_tokens = count_tokens(inp_cpy);
 
         // if correct number of tokens, then candidate for autocompletion of last param
         if (num_tokens == arg_number) {
-            gchar *start_str = _get_start(inp_cpy, arg_number);
+            gchar *start_str = get_start(inp_cpy, arg_number);
             gchar *comp_str = g_strdup(&inp_cpy[strlen(start_str)]);
 
             // autocomplete param
diff --git a/src/command/parser.c b/src/tools/parser.c
index d7dfebab..f4cfc3d2 100644
--- a/src/command/parser.c
+++ b/src/tools/parser.c
@@ -58,7 +58,7 @@ parse_args(const char * const inp, int min, int max)
     char *copy = strdup(inp);
     g_strstrip(copy);
 
-    int inp_size = strlen(copy);
+    int inp_size = g_utf8_strlen(copy, -1);
     gboolean in_token = FALSE;
     gboolean in_quotes = FALSE;
     char *token_start = &copy[0];
@@ -67,43 +67,55 @@ parse_args(const char * const inp, int min, int max)
 
     // add tokens to GSList
     int i;
-    for (i = 0; i <= inp_size; i++) {
+    for (i = 0; i < inp_size; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
         if (!in_token) {
-            if (copy[i] == ' ') {
+            if (curr_uni  == ' ') {
                 continue;
             } else {
                 in_token = TRUE;
-                if (copy[i] == '"') {
+                if (curr_uni == '"') {
                     in_quotes = TRUE;
                     i++;
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    gunichar next_uni = g_utf8_get_char(next_ch);
+                    token_start = next_ch;
+                    token_size += g_unichar_to_utf8(next_uni, NULL);
+                } else {
+                    token_start = curr_ch;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
-                token_start = &copy[i];
-                token_size++;
             }
         } else {
             if (in_quotes) {
-                if ((copy[i] == '\0') || (copy[i] == '"')) {
+                if (curr_uni == '"') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                     in_quotes = FALSE;
                 } else {
-                    token_size++;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
             } else {
-                if (copy[i] == ' ' || copy[i] == '\0') {
+                if (curr_uni == ' ') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                 } else {
-                    token_size++;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
             }
         }
     }
 
+    if (in_token) {
+        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    }
+
     int num = g_slist_length(tokens) - 1;
 
     // if num args not valid return NULL
@@ -177,7 +189,7 @@ parse_args_with_freetext(const char * const inp, int min, int max)
     char *copy = strdup(inp);
     g_strstrip(copy);
 
-    int inp_size = strlen(copy);
+    int inp_size = g_utf8_strlen(copy, -1);
     gboolean in_token = FALSE;
     gboolean in_freetext = FALSE;
     gboolean in_quotes = FALSE;
@@ -188,56 +200,66 @@ parse_args_with_freetext(const char * const inp, int min, int max)
 
     // add tokens to GSList
     int i;
-    for (i = 0; i <= inp_size; i++) {
+    for (i = 0; i < inp_size; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
         if (!in_token) {
-            if (copy[i] == ' ') {
+            if (curr_uni == ' ') {
                 continue;
             } else {
                 in_token = TRUE;
                 num_tokens++;
                 if (num_tokens == max + 1) {
                     in_freetext = TRUE;
-                } else if (copy[i] == '"') {
+                } else if (curr_uni == '"') {
                     in_quotes = TRUE;
                     i++;
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    gunichar next_uni = g_utf8_get_char(next_ch);
+                    token_start = next_ch;
+                    token_size += g_unichar_to_utf8(next_uni, NULL);
                 }
-                if (copy[i] == '"') {
-                    token_start = &copy[i+1];
+                if (curr_uni == '"') {
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    token_start = next_ch;
                 } else {
-                    token_start = &copy[i];
-                }
-                if (copy[i] != '"') {
-                    token_size++;
+                    token_start = curr_ch;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
             }
         } else {
             if (in_quotes) {
-                if ((copy[i] == '\0') || (copy[i] == '"')) {
+                if (curr_uni == '"') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                     in_quotes = FALSE;
                 } else {
-                    if (copy[i] != '"') {
-                        token_size++;
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
                     }
                 }
             } else {
-                if ((!in_freetext && copy[i] == ' ') || copy[i] == '\0') {
+                if (!in_freetext && curr_uni == ' ') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                 } else {
-                    if (copy[i] != '"') {
-                        token_size++;
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
                     }
                 }
             }
         }
     }
 
+    if (in_token) {
+        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    }
+
     int num = g_slist_length(tokens) - 1;
 
     // if num args not valid return NULL
@@ -271,3 +293,76 @@ parse_args_with_freetext(const char * const inp, int min, int max)
         return args;
     }
 }
+
+/*
+ * Count the tokens in a UTF-8 string. Tokens are separated by spaces, but
+ * spaces between a pair of double quotes do not split a token. The first
+ * token is always counted, so the result is at least 1 (NULL included).
+ */
+int
+count_tokens(char *string)
+{
+    gboolean in_quotes = FALSE;
+    int num_tokens = 1; // include first token
+    gchar *curr_ch = string;
+
+    // Advance pointer-wise: seeking by character index would rescan from
+    // the start of the string on every iteration.
+    while (curr_ch != NULL && *curr_ch != '\0') {
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
+        if (curr_uni == ' ') {
+            if (!in_quotes) {
+                num_tokens++;
+            }
+        } else if (curr_uni == '"') {
+            in_quotes = !in_quotes;
+        }
+
+        curr_ch = g_utf8_next_char(curr_ch);
+    }
+
+    return num_tokens;
+}
+
+/*
+ * Return a copy of the leading part of a UTF-8 string: every character up
+ * to and including the unquoted space that ends token number (tokens - 1).
+ * Spaces between a pair of double quotes do not end a token, and the quote
+ * characters themselves are copied. If the string holds fewer tokens than
+ * that, the whole string is copied; if tokens is 1 or less, or string is
+ * NULL, the result is an empty string.
+ *
+ * The result is newly allocated; the caller must release it with g_free().
+ */
+char *
+get_start(char *string, int tokens)
+{
+    GString *result = g_string_new("");
+    gboolean in_quotes = FALSE;
+    int num_tokens = 1; // include first token
+    gchar *curr_ch = string;
+
+    // Stop at the end of the input, or as soon as the requested number of
+    // tokens has been reached, since nothing further would be copied.
+    while (curr_ch != NULL && *curr_ch != '\0' && num_tokens < tokens) {
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
+        // Append straight into the GString: it manages its own storage, so
+        // no temporary per-character buffer has to be allocated and freed.
+        g_string_append_unichar(result, curr_uni);
+
+        if (curr_uni == ' ') {
+            if (!in_quotes) {
+                num_tokens++;
+            }
+        } else if (curr_uni == '"') {
+            in_quotes = !in_quotes;
+        }
+
+        curr_ch = g_utf8_next_char(curr_ch);
+    }
+
+    // hand the character data to the caller and free only the wrapper
+    return g_string_free(result, FALSE);
+}
diff --git a/src/command/parser.h b/src/tools/parser.h
index f191ed15..6f00cc90 100644
--- a/src/command/parser.h
+++ b/src/tools/parser.h
@@ -27,5 +27,7 @@
 
 gchar** parse_args(const char * const inp, int min, int max);
 gchar** parse_args_with_freetext(const char * const inp, int min, int max);
+int count_tokens(char *string); // tokens are split on spaces outside double quotes
+char* get_start(char *string, int tokens); // allocated copy of the text before token number 'tokens'; caller frees
 
 #endif
diff --git a/tests/test_parser.c b/tests/test_parser.c
index fd1b1e1c..e442baee 100644
--- a/tests/test_parser.c
+++ b/tests/test_parser.c
@@ -1,7 +1,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <head-unit.h>
-#include "command/parser.h"
+#include "tools/parser.h"
 
 void
 parse_null_returns_null(void)
@@ -279,6 +279,124 @@ parse_cmd_freetext_with_many_quoted_and_many_spaces(void)
     assert_string_equals("and heres the free text", result[2]);
     g_strfreev(result);
 }
+
+// A lone unquoted word is one token.
+void
+count_one_token(void)
+{
+    char *input = "one";
+    int num_tokens = count_tokens(input);
+    assert_int_equals(1, num_tokens);
+}
+
+// Quotes around a single word do not add tokens.
+void
+count_one_token_quoted_no_whitespace(void)
+{
+    char *input = "\"one\"";
+    int num_tokens = count_tokens(input);
+    assert_int_equals(1, num_tokens);
+}
+
+// A space inside quotes does not split the token.
+void
+count_one_token_quoted_with_whitespace(void)
+{
+    char *input = "\"one two\"";
+    int num_tokens = count_tokens(input);
+    assert_int_equals(1, num_tokens);
+}
+
+// Two unquoted words separated by a space are two tokens.
+void
+count_two_tokens(void)
+{
+    char *input = "one two";
+    int num_tokens = count_tokens(input);
+    assert_int_equals(2, num_tokens);
+}
+
+// A quoted phrase followed by a bare word is two tokens.
+void
+count_two_tokens_first_quoted(void)
+{
+    char *input = "\"one and\" two";
+    int num_tokens = count_tokens(input);
+    assert_int_equals(2, num_tokens);
+}
+
+// A bare word followed by a quoted phrase is two tokens.
+void
+count_two_tokens_second_quoted(void)
+{
+    char *input = "one \"two and\"";
+    int num_tokens = count_tokens(input);
+    assert_int_equals(2, num_tokens);
+}
+
+// Two quoted phrases separated by a space are two tokens.
+void
+count_two_tokens_both_quoted(void)
+{
+    char *input = "\"one and then\" \"two and\"";
+    int num_tokens = count_tokens(input);
+    assert_int_equals(2, num_tokens);
+}
+
+// Asking for more tokens than the input holds copies the whole input.
+void
+get_first_of_one(void)
+{
+    char *result = get_start("one", 2);
+    assert_string_equals("one", result);
+    g_free(result); // get_start returns a newly allocated string
+}
+
+// The first token is returned together with the space that ends it.
+void
+get_first_of_two(void)
+{
+    char *result = get_start("one two", 2);
+    assert_string_equals("one ", result);
+    g_free(result); // get_start returns a newly allocated string
+}
+
+// The first two tokens are returned, each with its trailing space.
+void
+get_first_two_of_three(void)
+{
+    char *result = get_start("one two three", 3);
+    assert_string_equals("one two ", result);
+    g_free(result); // get_start returns a newly allocated string
+}
+
+// Quotes around the first token are kept in the returned prefix.
+void
+get_first_two_of_three_first_quoted(void)
+{
+    char *result = get_start("\"one\" two three", 3);
+    assert_string_equals("\"one\" two ", result);
+    g_free(result); // get_start returns a newly allocated string
+}
+
+// Quotes around the second token are kept in the returned prefix.
+void
+get_first_two_of_three_second_quoted(void)
+{
+    char *result = get_start("one \"two\" three", 3);
+    assert_string_equals("one \"two\" ", result);
+    g_free(result); // get_start returns a newly allocated string
+}
+
+// Quotes around both leading tokens are kept in the returned prefix.
+void
+get_first_two_of_three_first_and_second_quoted(void)
+{
+    char *result = get_start("\"one\" \"two\" three", 3);
+    assert_string_equals("\"one\" \"two\" ", result);
+    g_free(result); // get_start returns a newly allocated string
+}
+
 void
 register_parser_tests(void)
 {
@@ -307,4 +425,17 @@ register_parser_tests(void)
     TEST(parse_cmd_freetext_with_quoted_and_space);
     TEST(parse_cmd_freetext_with_quoted_and_many_spaces);
     TEST(parse_cmd_freetext_with_many_quoted_and_many_spaces);
+    TEST(count_one_token);
+    TEST(count_one_token_quoted_no_whitespace);
+    TEST(count_one_token_quoted_with_whitespace);
+    TEST(count_two_tokens);
+    TEST(count_two_tokens_first_quoted);
+    TEST(count_two_tokens_second_quoted);
+    TEST(count_two_tokens_both_quoted);
+    TEST(get_first_of_one);
+    TEST(get_first_of_two);
+    TEST(get_first_two_of_three);
+    TEST(get_first_two_of_three_first_quoted);
+    TEST(get_first_two_of_three_second_quoted);
+    TEST(get_first_two_of_three_first_and_second_quoted);
 }