From 4d35031cb077a3aa03620d9372747dd69229b7da Mon Sep 17 00:00:00 2001
From: James Booth <boothj5@gmail.com>
Date: Sun, 14 Jul 2013 00:24:57 +0100
Subject: Implemented parse_args_with_freetext with unicode compatibility

---
 src/tools/parser.c | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

(limited to 'src/tools')

diff --git a/src/tools/parser.c b/src/tools/parser.c
index 42b1165f..b679b9f2 100644
--- a/src/tools/parser.c
+++ b/src/tools/parser.c
@@ -65,18 +65,12 @@ parse_args(const char * const inp, int min, int max)
     int token_size = 0;
     GSList *tokens = NULL;
 
-
     // add tokens to GSList
     int i;
     for (i = 0; i < inp_size; i++) {
         gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
         gunichar curr_uni = g_utf8_get_char(curr_ch);
 
-        gchar *character = malloc(7);
-        gint num_written = 0;
-        num_written = g_unichar_to_utf8(curr_uni, character);
-        character[num_written] = '\0';
-
         if (!in_token) {
             if (curr_uni  == ' ') {
                 continue;
@@ -195,7 +189,7 @@ parse_args_with_freetext(const char * const inp, int min, int max)
     char *copy = strdup(inp);
     g_strstrip(copy);
 
-    int inp_size = strlen(copy);
+    int inp_size = g_utf8_strlen(copy, -1);
     gboolean in_token = FALSE;
     gboolean in_freetext = FALSE;
     gboolean in_quotes = FALSE;
@@ -206,54 +200,66 @@ parse_args_with_freetext(const char * const inp, int min, int max)
 
     // add tokens to GSList
     int i;
-    for (i = 0; i <= inp_size; i++) {
+    for (i = 0; i < inp_size; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
         if (!in_token) {
-            if (copy[i] == ' ') {
+            if (curr_uni == ' ') {
                 continue;
             } else {
                 in_token = TRUE;
                 num_tokens++;
                 if (num_tokens == max + 1) {
                     in_freetext = TRUE;
-                } else if (copy[i] == '"') {
+                } else if (curr_uni == '"') {
                     in_quotes = TRUE;
                     i++;
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    gunichar next_uni = g_utf8_get_char(next_ch);
+                    token_start = next_ch;
+                    token_size += g_unichar_to_utf8(next_uni, NULL);
                 }
-                if (copy[i] == '"') {
-                    token_start = &copy[i+1];
+                if (curr_uni == '"') {
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    token_start = next_ch;
                 } else {
-                    token_start = &copy[i];
-                    token_size++;
+                    token_start = curr_ch;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                 }
             }
         } else {
             if (in_quotes) {
-                if ((copy[i] == '\0') || (copy[i] == '"')) {
+                if (curr_uni == '"') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                     in_quotes = FALSE;
                 } else {
-                    if (copy[i] != '"') {
-                        token_size++;
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
                     }
                 }
             } else {
-                if ((!in_freetext && copy[i] == ' ') || copy[i] == '\0') {
+                if (!in_freetext && curr_uni == ' ') {
                     tokens = g_slist_append(tokens, g_strndup(token_start,
                         token_size));
                     token_size = 0;
                     in_token = FALSE;
                 } else {
-                    if (copy[i] != '"') {
-                        token_size++;
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
                     }
                 }
             }
         }
     }
 
+    if (in_token) {
+        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    }
+
     int num = g_slist_length(tokens) - 1;
 
     // if num args not valid return NULL
-- 
cgit 1.4.1-2-gfad0