about summary refs log tree commit diff stats
path: root/src/tools/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/parser.c')
-rw-r--r--src/tools/parser.c368
1 files changed, 368 insertions, 0 deletions
diff --git a/src/tools/parser.c b/src/tools/parser.c
new file mode 100644
index 00000000..f4cfc3d2
--- /dev/null
+++ b/src/tools/parser.c
@@ -0,0 +1,368 @@
+/*
+ * parser.c
+ *
+ * Copyright (C) 2012, 2013 James Booth <boothj5@gmail.com>
+ *
+ * This file is part of Profanity.
+ *
+ * Profanity is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Profanity is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Profanity.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <glib.h>
+
+/*
+ * Take a full line of input and return an array of strings representing
+ * the arguments of a command.
+ * If the number of arguments found is less than min, or more than max
+ * NULL is returned.
+ *
+ * inp - The line of input
+ * min - The minimum allowed number of arguments
+ * max - The maxmimum allowed number of arguments
+ *
+ * Returns - An NULL terminated array of strings representing the aguments
+ * of the command, or NULL if the validation fails.
+ *
+ * E.g. the following input line:
+ *
+ * /cmd arg1 arg2
+ *
+ * Will return a pointer to the following array:
+ *
+ * { "arg1", "arg2", NULL }
+ *
+ */
+gchar **
+parse_args(const char * const inp, int min, int max)
+{
+    if (inp == NULL) {
+        return NULL;
+    }
+
+    // copy and strip input of leading/trailing whitepsace
+    char *copy = strdup(inp);
+    g_strstrip(copy);
+
+    int inp_size = g_utf8_strlen(copy, -1);
+    gboolean in_token = FALSE;
+    gboolean in_quotes = FALSE;
+    char *token_start = &copy[0];
+    int token_size = 0;
+    GSList *tokens = NULL;
+
+    // add tokens to GSList
+    int i;
+    for (i = 0; i < inp_size; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
+        if (!in_token) {
+            if (curr_uni  == ' ') {
+                continue;
+            } else {
+                in_token = TRUE;
+                if (curr_uni == '"') {
+                    in_quotes = TRUE;
+                    i++;
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    gunichar next_uni = g_utf8_get_char(next_ch);
+                    token_start = next_ch;
+                    token_size += g_unichar_to_utf8(next_uni, NULL);
+                } else {
+                    token_start = curr_ch;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
+                }
+            }
+        } else {
+            if (in_quotes) {
+                if (curr_uni == '"') {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                    in_quotes = FALSE;
+                } else {
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
+                }
+            } else {
+                if (curr_uni == ' ') {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                } else {
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
+                }
+            }
+        }
+    }
+
+    if (in_token) {
+        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    }
+
+    int num = g_slist_length(tokens) - 1;
+
+    // if num args not valid return NULL
+    if ((num < min) || (num > max)) {
+        g_slist_free_full(tokens, free);
+        g_free(copy);
+        return NULL;
+
+    // if min allowed is 0 and 0 found, return empty char* array
+    } else if (min == 0 && num == 0) {
+        g_slist_free_full(tokens, free);
+        gchar **args = malloc((num + 1) * sizeof(*args));
+        args[0] = NULL;
+        g_free(copy);
+        return args;
+
+    // otherwise return args array
+    } else {
+        gchar **args = malloc((num + 1) * sizeof(*args));
+        GSList *token = tokens;
+        token = g_slist_next(token);
+        int arg_count = 0;
+
+        while (token != NULL) {
+            args[arg_count++] = strdup(token->data);
+            token = g_slist_next(token);
+        }
+
+        args[arg_count] = NULL;
+        g_slist_free_full(tokens, free);
+        g_free(copy);
+
+        return args;
+    }
+}
+
+/*
+ * Take a full line of input and return an array of strings representing
+ * the arguments of a command.  This function handles when the last parameter
+ * to the command is free text e.g.
+ *
+ * /msg user@host here is a message
+ *
+ * If the number of arguments found is less than min, or more than max
+ * NULL is returned.
+ *
+ * inp - The line of input
+ * min - The minimum allowed number of arguments
+ * max - The maxmimum allowed number of arguments
+ *
+ * Returns - An NULL terminated array of strings representing the aguments
+ * of the command, or NULL if the validation fails.
+ *
+ * E.g. the following input line:
+ *
+ * /cmd arg1 arg2 some free text
+ *
+ * Will return a pointer to the following array:
+ *
+ * { "arg1", "arg2", "some free text", NULL }
+ *
+ */
+gchar **
+parse_args_with_freetext(const char * const inp, int min, int max)
+{
+    if (inp == NULL) {
+        return NULL;
+    }
+
+    // copy and strip input of leading/trailing whitepsace
+    char *copy = strdup(inp);
+    g_strstrip(copy);
+
+    int inp_size = g_utf8_strlen(copy, -1);
+    gboolean in_token = FALSE;
+    gboolean in_freetext = FALSE;
+    gboolean in_quotes = FALSE;
+    char *token_start = &copy[0];
+    int token_size = 0;
+    int num_tokens = 0;
+    GSList *tokens = NULL;
+
+    // add tokens to GSList
+    int i;
+    for (i = 0; i < inp_size; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
+        if (!in_token) {
+            if (curr_uni == ' ') {
+                continue;
+            } else {
+                in_token = TRUE;
+                num_tokens++;
+                if (num_tokens == max + 1) {
+                    in_freetext = TRUE;
+                } else if (curr_uni == '"') {
+                    in_quotes = TRUE;
+                    i++;
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    gunichar next_uni = g_utf8_get_char(next_ch);
+                    token_start = next_ch;
+                    token_size += g_unichar_to_utf8(next_uni, NULL);
+                }
+                if (curr_uni == '"') {
+                    gchar *next_ch = g_utf8_next_char(curr_ch);
+                    token_start = next_ch;
+                } else {
+                    token_start = curr_ch;
+                    token_size += g_unichar_to_utf8(curr_uni, NULL);
+                }
+            }
+        } else {
+            if (in_quotes) {
+                if (curr_uni == '"') {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                    in_quotes = FALSE;
+                } else {
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
+                    }
+                }
+            } else {
+                if (!in_freetext && curr_uni == ' ') {
+                    tokens = g_slist_append(tokens, g_strndup(token_start,
+                        token_size));
+                    token_size = 0;
+                    in_token = FALSE;
+                } else {
+                    if (curr_uni != '"') {
+                        token_size += g_unichar_to_utf8(curr_uni, NULL);
+                    }
+                }
+            }
+        }
+    }
+
+    if (in_token) {
+        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
+    }
+
+    int num = g_slist_length(tokens) - 1;
+
+    // if num args not valid return NULL
+    if ((num < min) || (num > max)) {
+        g_slist_free_full(tokens, free);
+        free(copy);
+        return NULL;
+
+    // if min allowed is 0 and 0 found, return empty char* array
+    } else if (min == 0 && num == 0) {
+        gchar **args = malloc((num + 1) * sizeof(*args));
+        args[0] = NULL;
+        return args;
+
+    // otherwise return args array
+    } else {
+        gchar **args = malloc((num + 1) * sizeof(*args));
+        GSList *token = tokens;
+        token = g_slist_next(token);
+        int arg_count = 0;
+
+        while (token != NULL) {
+            args[arg_count++] = strdup(token->data);
+            token = g_slist_next(token);
+        }
+
+        args[arg_count] = NULL;
+        g_slist_free_full(tokens, free);
+        free(copy);
+
+        return args;
+    }
+}
+
+int
+count_tokens(char *string)
+{
+    int length = g_utf8_strlen(string, -1);
+    gboolean in_quotes = FALSE;
+    int num_tokens = 0;
+    int i = 0;
+
+    // include first token
+    num_tokens++;
+
+    for (i = 0; i < length; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(string, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
+        if (curr_uni == ' ') {
+            if (!in_quotes) {
+                num_tokens++;
+            }
+        } else if (curr_uni == '"') {
+            if (in_quotes) {
+                in_quotes = FALSE;
+            } else {
+                in_quotes = TRUE;
+            }
+        }
+    }
+
+    return num_tokens;
+}
+
+char *
+get_start(char *string, int tokens)
+{
+    GString *result = g_string_new("");
+    int length = g_utf8_strlen(string, -1);
+    gboolean in_quotes = FALSE;
+    char *result_str = NULL;
+    int num_tokens = 0;
+    int i = 0;
+
+    // include first token
+    num_tokens++;
+
+    for (i = 0; i < length; i++) {
+        gchar *curr_ch = g_utf8_offset_to_pointer(string, i);
+        gunichar curr_uni = g_utf8_get_char(curr_ch);
+
+        if (num_tokens < tokens) {
+            gchar *uni_char = malloc(7);
+            int len = g_unichar_to_utf8(curr_uni, uni_char);
+            uni_char[len] = '\0';
+            g_string_append(result, uni_char);
+        }
+        if (curr_uni == ' ') {
+            if (!in_quotes) {
+                num_tokens++;
+            }
+        } else if (curr_uni == '"') {
+            if (in_quotes) {
+                in_quotes = FALSE;
+            } else {
+                in_quotes = TRUE;
+            }
+        }
+    }
+
+    result_str = result->str;
+    g_string_free(result, FALSE);
+
+    return result_str;
+}