about summary refs log tree commit diff stats
path: root/apps/ex7.subx
blob: 73cb272069caa75d23c291a75578cd1dc32c0bcc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Example showing file syscalls.
#
# Create a file, open it for writing, write a character to it, close it, open
# it for reading, read a character from it, close it, delete it, and return
# the character read.
#
# To run:
#   $ ./bootstrap translate init.linux apps/ex7.subx -o apps/ex7
#   $ ./bootstrap run apps/ex7
# Expected result:
#   $ echo $?
#   97

== code
#   instruction                     effective address                                                   register    displacement    immediate
# . op          subop               mod             rm32          base        index         scale       r32
# . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes

# Program entry point: runs creat, open, write, close, open, read, close,
# unlink and exit in sequence through the syscall_* helpers (not defined in
# this file; presumably supplied by init.linux from the build command above).
# Before each call the arguments are loaded into ebx, ecx and edx in that order.
Entry:
    # creat(Filename, 0x180)  # 0x180 == 0600 octal
    bb/copy-to-ebx  Filename/imm32
    b9/copy-to-ecx  0x180/imm32/fixed-perms
    e8/call  syscall_creat/disp32

    # stream = open(Filename, O_WRONLY, 0x180)  # we can't use 'fd' because it looks like a hex byte
    bb/copy-to-ebx  Filename/imm32
    b9/copy-to-ecx  1/imm32/wronly
    ba/copy-to-edx  0x180/imm32/fixed-perms
    e8/call  syscall_open/disp32
    # . save stream (the value left in eax by the call)
    bb/copy-to-ebx  Stream/imm32
    89/copy                         0/mod/indirect  3/rm32/ebx    .           .             .           0/r32/eax   .               .                 # copy eax to *ebx

    # write(Stream, A, 1)  # A holds the single byte "a"
    # . load stream
    bb/copy-to-ebx  Stream/imm32
    8b/copy                         0/mod/indirect  3/rm32/ebx    .           .             .           3/r32/ebx   .               .                 # copy *ebx to ebx
    # .
    b9/copy-to-ecx  A/imm32
    ba/copy-to-edx  1/imm32/size
    e8/call  syscall_write/disp32

    # close(Stream)
    # . load stream
    bb/copy-to-ebx  Stream/imm32
    8b/copy                         0/mod/indirect  3/rm32/ebx    .           .             .           3/r32/ebx   .               .                 # copy *ebx to ebx
    # .
    e8/call  syscall_close/disp32

    # stream = open(Filename, O_RDONLY, 0x180)
    bb/copy-to-ebx  Filename/imm32
    b9/copy-to-ecx  0/imm32/rdonly
    ba/copy-to-edx  0x180/imm32/fixed-perms
    e8/call  syscall_open/disp32
    # . save stream (overwrites the descriptor stored by the first open)
    bb/copy-to-ebx  Stream/imm32
    89/copy                         0/mod/indirect  3/rm32/ebx    .           .             .           0/r32/eax   .               .                 # copy eax to *ebx

    # read(Stream, B, 1)
    # . load stream
    bb/copy-to-ebx  Stream/imm32
    8b/copy                         0/mod/indirect  3/rm32/ebx    .           .             .           3/r32/ebx   .               .                 # copy *ebx to ebx
    # .
    b9/copy-to-ecx  B/imm32
    ba/copy-to-edx  1/imm32/size
    e8/call  syscall_read/disp32

    # close(Stream)
    # . load stream
    bb/copy-to-ebx  Stream/imm32
    8b/copy                         0/mod/indirect  3/rm32/ebx    .           .             .           3/r32/ebx   .               .                 # copy *ebx to ebx
    # .
    e8/call  syscall_close/disp32

    # unlink(Filename)
    bb/copy-to-ebx  Filename/imm32
    e8/call  syscall_unlink/disp32

    # exit(B)  # the byte read back becomes the exit status
    # . load B
    bb/copy-to-ebx  B/imm32
    8b/copy                         0/mod/indirect  3/rm32/ebx    .           .             .           3/r32/ebx   .               .                 # copy *ebx to ebx
    # .
    e8/call  syscall_exit/disp32

== data

# Slot holding the value returned by the most recent open; Entry stores eax
# here after each syscall_open and reloads it for write/read/close.
Stream:
    0/imm32
# Source buffer for the write: 61 is hex, i.e. 97 decimal, ASCII 'a' -- the
# exit status the header comment says to expect.
A:
    61/imm32/A
# Destination buffer for the read; its contents are passed to exit.
B:
    0/imm32
# Null-terminated path ".foo", followed by three further zero bytes.
Filename:
    2e 66 6f 6f 00 00 00 00
#   .  f  o  o  null

# . . vim:nowrap:textwidth=0
: 5px; padding-right: 5px; } span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } .highlight .hll { background-color: #ffffcc } .highlight .c { color: #888888 } /* Comment */ .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ .highlight .k { color: #008800; font-weight: bold } /* Keyword */ .highlight .ch { color: #888888 } /* Comment.Hashbang */ .highlight .cm { color: #888888 } /* Comment.Multiline */ .highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */ .highlight .cpf { color: #888888 } /* Comment.PreprocFile */ .highlight .c1 { color: #888888 } /* Comment.Single */ .highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ .highlight .gr { color: #aa0000 } /* Generic.Error */ .highlight .gh { color: #333333 } /* Generic.Heading */ .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ .highlight .go { color: #888888 } /* Generic.Output */ .highlight .gp { color: #555555 } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: 
#008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* 
Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
/*
 * parser.c
 *
 * Copyright (C) 2012, 2013 James Booth <boothj5@gmail.com>
 *
 * This file is part of Profanity.
 *
 * Profanity is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Profanity is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Profanity.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <stdlib.h>
#include <string.h>

#include <glib.h>

/*
 * Take a full line of input and return an array of strings representing
 * the arguments of a command.
 * If the number of arguments found is less than min, or more than max
 * NULL is returned.
 *
 * inp - The line of input
 * min - The minimum allowed number of arguments
 * max - The maxmimum allowed number of arguments
 *
 * Returns - An NULL terminated array of strings representing the aguments
 * of the command, or NULL if the validation fails.
 *
 * E.g. the following input line:
 *
 * /cmd arg1 arg2
 *
 * Will return a pointer to the following array:
 *
 * { "arg1", "arg2", NULL }
 *
 */
gchar **
parse_args(const char * const inp, int min, int max)
{
    if (inp == NULL) {
        return NULL;
    }

    // copy and strip input of leading/trailing whitepsace
    char *copy = strdup(inp);
    g_strstrip(copy);

    int inp_size = g_utf8_strlen(copy, -1);
    gboolean in_token = FALSE;
    gboolean in_quotes = FALSE;
    char *token_start = &copy[0];
    int token_size = 0;
    GSList *tokens = NULL;

    // add tokens to GSList
    int i;
    for (i = 0; i < inp_size; i++) {
        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
        gunichar curr_uni = g_utf8_get_char(curr_ch);

        if (!in_token) {
            if (curr_uni  == ' ') {
                continue;
            } else {
                in_token = TRUE;
                if (curr_uni == '"') {
                    in_quotes = TRUE;
                    i++;
                    gchar *next_ch = g_utf8_next_char(curr_ch);
                    gunichar next_uni = g_utf8_get_char(next_ch);
                    token_start = next_ch;
                    token_size += g_unichar_to_utf8(next_uni, NULL);
                } else {
                    token_start = curr_ch;
                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                }
            }
        } else {
            if (in_quotes) {
                if (curr_uni == '"') {
                    tokens = g_slist_append(tokens, g_strndup(token_start,
                        token_size));
                    token_size = 0;
                    in_token = FALSE;
                    in_quotes = FALSE;
                } else {
                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                }
            } else {
                if (curr_uni == ' ') {
                    tokens = g_slist_append(tokens, g_strndup(token_start,
                        token_size));
                    token_size = 0;
                    in_token = FALSE;
                } else {
                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                }
            }
        }
    }

    if (in_token) {
        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
    }

    int num = g_slist_length(tokens) - 1;

    // if num args not valid return NULL
    if ((num < min) || (num > max)) {
        g_slist_free_full(tokens, free);
        g_free(copy);
        return NULL;

    // if min allowed is 0 and 0 found, return empty char* array
    } else if (min == 0 && num == 0) {
        g_slist_free_full(tokens, free);
        gchar **args = malloc((num + 1) * sizeof(*args));
        args[0] = NULL;
        g_free(copy);
        return args;

    // otherwise return args array
    } else {
        gchar **args = malloc((num + 1) * sizeof(*args));
        GSList *token = tokens;
        token = g_slist_next(token);
        int arg_count = 0;

        while (token != NULL) {
            args[arg_count++] = strdup(token->data);
            token = g_slist_next(token);
        }

        args[arg_count] = NULL;
        g_slist_free_full(tokens, free);
        g_free(copy);

        return args;
    }
}

/*
 * Take a full line of input and return an array of strings representing
 * the arguments of a command.  This function handles when the last parameter
 * to the command is free text e.g.
 *
 * /msg user@host here is a message
 *
 * If the number of arguments found is less than min, or more than max
 * NULL is returned.
 *
 * inp - The line of input
 * min - The minimum allowed number of arguments
 * max - The maxmimum allowed number of arguments
 *
 * Returns - An NULL terminated array of strings representing the aguments
 * of the command, or NULL if the validation fails.
 *
 * E.g. the following input line:
 *
 * /cmd arg1 arg2 some free text
 *
 * Will return a pointer to the following array:
 *
 * { "arg1", "arg2", "some free text", NULL }
 *
 */
gchar **
parse_args_with_freetext(const char * const inp, int min, int max)
{
    if (inp == NULL) {
        return NULL;
    }

    // copy and strip input of leading/trailing whitepsace
    char *copy = strdup(inp);
    g_strstrip(copy);

    int inp_size = g_utf8_strlen(copy, -1);
    gboolean in_token = FALSE;
    gboolean in_freetext = FALSE;
    gboolean in_quotes = FALSE;
    char *token_start = &copy[0];
    int token_size = 0;
    int num_tokens = 0;
    GSList *tokens = NULL;

    // add tokens to GSList
    int i;
    for (i = 0; i < inp_size; i++) {
        gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
        gunichar curr_uni = g_utf8_get_char(curr_ch);

        if (!in_token) {
            if (curr_uni == ' ') {
                continue;
            } else {
                in_token = TRUE;
                num_tokens++;
                if ((num_tokens == max + 1) && (curr_uni != '"')) {
                    in_freetext = TRUE;
                } else if (curr_uni == '"') {
                    in_quotes = TRUE;
                    i++;
                    gchar *next_ch = g_utf8_next_char(curr_ch);
                    gunichar next_uni = g_utf8_get_char(next_ch);
                    token_start = next_ch;
                    token_size += g_unichar_to_utf8(next_uni, NULL);
                }
                if (curr_uni == '"') {
                    gchar *next_ch = g_utf8_next_char(curr_ch);
                    token_start = next_ch;
                } else {
                    token_start = curr_ch;
                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                }
            }
        } else {
            if (in_quotes) {
                if (curr_uni == '"') {
                    tokens = g_slist_append(tokens, g_strndup(token_start,
                        token_size));
                    token_size = 0;
                    in_token = FALSE;
                    in_quotes = FALSE;
                } else {
                    if (curr_uni != '"') {
                        token_size += g_unichar_to_utf8(curr_uni, NULL);
                    }
                }
            } else {
                if (in_freetext) {
                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                } else if (curr_uni == ' ') {
                    tokens = g_slist_append(tokens, g_strndup(token_start,
                        token_size));
                    token_size = 0;
                    in_token = FALSE;
                } else if (curr_uni != '"') {
                    token_size += g_unichar_to_utf8(curr_uni, NULL);
                }
            }
        }
    }

    free(copy);

    if (in_token) {
        tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
    }

    int num = g_slist_length(tokens) - 1;

    // if num args not valid return NULL
    if ((num < min) || (num > max)) {
        g_slist_free_full(tokens, free);
        return NULL;

    // if min allowed is 0 and 0 found, return empty char* array
    } else if (min == 0 && num == 0) {
        gchar **args = malloc((num + 1) * sizeof(*args));
        args[0] = NULL;
        return args;

    // otherwise return args array
    } else {
        gchar **args = malloc((num + 1) * sizeof(*args));
        GSList *token = tokens;
        token = g_slist_next(token);
        int arg_count = 0;

        while (token != NULL) {
            args[arg_count++] = strdup(token->data);
            token = g_slist_next(token);
        }

        args[arg_count] = NULL;
        g_slist_free_full(tokens, free);

        return args;
    }
}

/*
 * Count the space separated tokens in a line of input.
 *
 * The count starts at one for the first token and goes up by one for every
 * space found outside double quotes, so consecutive spaces each add to the
 * count.  Both delimiters are ASCII, and ASCII bytes never occur inside a
 * UTF-8 multi-byte sequence, so the line is scanned byte by byte in a
 * single pass.
 *
 * string - The line of input, NULL is treated as an empty line
 *
 * Returns - The number of tokens, always at least 1
 */
int
count_tokens(char *string)
{
    // include first token
    int num_tokens = 1;
    int in_quotes = 0;
    const char *curr_ch;

    if (string == NULL) {
        return num_tokens;
    }

    for (curr_ch = string; *curr_ch != '\0'; curr_ch++) {
        if (*curr_ch == '"') {
            in_quotes = !in_quotes;
        } else if ((*curr_ch == ' ') && !in_quotes) {
            num_tokens++;
        }
    }

    return num_tokens;
}

/*
 * Return the beginning of a line of input, up to and including the space
 * that ends token number (tokens - 1).  Spaces inside double quotes do not
 * end a token.  If the line holds fewer tokens than that, the whole line is
 * returned; if tokens is 1 or less, an empty string is returned.
 *
 * The prefix always ends either right after an ASCII space or at the end of
 * the line, so scanning byte by byte never splits a UTF-8 character.
 *
 * string - The line of input, NULL is treated as an empty line
 * tokens - One more than the number of leading tokens to keep
 *
 * Returns - A newly allocated string which the caller must free, or NULL
 * if memory cannot be allocated.
 *
 * NOTE(review): the result is allocated with malloc(); callers releasing it
 * with g_free() rely on GLib using the system allocator, as the rest of
 * this file already does by pairing strdup() with g_free().
 *
 * E.g. get_start("/cmd arg1 arg2", 2) returns "/cmd "
 */
char *
get_start(char *string, int tokens)
{
    size_t prefix_len = 0;
    // include first token
    int num_tokens = 1;
    int in_quotes = 0;
    char *result = NULL;

    if (string != NULL) {
        const char *curr_ch;

        // a byte belongs to the prefix while fewer than 'tokens' tokens
        // have been started; the token count never decreases, so the scan
        // can stop at the first byte that falls outside it
        for (curr_ch = string; (*curr_ch != '\0') && (num_tokens < tokens);
                curr_ch++) {
            prefix_len++;

            if (*curr_ch == '"') {
                in_quotes = !in_quotes;
            } else if ((*curr_ch == ' ') && !in_quotes) {
                num_tokens++;
            }
        }
    }

    result = malloc(prefix_len + 1);
    if (result == NULL) {
        return NULL;
    }

    if (prefix_len > 0) {
        memcpy(result, string, prefix_len);
    }
    result[prefix_len] = '\0';

    return result;
}