Diffstat (limited to 'js/scripting-lang/lexer.js')
 js/scripting-lang/lexer.js | 64 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 6 deletions(-)
diff --git a/js/scripting-lang/lexer.js b/js/scripting-lang/lexer.js
index c7c026f..4c50b6e 100644
--- a/js/scripting-lang/lexer.js
+++ b/js/scripting-lang/lexer.js
@@ -3,6 +3,21 @@
 
 /**
  * Token types for the language
+ * 
+ * @description Defines all token types used by the lexer and parser.
+ * Each token type represents a distinct syntactic element in the language.
+ * 
+ * The token types are organized into categories:
+ * - Literals: NUMBER, STRING, TRUE, FALSE
+ * - Operators: PLUS, MINUS, MULTIPLY, DIVIDE, MODULO, POWER, etc.
+ * - Keywords: WHEN, IS, THEN, FUNCTION, etc.
+ * - Punctuation: LEFT_PAREN, RIGHT_PAREN, SEMICOLON, COMMA, etc.
+ * - Special: IO_IN, IO_OUT, IO_ASSERT, FUNCTION_REF, FUNCTION_ARG
+ * 
+ * This enumeration provides a centralized definition of all possible
+ * token types, ensuring consistency between lexer and parser. The token
+ * types are designed to support the combinator-based architecture where
+ * all operations are translated to function calls.
  */
 export const TokenType = {
     NUMBER: 'NUMBER',
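
For orientation, the token shape is consistent across the push calls in this diff: a plain object with a string type, an optional value, and line/column positions. A minimal sketch of the stream for the input 'double via square' follows; the input and the column numbers are illustrative, not taken from the file:

    // Hypothetical token stream for: double via square
    // Shapes copied from the tokens.push(...) calls in this diff.
    [
        { type: 'IDENTIFIER', value: 'double', line: 1, column: 1 },
        { type: 'COMPOSE', line: 1, column: 8 },             // the 'via' keyword
        { type: 'IDENTIFIER', value: 'square', line: 1, column: 12 }
    ]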
@@ -47,13 +62,39 @@ export const TokenType = {
     IO_IN: 'IO_IN',
     IO_OUT: 'IO_OUT',
     IO_ASSERT: 'IO_ASSERT',
-    FUNCTION_REF: 'FUNCTION_REF'
+    FUNCTION_REF: 'FUNCTION_REF',
+    FUNCTION_ARG: 'FUNCTION_ARG',
+    COMPOSE: 'COMPOSE'
 };
 
 /**
  * Converts source code into tokens
+ * 
  * @param {string} input - The source code to tokenize
- * @returns {Array} Array of tokens
+ * @returns {Array.<Object>} Array of token objects with type, value, line, and column
+ * @throws {Error} For unexpected characters or malformed tokens
+ * 
+ * @description The lexer performs lexical analysis by converting source code
+ * into a stream of tokens. Each token represents a meaningful unit of the
+ * language syntax, such as identifiers, literals, operators, and keywords.
+ * 
+ * The lexer implements a character-by-character scanning approach with
+ * lookahead for multi-character tokens. It maintains line and column
+ * information for accurate error reporting and debugging.
+ * 
+ * Key features:
+ * - Handles whitespace and comments (single-line and multi-line)
+ * - Recognizes all language constructs including operators, keywords, and literals
+ * - Supports string literals with escape sequences
+ * - Provides detailed position information for error reporting
+ * - Cross-platform compatibility (Node.js, Bun, browser)
+ * - Supports function composition with 'via' keyword
+ * - Handles function references with '@' operator
+ * 
+ * The lexer is designed to be robust and provide clear error messages
+ * for malformed input, making it easier to debug syntax errors in user code.
+ * It supports the combinator-based architecture by recognizing all operators
+ * and special tokens needed for function composition and application.
  */
 export function lexer(input) {
     const tokens = [];
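
A hedged usage sketch of the exported function; the import path follows the file under review, but the sample input and the exact token sequence are assumptions based on the token list documented above:

    // Hypothetical usage sketch, not code from this repository.
    import { lexer } from './lexer.js';

    const tokens = lexer('x : 42;');
    // Per the cases in this diff: IDENTIFIER 'x', ASSIGNMENT for ':',
    // NUMBER for '42', then SEMICOLON -- each token carrying line/column
    // so parse errors can point at the offending source position.
    for (const tok of tokens) {
        console.log(tok.type, tok.value ?? '', `${tok.line}:${tok.column}`);
    }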
@@ -135,11 +176,18 @@ export function lexer(input) {
             continue;
         }
         
-        // Function references (@function)
+        // Function references (@function) and function arguments (@(expression))
         if (char === '@') {
             current++; // Skip '@'
             column++;
             
+            // Check if this is @(expression) for function arguments
+            if (current < input.length && input[current] === '(') {
+                // This is @(expression); emit FUNCTION_ARG and leave '(' for the next iteration
+                tokens.push({ type: TokenType.FUNCTION_ARG, line, column: column - 1 });
+                continue;
+            }
+            
             // Read the function name
             let functionName = '';
             while (current < input.length && /[a-zA-Z0-9_]/.test(input[current])) {
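
Worth spelling out, since it is easy to miss in the branch above: the '(' is not consumed when FUNCTION_ARG is emitted, so the parenthesized expression is tokenized by the ordinary single-character cases. A comment-only walkthrough (the FUNCTION_REF value field is inferred from the name-reading loop, not shown in this hunk):

    // Hypothetical walkthrough of the '@' branch:
    //
    //   '@add'     -> { type: 'FUNCTION_REF', value: 'add', ... }
    //                 (name collected by the /[a-zA-Z0-9_]/ loop)
    //   '@(add 1)' -> { type: 'FUNCTION_ARG', ... } then LEFT_PAREN, IDENTIFIER,
    //                 NUMBER, RIGHT_PAREN: the branch continues without advancing
    //                 past '(', leaving it for the normal punctuation cases.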
@@ -216,6 +264,12 @@ export function lexer(input) {
                 case 'function':
                     tokens.push({ type: TokenType.FUNCTION, line, column: startColumn });
                     break;
+                case 'via':
+                    tokens.push({ type: TokenType.COMPOSE, line, column: startColumn });
+                    break;
+                case '_':
+                    tokens.push({ type: TokenType.WILDCARD, line, column: startColumn });
+                    break;
                 default:
                     tokens.push({ type: TokenType.IDENTIFIER, value, line, column: startColumn });
             }
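
The two new keyword cases ride on the existing identifier scanner, which is why '_' can move out of the single-character switch in the final hunk below. A sketch of the expected dispatch; the input and resulting stream are assumptions:

    // Hypothetical dispatch for the input: f via _
    //   'f'   -> IDENTIFIER (default case)
    //   'via' -> COMPOSE    (keyword token, no value field)
    //   '_'   -> WILDCARD   (requires the identifier scanner to accept '_',
    //                        which the /[a-zA-Z0-9_]/ loop above already does)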
@@ -311,9 +365,7 @@ export function lexer(input) {
             case ':':
                 tokens.push({ type: TokenType.ASSIGNMENT, line, column });
                 break;
-            case '_':
-                tokens.push({ type: TokenType.WILDCARD, line, column });
-                break;
+
             case '=':
                 if (input[current + 1] === '=') {
                     tokens.push({ type: TokenType.EQUALS, line, column });
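
To make the two-character operator handling above concrete, here is a hypothetical re-implementation of the '==' case, not the file's exact code; the advance-by-two bookkeeping is an assumption about what the elided lines do:

    // Hypothetical sketch of one-character lookahead for '=='.
    if (input[current] === '=' && input[current + 1] === '=') {
        tokens.push({ type: 'EQUALS', line, column });
        current += 2;  // consume both '=' characters (assumed bookkeeping)
        column += 2;
    }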