diff options
Diffstat (limited to 'js/regex.js')
-rw-r--r-- | js/regex.js | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/js/regex.js b/js/regex.js new file mode 100644 index 0000000..86acf21 --- /dev/null +++ b/js/regex.js @@ -0,0 +1,236 @@ +const tokenize = (pattern) => { + const tokens = []; + let i = 0; + + while (i < pattern.length) { + const char = pattern[i]; + + if (char === '.' || char === '*' || char === '(' || char === ')' || char === '|') { + tokens.push({ + type: char, + value: char + }); + } else if (char === '\\') { // Handle escaped characters + i++; + tokens.push({ + type: 'literal', + value: pattern[i] + }); + } else if (char === '[') { // Handle character classes + let charClass = ''; + i++; + while (pattern[i] !== ']' && i < pattern.length) { + charClass += pattern[i]; + i++; + } + tokens.push({ + type: 'charClass', + value: charClass + }); + } else { + tokens.push({ + type: 'literal', + value: char + }); + } + i++; + } + + return tokens; +}; + + + + +const parse = (tokens) => { + let i = 0; + + const parseSequenceExpression = () => { + const node = { + type: 'sequence', + elements: [] + }; + + while (i < tokens.length) { + const token = tokens[i]; + + if (token.type === 'literal') { + node.elements.push({ + type: 'literal', + value: token.value + }); + } else if (token.type === '*') { + const lastElement = node.elements.pop(); + node.elements.push({ + type: 'star', + element: lastElement + }); + } else if (token.type === '.') { + node.elements.push({ + type: 'dot' + }); + } else if (token.type === 'charClass') { + node.elements.push({ + type: 'charClass', + value: token.value + }); + } else if (token.type === '|') { + i++; + const right = parseSequenceExpression(); + return { + type: 'alternation', + left: node, + right + }; + } else if (token.type === '(') { + i++; + node.elements.push(parseSequenceExpression()); + } else if (token.type === ')') { + break; // End of a grouping + } + + i++; + } + + return node; + }; + + return parseSequenceExpression(); +}; + + + +const evaluateMatch = (node) => (input) => { + if (node.type === 'literal') { + return input[0] === node.value ? input.slice(1) : null; + } + if (node.type === 'dot') { + return input.length > 0 ? input.slice(1) : null; + } + if (node.type === 'star') { + let remainder = input; + while (remainder !== null) { + const next = evaluateMatch(node.element)(remainder); + if (next === null) { + break; + } + remainder = next; + } + return remainder; + } + if (node.type === 'charClass') { + return node.value.includes(input[0]) ? input.slice(1) : null; + } + if (node.type === 'alternation') { + const remainderLeft = evaluateMatch(node.left)(input); + const remainderRight = evaluateMatch(node.right)(input); + return remainderLeft !== null ? remainderLeft : remainderRight; + } + if (node.type === 'sequence') { + let remainder = input; + for (const element of node.elements) { + remainder = evaluateMatch(element)(remainder); + if (remainder === null) { + return null; + } + } + return remainder; + } +}; + + + + +const assertEqual = (expected, actual, message) => { + if (expected !== actual) { + console.error(`FAIL: ${message}`); + } else { + console.log(`PASS: ${message}`); + } +}; + + + +const runTests = () => { + const tests = [{ + pattern: "a.b*c", + input: "abbbc", + expected: true + }, + { + pattern: "a.b*c", + input: "abc", + expected: true + }, + { + pattern: "a.b*c", + input: "ac", + expected: true + }, + { + pattern: "a.b*c", + input: "abbc", + expected: true + }, + { + pattern: "a.b*c", + input: "axbc", + expected: false + }, + + // Character class tests + { + pattern: "[abc]x", + input: "bx", + expected: true + }, + { + pattern: "[abc]x", + input: "dx", + expected: false + }, + + // Grouping and alternation tests + { + pattern: "(a|b)c", + input: "ac", + expected: true + }, + { + pattern: "(a|b)c", + input: "bc", + expected: true + }, + { + pattern: "(a|b)c", + input: "cc", + expected: false + }, + + // Escaped characters tests + { + pattern: "a\\.b", + input: "a.b", + expected: true + }, + { + pattern: "a\\.b", + input: "a-b", + expected: false + }, + { + pattern: "a\\*b", + input: "a*b", + expected: true + } + ]; + + tests.forEach(({ pattern, input, expected }, index) => { + const tokens = tokenize(pattern); + const ast = parse(tokens); + const result = evaluateMatch(ast)(input) !== null; + assertEqual(expected, result, `Test ${index + 1}`); + }); +}; + +runTests(); \ No newline at end of file |