From 5c0332ff54b126bc1f305020348ff0895d51c54c Mon Sep 17 00:00:00 2001 From: elioat Date: Sun, 13 Oct 2024 18:23:43 -0400 Subject: * --- js/regex.js | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 js/regex.js diff --git a/js/regex.js b/js/regex.js new file mode 100644 index 0000000..8e314c4 --- /dev/null +++ b/js/regex.js @@ -0,0 +1,227 @@ +const tokenize = (pattern) => { + const tokens = []; + let i = 0; + + while (i < pattern.length) { + const char = pattern[i]; + + if (char === '.' || char === '*' || char === '(' || char === ')' || char === '|') { + tokens.push({ + type: char + }); + } else if (char === '\\') { // Handle escaped characters + i++; + tokens.push({ + type: 'literal', + value: pattern[i] + }); + } else if (char === '[') { // Handle character classes + let charClass = ''; + i++; + while (pattern[i] !== ']' && i < pattern.length) { + charClass += pattern[i]; + i++; + } + tokens.push({ + type: 'charClass', + value: charClass + }); + } else { + tokens.push({ + type: 'literal', + value: char + }); + } + i++; + } + + return tokens; +}; + + + +const parse = (tokens) => { + let i = 0; + + const parseExpression = () => { + const node = { + type: 'sequence', + elements: [] + }; + + while (i < tokens.length) { + const token = tokens[i]; + + if (token.type === 'literal') { + node.elements.push({ + type: 'literal', + value: token.value + }); + } else if (token.type === '*') { + const lastElement = node.elements.pop(); + node.elements.push({ + type: 'star', + element: lastElement + }); + } else if (token.type === '.') { + node.elements.push({ + type: 'dot' + }); + } else if (token.type === 'charClass') { + node.elements.push({ + type: 'charClass', + value: token.value + }); + } else if (token.type === '|') { + i++; + const right = parseExpression(); + return { + type: 'alternation', + left: node, + right + }; + } else if (token.type === '(') { + i++; + node.elements.push(parseExpression()); + } else if (token.type === ')') { + break; // End of grouping + } + + i++; + } + + return node; + }; + + return parseExpression(); +}; + + + +const match = (node, input) => { + if (node.type === 'literal') { + return input[0] === node.value ? input.slice(1) : null; + } + if (node.type === 'dot') { + return input.length > 0 ? input.slice(1) : null; + } + if (node.type === 'star') { + let remainder = input; + while (remainder !== null) { + const next = match(node.element, remainder); + if (next === null) { + break; + } + remainder = next; + } + return remainder; + } + if (node.type === 'charClass') { + return node.value.includes(input[0]) ? input.slice(1) : null; + } + if (node.type === 'alternation') { + const remainderLeft = match(node.left, input); + const remainderRight = match(node.right, input); + return remainderLeft !== null ? remainderLeft : remainderRight; + } + if (node.type === 'sequence') { + let remainder = input; + for (const element of node.elements) { + remainder = match(element, remainder); + if (remainder === null) { + return null; + } + } + return remainder; + } +}; + + + +const runTests = () => { + const tests = [{ + pattern: "a.b*c", + input: "abbbc", + expected: true + }, + { + pattern: "a.b*c", + input: "abc", + expected: true + }, + { + pattern: "a.b*c", + input: "ac", + expected: true + }, + { + pattern: "a.b*c", + input: "abbc", + expected: true + }, + { + pattern: "a.b*c", + input: "axbc", + expected: false + }, + + // Character class tests + { + pattern: "[abc]x", + input: "bx", + expected: true + }, + { + pattern: "[abc]x", + input: "dx", + expected: false + }, + + // Grouping and alternation tests + { + pattern: "(a|b)c", + input: "ac", + expected: true + }, + { + pattern: "(a|b)c", + input: "bc", + expected: true + }, + { + pattern: "(a|b)c", + input: "cc", + expected: false + }, + + // Escaped characters tests + { + pattern: "a\\.b", + input: "a.b", + expected: true + }, + { + pattern: "a\\.b", + input: "a-b", + expected: false + }, + { + pattern: "a\\*b", + input: "a*b", + expected: true + } + ]; + + tests.forEach(({ + pattern, + input, + expected + }, index) => { + const tokens = tokenize(pattern); + const ast = parse(tokens); + const result = match(ast, input) !== null; + console.log(`Test ${index + 1}: ${result === expected ? 'PASS' : 'FAIL'}`); + }); +}; + +runTests(); \ No newline at end of file -- cgit 1.4.1-2-gfad0