about summary refs log blame commit diff stats
path: root/js/regex.js
blob: 86acf212a267244a940eb073e2192059cf253654 (plain) (tree)
1
2
3
4
5
6
7
8
9








                                                                                           

                           































                                                                
 


                           
                                           





























                                                        
                                                        






                                            
                                                              
                                            
                                           







                    
                                     



  
                                            








                                                               
                                                                










                                                                     

                                                                




                                                                       
                                                          









                                     










                                                    









































































                                         
                                                            

                                         

                                                           



          
const tokenize = (pattern) => {
    const tokens = [];
    let i = 0;

    while (i < pattern.length) {
        const char = pattern[i];

        if (char === '.' || char === '*' || char === '(' || char === ')' || char === '|') {
            tokens.push({
                type: char,
                value: char
            });
        } else if (char === '\\') { // Handle escaped characters
            i++;
            tokens.push({
                type: 'literal',
                value: pattern[i]
            });
        } else if (char === '[') { // Handle character classes
            let charClass = '';
            i++;
            while (pattern[i] !== ']' && i < pattern.length) {
                charClass += pattern[i];
                i++;
            }
            tokens.push({
                type: 'charClass',
                value: charClass
            });
        } else {
            tokens.push({
                type: 'literal',
                value: char
            });
        }
        i++;
    }

    return tokens;
};




const parse = (tokens) => {
    let i = 0;

    const parseSequenceExpression = () => {
        const node = {
            type: 'sequence',
            elements: []
        };

        while (i < tokens.length) {
            const token = tokens[i];

            if (token.type === 'literal') {
                node.elements.push({
                    type: 'literal',
                    value: token.value
                });
            } else if (token.type === '*') {
                const lastElement = node.elements.pop();
                node.elements.push({
                    type: 'star',
                    element: lastElement
                });
            } else if (token.type === '.') {
                node.elements.push({
                    type: 'dot'
                });
            } else if (token.type === 'charClass') {
                node.elements.push({
                    type: 'charClass',
                    value: token.value
                });
            } else if (token.type === '|') {
                i++;
                const right = parseSequenceExpression();
                return {
                    type: 'alternation',
                    left: node,
                    right
                };
            } else if (token.type === '(') {
                i++;
                node.elements.push(parseSequenceExpression());
            } else if (token.type === ')') {
                break; // End of a grouping
            }

            i++;
        }

        return node;
    };

    return parseSequenceExpression();
};



const evaluateMatch = (node) => (input) => {
    if (node.type === 'literal') {
        return input[0] === node.value ? input.slice(1) : null;
    }
    if (node.type === 'dot') {
        return input.length > 0 ? input.slice(1) : null;
    }
    if (node.type === 'star') {
        let remainder = input;
        while (remainder !== null) {
            const next = evaluateMatch(node.element)(remainder);
            if (next === null) {
                break;
            }
            remainder = next;
        }
        return remainder;
    }
    if (node.type === 'charClass') {
        return node.value.includes(input[0]) ? input.slice(1) : null;
    }
    if (node.type === 'alternation') {
        const remainderLeft = evaluateMatch(node.left)(input);
        const remainderRight = evaluateMatch(node.right)(input);
        return remainderLeft !== null ? remainderLeft : remainderRight;
    }
    if (node.type === 'sequence') {
        let remainder = input;
        for (const element of node.elements) {
            remainder = evaluateMatch(element)(remainder);
            if (remainder === null) {
                return null;
            }
        }
        return remainder;
    }
};




const assertEqual = (expected, actual, message) => {
    if (expected !== actual) {
        console.error(`FAIL: ${message}`);
    } else {
        console.log(`PASS: ${message}`);
    }
};



const runTests = () => {
    const tests = [{
            pattern: "a.b*c",
            input: "abbbc",
            expected: true
        },
        {
            pattern: "a.b*c",
            input: "abc",
            expected: true
        },
        {
            pattern: "a.b*c",
            input: "ac",
            expected: true
        },
        {
            pattern: "a.b*c",
            input: "abbc",
            expected: true
        },
        {
            pattern: "a.b*c",
            input: "axbc",
            expected: false
        },

        // Character class tests
        {
            pattern: "[abc]x",
            input: "bx",
            expected: true
        },
        {
            pattern: "[abc]x",
            input: "dx",
            expected: false
        },

        // Grouping and alternation tests
        {
            pattern: "(a|b)c",
            input: "ac",
            expected: true
        },
        {
            pattern: "(a|b)c",
            input: "bc",
            expected: true
        },
        {
            pattern: "(a|b)c",
            input: "cc",
            expected: false
        },

        // Escaped characters tests
        {
            pattern: "a\\.b",
            input: "a.b",
            expected: true
        },
        {
            pattern: "a\\.b",
            input: "a-b",
            expected: false
        },
        {
            pattern: "a\\*b",
            input: "a*b",
            expected: true
        }
    ];

    tests.forEach(({ pattern, input, expected }, index) => {
        const tokens = tokenize(pattern);
        const ast = parse(tokens);
        const result = evaluateMatch(ast)(input) !== null;
        assertEqual(expected, result, `Test ${index + 1}`);
    });
};

runTests();