// lexer.js

import { LexError, ErrorHelpers } from '../core/error.js';

/**
 * Token categories emitted by the lexer.
 *
 * Every key maps to a string with the same spelling, so `token.type` stays
 * readable when a token is logged or compared.
 */
const tokenTypes = Object.fromEntries(
  [
    'IDENTIFIER',
    'TYPE',
    'NUMBER',
    'STRING',
    'ARROW',
    'COLON',
    'SEMICOLON',
    'COMMA',
    'KEYWORD',
    'OPERATOR',
    'LPAREN',
    'RPAREN',
    'DOT',
    'LBRACKET',
    'RBRACKET',
    'LBRACE',
    'RBRACE',
    'EOF',
  ].map((name) => [name, name]),
);

// Words the lexer emits as KEYWORD tokens rather than IDENTIFIER tokens.
// Matching is exact and case-sensitive (`Ok` is a keyword, `ok` is not).
const keywords = [
  'when',
  'is',
  'then',
  'if',
  'Ok',
  'Err',
  'true',
  'false',
  'PI',
  'INFINITY',
  'and',
  'or',
  'xor',
];

/**
 * Creates a lexer over a piece of Baba Yaga source text.
 *
 * The lexer keeps a cursor (`position`) together with a 1-based `line` and
 * `column`, and exposes a single method, `allTokens()`, that scans the input
 * from the current cursor to the end.
 *
 * Token shapes:
 *   - most tokens:  `{ type, value, line, column }`
 *   - NUMBER:       `{ type, value, isFloat, originalString, line, column }`
 *   - EOF:          `{ type, line, column }`
 *
 * Position reporting: a STRING token carries the line/column of its opening
 * quote. Every other token carries the line/column of the cursor *after* the
 * token was consumed (i.e. one past its last character).
 * NOTE(review): that asymmetry is kept as-is because consumers may rely on
 * the current values — confirm before normalising to start positions.
 *
 * @param {string} input - Source text to tokenize.
 * @returns {{ allTokens: () => object[] }} Lexer handle.
 * @throws {LexError} Thrown by `allTokens()` for an unterminated string
 *   literal or for a character that cannot start any token.
 */
function createLexer(input) {
  let position = 0;
  let line = 1;
  let column = 1;

  // Identifiers with these spellings are emitted as TYPE tokens.
  const typeNames = new Set(['Int', 'String', 'Result', 'Float', 'Number', 'List', 'Table', 'Bool']);

  // Tokens that are fully determined by a single character.
  const punctuation = new Map([
    ['(', tokenTypes.LPAREN],
    [')', tokenTypes.RPAREN],
    ['[', tokenTypes.LBRACKET],
    [']', tokenTypes.RBRACKET],
    ['{', tokenTypes.LBRACE],
    ['}', tokenTypes.RBRACE],
    [':', tokenTypes.COLON],
    [';', tokenTypes.SEMICOLON],
    [',', tokenTypes.COMMA],
  ]);

  // Two-character tokens; these are tried before their one-character prefixes.
  const twoCharTokens = new Map([
    ['..', tokenTypes.OPERATOR],
    ['->', tokenTypes.ARROW],
    ['>=', tokenTypes.OPERATOR],
    ['<=', tokenTypes.OPERATOR],
    ['!=', tokenTypes.OPERATOR],
  ]);

  const singleCharOperators = new Set(['+', '-', '*', '/', '=', '>', '<', '%']);

  // Escape letter (the character after a backslash) -> character it denotes.
  const escapes = new Map([
    ['n', '\n'],
    ['t', '\t'],
    ['r', '\r'],
    ['\\', '\\'],
    ['"', '"'],
  ]);

  function isWhitespace(char) {
    return /\s/.test(char);
  }

  function isDigit(char) {
    return /\d/.test(char);
  }

  // Characters allowed inside an identifier. Digits are included; a leading
  // digit never reaches the identifier branch because numbers are tried first.
  function isIdentifierChar(char) {
    return /[a-zA-Z_0-9]/.test(char);
  }

  function peekChar() {
    return input[position];
  }

  /** Consumes characters while `predicate` holds and returns the consumed text. */
  function readWhile(predicate) {
    const start = position;
    while (position < input.length && predicate(input[position])) {
      position++;
      column++;
    }
    return input.slice(start, position);
  }

  /** Advances past `value` and builds a token positioned at the new cursor. */
  function consume(type, value) {
    position += value.length;
    column += value.length;
    return { type, value, line, column };
  }

  /**
   * Skips whitespace and `//` line comments.
   *
   * Written as a loop rather than by re-entering `nextToken()`, so the call
   * stack stays flat no matter how much whitespace or how many comment lines
   * precede the next token.
   */
  function skipTrivia() {
    while (position < input.length) {
      const char = input[position];

      if (isWhitespace(char)) {
        if (char === '\n') {
          line++;
          column = 1;
        } else {
          column++;
        }
        position++;
      } else if (char === '/' && input[position + 1] === '/') {
        // Stop at the newline (without consuming it) so the whitespace branch
        // does the line bookkeeping.
        while (position < input.length && input[position] !== '\n') {
          position++;
          column++;
        }
      } else {
        return;
      }
    }
  }

  /**
   * Reads a double-quoted string literal starting at the opening quote.
   *
   * Recognised escapes are \n, \t, \r, \\ and \". Any other backslash is kept
   * literally. Raw newlines are allowed inside the literal.
   *
   * @returns {object} STRING token positioned at the opening quote.
   * @throws {LexError} If the input ends before a closing quote.
   */
  function readString() {
    const startLine = line;
    const startColumn = column;
    let str = '';

    // Step over the opening quote.
    position++;
    column++;

    while (position < input.length && input[position] !== '"') {
      const char = input[position];

      if (char === '\n') {
        line++;
        column = 1;
      } else {
        column++;
      }

      const replacement = char === '\\' && position + 1 < input.length
        ? escapes.get(input[position + 1])
        : undefined;

      if (replacement !== undefined) {
        // The backslash was counted above; count the escape letter here.
        str += replacement;
        position += 2;
        column++;
      } else {
        str += char;
        position++;
      }
    }

    if (position >= input.length) {
      throw new LexError(
        'Unterminated string literal',
        { line: startLine, column: startColumn, length: str.length + 1 },
        input,
        [
          'Add closing quote " at the end of the string',
          'Check for unescaped quotes inside the string',
          'Use \\" to include quotes in strings'
        ]
      );
    }

    // Step over the closing quote.
    position++;
    column++;
    return { type: tokenTypes.STRING, value: str, line: startLine, column: startColumn };
  }

  /**
   * Reads digits with an optional fractional part and builds a NUMBER token.
   *
   * @param {string} sign - Text already consumed in front of the digits
   *   (`''` or `'-'`); it becomes part of `originalString`.
   * @returns {object} NUMBER token.
   */
  function readNumericLiteral(sign) {
    let text = sign + readWhile(isDigit);
    let isFloat = false;

    // A '.' that begins '..' belongs to the concatenation operator, not to
    // this number. Any other '.' is taken as a decimal point, so a trailing
    // "1." still reads as a float.
    if (peekChar() === '.' && input[position + 1] !== '.') {
      position++;
      column++;
      text += `.${readWhile(isDigit)}`;
      isFloat = true;
    }

    const value = isFloat ? Number.parseFloat(text) : Number.parseInt(text, 10);
    return {
      type: tokenTypes.NUMBER,
      value,
      isFloat,
      originalString: text,
      line,
      column
    };
  }

  function readNumber() {
    return readNumericLiteral('');
  }

  function readNegativeNumber() {
    // Consume the minus sign, then read the magnitude.
    position++;
    column++;
    return readNumericLiteral('-');
  }

  /**
   * Decides whether a '-' directly followed by a digit starts a negative
   * literal (true) or is a binary minus (false), based on the closest
   * preceding non-whitespace character.
   *
   * Binary minus is chosen only after ')' or after a digit. Everything else
   * (start of input, '(', ',', operators, ':', ';', identifier letters, and
   * any other character) yields a negative literal.
   *
   * Because identifier characters include digits, an identifier that ends in
   * a digit (e.g. `x1`) is treated like a number here.
   */
  function shouldBeNegativeLiteral() {
    let prevPos = position - 1;
    while (prevPos >= 0 && isWhitespace(input[prevPos])) {
      prevPos--;
    }

    if (prevPos < 0) {
      return true;
    }

    const prevChar = input[prevPos];
    return !(prevChar === ')' || isDigit(prevChar));
  }

  /** Picks the hint attached to an "unexpected character" error. */
  function suggestionFor(char) {
    if (char === '\u201C' || char === '\u201D') {
      return 'Use straight quotes " instead of curly quotes';
    }
    if (char === '\u2013' || char === '\u2014') {
      return 'Use regular minus - or arrow -> instead of em/en dash';
    }
    if (/[^\x00-\x7F]/.test(char)) {
      return 'Use only ASCII characters in Baba Yaga code';
    }
    return `Character "${char}" is not valid in Baba Yaga syntax`;
  }

  /**
   * Scans and returns the next token, or an EOF token at end of input.
   *
   * @throws {LexError} If the next character cannot start any token.
   */
  function nextToken() {
    skipTrivia();

    if (position >= input.length) {
      return { type: tokenTypes.EOF, line, column };
    }

    const char = input[position];

    const punctuationType = punctuation.get(char);
    if (punctuationType !== undefined) {
      return consume(punctuationType, char);
    }

    const pair = input.slice(position, position + 2);
    const pairType = twoCharTokens.get(pair);
    if (pairType !== undefined) {
      return consume(pairType, pair);
    }

    if (char === '.') {
      return consume(tokenTypes.DOT, '.');
    }

    // "-<digit>" is either a negative literal or a binary minus followed by a
    // number; when it is a binary minus, fall through to the operator branch.
    if (
      char === '-' &&
      position + 1 < input.length &&
      isDigit(input[position + 1]) &&
      shouldBeNegativeLiteral()
    ) {
      return readNegativeNumber();
    }

    if (isDigit(char)) {
      return readNumber();
    }

    if (isIdentifierChar(char)) {
      const value = readWhile(isIdentifierChar);
      let type = tokenTypes.IDENTIFIER;
      if (typeNames.has(value)) {
        type = tokenTypes.TYPE;
      } else if (keywords.includes(value)) {
        type = tokenTypes.KEYWORD;
      }
      return { type, value, line, column };
    }

    if (char === '"') {
      return readString();
    }

    if (singleCharOperators.has(char)) {
      return consume(tokenTypes.OPERATOR, char);
    }

    throw new LexError(
      `Unexpected character: ${JSON.stringify(char)}`,
      { line, column, length: 1 },
      input,
      [suggestionFor(char)]
    );
  }

  /**
   * Tokenizes from the current cursor to the end of the input.
   *
   * @returns {object[]} Tokens in source order; the last one is always EOF.
   */
  function allTokens() {
    const tokens = [];
    for (;;) {
      const token = nextToken();
      tokens.push(token);
      if (token.type === tokenTypes.EOF) {
        return tokens;
      }
    }
  }

  return {
    allTokens,
  };
}

export { createLexer, tokenTypes };