# Origin: src/html/parseerror.nim, added by commit 9aa9ecd2
# ("htmlparser: add parse error handling", bptato, 2023-07-15).
type
  ParseError* = enum ## Error codes reported by the HTML parser.
    # Members up to LAST_SPECIFIED_ERROR have no explicit string value, so
    # `$` yields the identifier itself. Declaration order fixes the ordinal
    # of every member; append new codes instead of reordering.
    # NOTE(review): "the standard" below is presumably the WHATWG HTML
    # standard's list of parse errors -- confirm.
    #TODO write a description for all error codes

    # -- abrupt / absent constructs
    ABRUPT_CLOSING_OF_EMPTY_COMMENT
    ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER
    ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER
    ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE

    # -- CDATA, character references, control characters
    CDATA_IN_HTML_CONTENT
    CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
    CONTROL_CHARACTER_IN_INPUT_STREAM
    CONTROL_CHARACTER_REFERENCE

    # -- attributes and end tags (not alphabetical; order is significant)
    END_TAG_WITH_ATTRIBUTES
    DUPLICATE_ATTRIBUTE
    END_TAG_WITH_TRAILING_SOLIDUS

    # -- premature end of file
    EOF_BEFORE_TAG_NAME
    EOF_IN_CDATA
    EOF_IN_COMMENT
    EOF_IN_DOCTYPE
    EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT
    EOF_IN_TAG

    # -- incorrect / invalid input
    INCORRECTLY_CLOSED_COMMENT
    INCORRECTLY_OPENED_COMMENT
    INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME
    INVALID_FIRST_CHARACTER_OF_TAG_NAME

    # -- missing pieces
    MISSING_ATTRIBUTE_VALUE
    MISSING_DOCTYPE_NAME
    MISSING_DOCTYPE_PUBLIC_IDENTIFIER
    MISSING_DOCTYPE_SYSTEM_IDENTIFIER
    MISSING_END_TAG_NAME
    MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
    MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
    MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
    MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD
    MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD
    MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME
    MISSING_WHITESPACE_BETWEEN_ATTRIBUTES
    MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS

    # -- nesting, noncharacters, null and surrogates
    NESTED_COMMENT
    NONCHARACTER_CHARACTER_REFERENCE
    NONCHARACTER_IN_INPUT_STREAM
    NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS
    NULL_CHARACTER_REFERENCE
    SURROGATE_CHARACTER_REFERENCE
    SURROGATE_IN_INPUT_STREAM

    # -- unexpected / unknown input
    UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER
    UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME
    UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE
    UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME
    UNEXPECTED_NULL_CHARACTER
    UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME
    UNEXPECTED_SOLIDUS_IN_TAG
    UNKNOWN_NAMED_CHARACTER_REFERENCE

    # Sentinel separating the two halves of the enum.
    LAST_SPECIFIED_ERROR # never returned

    # From here on, error code names have not been specified by the standard.
    # Each of these members carries its own human-readable message, which is
    # what `$` returns for it.
    MISMATCHED_TAGS = "Mismatched start and end tags"
    INVALID_DOCTYPE = "Unrecognized document type"
    UNEXPECTED_DOCTYPE = "Unexpected document type"
    UNEXPECTED_INITIAL_TOKEN = "Unexpected token in initial state"
    UNEXPECTED_START_TAG = "Unexpected start tag"
    UNEXPECTED_END_TAG = "Unexpected end tag"
    ELEMENT_NOT_IN_OPEN_ELEMENTS = "Element has not been added to open elements"
    ELEMENT_NOT_IN_SCOPE = "Element not in appropriate scope"
    ELEMENT_NOT_CURRENT_NODE = "Element is not current node"
    #TODO merge with UNEXPECTED_NULL_CHARACTER?
    UNEXPECTED_NULL = "Unexpected null character"
    NESTED_TAGS = "Non-nestable nested tags"
    UNEXPECTED_SPECIAL_ELEMENT = "Unexpected special element on open elements"
    UNEXPECTED_EOF = "Unexpected end of file"
    INVALID_TEXT_PARENT = "Invalid parent element for text node"
    NON_SPACE_TABLE_TEXT = "Non-space table text"
    UNEXPECTED_AFTER_BODY_TOKEN = "Unexpected token after body"