1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
# new implementation of regular expressions
type
  TRegexKind = enum     ## Discriminator of a node in a compiled expression
    regNone,            ## first value, hence the kind of a fresh node
    regChar,            ## one literal character, stored in field `c`
    regSet,             ## a set of characters, stored in field `s`
    regConc,            ## presumably concatenation of `a`, `b` - TODO confirm
    regAlt,             ## presumably alternation of `a`, `b` - TODO confirm
    regStar,            ## presumably zero-or-more repetition - TODO confirm
    regPlus,            ## presumably one-or-more repetition - TODO confirm
    regMN,              ## presumably bounded repetition - TODO confirm
    regNewline,         ## presumably a line break - TODO confirm
    regAny              ## produced for the `.` atom; appended last so the
                        ## ordinals of the older kinds stay unchanged
  TRegex = object of RootObj ## One node of a compiled regular expression
    case kind: TRegexKind
    of regChar: c: char           ## the literal character
    of regSet: s: ref set[char]   ## the characters belonging to the set
    else: a, b: PRegEx            ## child nodes of every other kind
  PRegEx* = ref TRegEx
  TRegExFlag* = enum  ## Flags concerning the semantics of regular expressions
    reCaseInsensitive,  ## case insensitive match
    reStyleInsensitive  ## style insensitive match
  TRegExFlags* = set[TRegExFlag]
    ## Flags concerning the semantics of regular expressions
proc raiseRegex(msg: string) {.noreturn.} =
  ## Raises an `Exception` whose message is `msg`; never returns.
  raise newException(Exception, msg)
# Forward declaration: `compileAtom` calls `compileAux` for parenthesised
# groups, but `compileAux` itself is only defined further down.
proc compileAux(i: int, s: string, r: PRegEx): int
proc compileBackslash(i: int, s: string, r: PRegEx): int =
  ## Compiles the escape sequence whose backslash is located at ``s[i]``.
  ##
  ## The resulting node is stored in `r`; the return value is the index of
  ## the first character after the escape sequence.
  ##
  ## Only a backslash followed by a character that is neither a letter nor
  ## a digit is implemented: it yields a `regChar` node for that character.
  ## Letter and digit escapes, and a backslash at the very end of the
  ## pattern, are rejected via `raiseRegex`.
  let escaped = i + 1 # index of the character following the backslash
  case s[escaped]
  of 'A'..'Z', 'a'..'z', '0', '1'..'9':
    # TODO: these escapes were empty placeholders; reject them explicitly
    # instead of silently consuming them without producing a node.
    raiseRegex("escape sequence '\\" & s[escaped] & "' is not supported")
  of '\0':
    # Assumes, like the other procs of this parser, that the terminating
    # '\0' is readable at s[len(s)]. Without this branch the terminator
    # would become a literal and the index would run past the end.
    raiseRegex("escape sequence expected after '\\'")
  else:
    r.kind = regChar
    r.c = s[escaped]
  result = escaped + 1
proc compileAtom(i: int, s: string, r: PRegEx): int =
  ## Compiles the single atom that starts at ``s[i]`` into the node `r`.
  ##
  ## An atom is a character class (``[...]``, optionally negated with a
  ## leading ``^``), an escape sequence, the wildcard ``.``, a parenthesised
  ## group or a literal character. The return value is the index of the
  ## first character after the atom. At the end of the pattern (``'\0'``)
  ## nothing is consumed and `r` is left unchanged.
  ##
  ## Raises via `raiseRegex` when a class lacks its ``]``, a group lacks its
  ## ``)`` or an escape inside a class does not denote a single character.
  var i = i
  case s[i]
  of '[':
    inc(i)
    let inverse = s[i] == '^'
    if inverse: inc(i)
    r.kind = regSet
    new(r.s)
    while true:
      case s[i]
      of '\\':
        # Compile the escape into a scratch node: compiling it straight
        # into `r` would overwrite `r.kind` and discard the class.
        var esc: PRegEx
        new(esc)
        i = compileBackslash(i, s, esc)
        if esc.kind == regChar: incl(r.s[], esc.c)
        else: raiseRegex("invalid escape sequence in character class")
      of ']':
        inc(i)
        break
      of '\0':
        raiseRegex("']' expected")
      elif s[i+1] == '-' and s[i+2] notin {']', '\0'}:
        # Range `x-y`. A '-' directly in front of ']' or the end of the
        # pattern is not a range operator; it is added literally by the
        # `else` branch on the next iteration.
        let first = s[i]
        let last = s[i+2]
        inc(i, 3)
        r.s[] = r.s[] + {first..last}
      else:
        incl(r.s[], s[i])
        inc(i)
    if inverse:
      # `r.s` is a ref, so the set itself has to be dereferenced
      r.s[] = {'\0'..'\255'} - r.s[]
  of '\\':
    # no `inc(i)` here: compileBackslash skips the backslash itself
    i = compileBackslash(i, s, r)
  of '.':
    r.kind = regAny
    inc(i)
  of '(':
    inc(i)
    i = compileAux(i, s, r)
    if s[i] == ')': inc(i)
    else: raiseRegex("')' expected")
  of '\0': discard # end of pattern: do nothing
  else:
    r.kind = regChar
    r.c = s[i]
    inc(i)
  result = i
proc compilePostfix(i: int, s: string, r: PRegEx): int =
  ## Compiles the atom starting at ``s[i]`` into `r` and inspects the
  ## character that follows it for a postfix operator.
  ##
  ## Returns the index of the first character after the atom. The operators
  ## ``*``, ``+`` and ``?`` are recognised but not implemented yet; they are
  ## rejected via `raiseRegex` so that they are not later mistaken for
  ## literal characters.
  result = compileAtom(i, s, r)
  case s[result]
  of '*', '+', '?':
    # TODO: wrap the atom in a repetition node once the node layout for
    # repetitions is settled.
    raiseRegex("postfix operator '" & s[result] & "' is not supported")
  else:
    discard
proc compileAux(i: int, s: string, r: PRegEx): int =
  ## Compiles the expression that starts at ``s[i]`` into the node `r` and
  ## returns the index of the first character that was not consumed.
  ##
  ## Only a single atom is compiled for now; whatever follows it is left to
  ## the caller, which checks for a closing ``)`` (groups) or for the end of
  ## the pattern (`compile`).
  # TODO: compile the atoms that follow the first one. The former
  # `while s[i] != '\0':` loop had no body that advanced `i`, so it could
  # never terminate on a longer pattern.
  result = compileAtom(i, s, r)
proc compile*(regex: string, flags: TRegExFlags = {}): PRegEx =
  ## Compiles the string `regex` that represents a regular expression into
  ## an internal data structure that can be used for matching.
  ##
  ## `flags` is accepted but not evaluated by this proc yet.
  ##
  ## Raises (via `raiseRegex`) with the message "invalid regular expression"
  ## when the parser stops before the end of `regex`.
  new(result)
  let consumed = compileAux(0, regex, result)
  # `consumed` is the index of the first unread character, so the whole
  # pattern was used only if it equals len(regex). Comparing against
  # len(regex)-1 let a single trailing character go unnoticed.
  if consumed < len(regex):
    # not all characters used for the regular expression?
    raiseRegex("invalid regular expression")
|