1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
#
#
# Nimrod's Runtime Library
# (c) Copyright 2009 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Regular expression support for Nimrod.
## Currently this module is implemented by providing a wrapper around the
## `PRCE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_
## C library. This means that your application will depend on the PRCE
## library's licence when using this module, which should not be a problem
## though.
## PRCE's licence follows:
##
## .. include:: ../doc/regexprs.txt
##
# This is not just a convenient wrapper for the pcre library; the
# API will stay the same if the implementation should change.
import
pcre, strutils
type
EInvalidRegEx* = object of EInvalidValue
## is raised if the pattern is no valid regular expression.
const
MaxSubpatterns* = 10
## defines the maximum number of subpatterns that can be captured.
## More subpatterns cannot be captured!
proc match*(s, pattern: string, matches: var openarray[string],
start: int = 0): bool
## returns ``true`` if ``s[start..]`` matches the ``pattern`` and
## the captured substrings in the array ``matches``. If it does not
## match, nothing is written into ``matches`` and ``false`` is
## returned.
proc match*(s, pattern: string, start: int = 0): bool
## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``.
proc matchLen*(s, pattern: string, matches: var openarray[string],
start: int = 0): int
## the same as ``match``, but it returns the length of the match,
## if there is no match, -1 is returned. Note that a match length
## of zero can happen.
proc find*(s, pattern: string, matches: var openarray[string],
start: int = 0): bool
## returns ``true`` if ``pattern`` occurs in ``s`` and the captured
## substrings in the array ``matches``. If it does not match, nothing
## is written into ``matches``.
proc find*(s, pattern: string, start: int = 0): bool
## returns ``true`` if ``pattern`` occurs in ``s``.
proc rawCompile(pattern: string, flags: cint): PPcre =
var
msg: CString
offset: int
com = pcreCompile(pattern, flags, addr(msg), addr(offset), nil)
if com == nil:
var e: ref EInvalidRegEx
new(e)
e.msg = $msg & "\n" & pattern & "\n" & repeatChar(offset) & "^\n"
raise e
return com
proc matchOrFind(s: string, pattern: PPcre, matches: var openarray[string],
start: cint): cint =
var
rawMatches: array [0..maxSubpatterns * 3 - 1, cint]
res = int(pcreExec(pattern, nil, s, len(s), start, 0,
cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3))
dealloc(pattern)
if res < 0: return res
for i in 0..res-1:
var
a = rawMatches[i * 2]
b = rawMatches[i * 2 + 1]
if a >= 0'i32: matches[i] = copy(s, a, int(b)-1)
else: matches[i] = ""
return res
proc matchOrFind(s: string, pattern: PPcre, start: cint): cint =
var
rawMatches: array [0..maxSubpatterns * 3 - 1, cint]
res = pcreExec(pattern, nil, s, len(s), start, 0,
cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
dealloc(pattern)
return res
proc match(s, pattern: string, matches: var openarray[string],
start: int = 0): bool =
return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED),
matches, start) >= 0'i32
proc matchLen(s, pattern: string, matches: var openarray[string],
start: int = 0): int =
return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), matches, start)
proc find(s, pattern: string, matches: var openarray[string],
start: int = 0): bool =
return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE),
matches, start) >= 0'i32
proc match(s, pattern: string, start: int = 0): bool =
return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), start) >= 0'i32
proc find(s, pattern: string, start: int = 0): bool =
return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE), start) >= 0'i32
template `=~` *(s, pattern: expr): expr =
## This calls ``match`` with an implicit declared ``matches`` array that
## can be used in the scope of the ``=~`` call:
##
## .. code-block:: nimrod
##
## if line =~ r"\s*(\w+)\s*\=\s*(\w+)":
## # matches a key=value pair:
## echo("Key: ", matches[1])
## echo("Value: ", matches[2])
## elif line =~ r"\s*(\#.*)":
## # matches a comment
## # note that the implicit ``matches`` array is different from the
## # ``matches`` array of the first branch
## echo("comment: ", matches[1])
## else:
## echo("syntax error")
##
var matches: array[0..maxSubPatterns-1, string]
match(s, pattern, matches)
const ## common regular expressions
reIdentifier* = r"\b[a-zA-Z_]+[a-zA-Z_0-9]*\b" ## describes an identifier
reNatural* = r"\b\d+\b" ## describes a natural number
reInteger* = r"\b[-+]?\d+\b" ## describes an integer
reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number
reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101)
reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777)
reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b"
## describes a floating point number
reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" &
r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+(?:[a-zA-Z]{2}|com|org|" &
r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b"
## describes a common email address
reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" &
r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b"
## describes an URL
|