summary refs log tree commit diff stats
path: root/lib/wrappers/tre.nim
blob: 36bf3cb69fb4e685604ee3b7397623546cde67e6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#
#  tre.h - TRE public API definitions
#
#  This software is released under a BSD-style license.
#  See the file LICENSE for details and copyright.
#
#

when not defined(treDll):
  # Name of the TRE shared library to load, chosen per host OS. Skipped
  # entirely when the `treDll` symbol is already defined.
  const treDll =
    when hostOS == "windows": "tre.dll"
    elif hostOS == "macosx": "libtre.dylib"
    else: "libtre.so(.5|)"

const
  VERSION_1* = 0        ## TRE version level 1.
  VERSION_2* = 8        ## TRE version level 2.
  VERSION_3* = 0        ## TRE version level 3.
  VERSION* = "0.8.0"    ## TRE version string.
  APPROX* = 1           ## Approximate matching functionality.
  MULTIBYTE* = 1        ## Multibyte character set support.


# If we're not using the system regex.h, we need to define the
#   structs and enums ourselves. 

type
  Regoff* = cint ## Offset type used by the `Regmatch` fields.

  Regex* {.pure, final.} = object
    ## Pattern object that the `tre_*` bindings receive as `preg`.
    re_nsub*: int   ## Number of parenthesized subexpressions.
    value*: pointer ## For internal use only.

  Regmatch* {.pure, final.} = object
    ## Pair of offsets describing one (sub)match.
    ## NOTE(review): presumably start/end offsets as in POSIX `regmatch_t`
    ## -- confirm against the TRE documentation.
    rm_so*: Regoff
    rm_eo*: Regoff

  Reg_errcode* {.size: 4.} = enum
    ## POSIX tre_regcomp() return error codes
    ## (in the order listed in the standard).
    REG_OK = 0    ## No error.
    REG_NOMATCH   ## No match.
    REG_BADPAT    ## Invalid regexp.
    REG_ECOLLATE  ## Unknown collating element.
    REG_ECTYPE    ## Unknown character class name.
    REG_EESCAPE   ## Trailing backslash.
    REG_ESUBREG   ## Invalid back reference.
    REG_EBRACK    ## "[]" imbalance
    REG_EPAREN    ## "\(\)" or "()" imbalance
    REG_EBRACE    ## "\{\}" or "{}" imbalance
    REG_BADBR     ## Invalid content of {}
    REG_ERANGE    ## Invalid use of range operator
    REG_ESPACE    ## Out of memory.
    REG_BADRPT    ## Invalid use of repetition operators.

# Keep the legacy `T`-prefixed names usable as deprecated aliases.
{.deprecated: [TRegoff: Regoff, TRegex: Regex, TRegmatch: Regmatch,
              TReg_errcode: Reg_errcode].}

# POSIX tre_regcomp() flags. 

const
  REG_EXTENDED* = 1 shl 0
  REG_ICASE* = 1 shl 1
  REG_NEWLINE* = 1 shl 2
  REG_NOSUB* = 1 shl 3

# Extra tre_regcomp() flags; they continue the bit sequence above.

const
  REG_BASIC* = 0
  REG_LITERAL* = 1 shl 4
  REG_RIGHT_ASSOC* = 1 shl 5
  REG_UNGREEDY* = 1 shl 6

# POSIX tre_regexec() flags.

const
  REG_NOTBOL* = 1 shl 0
  REG_NOTEOL* = 1 shl 1

# Extra tre_regexec() flags; they continue the tre_regexec() bit sequence.

const
  REG_APPROX_MATCHER* = 1 shl 2
  REG_BACKTRACKING_MATCHER* = 1 shl 3

# The maximum number of iterations in a bound expression.

const
  RE_DUP_MAX* = 255

# The POSIX.2 regexp functions 

proc regcomp*(preg: var Regex; regex: cstring; cflags: cint): cint {.
    importc: "tre_regcomp", cdecl, dynlib: treDll.}
  ## Binding for `tre_regcomp` from the TRE shared library.
  ##
  ## `cflags` takes the tre_regcomp() `REG_*` flag constants; the returned
  ## `cint` corresponds to the `Reg_errcode` values.
proc regexec*(preg: var Regex; string: cstring; nmatch: int;
              pmatch: ptr Regmatch; eflags: cint): cint {.
    importc: "tre_regexec", cdecl, dynlib: treDll.}
  ## Binding for `tre_regexec` from the TRE shared library.
  ##
  ## `eflags` takes the tre_regexec() flag constants (`REG_NOTBOL`, ...).
  ## NOTE(review): `pmatch` presumably points to an array of `nmatch`
  ## entries -- confirm against the TRE documentation.
proc regerror*(errcode: cint; preg: var Regex; errbuf: cstring;
               errbuf_size: int): int {.
    importc: "tre_regerror", cdecl, dynlib: treDll.}
  ## Binding for `tre_regerror` from the TRE shared library.
  ##
  ## NOTE(review): presumably writes the message for `errcode` into
  ## `errbuf` (capacity `errbuf_size`) like POSIX `regerror` -- confirm
  ## against the TRE documentation.
proc regfree*(preg: var Regex) {.
    importc: "tre_regfree", cdecl, dynlib: treDll.}
  ## Binding for `tre_regfree` from the TRE shared library.
# Versions with a maximum length argument and therefore the capability to
#   handle null characters in the middle of the strings (not in POSIX.2). 

proc regncomp*(preg: var Regex; regex: cstring; len: int;
               cflags: cint): cint {.
    importc: "tre_regncomp", cdecl, dynlib: treDll.}
  ## Binding for `tre_regncomp`: the variant of `regcomp` that takes a
  ## maximum length argument (`len`) for `regex`, so the pattern may
  ## contain null characters. Not part of POSIX.2.
proc regnexec*(preg: var Regex; string: cstring; len: int; nmatch: int;
               pmatch: ptr Regmatch; eflags: cint): cint {.
    importc: "tre_regnexec", cdecl, dynlib: treDll.}
  ## Binding for `tre_regnexec`: the variant of `regexec` that takes a
  ## maximum length argument (`len`) for `string`, so the input may
  ## contain null characters. Not part of POSIX.2.
# Approximate matching parameter struct. 

type
  TRegaparams* {.pure, final, bycopy.} = object
    ## Approximate matching parameter struct.
    ##
    ## Marked `bycopy` because the C functions `tre_regaexec` and
    ## `tre_reganexec` receive this struct by value. Without the pragma Nim
    ## passes objects larger than three floats through a hidden pointer,
    ## which does not match the C calling convention for those functions.
    cost_ins*: cint           ## Default cost of an inserted character.
    cost_del*: cint           ## Default cost of a deleted character.
    cost_subst*: cint         ## Default cost of a substituted character.
    max_cost*: cint           ## Maximum allowed cost of a match.
    max_ins*: cint            ## Maximum allowed number of inserts.
    max_del*: cint            ## Maximum allowed number of deletes.
    max_subst*: cint          ## Maximum allowed number of substitutes.
    max_err*: cint            ## Maximum allowed number of errors total.
  

# Approximate matching result struct. 

type
  TRegamatch* {.pure, final.} = object
    ## Approximate matching result struct.
    nmatch*: int           ## Length of pmatch[] array.
    pmatch*: ptr Regmatch  ## Submatch data.
    cost*: cint            ## Cost of the match.
    num_ins*: cint         ## Number of inserts in the match.
    num_del*: cint         ## Number of deletes in the match.
    num_subst*: cint       ## Number of substitutes in the match.
  

# Approximate matching functions. 

proc regaexec*(preg: var Regex; string: cstring; match: ptr TRegamatch;
               params: TRegaparams; eflags: cint): cint {.
    importc: "tre_regaexec", cdecl, dynlib: treDll.}
  ## Binding for `tre_regaexec`, one of TRE's approximate matching
  ## functions.
  ##
  ## `params` carries the approximate matching parameters and `match`
  ## points to the approximate matching result struct.
proc reganexec*(preg: var Regex; string: cstring; len: int;
                match: ptr TRegamatch; params: TRegaparams;
                eflags: cint): cint {.
    importc: "tre_reganexec", cdecl, dynlib: treDll.}
  ## Binding for `tre_reganexec`, one of TRE's approximate matching
  ## functions; takes an extra `len` argument compared to `regaexec`.
  ##
  ## NOTE(review): `len` is presumably the length of `string`, as in the
  ## other length-taking variants -- confirm against the TRE documentation.
proc regaparams_default*(params: ptr TRegaparams) {.
    importc: "tre_regaparams_default", cdecl, dynlib: treDll.}
  ## Sets the parameters to default values.

type
  TStrSource* {.pure, final.} = object
    ## Table of `cdecl` callbacks plus an opaque `context` pointer; a
    ## pointer to it is what `reguexec` takes as its `string` argument.
    ## NOTE(review): the exact contract of each callback is not stated in
    ## this file -- see the TRE documentation for `tre_str_source`.
    get_next_char*: proc (c: cstring; pos_add: ptr cint;
                          context: pointer): cint {.cdecl.}
    rewind*: proc (pos: int; context: pointer) {.cdecl.}
    compare*: proc (pos1, pos2, len: int; context: pointer): cint {.cdecl.}
    context*: pointer


proc reguexec*(preg: var Regex; string: ptr TStrSource; nmatch: int;
               pmatch: ptr Regmatch; eflags: cint): cint {.
    importc: "tre_reguexec", cdecl, dynlib: treDll.}
  ## Binding for `tre_reguexec`: like `regexec`, but the input is given
  ## as a `TStrSource` callback table instead of a `cstring`.

proc runtimeVersion*(): cstring {.
    importc: "tre_version", cdecl, dynlib: treDll.}
  ## Returns the version string. The returned string is static.

proc config*(query: cint; result: pointer): cint {.
    importc: "tre_config", cdecl, dynlib: treDll.}
  ## Returns the value for a config parameter. The type that `result`
  ## must point to depends on the value of `query`; see the TRE
  ## documentation for more details.

# NOTE(review): presumably the `query` values accepted by `config` --
# confirm against the TRE documentation.
const
  CONFIG_APPROX* = 0
  CONFIG_WCHAR* = CONFIG_APPROX + 1
  CONFIG_MULTIBYTE* = CONFIG_WCHAR + 1
  CONFIG_SYSTEM_ABI* = CONFIG_MULTIBYTE + 1
  CONFIG_VERSION* = CONFIG_SYSTEM_ABI + 1

proc have_backrefs*(preg: var Regex): cint {.
    importc: "tre_have_backrefs", cdecl, dynlib: treDll.}
  ## Returns 1 if the compiled pattern has back references, 0 if not.
proc have_approx*(preg: var Regex): cint {.
    importc: "tre_have_approx", cdecl, dynlib: treDll.}
  ## Returns 1 if the compiled pattern uses approximate matching features,
  ## 0 if not.