summary refs log tree commit diff stats
path: root/compiler/semdata.nim
blob: 74e82db614e662ad3122bcc246ad3044d21bfedc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#
#
#           The Nimrod Compiler
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module contains the data structures for the semantic checking phase.

import 
  strutils, lists, intsets, options, lexer, ast, astalgo, trees, treetab,
  wordrecg, 
  ropes, msgs, platform, os, condsyms, idents, renderer, types, extccomp, math, 
  magicsys, nversion, nimsets, parser, times, passes, rodread, evals

type 
  TOptionEntry* = object of lists.TListEntry # entries to put on a
                                             # stack for pragma parsing
    options*: TOptions             # newOptionEntry seeds this from gOptions
    defaultCC*: TCallingConvention # newOptionEntry seeds this with ccDefault
    dynlib*: PLib                  # nil in a fresh entry
    Notes*: TNoteKinds             # newOptionEntry seeds this from gNotes

  POptionEntry* = ref TOptionEntry
  PProcCon* = ref TProcCon
  TProcCon*{.final.} = object # procedure context; also used for top-level
                              # statements
    owner*: PSym              # the symbol this context belongs to
    resultSym*: PSym          # the result symbol (if we are in a proc)
    nestedLoopCounter*: int   # a counter rather than a flag; presumably the
                              # loop nesting depth (> 0 inside a loop)
    nestedBlockCounter*: int  # a counter rather than a flag; presumably the
                              # block nesting depth (> 0 inside a block)
    next*: PProcCon           # enclosing context; pushProcCon/popProcCon
                              # treat the contexts as a linked stack
  
  TInstantiatedSymbol* {.final.} = object
    # NOTE(review): presumably pairs a generic symbol with one of its
    # instantiations and the concrete types used for it -- confirm at the
    # use sites.
    genericSym*, instSym*: PSym
    concreteTypes*: seq[PType]
  
  # If we generate an instance of a generic, we'd like to re-use that
  # instance if possible across module boundaries. However, this is not
  # possible if the compilation cache is enabled. So we give up then and use
  # the caching of generics only per module, not per project.
  TGenericsCache* {.final.} = object
    InstTypes*: TIdTable # map PType to PType
    generics*: seq[TInstantiatedSymbol] # a list of the things to compile
    lastGenericIdx*: int      # used for the generics stack
  
  PGenericsCache* = ref TGenericsCache
  PContext* = ref TContext
  TContext* = object of TPassContext # a context represents a module
    module*: PSym              # the module sym belonging to the context
    p*: PProcCon               # procedure context
    generics*: PGenericsCache  # may point to a global or module-local structure
    friendModule*: PSym        # current friend module; may access private data;
                               # this is used so that generic instantiations
                               # can access private object fields
    InstCounter*: int          # to prevent endless instantiations
   
    threadEntries*: TSymSeq    # list of thread entries to check
    tab*: TSymTab              # each module has its own symbol table
    AmbiguousSymbols*: TIntSet # ids of all ambiguous symbols (cannot
                               # store this info in the syms themselves!)
    InGenericContext*: int     # > 0 if we are in a generic
    InUnrolledContext*: int    # > 0 if we are unrolling a loop
    converters*: TSymSeq       # sequence of converters
    optionStack*: TLinkedList  # list of TOptionEntry; lastOptionEntry returns
                               # its tail
    libs*: TLinkedList         # all libs used by this module
    semConstExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.} # for the pragmas
    semExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.}      # for the pragmas
    semConstBoolExpr*: proc (c: PContext, n: PNode): PNode {.nimcall.} # XXX bite the bullet
    includedFiles*: TIntSet    # used to detect recursive include files
    filename*: string          # the module's filename
    userPragmas*: TStrTable    # initialized empty by newContext
    evalContext*: PEvalContext # not set by newContext

var
  gGenericsCache: PGenericsCache # project-wide cache of generic instantiations;
                                 # newContext creates it lazily and shares it
                                 # between contexts unless optSymbolFiles is
                                 # set, in which case it must stay nil

proc newGenericsCache: PGenericsCache =
  ## Allocates a generics cache whose `InstTypes` table is initialized and
  ## whose `generics` list is empty.
  var cache: PGenericsCache
  new(cache)
  cache.generics = @[]
  initIdTable(cache.InstTypes)
  result = cache

# Forward declarations of the exported interface; the bodies follow in the
# implementation part of this module.

# creates the context of `module`; `nimfile` is stored as the context's filename
proc newContext*(module: PSym, nimfile: string): PContext

# returns the tail of `c.optionStack`
proc lastOptionEntry*(c: PContext): POptionEntry
# creates an option entry seeded from the global options and notes
proc newOptionEntry*(): POptionEntry
# appends `conv` to `c.converters` unless a converter with the same id is there
proc addConverter*(c: PContext, conv: PSym)
# creates a library descriptor of the given kind
proc newLib*(kind: TLibKind): PLib
# sets `sym.annex` to `lib`; reports errInvalidPragma if it was already set
proc addToLib*(lib: PLib, sym: PSym)
# type construction helpers; new types are owned by the current owner
proc makePtrType*(c: PContext, baseType: PType): PType
proc makeVarType*(c: PContext, baseType: PType): PType
proc newTypeS*(kind: TTypeKind, c: PContext): PType
proc fillTypeS*(dest: PType, kind: TTypeKind, c: PContext)

# owner handling:
proc getCurrOwner*(): PSym
proc PushOwner*(owner: PSym)
proc PopOwner*()
# implementation

# global owner stack: PushOwner appends to it, PopOwner drops its last entry
# and getCurrOwner reads its last entry
var gOwners: seq[PSym] = @[]

proc getCurrOwner(): PSym = 
  ## Returns the symbol on top of the global owner stack `gOwners`.
  ##
  ## The stack is used for initializing the owner field of syms; the
  ## documentation comment always gets assigned to the current owner.
  # BUGFIX: global array is needed!
  let top = len(gOwners) - 1
  result = gOwners[top]

proc PushOwner(owner: PSym) = 
  ## Makes `owner` the current owner by pushing it onto `gOwners`.
  gOwners.add(owner)

proc PopOwner() = 
  ## Removes the most recently pushed owner from `gOwners`.
  ## Reports an internal error when the stack is already empty.
  let depth = gOwners.len
  if depth <= 0:
    InternalError("popOwner")
  gOwners.setlen(depth - 1)

proc lastOptionEntry(c: PContext): POptionEntry = 
  ## Returns the tail of `c.optionStack`, converted to an option entry.
  let top = c.optionStack.tail
  result = POptionEntry(top)

proc pushProcCon*(c: PContext, owner: PSym) {.inline.} = 
  ## Opens a new procedure context owned by `owner` and makes it the current
  ## one; the previous context stays reachable through `next`.
  ## A nil `owner` is reported as an internal error.
  if owner == nil:
    InternalError("owner is nil")
  var con: PProcCon
  new(con)
  con.next = c.p
  con.owner = owner
  c.p = con

proc popProcCon*(c: PContext) {.inline.} =
  ## Leaves the current procedure context and returns to the one that was
  ## active before the matching `pushProcCon`.
  let enclosing = c.p.next
  c.p = enclosing

proc newOptionEntry(): POptionEntry = 
  ## Creates an option entry that mirrors the current global settings:
  ## `gOptions`, `gNotes`, the default calling convention and no dynlib.
  var entry: POptionEntry
  new(entry)
  entry.dynlib = nil
  entry.defaultCC = ccDefault
  entry.options = gOptions
  entry.notes = gNotes
  result = entry

proc newContext(module: PSym, nimfile: string): PContext = 
  ## Creates the semantic checking context for `module`.
  ##
  ## All tables, sets and lists of the context start out empty, except the
  ## option stack, which receives one entry built by `newOptionEntry`.
  ## `module` is also installed as the friend module and `nimfile` as the
  ## filename.
  new(result)
  result.module = module
  result.friendModule = module
  result.filename = nimfile

  # containers
  InitSymTab(result.tab)
  initStrTable(result.userPragmas)
  result.AmbiguousSymbols = initIntset()
  result.includedFiles = initIntSet()
  result.threadEntries = @[]
  result.converters = @[]
  initLinkedList(result.libs)
  initLinkedList(result.optionStack)
  append(result.optionStack, newOptionEntry())

  if optSymbolFiles in gGlobalOptions:
    # we have to give up and use a per-module cache for generic instantiations:
    result.generics = newGenericsCache()
    assert gGenericsCache == nil
  else:
    # re-usage of generic instantiations across module boundaries is
    # very nice for code size:
    if gGenericsCache == nil:
      gGenericsCache = newGenericsCache()
    result.generics = gGenericsCache

proc addConverter(c: PContext, conv: PSym) = 
  ## Appends `conv` to the converters of `c`; does nothing when a converter
  ## with the same id has been added before.
  for known in items(c.converters):
    if known.id == conv.id: return
  add(c.converters, conv)

proc newLib(kind: TLibKind): PLib = 
  ## Creates a library descriptor of the given `kind`; every other field
  ## keeps its default value.
  var lib: PLib
  new(lib)
  lib.kind = kind
  # (legacy, disabled) initObjectSet(lib.syms)
  result = lib
  
proc addToLib(lib: PLib, sym: PSym) = 
  ## Attaches `sym` to `lib` through the symbol's `annex` field.
  ## A symbol that already has an annex triggers `errInvalidPragma`; the
  ## annex is overwritten afterwards in any case.
  # (legacy, disabled) ObjectSetIncl(lib.syms, sym)
  let alreadyBound = sym.annex != nil
  if alreadyBound:
    LocalError(sym.info, errInvalidPragma)
  sym.annex = lib

proc makePtrType(c: PContext, baseType: PType): PType = 
  ## Builds a new `tyPtr` type whose single son is `baseType`
  ## (which is asserted to be non-nil).
  result = newTypeS(tyPtr, c)
  result.addSonSkipIntLit(AssertNotNil(baseType))

proc makeVarType(c: PContext, baseType: PType): PType = 
  ## Builds a new `tyVar` type whose single son is `baseType`
  ## (which is asserted to be non-nil).
  result = newTypeS(tyVar, c)
  result.addSonSkipIntLit(AssertNotNil(baseType))

proc makeTypeDesc*(c: PContext, typ: PType): PType =
  ## Builds a new `tyTypeDesc` type whose single son is `typ`
  ## (which is asserted to be non-nil).
  result = newTypeS(tyTypeDesc, c)
  addSonSkipIntLit(result, AssertNotNil(typ))

proc newTypeS(kind: TTypeKind, c: PContext): PType = 
  ## Creates a type of the given `kind` that is owned by the current owner.
  ## The context `c` is not consulted.
  let owner = getCurrOwner()
  result = newType(kind, owner)

proc fillTypeS(dest: PType, kind: TTypeKind, c: PContext) = 
  ## Re-initializes the existing type `dest` in place: sets its kind, makes
  ## the current owner its owner and sets its size to -1.
  ## The context `c` is not consulted.
  dest.kind = kind
  dest.size = -1
  dest.owner = getCurrOwner()

proc makeRangeType*(c: PContext, first, last: biggestInt, 
                    info: TLineInfo): PType = 
  ## Builds the range type `first .. last`.
  ##
  ## The bounds are stored as two int literals under an `nkRange` node
  ## located at `info`; the base type of the range is the system `int` type.
  var bounds = newNodeI(nkRange, info)
  bounds.addSon(newIntNode(nkIntLit, first))
  bounds.addSon(newIntNode(nkIntLit, last))
  result = newTypeS(tyRange, c)
  result.n = bounds
  result.rawAddSon(getSysType(tyInt)) # basetype of range
  
proc markUsed*(n: PNode, s: PSym) = 
  ## Flags `s` as used. A deprecated symbol additionally produces a
  ## deprecation warning and a symbol marked with `sfError` an error, both
  ## reported at the position of `n`.
  s.flags.incl(sfUsed)
  if sfDeprecated in s.flags:
    Message(n.info, warnDeprecated, s.name.s)
  if sfError in s.flags:
    LocalError(n.info, errWrongSymbolX, s.name.s)

proc markIndirect*(c: PContext, s: PSym) =
  ## Sets `sfAddrTaken` on `s` when it is a proc, converter, method or
  ## iterator; symbols of any other kind are left alone.
  if s.kind notin {skProc, skConverter, skMethod, skIterator}: return
  s.flags.incl(sfAddrTaken)
  # XXX add to 'c' for global analysis

proc useSym*(sym: PSym): PNode =
  ## Wraps `sym` in a fresh symbol node and marks the symbol as used at that
  ## node.
  let node = newSymNode(sym)
  markUsed(node, sym)
  result = node

proc illFormedAst*(n: PNode) = 
  ## Raises the global "ill-formed AST" error for `n`, quoting the tree
  ## rendered without comments.
  let rendered = renderTree(n, {renderNoComments})
  GlobalError(n.info, errIllFormedAstX, rendered)

proc checkSonsLen*(n: PNode, length: int) = 
  ## Reports `n` as ill-formed unless it has exactly `length` sons.
  let actual = sonsLen(n)
  if actual == length: return
  illFormedAst(n)
  
proc checkMinSonsLen*(n: PNode, length: int) = 
  ## Reports `n` as ill-formed unless it has at least `length` sons.
  let actual = sonsLen(n)
  if actual >= length: return
  illFormedAst(n)
pan class="n">type, len, a, is_lower; int idx, idx_min, idx_max; is_lower = (conv_type != 0); idx_min = 0; idx_max = countof(case_conv_table1) - 1; while (idx_min <= idx_max) { idx = (unsigned)(idx_max + idx_min) / 2; v = case_conv_table1[idx]; code = v >> (32 - 17); len = (v >> (32 - 17 - 7)) & 0x7f; if (c < code) { idx_max = idx - 1; } else if (c >= code + len) { idx_min = idx + 1; } else { type = (v >> (32 - 17 - 7 - 4)) & 0xf; data = ((v & 0xf) << 8) | case_conv_table2[idx]; switch(type) { case RUN_TYPE_U: case RUN_TYPE_L: case RUN_TYPE_UF: case RUN_TYPE_LF: if (conv_type == (type & 1) || (type >= RUN_TYPE_UF && conv_type == 2)) { c = c - code + (case_conv_table1[data] >> (32 - 17)); } break; case RUN_TYPE_UL: a = c - code; if ((a & 1) != (1 - is_lower)) break; c = (a ^ 1) + code; break; case RUN_TYPE_LSU: a = c - code; if (a == 1) { c += 2 * is_lower - 1; } else if (a == (1 - is_lower) * 2) { c += (2 * is_lower - 1) * 2; } break; case RUN_TYPE_U2L_399_EXT2: if (!is_lower) { res[0] = c - code + case_conv_ext[data >> 6]; res[1] = 0x399; return 2; } else { c = c - code + case_conv_ext[data & 0x3f]; } break; case RUN_TYPE_UF_D20: if (conv_type == 1) break; c = data + (conv_type == 2) * 0x20; break; case RUN_TYPE_UF_D1_EXT: if (conv_type == 1) break; c = case_conv_ext[data] + (conv_type == 2); break; case RUN_TYPE_U_EXT: case RUN_TYPE_LF_EXT: if (is_lower != (type - RUN_TYPE_U_EXT)) break; c = case_conv_ext[data]; break; case RUN_TYPE_U_EXT2: case RUN_TYPE_L_EXT2: if (conv_type != (type - RUN_TYPE_U_EXT2)) break; res[0] = c - code + case_conv_ext[data >> 6]; res[1] = case_conv_ext[data & 0x3f]; return 2; default: case RUN_TYPE_U_EXT3: if (conv_type != 0) break; res[0] = case_conv_ext[data >> 8]; res[1] = case_conv_ext[(data >> 4) & 0xf]; res[2] = case_conv_ext[data & 0xf]; return 3; } break; } } } res[0] = c; return 1; } static uint32_t get_le24(const uint8_t *ptr) { #if defined(__x86__) || defined(__x86_64__) return *(uint16_t *)ptr | (ptr[2] << 16); #else return 
ptr[0] | (ptr[1] << 8) | (ptr[2] << 16); #endif } #define UNICODE_INDEX_BLOCK_LEN 32 /* return -1 if not in table, otherwise the offset in the block */ static int get_index_pos(uint32_t *pcode, uint32_t c, const uint8_t *index_table, int index_table_len) { uint32_t code, v; int idx_min, idx_max, idx; idx_min = 0; v = get_le24(index_table); code = v & ((1 << 21) - 1); if (c < code) { *pcode = 0; return 0; } idx_max = index_table_len - 1; code = get_le24(index_table + idx_max * 3); if (c >= code) return -1; /* invariant: tab[idx_min] <= c < tab2[idx_max] */ while ((idx_max - idx_min) > 1) { idx = (idx_max + idx_min) / 2; v = get_le24(index_table + idx * 3); code = v & ((1 << 21) - 1); if (c < code) { idx_max = idx; } else { idx_min = idx; } } v = get_le24(index_table + idx_min * 3); *pcode = v & ((1 << 21) - 1); return (idx_min + 1) * UNICODE_INDEX_BLOCK_LEN + (v >> 21); } static BOOL lre_is_in_table(uint32_t c, const uint8_t *table, const uint8_t *index_table, int index_table_len) { uint32_t code, b, bit; int pos; const uint8_t *p; pos = get_index_pos(&code, c, index_table, index_table_len); if (pos < 0) return FALSE; /* outside the table */ p = table + pos; bit = 0; for(;;) { b = *p++; if (b < 64) { code += (b >> 3) + 1; if (c < code) return bit; bit ^= 1; code += (b & 7) + 1; } else if (b >= 0x80) { code += b - 0x80 + 1; } else if (b < 0x60) { code += (((b - 0x40) << 8) | p[0]) + 1; p++; } else { code += (((b - 0x60) << 16) | (p[0] << 8) | p[1]) + 1; p += 2; } if (c < code) return bit; bit ^= 1; } } BOOL lre_is_cased(uint32_t c) { uint32_t v, code, len; int idx, idx_min, idx_max; idx_min = 0; idx_max = countof(case_conv_table1) - 1; while (idx_min <= idx_max) { idx = (unsigned)(idx_max + idx_min) / 2; v = case_conv_table1[idx]; code = v >> (32 - 17); len = (v >> (32 - 17 - 7)) & 0x7f; if (c < code) { idx_max = idx - 1; } else if (c >= code + len) { idx_min = idx + 1; } else { return TRUE; } } return lre_is_in_table(c, unicode_prop_Cased1_table, 
unicode_prop_Cased1_index, sizeof(unicode_prop_Cased1_index) / 3); } BOOL lre_is_case_ignorable(uint32_t c) { return lre_is_in_table(c, unicode_prop_Case_Ignorable_table, unicode_prop_Case_Ignorable_index, sizeof(unicode_prop_Case_Ignorable_index) / 3); } /* character range */ static __maybe_unused void cr_dump(CharRange *cr) { int i; for(i = 0; i < cr->len; i++) printf("%d: 0x%04x\n", i, cr->points[i]); } static void *cr_default_realloc(void *opaque, void *ptr, size_t size) { return realloc(ptr, size); } void cr_init(CharRange *cr, void *mem_opaque, DynBufReallocFunc *realloc_func) { cr->len = cr->size = 0; cr->points = NULL; cr->mem_opaque = mem_opaque; cr->realloc_func = realloc_func ? realloc_func : cr_default_realloc; } void cr_free(CharRange *cr) { cr->realloc_func(cr->mem_opaque, cr->points, 0); } int cr_realloc(CharRange *cr, int size) { int new_size; uint32_t *new_buf; if (size > cr->size) { new_size = max_int(size, cr->size * 3 / 2); new_buf = cr->realloc_func(cr->mem_opaque, cr->points, new_size * sizeof(cr->points[0])); if (!new_buf) return -1; cr->points = new_buf; cr->size = new_size; } return 0; } int cr_copy(CharRange *cr, const CharRange *cr1) { if (cr_realloc(cr, cr1->len)) return -1; memcpy(cr->points, cr1->points, sizeof(cr->points[0]) * cr1->len); cr->len = cr1->len; return 0; } /* merge consecutive intervals and remove empty intervals */ static void cr_compress(CharRange *cr) { int i, j, k, len; uint32_t *pt; pt = cr->points; len = cr->len; i = 0; j = 0; k = 0; while ((i + 1) < len) { if (pt[i] == pt[i + 1]) { /* empty interval */ i += 2; } else { j = i; while ((j + 3) < len && pt[j + 1] == pt[j + 2]) j += 2; /* just copy */ pt[k] = pt[i]; pt[k + 1] = pt[j + 1]; k += 2; i = j + 2; } } cr->len = k; } /* union or intersection */ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, const uint32_t *b_pt, int b_len, int op) { int a_idx, b_idx, is_in; uint32_t v; a_idx = 0; b_idx = 0; for(;;) { /* get one more point from a or b in increasing 
order */ if (a_idx < a_len && b_idx < b_len) { if (a_pt[a_idx] < b_pt[b_idx]) { goto a_add; } else if (a_pt[a_idx] == b_pt[b_idx]) { v = a_pt[a_idx]; a_idx++; b_idx++; } else { goto b_add; } } else if (a_idx < a_len) { a_add: v = a_pt[a_idx++]; } else if (b_idx < b_len) { b_add: v = b_pt[b_idx++]; } else { break; } /* add the point if the in/out status changes */ switch(op) { case CR_OP_UNION: is_in = (a_idx & 1) | (b_idx & 1); break; case CR_OP_INTER: is_in = (a_idx & 1) & (b_idx & 1); break; case CR_OP_XOR: is_in = (a_idx & 1) ^ (b_idx & 1); break; default: abort(); } if (is_in != (cr->len & 1)) { if (cr_add_point(cr, v)) return -1; } } cr_compress(cr); return 0; } int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len) { CharRange a = *cr; int ret; cr->len = 0; cr->size = 0; cr->points = NULL; ret = cr_op(cr, a.points, a.len, b_pt, b_len, CR_OP_UNION); cr_free(&a); return ret; } int cr_invert(CharRange *cr) { int len; len = cr->len; if (cr_realloc(cr, len + 2)) return -1; memmove(cr->points + 1, cr->points, len * sizeof(cr->points[0])); cr->points[0] = 0; cr->points[len + 1] = UINT32_MAX; cr->len = len + 2; cr_compress(cr); return 0; } #ifdef CONFIG_ALL_UNICODE BOOL lre_is_id_start(uint32_t c) { return lre_is_in_table(c, unicode_prop_ID_Start_table, unicode_prop_ID_Start_index, sizeof(unicode_prop_ID_Start_index) / 3); } BOOL lre_is_id_continue(uint32_t c) { return lre_is_id_start(c) || lre_is_in_table(c, unicode_prop_ID_Continue1_table, unicode_prop_ID_Continue1_index, sizeof(unicode_prop_ID_Continue1_index) / 3); } #define UNICODE_DECOMP_LEN_MAX 18 typedef enum { DECOMP_TYPE_C1, /* 16 bit char */ DECOMP_TYPE_L1, /* 16 bit char table */ DECOMP_TYPE_L2, DECOMP_TYPE_L3, DECOMP_TYPE_L4, DECOMP_TYPE_L5, /* XXX: not used */ DECOMP_TYPE_L6, /* XXX: could remove */ DECOMP_TYPE_L7, /* XXX: could remove */ DECOMP_TYPE_LL1, /* 18 bit char table */ DECOMP_TYPE_LL2, DECOMP_TYPE_S1, /* 8 bit char table */ DECOMP_TYPE_S2, DECOMP_TYPE_S3, DECOMP_TYPE_S4, DECOMP_TYPE_S5, 
DECOMP_TYPE_I1, /* increment 16 bit char value */ DECOMP_TYPE_I2_0, DECOMP_TYPE_I2_1, DECOMP_TYPE_I3_1, DECOMP_TYPE_I3_2, DECOMP_TYPE_I4_1, DECOMP_TYPE_I4_2, DECOMP_TYPE_B1, /* 16 bit base + 8 bit offset */ DECOMP_TYPE_B2, DECOMP_TYPE_B3, DECOMP_TYPE_B4, DECOMP_TYPE_B5, DECOMP_TYPE_B6, DECOMP_TYPE_B7, DECOMP_TYPE_B8, DECOMP_TYPE_B18, DECOMP_TYPE_LS2, DECOMP_TYPE_PAT3, DECOMP_TYPE_S2_UL, DECOMP_TYPE_LS2_UL, } DecompTypeEnum; static uint32_t unicode_get_short_code(uint32_t c) { static const uint16_t unicode_short_table[2] = { 0x2044, 0x2215 }; if (c < 0x80) return c; else if (c < 0x80 + 0x50) return c - 0x80 + 0x300; else return unicode_short_table[c - 0x80 - 0x50]; } static uint32_t unicode_get_lower_simple(uint32_t c) { if (c < 0x100 || (c >= 0x410 && c <= 0x42f)) c += 0x20; else c++; return c; } static uint16_t unicode_get16(const uint8_t *p) { return p[0] | (p[1] << 8); } static int unicode_decomp_entry(uint32_t *res, uint32_t c, int idx, uint32_t code, uint32_t len, uint32_t type) { uint32_t c1; int l, i, p; const uint8_t *d; if (type == DECOMP_TYPE_C1) { res[0] = unicode_decomp_table2[idx]; return 1; } else { d = unicode_decomp_data + unicode_decomp_table2[idx]; switch(type) { case DECOMP_TYPE_L1: case DECOMP_TYPE_L2: case DECOMP_TYPE_L3: case DECOMP_TYPE_L4: case DECOMP_TYPE_L5: case DECOMP_TYPE_L6: case DECOMP_TYPE_L7: l = type - DECOMP_TYPE_L1 + 1; d += (c - code) * l * 2; for(i = 0; i < l; i++) { if ((res[i] = unicode_get16(d + 2 * i)) == 0) return 0; } return l; case DECOMP_TYPE_LL1: case DECOMP_TYPE_LL2: { uint32_t k, p; l = type - DECOMP_TYPE_LL1 + 1; k = (c - code) * l; p = len * l * 2; for(i = 0; i < l; i++) { c1 = unicode_get16(d + 2 * k) | (((d[p + (k / 4)] >> ((k % 4) * 2)) & 3) << 16); if (!c1) return 0; res[i] = c1; k++; } } return l; case DECOMP_TYPE_S1: case DECOMP_TYPE_S2: case DECOMP_TYPE_S3: case DECOMP_TYPE_S4: case DECOMP_TYPE_S5: l = type - DECOMP_TYPE_S1 + 1; d += (c - code) * l; for(i = 0; i < l; i++) { if ((res[i] = 
unicode_get_short_code(d[i])) == 0) return 0; } return l; case DECOMP_TYPE_I1: l = 1; p = 0; goto decomp_type_i; case DECOMP_TYPE_I2_0: case DECOMP_TYPE_I2_1: case DECOMP_TYPE_I3_1: case DECOMP_TYPE_I3_2: case DECOMP_TYPE_I4_1: case DECOMP_TYPE_I4_2: l = 2 + ((type - DECOMP_TYPE_I2_0) >> 1); p = ((type - DECOMP_TYPE_I2_0) & 1) + (l > 2); decomp_type_i: for(i = 0; i < l; i++) { c1 = unicode_get16(d + 2 * i); if (i == p) c1 += c - code; res[i] = c1; } return l; case DECOMP_TYPE_B18: l = 18; goto decomp_type_b; case DECOMP_TYPE_B1: case DECOMP_TYPE_B2: case DECOMP_TYPE_B3: case DECOMP_TYPE_B4: case DECOMP_TYPE_B5: case DECOMP_TYPE_B6: case DECOMP_TYPE_B7: case DECOMP_TYPE_B8: l = type - DECOMP_TYPE_B1 + 1; decomp_type_b: { uint32_t c_min; c_min = unicode_get16(d); d += 2 + (c - code) * l; for(i = 0; i < l; i++) { c1 = d[i]; if (c1 == 0xff) c1 = 0x20; else c1 += c_min; res[i] = c1; } } return l; case DECOMP_TYPE_LS2: d += (c - code) * 3; if (!(res[0] = unicode_get16(d))) return 0; res[1] = unicode_get_short_code(d[2]); return 2; case DECOMP_TYPE_PAT3: res[0] = unicode_get16(d); res[2] = unicode_get16(d + 2); d += 4 + (c - code) * 2; res[1] = unicode_get16(d); return 3; case DECOMP_TYPE_S2_UL: case DECOMP_TYPE_LS2_UL: c1 = c - code; if (type == DECOMP_TYPE_S2_UL) { d += c1 & ~1; c = unicode_get_short_code(*d); d++; } else { d += (c1 >> 1) * 3; c = unicode_get16(d); d += 2; } if (c1 & 1) c = unicode_get_lower_simple(c); res[0] = c; res[1] = unicode_get_short_code(*d); return 2; } } return 0; } /* return the length of the decomposition (length <= UNICODE_DECOMP_LEN_MAX) or 0 if no decomposition */ static int unicode_decomp_char(uint32_t *res, uint32_t c, BOOL is_compat1) { uint32_t v, type, is_compat, code, len; int idx_min, idx_max, idx; idx_min = 0; idx_max = countof(unicode_decomp_table1) - 1; while (idx_min <= idx_max) { idx = (idx_max + idx_min) / 2; v = unicode_decomp_table1[idx]; code = v >> (32 - 18); len = (v >> (32 - 18 - 7)) & 0x7f; // printf("idx=%d code=%05x 
len=%d\n", idx, code, len); if (c < code) { idx_max = idx - 1; } else if (c >= code + len) { idx_min = idx + 1; } else { is_compat = v & 1; if (is_compat1 < is_compat) break; type = (v >> (32 - 18 - 7 - 6)) & 0x3f; return unicode_decomp_entry(res, c, idx, code, len, type); } } return 0; } /* return 0 if no pair found */ static int unicode_compose_pair(uint32_t c0, uint32_t c1) { uint32_t code, len, type, v, idx1, d_idx, d_offset, ch; int idx_min, idx_max, idx, d; uint32_t pair[2]; idx_min = 0; idx_max = countof(unicode_comp_table) - 1; while (idx_min <= idx_max) { idx = (idx_max + idx_min) / 2; idx1 = unicode_comp_table[idx]; /* idx1 represent an entry of the decomposition table */ d_idx = idx1 >> 6; d_offset = idx1 & 0x3f; v = unicode_decomp_table1[d_idx]; code = v >> (32 - 18); len = (v >> (32 - 18 - 7)) & 0x7f; type = (v >> (32 - 18 - 7 - 6)) & 0x3f; ch = code + d_offset; unicode_decomp_entry(pair, ch, d_idx, code, len, type); d = c0 - pair[0]; if (d == 0) d = c1 - pair[1]; if (d < 0) { idx_max = idx - 1; } else if (d > 0) { idx_min = idx + 1; } else { return ch; } } return 0; } /* return the combining class of character c (between 0 and 255) */ static int unicode_get_cc(uint32_t c) { uint32_t code, n, type, cc, c1, b; int pos; const uint8_t *p; pos = get_index_pos(&code, c, unicode_cc_index, sizeof(unicode_cc_index) / 3); if (pos < 0) return 0; p = unicode_cc_table + pos; for(;;) { b = *p++; type = b >> 6; n = b & 0x3f; if (n < 48) { } else if (n < 56) { n = (n - 48) << 8; n |= *p++; n += 48; } else { n = (n - 56) << 8; n |= *p++ << 8; n |= *p++; n += 48 + (1 << 11); } if (type <= 1) p++; c1 = code + n + 1; if (c < c1) { switch(type) { case 0: cc = p[-1]; break; case 1: cc = p[-1] + c - code; break; case 2: cc = 0; break; default: case 3: cc = 230; break; } return cc; } code = c1; } } static void sort_cc(int *buf, int len) { int i, j, k, cc, cc1, start, ch1; for(i = 0; i < len; i++) { cc = unicode_get_cc(buf[i]); if (cc != 0) { start = i; j = i + 1; while (j < 
len) { ch1 = buf[j]; cc1 = unicode_get_cc(ch1); if (cc1 == 0) break; k = j - 1; while (k >= start) { if (unicode_get_cc(buf[k]) <= cc1) break; buf[k + 1] = buf[k]; k--; } buf[k + 1] = ch1; j++; } #if 0 printf("cc:"); for(k = start; k < j; k++) { printf(" %3d", unicode_get_cc(buf[k])); } printf("\n"); #endif i = j; } } } static void to_nfd_rec(DynBuf *dbuf, const int *src, int src_len, int is_compat) { uint32_t c, v; int i, l; uint32_t res[UNICODE_DECOMP_LEN_MAX]; for(i = 0; i < src_len; i++) { c = src[i]; if (c >= 0xac00 && c < 0xd7a4) { /* Hangul decomposition */ c -= 0xac00; dbuf_put_u32(dbuf, 0x1100 + c / 588); dbuf_put_u32(dbuf, 0x1161 + (c % 588) / 28); v = c % 28; if (v != 0) dbuf_put_u32(dbuf, 0x11a7 + v); } else { l = unicode_decomp_char(res, c, is_compat); if (l) { to_nfd_rec(dbuf, (int *)res, l, is_compat); } else { dbuf_put_u32(dbuf, c); } } } } /* return 0 if not found */ static int compose_pair(uint32_t c0, uint32_t c1) { /* Hangul composition */ if (c0 >= 0x1100 && c0 < 0x1100 + 19 && c1 >= 0x1161 && c1 < 0x1161 + 21) { return 0xac00 + (c0 - 0x1100) * 588 + (c1 - 0x1161) * 28; } else if (c0 >= 0xac00 && c0 < 0xac00 + 11172 && (c0 - 0xac00) % 28 == 0 && c1 >= 0x11a7 && c1 < 0x11a7 + 28) { return c0 + c1 - 0x11a7; } else { return unicode_compose_pair(c0, c1); } } int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len, UnicodeNormalizationEnum n_type, void *opaque, DynBufReallocFunc *realloc_func) { int *buf, buf_len, i, p, starter_pos, cc, last_cc, out_len; BOOL is_compat; DynBuf dbuf_s, *dbuf = &dbuf_s; is_compat = n_type >> 1; dbuf_init2(dbuf, opaque, realloc_func); if (dbuf_realloc(dbuf, sizeof(int) * src_len)) goto fail; /* common case: latin1 is unaffected by NFC */ if (n_type == UNICODE_NFC) { for(i = 0; i < src_len; i++) { if (src[i] >= 0x100) goto not_latin1; } buf = (int *)dbuf->buf; memcpy(buf, src, src_len * sizeof(int)); *pdst = (uint32_t *)buf; return src_len; not_latin1: ; } to_nfd_rec(dbuf, (const int *)src, src_len, 
is_compat); if (dbuf_error(dbuf)) { fail: *pdst = NULL; return -1; } buf = (int *)dbuf->buf; buf_len = dbuf->size / sizeof(int); sort_cc(buf, buf_len); if (buf_len <= 1 || (n_type & 1) != 0) { /* NFD / NFKD */ *pdst = (uint32_t *)buf; return buf_len; } i = 1; out_len = 1; while (i < buf_len) { /* find the starter character and test if it is blocked from the character at 'i' */ last_cc = unicode_get_cc(buf[i]); starter_pos = out_len - 1; while (starter_pos >= 0) { cc = unicode_get_cc(buf[starter_pos]); if (cc == 0) break; if (cc >= last_cc) goto next; last_cc = 256; starter_pos--; } if (starter_pos >= 0 && (p = compose_pair(buf[starter_pos], buf[i])) != 0) { buf[starter_pos] = p; i++; } else { next: buf[out_len++] = buf[i++]; } } *pdst = (uint32_t *)buf; return out_len; } /* char ranges for various unicode properties */ static int unicode_find_name(const char *name_table, const char *name) { const char *p, *r; int pos; size_t name_len, len; p = name_table; pos = 0; name_len = strlen(name); while (*p) { for(;;) { r = strchr(p, ','); if (!r) len = strlen(p); else len = r - p; if (len == name_len && !memcmp(p, name, name_len)) return pos; p += len + 1; if (!r) break; } pos++; } return -1; } /* 'cr' must be initialized and empty. 
Return 0 if OK, -1 if error, -2 if not found */ int unicode_script(CharRange *cr, const char *script_name, BOOL is_ext) { int script_idx; const uint8_t *p, *p_end; uint32_t c, c1, b, n, v, v_len, i, type; CharRange cr1_s, *cr1; CharRange cr2_s, *cr2 = &cr2_s; BOOL is_common; script_idx = unicode_find_name(unicode_script_name_table, script_name); if (script_idx < 0) return -2; /* Note: we remove the "Unknown" Script */ script_idx += UNICODE_SCRIPT_Unknown + 1; is_common = (script_idx == UNICODE_SCRIPT_Common || script_idx == UNICODE_SCRIPT_Inherited); if (is_ext) { cr1 = &cr1_s; cr_init(cr1, cr->mem_opaque, cr->realloc_func); cr_init(cr2, cr->mem_opaque, cr->realloc_func); } else { cr1 = cr; } p = unicode_script_table; p_end = unicode_script_table + countof(unicode_script_table); c = 0; while (p < p_end) { b = *p++; type = b >> 7; n = b & 0x7f; if (n < 96) { } else if (n < 112) { n = (n - 96) << 8; n |= *p++; n += 96; } else { n = (n - 112) << 16; n |= *p++ << 8; n |= *p++; n += 96 + (1 << 12); } if (type == 0) v = 0; else v = *p++; c1 = c + n + 1; if (v == script_idx) { if (cr_add_interval(cr1, c, c1)) goto fail; } c = c1; } if (is_ext) { /* add the script extensions */ p = unicode_script_ext_table; p_end = unicode_script_ext_table + countof(unicode_script_ext_table); c = 0; while (p < p_end) { b = *p++; if (b < 128) { n = b; } else if (b < 128 + 64) { n = (b - 128) << 8; n |= *p++; n += 128; } else { n = (b - 128 - 64) << 16; n |= *p++ << 8; n |= *p++; n += 128 + (1 << 14); } c1 = c + n + 1; v_len = *p++; if (is_common) { if (v_len != 0) { if (cr_add_interval(cr2, c, c1)) goto fail; } } else { for(i = 0; i < v_len; i++) { if (p[i] == script_idx) { if (cr_add_interval(cr2, c, c1)) goto fail; break; } } } p += v_len; c = c1; } if (is_common) { /* remove all the characters with script extensions */ if (cr_invert(cr2)) goto fail; if (cr_op(cr, cr1->points, cr1->len, cr2->points, cr2->len, CR_OP_INTER)) goto fail; } else { if (cr_op(cr, cr1->points, cr1->len, 
cr2->points, cr2->len, CR_OP_UNION)) goto fail; } cr_free(cr1); cr_free(cr2); } return 0; fail: if (is_ext) { cr_free(cr1); cr_free(cr2); } goto fail; } #define M(id) (1U << UNICODE_GC_ ## id) static int unicode_general_category1(CharRange *cr, uint32_t gc_mask) { const uint8_t *p, *p_end; uint32_t c, c0, b, n, v; p = unicode_gc_table; p_end = unicode_gc_table + countof(unicode_gc_table); c = 0; while (p < p_end) { b = *p++; n = b >> 5; v = b & 0x1f; if (n == 7) { n = *p++; if (n < 128) { n += 7; } else if (n < 128 + 64) { n = (n - 128) << 8; n |= *p++; n += 7 + 128; } else { n = (n - 128 - 64) << 16; n |= *p++ << 8; n |= *p++; n += 7 + 128 + (1 << 14); } } c0 = c; c += n + 1; if (v == 31) { /* run of Lu / Ll */ b = gc_mask & (M(Lu) | M(Ll)); if (b != 0) { if (b == (M(Lu) | M(Ll))) { goto add_range; } else { c0 += ((gc_mask & M(Ll)) != 0); for(; c0 < c; c0 += 2) { if (cr_add_interval(cr, c0, c0 + 1)) return -1; } } } } else if ((gc_mask >> v) & 1) { add_range: if (cr_add_interval(cr, c0, c)) return -1; } } return 0; } static int unicode_prop1(CharRange *cr, int prop_idx) { const uint8_t *p, *p_end; uint32_t c, c0, b, bit; p = unicode_prop_table[prop_idx]; p_end = p + unicode_prop_len_table[prop_idx]; c = 0; bit = 0; while (p < p_end) { c0 = c; b = *p++; if (b < 64) { c += (b >> 3) + 1; if (bit) { if (cr_add_interval(cr, c0, c)) return -1; } bit ^= 1; c0 = c; c += (b & 7) + 1; } else if (b >= 0x80) { c += b - 0x80 + 1; } else if (b < 0x60) { c += (((b - 0x40) << 8) | p[0]) + 1; p++; } else { c += (((b - 0x60) << 16) | (p[0] << 8) | p[1]) + 1; p += 2; } if (bit) { if (cr_add_interval(cr, c0, c)) return -1; } bit ^= 1; } return 0; } #define CASE_U (1 << 0) #define CASE_L (1 << 1) #define CASE_F (1 << 2) /* use the case conversion table to generate range of characters. 
CASE_U: set char if modified by uppercasing, CASE_L: set char if modified by lowercasing, CASE_F: set char if modified by case folding, */ static int unicode_case1(CharRange *cr, int case_mask) { #define MR(x) (1 << RUN_TYPE_ ## x) const uint32_t tab_run_mask[3] = { MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(U_EXT) | MR(U_EXT2) | MR(U_EXT3), MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(L_EXT2), MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT), }; #undef MR uint32_t mask, v, code, type, len, i, idx; if (case_mask == 0) return 0; mask = 0; for(i = 0; i < 3; i++) { if ((case_mask >> i) & 1) mask |= tab_run_mask[i]; } for(idx = 0; idx < countof(case_conv_table1); idx++) { v = case_conv_table1[idx]; type = (v >> (32 - 17 - 7 - 4)) & 0xf; code = v >> (32 - 17); len = (v >> (32 - 17 - 7)) & 0x7f; if ((mask >> type) & 1) { // printf("%d: type=%d %04x %04x\n", idx, type, code, code + len - 1); switch(type) { case RUN_TYPE_UL: if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F))) goto def_case; code += ((case_mask & CASE_U) != 0); for(i = 0; i < len; i += 2) { if (cr_add_interval(cr, code + i, code + i + 1)) return -1; } break; case RUN_TYPE_LSU: if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F))) goto def_case; if (!(case_mask & CASE_U)) { if (cr_add_interval(cr, code, code + 1)) return -1; } if (cr_add_interval(cr, code + 1, code + 2)) return -1; if (case_mask & CASE_U) { if (cr_add_interval(cr, code + 2, code + 3)) return -1; } break; default: def_case: if (cr_add_interval(cr, code, code + len)) return -1; break; } } } return 0; } typedef enum { POP_GC, POP_PROP, POP_CASE, POP_UNION, POP_INTER, POP_XOR, POP_INVERT, POP_END, } PropOPEnum; #define POP_STACK_LEN_MAX 4 static int unicode_prop_ops(CharRange *cr, ...) 
{ va_list ap; CharRange stack[POP_STACK_LEN_MAX]; int stack_len, op, ret, i; uint32_t a; va_start(ap, cr); stack_len = 0; for(;;) { op = va_arg(ap, int); switch(op) { case POP_GC: assert(stack_len < POP_STACK_LEN_MAX); a = va_arg(ap, int); cr_init(&stack[stack_len++], cr->mem_opaque, cr->realloc_func); if (unicode_general_category1(&stack[stack_len - 1], a)) goto fail; break; case POP_PROP: assert(stack_len < POP_STACK_LEN_MAX); a = va_arg(ap, int); cr_init(&stack[stack_len++], cr->mem_opaque, cr->realloc_func); if (unicode_prop1(&stack[stack_len - 1], a)) goto fail; break; case POP_CASE: assert(stack_len < POP_STACK_LEN_MAX); a = va_arg(ap, int); cr_init(&stack[stack_len++], cr->mem_opaque, cr->realloc_func); if (unicode_case1(&stack[stack_len - 1], a)) goto fail; break; case POP_UNION: case POP_INTER: case POP_XOR: { CharRange *cr1, *cr2, *cr3; assert(stack_len >= 2); assert(stack_len < POP_STACK_LEN_MAX); cr1 = &stack[stack_len - 2]; cr2 = &stack[stack_len - 1]; cr3 = &stack[stack_len++]; cr_init(cr3, cr->mem_opaque, cr->realloc_func); if (cr_op(cr3, cr1->points, cr1->len, cr2->points, cr2->len, op - POP_UNION + CR_OP_UNION)) goto fail; cr_free(cr1); cr_free(cr2); *cr1 = *cr3; stack_len -= 2; } break; case POP_INVERT: assert(stack_len >= 1); if (cr_invert(&stack[stack_len - 1])) goto fail; break; case POP_END: goto done; default: abort(); } } done: assert(stack_len == 1); ret = cr_copy(cr, &stack[0]); cr_free(&stack[0]); return ret; fail: for(i = 0; i < stack_len; i++) cr_free(&stack[i]); return -1; } static const uint32_t unicode_gc_mask_table[] = { M(Lu) | M(Ll) | M(Lt), /* LC */ M(Lu) | M(Ll) | M(Lt) | M(Lm) | M(Lo), /* L */ M(Mn) | M(Mc) | M(Me), /* M */ M(Nd) | M(Nl) | M(No), /* N */ M(Sm) | M(Sc) | M(Sk) | M(So), /* S */ M(Pc) | M(Pd) | M(Ps) | M(Pe) | M(Pi) | M(Pf) | M(Po), /* P */ M(Zs) | M(Zl) | M(Zp), /* Z */ M(Cc) | M(Cf) | M(Cs) | M(Co) | M(Cn), /* C */ }; /* 'cr' must be initialized and empty. 
Return 0 if OK, -1 if error, -2 if not found */ int unicode_general_category(CharRange *cr, const char *gc_name) { int gc_idx; uint32_t gc_mask; gc_idx = unicode_find_name(unicode_gc_name_table, gc_name); if (gc_idx < 0) return -2; if (gc_idx <= UNICODE_GC_Co) { gc_mask = (uint64_t)1 << gc_idx; } else { gc_mask = unicode_gc_mask_table[gc_idx - UNICODE_GC_LC]; } return unicode_general_category1(cr, gc_mask); } /* 'cr' must be initialized and empty. Return 0 if OK, -1 if error, -2 if not found */ int unicode_prop(CharRange *cr, const char *prop_name) { int prop_idx, ret; prop_idx = unicode_find_name(unicode_prop_name_table, prop_name); if (prop_idx < 0) return -2; prop_idx += UNICODE_PROP_ASCII_Hex_Digit; ret = 0; switch(prop_idx) { case UNICODE_PROP_ASCII: if (cr_add_interval(cr, 0x00, 0x7f + 1)) return -1; break; case UNICODE_PROP_Any: if (cr_add_interval(cr, 0x00000, 0x10ffff + 1)) return -1; break; case UNICODE_PROP_Assigned: ret = unicode_prop_ops(cr, POP_GC, M(Cn), POP_INVERT, POP_END); break; case UNICODE_PROP_Math: ret = unicode_prop_ops(cr, POP_GC, M(Sm), POP_PROP, UNICODE_PROP_Other_Math, POP_UNION, POP_END); break; case UNICODE_PROP_Lowercase: ret = unicode_prop_ops(cr, POP_GC, M(Ll), POP_PROP, UNICODE_PROP_Other_Lowercase, POP_UNION, POP_END); break; case UNICODE_PROP_Uppercase: ret = unicode_prop_ops(cr, POP_GC, M(Lu), POP_PROP, UNICODE_PROP_Other_Uppercase, POP_UNION, POP_END); break; case UNICODE_PROP_Cased: ret = unicode_prop_ops(cr, POP_GC, M(Lu) | M(Ll) | M(Lt), POP_PROP, UNICODE_PROP_Other_Uppercase, POP_UNION, POP_PROP, UNICODE_PROP_Other_Lowercase, POP_UNION, POP_END); break; case UNICODE_PROP_Alphabetic: ret = unicode_prop_ops(cr, POP_GC, M(Lu) | M(Ll) | M(Lt) | M(Lm) | M(Lo) | M(Nl), POP_PROP, UNICODE_PROP_Other_Uppercase, POP_UNION, POP_PROP, UNICODE_PROP_Other_Lowercase, POP_UNION, POP_PROP, UNICODE_PROP_Other_Alphabetic, POP_UNION, POP_END); break; case UNICODE_PROP_Grapheme_Base: ret = unicode_prop_ops(cr, POP_GC, M(Cc) | M(Cf) | M(Cs) | 
M(Co) | M(Cn) | M(Zl) | M(Zp) | M(Me) | M(Mn), POP_PROP, UNICODE_PROP_Other_Grapheme_Extend, POP_UNION, POP_INVERT, POP_END); break; case UNICODE_PROP_Grapheme_Extend: ret = unicode_prop_ops(cr, POP_GC, M(Me) | M(Mn), POP_PROP, UNICODE_PROP_Other_Grapheme_Extend, POP_UNION, POP_END); break; case UNICODE_PROP_XID_Start: ret = unicode_prop_ops(cr, POP_GC, M(Lu) | M(Ll) | M(Lt) | M(Lm) | M(Lo) | M(Nl), POP_PROP, UNICODE_PROP_Other_ID_Start, POP_UNION, POP_PROP, UNICODE_PROP_Pattern_Syntax, POP_PROP, UNICODE_PROP_Pattern_White_Space, POP_UNION, POP_PROP, UNICODE_PROP_XID_Start1, POP_UNION, POP_INVERT, POP_INTER, POP_END); break; case UNICODE_PROP_XID_Continue: ret = unicode_prop_ops(cr, POP_GC, M(Lu) | M(Ll) | M(Lt) | M(Lm) | M(Lo) | M(Nl) | M(Mn) | M(Mc) | M(Nd) | M(Pc), POP_PROP, UNICODE_PROP_Other_ID_Start, POP_UNION, POP_PROP, UNICODE_PROP_Other_ID_Continue, POP_UNION, POP_PROP, UNICODE_PROP_Pattern_Syntax, POP_PROP, UNICODE_PROP_Pattern_White_Space, POP_UNION, POP_PROP, UNICODE_PROP_XID_Continue1, POP_UNION, POP_INVERT, POP_INTER, POP_END); break; case UNICODE_PROP_Changes_When_Uppercased: ret = unicode_case1(cr, CASE_U); break; case UNICODE_PROP_Changes_When_Lowercased: ret = unicode_case1(cr, CASE_L); break; case UNICODE_PROP_Changes_When_Casemapped: ret = unicode_case1(cr, CASE_U | CASE_L | CASE_F); break; case UNICODE_PROP_Changes_When_Titlecased: ret = unicode_prop_ops(cr, POP_CASE, CASE_U, POP_PROP, UNICODE_PROP_Changes_When_Titlecased1, POP_XOR, POP_END); break; case UNICODE_PROP_Changes_When_Casefolded: ret = unicode_prop_ops(cr, POP_CASE, CASE_F, POP_PROP, UNICODE_PROP_Changes_When_Casefolded1, POP_XOR, POP_END); break; case UNICODE_PROP_Changes_When_NFKC_Casefolded: ret = unicode_prop_ops(cr, POP_CASE, CASE_F, POP_PROP, UNICODE_PROP_Changes_When_NFKC_Casefolded1, POP_XOR, POP_END); break; #if 0 case UNICODE_PROP_ID_Start: ret = unicode_prop_ops(cr, POP_GC, M(Lu) | M(Ll) | M(Lt) | M(Lm) | M(Lo) | M(Nl), POP_PROP, UNICODE_PROP_Other_ID_Start, POP_UNION, 
POP_PROP, UNICODE_PROP_Pattern_Syntax, POP_PROP, UNICODE_PROP_Pattern_White_Space, POP_UNION, POP_INVERT, POP_INTER, POP_END); break; case UNICODE_PROP_ID_Continue: ret = unicode_prop_ops(cr, POP_GC, M(Lu) | M(Ll) | M(Lt) | M(Lm) | M(Lo) | M(Nl) | M(Mn) | M(Mc) | M(Nd) | M(Pc), POP_PROP, UNICODE_PROP_Other_ID_Start, POP_UNION, POP_PROP, UNICODE_PROP_Other_ID_Continue, POP_UNION, POP_PROP, UNICODE_PROP_Pattern_Syntax, POP_PROP, UNICODE_PROP_Pattern_White_Space, POP_UNION, POP_INVERT, POP_INTER, POP_END); break; case UNICODE_PROP_Case_Ignorable: ret = unicode_prop_ops(cr, POP_GC, M(Mn) | M(Cf) | M(Lm) | M(Sk), POP_PROP, UNICODE_PROP_Case_Ignorable1, POP_XOR, POP_END); break; #else /* we use the existing tables */ case UNICODE_PROP_ID_Continue: ret = unicode_prop_ops(cr, POP_PROP, UNICODE_PROP_ID_Start, POP_PROP, UNICODE_PROP_ID_Continue1, POP_XOR, POP_END); break; #endif default: if (prop_idx >= countof(unicode_prop_table)) return -2; ret = unicode_prop1(cr, prop_idx); break; } return ret; } #endif /* CONFIG_ALL_UNICODE */