diff options
Diffstat (limited to 'lib/pure/unidecode')
-rw-r--r-- | lib/pure/unidecode/gen.py | 34 | ||||
-rw-r--r-- | lib/pure/unidecode/unidecode.dat | 841 | ||||
-rw-r--r-- | lib/pure/unidecode/unidecode.nim | 27 |
3 files changed, 451 insertions, 451 deletions
diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py index 8da0136ff..f0647ea6c 100644 --- a/lib/pure/unidecode/gen.py +++ b/lib/pure/unidecode/gen.py @@ -1,26 +1,30 @@ -#! usr/bin/env python +#! usr/bin/env python3 # -*- coding: utf-8 -*- # Generates the unidecode.dat module # (c) 2010 Andreas Rumpf from unidecode import unidecode +try: + import warnings + warnings.simplefilter("ignore") +except ImportError: + pass -def main2(): - data = [] - for x in xrange(128, 0xffff + 1): - u = eval("u'\u%04x'" % x) - - val = unidecode(u) - data.append(val) - - - f = open("unidecode.dat", "wb+") - for d in data: - f.write("%s\n" % d) - f.close() +def main2(): + f = open("unidecode.dat", "wb+") + for x in range(128, 0xffff + 1): + u = eval("u'\\u%04x'" % x) + val = unidecode(u) -main2() + # f.write("%x | " % x) + if x==0x2028: # U+2028 = LINE SEPARATOR + val = "" + elif x==0x2029: # U+2028 = PARAGRAPH SEPARATOR + val = "" + f.write("%s\n" % val) + f.close() +main2() \ No newline at end of file diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat index 9dff0a4a9..5f4c075d8 100644 --- a/lib/pure/unidecode/unidecode.dat +++ b/lib/pure/unidecode/unidecode.dat @@ -58,9 +58,9 @@ P 1 o >> -1/4 -1/2 -3/4 + 1/4 + 1/2 + 3/4 ? A A @@ -91,7 +91,7 @@ U U U U -U +Y Th ss a @@ -177,7 +177,7 @@ i I i IJ - +ij J j K @@ -368,7 +368,7 @@ ZH zh j DZ -D +Dz dz G g @@ -414,8 +414,8 @@ Y y H h -[?] -[?] +N +d OU ou Z @@ -434,34 +434,34 @@ O o Y y +l +n +t +j +db +qp +A +C +c +L +T +s +z [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +B +U +^ +E +e +J +j +q +q +R +r +Y +y a a a @@ -503,13 +503,13 @@ o OE O F -R -R -R -R r r -R +r +r +r +r +r R R s @@ -519,12 +519,12 @@ S S t t -U +u U v ^ -W -Y +w +y Y z z @@ -556,9 +556,9 @@ ls lz WW ]] -[?] -[?] -k +h +h +h h j r @@ -737,19 +737,19 @@ V -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +a +e +i +o +u +c +d +h +m +r +t +v +x [?] [?] [?] @@ -1287,7 +1287,7 @@ o f ew [?] -. +: - [?] [?] @@ -1340,9 +1340,9 @@ o u ' +- - - +| : @@ -7402,41 +7402,41 @@ bh +b +d +f +m +n +p +r +r +s +t +z +g +p +b +d +f +g +k +l +m +n +p +r +s - - - - - - - - - - - - - - - - - - - - - - - - - - - +v +x +z @@ -7708,7 +7708,7 @@ a S [?] [?] -[?] +Ss [?] A a @@ -8109,9 +8109,6 @@ _ - - - %0 %00 @@ -8136,19 +8133,23 @@ _ / -[ ]- -[?] +?? ?! !? 7 PP (] [) +* [?] [?] [?] +% +~ [?] [?] [?] +'''' [?] [?] [?] @@ -8156,12 +8157,8 @@ PP [?] [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] + + [?] [?] [?] @@ -8178,7 +8175,7 @@ PP 0 - +i 4 @@ -8209,19 +8206,19 @@ n ( ) [?] +a +e +o +x [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +h +k +l +m +n +p +s +t [?] [?] [?] @@ -8237,26 +8234,26 @@ Rs W NS D -EU +EUR K T Dr -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +Pf +P +G +A +UAH +C| +L +Sm +T +Rs +L +M +m +R +l +BTC [?] [?] [?] @@ -8294,6 +8291,7 @@ Dr [?] + [?] [?] [?] @@ -8319,63 +8317,67 @@ Dr [?] [?] [?] -[?] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + a/c + a/s +C + c/o + c/u +g +H +H +H +h +I +I +L +l +N +No. +P +Q +R +R +R +(sm) +TEL +(tm) +Z +Z +K +A +B +C +e +e +E +F +F +M +o +i +FAX @@ -8385,25 +8387,20 @@ Dr [?] [?] [?] +D +d +e +i +j [?] [?] [?] [?] +F [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] + 1/7 + 1/9 + 1/10 1/3 2/3 1/5 @@ -8458,7 +8455,7 @@ D) [?] [?] [?] -[?] + 0/3 [?] [?] [?] @@ -8595,8 +8592,12 @@ V [?] [?] [?] +- [?] [?] +/ +\ +* [?] [?] [?] @@ -8608,6 +8609,7 @@ V [?] [?] [?] +| [?] [?] [?] @@ -8626,11 +8628,13 @@ V [?] [?] [?] +: [?] [?] [?] [?] [?] +~ [?] [?] [?] @@ -8670,17 +8674,10 @@ V [?] [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +<= +>= +<= +>= [?] [?] [?] @@ -8836,6 +8833,7 @@ V [?] [?] [?] +^ [?] [?] [?] @@ -8873,9 +8871,8 @@ V [?] [?] [?] -[?] -[?] -[?] +< +> [?] [?] [?] @@ -9185,166 +9182,166 @@ V [?] [?] [?] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] - +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +(1) +(2) +(3) +(4) +(5) +(6) +(7) +(8) +(9) +(10) +(11) +(12) +(13) +(14) +(15) +(16) +(17) +(18) +(19) +(20) +1. +2. +3. +4. +5. +6. +7. +8. +9. +10. +11. +12. +13. +14. +15. +16. +17. +18. +19. +20. +(a) +(b) +(c) +(d) +(e) +(f) +(g) +(h) +(i) +(j) +(k) +(l) +(m) +(n) +(o) +(p) +(q) +(r) +(s) +(t) +(u) +(v) +(w) +(x) +(y) +(z) +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +0 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +0 - - | @@ -9712,7 +9709,7 @@ O - +# [?] @@ -9906,6 +9903,7 @@ O +* @@ -9944,8 +9942,7 @@ O - - +| @@ -9955,7 +9952,7 @@ O [?] [?] - +! @@ -10087,10 +10084,10 @@ O [?] [?] [?] +[ [?] -[?] -[?] -[?] +< +> [?] [?] [?] @@ -10500,6 +10497,8 @@ y +{ +} @@ -10739,6 +10738,9 @@ y +::= +== +=== @@ -11228,27 +11230,22 @@ y +L +l +L +P +R +a +t +H +h +K +k +Z +z - - - - - - - - - - - - - - - - - - - - +M +A @@ -12754,21 +12751,21 @@ H [?] [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 (g) (n) (d) @@ -12850,21 +12847,21 @@ KIS (Zi) (Xie) (Ye) -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 1M 2M 3M @@ -12877,10 +12874,10 @@ KIS 10M 11M 12M -[?] -[?] -[?] -[?] +Hg +erg +eV +LTD a i u @@ -13042,16 +13039,16 @@ watt 22h 23h 24h -HPA +hPa da AU bar oV pc -[?] -[?] -[?] -[?] +dm +dm^2 +dm^3 +IU Heisei Syouwa Taisyou @@ -13092,7 +13089,7 @@ mm^2 cm^2 m^2 km^2 -mm^4 +mm^3 cm^3 m^3 km^3 @@ -13184,7 +13181,7 @@ Wb 29d 30d 31d - +gal @@ -19841,7 +19838,7 @@ Wb [?] [?] -[?] +Yi Ding Kao Qi diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim index 9d8843f06..e0b8d3946 100644 --- a/lib/pure/unidecode/unidecode.nim +++ b/lib/pure/unidecode/unidecode.nim @@ -22,14 +22,14 @@ ## strictly one-way transformation. However a human reader will probably ## still be able to guess what original string was meant from the context. ## -## This module needs the data file "unidecode.dat" to work: You can either -## ship this file with your application and initialize this module with the -## `loadUnidecodeTable` proc or you can define the ``embedUnidecodeTable`` -## symbol to embed the file as a resource into your application. +## This module needs the data file "unidecode.dat" to work: This file is +## embedded as a resource into your application by default. But you an also +## define the symbol ``--define:noUnidecodeTable`` during compile time and +## use the `loadUnidecodeTable` proc to initialize this module. import unicode -when defined(embedUnidecodeTable): +when not defined(noUnidecodeTable): import strutils const translationTable = splitLines(slurp"unidecode/unidecode.dat") @@ -38,11 +38,11 @@ else: var translationTable: seq[string] proc loadUnidecodeTable*(datafile = "unidecode.dat") = - ## loads the datafile that `unidecode` to work. Unless this module is - ## compiled with the ``embedUnidecodeTable`` symbol defined, this needs - ## to be called by the main thread before any thread can make a call - ## to `unidecode`. - when not defined(embedUnidecodeTable): + ## loads the datafile that `unidecode` to work. This is only required if + ## the module was compiled with the ``--define:noUnidecodeTable`` switch. + ## This needs to be called by the main thread before any thread can make a + ## call to `unidecode`. + when defined(noUnidecodeTable): newSeq(translationTable, 0xffff) var i = 0 for line in lines(datafile): @@ -61,7 +61,6 @@ proc unidecode*(s: string): string = ## ## Results in: "Bei Jing" ## - assert(not isNil(translationTable)) result = "" for r in runes(s): var c = int(r) @@ -69,6 +68,6 @@ proc unidecode*(s: string): string = elif c <% translationTable.len: add(result, translationTable[c-128]) when isMainModule: - loadUnidecodeTable("lib/pure/unidecode/unidecode.dat") - assert unidecode("Äußerst") == "Ausserst" - + #loadUnidecodeTable("lib/pure/unidecode/unidecode.dat") + doAssert unidecode("Äußerst") == "Ausserst" + doAssert unidecode("北京") == "Bei Jing " |