diff options
author | konsumlamm <44230978+konsumlamm@users.noreply.github.com> | 2021-02-09 22:47:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-09 22:47:07 +0100 |
commit | 00551f972e22351a55f7c9e84df6469687b674d9 (patch) | |
tree | f6c27a2fe395cda8483b1bc1542a2025045e63b3 /lib/pure/unidecode/unidecode.nim | |
parent | 635c0b6cb9c225a247ab2aeb136458c14fb711e8 (diff) | |
download | Nim-00551f972e22351a55f7c9e84df6469687b674d9.tar.gz |
Improve documentation for unidecode (#16986)
* Improve documentation for unidecode

  Minor changes to gen.py

* Fix typo in gen.py
Diffstat (limited to 'lib/pure/unidecode/unidecode.nim')
-rw-r--r-- | lib/pure/unidecode/unidecode.nim | 46 |
1 file changed, 24 insertions, 22 deletions
```diff
diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim
index 9985b14b8..9affc53f6 100644
--- a/lib/pure/unidecode/unidecode.nim
+++ b/lib/pure/unidecode/unidecode.nim
@@ -7,30 +7,31 @@
 #    distribution, for details about the copyright.
 #
 
-## This module is based on Python's Unidecode module by Tomaz Solc,
-## which in turn is based on the ``Text::Unidecode`` Perl module by
-## Sean M. Burke
-## (http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm ).
+## This module is based on Python's [Unidecode](https://pypi.org/project/Unidecode/)
+## module by Tomaz Solc, which in turn is based on the
+## [Text::Unidecode](https://metacpan.org/pod/Text::Unidecode)
+## Perl module by Sean M. Burke.
 ##
-## It provides a single proc that does Unicode to ASCII transliterations:
-## It finds the sequence of ASCII characters that is the closest approximation
-## to the Unicode string.
+## It provides a `unidecode proc <#unidecode,string>`_ that does
+## Unicode to ASCII transliterations: It finds the sequence of ASCII characters
+## that is the closest approximation to the Unicode string.
 ##
 ## For example, the closest to string "Äußerst" in ASCII is "Ausserst". Some
 ## information is lost in this transformation, of course, since several Unicode
-## strings can be transformed in the same ASCII representation. So this is a
-## strictly one-way transformation. However a human reader will probably
-## still be able to guess what original string was meant from the context.
+## strings can be transformed to the same ASCII representation. So this is a
+## strictly one-way transformation. However, a human reader will probably
+## still be able to guess from the context, what the original string was.
 ##
-## This module needs the data file "unidecode.dat" to work: This file is
-## embedded as a resource into your application by default. But you an also
-## define the symbol ``--define:noUnidecodeTable`` during compile time and
-## use the `loadUnidecodeTable` proc to initialize this module.
+## This module needs the data file `unidecode.dat` to work: This file is
+## embedded as a resource into your application by default. You can also
+## define the symbol `--define:noUnidecodeTable` during compile time and
+## use the `loadUnidecodeTable proc <#loadUnidecodeTable>`_ to initialize
+## this module.
 
-import unicode
+import std/unicode
 
 when not defined(noUnidecodeTable):
-  import strutils
+  import std/strutils
 
   const translationTable = splitLines(slurp"unidecode/unidecode.dat")
 else:
@@ -38,10 +39,10 @@ else:
   var translationTable: seq[string]
 
 proc loadUnidecodeTable*(datafile = "unidecode.dat") =
-  ## loads the datafile that `unidecode` to work. This is only required if
-  ## the module was compiled with the ``--define:noUnidecodeTable`` switch.
-  ## This needs to be called by the main thread before any thread can make a
-  ## call to `unidecode`.
+  ## Loads the datafile that `unidecode <#unidecode,string>`_ needs to work.
+  ## This is only required if the module was compiled with the
+  ## `--define:noUnidecodeTable` switch. This needs to be called by the
+  ## main thread before any thread can make a call to `unidecode`.
   when defined(noUnidecodeTable):
     newSeq(translationTable, 0xffff)
     var i = 0
@@ -53,10 +54,11 @@ proc unidecode*(s: string): string =
   ## Finds the sequence of ASCII characters that is the closest approximation
   ## to the UTF-8 string `s`.
   runnableExamples:
-    assert unidecode("北京") == "Bei Jing "
+    doAssert unidecode("北京") == "Bei Jing "
+    doAssert unidecode("Äußerst") == "Ausserst"
 
   result = ""
   for r in runes(s):
     var c = int(r)
     if c <=% 127: add(result, chr(c))
-    elif c <% translationTable.len: add(result, translationTable[c-128])
+    elif c <% translationTable.len: add(result, translationTable[c - 128])
```