diff options
Diffstat (limited to 'lib/std')
-rw-r--r-- | lib/std/wordwrap.nim | 143 |
1 files changed, 82 insertions, 61 deletions
#
#
#            Nim's Runtime Library
#        (c) Copyright 2018 Nim contributors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Word wrapping for Unicode strings. Widths are measured in characters as
## delimited by `unicode.graphemeLen`, not in bytes.

import strutils, unicode

proc olen(s: string): int =
  ## Returns the number of characters in `s`, where one character is the
  ## byte span reported by `graphemeLen` at the current position.
  var i = 0
  result = 0
  while i < s.len:
    inc result
    let L = graphemeLen(s, i)
    inc i, L

proc wrapWords*(s: string, maxLineWidth = 80,
               splitLongWords = true,
               seps: set[char] = Whitespace,
               newLine = "\n"): string {.noSideEffect.} =
  ## Word wraps `s` so that lines hold at most `maxLineWidth` characters.
  ##
  ## `s` is cut into alternating runs of separators (bytes in `seps`) and
  ## words via `strutils.tokenize`:
  ##
  ## * A word that still fits on the current line is appended, preceded by
  ##   the separator run that came before it (copied verbatim).
  ## * A word that does not fit starts a new line: `newLine` is written in
  ##   place of the pending separator run. No `newLine` is written when the
  ##   output is still empty, so the result never begins with an empty line.
  ## * If `splitLongWords` is true, a word wider than `maxLineWidth` is
  ##   broken up character by character. It starts in the space left on the
  ##   current line and `newLine` is inserted each time a line is full.
  ##   The pending separator run is **not** written in this case, so the
  ##   first piece directly follows the previous word.
  ## * If `splitLongWords` is false, over-long words are kept intact and
  ##   their line exceeds `maxLineWidth`.
  runnableExamples:
    doAssert wrapWords("aaa bbb ccc", 7) == "aaa bbb\nccc"
    doAssert wrapWords("abcdefghij", 4) == "abcd\nefgh\nij"
    doAssert wrapWords("abcdefghij", 4, splitLongWords = false) == "abcdefghij"

  result = newStringOfCap(s.len + s.len shr 6)
  var spaceLeft = maxLineWidth
  var lastSep = ""
  for word, isSep in tokenize(s, seps):
    let wlen = olen(word)
    if isSep:
      # Remember the separator; whether it is emitted depends on the next word.
      lastSep = word
      spaceLeft = spaceLeft - wlen
    elif wlen > spaceLeft:
      if splitLongWords and wlen > maxLineWidth:
        # Copy the word one character at a time, breaking whenever the
        # current line has no room left.
        var i = 0
        while i < word.len:
          if spaceLeft <= 0:
            spaceLeft = maxLineWidth
            result.add newLine
          dec spaceLeft
          let L = graphemeLen(word, i)
          for j in 0 ..< L: result.add word[i+j]
          inc i, L
      else:
        spaceLeft = maxLineWidth - wlen
        # Only break the line if there is a line to break: with nothing
        # emitted so far we already are at the start of a fresh line.
        if result.len > 0:
          result.add(newLine)
        result.add(word)
    else:
      spaceLeft = spaceLeft - wlen
      result.add(lastSep)
      result.add(word)
      lastSep.setLen(0)

when isMainModule:
  # With `splitLongWords` enabled no produced line may be wider than the
  # default `maxLineWidth` of 80 characters.
  let tmp = "Наши исследования позволяют сделать вывод о том, что субъект выбирает xxxuiaetudtiraeüöätpghiacodöeronfdquiahgoüöädoiaqofhgiaeotrnuiaßqzfgiaoeurnudtitraenuitenruitarenitarenuitarentduiranetduiranetdruianetrnuiaertnuiatdenruiatdrne институциональный психоз. Важность этой функции подчеркивается тем фактом, что объект вызывает эгоцентризм. Самоактуализация аннигилирует генезис. Анима аннигилирует возрастной код. Закон просветляет аутотренинг. Наши исследования позволяют сделать вывод о том, что воспитание заметно осознаёт инсайт."
  for line in splitLines(wrapWords(tmp)):
    doAssert olen(line) <= 80

  let
    inp = """ this is a long text -- muchlongerthan10chars and here
               it goes"""
    outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
  doAssert wrapWords(inp, 10, false) == outp

  let
    longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
    longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
  doAssert wrapWords(longInp, 8, true) == longOutp

  # An unsplittable first word must not produce a leading empty line.
  doAssert wrapWords("abcdefghijkl", 5, false) == "abcdefghijkl"
  doAssert wrapWords("     abc", 5) == "abc"

  # test we don't break Umlauts into invalid bytes:
  let fies = "äöüöäöüöäöüöäöüööäöüöäößßßßüöäößßßßßß"
  let fiesRes = "ä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nö\nä\nö\nü\nö\nä\nö\nß\nß\nß\nß\nü\nö\nä\nö\nß\nß\nß\nß\nß\nß"
  doAssert wrapWords(fies, 1, true) == fiesRes

  # NOTE: the expected text below relies on the pending separator being
  # dropped when a long word is split starting on a partly filled line
  # (e.g. "...jtüö" and "äzydg..." end up adjacent).
  let longlongword = """abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüö
äzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoen
rsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdr
iaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
  let longlongwordRes = """
abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp
psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüü
öäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiq
fglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocf
qclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdr
tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψ
ρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
  doAssert wrapWords(longlongword) == longlongwordRes