diff options
Diffstat (limited to 'lib/std/wordwrap.nim')
-rw-r--r-- | lib/std/wordwrap.nim | 103 |
1 files changed, 41 insertions, 62 deletions
diff --git a/lib/std/wordwrap.nim b/lib/std/wordwrap.nim index 4b0dc4417..9333f880b 100644 --- a/lib/std/wordwrap.nim +++ b/lib/std/wordwrap.nim @@ -9,12 +9,12 @@ ## This module contains an algorithm to wordwrap a Unicode string. -import strutils, unicode +import std/[strutils, unicode] -proc olen(s: string): int = - var i = 0 +proc olen(s: string; start, lastExclusive: int): int = + var i = start result = 0 - while i < s.len: + while i < lastExclusive: inc result let L = graphemeLen(s, i) inc i, L @@ -32,64 +32,43 @@ proc wrapWords*(s: string, maxLineWidth = 80, result = newStringOfCap(s.len + s.len shr 6) var spaceLeft = maxLineWidth var lastSep = "" - for word, isSep in tokenize(s, seps): - let wlen = olen(word) + + var i = 0 + while true: + var j = i + let isSep = j < s.len and s[j] in seps + while j < s.len and (s[j] in seps) == isSep: inc(j) + if j <= i: break + #yield (substr(s, i, j-1), isSep) if isSep: - lastSep = word - spaceLeft = spaceLeft - wlen - elif wlen > spaceLeft: - if splitLongWords and wlen > maxLineWidth: - var i = 0 - while i < word.len: - if spaceLeft <= 0: - spaceLeft = maxLineWidth - result.add newLine - dec spaceLeft - let L = graphemeLen(word, i) - for j in 0 ..< L: result.add word[i+j] - inc i, L + lastSep.setLen 0 + for k in i..<j: + if s[k] notin {'\L', '\C'}: lastSep.add s[k] + if lastSep.len == 0: + lastSep.add ' ' + dec spaceLeft else: - spaceLeft = maxLineWidth - wlen - result.add(newLine) - result.add(word) + spaceLeft = spaceLeft - olen(lastSep, 0, lastSep.len) else: - spaceLeft = spaceLeft - wlen - result.add(lastSep) - result.add(word) - lastSep.setLen(0) - -when isMainModule: - - when true: - let - inp = """ this is a long text -- muchlongerthan10chars and here - it goes""" - outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes" - doAssert wrapWords(inp, 10, false) == outp - - let - longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow""" - longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow" - doAssert wrapWords(longInp, 8, true) == longOutp - - # test we don't break Umlauts into invalid bytes: - let fies = "äöüöäöüöäöüöäöüööäöüöäößßßßüöäößßßßßß" - let fiesRes = "ä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nö\nä\nö\nü\nö\nä\nö\nß\nß\nß\nß\nü\nö\nä\nö\nß\nß\nß\nß\nß\nß" - doAssert wrapWords(fies, 1, true) == fiesRes - - let longlongword = """abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüö -äzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoen -rsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdr -iaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ -ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε""" - let longlongwordRes = """ -abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp -psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüü -öäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiq -fglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocf -qclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdr -tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψ -ρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ -ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε""" - doAssert wrapWords(longlongword) == longlongwordRes - + let wlen = olen(s, i, j) + if wlen > spaceLeft: + if splitLongWords and wlen > maxLineWidth: + var k = 0 + while k < j - i: + if spaceLeft <= 0: + spaceLeft = maxLineWidth + result.add newLine + dec spaceLeft + let L = graphemeLen(s, k+i) + for m in 0 ..< L: result.add s[i+k+m] + inc k, L + else: + spaceLeft = maxLineWidth - wlen + result.add(newLine) + for k in i..<j: result.add(s[k]) + else: + spaceLeft = spaceLeft - wlen + result.add(lastSep) + for k in i..<j: result.add(s[k]) + #lastSep.setLen(0) + i = j |