diff options
Diffstat (limited to 'lib/std/wordwrap.nim')
-rw-r--r-- | lib/std/wordwrap.nim | 90 |
1 files changed, 90 insertions, 0 deletions
#
#
#            Nim's Runtime Library
#        (c) Copyright 2018 Nim contributors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module contains an algorithm to wordwrap a Unicode string.

import strutils, unicode

func olen(s: string): int =
  ## Returns the number of units in `s` as delimited by
  ## `unicode.graphemeLen`, so a multi-byte character counts once
  ## instead of once per byte.
  var i = 0
  result = 0
  while i < s.len:
    inc result
    let L = graphemeLen(s, i)
    inc i, L

func wrapWords*(s: string, maxLineWidth = 80,
                splitLongWords = true,
                seps: set[char] = Whitespace,
                newLine = "\n"): string =
  ## Word wraps `s`.
  ##
  ## * `maxLineWidth` is the budget per line, counted with
  ##   `graphemeLen` rather than in bytes.
  ## * `splitLongWords` controls whether a word longer than
  ##   `maxLineWidth` is broken up; if it is false such a word is put
  ##   on a line of its own and exceeds the budget.
  ## * `seps` is the set of characters that separate words.
  ## * `newLine` is the string inserted wherever a line is broken.
  ##
  ## A separator run is copied verbatim in front of the next word when
  ## that word still fits on the current line; otherwise it is dropped.
  ## In particular, a word that has to be split is appended directly
  ## after the preceding word without the separator in between.
  ##
  ## The result never starts with `newLine`: if the very first word does
  ## not fit, it is emitted without a preceding line break.
  runnableExamples:
    doAssert wrapWords("Hello Bob. Hello John.", 13, false) ==
      "Hello Bob.\nHello John."
    doAssert wrapWords("123456789012345678901234567890", 20) ==
      "12345678901234567890\n1234567890"
    doAssert wrapWords("abcdefghijkl", 5, false) == "abcdefghijkl"
  result = newStringOfCap(s.len + s.len shr 6)
  var spaceLeft = maxLineWidth
  var lastSep = ""
  for word, isSep in tokenize(s, seps):
    let wlen = olen(word)
    if isSep:
      # Remember the separator; it is only written if the next word
      # still fits on the current line.
      lastSep = word
      spaceLeft -= wlen
    elif wlen <= spaceLeft:
      spaceLeft -= wlen
      result.add(lastSep)
      result.add(word)
      lastSep.setLen(0)
    elif splitLongWords and wlen > maxLineWidth:
      # The word can never fit on a single line: fill the rest of the
      # current line and continue on fresh lines, always copying whole
      # `graphemeLen` units so multi-byte characters stay intact.
      var i = 0
      while i < word.len:
        if spaceLeft <= 0:
          spaceLeft = maxLineWidth
          # Do not open the output with an empty line.
          if result.len > 0: result.add(newLine)
        dec spaceLeft
        let L = graphemeLen(word, i)
        for j in 0 ..< L: result.add(word[i + j])
        inc i, L
    else:
      # Move the word to a new line, unless nothing has been written
      # yet (that would produce a spurious leading line break).
      spaceLeft = maxLineWidth - wlen
      if result.len > 0: result.add(newLine)
      result.add(word)

when isMainModule:

  let
    inp = """ this is a long text --  muchlongerthan10chars and here
               it goes"""
    outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
  doAssert wrapWords(inp, 10, false) == outp

  let
    longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
    longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
  doAssert wrapWords(longInp, 8, true) == longOutp

  # test we don't break Umlauts into invalid bytes:
  let fies = "äöüöäöüöäöüöäöüööäöüöäößßßßüöäößßßßßß"
  let fiesRes = "ä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nö\nä\nö\nü\nö\nä\nö\nß\nß\nß\nß\nü\nö\nä\nö\nß\nß\nß\nß\nß\nß"
  doAssert wrapWords(fies, 1, true) == fiesRes

  # a word that does not fit on the first line must not cause a leading
  # line break:
  doAssert wrapWords("abcdefghijkl", 5, false) == "abcdefghijkl"

  let longlongword = """abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüö
äzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoen
rsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdr
iaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
  let longlongwordRes = """
abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp
psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüü
öäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiq
fglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocf
qclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdr
tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψ
ρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
  doAssert wrapWords(longlongword) == longlongwordRes