summary refs log tree commit diff stats
path: root/lib/std
diff options
context:
space:
mode:
author Araq <rumpf_a@web.de> 2018-11-08 16:00:49 +0100
committer Andreas Rumpf <rumpf_a@web.de> 2018-11-08 20:52:22 +0100
commit 56f76c5b08b40fb333ed7878794934cd2b3b866f (patch)
tree 99073cd91df48d4a3594480e1ab66856fbc421f9 /lib/std
parent 4f787ac4f4d4bf16fe5cde032dfd909756ebc972 (diff)
download Nim-56f76c5b08b40fb333ed7878794934cd2b3b866f.tar.gz
better implementation for wrapWords
Diffstat (limited to 'lib/std')
-rw-r--r-- lib/std/wordwrap.nim 143
1 files changed, 82 insertions, 61 deletions
diff --git a/lib/std/wordwrap.nim b/lib/std/wordwrap.nim
index 85cde6f0d..ac44b28dd 100644
--- a/lib/std/wordwrap.nim
+++ b/lib/std/wordwrap.nim
@@ -1,67 +1,88 @@
-import unicode
-
-proc wordWrap*(s: string, maxLineWidth = 80,
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2018 Nim contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+import strutils, unicode
+
+proc olen(s: string): int =
+  var i = 0
+  result = 0
+  while i < s.len:
+    inc result
+    let L = graphemeLen(s, i)
+    inc i, L
+
+proc wrapWords*(s: string, maxLineWidth = 80,
                splitLongWords = true,
-               newLine = "\n"): string  =
-  ## This function breaks all words that reach over `maxLineWidth`
-  ## measured in number of runes. When `splitLongWords` is `true`
-  ## words that are longer than `maxLineWidth` are splitted. Multiple
-  ## spaces and newlines are converted to a single space.  All
-  ## whitespace is treated equally.  Non-breaking whitespace is
-  ## ignored.
-
-  var currentWordLength: int = 0
-  var currentWord: string = newStringOfCap(32)
-  var currentLineLength: int = 0
-  var currentWordLengthAtLineEnd: int = -1
-  var longWordMode = false
-
-  template handleWhitespace(): untyped =
-    if currentWord.len > 0:
-
-      if currentLineLength + 1 + currentWordLength > maxLineWidth:
-        result.add newLine
-        currentLineLength = 0
-
-      if currentLineLength > 0:
-        result.add ' '
-        currentLineLength += 1
-
-      result.add currentWord
-      currentLineLength += currentWordLength
-
-      currentWord.setlen 0
-      currentWordLength = 0
-
-  for rune in s.runes:
-    if rune.isWhiteSpace:
-      handleWhitespace()
+               seps: set[char] = Whitespace,
+               newLine = "\n"): string {.noSideEffect.} =
+  ## Word wraps `s`.
+  result = newStringOfCap(s.len + s.len shr 6)
+  var spaceLeft = maxLineWidth
+  var lastSep = ""
+  for word, isSep in tokenize(s, seps):
+    let wlen = olen(word)
+    if isSep:
+      lastSep = word
+      spaceLeft = spaceLeft - wlen
+    elif wlen > spaceLeft:
+      if splitLongWords and wlen > maxLineWidth:
+        var i = 0
+        while i < word.len:
+          if spaceLeft <= 0:
+            spaceLeft = maxLineWidth
+            result.add newLine
+          dec spaceLeft
+          let L = graphemeLen(word, i)
+          for j in 0 ..< L: result.add word[i+j]
+          inc i, L
+      else:
+        spaceLeft = maxLineWidth - wlen
+        result.add(newLine)
+        result.add(word)
     else:
-      if splitLongWords and currentWordLength >= maxLineWidth:
-        handleWhitespace()
-
-      currentWord.add rune
-      inc currentWordLength
-
-  handleWhitespace()
-
+      spaceLeft = spaceLeft - wlen
+      result.add(lastSep)
+      result.add(word)
+      lastSep.setLen(0)
 
 when isMainModule:
-  import strutils
-
-
-  proc checkLineLength(arg: string): void =
-    for line in splitlines(arg):
-      var numRunes = 0
-      for rune in runes(line):
-        numRunes += 1
-
-      assert numRunes <= 80
-
-  let longlongword = "abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"
-
-  checkLineLength(longlongword.wordWrap)
 
-  let tmp ="Наши исследования позволяют сделать вывод о том, что субъект выбирает xxxuiaetudtiraeüöätpghiacodöeronfdquiahgoüöädoiaqofhgiaeotrnuiaßqzfgiaoeurnudtitraenuitenruitarenitarenuitarentduiranetduiranetdruianetrnuiaertnuiatdenruiatdrne институциональный психоз. Важность этой функции подчеркивается тем фактом, что объект вызывает эгоцентризм. Самоактуализация аннигилирует генезис. Анима аннигилирует возрастной код. Закон просветляет аутотренинг. Наши исследования позволяют сделать вывод о том, что воспитание заметно осознаёт инсайт."
+  when true:
+    let
+      inp = """ this is a long text --  muchlongerthan10chars and here
+                 it goes"""
+      outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
+    doAssert wrapWords(inp, 10, false) == outp
+
+    let
+      longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
+      longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
+    doAssert wrapWords(longInp, 8, true) == longOutp
+
+  # test we don't break Umlauts into invalid bytes:
+  let fies = "äöüöäöüöäöüöäöüööäöüöäößßßßüöäößßßßßß"
+  let fiesRes = "ä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nö\nä\nö\nü\nö\nä\nö\nß\nß\nß\nß\nü\nö\nä\nö\nß\nß\nß\nß\nß\nß"
+  doAssert wrapWords(fies, 1, true) == fiesRes
+
+  let longlongword = """abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüö
+äzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoen
+rsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdr
+iaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
+ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
+  let longlongwordRes = """
+abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp
+psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüü
+öäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiq
+fglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocf
+qclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdr
+tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψ
+ρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
+ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
+  doAssert wrapWords(longlongword) == longlongwordRes
 
-  checkLineLength(tmp.wordWrap)