lib/pure/strmisc.nim


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

#
#
#            Nim's Runtime Library
#        (c) Copyright 2016 Joey Payne
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module contains various string utility routines that are uncommonly
## used in comparison to `strutils <strutils.html>`_.

import strutils

{.deadCodeElim: on.}

proc expandTabs*(s: string, tabSize: int = 8): string {.noSideEffect,
  procvar.} =
  ## Expand tab characters in `s` by `tabSize` spaces

  result = newStringOfCap(s.len + s.len shr 2)
  var pos = 0

  template addSpaces(n) =
    for j in 0 ..< n:
      result.add(' ')
      pos += 1

  for i in 0 ..< len(s):
    let c = s[i]
    if c == '\t':
      let
        denominator = if tabSize > 0: tabSize else: 1
        numSpaces = tabSize - pos mod denominator

      addSpaces(numSpaces)
    else:
      result.add(c)
      pos += 1
    if c == '\l':
      pos = 0

proc partition*(s: string, sep: string,
                right: bool = false): (string, string, string)
                {.noSideEffect, procvar.} =
  ## Split the string at the first or last occurrence of `sep` into a 3-tuple
  ##
  ## Returns a 3 string tuple of (beforeSep, `sep`, afterSep) or
  ## (`s`, "", "") if `sep` is not found and `right` is false or
  ## ("", "", `s`) if `sep` is not found and `right` is true
  let position = if right: s.rfind(sep) else: s.find(sep)
  if position != -1:
    return (s[0 ..< position], sep, s[position + sep.len ..< s.len])
  return if right: ("", "", s) else: (s, "", "")

proc rpartition*(s: pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */# -*- encoding: utf8 -*-
# Copyright (C) 2009, 2010, 2011  Roman Zimbelmann <romanz@lavabit.com>
# This software is distributed under the terms of the GNU GPL version 3.

import sys
from unicodedata import east_asian_width

PY3 = sys.version > '3'
ASCIIONLY = set(chr(c) for c in range(1, 128))
NARROW = 1
WIDE = 2
WIDE_SYMBOLS = set('WF')

def uwid(string):
    """Return the width of a string"""
    if not PY3:
        string = string.decode('utf-8', 'ignore')
    return sum(utf_char_width(c) for c in string)


def utf_char_width(string):
    """Return the width of a single character"""
    if east_asian_width(string) in WIDE_SYMBOLS:
        return WIDE
    return NARROW


def string_to_charlist(string):
    """Return a list of characters with extra empty strings after wide chars"""
    if not set(string) - ASCIIONLY:
        return list(string)
    result = []
    if PY3:
        for c in string:
            result.append(c)
            if east_asian_width(c) in WIDE_SYMBOLS:
                result.append('')
    else:
        string = string.decode('utf-8', 'ignore')
        for c in string:
            result.append(c.encode('utf-8'))
            if east_asian_width(c) in WIDE_SYMBOLS:
                result.append('')
    return result


class WideString(object):
    def __init__(self, string, chars=None):
        self.string = string
        if chars is None:
            self.chars = string_to_charlist(string)
        else:
            self.chars = chars

    def __add__(self, string):
        """
        >>> (WideString("a") + WideString("b")).string
        'ab'
        >>> (WideString("a") + WideString("b")).chars
        ['a', 'b']
        >>> (WideString("afd") + "bc").chars
        ['a', 'f', 'd', 'b', 'c']
        """
        if isinstance(string, str):
            return WideString(self.string + string)
        elif isinstance(string, WideString):
            return WideString(self.string + string.string,
                    self.chars + string.chars)

    def __radd__(self, string):
        """
        >>> ("bc" + WideString("afd")).chars
        ['b', 'c', 'a', 'f', 'd']
        """
        if isinstance(string, str):
            return WideString(string + self.string)
        elif isinstance(string, WideString):
            return WideString(string.string + self.string,
                    string.chars + self.chars)

    def __str__(self):
        return self.string

    def __repr__(self):
        return '<' + self.__class__.__name__ + " '" + self.string + "'>"

    def __getslice__(self, a, z):
        """
        >>> WideString("asdf")[1:3]
        <WideString 'sd'>
        >>> WideString("asdf")[1:-100]
        <WideString ''>
        >>> WideString("モヒカン")[2:4]
        <WideString 'ヒ'>
        >>> WideString("モヒカン")[2:5]
        <WideString 'ヒ '>
        >>> WideString("モabカン")[2:5]
        <WideString 'ab '>
        >>> WideString("モヒカン")[1:5]
        <WideString ' ヒ '>
        >>> WideString("モヒカン")[:]
        <WideString 'モヒカン'>
        >>> WideString("aモ")[0:3]
        <WideString 'aモ'>
        >>> WideString("aモ")[0:2]
        <WideString 'a '>
        >>> WideString("aモ")[0:1]
        <WideString 'a'>
        """
        if z is None or z > len(self.chars):
            z = len(self.chars)
        if z < 0:
            z = len(self.chars) + z
        if z < 0:
            return WideString("")
        if a is None or a < 0:
            a = 0
        if z < len(self.chars) and self.chars[z] == '':
            if self.chars[a] == '':
                return WideString(' ' + ''.join(self.chars[a:z - 1]) + ' ')
            return WideString(''.join(self.chars[a:z - 1]) + ' ')
        if self.chars[a] == '':
            return WideString(' ' + ''.join(self.chars[a:z - 1]))
        return WideString(''.join(self.chars[a:z]))

    def __getitem__(self, i):
        """
        >>> WideString("asdf")[2]
        <WideString 'd'>
        >>> WideString("……")[0]
        <WideString '…'>
        >>> WideString("……")[1]
        <WideString '…'>
        """
        if isinstance(i, slice):
            return self.__getslice__(i.start, i.stop)
        return self.__getslice__(i, i+1)

    def __len__(self):
        """
        >>> len(WideString("poo"))
        3
        >>> len(WideString("モヒカン"))
        8
        """
        return len(self.chars)


if __name__ == '__main__':
    import doctest
    doctest.testmod()