1 files changed, 141 insertions, 0 deletions
diff --git a/lib/packages/docutils/rstidx.nim b/lib/packages/docutils/rstidx.nim
new file mode 100644
index 000000000..1472d28fd
--- /dev/null
+++ b/lib/packages/docutils/rstidx.nim
@@ -0,0 +1,141 @@
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2022 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+
+## Nim `idx`:idx: file format related definitions.
+
+import std/[strutils, syncio, hashes]
+from std/os import splitFile
+
+type
+  IndexEntryKind* = enum ## discriminator tag
+    ieMarkupTitle = "markupTitle"
+                           ## RST/Markdown title, text in `keyword` +
+                           ## HTML text in `linkTitle`
+    ieNimTitle = "nimTitle"
+                           ## Nim title
+    ieHeading = "heading"  ## RST/Markdown markup heading, escaped
+    ieIdxRole = "idx"      ## RST :idx: definition, escaped
+    ieNim = "nim"          ## Nim symbol, unescaped
+    ieNimGroup = "nimgrp"  ## Nim overload group, unescaped
+  IndexEntry* = object
+    kind*: IndexEntryKind  ## 0.
+    keyword*: string       ## 1.
+    link*: string          ## 2.
+    linkTitle*: string     ## 3. contains a prettier text for the href
+    linkDesc*: string      ## 4. the title attribute of the final href
+    line*: int             ## 5.
+    module*: string        ## origin file, NOT a field in ``.idx`` file
+    aux*: string           ## auxuliary field, NOT a field in ``.idx`` file
+
+proc isDocumentationTitle*(hyperlink: string): bool =
+  ## Returns true if the hyperlink is actually a documentation title.
+  ##
+  ## Documentation titles lack the hash. See `mergeIndexes()
+  ## <#mergeIndexes,string>`_ for a more detailed explanation.
+  result = hyperlink.find('#') < 0
+
+proc `$`*(e: IndexEntry): string =
+  """("$1", "$2", "$3", "$4", $5)""" % [
+      e.keyword, e.link, e.linkTitle, e.linkDesc, $e.line]
+
+proc quoteIndexColumn(text: string): string =
+  ## Returns a safe version of `text` for serialization to the ``.idx`` file.
+  ##
+  ## The returned version can be put without worries in a line based tab
+  ## separated column text file. The following character sequence replacements
+  ## will be performed for that goal:
+  ##
+  ## * ``"\\"`` => ``"\\\\"``
+  ## * ``"\n"`` => ``"\\n"``
+  ## * ``"\t"`` => ``"\\t"``
+  result = newStringOfCap(text.len + 3)
+  for c in text:
+    case c
+    of '\\': result.add "\\"
+    of '\L': result.add "\\n"
+    of '\C': discard
+    of '\t': result.add "\\t"
+    else: result.add c
+
+proc unquoteIndexColumn*(text: string): string =
+  ## Returns the unquoted version generated by ``quoteIndexColumn``.
+  result = text.multiReplace(("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\"))
+
+proc formatIndexEntry*(kind: IndexEntryKind; htmlFile, id, term, linkTitle,
+                       linkDesc: string, line: int):
+                      tuple[entry: string, isTitle: bool] =
+  result.entry = $kind
+  result.entry.add('\t')
+  result.entry.add term
+  result.entry.add('\t')
+  result.entry.add(htmlFile)
+  if id.len > 0:
+    result.entry.add('#')
+    result.entry.add(id)
+    result.isTitle = false
+  else:
+    result.isTitle = true
+  result.entry.add('\t' & linkTitle.quoteIndexColumn)
+  result.entry.add('\t' & linkDesc.quoteIndexColumn)
+  result.entry.add('\t' & $line)
+  result.entry.add("\n")
+
+proc parseIndexEntryKind(s: string): IndexEntryKind =
+  result = case s:
+    of "nim": ieNim
+    of "nimgrp": ieNimGroup
+    of "heading": ieHeading
+    of "idx": ieIdxRole
+    of "nimTitle": ieNimTitle
+    of "markupTitle": ieMarkupTitle
+    else: raise newException(ValueError, "unknown index entry value $1" % [s])
+
+proc parseIdxFile*(path: string):
+    tuple[fileEntries: seq[IndexEntry], title: IndexEntry] =
+  var
+    f = 0
+  newSeq(result.fileEntries, 500)
+  setLen(result.fileEntries, 0)
+  let (_, base, _) = path.splitFile
+  for line in lines(path):
+    let s = line.find('\t')
+    if s < 0: continue
+    setLen(result.fileEntries, f+1)
+    let cols = line.split('\t')
+    result.fileEntries[f].kind = parseIndexEntryKind(cols[0])
+    result.fileEntries[f].keyword = cols[1]
+    result.fileEntries[f].link = cols[2]
+    if result.fileEntries[f].kind == ieIdxRole:
+      result.fileEntries[f].module = base
+    else:
+      if result.title.keyword.len == 0:
+        result.fileEntries[f].module = base
+      else:
+        result.fileEntries[f].module = result.title.keyword
+
+    result.fileEntries[f].linkTitle = cols[3].unquoteIndexColumn
+    result.fileEntries[f].linkDesc = cols[4].unquoteIndexColumn
+    result.fileEntries[f].line = parseInt(cols[5])
+
+    if result.fileEntries[f].kind in {ieNimTitle, ieMarkupTitle}:
+      result.title = result.fileEntries[f]
+    inc f
+
+proc cmp*(a, b: IndexEntry): int =
+  ## Sorts two ``IndexEntry`` first by `keyword` field, then by `link`.
+  result = cmpIgnoreStyle(a.keyword, b.keyword)
+  if result == 0:
+    result = cmpIgnoreStyle(a.link, b.link)
+
+proc hash*(x: IndexEntry): Hash =
+  ## Returns the hash for the combined fields of the type.
+  ##
+  ## The hash is computed as the chained hash of the individual string hashes.
+  result = x.keyword.hash !& x.link.hash
+  result = result !& x.linkTitle.hash
+  result = result !& x.linkDesc.hash
+  result = !$result