#
# Nim's Runtime Library
# (c) Copyright 2022 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
## Nim `idx`:idx: file format related definitions.
import strutils, std/syncio, hashes
from os import splitFile
type
  IndexEntryKind* = enum ## discriminator tag, column 0 of an ``.idx`` line
    ieMarkupTitle = "markupTitle"
      ## RST/Markdown title: text is kept in `keyword`,
      ## HTML text is kept in `linkTitle`
    ieNimTitle = "nimTitle"
      ## Nim title
    ieHeading = "heading"
      ## RST/Markdown markup heading, escaped
    ieIdxRole = "idx"
      ## RST :idx: definition, escaped
    ieNim = "nim"
      ## Nim symbol, unescaped
    ieNimGroup = "nimgrp"
      ## Nim overload group, unescaped
  IndexEntry* = object ## one entry of an ``.idx`` file; N. = column number
    kind*: IndexEntryKind ## 0. discriminator tag of the entry
    keyword*: string      ## 1. the indexed term
    link*: string         ## 2. the link target
    linkTitle*: string    ## 3. contains a prettier text for the href
    linkDesc*: string     ## 4. the title attribute of the final href
    line*: int            ## 5. the `line` column, stored as an integer
    module*: string       ## origin file, NOT a field in ``.idx`` file
    aux*: string          ## auxiliary field, NOT a field in ``.idx`` file
proc isDocumentationTitle*(hyperlink: string): bool =
  ## Tells whether `hyperlink` is a documentation title rather than a link
  ## to an anchor.
  ##
  ## A hyperlink is treated as a documentation title when it contains no
  ## ``#`` character. See `mergeIndexes() <#mergeIndexes,string>`_ for a
  ## more detailed explanation.
  '#' notin hyperlink
proc `$`*(e: IndexEntry): string =
  ## Renders `e` as a parenthesized list: `keyword`, `link`, `linkTitle` and
  ## `linkDesc` each wrapped in double quotes, followed by the bare `line`
  ## value. The `kind`, `module` and `aux` fields are not part of the output.
  let columns = [e.keyword, e.link, e.linkTitle, e.linkDesc, $e.line]
  result = """("$1", "$2", "$3", "$4", $5)""" % columns
proc quoteIndexColumn(text: string): string =
  ## Returns a safe version of `text` for serialization to the ``.idx`` file.
  ##
  ## The returned version can be put without worries in a line based tab
  ## separated column text file. The following character sequence replacements
  ## will be performed for that goal:
  ##
  ## * ``"\\"`` => ``"\\\\"``
  ## * ``"\n"`` => ``"\\n"``
  ## * ``"\t"`` => ``"\\t"``
  ##
  ## Carriage returns (``"\r"``) are dropped from the output.
  result = newStringOfCap(text.len + 3)
  for c in text:
    case c
    of '\\':
      # The backslash is the escape character, so it has to be doubled.
      # Otherwise a literal backslash followed by `n` or `t` would be read
      # back as a newline or tab, and two backslashes as a single one.
      result.add "\\\\"
    of '\L': result.add "\\n"
    of '\C': discard # carriage returns are removed, not escaped
    of '\t': result.add "\\t"
    else: result.add c
proc unquoteIndexColumn*(text: string): string =
  ## Undoes the escaping produced by ``quoteIndexColumn``.
  ##
  ## Scanning `text` from left to right, the two-character sequences
  ## backslash + ``t``, backslash + ``n`` and backslash + backslash are
  ## replaced by a tab, a newline and a single backslash respectively.
  const unescapes = [("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\")]
  multiReplace(text, unescapes)
proc formatIndexEntry*(kind: IndexEntryKind; htmlFile, id, term, linkTitle,
                       linkDesc: string, line: int):
                      tuple[entry: string, isTitle: bool] =
  ## Builds the text of one ``.idx`` line.
  ##
  ## `entry` holds six tab separated columns followed by a newline: the
  ## string value of `kind`, `term`, the link, `linkTitle`, `linkDesc` and
  ## `line`. The link is `htmlFile`, with ``#`` and `id` appended when `id`
  ## is not empty. Only `linkTitle` and `linkDesc` are passed through
  ## ``quoteIndexColumn``; the other columns are written as given.
  ##
  ## `isTitle` is true exactly when `id` is empty.
  let hasAnchor = id.len > 0
  let target =
    if hasAnchor: htmlFile & '#' & id
    else: htmlFile
  let columns = [$kind, term, target, linkTitle.quoteIndexColumn,
                 linkDesc.quoteIndexColumn, $line]
  result.entry = columns.join("\t")
  result.entry.add '\n'
  result.isTitle = not hasAnchor
proc parseIndexEntryKind(s: string): IndexEntryKind =
  ## Converts the textual tag `s` back into its ``IndexEntryKind``.
  ##
  ## `s` must be exactly equal (case sensitive) to the string value of one
  ## of the enum members; otherwise ``ValueError`` is raised.
  for candidate in IndexEntryKind:
    if $candidate == s:
      return candidate
  raise newException(ValueError, "unknown index entry value $1" % [s])
proc parseIdxFile*(path: string):
    tuple[fileEntries: seq[IndexEntry], title: IndexEntry] =
  ## Reads the ``.idx`` file at `path` and returns its entries.
  ##
  ## Every line containing at least one tab is split into tab separated
  ## columns (kind, keyword, link, linkTitle, linkDesc, line); lines without
  ## a tab are skipped and columns past the sixth are ignored. `linkTitle`
  ## and `linkDesc` are unquoted with ``unquoteIndexColumn``.
  ##
  ## `title` is the last entry of kind ``ieNimTitle`` or ``ieMarkupTitle``
  ## in the file. The `module` field of an entry is the keyword of the title
  ## seen before it, or the base name of `path` while no title with a
  ## non-empty keyword has been seen.
  ##
  ## Raises ``ValueError`` for a line with fewer than six columns, for an
  ## unknown entry kind and for a `line` column that is not an integer.
  const
    columnCount = 6       # kind, keyword, link, linkTitle, linkDesc, line
    initialCapacity = 500 # space reserved up front for the entries
  result.fileEntries = newSeqOfCap[IndexEntry](initialCapacity)
  let (_, base, _) = path.splitFile
  var lineNo = 0
  for line in lines(path):
    inc lineNo
    if '\t' notin line: continue
    let cols = line.split('\t')
    if cols.len < columnCount:
      # Report a truncated line as a catchable error instead of failing
      # with an out-of-bounds index on `cols` below.
      raise newException(ValueError,
        "$1($2): malformed index entry: expected $3 columns, got $4" % [
          path, $lineNo, $columnCount, $cols.len])
    # The module is taken from the title seen *before* this entry, so a
    # title entry itself is attributed to the previous title (or `base`).
    let module =
      if result.title.keyword.len == 0: base
      else: result.title.keyword
    let entry = IndexEntry(
      kind: parseIndexEntryKind(cols[0]),
      keyword: cols[1],
      link: cols[2],
      linkTitle: cols[3].unquoteIndexColumn,
      linkDesc: cols[4].unquoteIndexColumn,
      line: parseInt(cols[5]),
      module: module)
    if entry.kind in {ieNimTitle, ieMarkupTitle}:
      result.title = entry
    result.fileEntries.add entry
proc cmp*(a, b: IndexEntry): int =
  ## Three-way comparison of two ``IndexEntry`` values: `keyword` is the
  ## primary key and `link` breaks ties. Both fields are compared with
  ## ``cmpIgnoreStyle``.
  let byKeyword = cmpIgnoreStyle(a.keyword, b.keyword)
  if byKeyword != 0: byKeyword
  else: cmpIgnoreStyle(a.link, b.link)
proc hash*(x: IndexEntry): Hash =
  ## Computes the hash of `x` by chaining the string hashes of `keyword`,
  ## `link`, `linkTitle` and `linkDesc`, in that order. The other fields do
  ## not take part in the hash.
  var acc = x.keyword.hash
  for part in [x.link, x.linkTitle, x.linkDesc]:
    acc = acc !& part.hash
  !$acc