1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
#
# Nim's Runtime Library
# (c) Copyright 2022 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
## Nim `idx`:idx: file format related definitions.
import std/[strutils, syncio, hashes]
from std/os import splitFile
type
  IndexEntryKind* = enum ## kind tag stored in column 0 of an ``.idx`` line
    ieMarkupTitle = "markupTitle" ## RST/Markdown title: plain text in
                                  ## `keyword`, HTML text in `linkTitle`
    ieNimTitle = "nimTitle"       ## Nim title
    ieHeading = "heading"         ## RST/Markdown markup heading, escaped
    ieIdxRole = "idx"             ## RST ``:idx:`` definition, escaped
    ieNim = "nim"                 ## Nim symbol, unescaped
    ieNimGroup = "nimgrp"         ## Nim overload group, unescaped
  IndexEntry* = object ## one entry of an ``.idx`` file
    kind*: IndexEntryKind ## column 0: what the entry describes
    keyword*: string      ## column 1: the indexed term
    link*: string         ## column 2: link target
    linkTitle*: string    ## column 3: a prettier text for the href
    linkDesc*: string     ## column 4: the title attribute of the final href
    line*: int            ## column 5: stored as an integer
    module*: string       ## origin file, NOT a field in ``.idx`` file
    aux*: string          ## auxiliary field, NOT a field in ``.idx`` file
proc isDocumentationTitle*(hyperlink: string): bool =
  ## Tells whether `hyperlink` denotes a documentation title.
  ##
  ## A documentation title is recognised by the absence of a ``#``
  ## character anywhere in the link; an empty link therefore counts as a
  ## title as well.
  '#' notin hyperlink
proc `$`*(e: IndexEntry): string =
  ## Renders `e` as ``("keyword", "link", "linkTitle", "linkDesc", line)``.
  ##
  ## The four string fields are placed between double quotes verbatim (no
  ## escaping is applied); `kind`, `module` and `aux` are not part of the
  ## output.
  let quotedFields =
    [e.keyword, e.link, e.linkTitle, e.linkDesc].join("\", \"")
  result = "(\"" & quotedFields & "\", " & $e.line & ")"
proc quoteIndexColumn(text: string): string =
  ## Returns a safe version of `text` for serialization to the ``.idx`` file.
  ##
  ## The returned version can be put without worries in a line based tab
  ## separated column text file. The following character sequence
  ## replacements will be performed for that goal:
  ##
  ## * ``"\\"`` => ``"\\\\"``
  ## * ``"\n"`` => ``"\\n"``
  ## * ``"\t"`` => ``"\\t"``
  ##
  ## Carriage returns (``"\r"``) are dropped from the output.
  result = newStringOfCap(text.len + 3)
  for c in text:
    case c
    of '\\':
      # The escape character itself has to be doubled; otherwise a literal
      # backslash followed by 'n' or 't' in `text` would be read back as a
      # newline or a tab when the column is unquoted.
      result.add "\\\\"
    of '\L': result.add "\\n"
    of '\C': discard
    of '\t': result.add "\\t"
    else: result.add c
proc unquoteIndexColumn*(text: string): string =
  ## Decodes a column that was escaped for storage in an ``.idx`` file.
  ##
  ## Scanning left to right, the two-character sequences ``"\\t"``,
  ## ``"\\n"`` and ``"\\\\"`` are turned into a tab, a newline and a single
  ## backslash respectively; every other character is copied unchanged.
  const escapeTable = [("\\t", "\t"), ("\\n", "\n"), ("\\\\", "\\")]
  multiReplace(text, escapeTable)
proc formatIndexEntry*(kind: IndexEntryKind; htmlFile, id, term, linkTitle,
                       linkDesc: string, line: int):
                      tuple[entry: string, isTitle: bool] =
  ## Builds one newline-terminated line of an ``.idx`` file.
  ##
  ## The tab-separated columns are, in order: `kind`, `term`, the link
  ## (`htmlFile`, followed by ``#`` and `id` when `id` is not empty),
  ## `linkTitle`, `linkDesc` and `line`. Only `linkTitle` and `linkDesc`
  ## are passed through `quoteIndexColumn`; `term` and the link are written
  ## as given.
  ##
  ## `isTitle` is true exactly when `id` is empty, i.e. when the link
  ## carries no anchor.
  let hasAnchor = id.len > 0
  var link = htmlFile
  if hasAnchor:
    link.add '#'
    link.add id
  result.isTitle = not hasAnchor
  let columns = [$kind, term, link, quoteIndexColumn(linkTitle),
                 quoteIndexColumn(linkDesc), $line]
  result.entry = columns.join("\t")
  result.entry.add '\n'
proc parseIndexEntryKind(s: string): IndexEntryKind =
  ## Maps the textual kind found in an ``.idx`` line to its
  ## `IndexEntryKind`.
  ##
  ## The comparison is exact (case-sensitive) against the string value of
  ## each enum member; any other input raises `ValueError`.
  for candidate in IndexEntryKind:
    if $candidate == s:
      return candidate
  raise newException(ValueError, "unknown index entry value $1" % [s])
proc parseIdxFile*(path: string):
    tuple[fileEntries: seq[IndexEntry], title: IndexEntry] =
  ## Parses the ``.idx`` file at `path`.
  ##
  ## Returns every entry in file order (`fileEntries`) together with the
  ## last entry of kind `ieNimTitle` or `ieMarkupTitle` (`title`); `title`
  ## keeps its default value when the file contains no title entry.
  ##
  ## Lines that contain no tab character are skipped. Every other line must
  ## provide at least six tab-separated columns: kind, keyword, link, link
  ## title, link description and line number. The link title and link
  ## description are decoded with `unquoteIndexColumn`.
  ##
  ## The `module` field is not read from the file: `ieIdxRole` entries get
  ## the file name part of `path` (as returned by `splitFile`); all other
  ## entries get the keyword of the title seen so far, or that same file
  ## name while no title with a non-empty keyword has been seen.
  ##
  ## Raises `ValueError` for an unknown kind, a line number that is not an
  ## integer, or a line with fewer than six columns. Errors raised while
  ## reading the file are not handled here.
  const
    columnCount = 6       # kind, keyword, link, linkTitle, linkDesc, line
    initialCapacity = 500 # pre-allocation only; the seq grows as needed
  result.fileEntries = newSeqOfCap[IndexEntry](initialCapacity)
  let (_, base, _) = path.splitFile
  for line in lines(path):
    if '\t' notin line: continue
    let cols = line.split('\t')
    if cols.len < columnCount:
      # Report truncated rows as a parse error instead of failing with an
      # index-out-of-bounds defect on the column accesses below.
      raise newException(ValueError,
        "malformed index entry in '$1': expected $2 columns but got $3" %
        [path, $columnCount, $cols.len])
    var entry = IndexEntry(kind: parseIndexEntryKind(cols[0]),
                           keyword: cols[1], link: cols[2])
    # `result.title` still holds the previous title at this point, so a
    # title entry is never attributed to itself.
    entry.module =
      if entry.kind != ieIdxRole and result.title.keyword.len > 0:
        result.title.keyword
      else:
        base
    entry.linkTitle = cols[3].unquoteIndexColumn
    entry.linkDesc = cols[4].unquoteIndexColumn
    entry.line = parseInt(cols[5])
    if entry.kind in {ieNimTitle, ieMarkupTitle}:
      result.title = entry
    result.fileEntries.add entry
proc cmp*(a, b: IndexEntry): int =
  ## Orders two ``IndexEntry`` values by `keyword`, falling back to `link`
  ## when the keywords compare equal.
  ##
  ## Both comparisons are done with `cmpIgnoreStyle`.
  let byKeyword = cmpIgnoreStyle(a.keyword, b.keyword)
  result =
    if byKeyword != 0: byKeyword
    else: cmpIgnoreStyle(a.link, b.link)
proc hash*(x: IndexEntry): Hash =
  ## Computes a hash from the `keyword`, `link`, `linkTitle` and `linkDesc`
  ## fields of `x`, mixed in that order.
  ##
  ## The remaining fields (`kind`, `line`, `module`, `aux`) do not
  ## contribute to the hash.
  var mixed = hash(x.keyword)
  for field in [x.link, x.linkTitle, x.linkDesc]:
    mixed = mixed !& hash(field)
  result = !$mixed
|