1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
# A Fuzzy Match implementation inspired by the sublime text fuzzy match algorithm
# as described here: https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
# Heavily modified to provide more subjectively useful results
# for on the Nim manual.
#
import strutils
import math
import macros
const
MaxUnmatchedLeadingChar = 3
## Maximum number of times the penalty for unmatched leading chars is applied.
HeadingScaleFactor = 0.5
## The score from before the colon Char is multiplied by this.
## This is to weight function signatures and descriptions over module titles.
type
ScoreCard = enum
StartMatch = -100 ## Start matching.
LeadingCharDiff = -3 ## An unmatched, leading character was found.
CharDiff = -1 ## An unmatched character was found.
CharMatch = 0 ## A matched character was found.
ConsecutiveMatch = 5 ## A consecutive match was found.
LeadingCharMatch = 10 ## The character matches the begining of the
## string or the first character of a word
## or camel case boundry.
WordBoundryMatch = 20 ## The last ConsecutiveCharMatch that
## immediately precedes the end of the string,
## end of the pattern, or a LeadingCharMatch.
proc fuzzyMatch*(pattern, str: cstring) : tuple[score: int, matched: bool] =
var
scoreState = StartMatch
headerMatched = false
unmatchedLeadingCharCount = 0
consecutiveMatchCount = 0
strIndex = 0
patIndex = 0
score = 0
template transition(nextState) =
scoreState = nextState
score += ord(scoreState)
while (strIndex < str.len) and (patIndex < pattern.len):
var
patternChar = pattern[patIndex].toLowerAscii
strChar = str[strIndex].toLowerAscii
# Ignore certain characters
if patternChar in {'_', ' ', '.'}:
patIndex += 1
continue
if strChar in {'_', ' ', '.'}:
strIndex += 1
continue
# Since this algorithm will be used to search against Nim documentation,
# the below logic prioritizes headers.
if not headerMatched and strChar == ':':
headerMatched = true
scoreState = StartMatch
score = toInt(floor(HeadingScaleFactor * toFloat(score)))
patIndex = 0
strIndex += 1
continue
if strChar == patternChar:
case scoreState
of StartMatch, WordBoundryMatch:
scoreState = LeadingCharMatch
of CharMatch:
transition(ConsecutiveMatch)
of LeadingCharMatch, ConsecutiveMatch:
consecutiveMatchCount += 1
scoreState = ConsecutiveMatch
score += ord(ConsecutiveMatch) * consecutiveMatchCount
if scoreState == LeadingCharMatch:
score += ord(LeadingCharMatch)
var onBoundary = (patIndex == high(pattern))
if not onBoundary and strIndex < high(str):
let
nextPatternChar = toLowerAscii(pattern[patIndex + 1])
nextStrChar = toLowerAscii(str[strIndex + 1])
onBoundary = (
nextStrChar notin {'a'..'z'} and
nextStrChar != nextPatternChar
)
if onBoundary:
transition(WordBoundryMatch)
of CharDiff, LeadingCharDiff:
var isLeadingChar = (
str[strIndex - 1] notin Letters or
str[strIndex - 1] in {'a'..'z'} and
str[strIndex] in {'A'..'Z'}
)
if isLeadingChar:
scoreState = LeadingCharMatch
#a non alpha or a camel case transition counts as a leading char.
# Transition the state, but don't give the bonus yet; wait until we verify a consecutive match.
else:
transition(CharMatch)
patIndex += 1
else:
case scoreState
of StartMatch:
transition(LeadingCharDiff)
of ConsecutiveMatch:
transition(CharDiff)
consecutiveMatchCount = 0
of LeadingCharDiff:
if unmatchedLeadingCharCount < MaxUnmatchedLeadingChar:
transition(LeadingCharDiff)
unmatchedLeadingCharCount += 1
else:
transition(CharDiff)
strIndex += 1
result = (
score: max(0, score),
matched: (score > 0),
)
|