From 5bb0f224483fbc1d57fd1c5a2f4a22dd7263ecd6 Mon Sep 17 00:00:00 2001
From: Andinus <andinus@nand.sh>
Date: Tue, 19 Jan 2021 18:20:23 +0530
Subject: Re-implement octans, move subroutines to respective modules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Initially it went over the list of words & checked if they exist in
the grid. This was very slow.

Currently it walks the grid & checks if the current string exists in
the dictionary. This is faster for these reasons:

• The dictionary is sorted, so we perform a binary range search on
  it to return the list of all words that start with a specific
  string.
• Starting positions are limited.

If the dictionary wasn't sorted then this probably would've been
---
 lib/RangeSearch.rakumod | 70 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 lib/RangeSearch.rakumod

(limited to 'lib/RangeSearch.rakumod')

diff --git a/lib/RangeSearch.rakumod b/lib/RangeSearch.rakumod
new file mode 100644
index 0000000..16e43c1
--- /dev/null
+++ b/lib/RangeSearch.rakumod
@@ -0,0 +1,70 @@
+unit module RangeSearch;
+
+# range-starts-with returns the words of @dict that start with $str.
+# It should be faster than:
+#
+#   @dict.grep: *.starts-with($str)
+#
+# @dict must be sorted; two binary searches find the first and last
+# matching index. Words are lowercased before comparing, $str is not.
+sub range-starts-with (
+    @dict, Str $str --> List
+) is export {
+    # An empty dictionary has nothing to search.
+    return () unless @dict;
+
+    # Prefix of the word at index $i that is compared against $str:
+    # as many characters as $str has, lowercased.
+    my Int $len = $str.chars;
+    my &prefix-at = -> Int $i { @dict[$i].substr(0, $len).lc };
+
+    # $start, $end are the bounds of the part still being searched.
+    my Int ($start, $end) = (0, @dict.end);
+
+    # Loop until we end up on the lower index of range.
+    while $start < $end {
+        # Divide the list into 2 parts.
+        my Int $mid = ($start + $end) div 2;
+
+        # If $str is le (less than or equal to) the $mid prefix then
+        # the lower index is at $mid or before it, discard everything
+        # after $mid. If not then discard $mid & everything before it.
+        if $str le prefix-at($mid) {
+            $end = $mid;
+        } else {
+            $start = $mid + 1;
+        }
+    }
+
+    # Found the lower index.
+    my Int $lower = $start;
+
+    # Set $end to the end of list but keep $start at the lower index.
+    $end = @dict.end;
+
+    # Loop until we end up on the upper index of range.
+    while $start < $end {
+        # Divide the list into 2 parts. Adds 1 because we have to find
+        # the upper index in this part. `div' performs Integer
+        # division, output is floor'ed.
+        my Int $mid = (($start + $end) div 2) + 1;
+
+        # If $str is lt (less than) the $mid prefix then the upper
+        # index is before $mid, discard $mid & everything after it.
+        # If not then discard everything before $mid.
+        if $str lt prefix-at($mid) {
+            $end = $mid - 1;
+        } else {
+            $start = $mid;
+        }
+    }
+
+    # Found the upper index.
+    my Int $upper = $end;
+
+    # If nothing starts with $str then both searches end on the same
+    # index, so a lone result is verified with the same lowercased
+    # prefix (a case-sensitive check would drop a match like "Apple").
+    return () if $lower == $upper && prefix-at($lower) ne $str;
+    return @dict[$lower..$upper];
+}
-- 
cgit 1.4.1-2-gfad0