summary refs log tree commit diff stats
path: root/tests/niminaction/Chapter6/WikipediaStats
diff options
context:
space:
mode:
Diffstat (limited to 'tests/niminaction/Chapter6/WikipediaStats')
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/concurrency.nim83
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg1
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim68
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg1
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/naive.nim33
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim76
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg1
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/race_condition.nim17
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/race_condition.nim.cfg1
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/sequential_counts.nim38
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim20
-rw-r--r--tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim.cfg1
12 files changed, 340 insertions, 0 deletions
diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim
new file mode 100644
index 000000000..913cd77db
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim
@@ -0,0 +1,83 @@
+discard """
+action: compile
+"""
+
+# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
+import tables, parseutils, strutils, threadpool
+
+const filename = "pagecounts-20160101-050000"
+
+type
+  Stats = ref object
+    projectName, pageTitle: string
+    requests, contentSize: int
+
+proc `$`(stats: Stats): string =
+  "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [
+    stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize
+  ]
+
+proc parse(chunk: string): Stats =
+  # Each line looks like: en Main_Page 242332 4737756101
+  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
+
+  var projectName = ""
+  var pageTitle = ""
+  var requests = ""
+  var contentSize = ""
+  for line in chunk.splitLines:
+    var i = 0
+    projectName.setLen(0)
+    i.inc parseUntil(line, projectName, Whitespace, i)
+    i.inc skipWhitespace(line, i)
+    pageTitle.setLen(0)
+    i.inc parseUntil(line, pageTitle, Whitespace, i)
+    i.inc skipWhitespace(line, i)
+    requests.setLen(0)
+    i.inc parseUntil(line, requests, Whitespace, i)
+    i.inc skipWhitespace(line, i)
+    contentSize.setLen(0)
+    i.inc parseUntil(line, contentSize, Whitespace, i)
+    i.inc skipWhitespace(line, i)
+
+    if requests.len == 0 or contentSize.len == 0:
+      # Ignore lines with either of the params that are empty.
+      continue
+
+    let requestsInt = requests.parseInt
+    if requestsInt > result.requests and projectName == "en":
+      result = Stats(
+        projectName: projectName,
+        pageTitle: pageTitle,
+        requests: requestsInt,
+        contentSize: contentSize.parseInt
+      )
+
+proc readChunks(filename: string, chunksize = 1000000): Stats =
+  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
+  var file = open(filename)
+  var responses = newSeq[FlowVar[Stats]]()
+  var buffer = newString(chunksize)
+  var oldBufferLen = 0
+  while not endOfFile(file):
+    let readSize = file.readChars(buffer, oldBufferLen, chunksize - oldBufferLen) + oldBufferLen
+    var chunkLen = readSize
+
+    while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines:
+      # Find where the last line ends
+      chunkLen.dec
+
+    responses.add(spawn parse(buffer[0 ..< chunkLen]))
+    oldBufferLen = readSize - chunkLen
+    buffer[0 ..< oldBufferLen] = buffer[readSize - oldBufferLen .. ^1]
+
+  for resp in responses:
+    let statistic = ^resp
+    if statistic.requests > result.requests:
+      result = statistic
+
+  file.close()
+
+
+when true:
+  echo readChunks(filename)
diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg
new file mode 100644
index 000000000..aed303eef
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg
@@ -0,0 +1 @@
+--threads:on
diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim
new file mode 100644
index 000000000..102313de9
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim
@@ -0,0 +1,68 @@
+discard """
+action: compile
+"""
+
+# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
+import tables, parseutils, strutils, threadpool, re
+
+const filename = "pagecounts-20160101-050000"
+
+type
+  Stats = ref object
+    projectName, pageTitle: string
+    requests, contentSize: int
+
+proc `$`(stats: Stats): string =
+  "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [
+    stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize
+  ]
+
+proc parse(chunk: string): Stats =
+  # Each line looks like: en Main_Page 242332 4737756101
+  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
+
+  var matches: array[4, string]
+  var reg = re"([^\s]+)\s([^\s]+)\s(\d+)\s(\d+)"
+  for line in chunk.splitLines:
+
+    let start = find(line, reg, matches)
+    if start == -1: continue
+
+    let requestsInt = matches[2].parseInt
+    if requestsInt > result.requests and matches[0] == "en":
+      result = Stats(
+        projectName: matches[0],
+        pageTitle: matches[1],
+        requests: requestsInt,
+        contentSize: matches[3].parseInt
+      )
+
+proc readChunks(filename: string, chunksize = 1000000): Stats =
+  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
+  var file = open(filename)
+  var responses = newSeq[FlowVar[Stats]]()
+  var buffer = newString(chunksize)
+  var oldBufferLen = 0
+  while not endOfFile(file):
+    let readSize = file.readChars(buffer, oldBufferLen, chunksize - oldBufferLen) + oldBufferLen
+    var chunkLen = readSize
+
+    while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines:
+      # Find where the last line ends
+      chunkLen.dec
+
+    responses.add(spawn parse(buffer[0 ..< chunkLen]))
+    oldBufferLen = readSize - chunkLen
+    buffer[0 ..< oldBufferLen] = buffer[readSize - oldBufferLen .. ^1]
+
+  echo("Spawns: ", responses.len)
+  for resp in responses:
+    let statistic = ^resp
+    if statistic.requests > result.requests:
+      result = statistic
+
+  file.close()
+
+
+when true:
+  echo readChunks(filename)
diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg
new file mode 100644
index 000000000..aed303eef
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg
@@ -0,0 +1 @@
+--threads:on
diff --git a/tests/niminaction/Chapter6/WikipediaStats/naive.nim b/tests/niminaction/Chapter6/WikipediaStats/naive.nim
new file mode 100644
index 000000000..687177f74
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/naive.nim
@@ -0,0 +1,33 @@
+discard """
+action: compile
+"""
+
+# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
+import tables, parseutils, strutils
+
+const filename = "pagecounts-20150101-050000"
+
+proc parse(filename: string): tuple[projectName, pageTitle: string,
+    requests, contentSize: int] =
+  # Each line looks like: en Main_Page 242332 4737756101
+  var file = open(filename)
+  for line in file.lines:
+    var i = 0
+    var projectName = ""
+    i.inc parseUntil(line, projectName, Whitespace, i)
+    i.inc
+    var pageTitle = ""
+    i.inc parseUntil(line, pageTitle, Whitespace, i)
+    i.inc
+    var requests = 0
+    i.inc parseInt(line, requests, i)
+    i.inc
+    var contentSize = 0
+    i.inc parseInt(line, contentSize, i)
+    if requests > result[2] and projectName == "en":
+      result = (projectName, pageTitle, requests, contentSize)
+
+  file.close()
+
+when true:
+  echo parse(filename)
diff --git a/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim
new file mode 100644
index 000000000..379ec7364
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim
@@ -0,0 +1,76 @@
+discard """
+action: compile
+"""
+
+import os, parseutils, threadpool, strutils
+
+type
+  Stats = ref object
+    domainCode, pageTitle: string
+    countViews, totalSize: int
+
+proc newStats(): Stats =
+  Stats(domainCode: "", pageTitle: "", countViews: 0, totalSize: 0)
+
+proc `$`(stats: Stats): string =
+  "(domainCode: $#, pageTitle: $#, countViews: $#, totalSize: $#)" % [
+    stats.domainCode, stats.pageTitle, $stats.countViews, $stats.totalSize
+  ]
+
+proc parse(line: string, domainCode, pageTitle: var string,
+    countViews, totalSize: var int) =
+  if line.len == 0: return
+  var i = 0
+  domainCode.setLen(0)
+  i.inc parseUntil(line, domainCode, {' '}, i)
+  i.inc
+  pageTitle.setLen(0)
+  i.inc parseUntil(line, pageTitle, {' '}, i)
+  i.inc
+  countViews = 0
+  i.inc parseInt(line, countViews, i)
+  i.inc
+  totalSize = 0
+  i.inc parseInt(line, totalSize, i)
+
+proc parseChunk(chunk: string): Stats =
+  result = newStats()
+  var domainCode = ""
+  var pageTitle = ""
+  var countViews = 0
+  var totalSize = 0
+  for line in splitLines(chunk):
+    parse(line, domainCode, pageTitle, countViews, totalSize)
+    if domainCode == "en" and countViews > result.countViews:
+      result = Stats(domainCode: domainCode, pageTitle: pageTitle,
+                     countViews: countViews, totalSize: totalSize)
+
+proc readPageCounts(filename: string, chunkSize = 1_000_000) =
+  var file = open(filename)
+  var responses = newSeq[FlowVar[Stats]]()
+  var buffer = newString(chunksize)
+  var oldBufferLen = 0
+  while not endOfFile(file):
+    let reqSize = chunksize - oldBufferLen
+    let readSize = file.readChars(buffer, oldBufferLen, reqSize) + oldBufferLen
+    var chunkLen = readSize
+
+    while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines:
+      chunkLen.dec
+
+    responses.add(spawn parseChunk(buffer[0 ..< chunkLen]))
+    oldBufferLen = readSize - chunkLen
+    buffer[0 ..< oldBufferLen] = buffer[readSize - oldBufferLen .. ^1]
+
+  var mostPopular = newStats()
+  for resp in responses:
+    let statistic = ^resp
+    if statistic.countViews > mostPopular.countViews:
+      mostPopular = statistic
+
+  echo("Most popular is: ", mostPopular)
+
+when true:
+  const file = "pagecounts-20160101-050000"
+  let filename = getCurrentDir() / file
+  readPageCounts(filename)
diff --git a/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg
new file mode 100644
index 000000000..9d57ecf93
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg
@@ -0,0 +1 @@
+--threads:on
\ No newline at end of file
diff --git a/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim b/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim
new file mode 100644
index 000000000..f4b072204
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim
@@ -0,0 +1,17 @@
+discard """
+action: compile
+"""
+
+import threadpool
+
+var counter = 0
+
+proc increment(x: int) =
+  for i in 0 ..< x:
+    let value = counter + 1
+    counter = value
+
+spawn increment(10_000)
+spawn increment(10_000)
+sync()
+echo(counter)
diff --git a/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim.cfg
new file mode 100644
index 000000000..9d57ecf93
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim.cfg
@@ -0,0 +1 @@
+--threads:on
\ No newline at end of file
diff --git a/tests/niminaction/Chapter6/WikipediaStats/sequential_counts.nim b/tests/niminaction/Chapter6/WikipediaStats/sequential_counts.nim
new file mode 100644
index 000000000..f4bae3df5
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/sequential_counts.nim
@@ -0,0 +1,38 @@
+discard """
+action: compile
+"""
+
+import os, parseutils
+
+proc parse(line: string, domainCode, pageTitle: var string,
+    countViews, totalSize: var int) =
+  var i = 0
+  domainCode.setLen(0)
+  i.inc parseUntil(line, domainCode, {' '}, i)
+  i.inc
+  pageTitle.setLen(0)
+  i.inc parseUntil(line, pageTitle, {' '}, i)
+  i.inc
+  countViews = 0
+  i.inc parseInt(line, countViews, i)
+  i.inc
+  totalSize = 0
+  i.inc parseInt(line, totalSize, i)
+
+proc readPageCounts(filename: string) =
+  var domainCode = ""
+  var pageTitle = ""
+  var countViews = 0
+  var totalSize = 0
+  var mostPopular = ("", "", 0, 0)
+  for line in filename.lines:
+    parse(line, domainCode, pageTitle, countViews, totalSize)
+    if domainCode == "en" and countViews > mostPopular[2]:
+      mostPopular = (domainCode, pageTitle, countViews, totalSize)
+
+  echo("Most popular is: ", mostPopular)
+
+when true:
+  const file = "pagecounts-20160101-050000"
+  let filename = getCurrentDir() / file
+  readPageCounts(filename)
diff --git a/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim b/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim
new file mode 100644
index 000000000..7bdde8397
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim
@@ -0,0 +1,20 @@
+discard """
+  errormsg: "unguarded access: counter"
+  line: 14
+"""
+
+import threadpool, locks
+
+var counterLock: Lock
+initLock(counterLock)
+var counter {.guard: counterLock.} = 0
+
+proc increment(x: int) =
+  for i in 0 ..< x:
+    let value = counter + 1
+    counter = value
+
+spawn increment(10_000)
+spawn increment(10_000)
+sync()
+echo(counter)
diff --git a/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim.cfg
new file mode 100644
index 000000000..9d57ecf93
--- /dev/null
+++ b/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim.cfg
@@ -0,0 +1 @@
+--threads:on
\ No newline at end of file