From 7889c03cbc50afaa67e1e0eedb4fdcc577913bcd Mon Sep 17 00:00:00 2001 From: Dominik Picheta Date: Sun, 1 Oct 2017 17:17:40 +0100 Subject: Add tests for examples from Nim in Action. --- .../Chapter6/WikipediaStats/concurrency.nim | 79 ++++++++++++++++++++++ .../Chapter6/WikipediaStats/concurrency.nim.cfg | 1 + .../Chapter6/WikipediaStats/concurrency_regex.nim | 64 ++++++++++++++++++ .../WikipediaStats/concurrency_regex.nim.cfg | 1 + .../niminaction/Chapter6/WikipediaStats/naive.nim | 29 ++++++++ .../Chapter6/WikipediaStats/parallel_counts.nim | 72 ++++++++++++++++++++ .../WikipediaStats/parallel_counts.nim.cfg | 1 + .../Chapter6/WikipediaStats/race_condition.nim | 13 ++++ .../Chapter6/WikipediaStats/race_condition.nim.cfg | 1 + .../Chapter6/WikipediaStats/sequential_counts.nim | 34 ++++++++++ .../Chapter6/WikipediaStats/unguarded_access.nim | 15 ++++ .../WikipediaStats/unguarded_access.nim.cfg | 1 + 12 files changed, 311 insertions(+) create mode 100644 tests/niminaction/Chapter6/WikipediaStats/concurrency.nim create mode 100644 tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg create mode 100644 tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim create mode 100644 tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg create mode 100644 tests/niminaction/Chapter6/WikipediaStats/naive.nim create mode 100644 tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim create mode 100644 tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg create mode 100644 tests/niminaction/Chapter6/WikipediaStats/race_condition.nim create mode 100644 tests/niminaction/Chapter6/WikipediaStats/race_condition.nim.cfg create mode 100644 tests/niminaction/Chapter6/WikipediaStats/sequential_counts.nim create mode 100644 tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim create mode 100644 tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim.cfg (limited to 'tests/niminaction/Chapter6') diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim new file mode 100644 index 000000000..478f533d9 --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim @@ -0,0 +1,79 @@ +# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites +import tables, parseutils, strutils, threadpool + +const filename = "pagecounts-20160101-050000" + +type + Stats = ref object + projectName, pageTitle: string + requests, contentSize: int + +proc `$`(stats: Stats): string = + "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [ + stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize + ] + +proc parse(chunk: string): Stats = + # Each line looks like: en Main_Page 242332 4737756101 + result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0) + + var projectName = "" + var pageTitle = "" + var requests = "" + var contentSize = "" + for line in chunk.splitLines: + var i = 0 + projectName.setLen(0) + i.inc parseUntil(line, projectName, Whitespace, i) + i.inc skipWhitespace(line, i) + pageTitle.setLen(0) + i.inc parseUntil(line, pageTitle, Whitespace, i) + i.inc skipWhitespace(line, i) + requests.setLen(0) + i.inc parseUntil(line, requests, Whitespace, i) + i.inc skipWhitespace(line, i) + contentSize.setLen(0) + i.inc parseUntil(line, contentSize, Whitespace, i) + i.inc skipWhitespace(line, i) + + if requests.len == 0 or contentSize.len == 0: + # Ignore lines with either of the params that are empty. + continue + + let requestsInt = requests.parseInt + if requestsInt > result.requests and projectName == "en": + result = Stats( + projectName: projectName, + pageTitle: pageTitle, + requests: requestsInt, + contentSize: contentSize.parseInt + ) + +proc readChunks(filename: string, chunksize = 1000000): Stats = + result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0) + var file = open(filename) + var responses = newSeq[FlowVar[Stats]]() + var buffer = newString(chunksize) + var oldBufferLen = 0 + while not endOfFile(file): + let readSize = file.readChars(buffer, oldBufferLen, chunksize - oldBufferLen) + oldBufferLen + var chunkLen = readSize + + while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines: + # Find where the last line ends + chunkLen.dec + + responses.add(spawn parse(buffer[0 .. result.requests: + result = statistic + + file.close() + + +when isMainModule: + echo readChunks(filename) diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg new file mode 100644 index 000000000..aed303eef --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency.nim.cfg @@ -0,0 +1 @@ +--threads:on diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim new file mode 100644 index 000000000..8df3b6aeb --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim @@ -0,0 +1,64 @@ +# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites +import tables, parseutils, strutils, threadpool, re + +const filename = "pagecounts-20160101-050000" + +type + Stats = ref object + projectName, pageTitle: string + requests, contentSize: int + +proc `$`(stats: Stats): string = + "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [ + stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize + ] + +proc parse(chunk: string): Stats = + # Each line looks like: en Main_Page 242332 4737756101 + result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0) + + var matches: array[4, string] + var reg = re"([^\s]+)\s([^\s]+)\s(\d+)\s(\d+)" + for line in chunk.splitLines: + + let start = find(line, reg, matches) + if start == -1: continue + + let requestsInt = matches[2].parseInt + if requestsInt > result.requests and matches[0] == "en": + result = Stats( + projectName: matches[0], + pageTitle: matches[1], + requests: requestsInt, + contentSize: matches[3].parseInt + ) + +proc readChunks(filename: string, chunksize = 1000000): Stats = + result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0) + var file = open(filename) + var responses = newSeq[FlowVar[Stats]]() + var buffer = newString(chunksize) + var oldBufferLen = 0 + while not endOfFile(file): + let readSize = file.readChars(buffer, oldBufferLen, chunksize - oldBufferLen) + oldBufferLen + var chunkLen = readSize + + while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines: + # Find where the last line ends + chunkLen.dec + + responses.add(spawn parse(buffer[0 .. result.requests: + result = statistic + + file.close() + + +when isMainModule: + echo readChunks(filename) diff --git a/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg new file mode 100644 index 000000000..aed303eef --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/concurrency_regex.nim.cfg @@ -0,0 +1 @@ +--threads:on diff --git a/tests/niminaction/Chapter6/WikipediaStats/naive.nim b/tests/niminaction/Chapter6/WikipediaStats/naive.nim new file mode 100644 index 000000000..ed4fba8e2 --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/naive.nim @@ -0,0 +1,29 @@ +# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites +import tables, parseutils, strutils + +const filename = "pagecounts-20150101-050000" + +proc parse(filename: string): tuple[projectName, pageTitle: string, + requests, contentSize: int] = + # Each line looks like: en Main_Page 242332 4737756101 + var file = open(filename) + for line in file.lines: + var i = 0 + var projectName = "" + i.inc parseUntil(line, projectName, Whitespace, i) + i.inc + var pageTitle = "" + i.inc parseUntil(line, pageTitle, Whitespace, i) + i.inc + var requests = 0 + i.inc parseInt(line, requests, i) + i.inc + var contentSize = 0 + i.inc parseInt(line, contentSize, i) + if requests > result[2] and projectName == "en": + result = (projectName, pageTitle, requests, contentSize) + + file.close() + +when isMainModule: + echo parse(filename) diff --git a/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim new file mode 100644 index 000000000..7181145e9 --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim @@ -0,0 +1,72 @@ +import os, parseutils, threadpool, strutils + +type + Stats = ref object + domainCode, pageTitle: string + countViews, totalSize: int + +proc newStats(): Stats = + Stats(domainCode: "", pageTitle: "", countViews: 0, totalSize: 0) + +proc `$`(stats: Stats): string = + "(domainCode: $#, pageTitle: $#, countViews: $#, totalSize: $#)" % [ + stats.domainCode, stats.pageTitle, $stats.countViews, $stats.totalSize + ] + +proc parse(line: string, domainCode, pageTitle: var string, + countViews, totalSize: var int) = + if line.len == 0: return + var i = 0 + domainCode.setLen(0) + i.inc parseUntil(line, domainCode, {' '}, i) + i.inc + pageTitle.setLen(0) + i.inc parseUntil(line, pageTitle, {' '}, i) + i.inc + countViews = 0 + i.inc parseInt(line, countViews, i) + i.inc + totalSize = 0 + i.inc parseInt(line, totalSize, i) + +proc parseChunk(chunk: string): Stats = + result = newStats() + var domainCode = "" + var pageTitle = "" + var countViews = 0 + var totalSize = 0 + for line in splitLines(chunk): + parse(line, domainCode, pageTitle, countViews, totalSize) + if domainCode == "en" and countViews > result.countViews: + result = Stats(domainCode: domainCode, pageTitle: pageTitle, + countViews: countViews, totalSize: totalSize) + +proc readPageCounts(filename: string, chunkSize = 1_000_000) = + var file = open(filename) + var responses = newSeq[FlowVar[Stats]]() + var buffer = newString(chunksize) + var oldBufferLen = 0 + while not endOfFile(file): + let reqSize = chunksize - oldBufferLen + let readSize = file.readChars(buffer, oldBufferLen, reqSize) + oldBufferLen + var chunkLen = readSize + + while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines: + chunkLen.dec + + responses.add(spawn parseChunk(buffer[0 .. mostPopular.countViews: + mostPopular = statistic + + echo("Most popular is: ", mostPopular) + +when isMainModule: + const file = "pagecounts-20160101-050000" + let filename = getCurrentDir() / file + readPageCounts(filename) \ No newline at end of file diff --git a/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg new file mode 100644 index 000000000..9d57ecf93 --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim.cfg @@ -0,0 +1 @@ +--threads:on \ No newline at end of file diff --git a/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim b/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim new file mode 100644 index 000000000..c62b2f93e --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/race_condition.nim @@ -0,0 +1,13 @@ +import threadpool + +var counter = 0 + +proc increment(x: int) = + for i in 0 .. mostPopular[2]: + mostPopular = (domainCode, pageTitle, countViews, totalSize) + + echo("Most popular is: ", mostPopular) + +when isMainModule: + const file = "pagecounts-20160101-050000" + let filename = getCurrentDir() / file + readPageCounts(filename) \ No newline at end of file diff --git a/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim b/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim new file mode 100644 index 000000000..72e8bff12 --- /dev/null +++ b/tests/niminaction/Chapter6/WikipediaStats/unguarded_access.nim @@ -0,0 +1,15 @@ +import threadpool, locks + +var counterLock: Lock +initLock(counterLock) +var counter {.guard: counterLock.} = 0 + +proc increment(x: int) = + for i in 0 ..