diff options
Diffstat (limited to 'tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim')
-rw-r--r-- | tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim | 72 |
1 files changed, 72 insertions, 0 deletions
## Finds the most viewed "en" page in a page-counts file
## (tests/niminaction/Chapter6/WikipediaStats/parallel_counts.nim).
##
## Every input line is expected to have the shape
## ``domainCode pageTitle countViews totalSize`` with single-space separators.
## The file is read in chunks that are cut at line boundaries, and each chunk
## is parsed by a task spawned on the thread pool.

import os, parseutils, threadpool, strutils

type
  Stats = ref object
    ## One parsed page-counts record.
    domainCode, pageTitle: string
    countViews, totalSize: int

proc newStats(): Stats =
  ## Returns a record with empty strings and zeroed counters.
  Stats(domainCode: "", pageTitle: "", countViews: 0, totalSize: 0)

proc `$`(stats: Stats): string =
  ## Renders all four fields of `stats` on a single line.
  "(domainCode: $#, pageTitle: $#, countViews: $#, totalSize: $#)" % [
    stats.domainCode, stats.pageTitle, $stats.countViews, $stats.totalSize
  ]

proc parse(line: string, domainCode, pageTitle: var string,
           countViews, totalSize: var int) =
  ## Splits one record `line` into its four space-separated fields.
  ##
  ## An empty `line` leaves every output parameter untouched. The output
  ## strings are truncated and refilled in place so that callers can reuse
  ## the same variables across many lines.
  if line.len == 0: return
  var i = 0
  domainCode.setLen(0)
  i.inc parseUntil(line, domainCode, {' '}, i)
  i.inc # step over the separating space
  pageTitle.setLen(0)
  i.inc parseUntil(line, pageTitle, {' '}, i)
  i.inc
  countViews = 0
  i.inc parseInt(line, countViews, i)
  i.inc
  totalSize = 0
  i.inc parseInt(line, totalSize, i)

proc parseChunk(chunk: string): Stats =
  ## Returns the "en" record with the highest `countViews` among the lines of
  ## `chunk`, or an empty record (see `newStats`) when there is none.
  ## On equal view counts the earliest line wins.
  result = newStats()
  var domainCode = ""
  var pageTitle = ""
  var countViews = 0
  var totalSize = 0
  for line in splitLines(chunk):
    # `parse` would leave the previous record in place for an empty line;
    # re-checking that record can never change the result, so skip it.
    if line.len == 0: continue
    parse(line, domainCode, pageTitle, countViews, totalSize)
    if domainCode == "en" and countViews > result.countViews:
      result = Stats(domainCode: domainCode, pageTitle: pageTitle,
                     countViews: countViews, totalSize: totalSize)

proc findMostPopular(filename: string, chunkSize = 1_000_000): Stats =
  ## Scans `filename` and returns the most viewed "en" record, or an empty
  ## record (see `newStats`) when the file contains none.
  ##
  ## `chunkSize` is the initial size in bytes of the read buffer; values
  ## below 1 are treated as 1. `open` raises `IOError` when the file cannot
  ## be opened.
  var file = open(filename)
  defer: file.close() # release the handle on every exit path

  var responses = newSeq[FlowVar[Stats]]()
  var buffer = newString(max(chunkSize, 1))
  # Number of bytes at the front of `buffer` that belong to a line whose end
  # has not been read yet.
  var oldBufferLen = 0

  while not endOfFile(file):
    let reqSize = buffer.len - oldBufferLen
    let readSize = file.readChars(buffer, oldBufferLen, reqSize) + oldBufferLen

    # Cut the chunk right after the last newline that is in the buffer.
    # The `> 0` guard stops the scan before index -1 is ever touched.
    var chunkLen = readSize
    while chunkLen > 0 and buffer[chunkLen - 1] notin NewLines:
      chunkLen.dec

    if chunkLen == 0:
      # No complete line yet: keep everything and read more. A full buffer
      # means the line is longer than the buffer, so grow it; otherwise the
      # next request would be for zero bytes and the loop would never end.
      oldBufferLen = readSize
      if readSize == buffer.len:
        buffer.setLen(buffer.len * 2)
      continue

    responses.add(spawn parseChunk(buffer[0 ..< chunkLen]))

    # Move the unfinished tail to the front for the next round. Copying
    # forward is safe because every destination index is below its source.
    oldBufferLen = readSize - chunkLen
    for i in 0 ..< oldBufferLen:
      buffer[i] = buffer[chunkLen + i]

  # A last line that is not terminated by a newline is still a record.
  if oldBufferLen > 0:
    responses.add(spawn parseChunk(buffer[0 ..< oldBufferLen]))

  result = newStats()
  for resp in responses:
    let statistic = ^resp # blocks until that chunk has been parsed
    if statistic.countViews > result.countViews:
      result = statistic

proc readPageCounts(filename: string, chunkSize = 1_000_000) =
  ## Prints the most viewed "en" record of `filename` to standard output.
  ## `chunkSize` is forwarded to `findMostPopular`.
  let mostPopular = findMostPopular(filename, chunkSize)
  echo("Most popular is: ", mostPopular)

when isMainModule:
  # The data file is looked up in the current working directory.
  const file = "pagecounts-20160101-050000"
  let filename = getCurrentDir() / file
  readPageCounts(filename)