discard """
action: compile
"""
import os, parseutils, threadpool, strutils
type
  Stats = ref object
    ## One page-count record, holding the four values that `parse`
    ## extracts from a single input line.
    domainCode: string  ## first space-separated field of the line
    pageTitle: string   ## second space-separated field of the line
    countViews: int     ## third field, parsed as an integer
    totalSize: int      ## fourth field, parsed as an integer
                        ## (unit not established in this file)
proc newStats(): Stats =
  ## Allocates a fresh `Stats` whose string fields are empty and whose
  ## counters are zero.
  new(result)
  result.domainCode = ""
  result.pageTitle = ""
  result.countViews = 0
  result.totalSize = 0
proc `$`(stats: Stats): string =
  ## Renders all four fields of `stats` as a single
  ## `(name: value, ...)` string.
  const layout =
    "(domainCode: $#, pageTitle: $#, countViews: $#, totalSize: $#)"
  let values = [
    stats.domainCode,
    stats.pageTitle,
    $stats.countViews,
    $stats.totalSize
  ]
  result = layout % values
proc parse(line: string, domainCode, pageTitle: var string,
           countViews, totalSize: var int) =
  ## Splits `line` into four space-separated fields and stores them in
  ## the output parameters, in order: `domainCode`, `pageTitle`,
  ## `countViews`, `totalSize`.
  ##
  ## All four outputs are reset first, so after the call they always
  ## describe `line` itself: an empty line yields `""`/`0`, and a line
  ## with fewer than four fields leaves the missing ones at `""`/`0`.
  ## A numeric field that does not start with an integer stays `0`.
  ##
  ## Raises `ValueError` (from `parseInt`) when a numeric field does not
  ## fit into an `int`.
  domainCode.setLen(0)
  pageTitle.setLen(0)
  countViews = 0
  totalSize = 0
  if line.len == 0: return

  var i = 0
  i.inc parseUntil(line, domainCode, {' '}, i)
  i.inc  # step over the separating space
  # Never hand a start index past the end of `line` to parseutils.
  if i >= line.len: return
  i.inc parseUntil(line, pageTitle, {' '}, i)
  i.inc
  if i >= line.len: return
  i.inc parseInt(line, countViews, i)
  i.inc
  if i >= line.len: return
  discard parseInt(line, totalSize, i)
proc parseChunk(chunk: string): Stats =
  ## Parses every line of `chunk` and returns the record with domain
  ## code `"en"` that has the highest `countViews`. When no line
  ## qualifies, the result is the all-empty value from `newStats()`.
  var
    code = ""
    title = ""
    views = 0
    size = 0
  result = newStats()
  for row in chunk.splitLines:
    parse(row, code, title, views, size)
    # Skip anything that is not "en" or does not beat the current best.
    if code != "en" or views <= result.countViews:
      continue
    result = Stats(
      domainCode: code,
      pageTitle: title,
      countViews: views,
      totalSize: size
    )
proc readPageCounts(filename: string, chunkSize = 1_000_000) =
  ## Reads `filename` in pieces of roughly `chunkSize` bytes, hands each
  ## piece to `parseChunk` through `spawn`, and echoes the record with
  ## the highest `countViews` among all results.
  ##
  ## Every piece is cut at its last line break so that no line is split
  ## between two `parseChunk` calls; the unfinished tail is carried over
  ## to the front of the buffer for the next read. A line longer than
  ## the buffer makes the buffer grow, and data after the final line
  ## break of the file is parsed as a last piece.
  ##
  ## Raises `IOError` when the file cannot be opened.
  let file = open(filename)
  defer: close(file)

  var responses = newSeq[FlowVar[Stats]]()
  # At least one byte, so that every read can make progress.
  var buffer = newString(max(chunkSize, 1))
  var oldBufferLen = 0  # bytes carried over from the previous read

  while not endOfFile(file):
    let reqSize = buffer.len - oldBufferLen
    let readSize = file.readChars(buffer, oldBufferLen, reqSize) + oldBufferLen

    # Walk back to just after the last line break in the valid data.
    var chunkLen = readSize
    while chunkLen > 0 and buffer[chunkLen - 1] notin NewLines:
      chunkLen.dec

    if chunkLen == 0:
      # No complete line yet: keep everything and read more, enlarging
      # the buffer first when it is already full.
      oldBufferLen = readSize
      if readSize == buffer.len:
        buffer.setLen(buffer.len * 2)
      continue

    responses.add(spawn parseChunk(buffer[0 ..< chunkLen]))
    oldBufferLen = readSize - chunkLen
    if oldBufferLen > 0:
      # Both slices have the same length, so this is a plain copy.
      buffer[0 ..< oldBufferLen] = buffer[chunkLen ..< readSize]

  if oldBufferLen > 0:
    # The file did not end with a line break; parse what is left.
    responses.add(spawn parseChunk(buffer[0 ..< oldBufferLen]))

  var mostPopular = newStats()
  for resp in responses:
    let statistic = ^resp  # blocks until this piece has been parsed
    if statistic.countViews > mostPopular.countViews:
      mostPopular = statistic
  echo("Most popular is: ", mostPopular)
when true:
  # Run the analysis on the page-count file expected in the current
  # working directory.
  const file = "pagecounts-20160101-050000"
  let filename = joinPath(getCurrentDir(), file)
  readPageCounts(filename)