diff options
Diffstat (limited to 'lib/pure/parseurl.nim')
-rwxr-xr-x | lib/pure/parseurl.nim | 105 |
1 files changed, 91 insertions, 14 deletions
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2010 Dominik Picheta
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Parses & constructs URLs.

import strutils

type
  TURL* = tuple[scheme, username, password,
                hostname, port, path, query, anchor: string]
    ## Represents a *Uniform Resource Locator* (URL).
    ## Any optional component is "" if it does not exist.
    ## `query` keeps its leading '?' and `anchor` its leading '#', so that
    ## `$` can rebuild the URL by plain concatenation.

proc parseUrl*(url: string): TURL =
  ## Splits `url` into its components.
  ##
  ## * A URL starting with '/' is treated as a relative reference: only
  ##   `path`, `query` and `anchor` are filled in.
  ## * Otherwise an optional ``scheme://`` prefix is read, followed by the
  ##   authority ``[username[:password]@]hostname[:port]``, which ends at
  ##   the first '/', '?' or '#' (or at the end of the string).
  ## * The query runs from the first '?' to the first '#'; the anchor runs
  ##   from the first '#' to the end. Both keep their leading delimiter.
  ##
  ## Never indexes past the end of `url`; an empty `url` yields a tuple of
  ## empty strings.
  const Delims = {'/', '?', '#'}
  result = (scheme: "", username: "", password: "", hostname: "",
            port: "", path: "", query: "", anchor: "")
  var pos = 0

  if url.len > 0 and url[0] != '/':
    # Scheme: only recognised when "://" occurs before any other delimiter,
    # i.e. the first delimiter in the string is the '/' right after the ':'.
    let sep = url.find("://")
    if sep >= 0 and url.find(Delims) == sep + 1:
      result.scheme = url.substr(0, sep - 1)
      pos = sep + 3

    # Authority: everything up to the next '/', '?' or '#'.
    var authEnd = url.find(Delims, pos)
    if authEnd < 0: authEnd = url.len
    let authority = url.substr(pos, authEnd - 1)
    pos = authEnd

    # User info sits before the last '@'; the password is whatever follows
    # the first ':' inside it (so passwords may themselves contain ':').
    var hostPort = authority
    let atPos = authority.rfind('@')
    if atPos >= 0:
      let userInfo = authority.substr(0, atPos - 1)
      hostPort = authority.substr(atPos + 1)
      let colon = userInfo.find(':')
      if colon >= 0:
        result.username = userInfo.substr(0, colon - 1)
        result.password = userInfo.substr(colon + 1)
      else:
        result.username = userInfo

    # Host and port. A bracketed IPv6 literal contains ':' itself, so the
    # port separator is only looked for after the closing ']'.
    var portSep = -1
    if hostPort.len > 0 and hostPort[0] == '[':
      let closing = hostPort.find(']')
      if closing >= 0 and closing + 1 < hostPort.len and
          hostPort[closing + 1] == ':':
        portSep = closing + 1
    else:
      portSep = hostPort.find(':')
    if portSep >= 0:
      result.hostname = hostPort.substr(0, portSep - 1)
      result.port = hostPort.substr(portSep + 1)
    else:
      result.hostname = hostPort

  # Anchor first: a '?' that appears after the '#' belongs to the anchor.
  var anchorStart = url.find('#', pos)
  if anchorStart < 0:
    anchorStart = url.len
  else:
    result.anchor = url.substr(anchorStart)

  var queryStart = url.find('?', pos)
  if queryStart < 0 or queryStart > anchorStart:
    queryStart = anchorStart
  else:
    result.query = url.substr(queryStart, anchorStart - 1)

  result.path = url.substr(pos, queryStart - 1)

proc `$`*(t: TURL): string =
  ## Rebuilds the textual URL from its components. Empty components are
  ## omitted; `query` and `anchor` are expected to carry their own leading
  ## '?' / '#', exactly as produced by `parseUrl`.
  result = ""
  if t.scheme != "":
    result.add(t.scheme)
    result.add("://")
  if t.username != "":
    result.add(t.username)
    if t.password != "":
      result.add(':')
      result.add(t.password)
    result.add('@')
  result.add(t.hostname)
  if t.port != "":
    result.add(':')
    result.add(t.port)
  result.add(t.path)
  result.add(t.query)
  result.add(t.anchor)