diff options
author | Araq <rumpf_a@web.de> | 2013-12-03 01:59:59 +0100 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2013-12-03 01:59:59 +0100 |
commit | 5cb60ffbe0c3afde964467fc2ab99bc3bbd42968 (patch) | |
tree | d7213872ef41db6b8d0dd9b0cfb04582c52e468b | |
parent | fe983b13099dd8d8a93ebefc00174e98eb048dea (diff) | |
parent | 0628d85a6afd6465f1f7419a09b5bd19d7cdfb6c (diff) | |
download | Nim-5cb60ffbe0c3afde964467fc2ab99bc3bbd42968.tar.gz |
Merge branch 'master' of github.com:Araq/Nimrod
-rw-r--r-- | compiler/nimconf.nim | 5 | ||||
-rw-r--r-- | compiler/nimrod.nimrod.cfg (renamed from compiler/nimrod.cfg) | 0 | ||||
-rw-r--r-- | doc/tut1.txt | 54 | ||||
-rw-r--r-- | doc/tut2.txt | 271 | ||||
-rw-r--r-- | lib/pure/htmlparser.nim | 30 |
5 files changed, 341 insertions, 19 deletions
diff --git a/compiler/nimconf.nim b/compiler/nimconf.nim index 507812d9c..7ec566a01 100644 --- a/compiler/nimconf.nim +++ b/compiler/nimconf.nim @@ -243,11 +243,6 @@ proc LoadConfigs*(cfg: string) = readConfigFile(pd / cfg) if gProjectName.len != 0: - var conffile = changeFileExt(gProjectFull, "cfg") - if conffile != pd / cfg and existsFile(conffile): - readConfigFile(conffile) - rawMessage(warnConfigDeprecated, conffile) - # new project wide config file: readConfigFile(changeFileExt(gProjectFull, "nimrod.cfg")) diff --git a/compiler/nimrod.cfg b/compiler/nimrod.nimrod.cfg index ac8f732f1..ac8f732f1 100644 --- a/compiler/nimrod.cfg +++ b/compiler/nimrod.nimrod.cfg diff --git a/doc/tut1.txt b/doc/tut1.txt index 5c1cdb52e..2070c69d6 100644 --- a/doc/tut1.txt +++ b/doc/tut1.txt @@ -189,9 +189,18 @@ to a storage location: var x = "abc" # introduces a new variable `x` and assigns a value to it x = "xyz" # assigns a new value to `x` -``=`` is the *assignment operator*. The assignment operator cannot -be overloaded, overwritten or forbidden, but this might change in a future -version of Nimrod. +``=`` is the *assignment operator*. The assignment operator cannot be +overloaded, overwritten or forbidden, but this might change in a future version +of Nimrod. You can declare multiple variables with a single assignment +statement and all the variables will have the same value: + +.. code-block:: + var x, y = 3 # assigns 3 to the variables `x` and `y` + echo "x ", x # outputs "x 3" + echo "y ", y # outputs "y 3" + x = 42 # changes `x` to 42 without changing `y` + echo "x ", x # outputs "x 42" + echo "y ", y # outputs "y 3" Constants @@ -1352,6 +1361,45 @@ Even though you don't need to declare a type for a tuple to use it, tuples created with different field names will be considered different objects despite having the same field types. +Tuples can be *unpacked* during variable assignment (and only then!). 
This can +be handy to assign directly the fields of the tuples to individually named +variables. An example of this is the ``splitFile`` proc from the `os module +<os.html>`_ which returns the directory, name and extension of a path at the +same time. For tuple unpacking to work you have to use parenthesis around the +values you want to assign the unpacking to, otherwise you will be assigning the +same value to all the individual variables! Example: + +.. code-block:: nimrod + + import os + + let + path = "usr/local/nimrodc.html" + (dir, name, ext) = splitFile(path) + baddir, badname, badext = splitFile(path) + echo dir # outputs `usr/local` + echo name # outputs `nimrodc` + echo ext # outputs `.html` + # All the following output the same line: + # `(dir: usr/local, name: nimrodc, ext: .html)` + echo baddir + echo badname + echo badext + +Tuple unpacking **only** works in ``var`` or ``let`` blocks. The following code +won't compile: + +.. code-block:: nimrod + + import os + + var + path = "usr/local/nimrodc.html" + dir, name, ext = "" + + (dir, name, ext) = splitFile(path) + # --> Error: '(dir, name, ext)' cannot be assigned to + Reference and pointer types --------------------------- diff --git a/doc/tut2.txt b/doc/tut2.txt index e1e36bfc4..f66a5135d 100644 --- a/doc/tut2.txt +++ b/doc/tut2.txt @@ -699,15 +699,22 @@ once. Macros ====== -Macros enable advanced compile-time code transformations, but they -cannot change Nimrod's syntax. However, this is no real restriction because -Nimrod's syntax is flexible enough anyway. - -To write a macro, one needs to know how the Nimrod concrete syntax is converted -to an abstract syntax tree (AST). The AST is documented in the -`macros <macros.html>`_ module. - -There are two ways to invoke a macro: +Macros enable advanced compile-time code transformations, but they cannot +change Nimrod's syntax. However, this is no real restriction because Nimrod's +syntax is flexible enough anyway. 
Macros have to be implemented in pure Nimrod +code if `foreign function interface (FFI) +<manual.html#foreign-function-interface>`_ is not enabled in the compiler, but +other than that restriction (which at some point in the future will go away) +you can write any kind of Nimrod code and the compiler will run it at compile +time. + +There are two ways to write a macro, either *generating* Nimrod source code and +letting the compiler parse it, or creating manually an abstract syntax tree +(AST) which you feed to the compiler. In order to build the AST one needs to +know how the Nimrod concrete syntax is converted to an abstract syntax tree +(AST). The AST is documented in the `macros <macros.html>`_ module. + +Once your macro is finished, there are two ways to invoke it: (1) invoking a macro like a procedure call (`expression macros`:idx:) (2) invoking a macro with the special ``macrostmt`` syntax (`statement macros`:idx:) @@ -796,3 +803,249 @@ Term rewriting macros Term rewriting macros can be used to enhance the compilation process with user defined optimizations; see this `document <trmacros.html>`_ for further information. + + +Building your first macro +------------------------- + +To give a head start on writing macros we will now show how to turn your typical +dynamic code into something that compiles statically. For the exercise we will +use the following snippet of code as the starting point: + +.. 
code-block:: nimrod + + import strutils, tables + + proc readCfgAtRuntime(cfgFilename: string): TTable[string, string] = + let + inputString = readFile(cfgFilename) + var + source = "" + + result = initTable[string, string]() + for line in inputString.splitLines: + # Ignore empty lines + if line.len < 1: continue + var chunks = split(line, ',') + if chunks.len != 2: + quit("Input needs comma split values, got: " & line) + result[chunks[0]] = chunks[1] + + if result.len < 1: quit("Input file empty!") + + let info = readCfgAtRuntime("data.cfg") + + when isMainModule: + echo info["licenseOwner"] + echo info["licenseKey"] + echo info["version"] + +Presumably this snippet of code could be used in commercial software, reading +a configuration file to display information about the person who bought the +software. This external file would be generated by an online web shopping cart +to be included along the program containing the license information:: + + version,1.1 + licenseOwner,Hyori Lee + licenseKey,M1Tl3PjBWO2CC48m + +The ``readCfgAtRuntime`` proc will open the given filename and return a +``TTable`` from the `tables module <tables.html>`_. The parsing of the file is +done (without much care for handling invalid data or corner cases) using the +``split`` proc from the `strutils module <strutils.html>`_. There are many +things which can fail; mind the purpose is explaining how to make this run at +compile time, not how to properly implement a DRM scheme. + +The reimplementation of this code as a compile time proc will allow us to get +rid of the ``data.cfg`` file we would need to distribute along the binary, plus +if the information is really constant, it doesn't make sense from a logical point of +view to have it *mutable* in a global variable, it would be better if it was a +constant. Finally, and likely the most valuable feature, we can implement some +verification at compile time. 
You could think of this as a *better unit +testing*, since it is impossible to obtain a binary unless everything is +correct, preventing you from shipping to users a broken program which won't start +because a small critical file is missing or its contents changed by mistake to +something invalid. + + +Generating source code +++++++++++++++++++++++ + +Our first attempt will start by modifying the program to generate a compile +time string with the *generated source code*, which we then pass to the +``parseStmt`` proc from the `macros module <macros.html>`_. Here is the +modified source code implementing the macro: + +.. code-block:: nimrod + import macros, strutils + + macro readCfgAndBuildSource(cfgFilename: string): stmt = + let + inputString = slurp(cfgFilename.strVal) + var + source = "" + + for line in inputString.splitLines: + # Ignore empty lines + if line.len < 1: continue + var chunks = split(line, ',') + if chunks.len != 2: + error("Input needs comma split values, got: " & line) + source &= "const cfg" & chunks[0] & "= \"" & chunks[1] & "\"\n" + + if source.len < 1: error("Input file empty!") + result = parseStmt(source) + + readCfgAndBuildSource("data.cfg") + + when isMainModule: + echo cfglicenseOwner + echo cfglicenseKey + echo cfgversion + +The good news is not much has changed! First, we need to change the handling of +the input parameter. In the dynamic version the ``readCfgAtRuntime`` proc +receives a string parameter. However, in the macro version it is also declared +as string, but this is the *outside* interface of the macro. When the macro is +run, it actually gets a ``PNimrodNode`` object instead of a string, and we have +to call the ``strVal`` proc from the `macros module <macros.html>`_ to obtain +the string being passed in to the macro. + +Second, we cannot use the ``readFile`` proc from the `system module +<system.html>`_ due to FFI restriction at compile time. 
If we try to use this +proc, or any other which depends on FFI, the compiler will error with the +message ``cannot evaluate`` and a dump of the macro's source code, along with a +stack trace where the compiler reached before bailing out. We can get around +this limitation by using the ``slurp`` proc from the `system module +<system.html>`_, which was precisely made for compilation time (just like +``gorge`` which executes an external program and captures its output). + +The interesting thing is that our macro does not return a runtime ``TTable`` +object. Instead, it builds up Nimrod source code into the ``source`` variable. +For each line of the configuration file a ``const`` variable will be generated. +To avoid conflicts we prefix these variables with ``cfg``. In essence, what the +compiler is doing is replacing the line calling the macro with the following +snippet of code: + +.. code-block:: nimrod + const cfgversion= "1.1" + const cfglicenseOwner= "Hyori Lee" + const cfglicenseKey= "M1Tl3PjBWO2CC48m" + +You can verify this yourself adding the line ``echo source`` somewhere at the +end of the macro and compiling the program. Another difference is that instead +of calling the usual ``quit`` proc to abort (which we could still call) this +version calls the ``error`` proc. The ``error`` proc has the same behavior as +``quit`` but will dump also the source and file line information where the +error happened, making it easier for the programmer to find where compilation +failed. In this situation it would point to the line invoking the macro, but +**not** the line of ``data.cfg`` we are processing, that's something the macro +itself would need to control. + + +Generating AST by hand +++++++++++++++++++++++ + +To generate an AST we would need to intimately know the structures used by the +Nimrod compiler exposed in the `macros module <macros.html>`_, which at first +look seems a daunting task. 
But we can use a helper shortcut the ``dumpTree`` +macro, which is used as a statement macro instead of an expression macro. +Since we know that we want to generate a bunch of ``const`` symbols we can +create the following source file and compile it to see what the compiler +*expects* from us: + +.. code-block:: nimrod + import macros + + dumpTree: + const cfgversion: string = "1.1" + const cfglicenseOwner= "Hyori Lee" + const cfglicenseKey= "M1Tl3PjBWO2CC48m" + +During compilation of the source code we should see the following lines in the +output (again, since this is a macro, compilation is enough, you don't have to +run any binary):: + + StmtList + ConstSection + ConstDef + Ident !"cfgversion" + Ident !"string" + StrLit 1.1 + ConstSection + ConstDef + Ident !"cfglicenseOwner" + Empty + StrLit Hyori Lee + ConstSection + ConstDef + Ident !"cfglicenseKey" + Empty + StrLit M1Tl3PjBWO2CC48m + +With this output we have a better idea of what kind of input the compiler +expects. We need to generate a list of statements. For each constant the source +code generates a ``ConstSection`` and a ``ConstDef``. If we were to move all +the constants to a single ``const`` block we would see only a single +``ConstSection`` with three children. + +Maybe you didn't notice, but in the ``dumpTree`` example the first constant +explicitly specifies the type of the constant. That's why in the tree output +the two last constants have their second child ``Empty`` but the first has a +string identifier. So basically a ``const`` definition is made up from an +identifier, optionally a type (can be an *empty* node) and the value. Armed +with this knowledge, let's look at the finished version of the AST building +macro: + +.. 
code-block:: nimrod + import macros, strutils + + macro readCfgAndBuildAST(cfgFilename: string): stmt = + let + inputString = slurp(cfgFilename.strVal) + + result = newNimNode(nnkStmtList) + for line in inputString.splitLines: + # Ignore empty lines + if line.len < 1: continue + var chunks = split(line, ',') + if chunks.len != 2: + error("Input needs comma split values, got: " & line) + var + section = newNimNode(nnkConstSection) + constDef = newNimNode(nnkConstDef) + constDef.add(newIdentNode("cfg" & chunks[0])) + constDef.add(newEmptyNode()) + constDef.add(newStrLitNode(chunks[1])) + section.add(constDef) + result.add(section) + + if result.len < 1: error("Input file empty!") + + readCfgAndBuildAST("data.cfg") + + when isMainModule: + echo cfglicenseOwner + echo cfglicenseKey + echo cfgversion + +Since we are building on the previous example generating source code, we will +only mention the differences to it. Instead of creating a temporary ``string`` +variable and writing into it source code as if it were written *by hand*, we +use the ``result`` variable directly and create a statement list node +(``nnkStmtList``) which will hold our children. + +For each input line we have to create a constant definition (``nnkConstDef``) +and wrap it inside a constant section (``nnkConstSection``). Once these +variables are created, we fill them hierarchically like the previous AST dump +tree showed: the constant definition is a child of the section definition, and +the constant definition has an identifier node, an empty node (we let the +compiler figure out the type), and a string literal with the value. + +A last tip when writing a macro: if you are not sure the AST you are building +looks ok, you may be tempted to use the ``dumpTree`` macro. But you can't use +it *inside* the macro you are writing/debugging. Instead ``echo`` the string +generated by ``treeRepr``. 
If at the end of this example you add ``echo +treeRepr(result)`` you should get the same output as using the ``dumpTree`` +macro, but of course you can call that at any point of the macro where you +might be having troubles. diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim index d60d2e583..060f0e386 100644 --- a/lib/pure/htmlparser.nim +++ b/lib/pure/htmlparser.nim @@ -17,11 +17,37 @@ ## ## echo loadHtml("mydirty.html") ## -## ## Every tag in the resulting tree is in lower case. ## ## **Note:** The resulting ``PXmlNode`` already uses the ``clientData`` field, ## so it cannot be used by clients of this library. +## +## Example: Transforming hyperlinks +## ================================ +## +## This code demonstrates how you can iterate over all the tags in an HTML file +## and write back the modified version. In this case we look for hyperlinks +## ending with the extension ``.rst`` and convert them to ``.html``. +## +## .. code-block:: nimrod +## +## import htmlparser +## import xmltree # To use '$' for PXmlNode +## import strtabs # To access PXmlAttributes +## import os # To use splitFile +## import strutils # To use cmpIgnoreCase +## +## proc transformHyperlinks() = +## let html = loadHTML("input.html") +## +## for a in html.findAll("a"): +## let href = a.attrs["href"] +## if not href.isNil: +## let (dir, filename, ext) = splitFile(href) +## if cmpIgnoreCase(ext, ".rst") == 0: +## a.attrs["href"] = dir / filename & ".html" +## +## writeFile("output.html", $html) import strutils, streams, parsexml, xmltree, unicode, strtabs @@ -528,7 +554,7 @@ proc parseHtml*(s: PStream, filename: string, ## parses the XML from stream `s` and returns a ``PXmlNode``. Every ## occured parsing error is added to the `errors` sequence. var x: TXmlParser - open(x, s, filename, {reportComments}) + open(x, s, filename, {reportComments, reportWhitespace}) next(x) # skip the DOCTYPE: if x.kind == xmlSpecial: next(x) |