compiler/sourcemap.nim


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */# Hamming

Calculate the Hamming Distance between two DNA strands.

Your body is made up of cells that contain DNA. Those cells regularly wear out and need replacing, which they achieve by dividing into daughter cells. In fact, the average human body experiences about 10 quadrillion cell divisions in a lifetime!

When cells divide, their DNA replicates too. Sometimes during this process mistakes happen and single pieces of DNA get encoded with the incorrect information. If we compare two strands of DNA and count the differences between them we can see how many mistakes occurred. This is known as the "Hamming Distance".

We read DNA using the letters C,A,G and T. Two strands might look like this:

    GAGCCTACTAACGGGAT
    CATCGTAATGACGGCCT
    ^ ^ ^  ^ ^    ^^

They have 7 differences, and therefore the Hamming Distance is 7.

The Hamming Distance is useful for lots of things in science, not just biology, so it's a nice phrase to be familiar with :)

# Implementation notes

The Hamming distance is only defined for sequences of equal length, so
an attempt to calculate it between sequences of different lengths should
not work. The general handling of this situation (e.g., raising an
exception vs returning a special value) may differ between languages.

You may be wondering about the `cases_test.go` file. We explain it in the
[leap exercise][leap-exercise-readme].

[leap-exercise-readme]: https://github.com/exercism/go/blob/master/exercises/leap/README.md 


## Coding the solution

Look for a stub file having the name hamming.go
and place your solution code in that file.

## Running the tests

To run the tests run the command `go test` from within the exercise directory.

If the test suite contains benchmarks, you can run these with the `--bench` and `--benchmem`
flags:

    go test -v --bench . --benchmem

Keep in mind that each reviewer will run benchmarks on a different machine, with
different specs, so the results from these benchmark tests may vary.

## Further information

For more detailed information about the Go track, including how to get help if
you're having trouble, please visit the exercism.io [Go language page](http://exercism.io/languages/go/resources).

## Source

The Calculating Point Mutations problem at Rosalind [http://rosalind.info/problems/hamm/](http://rosalind.info/problems/hamm/)

## Submitting Incomplete Solutions
It's possible to submit an incomplete solution so you can see how others have completed the exercise.
import os, strformat, strutils, tables, sets, ropes, json, algorithm

type
  SourceNode* = ref object
    line*:      int
    column*:    int
    source*:    string
    name*:      string
    children*:  seq[Child]

  C = enum cSourceNode, cSourceString

  Child* = ref object
    case kind*: C:
    of cSourceNode:
      node*:  SourceNode
    of cSourceString:
      s*:     string

  SourceMap* = ref object
    version*:   int
    sources*:   seq[string]
    names*:     seq[string]
    mappings*:  string
    file*:      string
    # sourceRoot*: string
    # sourcesContent*: string

  SourceMapGenerator = ref object
    file:           string
    sourceRoot:     string
    skipValidation: bool
    sources:        seq[string]
    names:          seq[string]
    mappings:       seq[Mapping]

  Mapping* = ref object
    source*:        string
    original*:      tuple[line: int, column: int]
    generated*:     tuple[line: int, column: int]
    name*:          string
    noSource*:      bool
    noName*:        bool


proc child*(s: string): Child =
  Child(kind: cSourceString, s: s)


proc child*(node: SourceNode): Child =
  Child(kind: cSourceNode, node: node)


proc newSourceNode(line: int, column: int, path: string, node: SourceNode, name: string = ""): SourceNode =
  SourceNode(line: line, column: column, source: path, name: name, children: @[child(node)])


proc newSourceNode(line: int, column: int, path: string, s: string, name: string = ""): SourceNode =
  SourceNode(line: line, column: column, source: path, name: name, children: @[child(s)])


proc newSourceNode(line: int, column: int, path: string, children: seq[Child], name: string = ""): SourceNode =
  SourceNode(line: line, column: column, source: path, name: name, children: children)


# debugging


proc text*(sourceNode: SourceNode, depth: int): string =
  let empty = "  "
  result = &"{repeat(empty, depth)}SourceNode({sourceNode.source}:{sourceNode.line}:{sourceNode.column}):\n"
  for child in sourceNode.children:
    if child.kind == cSourceString:
      result.add(&"{repeat(empty, depth + 1)}{child.s}\n")
    else:
      result.add(child.node.text(depth + 1))


proc `$`*(sourceNode: SourceNode): string = text(sourceNode, 0)


# base64_VLQ


let integers = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="


proc encode*(i: int): string =
  result = ""
  var n = i
  if n < 0:
    n = (-n shl 1) or 1
  else:
    n = n shl 1

  var z = 0
  while z == 0 or n > 0:
    var e = n and 31
    n = n shr 5
    if n > 0:
      e = e or 32

    result.add(integers[e])
    z += 1


type TokenState = enum Normal, String, Ident, Mangled

iterator tokenize*(line: string): (bool, string) =
  # result = @[]
  var state = Normal
  var token = ""
  var isMangled = false
  for z, ch in line:
    if ch.isAlphaAscii:
      if state == Normal:
        state = Ident
        if token.len > 0:
          yield (isMangled, token)
        token = $ch
        isMangled = false
      else:
        token.add(ch)
    elif ch == '_':
      if state == Ident:
        state = Mangled
        isMangled = true
      token.add($ch)
    elif ch != '"' and not ch.isAlphaNumeric:
      if state in {Ident, Mangled}:
        state = Normal
        if token.len > 0:
          yield (isMangled, token)
        token = $ch
        isMangled = false
      else:
        token.add($ch)
    elif ch == '"':
      if state != String:
        state = String
        if token.len > 0:
          yield (isMangled, token)
        token = $ch
        isMangled = false
      else:
        state = Normal
        token.add($ch)
        if token.len > 0:
          yield (isMangled, token)
        isMangled = false
        token = ""
    else:
      token.add($ch)
  if token.len > 0:
    yield (isMangled, token)

proc parse*(source: string, path: string): SourceNode =
  let lines = source.splitLines()
  var lastLocation: SourceNode = nil
  result = newSourceNode(0, 0, path, @[])
    
  # we just use one single parent and add all nim lines
  # as its children, I guess in typical codegen
  # that happens recursively on ast level
  # we also don't have column info, but I doubt more one nim lines can compile to one js
  # maybe in macros?

  for i, originalLine in lines:
    let line = originalLine.strip
    if line.len == 0:
      continue
      
    # this shouldn't be a problem:
    # jsgen doesn't generate comments
    # and if you emit // line you probably know what you're doing
    if line.startsWith("// line"):
      if result.children.len > 0:
        result.children[^1].node.children.add(child(line & "\n"))
      let pos = line.find(" ", 8)
      let lineNumber = line[8 .. pos - 1].parseInt
      let linePath = line[pos + 2 .. ^2] # quotes
      
      lastLocation = newSourceNode(
        lineNumber,
        0,
        linePath,
        @[])
      result.children.add(child(lastLocation))
    else:
      var last: SourceNode
      for token in line.tokenize():
        var name = ""
        if token[0]:
          name = token[1].split('_', 1)[0]
        
        
        if result.children.len > 0:
          result.children[^1].node.children.add(
            child(
              newSourceNode(
                result.children[^1].node.line,
                0,
                result.children[^1].node.source,
                token[1],
                name)))
          last = result.children[^1].node.children[^1].node
        else:
          result.children.add(
            child(
              newSourceNode(i + 1, 0, path, token[1], name)))
          last = result.children[^1].node
      let nl = "\n"
      if not last.isNil:
        last.source.add(nl)

proc cmp(a: Mapping, b: Mapping): int =
  var c = cmp(a.generated, b.generated)
  if c != 0:
    return c

  c = cmp(a.source, b.source)
  if c != 0:
    return c

  c = cmp(a.original, b.original)
  if c != 0:
    return c

  return cmp(a.name, b.name)


proc index*[T](elements: seq[T], element: T): int =
  for z in 0 ..< elements.len:
    if elements[z] == element:
      return z
  return -1


proc serializeMappings(map: SourceMapGenerator, mappings: seq[Mapping]): string =
  var previous = Mapping(generated: (line: 1, column: 0), original: (line: 0, column: 0), name: "", source: "")
  var previousSourceId = 0
  var previousNameId = 0
  var next = ""
  var nameId = 0
  var sourceId = 0
  result = ""

  for z, mapping in mappings:
    next = ""

    if mapping.generated.line != previous.generated.line:
      previous.generated.column = 0

      while mapping.generated.line != previous.generated.line:
        next.add(";")
        previous.generated.line += 1

    else:
      if z > 0:
        if cmp(mapping, mappings[z - 1]) == 0:
          continue
        next.add(",")

    next.add(encode(mapping.generated.column - previous.generated.column))
    previous.generated.column = mapping.generated.column

    if not mapping.noSource and mapping.source.len > 0:
      sourceId = map.sources.index(mapping.source)
      next.add(encode(sourceId - previousSourceId))
      previousSourceId = sourceId
      next.add(encode(mapping.original.line - 1 - previous.original.line))
      previous.original.line = mapping.original.line - 1
      next.add(encode(mapping.original.column - previous.original.column))
      previous.original.column = mapping.original.column

      if not mapping.noName and mapping.name.len > 0:
        nameId = map.names.index(mapping.name)
        next.add(encode(nameId - previousNameId))
        previousNameId = nameId

    result.add(next)


proc gen*(map: SourceMapGenerator): SourceMap =
  var mappings = map.mappings.sorted do (a: Mapping, b: Mapping) -> int:
    cmp(a, b)
  result = SourceMap(
    file: map.file,
    version: 3,
    sources: map.sources[0..^1],
    names: map.names[0..^1],
    mappings: map.serializeMappings(mappings))


proc addMapping*(map: SourceMapGenerator, mapping: Mapping) =
  if not mapping.noSource and mapping.source notin map.sources:
    map.sources.add(mapping.source)

  if not mapping.noName and mapping.name.len > 0 and mapping.name notin map.names:
    map.names.add(mapping.name)

  # echo "map ", mapping.source, " ", mapping.original, " ", mapping.generated, " ", mapping.name
  map.mappings.add(mapping)


proc walk*(node: SourceNode, fn: proc(line: string, original: SourceNode)) =
  for child in node.children:
    if child.kind == cSourceString and child.s.len > 0:
      fn(child.s, node)
    else:
      child.node.walk(fn)


proc toSourceMap*(node: SourceNode, file: string): SourceMapGenerator =
  var map = SourceMapGenerator(file: file, sources: @[], names: @[], mappings: @[])

  var generated = (line: 1, column: 0)
  var sourceMappingActive = false
  var lastOriginal = SourceNode(source: "", line: -1, column: 0, name: "", children: @[])

  node.walk do (line: string, original: SourceNode):
    if original.source.endsWith(".js"):
      # ignore it
      discard
    else:
      if original.line != -1:
        if lastOriginal.source != original.source or
           lastOriginal.line != original.line or
           lastOriginal.column != original.column or
           lastOriginal.name != original.name:
          map.addMapping(
            Mapping(
              source: original.source,
              original: (line: original.line, column: original.column),
              generated: (line: generated.line, column: generated.column),
              name: original.name))

        lastOriginal = SourceNode(
          source: original.source,
          line: original.line,
          column: original.column,
          name: original.name,
          children: lastOriginal.children)
        sourceMappingActive = true
      elif sourceMappingActive:
        map.addMapping(
          Mapping(
            noSource: true,
            noName: true,
            generated: (line: generated.line, column: generated.column),
            original: (line: -1, column: -1)))
        lastOriginal.line = -1
        sourceMappingActive = false

    for z in 0 ..< line.len:
      if line[z] in Newlines:
        generated.line += 1
        generated.column = 0

        if z == line.len - 1:
          lastOriginal.line = -1
          sourceMappingActive = false
        elif sourceMappingActive:
          map.addMapping(
            Mapping(
              source: original.source,
              original: (line: original.line, column: original.column),
              generated: (line: generated.line, column: generated.column),
              name: original.name))
      else:
        generated.column += 1
    
  map


proc genSourceMap*(source: string, outFile: string): (Rope, SourceMap) =
  let node = parse(source, outFile)
  let map = node.toSourceMap(file = outFile)
  ((&"{source}\n//# sourceMappingURL={outFile}.map").rope, map.gen)