summary refs log tree commit diff stats
path: root/compiler/ast.nim
diff options
context:
space:
mode:
authorTimothee Cour <timothee.cour2@gmail.com>2021-08-29 06:54:07 -0700
committerGitHub <noreply@github.com>2021-08-29 15:54:07 +0200
commitfa7c1aa8647aa97a4a204751db727c3cbeded749 (patch)
tree08b9134162b9578eee4c6f6481cc2f2c4e4b7564 /compiler/ast.nim
parent0ddd9519c0007b705d70b5a75b634831396dd904 (diff)
downloadNim-fa7c1aa8647aa97a4a204751db727c3cbeded749.tar.gz
move PNode.comment to a side channel, reducing memory usage during compilation by a factor 1.25x (#18760)
* move PNode.comment so a side channel, reducing memory usage

* fix a bug

* fixup

* use sfHasComment to speedup comment lookups

* fix for IC

* Update compiler/parser.nim

Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
Diffstat (limited to 'compiler/ast.nim')
-rw-r--r--compiler/ast.nim77
1 files changed, 56 insertions, 21 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim
index 446c00f49..5893a5671 100644
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -10,7 +10,7 @@
 # abstract syntax tree + symbol table
 
 import
-  lineinfos, hashes, options, ropes, idents, int128
+  lineinfos, hashes, options, ropes, idents, int128, tables
 from strutils import toLowerAscii
 
 export int128
@@ -497,6 +497,7 @@ type
     nfExecuteOnReload  # A top-level statement that will be executed during reloads
     nfLastRead  # this node is a last read
     nfFirstWrite# this node is a first write
+    nfHasComment # node has a comment
 
   TNodeFlags* = set[TNodeFlag]
   TTypeFlag* = enum   # keep below 32 for efficiency reasons (now: 43)
@@ -774,7 +775,6 @@ type
       ident*: PIdent
     else:
       sons*: TNodeSeq
-    comment*: string
 
   TStrTable* = object         # a table[PIdent] of PSym
     counter*: int
@@ -991,6 +991,38 @@ type
   TImplication* = enum
     impUnknown, impNo, impYes
 
+template nodeId(n: PNode): int = cast[int](n)
+
+type Gconfig = object
+  # we put comments in a side channel to avoid increasing `sizeof(TNode)`, which
+  # reduces memory usage given that `PNode` is the most allocated type by far.
+  comments: Table[int, string] # nodeId => comment
+  useIc*: bool
+
+var gconfig {.threadvar.}: Gconfig
+
+proc setUseIc*(useIc: bool) = gconfig.useIc = useIc
+
+proc comment*(n: PNode): string =
+  if nfHasComment in n.flags and not gconfig.useIc:
+    # IC doesn't track comments, see `packed_ast`, so this could fail
+    result = gconfig.comments[n.nodeId]
+
+proc `comment=`*(n: PNode, a: string) =
+  let id = n.nodeId
+  if a.len > 0:
+    # if needed, we could periodically cleanup gconfig.comments when its size increases,
+    # to ensure only live nodes (and with nfHasComment) have an entry in gconfig.comments;
+    # for compiling compiler, the waste is very small:
+    # num calls to newNodeImpl: 14984160 (num of PNode allocations)
+    # size of gconfig.comments: 33585
+    # num of nodes with comments that were deleted and hence wasted: 3081
+    n.flags.incl nfHasComment
+    gconfig.comments[id] = a
+  elif nfHasComment in n.flags:
+    n.flags.excl nfHasComment
+    gconfig.comments.del(id)
+
 # BUGFIX: a module is overloadable so that a proc can have the
 # same name as an imported module. This is necessary because of
 # the poor naming choices in the standard library.
@@ -1207,9 +1239,17 @@ when defined(useNodeIds):
   const nodeIdToDebug* = -1 # 2322968
   var gNodeId: int
 
-proc newNode*(kind: TNodeKind): PNode =
-  ## new node with unknown line info, no type, and no children
-  result = PNode(kind: kind, info: unknownLineInfo)
+template newNodeImpl(info2) =
+  result = PNode(kind: kind, info: info2)
+  when false:
+    # this would add overhead, so we skip it; it results in a small amount of leaked entries
+    # for old PNode that gets re-allocated at the same address as a PNode that
+    # has `nfHasComment` set (and an entry in that table). Only `nfHasComment`
+    # should be used to test whether a PNode has a comment; gconfig.comments
+    # can contain extra entries for deleted PNode's with comments.
+    gconfig.comments.del(cast[int](result))
+
+template setIdMaybe() =
   when defined(useNodeIds):
     result.id = gNodeId
     if result.id == nodeIdToDebug:
@@ -1217,27 +1257,22 @@ proc newNode*(kind: TNodeKind): PNode =
       writeStackTrace()
     inc gNodeId
 
+proc newNode*(kind: TNodeKind): PNode =
+  ## new node with unknown line info, no type, and no children
+  newNodeImpl(unknownLineInfo)
+  setIdMaybe()
+
 proc newNodeI*(kind: TNodeKind, info: TLineInfo): PNode =
   ## new node with line info, no type, and no children
-  result = PNode(kind: kind, info: info)
-  when defined(useNodeIds):
-    result.id = gNodeId
-    if result.id == nodeIdToDebug:
-      echo "KIND ", result.kind
-      writeStackTrace()
-    inc gNodeId
+  newNodeImpl(info)
+  setIdMaybe()
 
 proc newNodeI*(kind: TNodeKind, info: TLineInfo, children: int): PNode =
   ## new node with line info, type, and children
-  result = PNode(kind: kind, info: info)
+  newNodeImpl(info)
   if children > 0:
     newSeq(result.sons, children)
-  when defined(useNodeIds):
-    result.id = gNodeId
-    if result.id == nodeIdToDebug:
-      echo "KIND ", result.kind
-      writeStackTrace()
-    inc gNodeId
+  setIdMaybe()
 
 proc newNodeIT*(kind: TNodeKind, info: TLineInfo, typ: PType): PNode =
   ## new node with line info, type, and no children
@@ -1644,8 +1679,8 @@ proc copyNode*(src: PNode): PNode =
 
 template transitionNodeKindCommon(k: TNodeKind) =
   let obj {.inject.} = n[]
-  n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags,
-              comment: obj.comment)
+  n[] = TNode(kind: k, typ: obj.typ, info: obj.info, flags: obj.flags)
+  # n.comment = obj.comment # shouldn't be needed, the address doesnt' change
   when defined(useNodeIds):
     n.id = obj.id