summary refs log tree commit diff stats
path: root/tools/nimgrep.nim
diff options
context:
space:
mode:
Diffstat (limited to 'tools/nimgrep.nim')
-rw-r--r--tools/nimgrep.nim364
1 files changed, 190 insertions, 174 deletions
diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim
index cb46f30b8..599c616ba 100644
--- a/tools/nimgrep.nim
+++ b/tools/nimgrep.nim
@@ -11,111 +11,12 @@ import
   os, strutils, parseopt, pegs, re, terminal, osproc, tables, algorithm, times
 
 const
-  Version = "1.6.0"
+  Version = "2.0.0"
   Usage = "nimgrep - Nim Grep Searching and Replacement Utility Version " &
   Version & """
 
   (c) 2012-2020 Andreas Rumpf
-
-Usage:
-* To search:
-  nimgrep [options] PATTERN [(FILE/DIRECTORY)*/-]
-* To replace:
-  nimgrep [options] PATTERN --replace REPLACEMENT (FILE/DIRECTORY)*/-
-* To list file names:
-  nimgrep [options] --filenames [PATTERN] [(FILE/DIRECTORY)*]
-
-Positional arguments, from left to right:
-* PATERN is either Regex (default) or Peg if --peg is specified.
-  PATTERN and REPLACEMENT should be skipped when --stdin is specified.
-* REPLACEMENT supports $1, $# notations for captured groups in PATTERN.
-  Note: --replace mode DOES NOT ask confirmation unless --confirm is specified!
-* Final arguments are a list of paths (FILE/DIRECTORY) or a standalone
-  minus '-' (pipe) or not specified (empty). Note for the empty case: when
-  no FILE/DIRECTORY/- is specified nimgrep DOES NOT read the pipe, but
-  searches files in the current dir instead!
-    -                 read buffer once from stdin: pipe or terminal input;
-                      in --replace mode the result is directed to stdout;
-                      it's not compatible with --stdin, --filenames, --confirm
-    (empty)           current directory '.' is assumed (not with --replace)
-  For any given DIRECTORY nimgrep searches only its immediate files without
-  traversing sub-directories unless --recursive is specified.
-  In replacement mode all 3 positional arguments are required to avoid damaging.
-
-Options:
-* Mode of operation:
-  --find, -f          find the PATTERN (default)
-  --replace, -!       replace the PATTERN to REPLACEMENT, rewriting the files
-  --confirm           confirm each occurrence/replacement; there is a chance
-                      to abort any time without touching the file
-  --filenames         just list filenames. Provide a PATTERN to find it in
-                      the filenames (not in the contents of a file) or run
-                      with empty pattern to just list all files:
-                      nimgrep --filenames               # In current directory
-                      nimgrep --filenames "" DIRECTORY  # Note empty pattern ""
-
-* Interprete patterns:
-  --peg               PATTERN and PAT are Peg
-  --re                PATTERN and PAT are regular expressions (default)
-  --rex, -x           use the "extended" syntax for the regular expression
-                      so that whitespace is not significant
-  --word, -w          matches should have word boundaries (buggy for pegs!)
-  --ignoreCase, -i    be case insensitive in PATTERN and PAT
-  --ignoreStyle, -y   be style insensitive in PATTERN and PAT
-  NOTE: PATERN and patterns PAT (see below in other options) are all either
-        Regex or Peg simultaneously and options --rex, --word, --ignoreCase,
-        --ignoreStyle are applied to all of them.
-
-* File system walk:
-  --recursive, -r     process directories recursively
-  --follow            follow all symlinks when processing recursively
-  --ext:EX1|EX2|...   only search the files with the given extension(s),
-                      empty one ("--ext") means files with missing extension
-  --noExt:EX1|...     exclude files having given extension(s), use empty one to
-                      skip files with no extension (like some binary files are)
-  --includeFile:PAT   search only files whose names contain pattern PAT
-  --excludeFile:PAT   skip files whose names contain pattern PAT
-  --includeDir:PAT    search only files with whole directory path containing PAT
-  --excludeDir:PAT    skip directories whose name (not path) contain pattern PAT
-  --if,--ef,--id,--ed abbreviations of 4 options above
-  --sortTime          order files by the last modification time (default: off):
-       -s[:asc|desc]    ascending (recent files go last) or descending
-
-* Filter file content:
-  --match:PAT         select files containing a (not displayed) match of PAT
-  --noMatch:PAT       select files not containing any match of PAT
-  --bin:on|off|only   process binary files? (detected by \0 in first 1K bytes)
-                      (default: on - binary and text files treated the same way)
-  --text, -t          process only text files, the same as --bin:off
-
-* Represent results:
-  --nocolor           output will be given without any colors
-  --color[:on]        force color even if output is redirected (default: auto)
-  --colorTheme:THEME  select color THEME from 'simple' (default),
-                      'bnw' (black and white) ,'ack', or 'gnu' (GNU grep)
-  --count             only print counts of matches for files that matched
-  --context:N, -c:N   print N lines of leading context before every match and
-                      N lines of trailing context after it (default N: 0)
-  --afterContext:N,
-               -a:N   print N lines of trailing context after every match
-  --beforeContext:N,
-               -b:N   print N lines of leading context before every match
-  --group, -g         group matches by file
-  --newLine, -l       display every matching line starting from a new line
-  --cols[:N]          limit max displayed columns/width of output lines from
-                      files by N characters, cropping overflows (default: off)
-  --cols:auto, -%     calculate columns from terminal width for every line
-  --onlyAscii, -@     display only printable ASCII Latin characters 0x20-0x7E
-                      substitutions: 0 -> ^@, 1 -> ^A, ... 0x1F -> ^_,
-                                     0x7F -> '7F, ..., 0xFF -> 'FF
-* Miscellaneous:
-  --threads:N, -j:N   speed up search by N additional workers (default: 0, off)
-  --stdin             read PATTERN from stdin (to avoid the shell's confusing
-                      quoting rules) and, if --replace given, REPLACEMENT
-  --verbose           be verbose: list every processed file
-  --help, -h          shows this help
-  --version, -v       shows the version
-"""
+""" & slurp "../doc/nimgrep_cmdline.txt"
 
 # Limitations / ideas / TODO:
 # * No unicode support with --cols
@@ -194,26 +95,34 @@ type
                   filename: string, fileResult: FileResult]
   WalkOpt = tuple  # used for walking directories/producing paths
     extensions: seq[string]
-    skipExtensions: seq[string]
-    excludeFile: seq[string]
-    includeFile: seq[string]
-    includeDir : seq[string]
-    excludeDir : seq[string]
+    notExtensions: seq[string]
+    filename: seq[string]
+    notFilename: seq[string]
+    dirPath: seq[string]
+    notDirPath: seq[string]
+    dirname : seq[string]
+    notDirname : seq[string]
   WalkOptComp[Pat] = tuple  # a compiled version of the previous
-    excludeFile: seq[Pat]
-    includeFile: seq[Pat]
-    includeDir : seq[Pat]
-    excludeDir : seq[Pat]
+    filename: seq[Pat]
+    notFilename: seq[Pat]
+    dirname : seq[Pat]
+    notDirname : seq[Pat]
+    dirPath: seq[Pat]
+    notDirPath: seq[Pat]
   SearchOpt = tuple  # used for searching inside a file
-    patternSet: bool     # to distinguish uninitialized 'pattern' and empty one
-    pattern: string      # main PATTERN
-    checkMatch: string   # --match
-    checkNoMatch: string # --nomatch
-    checkBin: Bin        # --bin
+    patternSet: bool           # To distinguish uninitialized/empty 'pattern'
+    pattern: string            # Main PATTERN
+    inFile: seq[string]        # --inFile, --inf
+    notInFile: seq[string]     # --notinFile, --ninf
+    inContext: seq[string]     # --inContext, --inc
+    notInContext: seq[string]  # --notinContext, --ninc
+    checkBin: Bin              # --bin, --text
   SearchOptComp[Pat] = tuple  # a compiled version of the previous
     pattern: Pat
-    checkMatch: Pat
-    checkNoMatch: Pat
+    inFile: seq[Pat]
+    notInFile: seq[Pat]
+    inContext: seq[Pat]
+    notInContext: seq[Pat]
   SinglePattern[PAT] = tuple  # compile single pattern for replacef
     pattern: PAT
   Column = tuple  # current column info for the cropping (--limit) feature
@@ -748,7 +657,7 @@ template updateCounters(output: Output) =
 proc printInfo(filename:string, output: Output) =
   case output.kind
   of openError:
-    printError("can not open path " & filename & " " & output.msg)
+    printError("cannot open path '" & filename & "': " & output.msg)
   of rejected:
     if optVerbose in options:
       echo "(rejected: ", output.reason, ")"
@@ -818,6 +727,9 @@ iterator searchFile(pattern: Pattern; buffer: string): Output =
                      pre: pre,
                      match: move(curMi))
     i = t.last+1
+  when typeof(pattern) is Regex:
+    if buffer.len > MaxReBufSize:
+      yield Output(kind: openError, msg: "PCRE size limit is " & $MaxReBufSize)
 
 func detectBin(buffer: string): bool =
   for i in 0 ..< min(1024, buffer.len):
@@ -903,6 +815,33 @@ template declareCompiledPatterns(compiledStruct: untyped,
     body
   {.hint[XDeclaredButNotUsed]: on.}
 
+template ensureIncluded(includePat: seq[Pattern], str: string,
+                        body: untyped) =
+  if includePat.len != 0:  # an empty pattern list never triggers `body`
+    var missing = true
+    for wanted in includePat:
+      if contains(str, wanted):
+        missing = false  # a single matching pattern is sufficient
+        break
+    if missing:  # `str` matched none of the given patterns
+      body
+
+template ensureExcluded(excludePat: seq[Pattern], str: string,
+                        body: untyped) =
+  {.warning[UnreachableCode]: off.}  # `break` is dead if `body` returns
+  for forbidden in excludePat:
+    if contains(str, forbidden, 0):
+      body  # runs once, for the first pattern found in `str`
+      break
+  {.warning[UnreachableCode]: on.}
+
+func checkContext(context: string, searchOptC: SearchOptComp[Pattern]): bool =
+  result = true  # accepted unless one of the two filters below rejects it
+  ensureIncluded searchOptC.inContext, context:
+    return false  # inContext patterns exist, but none occurs in `context`
+  ensureExcluded searchOptC.notInContext, context:
+    return false  # a notInContext pattern occurs in `context`
+
 iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string,
                      yieldContents=false): Output =
   var buffer: string
@@ -932,13 +871,13 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string,
         reason = "text file"
 
     if not reject:
-      if searchOpt.checkMatch != "":
-        reject = not contains(buffer, searchOptC.checkMatch, 0)
+      ensureIncluded searchOptC.inFile, buffer:
+        reject = true
         reason = "doesn't contain a requested match"
 
     if not reject:
-      if searchOpt.checkNoMatch != "":
-        reject = contains(buffer, searchOptC.checkNoMatch, 0)
+      ensureExcluded searchOptC.notInFile, buffer:
+        reject = true
         reason = "contains a forbidden match"
 
     if reject:
@@ -948,20 +887,50 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string,
     else:
       var found = false
       var cnt = 0
-      for output in searchFile(searchOptC.pattern, buffer):
-        found = true
-        if optCount notin options:
-          yield output
-        else:
-          if output.kind in {blockFirstMatch, blockNextMatch}:
-            inc(cnt)
+      let skipCheckContext = (searchOpt.notInContext.len == 0 and
+                              searchOpt.inContext.len == 0)
+      if skipCheckContext:
+        for output in searchFile(searchOptC.pattern, buffer):
+          found = true
+          if optCount notin options:
+            yield output
+          else:
+            if output.kind in {blockFirstMatch, blockNextMatch}:
+              inc(cnt)
+      else:
+        var context: string
+        var outputAccumulator: seq[Output]
+        for outp in searchFile(searchOptC.pattern, buffer):
+          if outp.kind in {blockFirstMatch, blockNextMatch}:
+            outputAccumulator.add outp
+            context.add outp.pre
+            context.add outp.match.match
+          elif outp.kind == blockEnd:
+            outputAccumulator.add outp
+            context.add outp.blockEnding
+            # context has been formed, now check it:
+            if checkContext(context, searchOptC):
+              found = true
+              for output in outputAccumulator:
+                if optCount notin options:
+                  yield output
+                else:
+                  if output.kind in {blockFirstMatch, blockNextMatch}:
+                    inc(cnt)
+            context = ""
+            outputAccumulator.setLen 0
+      # end `if skipCheckContext`.
       if optCount in options and cnt > 0:
         yield Output(kind: justCount, matches: cnt)
       if yieldContents and found and optCount notin options:
         yield Output(kind: fileContents, buffer: move(buffer))
 
-
-proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool =
+proc hasRightPath(path: string, walkOptC: WalkOptComp[Pattern]): bool =
+  if not (
+      walkOpt.extensions.len > 0 or walkOpt.notExtensions.len > 0 or
+      walkOpt.filename.len > 0 or walkOpt.notFilename.len > 0 or
+      walkOpt.notDirPath.len > 0 or walkOpt.dirPath.len > 0):
+    return true
   let filename = path.lastPathPart
   let ex = filename.splitFile.ext.substr(1) # skip leading '.'
   if walkOpt.extensions.len != 0:
@@ -971,31 +940,44 @@ proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool =
         matched = true
         break
     if not matched: return false
-  for x in walkOpt.skipExtensions:
+  for x in walkOpt.notExtensions:
     if os.cmpPaths(x, ex) == 0: return false
-  if walkOptC.includeFile.len != 0:
-    var matched = false
-    for pat in walkOptC.includeFile:
-      if filename.contains(pat):
-        matched = true
-        break
-    if not matched: return false
-  for pat in walkOptC.excludeFile:
-    if filename.contains(pat): return false
-  let dirname = path.parentDir
-  if walkOptC.includeDir.len != 0:
-    var matched = false
-    for pat in walkOptC.includeDir:
-      if dirname.contains(pat):
-        matched = true
+  ensureIncluded walkOptC.filename, filename:
+    return false
+  ensureExcluded walkOptC.notFilename, filename:
+    return false
+  let parent = path.parentDir
+  ensureExcluded walkOptC.notDirPath, parent:
+    return false
+  ensureIncluded walkOptC.dirPath, parent:
+    return false
+  result = true
+
+proc isRightDirectory(path: string, walkOptC: WalkOptComp[Pattern]): bool =
+  ## True if no --dirname is given or some component of directory `path`
+  ## matches a --dirname pattern; the verdict applies to the files in `path`.
+  if walkOptC.dirname.len > 0:
+    var badDirname = false
+    var (nextParent, dirname) = splitPath(normalizePathEnd(path))
+    # a trailing separator ("src/") would give an empty tail and skip the loop
+    while dirname != "":  # walk the components from the last one upwards
+      badDirname = false
+      ensureIncluded walkOptC.dirname, dirname:
+        badDirname = true
+      if not badDirname:
         break
-    if not matched: return false
+      (nextParent, dirname) = splitPath(nextParent)
+    if badDirname:  # badDirname was set to true for all the dirs
+      return false
   result = true
 
-proc hasRightDirectory(path: string, walkOptC: WalkOptComp[Pattern]): bool =
-  let dirname = path.lastPathPart
-  for pat in walkOptC.excludeDir:
-    if dirname.contains(pat): return false
+proc descendToDirectory(path: string, walkOptC: WalkOptComp[Pattern]): bool =
+  ## Tells whether directory `path` may be entered: false as soon as its last
+  ## path component matches a --notdirname pattern, so it is skipped early.
+  if walkOptC.notDirname.len != 0:
+    let lastPart = path.lastPathPart
+    ensureExcluded walkOptC.notDirname, lastPart:
+      return false
   result = true
 
 iterator walkDirBasic(dir: string, walkOptC: WalkOptComp[Pattern]): string
@@ -1004,22 +986,24 @@ iterator walkDirBasic(dir: string, walkOptC: WalkOptComp[Pattern]): string
   var timeFiles = newSeq[(times.Time, string)]()
   while dirStack.len > 0:
     let d = dirStack.pop()
+    let rightDirForFiles = d.isRightDirectory(walkOptC)
     var files = newSeq[string]()
     var dirs = newSeq[string]()
-    for kind, path in walkDir(d):
+    for kind, path in walkDir(d, skipSpecial = true):
       case kind
       of pcFile:
-        if path.hasRightFileName(walkOptC):
+        if path.hasRightPath(walkOptC) and rightDirForFiles:
           files.add(path)
       of pcLinkToFile:
-        if optFollow in options and path.hasRightFileName(walkOptC):
+        if optFollow in options and path.hasRightPath(walkOptC) and
+            rightDirForFiles:
           files.add(path)
       of pcDir:
-        if optRecursive in options and path.hasRightDirectory(walkOptC):
+        if optRecursive in options and path.descendToDirectory(walkOptC):
           dirs.add path
       of pcLinkToDir:
         if optFollow in options and optRecursive in options and
-           path.hasRightDirectory(walkOptC):
+            path.descendToDirectory(walkOptC):
           dirs.add path
     if sortTime:  # sort by time - collect files before yielding
       for file in files:
@@ -1044,10 +1028,12 @@ iterator walkDirBasic(dir: string, walkOptC: WalkOptComp[Pattern]): string
 iterator walkRec(paths: seq[string]): tuple[error: string, filename: string]
          {.closure.} =
   declareCompiledPatterns(walkOptC, WalkOptComp):
-    walkOptC.excludeFile.add walkOpt.excludeFile.compileArray()
-    walkOptC.includeFile.add walkOpt.includeFile.compileArray()
-    walkOptC.includeDir.add  walkOpt.includeDir.compileArray()
-    walkOptC.excludeDir.add  walkOpt.excludeDir.compileArray()
+    walkOptC.notFilename.add walkOpt.notFilename.compileArray()
+    walkOptC.filename.add    walkOpt.filename.compileArray()
+    walkOptC.dirname.add     walkOpt.dirname.compileArray()
+    walkOptC.notDirname.add  walkOpt.notDirname.compileArray()
+    walkOptC.dirPath.add     walkOpt.dirPath.compileArray()
+    walkOptC.notDirPath.add  walkOpt.notDirPath.compileArray()
     for path in paths:
       if dirExists(path):
         for p in walkDirBasic(path, walkOptC):
@@ -1126,8 +1112,10 @@ template processFileResult(pattern: Pattern; filename: string,
 proc run1Thread() =
   declareCompiledPatterns(searchOptC, SearchOptComp):
     compile1Pattern(searchOpt.pattern, searchOptC.pattern)
-    compile1Pattern(searchOpt.checkMatch, searchOptC.checkMatch)
-    compile1Pattern(searchOpt.checkNoMatch, searchOptC.checkNoMatch)
+    searchOptC.inFile.add searchOpt.inFile.compileArray()
+    searchOptC.notInFile.add searchOpt.notInFile.compileArray()
+    searchOptC.inContext.add searchOpt.inContext.compileArray()
+    searchOptC.notInContext.add searchOpt.notInContext.compileArray()
     if optPipe in options:
       processFileResult(searchOptC.pattern, "-",
                         processFile(searchOptC, "-",
@@ -1169,8 +1157,10 @@ proc worker(initSearchOpt: SearchOpt) {.thread.} =
   searchOpt = initSearchOpt  # init thread-local var
   declareCompiledPatterns(searchOptC, SearchOptComp):
     compile1Pattern(searchOpt.pattern, searchOptC.pattern)
-    compile1Pattern(searchOpt.checkMatch, searchOptC.checkMatch)
-    compile1Pattern(searchOpt.checkNoMatch, searchOptC.checkNoMatch)
+    searchOptC.inFile.add searchOpt.inFile.compileArray()
+    searchOptC.notInFile.add searchOpt.notInFile.compileArray()
+    searchOptC.inContext.add searchOpt.inContext.compileArray()
+    searchOptC.notInContext.add searchOpt.notInContext.compileArray()
     while true:
       let (fileNo, filename) = searchRequestsChan.recv()
       var fileResult: FileResult
@@ -1268,6 +1258,11 @@ for kind, key, val in getopt():
     else:
       paths.add(key)
   of cmdLongOption, cmdShortOption:
+    proc addNotEmpty(s: var seq[string], name: string) =
+      if name.len == 0:  # e.g. `--opt` written without `:value`
+        reportError("empty string given for option --" & key &
+                    " (did you forget `:`?)")
+      add(s, name)  # reached whenever reportError returns
     case normalize(key)
     of "find", "f": incl(options, optFind)
     of "replace", "!": incl(options, optReplace)
@@ -1293,15 +1288,36 @@ for kind, key, val in getopt():
         nWorkers = countProcessors()
       else:
         nWorkers = parseNonNegative(val, key)
-    of "ext": walkOpt.extensions.add val.split('|')
-    of "noext", "no-ext": walkOpt.skipExtensions.add val.split('|')
-    of "excludedir", "exclude-dir",   "ed": walkOpt.excludeDir.add val
-    of "includedir", "include-dir",   "id": walkOpt.includeDir.add val
-    of "includefile", "include-file", "if": walkOpt.includeFile.add val
-    of "excludefile", "exclude-file", "ef": walkOpt.excludeFile.add val
-    of "match": searchOpt.checkMatch = val
-    of "nomatch":
-      searchOpt.checkNoMatch = val
+    of "extensions", "ex", "ext": walkOpt.extensions.add val.split('|')
+    of "nextensions", "notextensions", "nex", "notex",
+       "noext", "no-ext":  # 2 deprecated options
+      walkOpt.notExtensions.add val.split('|')
+    of "dirname",  "di":
+      walkOpt.dirname.addNotEmpty val
+    of "ndirname", "notdirname", "ndi", "notdi",
+       "excludedir", "exclude-dir", "ed":  # 3 deprecated options
+      walkOpt.notDirname.addNotEmpty val
+    of "dirpath", "dirp",
+       "includedir", "include-dir", "id":  # 3 deprecated options
+      walkOpt.dirPath.addNotEmpty val
+    of "ndirpath", "notdirpath", "ndirp", "notdirp":
+      walkOpt.notDirPath.addNotEmpty val
+    of "filename", "fi",
+       "includefile", "include-file", "if":  # 3 deprecated options
+      walkOpt.filename.addNotEmpty val
+    of "nfilename", "nfi", "notfilename", "notfi",
+       "excludefile", "exclude-file", "ef":  # 3 deprecated options
+      walkOpt.notFilename.addNotEmpty val
+    of "infile", "inf",
+       "matchfile", "match", "mf":  # 3 deprecated options
+      searchOpt.inFile.addNotEmpty val
+    of "ninfile", "notinfile", "ninf", "notinf",
+       "nomatchfile", "nomatch", "nf":  # 3 options are deprecated
+      searchOpt.notInFile.addNotEmpty val
+    of "incontext", "inc":
+      searchOpt.inContext.addNotEmpty val
+    of "nincontext", "notincontext", "ninc", "notinc":
+      searchOpt.notInContext.addNotEmpty val
     of "bin":
       case val
       of "on": searchOpt.checkBin = biOn