summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorJuan Carlos <juancarlospaco@gmail.com>2023-09-30 01:31:28 -0300
committerGitHub <noreply@github.com>2023-09-30 06:31:28 +0200
commita38e3dcb1f0c47b89f07b343a259c4b10a333ac9 (patch)
tree46a51136d0afb9bac3c50cf4ed2b63bb973f6b54
parent5eeafbf55029816e5022d9d6aa9ed3b0acf2d3ba (diff)
downloadNim-a38e3dcb1f0c47b89f07b343a259c4b10a333ac9.tar.gz
copyFile with bufferSize instead of hardcoded value (#22769)
- `copyFile` allows to specify `bufferSize` instead of hardcoded wrong
value. Tiny diff.


# Performance

- 1200% Performance improvement.


# Check it yourself

Execute:

```bash
for i in $(seq 0 10); do
  bs=$((1024*2**$i))
  printf "%7s Kb\t" $bs
  timeout --foreground -sINT 2 dd bs=$bs if=/dev/zero of=/dev/null 2>&1 | sed -n 's/.* \([0-9.,]* [GM]B\/s\)/\1/p'
done
```

(This script can be ported to PowerShell for Windows I guess, it works
in Windows MinGW Bash anyways).


# Stats

- Hardcoded `8192` or `8000` Kb bufferSize gives `5` GB/s.
- Setting `262144` Kb bufferSize gives `65` GB/s (script suggestion).

---------

Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
-rw-r--r--changelog.md1
-rw-r--r--lib/std/private/osfiles.nim19
2 files changed, 11 insertions, 9 deletions
diff --git a/changelog.md b/changelog.md
index 74020e8fa..1fb6377a2 100644
--- a/changelog.md
+++ b/changelog.md
@@ -8,6 +8,7 @@
 
 [//]: # "Changes:"
 
+- Changed `std/osfiles.copyFile` to allow to specify `bufferSize` instead of a hardcoded one.
 
 [//]: # "Additions:"
 
diff --git a/lib/std/private/osfiles.nim b/lib/std/private/osfiles.nim
index 4596adfb2..b7957fa4f 100644
--- a/lib/std/private/osfiles.nim
+++ b/lib/std/private/osfiles.nim
@@ -173,7 +173,7 @@ type
 
 const copyFlagSymlink = {cfSymlinkAsIs, cfSymlinkFollow, cfSymlinkIgnore}
 
-proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
+proc copyFile*(source, dest: string, options = {cfSymlinkFollow}; bufferSize = 16_384) {.rtl,
   extern: "nos$1", tags: [ReadDirEffect, ReadIOEffect, WriteIOEffect],
   noWeirdTarget.} =
   ## Copies a file from `source` to `dest`, where `dest.parentDir` must exist.
@@ -202,6 +202,7 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
   ## On OSX, `copyfile` C api will be used (available since OSX 10.5) unless
   ## `-d:nimLegacyCopyFile` is used.
   ##
+  ## `copyFile` allows to specify `bufferSize` to improve I/O performance.
   ## See also:
   ## * `CopyFlag enum`_
   ## * `copyDir proc`_
@@ -210,8 +211,7 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
   ## * `removeFile proc`_
   ## * `moveFile proc`_
 
-  doAssert card(copyFlagSymlink * options) == 1, "There should be exactly " &
-                                                 "one cfSymlink* in options"
+  doAssert card(copyFlagSymlink * options) == 1, "There should be exactly one cfSymlink* in options"
   let isSymlink = source.symlinkExists
   if isSymlink and (cfSymlinkIgnore in options or defined(windows)):
     return
@@ -238,15 +238,14 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
         if status2 != 0: raiseOSError(osLastError(), $(source, dest))
       else:
         # generic version of copyFile which works for any platform:
-        const bufSize = 8000 # better for memory manager
         var d, s: File
         if not open(s, source):raiseOSError(osLastError(), source)
         if not open(d, dest, fmWrite):
           close(s)
           raiseOSError(osLastError(), dest)
-        var buf = alloc(bufSize)
+        var buf = alloc(bufferSize)
         while true:
-          var bytesread = readBuffer(s, buf, bufSize)
+          var bytesread = readBuffer(s, buf, bufferSize)
           if bytesread > 0:
             var byteswritten = writeBuffer(d, buf, bytesread)
             if bytesread != byteswritten:
@@ -254,13 +253,13 @@ proc copyFile*(source, dest: string, options = {cfSymlinkFollow}) {.rtl,
               close(s)
               close(d)
               raiseOSError(osLastError(), dest)
-          if bytesread != bufSize: break
+          if bytesread != bufferSize: break
         dealloc(buf)
         close(s)
         flushFile(d)
         close(d)
 
-proc copyFileToDir*(source, dir: string, options = {cfSymlinkFollow})
+proc copyFileToDir*(source, dir: string, options = {cfSymlinkFollow}; bufferSize = 16_384)
   {.noWeirdTarget, since: (1,3,7).} =
   ## Copies a file `source` into directory `dir`, which must exist.
   ##
@@ -268,12 +267,14 @@ proc copyFileToDir*(source, dir: string, options = {cfSymlinkFollow})
   ## if `source` is a symlink, copies the file symlink points to. `options` is
   ## ignored on Windows: symlinks are skipped.
   ##
+  ## `copyFileToDir` allows to specify `bufferSize` to improve I/O performance.
+  ##
   ## See also:
   ## * `CopyFlag enum`_
   ## * `copyFile proc`_
   if dir.len == 0: # treating "" as "." is error prone
     raise newException(ValueError, "dest is empty")
-  copyFile(source, dir / source.lastPathPart, options)
+  copyFile(source, dir / source.lastPathPart, options, bufferSize)
 
 
 proc copyFileWithPermissions*(source, dest: string,