about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2024-04-18 18:30:53 +0200
committer bptato <nincsnevem662@gmail.com> 2024-04-18 18:30:53 +0200
commit 38db6ab5be80b255fe40df715adc3b5852875cdd (patch)
tree 328eada3b571e475903be0df61c5abf09c022d8b
parent 5bb9542045ff6dbb6c357eb4dd0a7616dba33a9a (diff)
download chawan-38db6ab5be80b255fe40df715adc3b5852875cdd.tar.gz
sandbox: seccomp support on Linux
We use libseccomp, which is now a semi-mandatory dependency on Linux.
(You can still build without it, but only if you pass a scary long flag
to make.)

For this to work I had to disable getTimezoneOffset, which would
otherwise call localtime_r which in turn reads in some files from
/usr/share/zoneinfo.  To allow this we would have to give unrestricted
openat(2) access to buffer processes, which is unacceptable.

(Giving websites access to the local timezone is a fingerprinting vector
so if this ever gets fixed then it should be an opt-in config setting.)

This patch also includes misc fixes to buffer cloning, and fixes the
LIBEXECDIR override in the makefile so that it is actually useful.
-rw-r--r-- Makefile 15
-rw-r--r-- README.md 3
-rw-r--r-- doc/build.md 9
-rw-r--r-- lib/quickjs/quickjs.c 6
-rw-r--r-- src/bindings/libseccomp.nim 49
-rw-r--r-- src/io/serversocket.nim 26
-rw-r--r-- src/io/socketstream.nim 5
-rw-r--r-- src/loader/loader.nim 9
-rw-r--r-- src/local/client.nim 4
-rw-r--r-- src/local/container.nim 26
-rw-r--r-- src/local/pager.nim 2
-rw-r--r-- src/server/buffer.nim 53
-rw-r--r-- src/server/forkserver.nim 25
-rw-r--r-- src/utils/sandbox.nim 120
14 files changed, 302 insertions, 50 deletions
diff --git a/Makefile b/Makefile
index 988b42d5..ed4365ef 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ LIBEXECDIR ?= '$${%CHA_BIN_DIR}/../libexec/chawan'
 ifeq ($(LIBEXECDIR),'$${%CHA_BIN_DIR}/../libexec/chawan')
 LIBEXECDIR_CHAWAN = "$(DESTDIR)$(PREFIX)/libexec/chawan"
 else
-LIBEXECDIR_CHAWAN = $(LIBEXECDIR)/chawan
+LIBEXECDIR_CHAWAN = $(LIBEXECDIR)
 endif
 
 # These paths are quoted in recipes.
@@ -28,6 +28,11 @@ OUTDIR_LIBEXEC = $(OUTDIR_TARGET)/libexec/chawan
 OUTDIR_CGI_BIN = $(OUTDIR_LIBEXEC)/cgi-bin
 OUTDIR_MAN = $(OUTDIR_TARGET)/share/man
 
+# I won't take this from the environment for obvious reasons. Please override it
+# in the make command if you must, or (preferably) fix your environment so it's
+# not needed.
+DANGER_DISABLE_SANDBOX = 0
+
 # Nim compiler flags
 ifeq ($(TARGET),debug)
 FLAGS += -d:debug --debugger:native
@@ -55,7 +60,8 @@ $(OUTDIR_BIN)/cha: lib/libquickjs.a src/*.nim src/**/*.nim src/**/*.c res/* \
 		res/**/* res/map/idna_gen.nim nim.cfg
 	@mkdir -p "$(OUTDIR_BIN)"
 	$(NIMC) --nimcache:"$(OBJDIR)/$(TARGET)/cha" -d:libexecPath=$(LIBEXECDIR) \
-		$(FLAGS) -o:"$(OUTDIR_BIN)/cha" src/main.nim
+                -d:disableSandbox=$(DANGER_DISABLE_SANDBOX) $(FLAGS) \
+		-o:"$(OUTDIR_BIN)/cha" src/main.nim
 	ln -sf "$(OUTDIR)/$(TARGET)/bin/cha" cha
 
 $(OUTDIR_BIN)/mancha: adapter/tools/mancha.nim
@@ -125,10 +131,11 @@ $(OUTDIR_CGI_BIN)/spartan: adapter/protocol/spartan
 
 $(OUTDIR_CGI_BIN)/http: adapter/protocol/http.nim adapter/protocol/curlwrap.nim \
 		adapter/protocol/curlerrors.nim adapter/protocol/curl.nim \
-		src/utils/twtstr.nim
+		src/utils/twtstr.nim src/utils/sandbox.nim
 	@mkdir -p "$(OUTDIR_CGI_BIN)"
 	$(NIMC) $(FLAGS) --nimcache:"$(OBJDIR)/$(TARGET)/http" -d:curlLibName:$(CURLLIBNAME) \
-		-o:"$(OUTDIR_CGI_BIN)/http" adapter/protocol/http.nim
+                -d:disableSandbox=$(DANGER_DISABLE_SANDBOX) \
+                -o:"$(OUTDIR_CGI_BIN)/http" adapter/protocol/http.nim
 
 $(OUTDIR_CGI_BIN)/about: adapter/protocol/about.nim res/chawan.html \
 		res/license.html
diff --git a/README.md b/README.md
index 5449a0f8..77550182 100644
--- a/README.md
+++ b/README.md
@@ -27,11 +27,12 @@ supported yet.)
 	* zlib: <https://zlib.net/>
 	* pkg-config, pkgconf, or similar (must be found as "pkg-config" in your
 	  `$PATH`)
+	* (Linux only) libseccomp: <https://github.com/seccomp/libseccomp>
 	* If you are using a system where the default make program is not GNU
 	  make (e.g. BSD), install gmake and use that in the following steps.
 	* Optional: a termcap library; e.g. ncurses comes with one.
 	* TLDR for Debian:
-	  `apt install libcurl4-openssl-dev zlib1g-dev pkg-config make ncurses-base`
+	  `apt install libcurl4-openssl-dev zlib1g-dev pkg-config make ncurses-base libseccomp-dev`
 4. Download parts of Chawan found in other repositories: `make submodule`
 5. Run `make`. (By default, this will build the whole project in release mode;
    for details, see [doc/build.md](doc/build.md).)
diff --git a/doc/build.md b/doc/build.md
index bb9f929d..3d337d82 100644
--- a/doc/build.md
+++ b/doc/build.md
@@ -37,9 +37,16 @@ also override them by setting an environment variable with the same name.
   man pages. The default setting expands to `/usr/local/share/man/man1`, etc.
 * `CURLLIBNAME`: Change the name of the libcurl shared object file.
 * `LIBEXECDIR`: Path to your libexec directory; by default, it is relative
-  to wherever the binary is placed when it is executed.<BR>
+  to wherever the binary is placed when it is executed (i.e. after
+  installation it would resolve to `/usr/local/libexec/chawan`).<BR>
   WARNING: Unlike other path names, this must be quoted if your path contains
   spaces!
+* `DANGER_DISABLE_SANDBOX`: Set it to 1 to disable OS-level sandboxing even
+  on systems where we have built-in sandboxing support. Note that this is
+  *not* taken from the environment variables; you must use it like
+  `make DANGER_DISABLE_SANDBOX=1`.<BR>
+  WARNING: as the name suggests, this is rarely an optimal solution to whatever
+  problem you are facing.
 
 ## Phony targets
 
diff --git a/lib/quickjs/quickjs.c b/lib/quickjs/quickjs.c
index 0da7e13f..fc5a0357 100644
--- a/lib/quickjs/quickjs.c
+++ b/lib/quickjs/quickjs.c
@@ -43432,6 +43432,11 @@ static const JSCFunctionListEntry js_math_obj[] = {
    between UTC time and local time 'd' in minutes */
 static int getTimezoneOffset(int64_t time)
 {
+    /* this is a fingerprinting vector, and doesn't work with seccomp
+     * anyway because the glibc localtime_r tries to openat(2) files
+     * in /usr/share/zoneinfo. */
+    return 0;
+#if 0
     time_t ti;
     int res;
 
@@ -43478,6 +43483,7 @@ static int getTimezoneOffset(int64_t time)
     }
 #endif
     return res;
+#endif
 }
 
 #if 0
diff --git a/src/bindings/libseccomp.nim b/src/bindings/libseccomp.nim
new file mode 100644
index 00000000..81a6e969
--- /dev/null
+++ b/src/bindings/libseccomp.nim
@@ -0,0 +1,49 @@
+import std/macros
+
+const seccomp = (proc(): string =
+  let res = staticExec("pkg-config --libs --silence-errors libseccomp")
+  if res == "": # pkg-config printed no linker flags for libseccomp
+    error("Couldn't find libseccomp on your computer!  Please install " &
+      "libseccomp (e.g. apt install libseccomp-dev), or build with " &
+      "`make DANGER_DISABLE_SANDBOX=1'.") # name must match the Makefile
+  return res # linker flags; handed to the linker through the passl pragma
+)()
+
+type
+  scmp_filter_ctx* = distinct pointer
+
+  scmp_datum_t* = uint64
+
+  scmp_compare* {.size: sizeof(cint).} = enum
+    N_SCMP_CMP_MIN = 0
+    SCMP_CMP_NE = 1 # not equal
+    SCMP_CMP_LT = 2 # less than
+    SCMP_CMP_LE = 3 # less than or equal
+    SCMP_CMP_EQ = 4 # equal
+    SCMP_CMP_GE = 5 # greater than or equal
+    SCMP_CMP_GT = 6 # greater than
+    SCMP_CMP_MASKED_EQ = 7 # masked equality
+
+  scmp_arg_cmp* = object
+    arg*: cuint
+    op*: scmp_compare
+    datum_a*: scmp_datum_t
+    datum_b*: scmp_datum_t
+
+{.push importc.}
+{.passl: seccomp.}
+
+const SCMP_ACT_KILL_PROCESS* = 0x80000000u32
+const SCMP_ACT_ALLOW* = 0x7FFF0000u32
+const SCMP_ACT_TRAP* = 0x00030000u32
+
+proc seccomp_init*(def_action: uint32): scmp_filter_ctx
+proc seccomp_reset*(ctx: scmp_filter_ctx; def_action: uint32): cint
+proc seccomp_syscall_resolve_name*(name: cstring): cint
+proc seccomp_syscall_resolve_name_rewrite*(name: cstring): cint
+proc seccomp_rule_add*(ctx: scmp_filter_ctx; action: uint32; syscall: cint;
+  arg_cnt: cuint): cint {.varargs.}
+proc seccomp_load*(ctx: scmp_filter_ctx): cint
+proc seccomp_release*(ctx: scmp_filter_ctx)
+
+{.pop.}
diff --git a/src/io/serversocket.nim b/src/io/serversocket.nim
index ea5bc97d..dff6de70 100644
--- a/src/io/serversocket.nim
+++ b/src/io/serversocket.nim
@@ -5,9 +5,10 @@ import std/os
 when defined(posix):
   import std/posix
 
-type ServerSocket* = object
+type ServerSocket* = ref object
   sock*: Socket
   path*: string
+  dfd: int
 
 const SocketPathPrefix = "cha_sock_"
 proc getSocketName*(pid: int): string =
@@ -29,6 +30,13 @@ when defined(freebsd):
   proc bindat_unix_from_c(dfd, sock: cint; path: cstring; pathlen: cint): cint
     {.importc.}
 
+proc initServerSocket*(fd: SocketHandle; sockDir: string; pid, sockDirFd: int):
+    ServerSocket =
+  let sock = newSocket(fd, Domain.AF_UNIX, SockType.SOCK_STREAM,
+    Protocol.IPPROTO_IP, buffered = false)
+  let path = getSocketPath(sockDir, pid)
+  return ServerSocket(sock: sock, path: path, dfd: sockDirFd)
+
 proc initServerSocket*(sockDir: string; sockDirFd, pid: int; blocking = true):
     ServerSocket =
   let sock = newSocket(Domain.AF_UNIX, SockType.SOCK_STREAM,
@@ -37,7 +45,7 @@ proc initServerSocket*(sockDir: string; sockDirFd, pid: int; blocking = true):
     sock.getFd().setBlocking(false)
   let path = getSocketPath(sockDir, pid)
   if sockDirFd == -1:
-    discard unlink(cstring(path))
+    discard tryRemoveFile(path)
     if bind_unix_from_c(cint(sock.getFd()), cstring(path), cint(path.len)) != 0:
       raiseOSError(osLastError())
   else:
@@ -47,9 +55,17 @@ proc initServerSocket*(sockDir: string; sockDirFd, pid: int; blocking = true):
       if bindat_unix_from_c(cint(sockDirFd), cint(sock.getFd()), cstring(name),
           cint(name.len)) != 0:
         raiseOSError(osLastError())
+    else:
+      # shouldn't have sockDirFd on other architectures
+      doAssert false
   listen(sock)
-  return ServerSocket(sock: sock, path: path)
+  return ServerSocket(sock: sock, path: path, dfd: sockDirFd)
 
-proc close*(ssock: ServerSocket) =
+proc close*(ssock: ServerSocket; unlink = true) =
   close(ssock.sock)
-  discard unlink(cstring(ssock.path))
+  if unlink: # unlink = false closes the socket but leaves its file in place
+    when defined(freebsd):
+      if ssock.dfd != -1: # bound relative to dfd, so unlink by name, not path
+        discard unlinkat(cint(ssock.dfd), cstring(ssock.path.lastPathPart), 0)
+        return
+    discard tryRemoveFile(ssock.path)
diff --git a/src/io/socketstream.nim b/src/io/socketstream.nim
index 13fcd664..5744ad32 100644
--- a/src/io/socketstream.nim
+++ b/src/io/socketstream.nim
@@ -50,7 +50,7 @@ method setBlocking*(s: SocketStream; blocking: bool) =
   s.blocking = blocking
   s.source.getFd().setBlocking(blocking)
 
-method seek*(s: PosixStream; off: int) =
+method seek*(s: SocketStream; off: int) =
   doAssert false
 
 method sclose*(s: SocketStream) =
@@ -83,6 +83,9 @@ proc connectAtSocketStream0(socketDir: string; baseFd, pid: int;
       if connectat_unix_from_c(cint(baseFd), cint(sock.getFd()), cstring(name),
           cint(name.len)) != 0:
         raiseOSError(osLastError())
+    else:
+      # shouldn't have sockDirFd on other architectures
+      doAssert false
   return SocketStream(
     source: sock,
     fd: cint(sock.getFd()),
diff --git a/src/loader/loader.nim b/src/loader/loader.nim
index 207600f2..6a8e9164 100644
--- a/src/loader/loader.nim
+++ b/src/loader/loader.nim
@@ -258,7 +258,7 @@ proc addFd(ctx: LoaderContext; handle: LoaderHandle) =
   ctx.outputMap[output.ostream.fd] = output
 
 type HandleReadResult = enum
-  hrrDone, hrrUnregister
+  hrrDone, hrrUnregister, hrrBrokenPipe
 
 # Called whenever there is more data available to read.
 proc handleRead(ctx: LoaderContext; handle: LoaderHandle;
@@ -296,7 +296,7 @@ proc handleRead(ctx: LoaderContext; handle: LoaderHandle;
     except ErrorAgain: # retry later
       break
     except ErrorBrokenPipe: # sender died; stop streaming
-      return hrrUnregister
+      return hrrBrokenPipe
   hrrDone
 
 # stream is a regular file, so we can't select on it.
@@ -315,7 +315,7 @@ proc loadStreamRegular(ctx: LoaderContext; handle, cachedHandle: LoaderHandle) =
       output.registered = false
     handle.outputs.del(i)
   for output in handle.outputs:
-    if r == hrrUnregister:
+    if r == hrrBrokenPipe:
       output.ostream.sclose()
       output.ostream = nil
     elif cachedHandle != nil:
@@ -823,7 +823,7 @@ proc runFileLoader*(fd: cint; config: LoaderConfig) =
           let handle = ctx.handleMap[event.fd]
           case ctx.handleRead(handle, unregWrite)
           of hrrDone: discard
-          of hrrUnregister: unregRead.add(handle)
+          of hrrUnregister, hrrBrokenPipe: unregRead.add(handle)
       if Write in event.events:
         ctx.handleWrite(ctx.outputMap[event.fd], unregWrite)
       if Error in event.events:
@@ -1111,7 +1111,6 @@ proc removeClient*(loader: FileLoader; pid: int) =
       w.swrite(pid)
     stream.sclose()
 
-
 when defined(freebsd):
   let O_DIRECTORY* {.importc, header: "<fcntl.h>", noinit.}: cint
 
diff --git a/src/local/client.nim b/src/local/client.nim
index 92fa4660..73b17c99 100644
--- a/src/local/client.nim
+++ b/src/local/client.nim
@@ -24,6 +24,7 @@ import io/dynstream
 import io/filestream
 import io/posixstream
 import io/promise
+import io/serversocket
 import io/socketstream
 import js/base64
 import js/console
@@ -407,6 +408,9 @@ proc acceptBuffers(client: Client) =
     let container = item.container
     let stream = connectSocketStream(client.config.external.tmpdir,
       client.loader.sockDirFd, container.process)
+    # unlink here; on Linux we can't unlink from the buffer :/
+    discard tryRemoveFile(getSocketPath(client.config.external.tmpdir,
+      container.process))
     if stream == nil:
       pager.alert("Error: failed to set up buffer")
       continue
diff --git a/src/local/container.nim b/src/local/container.nim
index e93bf610..b88161b8 100644
--- a/src/local/container.nim
+++ b/src/local/container.nim
@@ -1,14 +1,15 @@
 import std/deques
+import std/net
 import std/options
+import std/os
+import std/posix
 import std/unicode
 
-when defined(posix):
-  import std/posix
-
 import config/config
 import config/mimetypes
 import io/dynstream
 import io/promise
+import io/serversocket
 import io/socketstream
 import js/javascript
 import js/jstypes
@@ -184,14 +185,29 @@ proc newContainer*(config: BufferConfig; loaderConfig: LoaderClientConfig;
 func location(container: Container): URL {.jsfget.} =
   return container.url
 
-proc clone*(container: Container; newurl: URL): Promise[Container] =
+proc clone*(container: Container; newurl: URL; loader: FileLoader):
+    Promise[Container] =
+  if container.iface == nil:
+    return nil
   let url = if newurl != nil:
     newurl
   else:
     container.url
-  return container.iface.clone(url).then(proc(pid: int): Container =
+  let p = container.iface.clone(url)
+  # create a server socket, pass it on to the buffer, then move it to
+  # the expected path after the buffer forked itself
+  #TODO this is very ugly
+  let ssock = initServerSocket(loader.sockDir, loader.sockDirFd,
+    loader.clientPid)
+  SocketStream(container.iface.stream.source)
+    .sendFileHandle(FileHandle(ssock.sock.getFd()))
+  ssock.sock.close()
+  return p.then(proc(pid: int): Container =
     if pid == -1:
       return nil
+    let newPath = getSocketPath(loader.sockDir, pid)
+    let oldPath = getSocketPath(loader.sockDir, loader.clientPid)
+    moveFile(oldPath, newPath)
     let nc = Container()
     nc[] = container[]
     nc.url = url
diff --git a/src/local/pager.nim b/src/local/pager.nim
index 99a1ba1c..28c1face 100644
--- a/src/local/pager.nim
+++ b/src/local/pager.nim
@@ -571,7 +571,7 @@ func findProcMapItem*(pager: Pager; pid: int): int =
   -1
 
 proc dupeBuffer(pager: Pager; container: Container; url: URL) =
-  container.clone(url).then(proc(container: Container) =
+  container.clone(url, pager.loader).then(proc(container: Container) =
     if container == nil:
       pager.alert("Failed to duplicate buffer.")
     else:
diff --git a/src/server/buffer.nim b/src/server/buffer.nim
index 3d5b706f..74dc3c2f 100644
--- a/src/server/buffer.nim
+++ b/src/server/buffer.nim
@@ -122,6 +122,7 @@ type
     charset: Charset
     cacheId: int
     outputId: int
+    emptySel: Selector[int]
 
   InterfaceOpaque = ref object
     stream: SocketStream
@@ -909,6 +910,9 @@ when defined(freebsd) or defined(openbsd):
   # necessary for an ugly hack we will do later
   import std/kqueue
 
+var gssock* {.global.}: ServerSocket
+var gpstream* {.global.}: SocketStream
+
 # Create an exact clone of the current buffer.
 # This clone will share the loader process with the previous buffer.
 proc clone*(buffer: Buffer; newurl: URL): int {.proxy.} =
@@ -930,6 +934,7 @@ proc clone*(buffer: Buffer; newurl: URL): int {.proxy.} =
     buffer.estream.write("Failed to clone buffer.\n")
     return -1
   if pid == 0: # child
+    let sockFd = buffer.pstream.recvFileHandle()
     discard close(pipefd[0]) # close read
     let ps = newPosixStream(pipefd[1])
     # We must allocate a new selector for this new process. (Otherwise we
@@ -938,11 +943,23 @@ proc clone*(buffer: Buffer; newurl: URL): int {.proxy.} =
     when not bsdPlatform:
       buffer.selector.close()
     when defined(freebsd) or defined(openbsd):
-      # hack necessary because newSelector calls sysctl, but Capsicum really
+      # Hack necessary because newSelector calls sysctl, but Capsicum really
       # dislikes that and we don't want to request systctl capabilities
       # from pledge either.
+      #
+      # To make this work we
+      # * allocate a new Selector object on buffer startup
+      # * copy into it the initial state of the real selector we will use
+      # * on fork, reset the selector object's state by writing the dummy
+      #   selector into it
+      # * override the file handle with a new kqueue().
+      #
+      # Warning: this breaks when threading is enabled; then fds is no longer a
+      # seq, so it's copied by reference (+ leaks). We explicitly disable
+      # threading, so for now we should be fine.
       let fd = kqueue()
       doAssert fd != -1
+      buffer.selector[] = buffer.emptySel[]
       cast[ptr cint](buffer.selector)[] = fd
     else:
       buffer.selector = newSelector[int]()
@@ -952,8 +969,9 @@ proc clone*(buffer: Buffer; newurl: URL): int {.proxy.} =
       cfds.add(fd)
     for fd in cfds:
       # connecting: just reconnect
-      buffer.loader.reconnect(buffer.loader.connecting[fd])
+      let data = buffer.loader.connecting[fd]
       buffer.loader.connecting.del(fd)
+      buffer.loader.reconnect(data)
     var ongoing: seq[OngoingData] = @[]
     for data in buffer.loader.ongoing.values:
       ongoing.add(data)
@@ -977,14 +995,17 @@ proc clone*(buffer: Buffer; newurl: URL): int {.proxy.} =
       # We ignore errors; not much we can do with them here :/
       discard buffer.rewind(buffer.bytesRead, unregister = false)
     buffer.pstream.sclose()
-    let ssock = initServerSocket(buffer.loader.sockDir, buffer.loader.sockDirFd,
-      myPid)
+    buffer.ssock.close(unlink = false)
+    let ssock = initServerSocket(SocketHandle(sockFd), buffer.loader.sockDir,
+      buffer.loader.sockDirFd, myPid)
     buffer.ssock = ssock
+    gssock = ssock
     ps.write(char(0))
     buffer.url = newurl
     for it in buffer.tasks.mitems:
       it = 0
     buffer.pstream = ssock.acceptSocketStream()
+    gpstream = buffer.pstream
     buffer.loader.clientPid = myPid
     # get key for new buffer
     var r = buffer.pstream.initPacketReader()
@@ -1866,7 +1887,8 @@ proc handleRead(buffer: Buffer; fd: int): bool =
       buffer.window.runJSJobs()
   elif fd in buffer.loader.unregistered:
     discard # ignore
-  else: assert false
+  else:
+    assert false
   true
 
 proc handleError(buffer: Buffer; fd: int; err: OSErrorCode): bool =
@@ -1911,12 +1933,18 @@ proc runBuffer(buffer: Buffer) =
 
 proc cleanup(buffer: Buffer) =
   buffer.pstream.sclose()
-  buffer.ssock.close()
+  # no unlink access on Linux
+  when defined(linux):
+    buffer.ssock.close(unlink = false)
+  else:
+    buffer.ssock.close()
 
 proc launchBuffer*(config: BufferConfig; url: URL; request: Request;
     attrs: WindowAttributes; ishtml: bool; charsetStack: seq[Charset];
-    loader: FileLoader; ssock: ServerSocket; selector: Selector[int]) =
-  let pstream = ssock.acceptSocketStream()
+    loader: FileLoader; ssock: ServerSocket; pstream: SocketStream;
+    selector: Selector[int]) =
+  let emptySel = Selector[int]()
+  emptySel[] = selector[]
   let buffer = Buffer(
     attrs: attrs,
     config: config,
@@ -1932,7 +1960,8 @@ proc launchBuffer*(config: BufferConfig; url: URL; request: Request;
     url: url,
     charsetStack: charsetStack,
     cacheId: -1,
-    outputId: -1
+    outputId: -1,
+    emptySel: emptySel
   )
   buffer.charset = buffer.charsetStack.pop()
   var r = pstream.initPacketReader()
@@ -1943,12 +1972,6 @@ proc launchBuffer*(config: BufferConfig; url: URL; request: Request;
   buffer.istream = newPosixStream(fd)
   buffer.istream.setBlocking(false)
   buffer.selector.registerHandle(fd, {Read}, 0)
-  var gbuffer {.global.}: Buffer
-  gbuffer = buffer
-  onSignal SIGTERM:
-    discard sig
-    gbuffer.cleanup()
-    exitnow(1)
   loader.registerFun = proc(fd: int) =
     buffer.selector.registerHandle(fd, {Read}, 0)
   loader.unregisterFun = proc(fd: int) =
diff --git a/src/server/forkserver.nim b/src/server/forkserver.nim
index e3f210f8..7502a481 100644
--- a/src/server/forkserver.nim
+++ b/src/server/forkserver.nim
@@ -10,6 +10,7 @@ import io/bufwriter
 import io/dynstream
 import io/posixstream
 import io/serversocket
+import io/socketstream
 import io/stdio
 import loader/loader
 import server/buffer
@@ -117,7 +118,6 @@ proc forkLoader(ctx: var ForkServerContext; config: LoaderConfig): int =
   discard close(pipefd[0])
   return pid
 
-var gssock: ServerSocket
 proc forkBuffer(ctx: var ForkServerContext; r: var BufferedReader): int =
   var config: BufferConfig
   var url: URL
@@ -155,18 +155,24 @@ proc forkBuffer(ctx: var ForkServerContext; r: var BufferedReader): int =
     # calling sysctl
     # also lets us deny sysctl call with pledge
     let selector = newSelector[int]()
-    enterBufferSandbox(sockDir)
+    setBufferProcessTitle(url)
     let pid = getCurrentProcessId()
     let ssock = initServerSocket(sockDir, sockDirFd, pid)
-    gssock = ssock
-    onSignal SIGTERM:
-      # This will be overridden after buffer has been set up; it is only
-      # necessary to avoid a race condition when buffer is killed before that.
-      discard sig
-      gssock.close()
     let ps = newPosixStream(pipefd[1])
     ps.write(char(0))
     ps.sclose()
+    let pstream = ssock.acceptSocketStream()
+    gssock = ssock
+    gpstream = pstream
+    onSignal SIGTERM:
+      discard sig
+      gpstream.sclose()
+      when defined(linux):
+        # no unlink access on Linux
+        gssock.close(unlink = false)
+      else:
+        gssock.close()
+    enterBufferSandbox(sockDir)
     let loader = FileLoader(
       process: loaderPid,
       clientPid: pid,
@@ -174,9 +180,8 @@ proc forkBuffer(ctx: var ForkServerContext; r: var BufferedReader): int =
       sockDirFd: sockDirFd
     )
     try:
-      setBufferProcessTitle(url)
       launchBuffer(config, url, request, attrs, ishtml, charsetStack, loader,
-        ssock, selector)
+        ssock, pstream, selector)
     except CatchableError:
       let e = getCurrentException()
       # taken from system/excpt.nim
diff --git a/src/utils/sandbox.nim b/src/utils/sandbox.nim
index 70e592d6..4697523f 100644
--- a/src/utils/sandbox.nim
+++ b/src/utils/sandbox.nim
@@ -1,4 +1,41 @@
-when defined(freebsd):
+# Security model with sandboxing:
+#
+# Buffer processes are the most security-sensitive, since they parse
+# various resources retrieved from the network (CSS, HTML) and sometimes
+# even execute untrusted code (JS, with an engine written in C). So the
+# main goal is to give buffers as few permissions as possible.
+#
+# On FreeBSD, we create a file descriptor to the directory sockets
+# reside in, and then use that for manipulating our sockets.
+#(TODO: currently this is the same directory as the cache directory, which
+# is sub-optimal because rogue buffers could access cached files.)
+#
+# Capsicum does not enable more fine-grained capability control, but
+# in practice the things it does enable should not be enough to harm the
+# user's system.
+#
+# On OpenBSD, we pledge the minimum amount of promises we need, and
+# unveil the same socket directory as above. It seems to be roughly
+# equivalent to the security we get with FreeBSD Capsicum.
+#
+# On Linux, we use libseccomp so that I don't have to manually write
+# BPF filters.
+# Sandboxing on Linux is at the moment slightly less safe than on the
+# two BSDs, because a rogue buffer could in theory connect to whatever
+# open UNIX domain socket on the system that the user has access to.
+#TODO look into integrating Landlock to fix this.
+#
+# We do not have OS-level sandboxing on other systems (yet).
+#
+# Aside from sandboxing in buffer processes, we also have a more
+# restrictive "network" sandbox that is intended for CGI processes that
+# just read/write from/to the network and stdin/stdout. At the moment this
+# is only used in the HTTP process.
+#TODO add it to more CGI scripts
+
+const disableSandbox {.booldefine.} = false
+
+when defined(freebsd) and not disableSandbox:
   import bindings/capsicum
 
   proc enterBufferSandbox*(sockPath: string) =
@@ -12,7 +49,7 @@ when defined(freebsd):
     # no difference between buffer; Capsicum is quite straightforward
     # to use in this regard.
     discard cap_enter()
-elif defined(openbsd):
+elif defined(openbsd) and not disableSandbox:
   import bindings/pledge
 
   proc enterBufferSandbox*(sockPath: string) =
@@ -28,7 +65,86 @@ elif defined(openbsd):
   proc enterNetworkSandbox*() =
     # we don't need much to write out data from sockets to stdout.
     doAssert pledge("stdio", nil) == 0
+elif defined(linux) and not disableSandbox:
+  import std/posix
+  import bindings/libseccomp
+
+  proc enterBufferSandbox*(sockPath: string) =
+    onSignal SIGSYS:
+      discard sig
+      raise newException(Defect, "Sandbox violation in buffer")
+    let ctx = seccomp_init(SCMP_ACT_TRAP)
+    doAssert pointer(ctx) != nil
+    const allowList = [
+      cstring"accept", # for incoming requests to our controlling socket
+      "accept4", # for when accept is implemented as accept4
+      "bind", # for outgoing requests to loader
+      "brk", # memory allocation
+      "clock_gettime", # used by QuickJS in atomics
+      "clone", # for when fork is implemented as clone
+      "close", # duh
+      "connect", # for outgoing requests to loader
+      "epoll_create", "epoll_create1", "epoll_ctl", "epoll_wait", # epoll stuff
+      "exit_group", # for quit
+      "fcntl", "fcntl64", # for changing blocking status
+      "fork", # for when fork is really fork
+      "getpid", # for determining current PID after we fork
+      "getrlimit", # glibc uses it after fork it seems
+      "getsockname", # Nim needs it for connecting
+      "gettimeofday", # used by QuickJS in Date.now()
+      "mmap", # memory allocation
+      "mmap2", # memory allocation
+      "munmap", # memory allocation
+      "pipe", # for pipes to child process
+      "pipe2", # for when pipe is implemented as pipe2
+      "prlimit64", # for when getrlimit is implemented as prlimit64
+      "read", "recv", "recvfrom", "recvmsg", # for reading from sockets
+      "send", "sendmsg", "sendto", # for writing to sockets
+      "set_robust_list", # glibc seems to need it for whatever reason
+      "setrlimit", # glibc seems to use it for whatever reason
+      "write" # for writing to sockets
+    ]
+    for it in allowList:
+      let syscall = seccomp_syscall_resolve_name(it)
+      doAssert seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 0) == 0
+    block allowUnixSockets:
+      # only allow creation of UNIX domain sockets.
+      let syscall = seccomp_syscall_resolve_name("socket")
+      let arg0 = scmp_arg_cmp(
+        arg: 0, # domain
+        op: SCMP_CMP_EQ, # equals
+        datum_a: 1 # PF_LOCAL == PF_UNIX == AF_UNIX
+      )
+      doAssert seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 1, arg0) == 0
+    doAssert seccomp_load(ctx) == 0
+    seccomp_release(ctx)
+
+  proc enterNetworkSandbox*() =
+    onSignal SIGSYS:
+      discard sig
+      raise newException(Defect, "Sandbox violation in network process")
+    let ctx = seccomp_init(SCMP_ACT_TRAP)
+    doAssert pointer(ctx) != nil
+    const allowList = [
+      cstring"close", "exit_group", # duh
+      "read", "write", "recv", "send", "recvfrom", "sendto", # socket i/o
+      "fcntl", "fcntl64", # so we can set nonblock etc.
+      "mmap", "mmap2", "munmap", "brk", # memory allocation
+      "poll", # curl needs poll
+      # maybe it will need epoll too in the future
+      "epoll_create", "epoll_create1", "epoll_ctl", "epoll_wait",
+      "ppoll", # or ppoll
+      # we either have to use CURLOPT_NOSIGNAL or allow signals.
+      # do the latter, otherwise the default name resolver will never time out.
+      "signal", "sigaction", "rt_sigaction",
+    ]
+    for it in allowList:
+      doAssert seccomp_rule_add(ctx, SCMP_ACT_ALLOW,
+        seccomp_syscall_resolve_name(it), 0) == 0
+    doAssert seccomp_load(ctx) == 0
+    seccomp_release(ctx)
 else:
+  {.warning: "Building without OS-level sandboxing!".}
   proc enterBufferSandbox*(sockPath: string) =
     discard