about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: bptato <nincsnevem662@gmail.com> 2025-03-14 18:11:25 +0100
committer: bptato <nincsnevem662@gmail.com> 2025-03-14 18:15:10 +0100
commit: 42a61c98034cef298303effab0fe0a531fdc68c4 (patch)
tree: 7b791e14f2c18115fac20731c7c277397ca1efaf
parent: 4a79d029f5753bbbdd28b735327cdabd724c356d (diff)
download: chawan-42a61c98034cef298303effab0fe0a531fdc68c4.tar.gz
loader, forkserver: launch CGI processes from fork server
Well, it's not much prettier...  but it will be useful for keep-alive.
(Hopefully.)

Also, loader could now be locked down with capsicum/pledge/seccomp, but
I'm not sure if there's a point.  The biggest potential threat it faces
is cross-contamination of handles, and no amount of syscall filtering is
going to protect against that.
-rw-r--r-- doc/architecture.md 40
-rw-r--r-- src/local/pager.nim 2
-rw-r--r-- src/main.nim 3
-rw-r--r-- src/server/forkserver.nim 121
-rw-r--r-- src/server/loader.nim 140
5 files changed, 193 insertions, 113 deletions
diff --git a/doc/architecture.md b/doc/architecture.md
index dcfee52d..ee856ed0 100644
--- a/doc/architecture.md
+++ b/doc/architecture.md
@@ -54,8 +54,8 @@ Described as a tree:
 * cha (main process)
 	- forkserver (forked immediately at startup)
 		* loader
-			- local CGI scripts
 		* buffer(s)
+		* local CGI scripts
 	- mailcap processes (e.g. md2html, feh, ...)
 	- editor (e.g. vi)
 
@@ -72,21 +72,21 @@ in [Opening buffers](#opening-buffers).
 
 ### Forkserver
 
-For forking buffer and loader processes, a forkserver process is launched at the
-very beginning of every 'cha' invocation. The fork server is responsible for
-forking the loader process, and also buffer processes.
+For forking the loader process, buffer processes and CGI processes, a
+fork server process is launched at the very beginning of every 'cha'
+invocation.
 
 We use a fork server for two reasons:
 
-1. It helps clean up child processes when the main process crashes. (We open
-   pipes between the main process and the fork server, and kill all child
-   processes from the fork server on EOF.)
-2. It allows us to start new processes without cloning the pager's entire address
-   space. This reduces the impact of memory bugs somewhat, and also our memory
-   usage.
+1. It helps clean up child processes when the main process crashes.
+   (We open a UNIX domain socket between the main process and the fork
+   server, and kill all child processes from the fork server on EOF.)
+2. It allows us to start new buffer processes without cloning the
+   pager's entire address space.  This reduces the impact of memory bugs
+   somewhat, and also our memory usage.
 
-The fork server is not used for mailcap or CGI processes, because their address
-space is replaced by exec anyway. (Also, it would be slow.)
+For convenience reasons, the fork server is not used for mailcap
+processes.
 
 ### Loader
 
@@ -112,11 +112,10 @@ following steps:
   obviously, it's more efficient this way.)
 
 The loader process distinguishes between clients (i.e processes) through
-their control stream (one end of a socketpair created by loader). In
-theory this should help against rogue clients, though in practice it is
-still trivial to crash the loader as a client. It also helps us block
-further requests from buffers that have been discarded by the pager, but
-still haven't found out yet that their life time has ended.
+their control stream (one end of a socketpair created by loader).
+This control stream is closed when the pager discards the buffer, so
+discarded buffers are unable to make further requests even if their
+process is still alive.
 
 ### Buffer
 
@@ -339,9 +338,10 @@ CSS 3 parsing module. The latest iteration of the selector parser is
 pretty good. The media query parser and the CSS value parser both work
 OK, but are missing some commonly used features like variables.
 
-Cascading is OK.  To speed up selector matching, various properties are
-hashed to filter out irrelevant CSS rules.  However, no further style
-optimization exists yet (such as Bloom filters or style interning).
+Cascading works OK.  To speed up selector matching, various properties
+are hashed to filter out irrelevant CSS rules.  However, no further
+style optimization exists yet (such as Bloom filters or style
+interning).
 
 Style calculation is incremental, and results are cached until an
 element's style is invalidated, so re-styles are quite fast.  (The
diff --git a/src/local/pager.nim b/src/local/pager.nim
index 737a13ac..f2d820a5 100644
--- a/src/local/pager.nim
+++ b/src/local/pager.nim
@@ -967,7 +967,7 @@ proc drawBufferAdvance(s: openArray[char]; bgcolor: CellColor; oi, ox: var int;
   ox = x
   return move(ls)
 
-proc drawBuffer*(pager: Pager; container: Container; ofile: File): bool =
+proc drawBuffer(pager: Pager; container: Container; ofile: File): bool =
   var format = Format()
   let res = container.readLines(proc(line: SimpleFlexibleLine) =
     var x = 0
diff --git a/src/main.nim b/src/main.nim
index c4e9c37f..ff9ae25e 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -273,6 +273,9 @@ proc main() =
   # make sure tmpdir exists
   discard mkdir(cstring(config.external.tmpdir), 0o700)
   let loaderPid = forkserver.loadConfig(config)
+  if loaderPid == -1:
+    stderr.writeLine("Failed to fork loader process")
+    quit(1)
   onSignal SIGINT:
     discard sig
     if acceptSigint:
diff --git a/src/server/forkserver.nim b/src/server/forkserver.nim
index 5ff982f4..0469bd38 100644
--- a/src/server/forkserver.nim
+++ b/src/server/forkserver.nim
@@ -9,7 +9,9 @@ import config/urimethodmap
 import io/dynstream
 import io/packetreader
 import io/packetwriter
+import io/poll
 import server/buffer
+import server/connecterror
 import server/loader
 import server/loaderiface
 import types/url
@@ -17,6 +19,7 @@ import types/winattrs
 import utils/proctitle
 import utils/sandbox
 import utils/strwidth
+import utils/twtstr
 
 type
   ForkServer* = ref object
@@ -25,6 +28,8 @@ type
 
   ForkServerContext = object
     stream: SocketStream
+    loaderStream: SocketStream
+    pollData: PollData
 
 proc loadConfig*(forkserver: ForkServer; config: Config): int =
   forkserver.stream.withPacketWriter w:
@@ -65,22 +70,33 @@ proc forkBuffer*(forkserver: ForkServer; config: BufferConfig; url: URL;
   return (bufferPid, newSocketStream(sv[0]))
 
 proc forkLoader(ctx: var ForkServerContext; config: LoaderConfig;
-    loaderStream: SocketStream): int =
+    loaderStream: SocketStream): (int, SocketStream) =
+  # loaderStream is a connection between main process <-> loader, but we
+  # also need a connection between fork server <-> loader.
+  # The naming here is very confusing, sorry about that.
+  var sv {.noinit.}: array[2, cint]
+  if socketpair(AF_UNIX, SOCK_STREAM, IPPROTO_IP, sv) != 0:
+    loaderStream.sclose()
+    return (-1, nil)
   stderr.flushFile()
   let pid = fork()
   if pid == 0:
     # child process
     try:
       ctx.stream.sclose()
+      discard close(sv[0])
+      let forkStream = newSocketStream(sv[1])
       setProcessTitle("cha loader")
-      runFileLoader(config, loaderStream)
+      runFileLoader(config, loaderStream, forkStream)
     except CatchableError as e:
       stderr.write(e.getStackTrace() & "Error: unhandled exception: " & e.msg &
         " [" & $e.name & "]\n")
       quit(1)
     doAssert false
-  loaderStream.sclose()
-  return pid
+  else:
+    discard close(sv[1])
+    loaderStream.sclose()
+    return (pid, newSocketStream(sv[0]))
 
 proc forkBuffer(ctx: var ForkServerContext; r: var PacketReader): int =
   var config: BufferConfig
@@ -99,6 +115,7 @@ proc forkBuffer(ctx: var ForkServerContext; r: var PacketReader): int =
   if pid == 0:
     # child process
     ctx.stream.sclose()
+    ctx.loaderStream.sclose()
     setBufferProcessTitle(url)
     let pid = getCurrentProcessId()
     let urandom = newPosixStream("/dev/urandom", O_RDONLY, 0)
@@ -134,6 +151,49 @@ proc forkBuffer(ctx: var ForkServerContext; r: var PacketReader): int =
   discard close(fd)
   return pid
 
+proc forkCGI(ctx: var ForkServerContext; r: var PacketReader): int =
+  let istream = newPosixStream(r.recvFd())
+  let ostream = newPosixStream(r.recvFd())
+  # hack to detect when the child died
+  var hasOstreamOut2: bool
+  r.sread(hasOstreamOut2)
+  let ostreamOut2 = if hasOstreamOut2: newPosixStream(r.recvFd()) else: nil
+  var env: seq[tuple[name, value: string]]
+  var dir: string
+  var cmd: string
+  var basename: string
+  r.sread(env)
+  r.sread(dir)
+  r.sread(cmd)
+  r.sread(basename)
+  let pid = fork()
+  if pid == 0: # child
+    ctx.stream.sclose()
+    ctx.loaderStream.sclose()
+    # we leave stderr open, so it can be seen in the browser console
+    istream.moveFd(STDIN_FILENO)
+    ostream.moveFd(STDOUT_FILENO)
+    # reset SIGCHLD to the default handler. this is useful if the child
+    # process expects SIGCHLD to be untouched.
+    # (e.g. git dies a horrible death with SIGCHLD as SIG_IGN)
+    signal(SIGCHLD, SIG_DFL)
+    # let's also reset SIGPIPE, which we ignored on init
+    signal(SIGPIPE, SIG_DFL)
+    for it in env:
+      putEnv(it.name, it.value)
+    setCurrentDir(dir)
+    discard execl(cstring(cmd), cstring(basename), nil)
+    let code = int(ceFailedToExecuteCGIScript)
+    stdout.write("Cha-Control: ConnectionError " & $code & " " &
+      ($strerror(errno)).deleteChars({'\n', '\r'}))
+    exitnow(1)
+  else: # parent or error
+    istream.sclose()
+    ostream.sclose()
+    if ostreamOut2 != nil:
+      ostreamOut2.sclose()
+    return pid
+
 proc runForkServer(controlStream, loaderStream: SocketStream) =
   setProcessTitle("cha forkserver")
   var ctx = ForkServerContext(stream: controlStream)
@@ -143,18 +203,51 @@ proc runForkServer(controlStream, loaderStream: SocketStream) =
     var config: LoaderConfig
     r.sread(isCJKAmbiguous)
     r.sread(config)
-    let pid = ctx.forkLoader(config, loaderStream)
+    # for CGI
+    putEnv("SERVER_SOFTWARE", "Chawan")
+    putEnv("SERVER_PROTOCOL", "HTTP/1.0")
+    putEnv("SERVER_NAME", "localhost")
+    putEnv("SERVER_PORT", "80")
+    putEnv("REMOTE_HOST", "localhost")
+    putEnv("REMOTE_ADDR", "127.0.0.1")
+    putEnv("GATEWAY_INTERFACE", "CGI/1.1")
+    putEnv("CHA_INSECURE_SSL_NO_VERIFY", "0")
+    putEnv("CHA_TMP_DIR", config.tmpdir)
+    putEnv("CHA_DIR", config.configdir)
+    putEnv("CHA_BOOKMARK", config.bookmark)
+    # returns a new stream that connects fork server <-> loader and
+    # gives away main process <-> loader
+    let (pid, loaderStream) = ctx.forkLoader(config, loaderStream)
     ctx.stream.withPacketWriter w:
       w.swrite(pid)
-  while true:
-    try:
-      ctx.stream.withPacketReader r:
-        let pid = ctx.forkBuffer(r)
-        ctx.stream.withPacketWriter w:
-          w.swrite(pid)
-    except EOFError:
-      # EOF
-      break
+    if pid == -1:
+      # Notified main process of failure; our job is done.
+      quit(1)
+    ctx.loaderStream = loaderStream
+  ctx.pollData.register(ctx.stream.fd, POLLIN)
+  ctx.pollData.register(ctx.loaderStream.fd, POLLIN)
+  var alive = true
+  while alive:
+    ctx.pollData.poll(-1)
+    for event in ctx.pollData.events:
+      if (event.revents and POLLIN) != 0:
+        try:
+          if event.fd == ctx.stream.fd:
+            ctx.stream.withPacketReader r:
+              let pid = ctx.forkBuffer(r)
+              ctx.stream.withPacketWriter w:
+                w.swrite(pid)
+          elif event.fd == ctx.loaderStream.fd:
+            ctx.loaderStream.withPacketReader r:
+              let pid = ctx.forkCGI(r)
+              ctx.loaderStream.withPacketWriter w:
+                w.swrite(pid)
+        except EOFError:
+          alive = false # EOF
+          break
+      if (event.revents and POLLERR) != 0 or (event.revents and POLLHUP) != 0:
+        alive = false # EOF
+        break
   ctx.stream.sclose()
   # Clean up when the main process crashed.
   discard kill(0, cint(SIGTERM))
diff --git a/src/server/loader.nim b/src/server/loader.nim
index 92a10fd6..eec463c1 100644
--- a/src/server/loader.nim
+++ b/src/server/loader.nim
@@ -134,6 +134,7 @@ type
   LoaderContext = object
     pid: int
     pagerClient: ClientHandle
+    forkStream: SocketStream # handle to the fork server
     config: LoaderConfig
     handleMap: seq[LoaderHandle]
     pollData: PollData
@@ -775,18 +776,16 @@ proc findAuth(client: ClientHandle; url: URL): AuthItem =
     return client.authMap.findItem(url.authOrigin)
   return nil
 
-proc putMappedURL(url: URL; auth: AuthItem) =
-  putEnv("MAPPED_URI_SCHEME", url.scheme)
+proc putMappedURL(s: var seq[tuple[name, value: string]]; url: URL;
+    auth: AuthItem) =
+  s.add(("MAPPED_URI_SCHEME", url.scheme))
   if auth != nil:
-    putEnv("MAPPED_URI_USERNAME", auth.username)
-    putEnv("MAPPED_URI_PASSWORD", auth.password)
-  else:
-    delEnv("MAPPED_URI_USERNAME")
-    delEnv("MAPPED_URI_PASSWORD")
-  putEnv("MAPPED_URI_HOST", url.hostname)
-  putEnv("MAPPED_URI_PORT", url.port)
-  putEnv("MAPPED_URI_PATH", url.pathname)
-  putEnv("MAPPED_URI_QUERY", url.search.substr(1))
+    s.add(("MAPPED_URI_USERNAME", auth.username))
+    s.add(("MAPPED_URI_PASSWORD", auth.password))
+  s.add(("MAPPED_URI_HOST", url.hostname))
+  s.add(("MAPPED_URI_PORT", url.port))
+  s.add(("MAPPED_URI_PATH", url.pathname))
+  s.add(("MAPPED_URI_QUERY", url.search.substr(1)))
 
 type CGIPath = object
   basename: string
@@ -797,37 +796,39 @@ type CGIPath = object
   myDir: string
 
 proc setupEnv(cpath: CGIPath; request: Request; contentLen: int; prevURL: URL;
-    config: LoaderClientConfig; auth: AuthItem) =
+    config: LoaderClientConfig; auth: AuthItem):
+    seq[tuple[name, value: string]] =
+  result = @[]
   let url = request.url
-  putEnv("SCRIPT_NAME", cpath.scriptName)
-  putEnv("SCRIPT_FILENAME", cpath.cmd)
-  putEnv("REQUEST_URI", cpath.requestURI)
-  putEnv("REQUEST_METHOD", $request.httpMethod)
+  result.add(("SCRIPT_NAME", cpath.scriptName))
+  result.add(("SCRIPT_FILENAME", cpath.cmd))
+  result.add(("REQUEST_URI", cpath.requestURI))
+  result.add(("REQUEST_METHOD", $request.httpMethod))
   var headers = ""
   for k, v in request.headers.allPairs:
     headers &= k & ": " & v & "\r\n"
-  putEnv("REQUEST_HEADERS", headers)
+  result.add(("REQUEST_HEADERS", headers))
   if prevURL != nil:
-    putMappedURL(prevURL, auth)
+    result.putMappedURL(prevURL, auth)
   if cpath.pathInfo != "":
-    putEnv("PATH_INFO", cpath.pathInfo)
+    result.add(("PATH_INFO", cpath.pathInfo))
   if url.search != "":
-    putEnv("QUERY_STRING", url.search.substr(1))
+    result.add(("QUERY_STRING", url.search.substr(1)))
   if request.httpMethod == hmPost:
     if request.body.t == rbtMultipart:
-      putEnv("CONTENT_TYPE", request.body.multipart.getContentType())
+      result.add(("CONTENT_TYPE", request.body.multipart.getContentType()))
     else:
-      putEnv("CONTENT_TYPE", request.headers.getOrDefault("Content-Type", ""))
-    putEnv("CONTENT_LENGTH", $contentLen)
+      let contentType = request.headers.getOrDefault("Content-Type")
+      result.add(("CONTENT_TYPE", contentType))
+    result.add(("CONTENT_LENGTH", $contentLen))
   if "Cookie" in request.headers:
-    putEnv("HTTP_COOKIE", request.headers["Cookie"])
+    result.add(("HTTP_COOKIE", request.headers["Cookie"]))
   if request.referrer != nil:
-    putEnv("HTTP_REFERER", $request.referrer)
+    result.add(("HTTP_REFERER", $request.referrer))
   if config.proxy != nil:
-    putEnv("ALL_PROXY", $config.proxy)
+    result.add(("ALL_PROXY", $config.proxy))
   if config.insecureSslNoVerify:
-    putEnv("CHA_INSECURE_SSL_NO_VERIFY", "1")
-  setCurrentDir(cpath.myDir)
+    result.add(("CHA_INSECURE_SSL_NO_VERIFY", "1"))
 
 proc parseCGIPath(ctx: LoaderContext; request: Request): CGIPath =
   var path = percentDecode(request.url.pathname)
@@ -931,46 +932,40 @@ proc loadCGI(ctx: var LoaderContext; client: ClientHandle; handle: InputHandle;
     istream = newPosixStream(pipefdRead[0])
     ostream = newPosixStream(pipefdRead[1])
   let contentLen = request.body.contentLength()
-  stderr.flushFile()
-  let pid = fork()
+  let auth = if prevURL != nil: client.findAuth(prevURL) else: nil
+  let env = setupEnv(cpath, request, contentLen, prevURL, config, auth)
+  var pid: int
+  try:
+    let istream3 = if istream != nil: nil else: newPosixStream("/dev/null")
+    ctx.forkStream.withPacketWriter w:
+      if istream != nil:
+        w.sendFd(istream.fd)
+      else:
+        w.sendFd(istream3.fd)
+      w.sendFd(ostreamOut.fd)
+      w.swrite(ostreamOut2 != nil)
+      if ostreamOut2 != nil:
+        w.sendFd(ostreamOut2.fd)
+      w.swrite(env)
+      w.swrite(cpath.myDir)
+      w.swrite(cpath.cmd)
+      w.swrite(cpath.basename)
+    if istream3 != nil:
+      istream3.sclose()
+    ctx.forkStream.withPacketReader r:
+      r.sread(pid)
+  except EOFError:
+    pid = -1
+  ostreamOut.sclose() # close write
+  if ostreamOut2 != nil:
+    ostreamOut2.sclose() # close write
+  if request.body.t != rbtNone:
+    istream.sclose() # close read
   if pid == -1:
     ctx.rejectHandle(handle, ceFailedToSetUpCGI)
-  elif pid == 0:
-    istreamOut.sclose() # close read
-    ostreamOut.moveFd(STDOUT_FILENO) # dup stdout
     if ostream != nil:
-      ostream.sclose() # close write
-    if istream2 != nil:
-      istream2.sclose() # close cache file; we aren't reading it directly
-    if istream != nil:
-      if istream.fd != 0:
-        istream.moveFd(STDIN_FILENO) # dup stdin
-    else:
-      closeStdin()
-    let auth = if prevURL != nil: client.findAuth(prevURL) else: nil
-    # we leave stderr open, so it can be seen in the browser console
-    setupEnv(cpath, request, contentLen, prevURL, config, auth)
-    # reset SIGCHLD to the default handler. this is useful if the child process
-    # expects SIGCHLD to be untouched. (e.g. git dies a horrible death with
-    # SIGCHLD as SIG_IGN)
-    signal(SIGCHLD, SIG_DFL)
-    # let's also reset SIGPIPE, which we ignored in forkserver
-    signal(SIGPIPE, SIG_DFL)
-    # close the parent handles
-    for i in 0 ..< ctx.handleMap.len:
-      if ctx.handleMap[i] != nil:
-        discard close(cint(i))
-    discard execl(cstring(cpath.cmd), cstring(cpath.basename), nil)
-    let code = int(ceFailedToExecuteCGIScript)
-    stdout.write("Cha-Control: ConnectionError " & $code & " " &
-      ($strerror(errno)).deleteChars({'\n', '\r'}))
-    exitnow(1)
+      ostream.sclose()
   else:
-    ostreamOut.sclose() # close write
-    if ostreamOut2 != nil:
-      ostreamOut2.sclose() # close write
-    if request.body.t != rbtNone:
-      istream.sclose() # close read
     handle.parser = HeaderParser(headers: newHeaders(hgResponse))
     handle.stream = istreamOut
     case request.body.t
@@ -1762,11 +1757,12 @@ proc loaderLoop(ctx: var LoaderContext) =
     ctx.finishCycle()
   ctx.exitLoader()
 
-proc runFileLoader*(config: LoaderConfig; stream: SocketStream) =
+proc runFileLoader*(config: LoaderConfig; stream, forkStream: SocketStream) =
   var ctx {.global.}: LoaderContext
   ctx = LoaderContext(
     config: config,
-    pid: getCurrentProcessId()
+    pid: getCurrentProcessId(),
+    forkStream: forkStream
   )
   onSignal SIGTERM:
     discard sig
@@ -1787,16 +1783,4 @@ proc runFileLoader*(config: LoaderConfig; stream: SocketStream) =
     ctx.pagerClient = ClientHandle(stream: stream, pid: pid, config: config)
   ctx.register(ctx.pagerClient)
   ctx.put(ctx.pagerClient)
-  # for CGI
-  putEnv("SERVER_SOFTWARE", "Chawan")
-  putEnv("SERVER_PROTOCOL", "HTTP/1.0")
-  putEnv("SERVER_NAME", "localhost")
-  putEnv("SERVER_PORT", "80")
-  putEnv("REMOTE_HOST", "localhost")
-  putEnv("REMOTE_ADDR", "127.0.0.1")
-  putEnv("GATEWAY_INTERFACE", "CGI/1.1")
-  putEnv("CHA_INSECURE_SSL_NO_VERIFY", "0")
-  putEnv("CHA_TMP_DIR", config.tmpdir)
-  putEnv("CHA_DIR", config.configdir)
-  putEnv("CHA_BOOKMARK", config.bookmark)
   ctx.loaderLoop()