`./binaries`:
+ [s6-portable-utils](https://skarnet.org/software/s6-portable-utils/)
-`s6-applyuidgid`, `s6-test`
+`s6-applyuidgid`, `s6-echo`
+ [9base](https://tools.suckless.org/9base/):
-`tr(1)` `read(1)`, `hoc(1)`, `sed(1)`, `grep(1)`, `urlencode(1)`,
-`cleanname(1)`, `cat(1)`
+`tr(1)` `read(1)`, `urlencode(1)`
+ [toybox](http://www.landley.net/toybox/): `wc(1)`,
-`date(1p)`, `printenv(1)`, `stat(1)`
+`date(1p)`, `printenv(1)`, `stat(1)`, `cat(1)`
-we heavily rely on plan 9 regular expression semantics for `sed(1)` and
-`grep(1)`; i expect translating them to coreutils or \*BSD userspace would be an
-effort. so long as i am writing this code for myself, i will not perform that
-effort for you.
+<!-- TODO: is POSIX tr(1) (or at least toybox's) usable? -->
-<!-- an old version of this README explained that we use nonstandard functionality
-from `s6-test`, but adjustments to the filesystem layout for configuration and
-website layouts has rendered this moot. -->
+note that if you build execline and s6-portable-utils with slashpackage
+support, they will expect to find commands in `/package`, and you will
+need to somehow ensure it exists in the chroot directory.
### configuration ###
--- /dev/null
+#!/binaries/execlineb -W
+# `clean-dot-directories.execline filename`: clean dot directories out of
+# `filename`.
+#
+# effectively `sed s@/\.\.?/@/@g`
+
+importas given 1
+case -N -- ${given} {
+ "(.*)/\\.\\.?(/.*)?" {
+ elgetpositionals
+ clean-dot-directories.execline ${1}${2}
+ }
+}
+s6-echo -n -- ${given}
heredoc 0 ""
foreground {
- if -t { s6-test \${#} = 3 }
+ if -t { eltest \${#} = 3 }
log.execline "fatal: ??"${1}"??: "${3}
}
backtick -x -n error_response_directory {
ifelse {
- s6-test -d configuration/error_response_pages/${hostname}/${1}
+ eltest -d configuration/error_response_pages/${hostname}/${1}
-a -r configuration/error_response_pages/${hostname}/${1}
}
{
s6-echo -n -- configuration/error_response_pages/${hostname}/${1}
}
ifelse {
- s6-test -d configuration/error_response_pages/-fallback/${1}
+ eltest -d configuration/error_response_pages/-fallback/${1}
-a -r configuration/error_response_pages/-fallback/${1}
}
{
if {
# if there is an error response page for this status code:
- ifelse { s6-test -v error_response_directory }
+ ifelse { eltest -v error_response_directory }
{
importas -i -u error_response_directory error_response_directory
cd ${error_response_directory}
#!/binaries/execlineb -WS0
#
-# http-get-particular-headers.execline [status-code]
+# http-get-extra-headers.execline [status-code]
#
# sufficiently annoyingly complex resource-specific HTTP header logic that
# gets reused a lot
# user need not bother with editing files containing `\r\n`s
# (however, it is almost certainly considerably more annoying to use)
#
-# TODO: ? status code overrides support??
# TODO: write a GUI?? with like, `yad(1)` or something??? idk
multisubstitute {
# expects a series of http headers from standard input
# exits syntax error otherwise
#
-# current hard dependencies on external ./httpd.execline subscripts:
-#
-# + ./get-line-from-client.execline
-# + ./http-error-response.execline: and thus,
-# + ./log.execline
-#
# the first argument is a program to call when parsing a `Host` header
# for the first time (usually the first line, but this convention is not
# required); it should exit zero on a hostname the program supports,
# the remaining program is supplied **after an initial argument we must
# preserve and use**. we have not found a way to handle this without losing
# efficiency (see the shebang (the "#!" line, on line 1))
-#
importas supported_hostname_test 1
shift
elgetpositionals
# this is a heavy weakness for implementing actual program logic; we expect
# the author of execline to never provide a convenient way to circumvent this
# problem, as supporting Actual Programming Logic is out of scope for the
-# language (for example: see all the use of external tools `grep` and `sed`
-# throughout `httpd.execline`, as execline’s string manipulation tools are
-# (deliberately) very underpowered)
-#
+# language
-backtick -n current_line { get-line-from-client.execline }
+backtick -E -n current_line { get-line-from-client.execline }
### terminating case: empty line
-#
-# exec(3p) the remaining program
-#
-ifelse {
- pipeline { printenv current_line }
- grep -s "^ *$"
-}
+ifelse { eltest \${current_line} =~ "^ *$" }
{
# the client MUST send a Host header, halt otherwise
- #
- ifelse { s6-test ! -v http_header_parse_host }
+ ifelse { eltest ! -v http_header_parse_host }
{
http-error-response.execline
400
#
# after parsing, exec(3p) this script with the hostname validating
# subscript, then the remaining program, as arguments
-#
-backtick -x -n header_name {
- pipeline { printenv current_line }
- pipeline { sed -n "s/^([^ :]+):.*/\\1/p" }
- pipeline { tr A-Z a-z }
- read
-}
-backtick -x -n header_contents {
- pipeline { printenv current_line }
- # strip spaces or tabs from end of line
- # then print the second token verbatim
- #
- # whitespace between header name and contents is optional
- #
- pipeline { sed -n "s/( )*$//; s/^[^ ]+ *([^ ].*)/\\1/p" }
- read
-}
-ifelse {
- s6-test ! -v header_name -o
- ! -v header_contents
-}
-{
- importas -i current_line current_line
- http-error-response.execline
- 400
- "syntax error"
- "http-header-parse.execline: bad header line: \""${current_line}\"
+backtick -E -n current_line_stripped {
+	case -N -- ${current_line} {
+		# strip trailing whitespace; the group must end on a
+		# non-blank character, as POSIX leftmost-longest matching
+		# otherwise lets the greedy (.*) swallow the whitespace
+		"^(.*[^ \t])[ \t]*$" {
+			importas current_line_stripped 1
+			s6-echo -n -- ${current_line_stripped}
+		}
+	}
+	# no match: the line is entirely whitespace; strip to nothing
+	s6-echo -n --
}
+case -N -- ${current_line_stripped} {
+ "([^ :]+): *([^ ].*)" {
+ multisubstitute {
+ importas -i header_name_anycase 1
+ importas -i header_contents 2
+ }
+ backtick -E header_name {
+ pipeline { s6-echo -n -- ${header_name_anycase} }
+ tr A-Z a-z
+ }
-multisubstitute {
- importas -i -u header_name header_name
- importas -i -u header_contents header_contents
-}
+ #### special case: host header
+ #
+ # short circuits the program
+ # TODO: [hard, design problem]: short circuit but exec(3p) into the
+ # remaining program
+ ifelse { eltest \${header_name} = host }
+ {
+ # we MUST 400 on multiple Host headers
+ ifelse { eltest -v http_header_parse_host }
+ {
+ http-error-response.execline
+ 400
+ "syntax error"
+ "http-header-parse.execline: multiple Host headers!??"
+ }
-#### special case: host header
-#
-# short circuits the program
-# TODO: [hard, design problem]: short circuit but exec(3p) into the
-# remaining program
-#
-ifelse { s6-test \${header_name} = host }
-{
- # we MUST 400 on multiple Host headers
- #
- ifelse { s6-test -v http_header_parse_host }
- {
- http-error-response.execline
- 400
- "syntax error"
- "http-header-parse.execline: multiple Host headers!??"
- }
+ # 400 on syntactically illegal hostnames
+ ifelse {
+ define hexadecimal "([0-9]|[a-f]|[A-F])"
+ multisubstitute {
+ # incidentally covers all of ipv4
+					# NOTE(review): "|" added before the dash/dot class;
+					# without it, "[0-9][\-.]" demands a digit before every
+					# dash/dot, rejecting names like "example.com"
+					define domain_name "(([a-z]|[A-Z]|[0-9])([a-z]|[A-Z]|[0-9]|[\-.])*)"
- # validate hostnames, exiting on syntactically illegal ones
- #
- ifelse {
- define hexadecimal "[0-9a-fA-F]"
- multisubstitute {
- # + dns-resolved hostname
- define domain_name "[a-zA-Z0-9\-.]+"
+ # TODO: incorrect
+					# "+?" is not valid POSIX ERE (no lazy quantifiers);
+					# restore the previous "(hex+)?" grouping
+					define approximate_ipv6 "(("${hexadecimal}"+)?(::"${hexadecimal}")+)"
- # + ipv6 address (TODO: handle robustly)
- define approximate_ipv6 "("${hexadecimal}"+)?(::"${hexadecimal}")+"
+ define port ":[0-9]+"
+ }
+ eltest \${header_contents} =~ "^ *("${domain_name}"|"${approximate_ipv6}")("${port}")? *$"
+ }
+ {
+ http-error-response.execline
+ 400
+ "syntax error"
+ "illegal host: "\"${header_contents}\"
+ }
- # + port string
- define port ":[0-9]+"
+ # short circuits on unsupported hostnames
+ if { ${supported_hostname_test} ${header_contents} }
+ export http_header_parse_${header_name} ${header_contents}
+ ${0}
+ ${supported_hostname_test}
+ ${@}
}
- #
- # as we understand it, a valid ipv4 address is always a valid
- # domain name address, so we do not actually have to handle
- # that…
- #
- pipeline { s6-echo -n -- ${header_contents} }
- grep -sv "^ *(("${domain_name}")|("${approximate_ipv6}"))("${port}")? *$"
- }
- {
- http-error-response.execline
- 400
- "syntax error"
- "illegal host: "\"${header_contents}\"
- }
-
- # short circuit on unsupported hostnames
- #
- ifelse -n { ${supported_hostname_test} ${header_contents} }
- {
- heredoc 0 ""
export http_header_parse_${header_name} ${header_contents}
- ${@}
-
+ ${0}
+ ${supported_hostname_test}
+ ${@}
}
- export http_header_parse_${header_name} ${header_contents}
- ${0}
- ${supported_hostname_test}
- ${@}
}
-
-export http_header_parse_${header_name} ${header_contents}
-${0}
- ${supported_hostname_test}
- ${@}
+http-error-response.execline
+ 400
+ "syntax error"
+ "http-header-parse.execline: bad header line: \""${current_line}\"
# base case: quit if there are no arguments
ifelse {
importas "#" "#"
- s6-test ${#} = 0
+ eltest ${#} = 0
}
{
exit 0
}
# recursive case: print the current header
-
importas header_file 1
shift
elgetpositionals
# performs `basename ${header_file}`
backtick -E -n header_name {
- pipeline { s6-echo -n -- ${header_file} }
- pipeline { sed "s@.*/([^/]*)@\\1@" }
- tr -d " \t\r\n" # paranoid
+ pipeline {
+ case -N -- ${header_file} {
+ ".*/([^/]*)$" {
+ importas basename 1
+ s6-echo -n -- ${basename}
+ }
+ }
+ }
+ tr -d " \t\r\n" # paranoia
}
# ignore protected headers
pipeline { s6-echo -n -- ${header_name} }
tr A-Z a-z
}
- s6-test -v httpd_execline_protected_header_${header_name_lowercase}
+ eltest -v httpd_execline_protected_header_${header_name_lowercase}
}
{
foreground { log.execline "WARNING: configuration error: ignoring protected header: \""${header_name}\" }
}
# short circuit on overridden header
-ifelse { s6-test -v http_print_header_directories_${header_name} }
+ifelse { eltest -v http_print_header_directories_${header_name} }
{
foreground { log.execline "ignoring overridden header_name: "\"${header_name}\" }
http-print-header-directories.execline ${@}
}
# otherwise, print out the header line
-
multisubstitute {
importas -D -no_hostname_parsed hostname http_header_parse_host
importas -D -no_resource_parsed requested_resource http_start_line_parse_resource
}
# wrapper around s6-echo for hostname and resource substitutions in header contents
define header_substitution_script
-"multisubstitute {
- define hostname "${hostname}"
- define resource "${requested_resource}"
-}
-s6-echo -n -- "
+ "multisubstitute {
+ define hostname "${hostname}"
+ define resource "${requested_resource}"
+ }
+ s6-echo -n -- "
# we’ll strip out `\r`s and `\n`s from file contents, in
# case the configuration should ever be made in a mischievous way
# expects a start line from an http request from standard input
# exits syntax error otherwise
#
-# current hard dependencies on `httpd.execline` subscripts:
-#
-# + ./get-line-from-client.execline
-# + ./http-error-response.execline: and, thus
-# + ./log.execline
-#
# on success, exports
#
# + http_start_line_parse_method
# + http_start_line_parse_resource
# + http_start_line_parse_version
#
-# containing the request's method, requested resource, and http version
-# it then exec(3p)s into its command line
+# containing the request's method, requested resource, and http version,
+# and then exec(3p)s into its command line
-backtick -n start_line { get-line-from-client.execline }
-backtick -x -n http_start_line_parse_method {
- pipeline { printenv start_line }
- pipeline { sed -n "s@^(CONNECT|DELETE|GET|HEAD|OPTIONS|PATCH|POST|PUT|TRACE) +.*@\\1@p" }
- read
-}
-backtick -x -n http_start_line_parse_resource {
- pipeline { printenv start_line }
- pipeline { sed -n "s@^[^ ]+ +(/[^ ]*) +.*@\\1@p" }
- read
-}
-backtick -x -n http_start_line_parse_version {
- pipeline { printenv start_line }
- pipeline { sed -n "s@.*HTTP/([0-9]\.[0-9]) *@\\1@p" }
- read
-}
+backtick -E -n start_line { get-line-from-client.execline }
+case -N -- ${start_line} {
+ "^(CONNECT|DELETE|GET|HEAD|OPTIONS|PATCH|POST|PUT|TRACE) +(/[^ ]*) +HTTP/([0-9]\.[0-9]) *$" {
+		# the -S1 flag to execlineb would auto-substitute $1 and the
+		# like, so we need this workaround. the names are chosen so as
+		# to not cause accidental substitutions in ${@}
+ multisubstitute {
+ importas _http_start_line_parse_method 1
+ importas _http_start_line_parse_resource 2
+ importas _http_start_line_parse_version 3
+ }
+ export http_start_line_parse_method ${_http_start_line_parse_method}
+ export http_start_line_parse_resource ${_http_start_line_parse_resource}
+ export http_start_line_parse_version ${_http_start_line_parse_version}
-importas -i -u start_line start_line
-ifelse {
- s6-test ! -v http_start_line_parse_method -o
- ! -v http_start_line_parse_resource -o
- ! -v http_start_line_parse_version
+ emptyenv -P
+ ${@}
+ }
}
-{
http-error-response.execline
400
"syntax error"
"http-start-line-parse.execline: (bad) start line: \""${start_line}\"
-}
-
-${@}
#!/usr/local/bin/execlineb -WP
## `httpd.execline`: a simple static web server ###
-#
-# i would like to note that simplicity is relative; the *implementation* of
-# this simple functionality is not exactly simple. there are several subscripts
-# with sufficently complex and (in all but one case) reusable functionality
-# that we separate them out.
-# unfortunately, many themselves are (currently )dependent on other subscripts.
-
-### the http/1.1 protocol, oversimplified ###
-#
-# a client sends a request that normally looks something like
-#
-# ```
-# > [http_method] [resource] [http version]\r
-# > Host: [hostname]\r
-# > [quite possibly many other headers]\r
-# > \r
-# ```
-#
-# (note the `\r`s before newlines.
-# (also: the Host header does not *have* to be the second line)
-#
-# we respond to the client appropriately, using to the following template:
-#
-# ```
-# < HTTP/1.1 [status code] [status message]\r
-# < Content-Type: [MIME type of the message body]\r
-# < Content-Length: [size of message body in bytes]\r
-# < Date: [the time as of this response]\r
-# < [Last-Modified: [date of the resource’s last revision]]\r
-# < \r
-# < [content, sent verbatim]
-# ```
-#
-# we do not follow the http/1.1 protocol precisely, but it is enough to satisfy
-# web browsers and tools like `curl(1)`, and to handle misbehaving clients.
-#
-
### brief httpd.execline overview ##
#
# 1. sandboxing (paranoia?)
# 2. read, validate the start line and Host header sent by the client
# 3. find resource, determine its filetype
# 4. send response to client
-#
#### 1. sandboxing ###
-#
-# this recreates a security measure we picked up from `publicfile`: if this
-# server should somehow be hijacked, it will not be able to escape the
-# directory it runs in, and it will be running as an unpriveleged user
-# in the setup of this server, the user `httpd` owns no files or directories in
-# the change-rooted directory, nor does it have any write permissions for those
-# files and directories, so a hijacked process will not be able to do very much
export PATH /binaries
chroot .
s6-applyuidgid -U -z
# see `./log.execline`
export program_name httpd.execline
-# see end of script: handle crashes (or syntax errors in this script,) cleanly
+# see end of script: handle crashes cleanly
if -X -n -t {
#### 2. read from client, with interspersed validation ###
##### 2.1. start line ###
importas -i method http_start_line_parse_method
importas -i requested_resource http_start_line_parse_resource
}
- ifelse -n {
- s6-test \${method} = HEAD -o
- \${method} = GET
- }
+	# anchored so a method merely *containing* HEAD or GET cannot pass
+	ifelse -n { eltest \${method} =~ "^(HEAD|GET)$" }
{
http-error-response.execline
501
backtick -n resource {
cd supported_domains
- backtick -n candidate_resource {
- backtick -n with_dot_and_dot_dot {
- pipeline { s6-echo -n -- ${requested_resource} }
- #
- # strip query string, or resource location
- #
- pipeline { sed "s/[?#].*//; s@/\\.\\.?/@/@g" }
- # decode url-encodings, if any
+ backtick -E -n candidate_resource {
+ backtick -E -n without_dot_and_dot_dot {
+ backtick -E -n without_query_string {
+ case -N -- ${requested_resource} {
+					# [^?#]* (not a greedy .*) so we cut at the FIRST ? or #,
+					# matching the old sed "s/[?#].*//" behaviour
+					"([^?#]*)[?#].*" {
+ elgetpositionals
+ s6-echo -n -- ${1}
+ }
+ }
+ s6-echo -n -- ${requested_resource}
+ }
+ pipeline { clean-dot-directories.execline ${without_query_string} }
urlencode -d
}
- importas -i -u with_dot_and_dot_dot with_dot_and_dot_dot
+
# include the hostname in the final resource name
- #
- if { s6-echo -n -- ${hostname} }
- # handle dot and dot-dot directory semantics
- # we prepend the hostname to the result, ensuring
- # `${resource}` will route to somewhere inside the
- # subdirectory named after the host
- cleanname ${with_dot_and_dot_dot}
+ s6-echo -n -- ${hostname}/${without_dot_and_dot_dot}
}
- importas -i -u candidate_resource candidate_resource
# `${directory}` -> `${directory}/index.xhtml`
- ifelse { s6-test -d \${candidate_resource} }
+ ifelse { eltest -d \${candidate_resource} }
{
s6-echo -n -- ${candidate_resource}/index.xhtml
}
}
importas -i resource resource
- ifelse { s6-test ! -r supported_domains/${resource} }
+ ifelse { eltest ! -r supported_domains/${resource} }
{
http-error-response.execline
404
cat ${Content_Type_override_file}
}
- backtick -D "no.extension" -n extension {
- pipeline { printenv resource }
- # strip everything up to the non-periods after the final
- # period in the string
- #
- pipeline { sed -n "s/.+\\.([^.]+)$/\\1/p" }
- read
+ backtick -E -D "no.extension" -n extension {
+ case -N -- ${resource} {
+ ".+\\.([^.]+)$" {
+ elgetpositionals
+ s6-echo -n -- ${1}
+ }
+ }
}
# publicfile-style custom filetypes: `file.{1}={2}` is served
# transformed into periods, allowing files like
# `index.text=x:market` being served as `text/x.market`
ifelse {
- pipeline { printenv extension }
# this regex matches exactly what `publicfile` does
- grep -s "[a-zA-Z0-9]+=[^=]+$"
+		eltest \${extension} =~ "[a-zA-Z0-9]+=[^=]+$"
}
{
- pipeline { printenv extension }
+	pipeline { s6-echo -n -- ${extension} }
tr := ./
}
- # use `./configuration/Content-Type_table` as a key-value store: files with
- # the name ${extension} map to the `Content-Type` embedded in
- # their contents. for example, `./configuration/Content-Type_table/xhtml`
- # contains the text “application/xhtml+xml” (with no newline)
- # (it is fine if the file contains a single newline at the end)
- #
- # if no key exists with the extension’s name, we fall back on
- # “application/octet-stream”, as we should
- importas -i -u extension extension
- ifelse { s6-test -r configuration/Content-Type_table/${extension} }
+ # use `./configuration/Content-Type_table` as a key-value store
+ ifelse { eltest -r configuration/Content-Type_table/${extension} }
{
cat configuration/Content-Type_table/${extension}
}
# current time of response: SHOULD be provided (why?)
backtick -n Date { date -u ${date_format} }
- # allow for arbitrary HTTP header and HTTP status code overrides.
- # for an example where the former might be useful, consider Content
- # Security Policy; for the latter, consider HTTP 301 redirects
+ # allow for arbitrary HTTP header and HTTP status code overrides
#
# be warned!! we do not validate these overrides!
backtick -n extra_headers {
"
}
foreground {
- if -t { s6-test \${method} = GET }
+ if -t { eltest \${method} = GET }
cat supported_domains/${resource}
}
# hack: write(3p) does not guarantee that all the
s6-sleep -m 512
# TODO: (?) persistent connections? (recursion??)
}
- ##### end of script
- # catches crashes (and syntax errors,,), and other unexpected things
- # useful for debugging! otherwise, clients might do strange things
- #
- # probably a bad sign this is still left in lol
http-error-response.execline
500
"internal server error"
# tests if `hostname` is supported by this server, by checking if
# a directory by that exact name exists in the current working directory
# immediately 404s otherwise
-#
-# hard depends on these external `httpd.execline` subscripts:
-#
-# + ./http-error-response.execline: and thus,
-# + ./log.execline
-#
-# reject unsupported hostnames
-#
-ifelse { s6-test ! -d \supported_domains/${1} }
+ifelse {
+ eltest ! -d \supported_domains/${1} -o
+ ! -r \supported_domains/${1}
+}
{
if {
http-error-response.execline