diff --git a/documentation/manuals/archiver.1.man b/documentation/manuals/archiver.1.man new file mode 100644 index 0000000..93b6ea4 --- /dev/null +++ b/documentation/manuals/archiver.1.man @@ -0,0 +1,222 @@ +.\" Man page generated from reStructuredText. +. +.TH "KAWIPIKO-ARCHIVER" "1" "2021-12-23" "volution.ro" "kawipiko" +.SH NAME +kawipiko -- blazingly fast static HTTP server \- kawipiko-archiver +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +>> kawipiko\-archiver \-\-help +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +\-\-sources + +\-\-archive + +\-\-compress +\-\-compress\-level +\-\-compress\-cache + +\-\-exclude\-index +\-\-exclude\-strip +\-\-exclude\-cache +\-\-include\-etag + +\-\-exclude\-file\-listing +\-\-include\-folder\-listing + +\-\-progress +\-\-debug +.ft P +.fi +.UNINDENT +.UNINDENT + +.sp +.ce +---- + +.ce 0 +.sp +.SH FLAGS +.sp +\fB\-\-sources\fP +.INDENT 0.0 +.INDENT 3.5 +The path to the source folder that is the root of the static website content. +.UNINDENT +.UNINDENT +.sp +\fB\-\-archive\fP +.INDENT 0.0 +.INDENT 3.5 +The path to the target CDB file that contains the archived static content. 
+.UNINDENT +.UNINDENT +.sp +\fB\-\-compress\fP, and \fB\-\-compress\-level\fP +.INDENT 0.0 +.INDENT 3.5 +Each individual file (and consequently of the corresponding HTTP response body) is compressed with either \fBgzip\fP, \fBzopfli\fP or \fBbrotli\fP; by default (or alternatively with \fBidentity\fP) no compression is used. +.sp +Even if compression is explicitly requested, if the compression ratio is below a certain threshold (depending on the uncompressed size), the file is stored without any compression. +(It\(aqs senseless to force the client to spend time and decompress the response body if that time is not recovered during network transmission.) +.sp +The compression level can be chosen, the value depending on the algorithm: +.INDENT 0.0 +.IP \(bu 2 +\fBgzip\fP \-\- \fB\-1\fP for algorithm default, \fB\-2\fP for Huffman only, \fB0\fP to \fB9\fP for fast to slow; +.IP \(bu 2 +\fBzopfli\fP \-\- \fB\-1\fP for algorithm default, \fB0\fP to \fB30\fP iterations for fast to slow; +.IP \(bu 2 +\fBbrotli\fP \-\- \fB\-1\fP for algorithm default, \fB0\fP to \fB9\fP for fast to slow, \fB\-2\fP for extreme; +.IP \(bu 2 +(by "algorithm default", it is meant "what that algorithm considers the recommended default compression level";) +.IP \(bu 2 +\fBkawipiko\fP by default uses the maximum compression level for each algorithm; (i.e. 
\fB9\fP for \fBgzip\fP, \fB30\fP for \fBzopfli\fP, and \fB\-2\fP for \fBbrotli\fP;) +.UNINDENT +.UNINDENT +.UNINDENT +.sp +\fB\-\-sources\-cache \fP, and \fB\-\-compress\-cache \fP +.INDENT 0.0 +.INDENT 3.5 +At the given path a single file is created (that is a BBolt database), that will be used to cache the following information: +.INDENT 0.0 +.IP \(bu 2 +in case of \fB\-\-sources\-cache\fP, the fingerprint of each file contents is stored, so that if the file was not changed, re\-reading it shouldn\(aqt be attempted unless it is absolutely necessary; also if the file is small enough, its contents are stored in this database (deduplicated by its fingerprint); +.IP \(bu 2 +in case of \fB\-\-compress\-cache\fP the compression outcome of each file contents is stored (deduplicated by its fingerprint), so that compression is done only once over multiple runs; +.UNINDENT +.sp +Each of these caches can be safely reused between multiple related archives, especially when they have many files in common. +Each of these caches can be independently used (or shared). +.sp +Using these caches allows one to very quickly rebuild an archive when only a couple of files have been changed, without even touching the file\-system for the unchanged ones. +.UNINDENT +.UNINDENT +.sp +\fB\-\-exclude\-index\fP +.INDENT 0.0 +.INDENT 3.5 +Disables using \fBindex.*\fP files (where \fB\&.*\fP is one of \fB\&.html\fP, \fB\&.htm\fP, \fB\&.xhtml\fP, \fB\&.xht\fP, \fB\&.txt\fP, \fB\&.json\fP, and \fB\&.xml\fP) to respond to a request whose URL path ends in \fB/\fP (corresponding to the folder wherein \fBindex.*\fP file is located). +(This can be used to implement "slash" blog style URL\(aqs like \fB/blog/whatever/\fP which maps to \fB/blog/whatever/index.html\fP\&.) 
+.UNINDENT +.UNINDENT +.sp +\fB\-\-exclude\-strip\fP +.INDENT 0.0 +.INDENT 3.5 +Disables using a file with the suffix \fB\&.html\fP, \fB\&.htm\fP, \fB\&.xhtml\fP, \fB\&.xht\fP, and \fB\&.txt\fP to respond to a request whose URL does not exactly match an existing file. +(This can be used to implement "suffix\-less" blog style URL\(aqs like \fB/blog/whatever\fP which maps to \fB/blog/whatever.html\fP\&.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-exclude\-cache\fP +.INDENT 0.0 +.INDENT 3.5 +Disables adding a \fBCache\-Control: public, immutable, max\-age=3600\fP header that forces the browser (and other intermediary proxies) to cache the response for an hour (the \fBpublic\fP and \fBmax\-age=3600\fP arguments), and furthermore not request it even on reloads (the \fBimmutable\fP argument). +.UNINDENT +.UNINDENT +.sp +\fB\-\-include\-etag\fP +.INDENT 0.0 +.INDENT 3.5 +Enables adding an \fBETag\fP response header that contains the SHA256 of the response body. +.sp +By not including the \fBETag\fP header (i.e. the default), and because identical headers are stored only once, if one has many files of the same type (that in turn without \fBETag\fP generates the same headers), this can lead to significant reduction in stored headers blocks, including reducing RAM usage. +(At this moment it does not support HTTP conditional requests, i.e. the \fBIf\-None\-Match\fP, \fBIf\-Modified\-Since\fP and their counterparts; however this \fBETag\fP header might be used in conjunction with \fBHEAD\fP requests to see if the resource has changed.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-exclude\-file\-listing\fP +.INDENT 0.0 +.INDENT 3.5 +Disables the creation of an internal list of files that can be used in conjunction with the \fB\-\-index\-all\fP flag of the \fBkawipiko\-server\fP\&. +.UNINDENT +.UNINDENT +.sp +\fB\-\-include\-folder\-listing\fP +.INDENT 0.0 +.INDENT 3.5 +Enables the creation of an internal list of folders. (Currently not used by the \fBkawipiko\-server\fP tool.) 
+.UNINDENT +.UNINDENT +.sp +\fB\-\-progress\fP +.INDENT 0.0 +.INDENT 3.5 +Enables periodic reporting of various metrics. +.UNINDENT +.UNINDENT +.sp +\fB\-\-debug\fP +.INDENT 0.0 +.INDENT 3.5 +Enables verbose logging. +It will log various information about the archived files (including compression statistics). +.UNINDENT +.UNINDENT +.SH IGNORED FILES +.INDENT 0.0 +.IP \(bu 2 +any file with the following prefixes: \fB\&.\fP, \fB#\fP; +.IP \(bu 2 +any file with the following suffixes: \fB~\fP, \fB#\fP, \fB\&.log\fP, \fB\&.tmp\fP, \fB\&.temp\fP, \fB\&.lock\fP; +.IP \(bu 2 +any file that contains the following: \fB#\fP; +.IP \(bu 2 +any file that exactly matches the following: \fBThumbs.db\fP, \fB\&.DS_Store\fP; +.IP \(bu 2 +(at the moment these rules are not configurable through flags;) +.UNINDENT +.SH WILDCARD FILES +.sp +By placing a file whose name matches \fB_wildcard.*\fP (i.e. with the prefix \fB_wildcard.\fP and any other suffix), it will be used to respond to any request whose URL fails to find a "better" match. +.sp +These wildcard files respect the folder hierarchy, in that wildcard files in (direct or transitive) subfolders override the wildcard file in their parents (direct or transitive). +.SH SYMLINKS, HARDLINKS, LOOPS, AND DUPLICATED FILES +.sp +You can freely use symlinks (including pointing outside of the content root) and they will be crawled during archival respecting the "logical" hierarchy they introduce. +(Any loop that you introduce into the hierarchy will be ignored and a warning will be issued.) +.sp +You can safely symlink or hardlink the same file (or folder) in multiple places (within the content hierarchy), and its data will be stored only once. +(The same applies to duplicated files that have exactly the same data.) +.\" Generated by docutils manpage writer. +. 
diff --git a/documentation/manual-archiver.rst b/documentation/manuals/archiver.rst similarity index 100% rename from documentation/manual-archiver.rst rename to documentation/manuals/archiver.rst diff --git a/documentation/manuals/archiver.txt b/documentation/manuals/archiver.txt new file mode 100644 index 0000000..f08344b --- /dev/null +++ b/documentation/manuals/archiver.txt @@ -0,0 +1,177 @@ +KAWIPIKO-ARCHIVER(1) kawipiko KAWIPIKO-ARCHIVER(1) + + + +NAME + kawipiko -- blazingly fast static HTTP server - kawipiko-archiver + + >> kawipiko-archiver --help + + --sources + + --archive + + --compress + --compress-level + --compress-cache + + --exclude-index + --exclude-strip + --exclude-cache + --include-etag + + --exclude-file-listing + --include-folder-listing + + --progress + --debug + + + ---- + + + + +FLAGS + --sources + The path to the source folder that is the root of the static website + content. + + --archive + The path to the target CDB file that contains the archived static + content. + + --compress, and --compress-level + Each individual file (and consequently of the corresponding HTTP + response body) is compressed with either gzip, zopfli or brotli; by + default (or alternatively with identity) no compression is used. + + Even if compression is explicitly requested, if the compression + ratio is below a certain threshold (depending on the uncompressed + size), the file is stored without any compression. (It's senseless + to force the client to spend time and decompress the response body + if that time is not recovered during network transmission.) 
+ + The compression level can be chosen, the value depending on the + algorithm: + + • gzip -- -1 for algorithm default, -2 for Huffman only, 0 to 9 for + fast to slow; + + • zopfli -- -1 for algorithm default, 0 to 30 iterations for fast to + slow; + + • brotli -- -1 for algorithm default, 0 to 9 for fast to slow, -2 + for extreme; + + • (by "algorithm default", it is meant "what that algorithm + considers the recommended default compression level";) + + • kawipiko by default uses the maximum compression level for each + algorithm; (i.e. 9 for gzip, 30 for zopfli, and -2 for brotli;) + + --sources-cache , and --compress-cache + At the given path a single file is created (that is a BBolt + database), that will be used to cache the following information: + + • in case of --sources-cache, the fingerprint of each file contents + is stored, so that if the file was not changed, re-reading it + shouldn't be attempted unless it is absolutely necessary; also if + the file is small enough, its contents are stored in this database + (deduplicated by its fingerprint); + + • in case of --compress-cache the compression outcome of each file + contents is stored (deduplicated by its fingerprint), so that + compression is done only once over multiple runs; + + Each of these caches can be safely reused between multiple related + archives, especially when they have many files in common. Each of + these caches can be independently used (or shared). + + Using these caches allows one to very quickly rebuild an archive + when only a couple of files have been changed, without even touching + the file-system for the unchanged ones. + + --exclude-index + Disables using index.* files (where .* is one of .html, .htm, + .xhtml, .xht, .txt, .json, and .xml) to respond to a request whose + URL path ends in / (corresponding to the folder wherein index.* file + is located). (This can be used to implement "slash" blog style + URL's like /blog/whatever/ which maps to /blog/whatever/index.html.) 
+ + --exclude-strip + Disables using a file with the suffix .html, .htm, .xhtml, .xht, and + .txt to respond to a request whose URL does not exactly match an + existing file. (This can be used to implement "suffix-less" blog + style URL's like /blog/whatever which maps to /blog/whatever.html.) + + --exclude-cache + Disables adding a Cache-Control: public, immutable, max-age=3600 + header that forces the browser (and other intermediary proxies) to + cache the response for an hour (the public and max-age=3600 + arguments), and furthermore not request it even on reloads (the + immutable argument). + + --include-etag + Enables adding an ETag response header that contains the SHA256 of + the response body. + + By not including the ETag header (i.e. the default), and because + identical headers are stored only once, if one has many files of the + same type (that in turn without ETag generates the same headers), + this can lead to significant reduction in stored headers blocks, + including reducing RAM usage. (At this moment it does not support + HTTP conditional requests, i.e. the If-None-Match, If-Modified-Since + and their counterparts; however this ETag header might be used in + conjunction with HEAD requests to see if the resource has changed.) + + --exclude-file-listing + Disables the creation of an internal list of files that can be used + in conjunction with the --index-all flag of the kawipiko-server. + + --include-folder-listing + Enables the creation of an internal list of folders. (Currently not + used by the kawipiko-server tool.) + + --progress + Enables periodic reporting of various metrics. + + --debug + Enables verbose logging. It will log various information about the + archived files (including compression statistics). 
+ +IGNORED FILES + + • any file with the following prefixes: ., #; + + • any file with the following suffixes: ~, #, .log, .tmp, .temp, .lock; + + • any file that contains the following: #; + + • any file that exactly matches the following: Thumbs.db, .DS_Store; + + • (at the moment these rules are not configurable through flags;) + +WILDCARD FILES + By placing a file whose name matches _wildcard.* (i.e. with the prefix + _wildcard. and any other suffix), it will be used to respond to any + request whose URL fails to find a "better" match. + + These wildcard files respect the folder hierarchy, in that wildcard + files in (direct or transitive) subfolders override the wildcard file + in their parents (direct or transitive). + +SYMLINKS, HARDLINKS, LOOPS, AND DUPLICATED FILES + You can freely use symlinks (including pointing outside of the content + root) and they will be crawled during archival respecting the "logical" + hierarchy they introduce. (Any loop that you introduce into the + hierarchy will be ignored and a warning will be issued.) + + You can safely symlink or hardlink the same file (or folder) in + multiple places (within the content hierarchy), and its data will be + stored only once. (The same applies to duplicated files that have + exactly the same data.) + + + +volution.ro 2021-12-23 KAWIPIKO-ARCHIVER(1) diff --git a/documentation/manuals/server.1.man b/documentation/manuals/server.1.man new file mode 100644 index 0000000..e73ac99 --- /dev/null +++ b/documentation/manuals/server.1.man @@ -0,0 +1,292 @@ +.\" Man page generated from reStructuredText. +. +.TH "KAWIPIKO-SERVER" "1" "2021-12-23" "volution.ro" "kawipiko" +.SH NAME +kawipiko -- blazingly fast static HTTP server \- kawipiko-server +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. 
+.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +>> kawipiko\-server \-\-help +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +\-\-archive +\-\-archive\-inmem (memory\-loaded archive file) +\-\-archive\-mmap (memory\-mapped archive file) +\-\-archive\-preload (preload archive in OS cache) + +\-\-bind : (HTTP, only HTTP/1.1, FastHTTP) +\-\-bind\-2 : (HTTP, only HTTP/1.1, Go net/http) +\-\-bind\-tls : (HTTPS, only HTTP/1.1, FastHTTP) +\-\-bind\-tls\-2 : (HTTPS, with HTTP/2, Go net/http) +\-\-bind\-quic : (HTTPS, with HTTP/3) + +\-\-http1\-disable +\-\-http2\-disable +\-\-http3\-alt\-svc : + +\-\-tls\-bundle (TLS certificate bundle) +\-\-tls\-public (TLS certificate public) +\-\-tls\-private (TLS certificate private) +\-\-tls\-self\-rsa (use self\-signed RSA) +\-\-tls\-self\-ed25519 (use self\-signed Ed25519) + +\-\-processes (of slave processes) +\-\-threads (of threads per process) +\-\-index\-all +\-\-index\-paths +\-\-index\-data\-meta +\-\-index\-data\-content + +\-\-security\-headers\-tls +\-\-security\-headers\-disable + +\-\-limit\-memory +\-\-timeout\-disable +\-\-profile\-cpu ; \-\-profile\-mem + +\-\-report ; \-\-quiet ; \-\-debug +\-\-dummy ; \-\-dummy\-empty ; \-\-dummy\-delay +.ft P +.fi +.UNINDENT +.UNINDENT + +.sp +.ce +---- + +.ce 0 +.sp +.SH FLAGS +.sp +\fB\-\-bind \fP, \fB\-\-bind\-tls \fP, \fB\-\-bind\-2 \fP, \fB\-\-bind\-tls\-2 \fP, and \fB\-\-bind\-quic \fP +.INDENT 0.0 +.INDENT 3.5 +The IP and port to listen for requests with: +.INDENT 0.0 +.IP \(bu 2 +(insecure) HTTP/1.1 for \fB\-\-bind\fP, leveraging 
\fBfasthttp\fP library; +.IP \(bu 2 +(secure) HTTP/1.1 over TLS for \fB\-\-bind\-tls\fP, leveraging \fBfasthttp\fP library; +.IP \(bu 2 +(insecure) HTTP/1.1 for \fB\-\-bind\-2\fP, leveraging Go\(aqs \fBnet/http\fP library; (not as performant as the \fBfasthttp\fP powered endpoint;) +.IP \(bu 2 +(secure) H2 or HTTP/1.1 over TLS for \fB\-\-bind\-tls\-2\fP, leveraging Go\(aqs \fBnet/http\fP; (not as performant as the \fBfasthttp\fP powered endpoint;) +.IP \(bu 2 +(secure) H3 over QUIC for \fB\-\-bind\-quic\fP, leveraging \fBgithub.com/lucas\-clemente/quic\-go\fP library; (given that H3 is still a new protocol, this must be used with caution; also one should use the \fB\-\-http3\-alt\-svc \fP;) +.IP \(bu 2 +if one uses just \fB\-\-bind\-tls\fP (without \fB\-\-bind\-tls\-2\fP, and without \fB\-\-http2\-disable\fP), then the TLS endpoint is split between \fBfasthttp\fP for HTTP/1.1 and Go\(aqs \fBnet/http\fP for H2; +.UNINDENT +.UNINDENT +.UNINDENT +.sp +\fB\-\-tls\-bundle \fP, \fB\-\-tls\-public \fP, and \fB\-\-tls\-private \fP (optional) +.INDENT 0.0 +.INDENT 3.5 +If TLS is enabled, these options allow one to specify the certificate to use, either as a single file (a bundle) or separate files (the actual public certificate and the private key). +.sp +If one doesn\(aqt specify any of these options, an embedded self\-signed certificate will be used. In such case, one can choose between RSA (the \fB\-\-tls\-self\-rsa\fP flag) or Ed25519 (the \fB\-\-tls\-self\-ed25519\fP flag); +.UNINDENT +.UNINDENT +.sp +\fB\-\-http1\-disable\fP, \fB\-\-http2\-disable\fP +.INDENT 0.0 +.INDENT 3.5 +Disables that particular protocol. +(It can be used only with \fB\-\-bind\-tls\-2\fP, given that \fBfasthttp\fP only supports HTTP/1.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-processes \fP and \fB\-\-threads \fP +.INDENT 0.0 +.INDENT 3.5 +The number of processes and threads per each process to start. 
(Given Go\(aqs concurrency model, the threads count is somewhat a soft limit, hinting to the runtime the desired parallelism level.) +.sp +It is highly recommended to use one process and as many threads as there are cores. +.sp +Depending on the use\-case, one can use multiple processes each with a single thread; this would reduce goroutine contention if it causes problems. +(However note that if using \fB\-\-archive\-inmem\fP, then each process will allocate its own copy of the database in RAM; in such cases it is highly recommended to use \fB\-\-archive\-mmap\fP\&.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-archive \fP +.INDENT 0.0 +.INDENT 3.5 +The path of the CDB file that contains the archived static content. +(It can be created with the \fBkawipiko\-archiver\fP tool.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-archive\-inmem\fP +.INDENT 0.0 +.INDENT 3.5 +Reads the CDB file in RAM, and thus all requests are served from RAM without touching the file\-system. +(The memory impact is equal to the size of the CDB archive. This can be used if enough RAM is available to avoid swapping.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-archive\-mmap\fP +.INDENT 0.0 +.INDENT 3.5 +(\fBrecommended\fP) The CDB file is \fI\%memory mapped\fP, thus reading its data uses the kernel\(aqs file\-system cache, as opposed to issuing \fBread\fP syscalls. +.UNINDENT +.UNINDENT +.sp +\fB\-\-archive\-preload\fP +.INDENT 0.0 +.INDENT 3.5 +Before starting to serve requests, read the CDB file so that its data is buffered in the kernel\(aqs file\-system cache. (This option can be used with or without \fB\-\-archive\-mmap\fP\&.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-index\-all\fP, \fB\-\-index\-paths\fP, \fB\-\-index\-data\-meta\fP, and \fB\-\-index\-data\-content\fP +.INDENT 0.0 +.INDENT 3.5 +In order to serve a request \fBkawipiko\fP does the following: +.INDENT 0.0 +.IP \(bu 2 +given the request\(aqs path, it is used to locate the corresponding resource\(aqs metadata (i.e. response headers) and data (i.e. 
response body) references; +by using \fB\-\-index\-paths\fP a RAM\-based lookup table is created to eliminate a CDB read operation for this purpose; (the memory impact is proportional to the size of all resource paths combined; given that the number of resources is acceptable, say up to a couple hundred thousand, one could safely use this option;) +.IP \(bu 2 +based on the resource\(aqs metadata reference, the actual metadata (i.e. the response headers) is located; +by using \fB\-\-index\-data\-meta\fP a RAM\-based lookup table is created to eliminate a CDB read operation for this purpose; (the memory impact is proportional to the size of all resource metadata blocks combined; given that the metadata blocks are deduplicated, one could safely use this option; if one also uses \fB\-\-archive\-mmap\fP or \fB\-\-archive\-inmem\fP, then the memory impact is only proportional to the number of resource metadata blocks;) +.IP \(bu 2 +based on the resource\(aqs data reference, the actual data (i.e. 
the response body) is located; +by using \fB\-\-index\-data\-content\fP a RAM\-based lookup table is created to eliminate a CDB read operation for this purpose; (the memory impact is proportional to the size of all resource data blocks combined; one can use this option to obtain the best performance; if one also uses \fB\-\-archive\-mmap\fP or \fB\-\-archive\-inmem\fP, then the memory impact is only proportional to the number of resource data blocks;) +.IP \(bu 2 +\fB\-\-index\-all\fP enables all the options above; +.IP \(bu 2 +(depending on the use\-case) it is recommended to use \fB\-\-index\-paths\fP; if \fB\-\-include\-etag\fP was not used during archival, one can also use \fB\-\-index\-data\-meta\fP; +.IP \(bu 2 +it is recommended to use either \fB\-\-archive\-mmap\fP or \fB\-\-archive\-inmem\fP, else (especially if data is indexed) the resulting effect is that of loading everything in RAM; +.UNINDENT +.UNINDENT +.UNINDENT +.sp +\fB\-\-security\-headers\-tls\fP +.INDENT 0.0 +.INDENT 3.5 +Enables adding the following TLS related headers to the response: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Strict\-Transport\-Security: max\-age=31536000 +Content\-Security\-Policy: upgrade\-insecure\-requests +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +These instruct the browser to always use HTTPS for the served domain. +(Useful even without HTTPS, when used behind a TLS terminator, load\-balancer or proxy that do support HTTPS.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-security\-headers\-disable\fP +.INDENT 0.0 +.INDENT 3.5 +Disables adding a few security related headers: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Referrer\-Policy: strict\-origin\-when\-cross\-origin +X\-Content\-Type\-Options: nosniff +X\-XSS\-Protection: 1; mode=block +X\-Frame\-Options: sameorigin +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.sp +\fB\-\-report\fP +.INDENT 0.0 +.INDENT 3.5 +Enables periodic reporting of various metrics. 
+Also enables reporting a selection of metrics if certain thresholds are matched (which most likely is a sign of high\-load). +.UNINDENT +.UNINDENT +.sp +\fB\-\-quiet\fP +.INDENT 0.0 +.INDENT 3.5 +Disables most logging messages. +.UNINDENT +.UNINDENT +.sp +\fB\-\-debug\fP +.INDENT 0.0 +.INDENT 3.5 +Enables all logging messages. +.UNINDENT +.UNINDENT +.sp +\fB\-\-dummy\fP, \fB\-\-dummy\-empty\fP +.INDENT 0.0 +.INDENT 3.5 +It starts the server in a "dummy" mode, ignoring all archive related arguments and always responding with \fBhello world!\en\fP (unless \fB\-\-dummy\-empty\fP was used) and without additional headers except the HTTP status line and \fBContent\-Length\fP\&. +.sp +This argument can be used to benchmark the raw performance of the underlying \fBfasthttp\fP, Go\(aqs \fBnet/http\fP, or QUIC performance; this is the upper limit of the achievable performance given the underlying technologies. +(From my own benchmarks \fBkawipiko\fP adds only about ~15% overhead when actually serving the \fBhello\-world.cdb\fP archive.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-delay \fP +.INDENT 0.0 +.INDENT 3.5 +Enables delaying each response with a certain amount (for example \fB1s\fP, \fB1ms\fP, etc.) +.sp +It can be used to simulate the real\-world network latencies, perhaps to see how a site with many resources loads in various conditions. +(For example, see \fI\%an experiment\fP I made with an image made out of 1425 tiles.) +.UNINDENT +.UNINDENT +.sp +\fB\-\-profile\-cpu \fP and \fB\-\-profile\-mem \fP +.INDENT 0.0 +.INDENT 3.5 +Enables CPU and memory profiling using Go\(aqs profiling infrastructure. +.UNINDENT +.UNINDENT +.\" Generated by docutils manpage writer. +. 
diff --git a/documentation/manual-server.rst b/documentation/manuals/server.rst similarity index 100% rename from documentation/manual-server.rst rename to documentation/manuals/server.rst diff --git a/documentation/manuals/server.txt b/documentation/manuals/server.txt new file mode 100644 index 0000000..2ecf120 --- /dev/null +++ b/documentation/manuals/server.txt @@ -0,0 +1,225 @@ +KAWIPIKO-SERVER(1) kawipiko KAWIPIKO-SERVER(1) + + + +NAME + kawipiko -- blazingly fast static HTTP server - kawipiko-server + + >> kawipiko-server --help + + --archive + --archive-inmem (memory-loaded archive file) + --archive-mmap (memory-mapped archive file) + --archive-preload (preload archive in OS cache) + + --bind : (HTTP, only HTTP/1.1, FastHTTP) + --bind-2 : (HTTP, only HTTP/1.1, Go net/http) + --bind-tls : (HTTPS, only HTTP/1.1, FastHTTP) + --bind-tls-2 : (HTTPS, with HTTP/2, Go net/http) + --bind-quic : (HTTPS, with HTTP/3) + + --http1-disable + --http2-disable + --http3-alt-svc : + + --tls-bundle (TLS certificate bundle) + --tls-public (TLS certificate public) + --tls-private (TLS certificate private) + --tls-self-rsa (use self-signed RSA) + --tls-self-ed25519 (use self-signed Ed25519) + + --processes (of slave processes) + --threads (of threads per process) + --index-all + --index-paths + --index-data-meta + --index-data-content + + --security-headers-tls + --security-headers-disable + + --limit-memory + --timeout-disable + --profile-cpu ; --profile-mem + + --report ; --quiet ; --debug + --dummy ; --dummy-empty ; --dummy-delay + + + ---- + + + + +FLAGS + --bind , --bind-tls , --bind-2 , + --bind-tls-2 , and --bind-quic + The IP and port to listen for requests with: + + • (insecure) HTTP/1.1 for --bind, leveraging fasthttp library; + + • (secure) HTTP/1.1 over TLS for --bind-tls, leveraging fasthttp + library; + + • (insecure) HTTP/1.1 for --bind-2`, leveraging Go's net/http + library; (not as performant as the fasthttp powered endpoint;) + + • (secure) H2 or HTTP/1.1 over 
TLS for --bind-tls-2, leveraging Go's + net/http; (not as performant as the fasthttp powered endpoint;) + + • (secure) H3 over QUIC for --bind-quic, leveraging + github.com/lucas-clemente/quic-go library; (given that H3 is + still a new protocol, this must be used with caution; also one + should use the --http3-alt-svc ;) + + • if one uses just --bind-tls (without --bind-tls-2, and without + --http2-disable), then the TLS endpoint is split between fasthttp + for HTTP/1.1 and Go's net/http for H2; + + --tls-bundle , --tls-public , and --tls-private + (optional) + If TLS is enabled, these options allow one to specify the + certificate to use, either as a single file (a bundle) or separate + files (the actual public certificate and the private key). + + If one doesn't specify any of these options, an embedded self-signed + certificate will be used. In such case, one can choose between RSA + (the --tls-self-rsa flag) or Ed25519 (the --tls-self-ed25519 flag); + + --http1-disable, --http2-disable + Disables that particular protocol. (It can be used only with + --bind-tls-2, given that fasthttp only supports HTTP/1.) + + --processes and --threads + The number of processes and threads per each process to start. + (Given Go's concurrency model, the threads count is somewhat a soft + limit, hinting to the runtime the desired parallelism level.) + + It is highly recommended to use one process and as many threads as + there are cores. + + Depending on the use-case, one can use multiple processes each with + a single thread; this would reduce goroutine contention if it + causes problems. (However note that if using --archive-inmem, then + each process will allocate its own copy of the database in RAM; in + such cases it is highly recommended to use --archive-mmap.) + + --archive + The path of the CDB file that contains the archived static content. + (It can be created with the kawipiko-archiver tool.) 
+ + --archive-inmem + Reads the CDB file in RAM, and thus all requests are served from RAM + without touching the file-system. (The memory impact is equal to + the size of the CDB archive. This can be used if enough RAM is + available to avoid swapping.) + + --archive-mmap + (recommended) The CDB file is memory mapped, thus reading its data + uses the kernel's file-system cache, as opposed to issuing read + syscalls. + + --archive-preload + Before starting to serve requests, read the CDB file so that its + data is buffered in the kernel's file-system cache. (This option + can be used with or without --archive-mmap.) + + --index-all, --index-paths, --index-data-meta, and + --index-data-content + In order to serve a request kawipiko does the following: + + • given the request's path, it is used to locate the corresponding + resource's metadata (i.e. response headers) and data (i.e. + response body) references; by using --index-paths a RAM-based + lookup table is created to eliminate a CDB read operation for this + purpose; (the memory impact is proportional to the size of all + resource paths combined; given that the number of resources is + acceptable, say up to a couple hundred thousand, one could safely + use this option;) + + • based on the resource's metadata reference, the actual metadata + (i.e. the response headers) is located; by using --index-data-meta + a RAM-based lookup table is created to eliminate a CDB read + operation for this purpose; (the memory impact is proportional to + the size of all resource metadata blocks combined; given that the + metadata blocks are deduplicated, one could safely use this + option; if one also uses --archive-mmap or --archive-inmem, then + the memory impact is only proportional to the number of resource + metadata blocks;) + + • based on the resource's data reference, the actual data (i.e. 
the + response body) is located; by using --index-data-content a + RAM-based lookup table is created to eliminate a CDB read + operation for this purpose; (the memory impact is proportional to + the size of all resource data blocks combined; one can use this + option to obtain the best performance; if one also uses + --archive-mmap or --archive-inmem, then the memory impact is only + proportional to the number of resource data blocks;) + + • --index-all enables all the options above; + + • (depending on the use-case) it is recommended to use + --index-paths; if --include-etag was not used during archival, one + can also use --index-data-meta; + + • it is recommended to use either --archive-mmap or + --archive-inmem, else (especially if data is indexed) the + resulting effect is that of loading everything in RAM; + + --security-headers-tls + Enables adding the following TLS related headers to the response: + + Strict-Transport-Security: max-age=31536000 + Content-Security-Policy: upgrade-insecure-requests + + These instruct the browser to always use HTTPS for the served + domain. (Useful even without HTTPS, when used behind a TLS + terminator, load-balancer or proxy that does support HTTPS.) + + --security-headers-disable + Disables adding a few security related headers: + + Referrer-Policy: strict-origin-when-cross-origin + X-Content-Type-Options: nosniff + X-XSS-Protection: 1; mode=block + X-Frame-Options: sameorigin + + --report + Enables periodic reporting of various metrics. Also enables + reporting a selection of metrics if certain thresholds are matched + (which most likely is a sign of high-load). + + --quiet + Disables most logging messages. + + --debug + Enables all logging messages. + + --dummy, --dummy-empty + It starts the server in a "dummy" mode, ignoring all archive related + arguments and always responding with hello world!\n (unless + --dummy-empty was used) and without additional headers except the + HTTP status line and Content-Length. 
+ + This argument can be used to benchmark the raw performance of the + underlying fasthttp, Go's net/http, or QUIC implementations; this is + the upper limit of the achievable performance given the underlying + technologies. (From my own benchmarks kawipiko adds only about + 15% overhead when actually serving the hello-world.cdb archive.) + + --delay + Enables delaying each response by a certain amount (for example + 1s, 1ms, etc.) + + It can be used to simulate the real-world network latencies, perhaps + to see how a site with many resources loads in various conditions. + (For example, see an experiment I made with an image made out of + 1425 tiles.) + + --profile-cpu and --profile-mem + Enables CPU and memory profiling using Go's profiling + infrastructure. + + + +volution.ro 2021-12-23 KAWIPIKO-SERVER(1) diff --git a/documentation/readme.rst b/documentation/readme.rst index 0fc75c1..2f8770b 100644 --- a/documentation/readme.rst +++ b/documentation/readme.rst @@ -151,7 +151,11 @@ kawipiko-server --------------- -See `dedicated manual <./documentation/manual-server.rst>`__. +See the `dedicated manual <./documentation/manuals/server.rst>`__. + +This document is also available +in `plain text <./documentation/manuals/server.txt>`__, +or as a `man page <./documentation/manuals/server.1.man>`__. @@ -160,7 +164,11 @@ kawipiko-archiver ----------------- -See `dedicated manual <./documentation/manual-archiver.rst>`__. +See the `dedicated manual <./documentation/manuals/archiver.rst>`__. + +This document is also available +in `plain text <./documentation/manuals/archiver.txt>`__, +or as a `man page <./documentation/manuals/archiver.1.man>`__. @@ -272,7 +280,7 @@ Installation ============ -See `dedicated installation document <./documentation/installation.rst>`__. +See the `dedicated installation document <./documentation/installation.rst>`__. @@ -402,7 +410,7 @@ Benchmarks ========== -See `dedicated benchmarks document <./documentation/benchmarks.rst>`__. 
+See the `dedicated benchmarks document <./documentation/benchmarks.rst>`__. @@ -536,5 +544,5 @@ References ========== -See `dedicated references document <./documentation/references.rst>`__. +See the `dedicated references document <./documentation/references.rst>`__. diff --git a/scripts/workspace.z-run b/scripts/workspace.z-run index 4cb5c5f..9b207a9 100644 --- a/scripts/workspace.z-run +++ b/scripts/workspace.z-run @@ -172,3 +172,46 @@ exec -- env -i -- ./.python/bin/restview --no-browser --listen 127.33.237.174:8080 --allowed-hosts '*' -- ./readme.rst !!
+
+<< documentation / manuals / render
+	# Renders every manual from its `.rst` source into a man page and a plain-text file.
+	# Expects to be invoked without arguments.
+	test "${#}" -eq 0
+
+	_root='./documentation/manuals'
+	_today="$( exec -- date -- '+%Y-%m-%d' )"
+	_mandoc_flags=(
+			-c -man -T utf8
+			-I os=POSIX
+			-O indent=2 -O width=78
+			-W error
+		)
+
+	for _name in archiver server ; do
+
+		printf -- '[ii] rendering `%s`...\n' "${_name}" >&2
+
+		_rst="${_root}/${_name}.rst"
+		_man="${_root}/${_name}.1.man"
+		_txt="${_root}/${_name}.txt"
+
+		# `sed` rule replacing the `.TH` line of the `rst2man` output (upper-cased name, section, date)
+		_th_rule='s#^\.TH .*#.TH "KAWIPIKO\-'"${_name^^}"'" "1" "'"${_today}"'" "volution.ro" "kawipiko"#'
+
+		# man page:  written to a `.tmp` sibling, then renamed over the final file
+		rst2man --strict < "${_rst}" \
+		| sed -r -e "${_th_rule}" \
+			>| "${_man}.tmp"
+
+		mv -T -- "${_man}.tmp" "${_man}"
+
+		# plain text:  the man page above rendered by `mandoc` and filtered through `col`
+		mandoc "${_mandoc_flags[@]}" < "${_man}" \
+		| col -b -x \
+			>| "${_txt}.tmp"
+
+		mv -T -- "${_txt}.tmp" "${_txt}"
+
+	done
+!!
+