From 1c9d09f8593b482aa7bcf66f9b9c41cf5b80923a Mon Sep 17 00:00:00 2001 From: Ciprian Dorin Craciun Date: Thu, 23 Dec 2021 20:35:36 +0200 Subject: [PATCH] [documentation] Embed manuals in executables, add `--man` flag to display. --- scripts/workspace.z-run | 5 + sources/cmd/archiver/archiver.go | 14 ++ sources/cmd/archiver/manual.txt | 177 ++++++++++++++++++++++++ sources/cmd/archiver/usage.txt | 21 +-- sources/cmd/server/manual.txt | 225 +++++++++++++++++++++++++++++++ sources/cmd/server/server.go | 14 ++ sources/cmd/server/usage.txt | 24 ++-- 7 files changed, 449 insertions(+), 31 deletions(-) create mode 100644 sources/cmd/archiver/manual.txt create mode 100644 sources/cmd/server/manual.txt diff --git a/scripts/workspace.z-run b/scripts/workspace.z-run index 9b207a9..f0348d0 100644 --- a/scripts/workspace.z-run +++ b/scripts/workspace.z-run @@ -212,6 +212,11 @@ "./documentation/manuals/${_manual}.txt" \ # + cp -T -- \ + "./documentation/manuals/${_manual}.txt" \ + "./sources/cmd/${_manual}/manual.txt" \ + # + done !! 
diff --git a/sources/cmd/archiver/archiver.go b/sources/cmd/archiver/archiver.go index 417a123..01acec8 100644 --- a/sources/cmd/archiver/archiver.go +++ b/sources/cmd/archiver/archiver.go @@ -1014,6 +1014,17 @@ func walkPath (_context *context, _pathResolved string, _pathInArchive string, _ func Main () () { + if len (os.Args) == 2 { + switch os.Args[1] { + case "--help", "-h" : + os.Stderr.WriteString (usageText) + return + case "--man" : + os.Stderr.WriteString (manualText) + return + } + } + runtime.GOMAXPROCS (2) debug.SetGCPercent (75) debug.SetMaxThreads (8) @@ -1257,6 +1268,9 @@ func gobUnmarshal (_data []byte, _object interface{}) (error) { //go:embed usage.txt var usageText string +//go:embed manual.txt +var manualText string + func init () { usageText = strings.ReplaceAll (usageText, "@{SCHEMA}", CurrentSchemaVersion) } diff --git a/sources/cmd/archiver/manual.txt b/sources/cmd/archiver/manual.txt new file mode 100644 index 0000000..f08344b --- /dev/null +++ b/sources/cmd/archiver/manual.txt @@ -0,0 +1,177 @@ +KAWIPIKO-ARCHIVER(1) kawipiko KAWIPIKO-ARCHIVER(1) + + + +NAME + kawipiko -- blazingly fast static HTTP server - kawipiko-archiver + + >> kawipiko-archiver --help + + --sources + + --archive + + --compress + --compress-level + --compress-cache + + --exclude-index + --exclude-strip + --exclude-cache + --include-etag + + --exclude-file-listing + --include-folder-listing + + --progress + --debug + + + ---- + + + + +FLAGS + --sources + The path to the source folder that is the root of the static website + content. + + --archive + The path to the target CDB file that contains the archived static + content. + + --compress, and --compress-level + Each individual file (and consequently of the corresponding HTTP + response body) is compressed with either gzip, zopfli or brotli; by + default (or alternatively with identity) no compression is used. 
+ + Even if compression is explicitly requested, if the compression + ratio is below a certain threshold (depending on the uncompressed + size), the file is stored without any compression. (It's senseless + to force the client to spend time and decompress the response body + if that time is not recovered during network transmission.) + + The compression level can be chosen, the value depending on the + algorithm: + + • gzip -- -1 for algorithm default, -2 for Huffman only, 0 to 9 for + fast to slow; + + • zopfli -- -1 for algorithm default, 0 to 30 iterations for fast to + slow; + + • brotli -- -1 for algorithm default, 0 to 9 for fast to slow, -2 + for extreme; + + • (by "algorithm default", it is meant "what that algorithm + considers the recommended default compression level";) + + • kawipiko by default uses the maximum compression level for each + algorithm; (i.e. 9 for gzip, 30 for zopfli, and -2 for brotli;) + + --sources-cache , and --compress-cache + At the given path a single file is created (that is a BBolt + database), that will be used to cache the following information: + + • in case of --sources-cache, the fingerprint of each file contents + is stored, so that if the file was not changed, re-reading it + shouldn't be attempted unless it is absolutely necessary; also if + the file is small enough, its contents is stored in this database + (deduplicated by its fingerprint); + + • in case of --compress-cache the compression outcome of each file + contents is stored (deduplicated by its fingerprint), so that + compression is done only once over multiple runs; + + Each of these caches can be safely reused between multiple related + archives, especially when they have many files in common. Each of + these caches can be independently used (or shared). + + Using these caches allows one to very quickly rebuild an archive + when only a couple of files have been changed, without even touching + the file-system for the unchanged ones. 
+ + --exclude-index + Disables using index.* files (where .* is one of .html, .htm, + .xhtml, .xht, .txt, .json, and .xml) to respond to a request whose + URL path ends in / (corresponding to the folder wherein index.* file + is located). (This can be used to implement "slash" blog style + URL's like /blog/whatever/ which maps to /blog/whatever/index.html.) + + --exclude-strip + Disables using a file with the suffix .html, .htm, .xhtml, .xht, and + .txt to respond to a request whose URL does not exactly match an + existing file. (This can be used to implement "suffix-less" blog + style URL's like /blog/whatever which maps to /blog/whatever.html.) + + --exclude-cache + Disables adding a Cache-Control: public, immutable, max-age=3600 + header that forces the browser (and other intermediary proxies) to + cache the response for an hour (the public and max-age=3600 + arguments), and furthermore not request it even on reloads (the + immutable argument). + + --include-etag + Enables adding an ETag response header that contains the SHA256 of + the response body. + + By not including the ETag header (i.e. the default), and because + identical headers are stored only once, if one has many files of the + same type (that in turn without ETag generates the same headers), + this can lead to significant reduction in stored headers blocks, + including reducing RAM usage. (At this moment it does not support + HTTP conditional requests, i.e. the If-None-Match, If-Modified-Since + and their counterparts; however this ETag header might be used in + conjunction with HEAD requests to see if the resource has changed.) + + --exclude-file-listing + Disables the creation of an internal list of files that can be used + in conjunction with the --index-all flag of the kawipiko-server. + + --include-folder-listing + Enables the creation of an internal list of folders. (Currently not + used by the kawipiko-server tool.) + + --progress + Enables periodic reporting of various metrics. 
+ + --debug + Enables verbose logging. It will log various information about the + archived files (including compression statistics). + +IGNORED FILES + + • any file with the following prefixes: ., #; + + • any file with the following suffixes: ~, #, .log, .tmp, .temp, .lock; + + • any file that contains the following: #; + + • any file that exactly matches the following: Thumbs.db, .DS_Store; + + • (at the moment these rules are not configurable through flags;) + +WILDCARD FILES + By placing a file whose name matches _wildcard.* (i.e. with the prefix + _wildcard. and any other suffix), it will be used to respond to any + request whose URL fails to find a "better" match. + + These wildcard files respect the folder hierarchy, in that wildcard + files in (direct or transitive) subfolders override the wildcard file + in their parents (direct or transitive). + +SYMLINKS, HARDLINKS, LOOPS, AND DUPLICATED FILES + You can freely use symlinks (including pointing outside of the content + root) and they will be crawled during archival respecting the "logical" + hierarchy they introduce. (Any loop that you introduce into the + hierarchy will be ignored and a warning will be issued.) + + You can safely symlink or hardlink the same file (or folder) in + multiple places (within the content hierarchy), and its data will be + stored only once. (The same applies to duplicated files that have + exactly the same data.) 
+ + + +volution.ro 2021-12-23 KAWIPIKO-ARCHIVER(1) diff --git a/sources/cmd/archiver/usage.txt b/sources/cmd/archiver/usage.txt index ff6066e..31d1542 100644 --- a/sources/cmd/archiver/usage.txt +++ b/sources/cmd/archiver/usage.txt @@ -1,15 +1,4 @@ - ==== kawipiko -- blazingly fast static HTTP server ==== - - | Documentation, issues and sources: - | * https://github.com/volution/kawipiko - | Authors: - | * Ciprian Dorin Craciun - | ciprian@volution.ro - | ciprian.craciun@gmail.com - | https://volution.ro/ciprian - ----------------------------------------------------------- - kawipiko-archiver --sources @@ -30,11 +19,13 @@ --exclude-file-listing --include-folder-listing - --progress - --debug + --progress --debug + + --help (show this short help) + --man (show the full manual) ** supported archive version: @{SCHEMA} - ** for details see: - https://github.com/volution/kawipiko/blob/development/documentation/manual-archiver.rst + || kawipiko, + || Ciprian Dorin Craciun, diff --git a/sources/cmd/server/manual.txt b/sources/cmd/server/manual.txt new file mode 100644 index 0000000..2ecf120 --- /dev/null +++ b/sources/cmd/server/manual.txt @@ -0,0 +1,225 @@ +KAWIPIKO-SERVER(1) kawipiko KAWIPIKO-SERVER(1) + + + +NAME + kawipiko -- blazingly fast static HTTP server - kawipiko-server + + >> kawipiko-server --help + + --archive + --archive-inmem (memory-loaded archive file) + --archive-mmap (memory-mapped archive file) + --archive-preload (preload archive in OS cache) + + --bind : (HTTP, only HTTP/1.1, FastHTTP) + --bind-2 : (HTTP, only HTTP/1.1, Go net/http) + --bind-tls : (HTTPS, only HTTP/1.1, FastHTTP) + --bind-tls-2 : (HTTPS, with HTTP/2, Go net/http) + --bind-quic : (HTTPS, with HTTP/3) + + --http1-disable + --http2-disable + --http3-alt-svc : + + --tls-bundle (TLS certificate bundle) + --tls-public (TLS certificate public) + --tls-private (TLS certificate private) + --tls-self-rsa (use self-signed RSA) + --tls-self-ed25519 (use self-signed Ed25519) + + --processes (of 
slave processes) + --threads (of threads per process) + --index-all + --index-paths + --index-data-meta + --index-data-content + + --security-headers-tls + --security-headers-disable + + --limit-memory + --timeout-disable + --profile-cpu ; --profile-mem + + --report ; --quiet ; --debug + --dummy ; --dummy-empty ; --dummy-delay + + + ---- + + + + +FLAGS + --bind , --bind-tls , --bind-2 , + --bind-tls-2 , and --bind-quic + The IP and port to listen for requests with: + + • (insecure) HTTP/1.1 for --bind, leveraging fasthttp library; + + • (secure) HTTP/1.1 over TLS for --bind-tls, leveraging fasthttp + library; + + • (insecure) HTTP/1.1 for --bind-2, leveraging Go's net/http + library; (not as performant as the fasthttp powered endpoint;) + + • (secure) H2 or HTTP/1.1 over TLS for --bind-tls-2, leveraging Go's + net/http; (not as performant as the fasthttp powered endpoint;) + + • (secure) H3 over QUIC for --bind-quic, leveraging + github.com/lucas-clemente/quic-go library; (given that H3 is + still a new protocol, this must be used with caution; also one + should use the --http3-alt-svc ;) + + • if one uses just --bind-tls (without --bind-tls-2, and without + --http2-disable), then the TLS endpoint is split between fasthttp + for HTTP/1.1 and Go's net/http for H2; + + --tls-bundle , --tls-public , and --tls-private + (optional) + If TLS is enabled, these options allow one to specify the + certificate to use, either as a single file (a bundle) or separate + files (the actual public certificate and the private key). + + If one doesn't specify any of these options, an embedded self-signed + certificate will be used. In such case, one can choose between RSA + (the --tls-self-rsa flag) or Ed25519 (the --tls-self-ed25519 flag); + + --http1-disable, --http2-disable + Disables that particular protocol. (It can be used only with + --bind-tls-2, given that fasthttp only supports HTTP/1.) 
+ + --processes and --threads + The number of processes and threads per each process to start. + (Given Go's concurrency model, the threads count is somewhat a soft + limit, hinting to the runtime the desired parallelism level.) + + It is highly recommended to use one process and as many threads as + there are cores. + + Depending on the use-case, one can use multiple processes each with + a single thread; this would reduce goroutine contention if it + causes problems. (However note that if using --archive-inmem, then + each process will allocate its own copy of the database in RAM; in + such cases it is highly recommended to use --archive-mmap.) + + --archive + The path of the CDB file that contains the archived static content. + (It can be created with the kawipiko-archiver tool.) + + --archive-inmem + Reads the CDB file in RAM, and thus all requests are served from RAM + without touching the file-system. (The memory impact is equal to + the size of the CDB archive. This can be used if enough RAM is + available to avoid swapping.) + + --archive-mmap + (recommended) The CDB file is memory mapped, thus reading its data + uses the kernel's file-system cache, as opposed to issuing read + syscalls. + + --archive-preload + Before starting to serve requests, read the CDB file so that its + data is buffered in the kernel's file-system cache. (This option + can be used with or without --archive-mmap.) + + --index-all, --index-paths, --index-data-meta, and + --index-data-content + In order to serve a request kawipiko does the following: + + • given the request's path, it is used to locate the corresponding + resource's metadata (i.e. response headers) and data (i.e. 
+ response body) references; by using --index-paths a RAM-based + lookup table is created to eliminate a CDB read operation for this + purpose; (the memory impact is proportional to the size of all + resource paths combined; given that the number of resources is + acceptable, say up to a couple hundred thousand, one could safely + use this option;) + + • based on the resource's metadata reference, the actual metadata + (i.e. the response headers) is located; by using --index-data-meta + a RAM-based lookup table is created to eliminate a CDB read + operation for this purpose; (the memory impact is proportional to + the size of all resource metadata blocks combined; given that the + metadata blocks are deduplicated, one could safely use this + option; if one also uses --archive-mmap or --archive-inmem, then + the memory impact is only proportional to the number of resource + metadata blocks;) + + • based on the resource's data reference, the actual data (i.e. the + response body) is located; by using --index-data-content a + RAM-based lookup table is created to eliminate a CDB read + operation for this purpose; (the memory impact is proportional to + the size of all resource data blocks combined; one can use this + option to obtain the best performance; if one also uses + --archive-mmap or --archive-inmem, then the memory impact is only + proportional to the number of resource data blocks;) + + • --index-all enables all the options above; + + • (depending on the use-case) it is recommended to use + --index-paths; if --exclude-etag was used during archival, one + can also use --index-data-meta; + + • it is recommended to use either --archive-mmap or + --archive-inmem, else (especially if data is indexed) the + resulting effect is that of loading everything in RAM; + + --security-headers-tls + Enables adding the following TLS related headers to the response: + + Strict-Transport-Security: max-age=31536000 + Content-Security-Policy: upgrade-insecure-requests + + 
These instruct the browser to always use HTTPS for the served + domain. (Useful even without HTTPS, when used behind a TLS + terminator, load-balancer or proxy that do support HTTPS.) + + --security-headers-disable + Disables adding a few security related headers: + + Referrer-Policy: strict-origin-when-cross-origin + X-Content-Type-Options: nosniff + X-XSS-Protection: 1; mode=block + X-Frame-Options: sameorigin + + --report + Enables periodic reporting of various metrics. Also enables + reporting a selection of metrics if certain thresholds are matched + (which most likely is a sign of high-load). + + --quiet + Disables most logging messages. + + --debug + Enables all logging messages. + + --dummy, --dummy-empty + It starts the server in a "dummy" mode, ignoring all archive related + arguments and always responding with hello world!\n (unless + --dummy-empty was used) and without additional headers except the + HTTP status line and Content-Length. + + This argument can be used to benchmark the raw performance of the + underlying fasthttp, Go's net/http, or QUIC performance; this is + the upper limit of the achievable performance given the underlying + technologies. (From my own benchmarks kawipiko adds only about + ~15% overhead when actually serving the hello-world.cdb archive.) + + --delay + Enables delaying each response with a certain amount (for example + 1s, 1ms, etc.) + + It can be used to simulate the real-world network latencies, perhaps + to see how a site with many resources loads in various conditions. + (For example, see an experiment I made with an image made out of + 1425 tiles.) + + --profile-cpu and --profile-mem + Enables CPU and memory profiling using Go's profiling + infrastructure. 
+ + + +volution.ro 2021-12-23 KAWIPIKO-SERVER(1) diff --git a/sources/cmd/server/server.go b/sources/cmd/server/server.go index 9c11d13..1928fea 100644 --- a/sources/cmd/server/server.go +++ b/sources/cmd/server/server.go @@ -623,6 +623,17 @@ func (_server *server) Printf (_format string, _arguments ... interface{}) () { func Main () () { + if len (os.Args) == 2 { + switch os.Args[1] { + case "--help", "-h" : + os.Stderr.WriteString (usageText) + return + case "--man" : + os.Stderr.WriteString (manualText) + return + } + } + log.SetPrefix (fmt.Sprintf ("[%8d] ", os.Getpid ())) Main_0 (main_0) @@ -2442,6 +2453,9 @@ var _reportUsageIoWrites = & StatMetric { //go:embed usage.txt var usageText string +//go:embed manual.txt +var manualText string + func init () { usageText = strings.ReplaceAll (usageText, "@{SCHEMA}", CurrentSchemaVersion) } diff --git a/sources/cmd/server/usage.txt b/sources/cmd/server/usage.txt index 8c3b523..48047fc 100644 --- a/sources/cmd/server/usage.txt +++ b/sources/cmd/server/usage.txt @@ -1,15 +1,4 @@ - ==== kawipiko -- blazingly fast static HTTP server ==== - - | Documentation, issues and sources: - | * https://github.com/volution/kawipiko - | Authors: - | * Ciprian Dorin Craciun - | ciprian@volution.ro - | ciprian.craciun@gmail.com - | https://volution.ro/ciprian - ----------------------------------------------------------- - kawipiko-server --archive @@ -46,13 +35,16 @@ --limit-memory --timeout-disable - --profile-cpu ; --profile-mem + --profile-cpu + --profile-mem - --report ; --quiet ; --debug - --dummy ; --dummy-empty ; --dummy-delay + --report --quiet --debug + + --help (show this short help) + --man (show the full manual) ** supported archive version: @{SCHEMA} - ** for details see: - https://github.com/volution/kawipiko/blob/development/documentation/manual-server.rst + || kawipiko, + || Ciprian Dorin Craciun,