/+
- Name: SisuDoc Spine, Doc Reform [a part of]
- Description: documents, structuring, processing, publishing, search
- static content generator
- Author: Ralph Amissah
[ralph.amissah@gmail.com]
- Copyright: (C) 2015 (continuously updated, current 2026) Ralph Amissah, All Rights Reserved.
- License: AGPL 3 or later:
Spine (SiSU), a framework for document structuring, publishing and
search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Affero General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see [https://www.gnu.org/licenses/].
If you have Internet connection, the latest version of the AGPL should be
available at these locations:
[https://www.fsf.org/licensing/licenses/agpl.html]
[https://www.gnu.org/licenses/agpl.html]
- Spine (by Doc Reform, related to SiSU) uses standard:
- docReform markup syntax
- standard SiSU markup syntax with modified headers and minor modifications
- docReform object numbering
- standard SiSU object citation numbering & system
- Homepages:
[https://www.sisudoc.org]
[https://www.doc-reform.org]
- Git
[https://git.sisudoc.org/]
+/
/++
module read_zip_pod;
- extract pod zip archives to temp directory for processing
- validate zip entries for security (path traversal, size limits)
+/
module sisudoc.io_in.read_zip_pod;
@safe:
template spineExtractZipPod() {
import std.algorithm : canFind;
import std.array : array;
import std.conv : to;
import std.file;
import std.path;
import std.regex;
import std.stdio;
import std.string : indexOf;
/+ security limits for zip extraction; an archive exceeding any of them is rejected +/
enum size_t MAX_ENTRY_SIZE = 50 * 1024 * 1024; /+ 50 MB per entry (expanded size) +/
enum size_t MAX_TOTAL_SIZE = 500 * 1024 * 1024; /+ 500 MB total (sum of expanded sizes) +/
enum size_t MAX_ENTRY_COUNT = 500; /+ max entries in archive +/
enum size_t MAX_PATH_DEPTH = 10; /+ max number of '/' separators allowed in an entry name +/
/+ allowed entry name pattern: ASCII alphanumerics, dots, underscores, forward slashes,
   spaces and dashes (note: the character class contains a literal space);
   the whole name must match, and must be non-empty +/
static auto rgx_safe_entry_name = ctRegex!(`^[a-zA-Z0-9._/ -]+$`);
/+ outcome of extractZipPod(): when ok is true the two paths are set,
   when ok is false error_msg explains why extraction was refused or failed +/
struct ZipPodResult {
string tmp_dir; /+ temp base directory under which the pod was extracted +/
string pod_dir; /+ path to the extracted pod directory within tmp_dir +/
bool ok; /+ true only when extraction succeeded +/
string error_msg; /+ error description if !ok +/
}
/+ ↓ validate a single zip entry name for security
  - name: entry name exactly as stored in the zip directory
  - returns "" when the name is acceptable, otherwise a message giving the
    reason the entry is rejected (the message includes the offending name)
  - checks are deliberately conservative: a legitimate but unusual name
    (e.g. one containing "..") is rejected rather than risk path tricks
+/
string validateEntryName(string name) {
  /+ reject empty names +/
  if (name.length == 0)
    return "empty entry name";
  /+ reject absolute paths +/
  if (name[0] == '/')
    return "absolute path in zip entry: " ~ name;
  /+ reject path traversal (any ".." anywhere in the name) +/
  if (name.canFind(".."))
    return "path traversal in zip entry: " ~ name;
  /+ reject null bytes +/
  if (name.indexOf('\0') >= 0)
    return "null byte in zip entry name: " ~ name;
  /+ reject backslashes (windows path separator tricks) +/
  if (name.canFind("\\"))
    return "backslash in zip entry: " ~ name;
  /+ reject empty path components: a name such as "pod//etc/x" is relative
     here, but turns into the absolute path "/etc/x" once a leading
     directory prefix is stripped from it +/
  if (name.canFind("//"))
    return "empty path component in zip entry: " ~ name;
  /+ check path depth (number of '/' separators) +/
  size_t depth = 0;
  foreach (c; name) {
    if (c == '/') depth++;
  }
  if (depth > MAX_PATH_DEPTH)
    return "path too deep in zip entry: " ~ name;
  /+ check allowed characters +/
  if (!(name.matchFirst(rgx_safe_entry_name)))
    return "disallowed characters in zip entry: " ~ name;
  return ""; /+ empty string means valid +/
}
/+ ↓ extract zip pod to temp directory, returns ZipPodResult
  - zip_path: path to a local zip archive
  - on success: result.ok is true, result.tmp_dir is the temp base directory
    and result.pod_dir is the directory holding the extracted pod
  - on failure: result.ok is false and result.error_msg says why; a
    partially extracted pod directory is removed again (best effort)
  - all entries are validated (name, declared size, count) before any file
    is written, and the real expanded sizes are checked again while writing
+/
@trusted ZipPodResult extractZipPod(string zip_path) {
  import std.zip;
  ZipPodResult result;
  result.ok = false;
  /+ ↓ verify zip file exists +/
  if (!exists(zip_path) || !zip_path.isFile) {
    result.error_msg = "zip file not found: " ~ zip_path;
    return result;
  }
  /+ ↓ derive pod name from zip filename +/
  string zip_basename = zip_path.baseName.stripExtension;
  /+ ↓ the pod name becomes a directory that is removed and recreated below;
     a file called "..zip" or "...zip" yields "." or "..", which would make
     that directory the temp base itself or its parent, so refuse those +/
  if (zip_basename.length == 0 || zip_basename == "." || zip_basename == "..") {
    result.error_msg = "cannot derive pod name from zip filename: " ~ zip_path;
    return result;
  }
  /+ ↓ read and parse zip archive +/
  ZipArchive zip;
  try {
    zip = new ZipArchive(read(zip_path));
  } catch (ZipException ex) {
    result.error_msg = "failed to read zip archive: " ~ zip_path ~ " - " ~ ex.msg;
    return result;
  } catch (Exception ex) {
    result.error_msg = "error reading zip file: " ~ zip_path ~ " - " ~ ex.msg;
    return result;
  }
  /+ ↓ validate entry count +/
  if (zip.directory.length > MAX_ENTRY_COUNT) {
    result.error_msg = "zip archive has too many entries ("
      ~ zip.directory.length.to!string ~ " > " ~ MAX_ENTRY_COUNT.to!string ~ "): " ~ zip_path;
    return result;
  }
  /+ ↓ validate all entries before extracting any
     (sizes here are the ones declared in the archive directory) +/
  size_t total_size = 0;
  foreach (name, member; zip.directory) {
    /+ validate entry name +/
    string name_err = validateEntryName(name);
    if (name_err.length > 0) {
      result.error_msg = name_err;
      return result;
    }
    /+ check per-entry size +/
    if (member.expandedSize > MAX_ENTRY_SIZE) {
      result.error_msg = "zip entry too large ("
        ~ member.expandedSize.to!string ~ " bytes): " ~ name;
      return result;
    }
    /+ check total size +/
    total_size += member.expandedSize;
    if (total_size > MAX_TOTAL_SIZE) {
      result.error_msg = "zip archive total size exceeds limit ("
        ~ MAX_TOTAL_SIZE.to!string ~ " bytes): " ~ zip_path;
      return result;
    }
  }
  /+ ↓ create temp directory +/
  string tmp_base = tempDir.buildPath("spine-zip-pod");
  try {
    if (!exists(tmp_base))
      mkdirRecurse(tmp_base);
  } catch (FileException ex) {
    result.error_msg = "failed to create temp base directory: " ~ ex.msg;
    return result;
  }
  /+ pod directory inside temp: tmp_base/pod_name/
     (any previous extraction of the same pod name is discarded) +/
  string pod_dir = tmp_base.buildPath(zip_basename);
  try {
    if (exists(pod_dir))
      rmdirRecurse(pod_dir);
    mkdirRecurse(pod_dir);
  } catch (FileException ex) {
    result.error_msg = "failed to create temp pod directory: " ~ ex.msg;
    return result;
  }
  /+ ↓ from here on a failure must also remove the partial extraction;
     removal is best effort, the original error is what gets reported +/
  ZipPodResult failAndCleanup(string msg) {
    result.error_msg = msg;
    try { rmdirRecurse(pod_dir); } catch (FileException) {}
    return result;
  }
  /+ ↓ extract entries +/
  /+ zip internal structure uses paths like:
       pod.manifest, conf/dr_document_make,
       pod/media/text/en/filename.sst, image/filename.png
     but the extracted pod directory needs to look like a normal pod:
       pod.manifest, conf/dr_document_make,
       media/text/en/filename.sst, image/filename.png
     The "pod/" prefix in zip entries for text files maps to the pod root.
  +/
  /+ ↓ pre-compute canonical pod path for containment checks; the trailing
     separator stops a sibling such as "podname-x" from passing as "podname" +/
  auto canonical_pod = (pod_dir.asNormalizedPath).array.to!string ~ dirSeparator;
  /+ ↓ running total of bytes actually produced by decompression +/
  size_t expanded_total = 0;
  foreach (name, member; zip.directory) {
    /+ skip directory entries +/
    if (name.length > 0 && name[$-1] == '/')
      continue;
    /+ ↓ map zip internal path to filesystem path +/
    /+ entries with "pod/" prefix: strip it so media/text/en/file.sst ends up at pod_dir/media/text/en/file.sst +/
    string entry_path = name;
    if (entry_path.length > 4 && entry_path[0..4] == "pod/") {
      entry_path = entry_path[4..$];
    }
    string out_path = pod_dir.buildPath(entry_path);
    /+ ↓ verify resolved path is within pod_dir (defense in depth) +/
    auto canonical_out = (out_path.asNormalizedPath).array.to!string;
    if (canonical_out.length < canonical_pod.length
      || canonical_out[0..canonical_pod.length] != canonical_pod)
    {
      return failAndCleanup("zip entry escapes extraction directory: " ~ name);
    }
    /+ ↓ create parent directories +/
    string parent = out_path.dirName;
    try {
      if (!exists(parent))
        mkdirRecurse(parent);
    } catch (FileException ex) {
      return failAndCleanup("failed to create directory for: " ~ name ~ " - " ~ ex.msg);
    }
    /+ ↓ decompress and write file +/
    try {
      auto data = zip.expand(member);
      /+ the sizes validated earlier are only what the archive declares;
         re-check against what decompression really produced before
         anything is written to disk +/
      if (data.length > MAX_ENTRY_SIZE) {
        return failAndCleanup("zip entry too large ("
          ~ data.length.to!string ~ " bytes): " ~ name);
      }
      expanded_total += data.length;
      if (expanded_total > MAX_TOTAL_SIZE) {
        return failAndCleanup("zip archive total size exceeds limit ("
          ~ MAX_TOTAL_SIZE.to!string ~ " bytes): " ~ zip_path);
      }
      std.file.write(out_path, data);
    } catch (Exception ex) {
      return failAndCleanup("failed to extract: " ~ name ~ " - " ~ ex.msg);
    }
  }
  /+ ↓ verify no symlinks were created (defense in depth) +/
  string symlink_err = checkForSymlinks(pod_dir);
  if (symlink_err.length > 0)
    return failAndCleanup(symlink_err);
  /+ ↓ verify pod.manifest exists in extracted content +/
  if (!exists(pod_dir.buildPath("pod.manifest")))
    return failAndCleanup("zip archive does not contain pod.manifest: " ~ zip_path);
  result.tmp_dir = tmp_base;
  result.pod_dir = pod_dir;
  result.ok = true;
  return result;
}
/+ ↓ recursively check for symlinks in extracted directory
  - dir_path: directory tree to inspect
  - returns "" when no symlink is found; otherwise a message naming the
    first symlink encountered, or describing the error that stopped the walk
  - the walk itself does not follow symlinks, so a link to a directory is
    reported as a link instead of being descended into
+/
@trusted string checkForSymlinks(string dir_path) {
  try {
    /+ third argument: followSymlink = false +/
    foreach (entry; dirEntries(dir_path, SpanMode.depth, false)) {
      if (entry.isSymlink)
        return "symlink found in zip extraction: " ~ entry.name;
    }
  } catch (FileException ex) {
    return "error checking for symlinks: " ~ ex.msg;
  }
  return "";
}
/+ ↓ limits and URL pattern used when downloading a zip pod from a URL to a temp file +/
enum size_t MAX_DOWNLOAD_SIZE = 200 * 1024 * 1024; /+ 200 MB download limit (given to curl as --max-filesize) +/
enum int DOWNLOAD_TIMEOUT = 120; /+ seconds (given to curl as --max-time) +/
/+ accepted URL shape: http or https scheme, then only ASCII alphanumerics
   and the characters . _ : / - and finally a ".zip" suffix; anything else
   (query string, fragment, userinfo, percent-escapes) does not match +/
static auto rgx_url_zip = ctRegex!(`^https?://[a-zA-Z0-9._:/-]+[.]zip$`);
/+ outcome of downloadZipUrl(): when ok is true local_path names the
   downloaded file, when ok is false error_msg explains the failure +/
struct DownloadResult {
string local_path; /+ path to downloaded temp file +/
bool ok; /+ true only when the download succeeded +/
string error_msg; /+ error description if !ok +/
}
/+ ↓ true when arg is longer than 8 characters and begins with
   "https://" or "http://" (case-sensitive) +/
bool isUrl(string arg) {
  /+ anything this short cannot hold a scheme plus a usable remainder +/
  if (arg.length <= 8)
    return false;
  string head = arg[0..8];
  if (head == "https://")
    return true;
  return head[0..7] == "http://";
}
/+ ↓ download a zip pod from a URL into the shared temp directory using curl
  - url: http or https URL ending in ".zip" (must match rgx_url_zip)
  - on success: result.ok is true and result.local_path names the file
  - on failure: result.ok is false and result.error_msg says why; a
    partial download is removed (best effort)
  - NOTE(review): the local/private address check only inspects the host as
    written in the URL; it cannot see what a host name resolves to, nor the
    target of a redirect curl follows - confirm whether that is acceptable
+/
@trusted DownloadResult downloadZipUrl(string url) {
  import std.process : execute;
  import std.uni : toLower;
  /+ ↓ parse host as a dotted-quad IPv4 literal (four decimal octets 0-255);
     returns false, leaving octets unspecified, for anything else +/
  static bool parseIPv4(string host, out uint[4] octets) {
    size_t idx = 0;    /+ octet currently being read +/
    size_t digits = 0; /+ digits seen so far in that octet +/
    foreach (c; host) {
      if (c >= '0' && c <= '9') {
        if (++digits > 3) return false;
        octets[idx] = octets[idx] * 10 + (c - '0');
      } else if (c == '.') {
        if (digits == 0 || octets[idx] > 255 || ++idx > 3) return false;
        digits = 0;
      } else {
        return false;
      }
    }
    return idx == 3 && digits > 0 && octets[3] <= 255;
  }
  DownloadResult result;
  result.ok = false;
  /+ ↓ validate URL scheme +/
  if (url.length < 8 || (url[0..8] != "https://" && url[0..7] != "http://")) {
    result.error_msg = "only http/https URLs are supported: " ~ url;
    return result;
  }
  if (url[0..7] == "http://") {
    stderr.writeln("WARNING: downloading over insecure http: ", url);
  }
  /+ ↓ validate URL format +/
  if (!(url.matchFirst(rgx_url_zip))) {
    result.error_msg = "URL does not match expected zip URL pattern: " ~ url;
    return result;
  }
  /+ ↓ reject URLs that could target internal services +/
  {
    string url_lower = url.toLower;
    /+ strip scheme to get host portion +/
    string after_scheme = (url_lower[0..8] == "https://")
      ? url_lower[8..$]
      : url_lower[7..$];
    /+ extract host (up to first / or :) +/
    string host;
    foreach (i, c; after_scheme) {
      if (c == '/' || c == ':') {
        host = after_scheme[0..i];
        break;
      }
    }
    if (host.length == 0) host = after_scheme;
    /+ a trailing dot names the same host ("localhost." is "localhost") +/
    if (host.length > 1 && host[$-1] == '.')
      host = host[0..$-1];
    bool blocked = host == "localhost"
      || (host.length > 10 && host[$-10..$] == ".localhost")
      || host == "::1"
      || host == "[::1]";
    /+ address ranges are matched on parsed octets rather than substrings,
       so that e.g. "example10.org" or "210.1.2.3" are not mistaken for
       10.0.0.0/8 while all of 127/8 and 172.16/12 are covered +/
    uint[4] o;
    if (!blocked && parseIPv4(host, o)) {
      blocked = o[0] == 0                          /+ 0.0.0.0/8 "this network" +/
        || o[0] == 10                              /+ 10.0.0.0/8 private +/
        || o[0] == 127                             /+ 127.0.0.0/8 loopback +/
        || (o[0] == 169 && o[1] == 254)            /+ 169.254.0.0/16 link-local +/
        || (o[0] == 172 && o[1] >= 16 && o[1] <= 31) /+ 172.16.0.0/12 private +/
        || (o[0] == 192 && o[1] == 168);           /+ 192.168.0.0/16 private +/
    }
    if (blocked) {
      result.error_msg = "URL targets a local/private address: " ~ url;
      return result;
    }
  }
  /+ ↓ derive filename from URL +/
  string url_basename = url.baseName;
  if (url_basename.length == 0 || url_basename.indexOf('.') < 0) {
    result.error_msg = "cannot determine filename from URL: " ~ url;
    return result;
  }
  /+ ↓ create temp directory for download +/
  string tmp_base = tempDir.buildPath("spine-zip-pod");
  try {
    if (!exists(tmp_base))
      mkdirRecurse(tmp_base);
  } catch (FileException ex) {
    result.error_msg = "failed to create temp directory: " ~ ex.msg;
    return result;
  }
  string tmp_file = tmp_base.buildPath(url_basename);
  /+ ↓ download using curl (argument list, no shell involved) +/
  auto curl_result = execute([
    "curl",
    "--silent", "--show-error",
    "--fail",                                   /+ fail on HTTP errors +/
    "--location",                               /+ follow redirects +/
    "--max-redirs", "5",                        /+ limit redirects +/
    "--max-time", DOWNLOAD_TIMEOUT.to!string,
    "--max-filesize", MAX_DOWNLOAD_SIZE.to!string,
    "--proto", "=https,http",                   /+ restrict protocols +/
    "--proto-redir", "=https,http",             /+ ... also for redirect targets +/
    "--output", tmp_file,
    url
  ]);
  if (curl_result.status != 0) {
    result.error_msg = "download failed: " ~ url;
    if (curl_result.output.length > 0)
      result.error_msg ~= " - " ~ curl_result.output;
    /+ clean up partial download +/
    try { if (exists(tmp_file)) remove(tmp_file); } catch (FileException) {}
    return result;
  }
  if (!exists(tmp_file) || !tmp_file.isFile) {
    result.error_msg = "download produced no file: " ~ url;
    return result;
  }
  result.local_path = tmp_file;
  result.ok = true;
  return result;
}
/+ ↓ remove the file a download left behind and mark the result as not ok
  - removal is best effort: a failure only produces a warning on stderr
  - dlr.ok is cleared whether or not there was a file to remove
+/
void cleanupDownload(ref DownloadResult dlr) {
  string downloaded = dlr.local_path;
  bool present = downloaded.length > 0 && exists(downloaded);
  if (present) {
    try {
      remove(downloaded);
    } catch (FileException) {
      stderr.writeln("WARNING: failed to clean up downloaded file: ", downloaded);
    }
  }
  dlr.ok = false;
}
/+ ↓ remove the extracted pod directory and mark the result as not ok
  - removal is best effort: a failure only produces a warning on stderr
  - only zpr.pod_dir is removed; zpr.tmp_dir is left in place
  - zpr.ok is cleared whether or not there was a directory to remove
+/
void cleanupZipPod(ref ZipPodResult zpr) {
  string extracted = zpr.pod_dir;
  bool present = extracted.length > 0 && exists(extracted);
  if (present) {
    try {
      rmdirRecurse(extracted);
    } catch (FileException) {
      stderr.writeln("WARNING: failed to clean up temp zip extraction: ", extracted);
    }
  }
  zpr.ok = false;
}
}