/+
- Name: SisuDoc Spine, Doc Reform [a part of]
  - Description: documents, structuring, processing, publishing, search
    - static content generator

  - Author: Ralph Amissah
    [ralph.amissah@gmail.com]

  - Copyright: (C) 2015 (continuously updated, current 2026) Ralph Amissah, All Rights
    Reserved.

  - License: AGPL 3 or later:

    Spine (SiSU), a framework for document structuring, publishing and
    search

    Copyright (C) Ralph Amissah

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Affero General Public License as published by the
    Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along with
    this program. If not, see [https://www.gnu.org/licenses/].

    If you have Internet connection, the latest version of the AGPL should be
    available at these locations:
    [https://www.fsf.org/licensing/licenses/agpl.html]
    [https://www.gnu.org/licenses/agpl.html]

  - Spine (by Doc Reform, related to SiSU) uses standard:
    - docReform markup syntax
      - standard SiSU markup syntax with modified headers and minor modifications
    - docReform object numbering
      - standard SiSU object citation numbering & system

  - Homepages:
    [https://www.sisudoc.org]
    [https://www.doc-reform.org]

  - Git
    [https://git.sisudoc.org/]

+/
/++
  module read_zip_pod;
  - extract pod zip archives to temp directory for processing
  - validate zip entries for security (path traversal, size limits)
+/
module sisudoc.io_in.read_zip_pod;
@safe:
template spineExtractZipPod() {
  import std.algorithm : canFind, endsWith, startsWith;
  import std.array : array;
  import std.conv : to;
  import std.file;
  import std.path;
  import std.regex;
  import std.stdio;
  import std.string : indexOf, lastIndexOf;
  /+ security limits for zip extraction +/
  enum size_t MAX_ENTRY_SIZE  = 50 * 1024 * 1024;  /+ 50 MB per entry +/
  enum size_t MAX_TOTAL_SIZE  = 500 * 1024 * 1024; /+ 500 MB total +/
  enum size_t MAX_ENTRY_COUNT = 500;               /+ max entries in archive +/
  enum size_t MAX_PATH_DEPTH  = 10;                /+ max '/' separators in an entry name +/
  /+ allowed entry name pattern: alphanumeric, dots, dashes, underscores, spaces, forward slashes +/
  static auto rgx_safe_entry_name = ctRegex!(`^[a-zA-Z0-9._/ -]+$`);
  /+ ↓ outcome of extractZipPod +/
  struct ZipPodResult {
    string tmp_dir;    /+ temp directory containing extracted pod +/
    string pod_dir;    /+ path to pod directory within tmp_dir +/
    bool   ok;         /+ extraction succeeded +/
    string error_msg;  /+ error description if !ok +/
  }
  /+ ↓ true when `stem` may be used as a directory name directly below the temp base;
     "." and ".." must never be used: stripExtension("...zip") is ".." and a pod
     directory built from it would resolve to the parent of the temp base +/
  bool isSafePodStem(string stem) {
    return stem.length > 0 && stem != "." && stem != "..";
  }
  /+ ↓ validate a single zip entry name for security;
     returns an empty string when the name is acceptable, otherwise a description
     of the first problem found +/
  string validateEntryName(string name) {
    /+ reject empty names +/
    if (name.length == 0)
      return "empty entry name";
    /+ reject absolute paths +/
    if (name[0] == '/')
      return "absolute path in zip entry: " ~ name;
    /+ reject path traversal (deliberately conservative: any ".." anywhere) +/
    if (name.canFind(".."))
      return "path traversal in zip entry: " ~ name;
    /+ reject null bytes +/
    if (name.indexOf('\0') >= 0)
      return "null byte in zip entry name: " ~ name;
    /+ reject backslashes (windows path separator tricks) +/
    if (name.canFind("\\"))
      return "backslash in zip entry: " ~ name;
    /+ check path depth (number of '/' separators) +/
    size_t depth = 0;
    foreach (c; name) {
      if (c == '/') depth++;
    }
    if (depth > MAX_PATH_DEPTH)
      return "path too deep in zip entry: " ~ name;
    /+ check allowed characters +/
    if (!(name.matchFirst(rgx_safe_entry_name)))
      return "disallowed characters in zip entry: " ~ name;
    return ""; /+ empty string means valid +/
  }
  /+ ↓ extract zip pod to temp directory, returns ZipPodResult
     - zip_path: path of an existing zip file; its base name (without extension)
       becomes the name of the pod directory created under tempDir/spine-zip-pod
     - on success: result.ok is true, result.tmp_dir is the temp base and
       result.pod_dir the extracted pod
     - on failure: result.ok is false, result.error_msg describes the problem and
       any partially extracted pod directory has been removed (best effort)
     - all entries are validated (name, declared size, count) before anything is
       written; real expanded sizes are checked again while extracting
     NOTE(review): the temp base has a fixed, predictable name inside the shared
     system temp directory - consider a per-run unique directory +/
  @trusted ZipPodResult extractZipPod(string zip_path) {
    import std.zip;
    ZipPodResult result;
    result.ok = false;
    /+ ↓ verify zip file exists +/
    if (!exists(zip_path) || !zip_path.isFile) {
      result.error_msg = "zip file not found: " ~ zip_path;
      return result;
    }
    /+ ↓ derive pod name from zip filename +/
    string zip_basename = zip_path.baseName.stripExtension;
    /+ ↓ refuse names that would make pod_dir resolve outside (or onto) the temp base,
       the pod directory is removed recursively below, so this must hold +/
    if (!isSafePodStem(zip_basename)) {
      result.error_msg = "cannot derive a safe pod name from zip filename: " ~ zip_path;
      return result;
    }
    /+ ↓ read and parse zip archive +/
    ZipArchive zip;
    try {
      zip = new ZipArchive(read(zip_path));
    } catch (ZipException ex) {
      result.error_msg = "failed to read zip archive: " ~ zip_path ~ " - " ~ ex.msg;
      return result;
    } catch (Exception ex) {
      result.error_msg = "error reading zip file: " ~ zip_path ~ " - " ~ ex.msg;
      return result;
    }
    /+ ↓ validate entry count +/
    if (zip.directory.length > MAX_ENTRY_COUNT) {
      result.error_msg = "zip archive has too many entries ("
        ~ zip.directory.length.to!string ~ " > " ~ MAX_ENTRY_COUNT.to!string ~ "): " ~ zip_path;
      return result;
    }
    /+ ↓ validate all entries before extracting any +/
    size_t total_size = 0;
    foreach (name, member; zip.directory) {
      /+ validate entry name +/
      string name_err = validateEntryName(name);
      if (name_err.length > 0) {
        result.error_msg = name_err;
        return result;
      }
      /+ check per-entry size (as declared in the archive directory) +/
      if (member.expandedSize > MAX_ENTRY_SIZE) {
        result.error_msg = "zip entry too large ("
          ~ member.expandedSize.to!string ~ " bytes): " ~ name;
        return result;
      }
      /+ check total size +/
      total_size += member.expandedSize;
      if (total_size > MAX_TOTAL_SIZE) {
        result.error_msg = "zip archive total size exceeds limit ("
          ~ MAX_TOTAL_SIZE.to!string ~ " bytes): " ~ zip_path;
        return result;
      }
    }
    /+ ↓ create temp directory +/
    string tmp_base = tempDir.buildPath("spine-zip-pod");
    try {
      if (!exists(tmp_base))
        mkdirRecurse(tmp_base);
    } catch (FileException ex) {
      result.error_msg = "failed to create temp base directory: " ~ ex.msg;
      return result;
    }
    /+ pod directory inside temp: tmp_base/pod_name/ +/
    string pod_dir = tmp_base.buildPath(zip_basename);
    try {
      if (exists(pod_dir))
        rmdirRecurse(pod_dir);
      mkdirRecurse(pod_dir);
    } catch (FileException ex) {
      result.error_msg = "failed to create temp pod directory: " ~ ex.msg;
      return result;
    }
    /+ ↓ record the error, remove whatever was extracted so far (best effort, a
       failure to clean up must not mask the original error) and hand back result +/
    ZipPodResult failAndCleanup(string msg) {
      result.error_msg = msg;
      try {
        rmdirRecurse(pod_dir);
      } catch (FileException) {}
      return result;
    }
    /+ ↓ extract entries +/
    /+ zip internal structure uses paths like:
         pod.manifest, conf/dr_document_make,
         pod/media/text/en/filename.sst, image/filename.png
       but the extracted pod directory needs to look like a normal pod:
         pod.manifest, conf/dr_document_make,
         media/text/en/filename.sst, image/filename.png
       The "pod/" prefix in zip entries for text files maps to the pod root. +/
    /+ ↓ pre-compute canonical pod path for containment checks; asNormalizedPath
       uses the platform separator, so the trailing separator must match it +/
    auto canonical_pod = (pod_dir.asNormalizedPath).array.to!string ~ dirSeparator;
    size_t written_total = 0; /+ bytes actually expanded so far +/
    foreach (name, member; zip.directory) {
      /+ skip directory entries +/
      if (name.length > 0 && name[$-1] == '/')
        continue;
      /+ ↓ map zip internal path to filesystem path +/
      /+ entries with "pod/" prefix: strip it so media/text/en/file.sst
         ends up at pod_dir/media/text/en/file.sst +/
      string entry_path = name;
      if (entry_path.length > 4 && entry_path.startsWith("pod/")) {
        entry_path = entry_path[4..$];
      }
      string out_path = pod_dir.buildPath(entry_path);
      /+ ↓ verify resolved path is within pod_dir (defense in depth) +/
      auto canonical_out = (out_path.asNormalizedPath).array.to!string;
      if (!canonical_out.startsWith(canonical_pod)) {
        return failAndCleanup("zip entry escapes extraction directory: " ~ name);
      }
      /+ ↓ create parent directories +/
      string parent = out_path.dirName;
      try {
        if (!exists(parent))
          mkdirRecurse(parent);
      } catch (FileException ex) {
        return failAndCleanup("failed to create directory for: " ~ name ~ " - " ~ ex.msg);
      }
      /+ ↓ decompress and write file +/
      try {
        auto data = zip.expand(member);
        /+ ↓ the sizes checked before extraction are only what the archive claims;
           re-check against the real expanded length before touching the disk +/
        if (data.length > MAX_ENTRY_SIZE) {
          return failAndCleanup("zip entry too large ("
            ~ data.length.to!string ~ " bytes): " ~ name);
        }
        written_total += data.length;
        if (written_total > MAX_TOTAL_SIZE) {
          return failAndCleanup("zip archive total size exceeds limit ("
            ~ MAX_TOTAL_SIZE.to!string ~ " bytes): " ~ zip_path);
        }
        std.file.write(out_path, data);
      } catch (Exception ex) {
        return failAndCleanup("failed to extract: " ~ name ~ " - " ~ ex.msg);
      }
    }
    /+ ↓ verify no symlinks were created (defense in depth) +/
    string symlink_err = checkForSymlinks(pod_dir);
    if (symlink_err.length > 0) {
      return failAndCleanup(symlink_err);
    }
    /+ ↓ verify pod.manifest exists in extracted content +/
    if (!exists(pod_dir.buildPath("pod.manifest"))) {
      return failAndCleanup("zip archive does not contain pod.manifest: " ~ zip_path);
    }
    result.tmp_dir = tmp_base;
    result.pod_dir = pod_dir;
    result.ok = true;
    return result;
  }
  /+ ↓ recursively check for symlinks in extracted directory;
     returns an empty string when none is found, otherwise a description of the
     first symlink (or of the error met while walking the directory) +/
  @trusted string checkForSymlinks(string dir_path) {
    try {
      foreach (entry; dirEntries(dir_path, SpanMode.depth)) {
        if (entry.isSymlink) {
          return "symlink found in zip extraction: " ~ entry.name;
        }
      }
    } catch (FileException ex) {
      return "error checking for symlinks: " ~ ex.msg;
    }
    return "";
  }
  /+ ↓ download a zip pod from a URL to a temp file +/
  enum size_t MAX_DOWNLOAD_SIZE = 200 * 1024 * 1024; /+ 200 MB download limit +/
  enum int DOWNLOAD_TIMEOUT = 120; /+ seconds +/
  static auto rgx_url_zip = ctRegex!(`^https?://[a-zA-Z0-9._:/-]+[.]zip$`);
  /+ ↓ outcome of downloadZipUrl +/
  struct DownloadResult {
    string local_path;  /+ path to downloaded temp file +/
    bool   ok;          /+ download succeeded +/
    string error_msg;   /+ error description if !ok +/
  }
  /+ ↓ true when arg starts with "http://" or "https://" and has at least one
     character after the scheme (scheme match is case-sensitive) +/
  bool isUrl(string arg) {
    return (arg.length > 8 && arg.startsWith("https://"))
      || (arg.length > 7 && arg.startsWith("http://"));
  }
  /+ ↓ true when host (lower-case, without port) must not be fetched:
     - empty or malformed host
     - localhost / *.localhost
     - IPv4 literal in a loopback, link-local, private, shared (100.64/10) or
       "this network" (0/8) range
     - any numeric-looking host that is not a strict dotted-decimal quad
       (integer, hex, octal or shortened forms that URL parsers may still accept)
     ordinary DNS names return false; what they resolve to is not checked here +/
  bool isLocalOrPrivateHost(string host) {
    /+ a single trailing dot denotes the same host ("localhost.") +/
    if (host.length > 0 && host[$-1] == '.')
      host = host[0..$-1];
    if (host.length == 0)
      return true;
    if (host == "localhost" || host.endsWith(".localhost"))
      return true;
    /+ ↓ a top-level label never starts with a digit, so a host whose last label
       does is some numeric address notation +/
    auto last_dot = host.lastIndexOf('.');
    string last_label = (last_dot >= 0) ? host[last_dot + 1 .. $] : host;
    if (last_label.length == 0)
      return true;
    if (last_label[0] < '0' || last_label[0] > '9')
      return false;
    /+ ↓ numeric form: accept only a strict dotted-decimal quad +/
    uint[4] octet;
    size_t n_octets = 0;
    size_t start = 0;
    foreach (i; 0 .. host.length + 1) {
      if (i < host.length && host[i] != '.')
        continue;
      string part = host[start .. i];
      start = i + 1;
      if (n_octets == 4)
        return true; /+ more than four components +/
      if (part.length == 0 || part.length > 3)
        return true;
      if (part.length > 1 && part[0] == '0')
        return true; /+ leading zero: may be read as octal +/
      uint v = 0;
      foreach (char c; part) {
        if (c < '0' || c > '9')
          return true; /+ e.g. hex notation +/
        v = v * 10 + (c - '0');
      }
      if (v > 255)
        return true;
      octet[n_octets++] = v;
    }
    if (n_octets != 4)
      return true;
    immutable a = octet[0], b = octet[1];
    return a == 0                           /+ 0.0.0.0/8 +/
      || a == 10                            /+ 10.0.0.0/8 +/
      || a == 127                           /+ 127.0.0.0/8 loopback +/
      || (a == 169 && b == 254)             /+ 169.254.0.0/16 link-local +/
      || (a == 172 && b >= 16 && b <= 31)   /+ 172.16.0.0/12 +/
      || (a == 192 && b == 168)             /+ 192.168.0.0/16 +/
      || (a == 100 && b >= 64 && b <= 127); /+ 100.64.0.0/10 shared address space +/
  }
  /+ ↓ download the zip at url into tempDir/spine-zip-pod using the curl executable
     - url: must be http(s), match rgx_url_zip and not name a local/private host
     - on success: result.ok is true and result.local_path is the downloaded file
     - on failure: result.ok is false, result.error_msg describes the problem and
       a partial download is removed (best effort)
     NOTE(review): redirects are followed (max 5) and redirect targets are not
     re-checked against the local/private host filter; neither is DNS resolution +/
  @trusted DownloadResult downloadZipUrl(string url) {
    import std.process : execute, environment;
    DownloadResult result;
    result.ok = false;
    /+ ↓ validate URL scheme +/
    immutable bool is_https = url.startsWith("https://");
    immutable bool is_http  = url.startsWith("http://");
    if (!is_https && !is_http) {
      result.error_msg = "only http/https URLs are supported: " ~ url;
      return result;
    }
    if (is_http) {
      stderr.writeln("WARNING: downloading over insecure http: ", url);
    }
    /+ ↓ validate URL format +/
    if (!(url.matchFirst(rgx_url_zip))) {
      result.error_msg = "URL does not match expected zip URL pattern: " ~ url;
      return result;
    }
    /+ ↓ reject URLs that could target internal services +/
    {
      import std.uni : toLower;
      string url_lower = url.toLower;
      /+ strip scheme to get host portion +/
      string after_scheme = is_https ? url_lower[8..$] : url_lower[7..$];
      /+ extract host (up to first / or :) +/
      string host = after_scheme;
      foreach (i, c; after_scheme) {
        if (c == '/' || c == ':') {
          host = after_scheme[0..i];
          break;
        }
      }
      if (isLocalOrPrivateHost(host)) {
        result.error_msg = "URL targets a local/private address: " ~ url;
        return result;
      }
    }
    /+ ↓ derive filename from URL; the stem later names the extraction directory,
       so it has to be a real name and not "." or ".." +/
    string url_basename = url.baseName;
    if (url_basename.length == 0
      || url_basename.indexOf('.') < 0
      || !isSafePodStem(url_basename.stripExtension)
    ) {
      result.error_msg = "cannot determine filename from URL: " ~ url;
      return result;
    }
    /+ ↓ create temp directory for download +/
    string tmp_base = tempDir.buildPath("spine-zip-pod");
    try {
      if (!exists(tmp_base))
        mkdirRecurse(tmp_base);
    } catch (FileException ex) {
      result.error_msg = "failed to create temp directory: " ~ ex.msg;
      return result;
    }
    string tmp_file = tmp_base.buildPath(url_basename);
    /+ ↓ download using curl (argument vector, no shell involved) +/
    auto curl_result = execute([
      "curl",
      "--silent", "--show-error",
      "--fail",                       /+ fail on HTTP errors +/
      "--location",                   /+ follow redirects +/
      "--max-redirs", "5",            /+ limit redirects +/
      "--max-time", DOWNLOAD_TIMEOUT.to!string,
      "--max-filesize", MAX_DOWNLOAD_SIZE.to!string,
      "--proto", "=https,http",       /+ restrict protocols +/
      "--output", tmp_file,
      url
    ]);
    if (curl_result.status != 0) {
      result.error_msg = "download failed: " ~ url;
      if (curl_result.output.length > 0)
        result.error_msg ~= " - " ~ curl_result.output;
      /+ clean up partial download +/
      try {
        if (exists(tmp_file)) remove(tmp_file);
      } catch (FileException) {}
      return result;
    }
    if (!exists(tmp_file) || !tmp_file.isFile) {
      result.error_msg = "download produced no file: " ~ url;
      return result;
    }
    result.local_path = tmp_file;
    result.ok = true;
    return result;
  }
  /+ ↓ clean up a downloaded temp file; a failure to remove it is reported on
     stderr only, dlr.ok is always reset to false +/
  void cleanupDownload(ref DownloadResult dlr) {
    if (dlr.local_path.length > 0 && exists(dlr.local_path)) {
      try {
        remove(dlr.local_path);
      } catch (FileException) {
        stderr.writeln("WARNING: failed to clean up downloaded file: ", dlr.local_path);
      }
    }
    dlr.ok = false;
  }
  /+ ↓ clean up extracted temp directory (the pod directory only, the shared temp
     base is left in place); a failure is reported on stderr only, zpr.ok is
     always reset to false +/
  void cleanupZipPod(ref ZipPodResult zpr) {
    if (zpr.pod_dir.length > 0 && exists(zpr.pod_dir)) {
      try {
        rmdirRecurse(zpr.pod_dir);
      } catch (FileException) {
        stderr.writeln("WARNING: failed to clean up temp zip extraction: ", zpr.pod_dir);
      }
    }
    zpr.ok = false;
  }
}