aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sisudoc
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2024-06-29 13:54:28 -0400
committerRalph Amissah <ralph.amissah@gmail.com>2024-06-29 15:39:08 -0400
commit860b894a10f3526e6bd73d53850764c0ad95ab99 (patch)
treee4a997fcacd3b2dcbff876479ebd1ee718230f1e /src/sisudoc
parentnix minor (diff)
document digests and reduction in use of tuples
Diffstat (limited to 'src/sisudoc')
-rw-r--r--src/sisudoc/io_in/read_source_files.d54
-rw-r--r--src/sisudoc/io_out/latex.d1
-rw-r--r--src/sisudoc/io_out/odt.d2
-rw-r--r--src/sisudoc/meta/metadoc.d10
-rw-r--r--src/sisudoc/meta/metadoc_from_src.d36
-rw-r--r--src/sisudoc/meta/metadoc_from_src_functions.d25
-rw-r--r--src/sisudoc/meta/metadoc_object_setter.d1
7 files changed, 83 insertions, 46 deletions
diff --git a/src/sisudoc/io_in/read_source_files.d b/src/sisudoc/io_in/read_source_files.d
index 4ba0b4f..8d814fc 100644
--- a/src/sisudoc/io_in/read_source_files.d
+++ b/src/sisudoc/io_in/read_source_files.d
@@ -56,6 +56,7 @@ module sisudoc.io_in.read_source_files;
@safe:
template spineRawMarkupContent() {
import
+ std.digest.sha,
std.file,
std.path;
import
@@ -67,6 +68,14 @@ template spineRawMarkupContent() {
static auto rgx = RgxI();
mixin spineRgxFiles;
static auto rgx_files = RgxFiles();
+ struct ST_doc_parts {
+ char[] header_raw;
+ char[][] sourcefile_body_content;
+ string[] insert_file_list;
+ string[] images_list;
+ ubyte[32] header_raw_digest;
+ ubyte[32] src_txt_digest;
+ }
string[] _images=[];
string[] _extract_images(S)(S content_block) {
string[] images_;
@@ -86,7 +95,9 @@ template spineRawMarkupContent() {
char[], "header",
char[][], "src_txt",
string[], "insert_files",
- string[], "images"
+ string[], "images",
+ ubyte[32], "header_digest",
+ ubyte[32], "src_txt_digest"
);
auto spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
auto _0_header_1_body_content_2_insert_filelist_tuple
@@ -100,43 +111,50 @@ template spineRawMarkupContent() {
= raw.markupSourceReadIn(fn_src);
return source_txt_str;
}
- final auto sourceContentSplitIntoHeaderAndBody(O)(
+ final ST_doc_parts sourceContentSplitIntoHeaderAndBody(O)(
O _opt_action,
in string source_txt_str,
in string fn_src=""
) {
auto raw = MarkupRawUnit();
- string[] insert_file_list;
- string[] images_list;
+ string[] insert_file_list_get;
+ string[] images_list_get;
HeaderContentInsertsImages t
= raw.markupSourceHeaderContentRawLineTupleArray(source_txt_str);
char[] header_raw = t.header;
+ ubyte[32] header_raw_digest = t.header.sha256Of;
char[][] sourcefile_body_content = t.src_txt;
if (fn_src.match(rgx_files.src_fn_master)) { // filename with path needed if master file (.ssm) not otherwise
auto ins = Inserts();
ContentsInsertsImages tu
= ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
sourcefile_body_content = tu.contents;
- insert_file_list = tu.insert_files.dup;
- images_list = tu.images.dup;
+ insert_file_list_get = tu.insert_files.dup;
+ images_list_get = tu.images.dup;
} else if (_opt_action.source || _opt_action.pod) {
auto ins = Inserts();
ContentsInsertsImages tu
= ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
- images_list = tu.images.dup;
+ images_list_get = tu.images.dup;
}
+ ubyte[32] src_txt_digest = sourcefile_body_content.sha256Of;
string header_type = "";
- t = tuple(
- header_raw,
- sourcefile_body_content,
- insert_file_list,
- images_list
- );
- return t;
+ ST_doc_parts ret;
+ {
+ ret.header_raw = t.header;
+ ret.sourcefile_body_content = sourcefile_body_content;
+ ret.insert_file_list = insert_file_list_get;
+ ret.images_list = images_list_get;
+ ret.header_raw_digest = t.header.sha256Of;
+ ret.src_txt_digest = sourcefile_body_content.sha256Of;
+ }
+ return ret;
}
}
struct MarkupRawUnit {
- import std.file;
+ import
+ std.digest.sha,
+ std.file;
final private string readInMarkupSource(in char[] fn_src) {
enforce(
exists(fn_src) != 0,
@@ -195,11 +213,15 @@ template spineRawMarkupContent() {
char[] header = hc[0];
char[] source_txt = hc[1];
char[][] source_line_arr = markupSourceLineArray(source_txt);
+ ubyte[32] header_digest;
+ ubyte[32] src_txt_digest;
HeaderContentInsertsImages t = tuple(
header,
source_line_arr,
file_insert_list,
- images_list
+ images_list,
+ header_digest,
+ src_txt_digest
);
return t;
}
diff --git a/src/sisudoc/io_out/latex.d b/src/sisudoc/io_out/latex.d
index 771bc57..e1b5731 100644
--- a/src/sisudoc/io_out/latex.d
+++ b/src/sisudoc/io_out/latex.d
@@ -320,7 +320,6 @@ template paperLaTeX() {
}
template outputLaTeX() {
import
- std.digest.sha,
std.file,
std.outbuffer,
std.uri,
diff --git a/src/sisudoc/io_out/odt.d b/src/sisudoc/io_out/odt.d
index d6ac27d..0450509 100644
--- a/src/sisudoc/io_out/odt.d
+++ b/src/sisudoc/io_out/odt.d
@@ -55,7 +55,6 @@ template formatODT() {
sisudoc.io_out.rgx,
sisudoc.io_out.rgx_xhtml;
import
- std.digest.sha,
std.file,
std.outbuffer,
std.uri,
@@ -651,7 +650,6 @@ template outputODT() {
sisudoc.io_out.rgx,
sisudoc.io_out.rgx_xhtml;
import
- std.digest.sha,
std.file,
std.outbuffer,
std.uri,
diff --git a/src/sisudoc/meta/metadoc.d b/src/sisudoc/meta/metadoc.d
index a1899da..ed9a5b1 100644
--- a/src/sisudoc/meta/metadoc.d
+++ b/src/sisudoc/meta/metadoc.d
@@ -64,7 +64,6 @@ template spineAbstraction() {
sisudoc.io_out.hub;
mixin spineBiblio;
mixin outputHub;
- enum headBody { header, body_content, insert_file_list, image_list }
enum makeMeta { make, meta }
enum docAbst { doc_abstract_obj, doc_has }
@system auto spineAbstraction(E,P,O,Cfg,M,S)(
@@ -89,7 +88,6 @@ template spineAbstraction() {
}
auto _header_body_insertfilelist_imagelist
= spineRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn);
- static assert(_header_body_insertfilelist_imagelist.length==4);
if ((_opt_action.debug_do)
|| (_opt_action.debug_do_stages)
) {
@@ -98,7 +96,7 @@ template spineAbstraction() {
debug(header_and_body) {
writeln(header);
writeln(_header_body_insertfilelist_imagelist.length);
- writeln(_header_body_insertfilelist_imagelist.length[headBody.body_content][0]);
+ // writeln(_header_body_insertfilelist_imagelist.length.body_content[0]);
}
/+ ↓ split header into make and meta +/
if ((_opt_action.debug_do)
@@ -109,7 +107,7 @@ template spineAbstraction() {
import sisudoc.meta.conf_make_meta_yaml;
_make_and_meta_struct =
docHeaderMakeAndMetaTupYamlExtractAndConvertToStruct!()(
- _header_body_insertfilelist_imagelist[headBody.header],
+ _header_body_insertfilelist_imagelist.header_raw,
_make_and_meta_struct,
_manifest,
_opt_action,
@@ -127,7 +125,7 @@ template spineAbstraction() {
writeln("step3 commence → (document abstraction (da); da keys; segnames; doc_matters) [", _manifest.src.filename, "]");
}
auto da = docAbstraction!()(
- _header_body_insertfilelist_imagelist[headBody.body_content],
+ _header_body_insertfilelist_imagelist.sourcefile_body_content,
_make_and_meta_struct,
_opt_action,
_manifest,
@@ -275,7 +273,7 @@ template spineAbstraction() {
auto srcs() {
struct SRC_ {
auto file_insert_list() {
- return _header_body_insertfilelist_imagelist[headBody.insert_file_list];
+ return _header_body_insertfilelist_imagelist.insert_file_list;
}
auto image_list() {
return _doc_has_struct.imagelist;
diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d
index 32954f1..4bd747d 100644
--- a/src/sisudoc/meta/metadoc_from_src.d
+++ b/src/sisudoc/meta/metadoc_from_src.d
@@ -57,6 +57,7 @@ template docAbstraction() {
import
std.algorithm,
std.container,
+ std.digest.sha,
std.file,
std.json,
std.path;
@@ -970,8 +971,7 @@ template docAbstraction() {
}
obj = _links(obj);
}
- if (the_document_toc_section.length > 1) {
- // scroll
+ if (the_document_toc_section.length > 1) { // writeln("toc"); // scroll
dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
foreach (ref obj; the_document_toc_section) {
@@ -997,7 +997,7 @@ template docAbstraction() {
// images
string[] _images;
// multiple 1~ levels, loop through document body
- if (the_document_body_section.length > 1) {
+ if (the_document_body_section.length > 1) { // writeln("body");
foreach (ref obj; the_document_body_section) {
if (!(obj.metainfo.identifier.empty)) {
if (!(((obj.metainfo.identifier) in tag_assoc)
@@ -1033,12 +1033,26 @@ template docAbstraction() {
_images ~= extract_images(obj.text);
obj = _image_dimensions(obj, manifested);
}
+ obj.metainfo.sha256 = obj.obj_digest;
obj = _links(obj);
}
}
- auto image_list = (_images.sort()).uniq;
+ auto image_list = (_images.sort()).uniq; // also get digest on each image here? // workon
+ if (_images.length > 0) {
+ foreach (img; image_list) {
+ try { // also get sha digest on image file
+ // read_image
+ auto data = (cast(byte[]) (manifested.src.image_dir_path ~ "/" ~ img).read);
+ // calculate, digest, hash
+ writefln("%s\n%-(%02x%)::%s ⋅ %s", img, data.sha256Of, data.length, img);
+ writefln("%-(%02x%) ⋅ %s ⋅ %s", data.sha256Of, img, data.length);
+ } catch (Exception ex) {
+ writeln("WARNING, image not found: ", img, "\n ", manifested.src.image_dir_path ~ "/" ~ img);
+ }
+ }
+ }
// endnotes optional only one 1~ level
- if (the_document_endnotes_section.length > 1) {
+ if (the_document_endnotes_section.length > 1) { // writeln("endnotes");
dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup;
@@ -1073,7 +1087,7 @@ template docAbstraction() {
}
}
// glossary optional only one 1~ level
- if (the_document_glossary_section.length > 1) {
+ if (the_document_glossary_section.length > 1) { // writeln("glossary");
foreach (ref obj; the_document_glossary_section) {
if (obj.metainfo.is_a == "heading") {
debug(dom) { writeln(obj.text); }
@@ -1104,11 +1118,12 @@ template docAbstraction() {
obj.metainfo.ocn = obj_cite_digits.object_number;
obj.metainfo.identifier = obj_cite_digits.identifier;
}
+ obj.metainfo.sha256 = obj.obj_digest;
obj = _links(obj);
}
}
// bibliography optional only one 1~ level
- if (the_document_bibliography_section.length > 1) {
+ if (the_document_bibliography_section.length > 1) { // writeln("bibliography");
foreach (ref obj; the_document_bibliography_section) {
if (obj.metainfo.is_a == "heading") {
debug(dom) { writeln(obj.text); }
@@ -1139,6 +1154,7 @@ template docAbstraction() {
obj.metainfo.ocn = obj_cite_digits.object_number;
obj.metainfo.identifier = obj_cite_digits.identifier;
}
+ obj.metainfo.sha256 = obj.obj_digest;
obj = _links(obj);
}
}
@@ -1146,7 +1162,7 @@ template docAbstraction() {
int ocn_ = obj_cite_digits.object_number;
int ocn_bkidx_ = 0;
int ocn_bidx_;
- if (the_document_bookindex_section.length > 1) { // scroll
+ if (the_document_bookindex_section.length > 1) { // writeln("book index"); // scroll
dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
foreach (ref obj; the_document_bookindex_section) {
@@ -1183,13 +1199,14 @@ template docAbstraction() {
obj.metainfo.o_n_book_index = obj_cite_digits.bkidx;
obj.metainfo.object_number_type = OCNtype.bkidx;
}
+ obj.metainfo.sha256 = obj.obj_digest;
obj = _links(obj);
}
// TODO assert failure, reinstate
// assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?");
}
// blurb optional only one 1~ level
- if (the_document_blurb_section.length > 1) {
+ if (the_document_blurb_section.length > 1) { // writeln("blurb");
foreach (ref obj; the_document_blurb_section) {
if (obj.metainfo.is_a == "heading") {
debug(dom) { writeln(obj.text); }
@@ -1220,6 +1237,7 @@ template docAbstraction() {
obj.metainfo.object_number_off = obj_cite_digits.off;
obj.metainfo.object_number_type = OCNtype.non;
}
+ obj.metainfo.sha256 = obj.obj_digest;
obj = _links(obj);
}
}
diff --git a/src/sisudoc/meta/metadoc_from_src_functions.d b/src/sisudoc/meta/metadoc_from_src_functions.d
index 29e675c..b5956c0 100644
--- a/src/sisudoc/meta/metadoc_from_src_functions.d
+++ b/src/sisudoc/meta/metadoc_from_src_functions.d
@@ -54,18 +54,6 @@ module sisudoc.meta.metadoc_from_src_functions;
@safe:
template docAbstractionFunctions() {
// ↓ abstraction imports
- import
- std.algorithm,
- std.container,
- std.file,
- std.json,
- std.path;
- import
- sisudoc.meta,
- sisudoc.meta.defaults,
- sisudoc.meta.rgx,
- sisudoc.meta.metadoc_object_setter,
- sisudoc.meta.rgx;
// ↓ abstraction mixins
mixin ObjectSetter;
mixin InternalMarkup;
@@ -2998,6 +2986,19 @@ template docAbstractionFunctions() {
return obj;
}
// ↑ - object tags
+ // ↓ - object digest
+ pure ubyte[32] obj_digest()(
+ ObjGenericComposite obj,
+ ) {
+ obj.metainfo.sha256 = obj.text.sha256Of;
+ // if (obj.metainfo.is_a == "heading") {
+ // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a, " ", obj.metainfo.heading_lev_markup);
+ // } else {
+ // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a);
+ // }
+ return obj.metainfo.sha256;
+ }
+ // ↑ - object digest
// ↓ - table of contents
@system ObjGenericComposite[] backmatter_gather_table_of_contents(
ObjGenericComposite[] the_document_endnotes_section,
diff --git a/src/sisudoc/meta/metadoc_object_setter.d b/src/sisudoc/meta/metadoc_object_setter.d
index a2ceff6..8b2daf0 100644
--- a/src/sisudoc/meta/metadoc_object_setter.d
+++ b/src/sisudoc/meta/metadoc_object_setter.d
@@ -173,6 +173,7 @@ template ObjectSetter() {
int parent_lev_markup = 0;
int parent_ocn = 0;
int last_descendant_ocn = 0;
+ ubyte[32] sha256;
}
struct ObjGenericComposite {
string text = "";