From 860b894a10f3526e6bd73d53850764c0ad95ab99 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 29 Jun 2024 13:54:28 -0400 Subject: document digests and reduction in use of tuples --- src/sisudoc/meta/metadoc_from_src.d | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'src/sisudoc/meta/metadoc_from_src.d') diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d index 32954f1..4bd747d 100644 --- a/src/sisudoc/meta/metadoc_from_src.d +++ b/src/sisudoc/meta/metadoc_from_src.d @@ -57,6 +57,7 @@ template docAbstraction() { import std.algorithm, std.container, + std.digest.sha, std.file, std.json, std.path; @@ -970,8 +971,7 @@ template docAbstraction() { } obj = _links(obj); } - if (the_document_toc_section.length > 1) { - // scroll + if (the_document_toc_section.length > 1) { // writeln("toc"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_toc_section) { @@ -997,7 +997,7 @@ template docAbstraction() { // images string[] _images; // multiple 1~ levels, loop through document body - if (the_document_body_section.length > 1) { + if (the_document_body_section.length > 1) { // writeln("body"); foreach (ref obj; the_document_body_section) { if (!(obj.metainfo.identifier.empty)) { if (!(((obj.metainfo.identifier) in tag_assoc) @@ -1033,12 +1033,26 @@ template docAbstraction() { _images ~= extract_images(obj.text); obj = _image_dimensions(obj, manifested); } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } - auto image_list = (_images.sort()).uniq; + auto image_list = (_images.sort()).uniq; // also get digest on each image here? // workon + if (_images.length > 0) { + foreach (img; image_list) { + try { // also get sha digest on image file + // read_image + auto data = (cast(byte[]) (manifested.src.image_dir_path ~ "/" ~ img).read); + // calculate, digest, hash + writefln("%s\n%-(%02x%)::%s ⋅ %s", img, data.sha256Of, data.length, img); + writefln("%-(%02x%) ⋅ %s ⋅ %s", data.sha256Of, img, data.length); + } catch (Exception ex) { + writeln("WARNING, image not found: ", img, "\n ", manifested.src.image_dir_path ~ "/" ~ img); + } + } + } // endnotes optional only one 1~ level - if (the_document_endnotes_section.length > 1) { + if (the_document_endnotes_section.length > 1) { // writeln("endnotes"); dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup; @@ -1073,7 +1087,7 @@ template docAbstraction() { } } // glossary optional only one 1~ level - if (the_document_glossary_section.length > 1) { + if (the_document_glossary_section.length > 1) { // writeln("glossary"); foreach (ref obj; the_document_glossary_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1104,11 +1118,12 @@ template docAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } // bibliography optional only one 1~ level - if (the_document_bibliography_section.length > 1) { + if (the_document_bibliography_section.length > 1) { // writeln("bibliography"); foreach (ref obj; the_document_bibliography_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1139,6 +1154,7 @@ template docAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } @@ -1146,7 +1162,7 @@ template docAbstraction() { int ocn_ = obj_cite_digits.object_number; int ocn_bkidx_ = 0; int ocn_bidx_; - if (the_document_bookindex_section.length > 1) { // scroll + if (the_document_bookindex_section.length > 1) { // writeln("book index"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_bookindex_section) { @@ -1183,13 +1199,14 @@ template docAbstraction() { obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; obj.metainfo.object_number_type = OCNtype.bkidx; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } // TODO assert failure, reinstate // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?"); } // blurb optional only one 1~ level - if (the_document_blurb_section.length > 1) { + if (the_document_blurb_section.length > 1) { // writeln("blurb"); foreach (ref obj; the_document_blurb_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1220,6 +1237,7 @@ template docAbstraction() { obj.metainfo.object_number_off = obj_cite_digits.off; obj.metainfo.object_number_type = OCNtype.non; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } -- cgit v1.2.3