diff options
Diffstat (limited to 'src/doc_reform/meta/metadoc_from_src.d')
-rw-r--r-- | src/doc_reform/meta/metadoc_from_src.d | 1509 |
1 files changed, 0 insertions, 1509 deletions
diff --git a/src/doc_reform/meta/metadoc_from_src.d b/src/doc_reform/meta/metadoc_from_src.d deleted file mode 100644 index cf4a7cc..0000000 --- a/src/doc_reform/meta/metadoc_from_src.d +++ /dev/null @@ -1,1509 +0,0 @@ -/+ -- Name: Spine, Doc Reform [a part of] - - Description: documents, structuring, processing, publishing, search - - static content generator - - - Author: Ralph Amissah - [ralph.amissah@gmail.com] - - - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved. - - - License: AGPL 3 or later: - - Spine (SiSU), a framework for document structuring, publishing and - search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU AFFERO General Public License as published by the - Free Software Foundation, either version 3 of the License, or (at your - option) any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see [https://www.gnu.org/licenses/]. 
- - If you have Internet connection, the latest version of the AGPL should be - available at these locations: - [https://www.fsf.org/licensing/licenses/agpl.html] - [https://www.gnu.org/licenses/agpl.html] - - - Spine (by Doc Reform, related to SiSU) uses standard: - - docReform markup syntax - - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system - - - Homepages: - [https://www.doc_reform.org] - [https://www.sisudoc.org] - - - Git - [https://git.sisudoc.org/projects/?p=software/spine.git;a=summary] - -+/ -// document abstraction: -// abstraction of sisu markup for downstream processing -// metadoc_from_src.d -module doc_reform.meta.metadoc_from_src; -@safe: -template docAbstraction() { - // ↓ abstraction imports - import - std.algorithm, - std.container, - std.file, - std.json, - std.path; - import - doc_reform.meta, - doc_reform.meta.defaults, - doc_reform.meta.rgx, - doc_reform.meta.metadoc_object_setter, - doc_reform.meta.rgx; - public import doc_reform.meta.metadoc_from_src_functions; - mixin docAbstractionFunctions; - @system auto docAbstraction(CMM,Opt,Mf) ( - char[][] markup_sourcefile_content, - CMM conf_make_meta, - Opt opt_action, - Mf manifested, - bool _new_doc - ) { - static auto rgx = RgxI(); - // ↓ abstraction init - scope(success) { - } - scope(failure) { - } - scope(exit) { - destroy(the_document_toc_section); - destroy(the_document_head_section); - destroy(the_document_body_section); - destroy(the_document_bibliography_section); - destroy(the_document_glossary_section); - destroy(the_document_blurb_section); - destroy(the_document_xml_dom_tail_section); - destroy(an_object); - destroy(processing); - destroy(biblio_arr_json); - previous_length = 0; - reset_note_numbers = true; - lev_anchor_tag = ""; - anchor_tag = ""; - } - mixin spineNode; - auto node_para_int_ = node_metadata_para_int; - auto node_para_str_ = node_metadata_para_str; - 
ObjGenericComposite comp_obj_; - line_occur = [ - "heading" : 0, - "para" : 0, - "glossary" : 0, - "blurb" : 0, - ]; - uint[string] dochas = [ - "inline_links" : 0, - "inline_notes" : 0, - "inline_notes_star" : 0, - "codeblock" : 0, - "table" : 0, - "block" : 0, - "group" : 0, - "poem" : 0, - "quote" : 0, - "images" : 0, - ]; - uint[string] pith = [ - "ocn" : 1, - "section" : 0, - "txt_is" : 0, - "block_is" : 0, - "block_state" : 0, - "block_delim" : 0, - "make_headings" : 0, - "dummy_heading_status" : 0, - "dummy_heading_multiple_objects" : 0, - "no_ocn_multiple_objects" : 0, - "verse_new" : 0, - ]; - string[string] object_number_poem = [ - "start" : "", - "end" : "" - ]; - string[] lv_ancestors_txt = [ "", "", "", "", "", "", "", "", ]; - int[string] lv = [ - "lv" : eN.bi.off, - "h0" : eN.bi.off, - "h1" : eN.bi.off, - "h2" : eN.bi.off, - "h3" : eN.bi.off, - "h4" : eN.bi.off, - "h5" : eN.bi.off, - "h6" : eN.bi.off, - "h7" : eN.bi.off, - "lev_int_collapsed" : 0, - ]; - int[string] collapsed_lev = [ - "h0" : eN.bi.off, - "h1" : eN.bi.off, - "h2" : eN.bi.off, - "h3" : eN.bi.off, - "h4" : eN.bi.off, - "h5" : eN.bi.off, - "h6" : eN.bi.off, - "h7" : eN.bi.off - ]; - string[string] heading_match_str = [ - "h_A": "^(none)", - "h_B": "^(none)", - "h_C": "^(none)", - "h_D": "^(none)", - "h_1": "^(none)", - "h_2": "^(none)", - "h_3": "^(none)", - "h_4": "^(none)" - ]; - Regex!char[string] heading_match_rgx = [ - "h_A": regex(r"^(none)"), - "h_B": regex(r"^(none)"), - "h_C": regex(r"^(none)"), - "h_D": regex(r"^(none)"), - "h_1": regex(r"^(none)"), - "h_2": regex(r"^(none)"), - "h_3": regex(r"^(none)"), - "h_4": regex(r"^(none)") - ]; - string _anchor_tag; - string toc_txt_; - an_object["glossary_nugget"] = ""; - an_object["blurb_nugget"] = ""; - comp_obj_ = set_object_heading("lev4", "frontmatter", "toc", "Table of Contents"); - comp_obj_.metainfo.identifier = ""; - comp_obj_.metainfo.dummy_heading = false; - comp_obj_.metainfo.object_number_off = true; - 
comp_obj_.metainfo.object_number_type = 0; - comp_obj_.tags.segment_anchor_tag_epub = "toc"; - comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub; - comp_obj_.tags.in_segment_html = comp_obj_.tags.anchor_tag_html; - comp_obj_.ptr.html_segnames = html_segnames_ptr; - comp_obj_.tags.anchor_tags = ["toc"]; - tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html; - tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub; - auto toc_head = comp_obj_; - html_segnames_ptr_cntr++; - the_document_toc_section = [toc_head]; - static auto mkup = InlineMarkup(); - static auto munge = ObjInlineMarkupMunge(); - auto note_section = NotesSection(); - auto bookindex_extract_hash = BookIndexNuggetHash(); - string[][string] lev4_subtoc; - string[][string] segnames = ["html": ["toc"], "epub": ["toc"]]; - int cnt1 = 1; int cnt2 = 1; int cnt3 = 1; - // abstraction init ↑ - debug (substitutions) { - writeln(__LINE__, ":", __FILE__, ": DEBUG substitutions:"); - if (!(conf_make_meta.make.headings.empty)) { - writeln(conf_make_meta.make.headings); - } - if (conf_make_meta.make.substitute) { - foreach(substitution_pair; conf_make_meta.make.substitute) { - writeln("regex to match: ", substitution_pair[Substitute.match]); - writeln("substitution to make: ", substitution_pair[Substitute.markup]); - } - } - if (conf_make_meta.make.bold) { - writeln("regex to match: ", conf_make_meta.make.bold[Substitute.match]); - writeln("substitution to make: ", conf_make_meta.make.bold[Substitute.markup]); - } - if (conf_make_meta.make.emphasis) { - writeln("regex to match: ", conf_make_meta.make.emphasis[Substitute.match]); - writeln("substitution to make: ", conf_make_meta.make.emphasis[Substitute.markup]); - } - if (conf_make_meta.make.italics) { - writeln("regex to match: ", conf_make_meta.make.italics[Substitute.match]); - writeln("substitution to make: ", conf_make_meta.make.italics[Substitute.markup]); 
- } - } - auto loopMarkupSrcByLine( - char[][] markup_sourcefile_content, - string[string] an_object, - uint[string] pith, - ) { - _loopMarkupSrcByLineStruct ret; - srcDocLoopLineByLine_: - foreach (line; markup_sourcefile_content) { - // ↓ markup document/text line by line - // "line" variable can be empty but should never be null - // scope - scope(exit) { } - scope(failure) { - stderr.writefln( - "\n%s\n%s\n\n%s:%s\nFAILED while processing the file: ❮❮ %s ❯❯ on line with text:\n%s\n", - __MODULE__, __FUNCTION__, - __FILE__, __LINE__, - manifested.src.filename, line, - ); - } - debug(source) { writeln(line); } - debug(srclines) { if (!line.empty) { writefln("* %s", line); } } - if (!line.empty) { pith = line._check_ocn_status_(pith); } - if ( pith["block_is"] == eN.blk_is.code - && pith["block_state"] == eN.blk_state.on - ) { - // block object: code - { - ST_txt_by_line_block_generic _get = line.txt_by_line_block_code(an_object, pith); - { - an_object = _get.this_object; - pith = _get.pith; - } - } - continue; - } else if (!matchFirst(line, rgx.skip_from_regular_parse)) { - // object other than "code block" object - // (includes regular text paragraph, headings & blocks other than code) - // heading, glossary, blurb, poem, group, block, quote, table - line = line.inline_markup_faces; // by text line (rather than by text object), linebreaks in para problematic - if (line.matchFirst(rgx.heading_biblio) - || (pith["section"] == eN.sect.bibliography - && ((!(line.matchFirst(rgx.heading_glossary))) - && (!(line.matchFirst(rgx.heading_blurb))) - && (!(line.matchFirst(rgx.heading))) - && (!(line.matchFirst(rgx.comment))))) - ) { - pith["section"] = eN.sect.bibliography; - if (opt_action.backmatter && opt_action.section_biblio) { - { - ST_txt_by_line_block_biblio _get = line.txt_by_line_block_biblio(pith, bib_entry, biblio_entry_str_json, biblio_arr_json); - { - pith = _get.pith; - bib_entry = _get.bib_entry; - biblio_entry_str_json = _get.biblio_entry_str_json; - 
biblio_arr_json = _get.biblio_arr_json; - } - } - debug(bibliobuild) { - writeln("- ", biblio_entry_str_json); - writeln("-> ", biblio_arr_json.length); - } - } - continue; - } else if (line.matchFirst(rgx.heading_glossary) - || (pith["section"] == eN.sect.glossary - && ((!(line.matchFirst(rgx.heading_biblio))) - && (!(line.matchFirst(rgx.heading_blurb))) - && (!(line.matchFirst(rgx.heading))) - && (!(line.matchFirst(rgx.comment))))) - ) { - // within section (block object): glossary - debug(glossary) { writeln(__LINE__); writeln(line); } - pith["section"] = eN.sect.glossary; - if (opt_action.backmatter && opt_action.section_glossary) { - ST_the_section add_to_glossary_sect = line.build_the_glossary_section(pith, tag_assoc); // double check, should not be necessary to pass pith - the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[0]; - if (add_to_glossary_sect.comp_section_obj.length > 1) { // heading - the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[1]; - } - pith = add_to_glossary_sect.pith; - tag_assoc = add_to_glossary_sect.tag_assoc; - } - continue; - } else if (line.matchFirst(rgx.heading_blurb) - || (pith["section"] == eN.sect.blurb - && ((!(line.matchFirst(rgx.heading_glossary))) - && (!(line.matchFirst(rgx.heading_biblio))) - && (!(line.matchFirst(rgx.heading))) - && (!(line.matchFirst(rgx.comment))))) - ) { - pith["section"] = eN.sect.blurb; - debug(blurb) { writeln(__LINE__); writeln(line); } - if ((opt_action.backmatter && opt_action.section_blurb) && !(line.empty)) { - ST_the_section add_to_blurb_sect = line.build_the_blurb_section(pith, tag_assoc, opt_action); // double check, should not be necessary to pass pith - the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[0]; - if (add_to_blurb_sect.comp_section_obj.length > 1) { // heading - the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[1]; - } - pith = add_to_blurb_sect.pith; - tag_assoc = add_to_blurb_sect.tag_assoc; - } - 
continue; - } else if (pith["block_state"] == eN.blk_state.on) { - if (pith["block_is"] == eN.blk_is.quote) { - line = line - ._doc_header_and_make_substitutions_(conf_make_meta) - ._doc_header_and_make_substitutions_fontface_(conf_make_meta); - { - auto _get = line.txt_by_line_block_quote(an_object, pith); - { - an_object = _get.this_object; - pith = _get.pith; - } - } - continue; - } else if (pith["block_is"] == eN.blk_is.group) { - line = line - ._doc_header_and_make_substitutions_(conf_make_meta) - ._doc_header_and_make_substitutions_fontface_(conf_make_meta) - .replaceAll(rgx.para_delimiter, mkup.br_line_spaced ~ "$1"); - { - auto _get = line.txt_by_line_block_group(an_object, pith); - { - an_object = _get.this_object; - pith = _get.pith; - } - } - continue; - } else if (pith["block_is"] == eN.blk_is.block) { - line = line - ._doc_header_and_make_substitutions_(conf_make_meta) - ._doc_header_and_make_substitutions_fontface_(conf_make_meta); - if (auto m = line.match(rgx.spaces_keep)) { - line = line - .replaceAll(rgx.spaces_keep, (m.captures[1]).translate([ ' ' : mkup.nbsp ])); - } - { - auto _get = line.txt_by_line_block_block(an_object, pith); - { - an_object = _get.this_object; - pith = _get.pith; - } - } - continue; - } else if (pith["block_is"] == eN.blk_is.poem) { - { - auto _get = line.txt_by_line_block_poem(an_object, pith, cntr, object_number_poem, conf_make_meta, tag_in_seg); - { - an_object = _get.this_object; - pith = _get.pith; - cntr = _get.cntr; - } - } - continue; - } else if (pith["block_is"] == eN.blk_is.table) { - { - auto _get = line.txt_by_line_block_table(an_object, pith, conf_make_meta); - { - an_object = _get.this_object; - pith = _get.pith; - conf_make_meta = _get.conf_make_meta; - } - } - continue; - } - } else { - // not within a block group - assert( - (pith["block_state"] == eN.blk_state.off) - || (pith["block_state"] == eN.blk_state.closing), - "block status: none or closed" - ); - if (line.matchFirst(rgx.block_open)) { - if 
(line.matchFirst(rgx.block_poem_open)) { - // poem to verse exceptions! - object_reset(an_object); - processing.remove("verse"); - object_number_poem["start"] = obj_cite_digits.object_number.to!string; - } - { - auto _get = line.txt_by_line_block_start(pith, dochas, object_number_poem); - { - pith = _get.pith; - dochas = _get.dochas; - object_number_poem = _get.object_number_poem; - } - } - continue; - } else if (!line.empty) { - // line not empty - non blocks (headings, paragraphs) & closed blocks - assert(!line.empty, "line tested, line not empty surely:\n \"" ~ line ~ "\""); - assert( - (pith["block_state"] == eN.blk_state.off) - || (pith["block_state"] == eN.blk_state.closing), - "code block status: none or closed" - ); - if (pith["block_state"] == eN.blk_state.closing) { - debug(check) { writeln(__LINE__); writeln(line); } - assert( - line.matchFirst(rgx.book_index_item) - || line.matchFirst(rgx.book_index_item_open) - || pith["section"] == eN.sect.book_index, - "\nblocks closed, unless followed by book index, non-matching line:\n \"" - ~ line ~ "\"" - ); - } - if (line.matchFirst(rgx.book_index_item) - || line.matchFirst(rgx.book_index_item_open) - || pith["section"] == eN.sect.book_index) { - { // book_index - auto _get = line.flow_book_index_(an_object, book_idx_tmp, pith, opt_action); - { - an_object = _get.this_object; - pith = _get.pith; - book_idx_tmp = _get.book_idx_tmp; - } - } - } else { - // not book_index - an_object_key = "body_nugget"; - if (auto m = line.matchFirst(rgx.comment)) { - // matched comment - debug(comment) { writeln(line); } - an_object[an_object_key] ~= line ~= "\n"; - comp_obj_comment = comp_obj_comment.init; - comp_obj_comment.metainfo.is_of_part = "comment"; // breaks flow - comp_obj_comment.metainfo.is_of_section = "comment"; // breaks flow - comp_obj_comment.metainfo.is_of_type = "comment"; - comp_obj_comment.metainfo.is_a = "comment"; - comp_obj_comment.text = an_object[an_object_key].strip; - the_document_body_section ~= 
comp_obj_comment; - { - auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); - { - line_occur = _get.line_occur; - an_object = _get.this_object; - pith = _get.pith; - } - } - processing.remove("verse"); - ++cntr; - } else if ((line_occur["para"] == eN.bi.off - && line_occur["heading"] == eN.bi.off) - && pith["txt_is"] == eN.txt_is.off - ) { // heading or para but neither flag nor line exists - if ((conf_make_meta.make.headings.length > 2) - && (pith["make_headings"] == eN.bi.off)) { - // heading found - { - auto _get = line.flow_heading_found_(heading_match_str, conf_make_meta.make.headings, heading_match_rgx, pith); - { - heading_match_str = _get.heading_match_str; - heading_match_rgx = _get.heading_match_rgx; - pith = _get.pith; - } - } - } - if (pith["make_headings"] == eN.bi.on - && (line_occur["para"] == eN.bi.off - && line_occur["heading"] == eN.bi.off) - && pith["txt_is"] == eN.txt_is.off - ) { - // heading make set - { - auto _get = line.flow_heading_make_set_(line_occur, heading_match_rgx, pith); - { - line = _get.line; - an_object = _get.this_object; - pith = _get.pith; - } - } - } - // TODO node info: all headings identified at this point, - // - extract node info here?? - // - how long can it wait? - // - should be incorporated in composite objects - // - should happen before endnote links set (they need to be moved down?) 
- if (line.matchFirst(rgx.headings)) { - // heading match - line = line._doc_header_and_make_substitutions_(conf_make_meta); - { - auto _get = line.flow_heading_matched_( - an_object, - line_occur, - an_object_key, - lv, - collapsed_lev, - pith, - conf_make_meta, - ); - { - an_object = _get.this_object; - pith = _get.pith; - } - } - } else if (line_occur["para"] == eN.bi.off) { - // para match - an_object_key = "body_nugget"; - line = line - ._doc_header_and_make_substitutions_(conf_make_meta) - ._doc_header_and_make_substitutions_fontface_(conf_make_meta); - { - auto _get = line.flow_para_match_(an_object, an_object_key, indent, bullet, pith, line_occur); - { - an_object = _get.this_object; - an_object_key = _get.this_object_key; - pith = _get.pith; - indent = _get.indent; - bullet = _get.bullet; - line_occur = _get.line_occur; - } - } - } - } else if (line_occur["heading"] > eN.bi.off) { - // heading - debug(heading) { writeln(line); } - an_object[an_object_key] ~= line ~= "\n"; - ++line_occur["heading"]; - } else if (line_occur["para"] > eN.bi.off) { - // paragraph - debug(para) { writeln(an_object_key, "-> ", line); } - line = line - ._doc_header_and_make_substitutions_(conf_make_meta) - ._doc_header_and_make_substitutions_fontface_(conf_make_meta); - an_object[an_object_key] ~= " " ~ line; - ++line_occur["para"]; - } - } - } else if (pith["block_state"] == eN.blk_state.closing) { - // line empty, with blocks flag - { - auto _get = line.flow_block_flag_line_empty_( - an_object, - bookindex_extract_hash, - the_document_body_section, - bookindex_unordered_hashes, - obj_cite_digits, - comp_obj_, - cntr, - pith, - object_number_poem, - conf_make_meta, - tag_in_seg, - ); - { - an_object = _get.this_object; - the_document_body_section = _get.the_document_body_section; - bookindex_unordered_hashes = _get.bookindex_unordered_hashes; - obj_cite_digits = _get.obj_cite_digits; - comp_obj_ = _get.comp_obj_; - cntr = _get.cntr; - pith = _get.pith; - } - } - } else { - // 
line.empty, post contents, empty variables: - assert( - line.empty, - "\nline should be empty:\n \"" - ~ line ~ "\"" - ); - assert( - (pith["block_state"] == eN.blk_state.off), - "code block status: none" - ); - if (_new_doc) { - tag_assoc = tag_assoc.init; - lv0to3_tags = lv0to3_tags.init; - tag_in_seg = tag_in_seg.init; - } - if (pith["txt_is"] == eN.txt_is.heading - && line_occur["heading"] > eN.bi.off - ) { - // heading object (current line empty) - obj_cite_digits = (an_object["lev_markup_number"].to!int == 0) - ? ocn_emit(eN.ocn.reset) - : ocn_emit(pith["ocn"]); - an_object["is"] = "heading"; - an_object_key = "body_nugget"; - ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_object_and_anchor_tags_struct - = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, ((_new_doc) ? Yes._new_doc : No._new_doc)); - an_object["substantive"] = substantive_object_and_anchor_tags_struct.obj_txt; - anchor_tag = substantive_object_and_anchor_tags_struct.anchor_tag; - if (_new_doc) { - cnt1 = 1; - cnt2 = 1; - cnt3 = 1; - _new_doc = false; - } - if ( - an_object["lev_markup_number"].to!int == 4 - && (!(anchor_tag.empty) - || (lv0to3_tags.length > 0)) - ) { - tag_in_seg["seg_lv4"] = anchor_tag; - tag_in_seg["seg_lv1to4"] = anchor_tag; - lev_anchor_tag = anchor_tag; - tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"]; - tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"]; - if (lv0to3_tags.length > 0) { - // names used for html markup segments 1 to 4 (rather than epub which has separate segments for A to D) - foreach (lv0_to_lv3_html_tag; lv0to3_tags) { - tag_assoc[lv0_to_lv3_html_tag]["seg_lv4"] = anchor_tag; - } - } - anchor_tag_ = anchor_tag; - lv0to3_tags = lv0to3_tags.init; - } else if (an_object["lev_markup_number"].to!int > 4) { - tag_in_seg["seg_lv4"] = anchor_tag_; - tag_in_seg["seg_lv1to4"] = anchor_tag_; - lev_anchor_tag = anchor_tag; - tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"]; - 
tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"]; - } else if (an_object["lev_markup_number"].to!int < 4) { - string segn; - switch (an_object["lev_markup_number"].to!int) { - // names used for epub markup segments A to D - case 0: - segn = "_the_title"; - goto default; - case 1: - segn = "_part_" ~ cnt1.to!string; - ++cnt1; - goto default; - case 2: - segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string; - ++cnt2; - goto default; - case 3: - segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string ~ "_" ~ cnt3.to!string; - ++cnt3; - goto default; - default: - lv0to3_tags ~= obj_cite_digits.object_number.to!string; - lv0to3_tags ~= segn; - tag_in_seg["seg_lv4"] = segn; // for html segname need following lv4 not yet known - tag_in_seg["seg_lv1to4"] = segn; - break; - } - } - an_object["bookindex_nugget"] - = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : ""; - bookindex_unordered_hashes - = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg); - _anchor_tag = obj_cite_digits.identifier; - // (incrementally build toc) table of contents here! 
- { - auto _get = obj_im.flow_table_of_contents_gather_headings( - an_object, - conf_make_meta, - tag_in_seg, - _anchor_tag, - lev4_subtoc, - the_document_toc_section, - ); - { - the_document_toc_section = _get.the_document_toc_section; - lev4_subtoc = _get.lev4_subtoc; - } - } - if (an_object["lev_markup_number"] == "4") { - segnames["html"] ~= tag_in_seg["seg_lv4"]; - html_segnames_ptr = html_segnames_ptr_cntr; - html_segnames_ptr_cntr++; - } - if (an_object["lev_markup_number"].to!int <= 4) { - segnames["epub"] ~= tag_in_seg["seg_lv1to4"]; - } - auto comp_obj_ = node_construct.node_emitter_heading( - an_object, - tag_in_seg, - lev_anchor_tag, - tag_assoc, - obj_cite_digits, // OCNset - cntr, // int - heading_ptr, // int - lv_ancestors_txt, // string[] - html_segnames_ptr, // int - substantive_object_and_anchor_tags_struct, - ); - ++heading_ptr; - debug(segments) { - writeln(an_object["lev_markup_number"]); - writeln(tag_in_seg["seg_lv4"]); - writeln(tag_in_seg["seg_lv1to4"]); - } - the_document_body_section ~= comp_obj_; - debug(objectrelated1) { writeln(line); } // check - { - auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); - { - line_occur = _get.line_occur; - an_object = _get.this_object; - pith = _get.pith; - } - } - an_object.remove("lev"); - an_object.remove("lev_markup_number"); - processing.remove("verse"); - ++cntr; - } else if (pith["txt_is"] == eN.txt_is.para - && line_occur["para"] > eN.bi.off - ) { // paragraph object (current line empty) - repeated character paragraph separator - if ((an_object[an_object_key].to!string).matchFirst(rgx.repeated_character_line_separator)) { - pith["ocn"] = eN.ocn.off; - } - obj_cite_digits = ocn_emit(pith["ocn"]); - an_object["bookindex_nugget"] = ("bookindex_nugget" in an_object) ? 
an_object["bookindex_nugget"] : ""; - bookindex_unordered_hashes = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg); - an_object["is"] = "para"; - auto comp_obj_ = node_construct.node_location_emitter( - content_non_header, - tag_in_seg, - lev_anchor_tag, - tag_assoc, - obj_cite_digits, - cntr, - heading_ptr-1, - an_object["is"], - ); - ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_obj_misc_struct - = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, No._new_doc); - an_object["substantive"] = substantive_obj_misc_struct.obj_txt; - anchor_tag = substantive_obj_misc_struct.anchor_tag; - comp_obj_ = set_object_generic("body", "body", "para", "para", an_object["substantive"].to!string.strip, obj_cite_digits.object_number); - comp_obj_.tags.html_segment_anchor_tag_is = tag_in_seg["seg_lv4"]; - comp_obj_.tags.epub_segment_anchor_tag_is = tag_in_seg["seg_lv1to4"]; - comp_obj_.metainfo.identifier = obj_cite_digits.identifier; - comp_obj_.metainfo.object_number_off = (obj_cite_digits.off == 0) ? 
true : false; // TODO - comp_obj_.metainfo.o_n_book_index = obj_cite_digits.bkidx; - comp_obj_.metainfo.object_number_type = obj_cite_digits.type; - comp_obj_.attrib.indent_hang = indent["hang_position"]; - comp_obj_.attrib.indent_base = indent["base_position"]; - comp_obj_.attrib.bullet = bullet; - comp_obj_.tags.anchor_tags = [anchor_tag]; anchor_tag=""; - comp_obj_.has.inline_notes_reg = substantive_obj_misc_struct.has_notes_reg; - comp_obj_.has.inline_notes_star = substantive_obj_misc_struct.has_notes_star; - comp_obj_.has.inline_links = substantive_obj_misc_struct.has_links; - comp_obj_.has.image_without_dimensions = substantive_obj_misc_struct.has_images_without_dimensions; - the_document_body_section ~= comp_obj_; - tag_assoc = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc); - { - auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); - { - line_occur = _get.line_occur; - an_object = _get.this_object; - pith = _get.pith; - } - } - indent = [ - "hang_position" : 0, - "base_position" : 0, - ]; - bullet = false; - processing.remove("verse"); - ++cntr; - // } else { // could be useful to test line variable should be empty and never null - } - } // close else for line empty - } // close else for not the above - } // close after non code, other blocks or regular text - // unless (the_document_body_section.length == 0) ? 
- if (the_document_body_section.length > 0) { - if (((the_document_body_section[$-1].metainfo.is_a == "para") - || (the_document_body_section[$-1].metainfo.is_a == "heading") - || (the_document_body_section[$-1].metainfo.is_a == "quote") - || (the_document_body_section[$-1].metainfo.is_a == "group") - || (the_document_body_section[$-1].metainfo.is_a == "block") - || (the_document_body_section[$-1].metainfo.is_a == "verse")) - && (the_document_body_section.length > previous_length)) { - if ((the_document_body_section[$-1].metainfo.is_a == "heading") - && (the_document_body_section[$-1].metainfo.heading_lev_markup < 5)) { - pith["section"] = eN.sect.unset; - } - if (the_document_body_section[$-1].metainfo.is_a == "verse") { - // scan for endnotes for whole poem (each verse in poem) - foreach (i; previous_length .. the_document_body_section.length) { - if (the_document_body_section[i].metainfo.is_a == "verse") { - if ((the_document_body_section[i].text).match( - rgx.inline_notes_al_all_note - )) { - object_notes = note_section.gather_notes_for_endnote_section( - the_document_body_section, - tag_in_seg, - (i).to!int, - ); - } - } - } - } else { - // scan object for endnotes - previous_length = the_document_body_section.length.to!int; - if ((the_document_body_section[$-1].text).match( - rgx.inline_notes_al_all_note - )) { - previous_count = (the_document_body_section.length -1).to!int; - object_notes = note_section.gather_notes_for_endnote_section( - the_document_body_section, - tag_in_seg, - (the_document_body_section.length-1).to!int, - ); - } - } - previous_length = the_document_body_section.length.to!int; - } - } - } - ret.toc = the_document_toc_section; - ret.body = the_document_body_section; - ret.glossary = the_document_glossary_section; - ret.blurb = the_document_blurb_section; - ret.object_notes = object_notes; - ret.segnames = segnames; - return ret; - } - { // loopMarkupSrcByLine - auto _doc_by_line = loopMarkupSrcByLine(markup_sourcefile_content, an_object, 
pith); - the_document_toc_section = _doc_by_line.toc; - the_document_body_section = _doc_by_line.body; - the_document_glossary_section = _doc_by_line.glossary; - the_document_blurb_section = _doc_by_line.blurb; - segnames = _doc_by_line.segnames; - object_notes = _doc_by_line.object_notes; // endnotes, compare, not sure is used - destroy(_doc_by_line); - } - { // EOF backMatter - comp_obj_ = set_object_heading("lev1", "backmatter", "tail", ""); - comp_obj_.metainfo.identifier = ""; - comp_obj_.metainfo.dummy_heading = false; - comp_obj_.metainfo.object_number_off = false; - comp_obj_.metainfo.object_number_type = 0; - comp_obj_.tags.segment_anchor_tag_epub = "_part_eof"; - comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub; - comp_obj_.tags.in_segment_html = "tail"; - comp_obj_.tags.anchor_tags = ["section_eof"]; - comp_obj_.metainfo.dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0]; - comp_obj_.metainfo.dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0]; - the_document_xml_dom_tail_section ~= comp_obj_; - tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html; - tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub; - } - // endNotes - ST_endnotes en_st = note_section.backmatter_endnote_objects(obj_cite_digits, opt_action); - { // endnotes - the_document_endnotes_section = en_st.endnotes; - obj_cite_digits = en_st.ocn; - debug(endnotes) { - writefln("%s %s", __LINE__, the_document_endnotes_section.length); - foreach (o; the_document_endnotes_section) { writeln(o); } - } - } - { // glossary - if (an_object["glossary_nugget"].length == 0) { - comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Glossary section"); - comp_obj_.metainfo.identifier = ""; - comp_obj_.metainfo.dummy_heading = true; - comp_obj_.metainfo.object_number_off = true; - comp_obj_.metainfo.object_number_type = 0; - the_document_glossary_section 
~= comp_obj_; - } - debug(glossary) { foreach (gloss; the_document_glossary_section) { writeln(gloss.text); } } - } - { // bibliography - string[] biblio_unsorted_incomplete = biblio_arr_json.dup; - ST_biblio_section biblio_section = backmatter_make_the_bibliography_section(biblio_unsorted_incomplete, bib_arr_json); - the_document_bibliography_section = biblio_section.bibliography_section; - tag_assoc = biblio_section.tag_assoc; - } - { // bookindex - BookIndexReportSection bi = BookIndexReportSection(); - ST_bookindex bi_st - = bi.backmatter_bookindex_build_abstraction_section(bookindex_unordered_hashes, obj_cite_digits, opt_action); - destroy(bookindex_unordered_hashes); - the_document_bookindex_section = bi_st.bookindex; - obj_cite_digits = bi_st.ocn; - debug(bookindex) { foreach (bi_entry; the_document_bookindex_section) { writeln(bi_entry); } } - } - { // blurb - if (an_object["blurb_nugget"].length == 0) { - comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Blurb section"); - comp_obj_.metainfo.identifier = ""; - comp_obj_.metainfo.object_number_off = true; - comp_obj_.metainfo.object_number_type = 0; - comp_obj_.tags.segment_anchor_tag_epub = ""; - comp_obj_.tags.anchor_tag_html = ""; - comp_obj_.tags.in_segment_html = ""; - the_document_blurb_section ~= comp_obj_; - } - debug(blurb) { foreach (blurb; the_document_blurb_section) { writeln(blurb.text); } } - } - { // toc gather backmatter - the_document_toc_section ~= backmatter_gather_table_of_contents(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section); // - } - { // document head and body - the_document_head_section ~= the_document_body_section[0]; - the_document_body_section = the_document_body_section[1..$]; - } - { // document ancestors - ST_ancestors get_ancestors; - get_ancestors = the_document_body_section.after_doc_determine_ancestors(the_document_endnotes_section, 
the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section); - the_document_body_section = get_ancestors.the_document_body_section; - the_document_endnotes_section = get_ancestors.the_document_endnotes_section; - the_document_glossary_section = get_ancestors.the_document_glossary_section; - the_document_bibliography_section = get_ancestors.the_document_bibliography_section; - the_document_bookindex_section = get_ancestors.the_document_bookindex_section; - the_document_blurb_section = get_ancestors.the_document_blurb_section; - } - { // document segnames - ST_segnames get_segnames; - get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); // - segnames = get_segnames.segnames; - html_segnames_ptr_cntr = get_segnames.html_segnames_ptr_cntr; - html_segnames_ptr = get_segnames.html_segnames_ptr; - } - // document head - string[] segnames_0_to_4; - foreach (ref obj; the_document_head_section) { - if (obj.metainfo.is_a == "heading") { - debug(dom) { writeln(obj.text); } - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; - } - if (obj.metainfo.heading_lev_markup == 0) { - // TODO second hit (of two) with same assertion failure, check, fix and reinstate - // assert( obj.metainfo.ocn == 1, - // "Title OCN should be 1 not: " ~ obj.metainfo.ocn.to!string); // bug introduced 0.18.1 - obj.metainfo.ocn = 1; - obj.metainfo.identifier = "1"; - obj.metainfo.object_number_type = OCNtype.ocn; - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = 
obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } - obj = _links(obj); - } - if (the_document_toc_section.length > 1) { - // scroll - dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; - dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; - foreach (ref obj; the_document_toc_section) { - if (obj.metainfo.is_a == "heading") { - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; - if (obj.metainfo.heading_lev_markup == 4) { - obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; - assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], - obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); - } - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } - obj = _links(obj); - } - } - // images - string[] _images; - // multiple 1~ levels, loop through document body - if (the_document_body_section.length > 1) { - foreach (ref obj; the_document_body_section) { - if (!(obj.metainfo.identifier.empty)) { - if (!(((obj.metainfo.identifier) in tag_assoc) - && ("seg_lv4" in tag_assoc[(obj.metainfo.identifier)])) - ) { - tag_assoc[(obj.metainfo.identifier)]["seg_lv4"] - = obj.tags.html_segment_anchor_tag_is; - } - tag_assoc[(obj.metainfo.identifier)]["seg_lv1to4"] - = obj.tags.epub_segment_anchor_tag_is; - } - if (obj.metainfo.is_a == "heading") { - debug(dom) { writeln(obj.text); } - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= 
obj.tags.segment_anchor_tag_epub; - if (obj.metainfo.heading_lev_markup == 4) { - obj.tags.lev4_subtoc = lev4_subtoc[obj.tags.anchor_tag_html]; - obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; - if (segnames["html"].length > obj.ptr.html_segnames + 1) { - obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; - } - assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], - obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); - } - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } else if (obj.metainfo.is_a == "para") { - _images ~= extract_images(obj.text); - obj = _image_dimensions(obj, manifested); - } - obj = _links(obj); - } - } - auto image_list = (_images.sort()).uniq; - // endnotes optional only one 1~ level - if (the_document_endnotes_section.length > 1) { - dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; - dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; - dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup; - dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status_buffer.dup; - foreach (ref obj; the_document_endnotes_section) { - if (obj.metainfo.is_a == "heading") { - debug(dom) { writeln(obj.text); } - if (obj.metainfo.heading_lev_markup == 1) { - obj_cite_digits = ocn_emit(eN.ocn.on); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - } - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; - if (obj.metainfo.heading_lev_markup == 
4) { - obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; - if (segnames["html"].length > obj.ptr.html_segnames + 1) { - obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; - } - assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], - obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); - } - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } - obj = _links(obj); - } - } - // glossary optional only one 1~ level - if (the_document_glossary_section.length > 1) { - foreach (ref obj; the_document_glossary_section) { - if (obj.metainfo.is_a == "heading") { - debug(dom) { writeln(obj.text); } - if (obj.metainfo.heading_lev_markup == 1) { - obj_cite_digits = ocn_emit(eN.ocn.on); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - } - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; - if (obj.metainfo.heading_lev_markup == 4) { - obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; - if (segnames["html"].length > obj.ptr.html_segnames + 1) { - obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; - } - assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], - obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); - } - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, 
obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } else if (obj.metainfo.is_a == "glossary" && !(obj.text.empty)) { - obj_cite_digits = ocn_emit(eN.ocn.on); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - } - obj = _links(obj); - } - } - // bibliography optional only one 1~ level - if (the_document_bibliography_section.length > 1) { - foreach (ref obj; the_document_bibliography_section) { - if (obj.metainfo.is_a == "heading") { - debug(dom) { writeln(obj.text); } - if (obj.metainfo.heading_lev_markup == 1) { - obj_cite_digits = ocn_emit(eN.ocn.on); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - } - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; - if (obj.metainfo.heading_lev_markup == 4) { - obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; - if (segnames["html"].length > obj.ptr.html_segnames + 1) { - obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; - } - assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], - obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); - } - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } else if (obj.metainfo.is_a == "bibliography") { - obj_cite_digits = ocn_emit(eN.ocn.on); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - } - obj = _links(obj); - } - } - // book index, optional only one 1~ level - int ocn_ = obj_cite_digits.object_number; - int 
ocn_bkidx_ = 0; - int ocn_bidx_; - if (the_document_bookindex_section.length > 1) { // scroll - dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; - dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; - foreach (ref obj; the_document_bookindex_section) { - if (obj.metainfo.is_a == "heading") { - // debug(dom) { } - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; - } - if (obj.metainfo.heading_lev_markup == 1) { - obj_cite_digits = ocn_emit(eN.ocn.on); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - } - if (obj.metainfo.heading_lev_markup <= 4) { - if (obj.metainfo.heading_lev_markup == 4) { - obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; - if (segnames["html"].length > obj.ptr.html_segnames + 1) { - obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; - } - assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], - obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); - } - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } else if (obj.metainfo.is_a == "bookindex") { - obj_cite_digits = ocn_emit(eN.ocn.bkidx); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; - obj.metainfo.object_number_type = OCNtype.bkidx; - } - obj = _links(obj); - } - // TODO assert failure, reinstate - // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?"); - } - // 
blurb optional only one 1~ level - if (the_document_blurb_section.length > 1) { - foreach (ref obj; the_document_blurb_section) { - if (obj.metainfo.is_a == "heading") { - debug(dom) { writeln(obj.text); } - if (obj.metainfo.heading_lev_markup == 1) { - obj_cite_digits = ocn_emit(eN.ocn.on); - obj.metainfo.ocn = obj_cite_digits.object_number; - obj.metainfo.identifier = obj_cite_digits.identifier; - } - if (obj.metainfo.heading_lev_markup <= 4) { - segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; - if (obj.metainfo.heading_lev_markup == 4) { - obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; - if (segnames["html"].length > obj.ptr.html_segnames + 1) { - obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; - } - assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], - obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); - } - } - // dom structure (marked up & collapsed) - if (opt_action.meta_processing_xml_dom) { - obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); - obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); - } - obj = obj.obj_heading_ancestors(lv_ancestors_txt); - } else if (obj.metainfo.is_a == "blurb") { - obj_cite_digits = ocn_emit(eN.ocn.off); - obj.metainfo.object_number_off = obj_cite_digits.off; - obj.metainfo.object_number_type = OCNtype.non; - } - obj = _links(obj); - } - } - // get descendants - if (the_document_body_section.length > 1) { - auto pairs = after_doc_get_descendants( - the_document_head_section ~ - the_document_body_section ~ - the_document_endnotes_section ~ - the_document_glossary_section ~ - the_document_bibliography_section ~ - the_document_bookindex_section ~ - the_document_blurb_section ~ - the_document_xml_dom_tail_section - ); - debug(descendants_tuple) { - pairs = pairs.sort(); - foreach (pair; pairs) { // (pair; 
pairs.sort()) - writeln(pair[0], "..", pair[1]); - } - } - foreach (ref obj; the_document_head_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - if (the_document_body_section.length > 1) { - foreach (ref obj; the_document_body_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - } - if (the_document_endnotes_section.length > 1) { - foreach (ref obj; the_document_endnotes_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - } - if (the_document_glossary_section.length > 1) { - foreach (ref obj; the_document_glossary_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - } - if (the_document_bibliography_section.length > 1) { - foreach (ref obj; the_document_bibliography_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - } - if (the_document_bookindex_section.length > 1) { - foreach (ref obj; the_document_bookindex_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - } - if (the_document_blurb_section.length > 1) { - foreach (ref obj; the_document_blurb_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - } - if (the_document_xml_dom_tail_section.length > 1) { - foreach (ref obj; 
the_document_xml_dom_tail_section) { - if (obj.metainfo.is_a == "heading") { - foreach (pair; pairs) { - if (obj.metainfo.ocn == pair[0]) { - obj.metainfo.last_descendant_ocn = pair[1]; - } - } - } - } - } - } - // TODO - // - note create/insert heading object sole purpose eof close all open tags - // sort out: - // - obj.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status; - // - obj.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status; - comp_obj_ = set_object_heading("lev1", "empty", "empty", ""); - comp_obj_.metainfo.identifier = ""; - comp_obj_.metainfo.dummy_heading = true; - comp_obj_.metainfo.object_number_off = true; - comp_obj_.metainfo.object_number_type = 0; - comp_obj_.tags.segment_anchor_tag_epub = ""; - comp_obj_.tags.anchor_tag_html = ""; - comp_obj_.tags.in_segment_html = ""; - comp_obj_.tags.html_segment_anchor_tag_is = ""; - comp_obj_.tags.epub_segment_anchor_tag_is = ""; - comp_obj_.metainfo.heading_lev_markup = 9; - comp_obj_.metainfo.heading_lev_collapsed = 9; - comp_obj_.metainfo.parent_ocn = 0; - comp_obj_.metainfo.parent_lev_markup = 0; - comp_obj_.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status.dup; - comp_obj_.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status.dup; - comp_obj_ = comp_obj_.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, 0); - comp_obj_ = comp_obj_.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, 0); - comp_obj_ = comp_obj_.obj_heading_ancestors(lv_ancestors_txt); - // the_dom_tail_section ~= comp_obj_; // remove tail for now, decide on later - // the doc - ObjGenericComposite[][string] document_the = [ - "head": the_document_head_section, - "toc": the_document_toc_section, - // substantive/body: - "body": the_document_body_section, - // backmatter: - "endnotes": the_document_endnotes_section, - "glossary": the_document_glossary_section, - "bibliography": 
the_document_bibliography_section, - "bookindex": the_document_bookindex_section, - "blurb": the_document_blurb_section, - // dom tail only - "tail": the_document_xml_dom_tail_section, - ]; - // document parts keys as needed - string[][string] document_section_keys_sequenced = [ - "scroll": ["head", "toc", "body",], - "seg": ["head", "toc", "body",], - "sql": ["head", "body",], - "latex": ["head", "toc", "body",] - ]; - if (document_the["endnotes"].length > 1) { - document_section_keys_sequenced["scroll"] ~= "endnotes"; - document_section_keys_sequenced["seg"] ~= "endnotes"; - document_section_keys_sequenced["latex"] ~= "endnotes"; - } - if (document_the["glossary"].length > 1) { - document_section_keys_sequenced["scroll"] ~= "glossary"; - document_section_keys_sequenced["seg"] ~= "glossary"; - document_section_keys_sequenced["sql"] ~= "glossary"; - document_section_keys_sequenced["latex"] ~= "glossary"; - } - if (document_the["bibliography"].length > 1) { - document_section_keys_sequenced["scroll"] ~= "bibliography"; - document_section_keys_sequenced["seg"] ~= "bibliography"; - document_section_keys_sequenced["sql"] ~= "bibliography"; - document_section_keys_sequenced["latex"] ~= "bibliography"; - } - if (document_the["bookindex"].length > 1) { - document_section_keys_sequenced["scroll"] ~= "bookindex"; - document_section_keys_sequenced["seg"] ~= "bookindex"; - document_section_keys_sequenced["sql"] ~= "bookindex"; - document_section_keys_sequenced["latex"] ~= "bookindex"; - } - if (document_the["blurb"].length > 1) { - document_section_keys_sequenced["scroll"] ~= "blurb"; - document_section_keys_sequenced["seg"] ~= "blurb"; - document_section_keys_sequenced["sql"] ~= "blurb"; - document_section_keys_sequenced["latex"] ~= "blurb"; - } - if ((opt_action.html) - || (opt_action.html_scroll) - || (opt_action.html_seg) - || (opt_action.epub)) { - document_section_keys_sequenced["scroll"] ~= "tail"; - document_section_keys_sequenced["seg"] ~= "tail"; - } - // segnames - 
// segnames: take private copies before the working state below is destroyed,
// so the accessors of DocHas_ keep returning this document's values
string[] segnames_4      = segnames["html"].dup;
string[] segnames_lv1to4 = segnames["epub"].dup; // only read by the debug block below
debug(segnames) {
  writeln("segnames_lv4: ", segnames_4);
  writeln("segnames_lv1to4: ", segnames_lv1to4);
}
// restart: clear the working state used while abstracting this document
// NOTE(review): these names are not declared in this part of the function;
// presumably they outlive a single call, which is why they are reset - confirm
destroy(the_document_head_section);
destroy(the_document_toc_section);
destroy(the_document_body_section);
destroy(the_document_endnotes_section);
destroy(the_document_glossary_section);
destroy(the_document_bibliography_section);
destroy(the_document_bookindex_section);
destroy(the_document_blurb_section);
destroy(the_document_xml_dom_tail_section);
destroy(segnames);
destroy(bookindex_unordered_hashes);
destroy(an_object);
obj_cite_digits                            = ocn_emit(eN.ocn.reset);
biblio_arr_json                            = [];
obj_cite_digit_                            = 0;
html_segnames_ptr                          = 0;
html_segnames_ptr_cntr                     = 0;
content_non_header                         = "8";
dom_structure_markedup_tags_status         = [ 0, 0, 0, 0, 0, 0, 0, 0,];
dom_structure_markedup_tags_status_buffer  = [ 0, 0, 0, 0, 0, 0, 0, 0,];
dom_structure_collapsed_tags_status        = [ 0, 0, 0, 0, 0, 0, 0, 0,];
dom_structure_collapsed_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,];
lev_anchor_tag                             = "";
anchor_tag                                 = "";
// identify parts
// DocHas_: read-only accessors over what was found in the document.
// It is a nested struct: every accessor reads a variable of the enclosing
// function (dochas, image_list, document_section_keys_sequenced, segnames_4,
// segnames_0_to_4, tag_assoc) rather than a field of its own.
struct DocHas_ {
  // counters, looked up by key in dochas
  uint inline_links() {
    return dochas["inline_links"];
  }
  uint inline_notes_reg() {
    return dochas["inline_notes"];
  }
  uint inline_notes_star() {
    return dochas["inline_notes_star"];
  }
  uint codeblocks() {
    return dochas["codeblock"];
  }
  uint tables() {
    return dochas["table"];
  }
  uint blocks() {
    return dochas["block"];
  }
  uint groups() {
    return dochas["group"];
  }
  uint poems() {
    return dochas["poem"];
  }
  uint quotes() {
    return dochas["quote"];
  }
  // number of distinct images in image_list (the sorted, uniq'd image names)
  // counts the elements of a saved copy of the range, so image_list itself is
  // not consumed; this replaces counting the "," separated pieces of the
  // range's string form, which over-counted any file name containing a comma
  ulong images() {
    return image_list.save.count;
  }
  // the sorted, de-duplicated image names themselves
  auto imagelist() {
    return image_list;
  }
  // section keys in output order, wrapped by docSectKeysSeq
  auto keys_seq() {
    return docSectKeysSeq!()(document_section_keys_sequenced);
  }
  // copy of segnames["html"] taken before the restart above
  string[] segnames_lv4() {
    return segnames_4;
  }
  // epub segment anchor tags gathered from headings with markup level <= 4
  string[] segnames_lv_0_to_4() {
    return segnames_0_to_4;
  }
  string[string][string] tag_associations() {
    return tag_assoc;
  }
}
auto doc_has() {
  return DocHas_();
}
// the doc to be returned
// pairs the sectioned document objects with the DocHas_ accessors
struct ST_docAbstraction {
  ObjGenericComposite[][string] document_the;
  DocHas_                       doc_has;
}
ST_docAbstraction ret;
{
  ret.document_the = document_the;
  ret.doc_has      = doc_has;
}
return ret;
} // ← closed: abstract doc source
} // ← closed: template docAbstraction