From 748c9e60ae65433f225f7ac49de7b596cc1148d3 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 8 Oct 2025 19:14:19 -0400 Subject: text output, endnotes, add caller ocn (& some cleaning) --- src/sisudoc/io_out/rgx.d | 24 ++++++++++++------------ src/sisudoc/meta/metadoc_from_src.d | 2 +- src/sisudoc/meta/metadoc_from_src_functions.d | 20 ++++++++++++++++++++ src/sisudoc/meta/rgx.d | 24 ++++++++++++------------ 4 files changed, 45 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/sisudoc/io_out/rgx.d b/src/sisudoc/io_out/rgx.d index 666e71f..b4bec5f 100644 --- a/src/sisudoc/io_out/rgx.d +++ b/src/sisudoc/io_out/rgx.d @@ -88,28 +88,28 @@ static template spineRgxOut() { static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); static inline_notes_al_gen_text = ctRegex!(`【(?P.+?)】`, "m"); - static inline_notes_al_all_note = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*】`, "mg"); - static inline_notes_al_regular_number_note = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*】`, "mg"); - // static inline_notes_al_all_note = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section - // static inline_notes_al_regular_number_note = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section + static inline_notes_al_all_note = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section + static inline_notes_al_regular_number_note = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section + // static inline_notes_al_all_note = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*】`, "mg"); + // static inline_notes_al_regular_number_note = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*】`, "mg"); static inline_notes_al_special_char_note = 
ctRegex!(`【(?P(?:[*]|[+])+)\s+(?P.+?)】`, "mg"); static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m"); static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg"); + static endnote_section_note = ctRegex!(`┥\s*⑆\^┨(?P\d+)\.┣\^┝┤(?P¤?.+?)├.+`, "mg"); /+ inline markup links +/ static inline_image = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+))\s*(?P.*?┝┤.*?├)`, "mg");
     static inline_image_without_dimensions          = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P0)h(?P0))\s*(?P.*?┝┤.*?├)`, "mg");
     static inline_image_info                        = ctRegex!(`☼?(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+)`, "mg");
     static inline_link_anchor                       = ctRegex!(`┃(?P\S+?)┃`, "mg"); // TODO *~text_link_anchor
-    // space cleaning should not be necessary
-    static inline_link                              = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P#?(\S+?))├`, "mg");
-    static inline_link_empty                        = ctRegex!(`┥\s*(?P.+?)\s*┝┤├`, "mg");
-    static inline_link_number                       = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P[0-9]+)├`, "mg"); // not used
-    static inline_link_number_only                  = ctRegex!(`\s*(?P\s*┥.+?┝)┤(?P[0-9]+)├`, "mg");
-    static inline_link_stow_uri                     = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
-    static inline_link_hash                         = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P#(?P\S+?))├`, "mg");
-    static inline_link_seg_and_hash                 = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P(?P[^/#├]*)#(?P.+?))├`, "mg");
+    static inline_link                              = ctRegex!(`┥(?P.+?)┝┤(?P#?(\S+?))├`, "mg");
+    static inline_link_empty                        = ctRegex!(`┥(?P.+?)┝┤├`, "mg");
+    static inline_link_number                       = ctRegex!(`┥(?P.+?)┝┤(?P[0-9]+)├`, "mg"); // not used
+    static inline_link_number_only                  = ctRegex!(`(?P┥.+?┝)┤(?P[0-9]+)├`, "mg");
+    static inline_link_stow_uri                     = ctRegex!(`┥(?P.+?)┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
+    static inline_link_hash                         = ctRegex!(`┥(?P.+?)┝┤(?P#(?P\S+?))├`, "mg");
+    static inline_link_seg_and_hash                 = ctRegex!(`┥(?P.+?)┝┤(?P(?P[^/#├]*)#(?P.+?))├`, "mg");
     static inline_link_clean                        = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
     static inline_link_toc_to_backmatter            = ctRegex!(`┤#(?Pendnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
     static url                                      = ctRegex!(`https?://`, "mg");
diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d
index 904444a..4240a3f 100644
--- a/src/sisudoc/meta/metadoc_from_src.d
+++ b/src/sisudoc/meta/metadoc_from_src.d
@@ -770,7 +770,7 @@ template docAbstraction() {
                 comp_obj_.has.inline_links                      = substantive_obj_misc_struct.has_links;
                 comp_obj_.has.image_without_dimensions          = substantive_obj_misc_struct.has_images_without_dimensions;
                 the_document_body_section                       ~= comp_obj_;
-                tag_assoc                                           = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc);
+                tag_assoc                                       = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc);
                 {
                   ST_txt_by_line_common_reset _get = txt_by_line_common_reset_(line_occur, an_object, pith);
                   {
diff --git a/src/sisudoc/meta/metadoc_from_src_functions.d b/src/sisudoc/meta/metadoc_from_src_functions.d
index 6718e82..53e494b 100644
--- a/src/sisudoc/meta/metadoc_from_src_functions.d
+++ b/src/sisudoc/meta/metadoc_from_src_functions.d
@@ -4233,10 +4233,30 @@ template docAbstractionFunctions() {
     int                   html_segnames_ptr_cntr,
     int                   html_segnames_ptr,
   ) {
+    /+ note number → { "ocn": object number of the body object that calls the note };
+       consulted further down when endnote-section entries are tagged with their caller ocn +/
+    string[string][string] notes_;
+    if (the_document_body_section.length > 1) {
+      foreach (ref obj; the_document_body_section) {
+        // only objects flagged as carrying regular (numbered) inline notes are scanned
+        if (obj.has.inline_notes_reg) {
+          if ((obj.text).matchFirst(rgx.inline_notes_al_gen)) {
+            foreach (m; (obj.text).matchAll(rgx.inline_notes_al_regular_number_note)) {
+              // a repeated note number overwrites the earlier entry
+              notes_[(m["num"])]["ocn"] = obj.metainfo.ocn.to!string;
+            }
+          }
+        }
+      }
+    }
     if (the_document_endnotes_section.length > 1) {
       segnames["html"] ~= "endnotes";
       segnames["epub"] ~= "endnotes";
       html_segnames_ptr = html_segnames_ptr_cntr;
+      // append " ≫<ocn>" (ocn recorded for the note number in notes_) to each matched endnote entry
+      foreach (ref obj; the_document_endnotes_section) {
+        auto matches = (obj.text).matchAll(rgx.endnote_section_note);
+        foreach (m; matches) {
+          // `in` yields null for an unrecorded note number; plain indexing would throw RangeError
+          if (auto caller = m["notenumber"] in notes_) {
+            if (auto ocn = "ocn" in *caller) {
+              obj.text = m.hit ~ " ≫" ~ *ocn;
+            }
+          }
+        }
+      }
       foreach (ref obj; the_document_endnotes_section) {
         if (obj.metainfo.is_a == "heading") {
           obj.metainfo.parent_ocn = obj.metainfo.markedup_ancestors[obj.metainfo.parent_lev_markup];
diff --git a/src/sisudoc/meta/rgx.d b/src/sisudoc/meta/rgx.d
index 1a26f73..fcac959 100644
--- a/src/sisudoc/meta/rgx.d
+++ b/src/sisudoc/meta/rgx.d
@@ -229,28 +229,28 @@ static template spineRgxIn() {
     static inline_notes_al_special                  = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
     static inline_notes_al_gen                      = ctRegex!(`【.+?】`, "m");
     static inline_notes_al_gen_text                 = ctRegex!(`【(?P.+?)】`, "m");
-    static inline_notes_al_all_note                 = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*】`, "mg");
-    static inline_notes_al_regular_number_note      = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*】`, "mg");
-    // static inline_notes_al_all_note                 = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
-    // static inline_notes_al_regular_number_note      = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+    static inline_notes_al_all_note                 = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+    static inline_notes_al_regular_number_note      = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section
+    // static inline_notes_al_all_note                 = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*】`, "mg");
+    // static inline_notes_al_regular_number_note      = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*】`, "mg");
     static inline_notes_al_special_char_note        = ctRegex!(`【(?P(?:[*]|[+])+)\s+(?P.+?)】`, "mg");
     static inline_al_delimiter_open_regular         = ctRegex!(`【\s`, "m");
     static inline_al_delimiter_open_symbol_star     = ctRegex!(`【[*]\s`, "m");
     static inline_al_delimiter_open_symbol_plus     = ctRegex!(`【[+]\s`, "m");
     static inline_text_and_note_al_                 = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
+    static endnote_section_note                     = ctRegex!(`┥\s*⑆\^┨(?P\d+)\.┣\^┝┤(?P¤?.+?)├.+`, "mg");
     /+ inline markup links +/
     static inline_image                             = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+))\s*(?P.*?┝┤.*?├)`, "mg");
     static inline_image_without_dimensions          = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P0)h(?P0))\s*(?P.*?┝┤.*?├)`, "mg");
     static inline_image_info                        = ctRegex!(`☼?(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+)`, "mg");
     static inline_link_anchor                       = ctRegex!(`┃(?P\S+?)┃`, "mg"); // TODO *~text_link_anchor
-    // space cleaning should not be necessary
-    static inline_link                              = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P#?(\S+?))├`, "mg");
-    static inline_link_empty                        = ctRegex!(`┥\s*(?P.+?)\s*┝┤├`, "mg");
-    static inline_link_number                       = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P[0-9]+)├`, "mg"); // not used
-    static inline_link_number_only                  = ctRegex!(`\s*(?P\s*┥.+?┝)┤(?P[0-9]+)├`, "mg");
-    static inline_link_stow_uri                     = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
-    static inline_link_hash                         = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P#(?P\S+?))├`, "mg");
-    static inline_link_seg_and_hash                 = ctRegex!(`┥\s*(?P.+?)\s*┝┤(?P(?P[^/#├]*)#(?P.+?))├`, "mg");
+    static inline_link                              = ctRegex!(`┥(?P.+?)┝┤(?P#?(\S+?))├`, "mg");
+    static inline_link_empty                        = ctRegex!(`┥(?P.+?)┝┤├`, "mg");
+    static inline_link_number                       = ctRegex!(`┥(?P.+?)┝┤(?P[0-9]+)├`, "mg"); // not used
+    static inline_link_number_only                  = ctRegex!(`(?P┥.+?┝)┤(?P[0-9]+)├`, "mg");
+    static inline_link_stow_uri                     = ctRegex!(`┥(?P.+?)┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
+    static inline_link_hash                         = ctRegex!(`┥(?P.+?)┝┤(?P#(?P\S+?))├`, "mg");
+    static inline_link_seg_and_hash                 = ctRegex!(`┥(?P.+?)┝┤(?P(?P[^/#├]*)#(?P.+?))├`, "mg");
     static inline_link_clean                        = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
     static inline_link_toc_to_backmatter            = ctRegex!(`┤#(?Pendnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
     static url                                      = ctRegex!(`https?://`, "mg");
-- 
cgit v1.2.3