/+
- Name: Spine, Doc Reform [a part of]
  - Description: documents, structuring, processing, publishing, search
    - static content generator
  - Author: Ralph Amissah
    [ralph.amissah@gmail.com]
  - Copyright: (C) 2015 - 2022 Ralph Amissah, All Rights
    Reserved.
  - License: AGPL 3 or later:
    Spine (SiSU), a framework for document structuring, publishing and
    search
    Copyright (C) Ralph Amissah
    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU AFFERO General Public License as published by the
    Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.
    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.
    You should have received a copy of the GNU General Public License along with
    this program. If not, see [https://www.gnu.org/licenses/].
    If you have Internet connection, the latest version of the AGPL should be
    available at these locations:
    [https://www.fsf.org/licensing/licenses/agpl.html]
    [https://www.gnu.org/licenses/agpl.html]
  - Spine (by Doc Reform, related to SiSU) uses standard:
    - docReform markup syntax
      - standard SiSU markup syntax with modified headers and minor modifications
    - docReform object numbering
      - standard SiSU object citation numbering & system
  - Homepages:
    [https://www.doc_reform.org]
    [https://www.sisudoc.org]
  - Git
    [https://git.sisudoc.org/projects/?p=software/spine.git;a=summary]
+/
/++
  module source_read_source_files;
  - open markup files
  - if master file, scan for additional files to import/insert
+/
module doc_reform.io_in.read_source_files;
template spineRawMarkupContent() {
  import
    std.file,
    std.path,
    doc_reform.meta,
    doc_reform.io_in.paths_source,
    doc_reform.meta.rgx;
  /+ regex set used throughout this template; RgxI is presumably supplied by
     the spineRgxIn mixin (defined outside this file) -- TODO confirm +/
  mixin spineRgxIn;
  static auto rgx = RgxI();
  /+ template-scope list of image names, initially empty +/
  string[] _images=[];
  /++
    Collect the image names found in content_block.
    - content_block is converted to one string with to!string (so it may be a
      single line or an array of lines) and scanned with rgx.image
    - returns capture group 1 of every match, in order of occurrence; an empty
      array when nothing matches
    - duplicates are not removed here
  +/
  @safe string[] _extract_images(S)(S content_block) {
    string[] images_;
    string _content_block = content_block.to!string;
    /+ matchAll yields a range of matches; walk all of them, reading only its
       captures property would report just the first image of the block +/
    foreach (m; _content_block.matchAll(rgx.image)) {
      images_ ~= m[1].to!string;
    }
    return images_;
  }
  /+ RawMarkupContent instance used by the spineRawMarkupContent() function of
     this template +/
  auto rawsrc = RawMarkupContent();
  /+ result of scanning a source body for insert directives:
     - contents:     body lines, with the lines of inserted files merged in
     - insert_files: paths of the files that were inserted
     - images:       image names found while scanning +/
  alias ContentsInsertsImages = Tuple!(
    char[][], "contents",
    string[], "insert_files",
    string[], "images"
  );
  /+ a whole source document split up:
     - header:       text preceding the first level A~ heading
     - src_txt:      body text as an array of lines
     - insert_files: paths of the files that were inserted
     - images:       image names found while scanning +/
  alias HeaderContentInsertsImages = Tuple!(
    char[],   "header",
    char[][], "src_txt",
    string[], "insert_files",
    string[], "images"
  );
  /++
    Read the markup source file fn_src and return it split into header, body
    lines, inserted-file list and image list (a HeaderContentInsertsImages).
  +/
  @safe auto spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
    auto source_text = rawsrc.sourceContent(fn_src);
    return rawsrc.sourceContentSplitIntoHeaderAndBody(_opt_action, source_text, fn_src);
  }
  /++
    Entry points that turn a markup source file into header text, body lines
    and the lists of inserted files and images.
  +/
  struct RawMarkupContent {
    /+ read the markup source file fn_src in, returned as a single string +/
    @safe final sourceContent(in string fn_src) {
      auto reader = MarkupRawUnit();
      string whole_text = reader.markupSourceReadIn(fn_src);
      return whole_text;
    }
    /++
      Split source_txt_str into header and body lines, then
      - for a master file (fn_src matches rgx.src_fn_master): replace the body
        with the scanned body (inserted files merged in) and report the
        inserted files and images
      - otherwise, when _opt_action.source or _opt_action.pod is set: scan the
        body only to report images; body and insert list are left untouched
      Returns a HeaderContentInsertsImages tuple.
    +/
    @safe final auto sourceContentSplitIntoHeaderAndBody(O)(
      O         _opt_action,
      in string source_txt_str,
      in string fn_src=""
    ) {
      auto reader = MarkupRawUnit();
      HeaderContentInsertsImages split
        = reader.markupSourceHeaderContentRawLineTupleArray(source_txt_str);
      char[][] body_lines = split.src_txt;
      string[] inserted_files;
      string[] image_names;
      // filename with path needed if master file (.ssm) not otherwise
      bool is_master = !fn_src.match(rgx.src_fn_master).empty;
      if (is_master || _opt_action.source || _opt_action.pod) {
        auto scanner = Inserts();
        ContentsInsertsImages scanned
          = scanner.scan_master_src_for_insert_files_and_import_content(_opt_action, body_lines, fn_src);
        if (is_master) {
          body_lines     = scanned.contents;
          inserted_files = scanned.insert_files.dup;
        }
        image_names = scanned.images.dup;
      }
      return HeaderContentInsertsImages(
        split.header,
        body_lines,
        inserted_files,
        image_names
      );
    }
  }
  /++
    Low-level reading of markup source files and splitting of their text into
    header, body and lines.
  +/
  struct MarkupRawUnit {
    import std.file;
    /++
      Read the file fn_src in as text.
      - throws (via enforce) when fn_src does not exist
      - returns an empty string when fn_src is not a regular file, or when
        reading raises ErrnoException, UTFException or FileException; those
        are caught and ignored here
        NOTE(review): an unreadable or invalid file is thus indistinguishable
        from an empty one -- confirm this best-effort behaviour is intended
    +/
    @safe final private string readInMarkupSource(in char[] fn_src) {
      enforce(
        exists(fn_src),
        "file not found: «" ~
        fn_src ~ "»"
      );
      string source_txt_str;
      try {
        /+ existence is already established above; should the file vanish in
           between, getLinkAttributes throws FileException, caught below +/
        if (fn_src.getLinkAttributes.attrIsFile) {
          source_txt_str = fn_src.readText;
        }
      } catch (ErrnoException ex) {
      } catch (UTFException ex) {
        // Handle validation errors
      } catch (FileException ex) {
        // Handle errors
      }
      std.utf.validate(source_txt_str);
      return source_txt_str;
    }
    /++
      Split src_text on the _first_ match of rgx.heading_a ("^:?A~\s") into
      [header, content]: header is the text before the match, content is the
      match itself plus everything after it.
      - throws (via enforce) when src_text has no level A~ heading, as there
        is then nothing to split on
      - the input is cast to char[]; the returned header is a slice of the
        input's memory and should not be written to
    +/
    @trusted final private char[][] header0Content1(in string src_text) { // cast(char[])
      auto m = (cast(char[]) src_text).matchFirst(rgx.heading_a);
      /+ checking the length of the result array cannot detect a failed split
         (two elements are always appended), so check the match itself +/
      enforce(
        !m.empty,
        "document markup is broken, no level A~ heading found; "
        ~ "(header / body split is on level A~)"
      );
      char[][] header_and_content;
      header_and_content ~= m.pre;
      header_and_content ~= m.hit ~ m.post;
      return header_and_content;
    }
    /++
      Split src_text into lines on rgx.newline_eol_strip_preceding.
      The input is cast to char[] before splitting.
    +/
    @trusted final private char[][] markupSourceLineArray(in char[] src_text) { // cast(char[])
      char[][] source_line_arr
        = (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding);
      return source_line_arr;
    }
    /++
      Read in the markup source file fn_src.
      - throws (via enforce) when fn_src does not match rgx.src_pth_sst_or_ssm
      - see readInMarkupSource for how read failures are treated
    +/
    @safe string markupSourceReadIn(in string fn_src) {
      static auto rgx = RgxI();
      enforce(
        fn_src.match(rgx.src_pth_sst_or_ssm),
        "not a dr markup filename: «" ~
        fn_src ~ "»"
      );
      string source_txt_str = readInMarkupSource(fn_src);
      return source_txt_str;
    }
    /++
      Split a whole source text into its header and an array of body lines.
      The insert_files and images fields of the returned tuple are empty;
      nothing is scanned for inserts or images here.
    +/
    @safe HeaderContentInsertsImages markupSourceHeaderContentRawLineTupleArray(in string source_txt_str) {
      string[] file_insert_list = [];
      string[] images_list = [];
      char[][] hc = header0Content1(source_txt_str);
      char[] header = hc[0];
      char[] source_txt = hc[1];
      char[][] source_line_arr = markupSourceLineArray(source_txt);
      HeaderContentInsertsImages t = tuple(
        header,
        source_line_arr,
        file_insert_list,
        images_list
      );
      return t;
    }
    /++
      Read the insert file fn_src_insert and return its text as an array of
      lines.
      - throws (via enforce) when fn_src_insert does not match rgx_file
      - see readInMarkupSource for how read failures are treated
    +/
    @safe final char[][] getInsertMarkupSourceContentRawLineArray(
      in char[]    fn_src_insert,
      Regex!(char) rgx_file
    ) {
      enforce(
        fn_src_insert.match(rgx_file),
        "not a dr markup filename: «" ~
        fn_src_insert  ~ "»"
      );
      string source_txt_str = readInMarkupSource(fn_src_insert);
      char[][] source_line_arr = markupSourceLineArray(source_txt_str);
      return source_line_arr;
    }
  }
  /++
    Scans markup source lines for insert directives (files whose content is
    to be pulled into the document) and for image references.
  +/
  struct Inserts {
    alias ContentsAndImages = Tuple!(
      char[][], "insert_contents",
      string[], "images"
    );
    /++
      Scan the lines of an inserted (sub-document) file.
      - lines inside curly or tic code blocks are copied through untouched
      - a line matching rgx.insert_src_fn_ssi_or_sst (a further insert) causes
        the named file to be read, and its images collected when
        _opt_action.source or _opt_action.pod is set; neither the directive
        line nor the lines of that file are added to the result
        NOTE(review): the outline kept in the body suggests recursive
        processing of nested inserts was intended -- confirm
      - any other line is copied through, and its images collected when
        _opt_action.source or _opt_action.pod is set
      Returns the copied lines and the images found during this call only.
    +/
    @safe ContentsAndImages scan_subdoc_source(O)(
      O        _opt_action,
      char[][] markup_sourcefile_insert_content,
      string   fn_src
    ) {
      char[][] contents_insert;
      /+ images are gathered in a local list, so that what is returned never
         carries over entries from earlier calls or earlier documents +/
      string[] images_found;
      enum codeBlock { off, curly, tic, }
      codeBlock code_block_status = codeBlock.off;
      auto fn_pth_full = fn_src.match(rgx.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      foreach (line; markup_sourcefile_insert_content) {
        if (code_block_status == codeBlock.curly) {
          if (line.matchFirst(rgx.block_curly_code_close)) {
            code_block_status = codeBlock.off;
          }
          contents_insert ~= line;
        } else if (line.matchFirst(rgx.block_curly_code_open)) {
          code_block_status = codeBlock.curly;
          contents_insert ~= line;
        } else if (code_block_status == codeBlock.tic) {
          if (line.matchFirst(rgx.block_tic_close)) {
            code_block_status = codeBlock.off;
          }
          contents_insert ~= line;
        } else if (line.matchFirst(rgx.block_tic_code_open)) {
          code_block_status = codeBlock.tic;
          contents_insert ~= line;
        } else if (auto m = line.match(rgx.insert_src_fn_ssi_or_sst)) {
          auto insert_fn = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          /+ insert path is taken relative to the directory of fn_src +/
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          auto raw = MarkupRawUnit();
          auto markup_sourcesubfile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcesubfile_insert_content.length
            );
          }
          if (_opt_action.source || _opt_action.pod) {
            images_found ~= _extract_images(markup_sourcesubfile_insert_content);
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename
                 - repeat 1
              - b. else
                 - add line to new array;
                 - build image list, search for any image files to add to image list
          +/
        } else {
          contents_insert ~= line; // images to extract for image list?
          if (_opt_action.source || _opt_action.pod) {
            images_found ~= _extract_images(line);
          }
        }
      } // end src subdoc (inserts) loop
      ContentsAndImages t = tuple(
        contents_insert,
        images_found
      );
      return t;
    }
    /++
      Scan the body lines of a source document, importing the content of any
      insert files it names.
      - lines inside curly or tic code blocks are copied through untouched
      - a line matching rgx.insert_src_fn_ssi_or_sst is replaced by the lines
        of the named file (as processed by scan_subdoc_source); the file's
        path is recorded in the insert file list
      - any other line is copied through
      - images are collected only when _opt_action.source or _opt_action.pod
        is set
      Returns the resulting lines, the list of inserted files, and the image
      names sorted with duplicates removed.
    +/
    @safe ContentsInsertsImages scan_master_src_for_insert_files_and_import_content(O)(
      O        _opt_action,
      char[][] sourcefile_body_content,
      string   fn_src
    ) {
      import std.algorithm;
      char[][] contents;
      enum codeBlock { off, curly, tic, }
      codeBlock code_block_status = codeBlock.off;
      auto fn_pth_full = fn_src.match(rgx.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      string[] images_found     =[];
      string[] insert_file_list =[];
      foreach (line; sourcefile_body_content) {
        if (code_block_status == codeBlock.curly) {
          if (line.matchFirst(rgx.block_curly_code_close)) {
            code_block_status = codeBlock.off;
          }
          contents ~= line;
        } else if (line.matchFirst(rgx.block_curly_code_open)) {
          code_block_status = codeBlock.curly;
          contents ~= line;
        } else if (code_block_status == codeBlock.tic) {
          if (line.matchFirst(rgx.block_tic_close)) {
            code_block_status = codeBlock.off;
          }
          contents ~= line;
        } else if (line.matchFirst(rgx.block_tic_code_open)) {
          code_block_status = codeBlock.tic;
          contents ~= line;
        } else if (auto m = line.match(rgx.insert_src_fn_ssi_or_sst)) {
          auto insert_fn      = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          /+ insert path is taken relative to the directory of fn_src +/
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          insert_file_list ~= fn_src_insert.to!string;
          auto raw = MarkupRawUnit();
          /+ TODO +/
          auto markup_sourcefile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcefile_insert_content.length
            );
          }
          ContentsAndImages contents_insert_tu = scan_subdoc_source(
            _opt_action,
            markup_sourcefile_insert_content,
            fn_src_insert.to!string
          );
          contents ~= contents_insert_tu.insert_contents;
          if (_opt_action.source || _opt_action.pod) {
            /+ the sub-document scan already returns extracted image names;
               take them as they are rather than re-scanning the list +/
            images_found ~= contents_insert_tu.images;
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename
                 - repeat 1
              - b. else
                 - add line to new array;
                 - build image list, search for any image files to add to image list
          +/
        } else {
          contents ~= line;
          if (_opt_action.source || _opt_action.pod) {
            images_found ~= _extract_images(line);
          }
        }
      } // end src doc loop
      /+ sorted, with duplicates removed +/
      string[] images = images_found.sort().uniq.array;
      debug(insert_file) {
        writeln(__LINE__);
        writeln(contents.length);
      }
      ContentsInsertsImages t = tuple(
        contents,
        insert_file_list,
        images
      );
      return t;
    }
  }
}