# encoding: utf-8 =begin * Name: SiSU ** Description: documents, structuring, processing, publishing, search *** document abstraction ** Author: Ralph Amissah [ralph@amissah.com] [ralph.amissah@gmail.com] ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah, All Rights Reserved. ** License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see [http://www.gnu.org/licenses/]. If you have Internet connection, the latest version of the GPL should be available at these locations: [http://www.fsf.org/licensing/licenses/gpl.html] [http://www.gnu.org/licenses/gpl.html] ** SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system ** Hompages: [http://www.jus.uio.no/sisu] [http://www.sisudoc.org] ** Git [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/ao_doc_str.rb;hb=HEAD] =end module SiSU_AO_DocumentStructureExtract require_relative 'ao_persist' # ao_persist.rb class Instantiate < SiSU_Param::Parameters::Instructions def initialize @@counter=@@column=@@columns=0 @@line_mode='' end end class Build def initialize(md,data) @md,@data=md,data SiSU_AO_DocumentStructureExtract::Instantiate.new @pb=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) @pbn=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) @pbl=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) @per=SiSU_AO_Persist::PersistDocStructExt.new @make=SiSU_Env::ProcessingSettings.new(@md) end def ln_get(lv) case lv when /A/ then 0 when /B/ then 1 when /C/ then 2 when /D/ then 3 when /1/ then 4 when /2/ then 5 when /3/ then 6 when /4/ then 7 when /5/ then 8 when /6/ then 9 end end def image_test(str) str=~/\{\s*\S+?\.png.+?\}https?:\/\/\S+/ \ ? true : false end def bullet_test(str) (str=~/\*/) \ ? true : false end def quotes? @per.quote==:open \ ? true : false end def hang_and_indent_test(str) hang_indent=if str=~/^_([1-9])[^_]/ [$1,$1] elsif str=~/^__([1-9])/ [0,$1] elsif str=~/^_([0-9])_([0-9])/ [$1,$2] else [0,0] end hang,indent=hang_indent[0],hang_indent[1] [hang,indent] end def hang_and_indent_def_test(str1,str2) hang_indent=if str1=~/^_([1-9])[^_]/ [$1,$1] elsif str1=~/^__([1-9])/ [0,$1] elsif str1=~/^_([0-9])_([0-9])/ [$1,$2] else [0,0] end obj=if str2 =~/^(.+?)\s+\\\\(?:\s+|\n)/ str2.gsub(/^(.+?)(\s+\\\\(?:\s+|\n))/, "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") else str2.gsub(/^(.+?)\n/, "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\n") end hang,indent=hang_indent[0],hang_indent[1] [ hang, indent, obj, ] end def endnote_test?(str) (str=~/~\{.+?\}~|~\[.+?\]~/) \ ? true : false end def extract_tags(str,nametag=nil) tags=[] if str.nil? else if str =~/(?:^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/ str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i, "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}"). gsub(/ [ ]+/i,' ') tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks? end tags=nametag ? (tags << nametag) : tags tags.each do |t| t.gsub!(/[^a-z0-9._-]/,'') end end [ str, tags, ] end def rgx_idx_ocn_seg @rgx_idx_ocn_seg=/(.+?)\s*[+](\d+)/ end def construct_idx_array_and_hash(idxraw) idx_array_raw=idxraw.scan(/[^;]+/) idx_hash,idx_array,idx_lst={},[],[] idx_array_raw.each do |idx| idx=idx.strip idx_lst=case idx when /\S+?\s*:/ idx_couplet_tmp=[] idx_couplet=idx.scan(/\s*[^:]+\s*/) if idx_couplet[1] =~/[|]/ idx_couplet_tmp << idx_couplet[0] << idx_couplet[1].scan(/\s*[^|]+\s*/) else idx_couplet_tmp << idx_couplet[0] << [idx_couplet[1]] end idx_couplet=idx_couplet_tmp else [idx] end term_nodes=[] idx_lst.each do |term_node| case term_node when String term_node= term_node[0].chr.capitalize + term_node[1,term_node.length] term_node=(term_node =~/.+?[+]\d+/) \ ? term_node : (term_node + '+0') term_nodes << term_node use,plus=rgx_idx_ocn_seg.match(term_node)[1,2] @use=use.strip unless idx_hash[@use] \ and defined? idx_hash[@use] idx_hash[@use]= { sub: [], plus: plus } end when Array subterm_nodes=[] term_node.each do |subterm_node| subterm_node=(subterm_node =~/.+?[+]\d+/) \ ? subterm_node : (subterm_node + '+0') subterm_nodes << subterm_node sub,sub_plus=rgx_idx_ocn_seg.match(subterm_node)[1,2] unless idx_hash[@use] \ and defined? idx_hash[@use] idx_hash[@use]= { sub: [], plus: 0 } end idx_hash[@use][:sub] << { sub.strip => { plus: sub_plus } } end term_nodes << subterm_nodes end end idx_array << term_nodes end { hash: idx_hash, array: idx_array, } end def extract_structure_loop(data,tuned_file) data.each do |t_o| if t_o =~/^--([+~-])[#]$/ h=case $1 when /[+]/ @per.ocn=:on { flag: :ocn_on, } when /[~]/ @per.ocn=:ocn_off_headings_keep { flag: :ocn_off, mod: :headings_keep, } when /[-]/ #of particular relevance with level 1~ which is required to precede substantive text & used e.g. in html segmented text @per.ocn=:ocn_off_headings_dummy_lev1 { flag: :ocn_off, mod: :headings_exclude, } else @per.ocn=:on { flag: :ocn_on, } end t_o=SiSU_AO_DocumentStructure::ObjectFlag.new.flag_ocn(h) next end if t_o =~/^:[~](#{SiSU_is.language_list_regex?}|-)$/ # work with for identifying language of objects lng=$1 h=case lng when /(?:#{SiSU_is.language_list_regex?})/ @per.lng=:on @per.lng_is=lng.to_sym { flag: :lng_on, act: lng.to_sym, } else # ^:~- if @per.lng==:on @per.lng=:off @per.lng_is=:doc_default { flag: :lng_off, act: :doc_default, } end end t_o=SiSU_AO_DocumentStructure::ObjectFlag.new.flag_lng(h) next end t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") if @per.code==:off unless t_o =~/^(?:@\S+?:|%+)\s/ # extract book index for paragraph if any idx=if t_o=~/^=\{\s*(.+)\s*\}\s*$\Z/m m=$1 m=m.split(/[ ]*\n/).join(' '). gsub(/\s+([|:;])\s+/,'\1'). gsub(/\s+([+]\d+)\s+/,'\1') t_o=t_o.gsub(/\n=\{.+?\}\s*$/m,'') idx_array_and_hash=construct_idx_array_and_hash(m) idx_array_and_hash[:hash] else nil end end if t_o !~/^(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\{|\{table)[ ~]/ \ and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block|table)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$|^`:quote_(?:open|close)`/ \ and @per.code==:off \ and @per.poem==:off \ and @per.group==:off \ and @per.block==:off \ and @per.alt==:off \ and @per.box==:off \ and @per.table==:off t_o=case t_o when /^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/ #metadata, header if t_o=~/^#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*(.+)/m tag,obj=$1,$2 @metadata[tag]=obj end t_o=nil when /^%+\s/ #comment t_o=if t_o=~/^%+\s+(.+)/ h={ obj: $1 } SiSU_AO_DocumentStructure::ObjectComment.new.comment(h) else nil end when /^:?([A-D1-6])\~/ #heading / lv lv=$1 ln=ln_get(lv) t_o=if t_o=~/^:?[A-D1-6]\~\s+(.+)/m obj=$1 note=endnote_test?(obj) obj,tags=extract_tags(obj) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ if @per.ocn==:ocn_off_headings_dummy_lev1 \ and t_o =~/^1\~\S*\s+/m obj << ' -#' elsif @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep obj << ' ~#' end end end h={ lv: lv, ln: ln, obj: obj, idx: idx, tags: tags, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) elsif t_o=~/^:?[A-D1-6]\~(\S+?)-\s+(.+)/m name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ if @per.ocn==:ocn_off_headings_dummy_lev1 \ and t_o =~/^1\~\S*\s+/m obj << ' -#' elsif @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep obj << ' ~#' end end end h={ lv: lv, name: name, obj: obj, idx: idx, autonum_: false, tags: tags, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) elsif t_o=~/^:?[A-D1-6]\~(\S+)\s+(.+)/m name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj,name) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ if @per.ocn==:ocn_off_headings_dummy_lev1 \ and t_o =~/^1\~\S*\s+/m obj << ' -#' elsif @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep obj << ' ~#' end end end h={ lv: lv, name: name, obj: obj, idx: idx, tags: tags, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) else nil end when /^_(?:[1-9]!?|[1-9]?\*)\s+/ #indented and/or bullet paragraph t_o=if t_o=~/^(_(?:[1-9]?\*|[1-9]!?)\s+)(.+)/m tst,obj=$1,$2 if t_o=~/^_[1-9]!\s+.+/m hang,indent,obj=hang_and_indent_def_test(tst,obj) else hang,indent=hang_and_indent_test(tst) end bullet=bullet_test(tst) image=image_test(obj) note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end end h={ bullet_: bullet, hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end else nil end when /^_[0-9]?_[0-9]!?\s+/ #hanging indent paragraph t_o=if t_o=~/^(_[0-9]?_[0-9]!?\s+)(.+)/m tst,obj=$1,$2 if t_o=~/^_[0-9]?_[0-9]!\s+.+/m hang,indent,obj=hang_and_indent_def_test(tst,obj) else hang,indent=hang_and_indent_test(tst) end image=image_test(obj) note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end end h={ hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end else nil end when /^<(?:br)?:(?:pa?r|o(?:bj|---)?)>\s*$/ #[br:par] #[br:obj] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_obj]) when /^(?:-\\\\-|<:pb>)\s*$/ #[br:pg] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page],:markup) when /^(?:=\\\\=|<:pn>)\s*$/ #[br:pgn] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new],:markup) when /^-\.\.-\s*$/ #[br:pgl] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line],:markup) else #paragraph image=image_test(t_o) note=endnote_test?(t_o) obj,tags=extract_tags(t_o) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end end unless obj=~/\A\s*\Z/m h={ bullet_: false, indent: 0, hang: 0, obj: obj, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } t_o=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end t_o=SiSU_AO_DocumentStructureExtract::Structure.new(@md).structure_markup(t_o) #must happen earlier, node info etc. require end elsif @per.code==:off if t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|```[ ]+code(?:\.[a-z][0-9a-z_]+)?)/ @per.code=case t_o when /^code(?:\.[a-z][0-9a-z_]+)?\{/ then :curls when /^```[ ]+code/ then :tics else @per.code #error end @per.lngsyn=if t_o =~/^(?:code\.[a-z][0-9a-z_]+\{|```[ ]+code\.[a-z_]+)/ case t_o when /^code\.([a-z][0-9a-z_]+)\{/ :"#{$1}" when /^```[ ]+code\.([a-z][0-9a-z_]+)/ :"#{$1}" else :txt end else :txt end @@counter=1 @codeblock_numbered= (t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{#|```[ ]+code(?:\.[a-z][0-9a-z_]+)?\s[#])/) \ ? true : false @num_id[:code_block] +=1 h={ is_for: :code, obj: '', sym: :code_block_open, num: @num_id[:code_block], syntax: @per.lngsyn, } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif t_o =~/^(?:poem\{|```[ ]+poem)/ @per.poem=case t_o when /^poem\{/ then :curls when /^```[ ]+poem/ then :tics else @per.poem #error end @num_id[:poem] +=1 h={ is_for: :poem, obj: '', sym: :poem_open, num: @num_id[:poem], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:box(?:\.[a-z_]+)?\{|```[ ]+box(?:\.[a-z_]+)?)/ @per.box=case t_o when /^box\{/ then :curls when /^```[ ]+box/ then :tics else @per.box #error end @num_id[:box] +=1 h={ is_for: :box, obj: '', sym: :box_open, num: @num_id[:box], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:group\{|```[ ]+group)/ @per.group=case t_o when /^group\{/ then :curls when /^```[ ]+group/ then :tics else @per.group #error end @num_id[:group] +=1 h={ is_for: :group, obj: '', sym: :group_open, num: @num_id[:group], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:block\{|```[ ]+block)/ @per.block=case t_o when /^block\{/ then :curls when /^```[ ]+block/ then :tics else @per.block #error end @num_id[:block] +=1 h={ is_for: :block, obj: '', sym: :block_open, num: @num_id[:block], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:alt\{|```[ ]+alt)/ @per.alt=case t_o when /^alt\{/ then :curls when /^```[ ]+alt/ then :tics else @per.alt #error end @num_id[:alt] +=1 h={ is_for: :alt, obj: '', sym: :alt_open, num: @num_id[:alt], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^`:quote_open`/ @per.quote=:open @num_id[:quote] +=1 h={ is_for: :quote, obj: '', sym: :quote_open, num: @num_id[:quote], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) #tuned_file << t_o #% find second source, entered twice, should be once so closed off here elsif t_o =~/^(?:table\{|```[ ]+table|\{table)[ ~]/ @num_id[:table] +=1 h={ is_for: :table, obj: '', sym: :table_open, num: @num_id[:table], } ins_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << ins_o if t_o=~/^table\{(?:~h)?\s+/ @per.table=:curls @rows='' case t_o when /table\{~h\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=true when /table\{\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=false end @h={ head_: heading, cols: cols, widths: col, idx: idx, } elsif t_o=~/^```[ ]+table(?:~h)?\s+c\d+/ @per.table=:tics @rows='' case t_o when /^```[ ]+table~h\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=true when /^```[ ]+table\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=false end @h={ head_: heading, cols: cols, widths: col, idx: idx, } elsif t_o=~/^\{table(?:~h)?(?:\s+\d+;?)?\}\n.+\Z/m m1,m2,hd=nil,nil,nil tbl=/^\{table(?:~h)?(?:\s+\d+;?)?\}\n(.+)\Z/m.match(t_o)[1] hd=((t_o =~/^\{table~h/) ? true : false) tbl,tags=extract_tags(tbl) rws=tbl.split(/\n/) rows='' cols=nil rws.each do |r| cols=(cols ? cols : (r.scan('|').length) +1) r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end col=[] if t_o =~/^\{table(?:~h)?\s+(\d+);?\}/ #width of col 1 given as %, usually when wider than rest that are even c1=$1.to_i width=(100 - c1)/(cols - 1) col=[ c1 ] (cols - 1).times { col << width } else #all columns of equal width width=100.00/cols cols.times { col << width } end h={ head_: hd, cols: cols, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o elsif t_o=~/^```[ ]+table(?:~h)?\s+/ m1,m2,hd=nil,nil,nil h=case t_o when /^```[ ]+table~h\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,true when /^```[ ]+table\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,false else nil end tbl,tags=extract_tags(tbl) col=m1.scan(/\d+/) rws=tbl.split(/\n/) rows='' rws.each do |r| r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o elsif t_o=~/^\{table(?:~h)?\s+/ m1,m2,hd=nil,nil,nil h=case t_o when /\{table~h\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,true when /\{table\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,false else nil end tbl,tags=extract_tags(tbl) col=m1.scan(/\d+/) rws=tbl.split(/\n/) rows='' rws.each do |r| r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o end end t_o end if @per.table==:curls or @per.table==:tics if (@per.table==:curls \ and t_o =~/^\}table/) \ or (@per.table==:tics \ and t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.table=:off headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx] @h={ head_: headings, cols: columns, widths: widths, idx: idx, obj: @rows, } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(@h) tuned_file << t_o @h,@rows=nil,'' h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o else if t_o.is_a?(String) \ and t_o !~/^(?:table\{|```[ ]+table)/ t_o=t_o.gsub(/^\n+/m,''). gsub(/\n+/m,"#{Mx[:tc_p]}") @rows += t_o + Mx[:tc_c] end t_o=nil end end if @per.code==:curls \ or @per.code==:tics if (@per.code==:curls \ && t_o =~/^\}code/) \ or (@per.code==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/m) @per.code=:off if @tuned_code[-1] @tuned_code[-1]. gsub!(/\s*(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*\Z/m,'') end obj=@tuned_code.join("\n") tags=[] h={ obj: obj, idx: idx, syntax: @per.lngsyn, tags: tags, num: @num_id[:code_block], number_: @codeblock_numbered, } @per.lngsyn=:txt t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.code(h) @tuned_code=[] tuned_file << t_o h={ is_for: :code, obj: '', sym: :code_close, num: @num_id[:code_block], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) end if (@per.code==:curls \ || @per.code==:tics) \ and t_o.is_a?(String) sub_array=t_o.dup + "#{Mx[:br_nl]}" @line_mode=[] sub_array.scan(/.+/) {|w| @line_mode << w if w =~/[\S]+/} t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(:code).join @tuned_code << t_o t_o=nil end elsif (@per.poem==:curls \ || @per.poem==:tics) \ or (@per.box==:curls \ || @per.box==:tics) \ or (@per.group==:curls \ || @per.group==:tics) \ or (@per.block==:curls \ || @per.block==:tics) \ or (@per.alt==:curls \ || @per.alt==:tics) \ or (@per.quote==:open \ && t_o =~/`:quote_close`/m) #not if (@per.poem==:curls \ && t_o =~/^\}poem$/m) \ or (@per.poem==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.poem=:off h={ is_for: :poem, obj: '', idx: idx, sym: :poem_close, num: @num_id[:poem], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.box==:curls \ && t_o =~/^\}box/) \ or (@per.box==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.box=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:box], } @tuned_block=[] t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.box(h) tuned_file << t_o h={ is_for: :box, obj: '', idx: idx, sym: :box_close, num: @num_id[:box], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.group==:curls \ && t_o =~/^\}group/) \ or (@per.group==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.group=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:group], } @tuned_block=[] t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.group(h) tuned_file << t_o h={ is_for: :group, obj: '', sym: :group_close, num: @num_id[:group], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.block==:curls \ && t_o =~/^\}block/) \ or (@per.block==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.block=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:block], } @tuned_block=[] t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.block(h) tuned_file << t_o h={ is_for: :block, obj: '', sym: :block_close, num: @num_id[:block], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.alt==:curls \ && t_o =~/^\}alt/) \ or (@per.alt==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.alt=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:alt], } t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.alt(h) @tuned_block=[] tuned_file << t_o h={ is_for: :alt, obj: '', sym: :alt_close, num: @num_id[:alt], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif @per.quote==:open \ and t_o =~/`:quote_close`/m @per.quote=:off h={ is_for: :quote, idx: idx, obj: '', sym: :quote_close, num: @num_id[:quote], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif @per.quote==:open t_o,tags=extract_tags(t_o) h={ indent: 1, obj: t_o, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end if (@per.poem==:curls \ || @per.poem==:tics) \ or (@per.group==:curls \ || @per.group==:tics) \ or (@per.alt==:curls \ || @per.alt==:tics) \ and t_o =~/\S/ \ and t_o !~/^(?:\}(?:verse|code|box|alt|group|block)|(?:verse|code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|alt|group|block)\{)/ \ and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/ # fix logic sub_array=t_o.dup @line_mode=sub_array.scan(/.+/) type=if @per.poem==:curls or @per.poem==:tics t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(type).join poem=t_o.split(/\n\n/) poem.each do |v| v=v.gsub(/\n/m,"#{Mx[:br_nl]}\n") obj,tags=extract_tags(v) h={ obj: obj, tags: tags, num: @num_id[:poem], } t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.verse(h) tuned_file << t_o end :poem else :group end end @verse_count+=1 if @per.poem==:curls or @per.poem==:tics end if @per.code==:off if @per.poem==:curls or @per.poem==:tics \ or @per.box==:curls or @per.box==:tics \ or @per.group==:curls or @per.group==:tics \ or @per.alt==:curls or @per.alt==:tics \ or (@per.quote==:open and t_o =~/`:quote_close`/m) if t_o.is_a?(String) t_o=t_o.gsub(/\n/m,"#{Mx[:br_nl]}"). gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}"). gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") t_o=t_o + Mx[:br_nl] if t_o =~/\S+/ elsif t_o.is==:group \ || t_o.is==:block \ || t_o.is==:alt \ || t_o.is==:box \ || t_o.is==:verse t_o.obj=t_o.obj.gsub(/\n/m,"#{Mx[:br_nl]}"). gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}"). gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") end @tuned_block << t_o if t_o =~/\S+/ else tuned_file << t_o end else tuned_file << t_o end end tuned_file end def identify_parts tuned_file=[] @tuned_block,@tuned_code=[],[] @@counter,@verse_count=0,0 @num_id={ code_block: 0, poem: 0, box: 0, group: 0, alt: 0, quote: 0, table: 0, } @metadata={} if @md.flag_auto_biblio \ or @md.flag_biblio @data,bibliography=SiSU_AO_Appendices::Bibliography.new(@md,@data).biblio_extraction end if @md.flag_glossary @data,glossary=SiSU_AO_Appendices::Glossary.new(@md,@data).glossary_extraction end tuned_file=extract_structure_loop(@data,tuned_file) if @md.flag_endnotes tuned_file << @pb h={ ln: 1, lc: 1, obj: 'Endnotes', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Endnotes', name: 'endnotes', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Endnotes' } end if @md.flag_glossary tuned_file << @pb h={ ln: 1, lc: 1, obj: 'Glossary', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Glossary', name: 'glossary', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Glossary' } if glossary.length > 0 tuned_file=extract_structure_loop(glossary,tuned_file) end end if @md.flag_auto_biblio tuned_file << @pb h={ ln: 1, lc: 1, obj: 'References', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Bibliography', name: 'biblio', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Bibliography' } citenumber=0 bibliography.each do |cite| citenumber +=1 if cite.is_a?(Hash) h={ obj: cite[:obj], #obj: %{[#{citenumber}] } + cite[:obj], tags: [cite[:id]], hang: 0, indent: 2, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end elsif @md.flag_biblio tuned_file << @pb h={ ln: 1, lc: 1, obj: 'References', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Bibliography', name: 'biblio', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Bibliography' } if not bibliography.nil? \ and bibliography.length > 0 tuned_file=extract_structure_loop(bibliography,tuned_file) else tuned_file, citations = SiSU_AO_Appendices::Citations.new(@md,tuned_file).songsheet # ao_appendices.rb citenumber=0 citations.compact.each do |c| citenumber +=1 if c.is_a?(Hash) if c[:is]==:book h={ obj: %{#{c[:author]}. /{#{c[:publication]}}/ (#{c[:year]})}, #obj: %{[#{citenumber}] *{#{c[:author]}}* /{#{c[:publication]}}/ (#{c[:year]})}, hang: 0, indent: 2, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) elsif c[:is]==:article h={ obj: %{#{c[:author]}. /{"#{c[:title]}"}/ #{c[:publication]} editor #{c[:editor]} (#{c[:year]})}, #obj: %{[#{citenumber}] *{#{c[:author]}}* /{"#{c[:title]}"}/ #{c[:publication]} editor #{c[:editor]} (#{c[:year]})}, hang: 0, indent: 2, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end end end end if @md.book_idx tuned_file << @pb h={ ln: 1, lc: 1, obj: 'Index', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Index', name: 'book_index', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Index' } end tuned_file << @pb if @make.build.metadata? h={ ln: 1, lc: 1, obj: 'Metadata', autonum_: false, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'SiSU Metadata, document information', name: 'metadata', autonum_: false, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) end h={ obj: 'eof', } meta=SiSU_AO_DocumentStructure::ObjectMetadata.new.metadata(@metadata) [tuned_file,meta,bibliography,glossary] end def table_rows_and_columns_array(table_str) table=[] table_str.split(/#{Mx[:tc_c]}/).each do |table_row| table_row_with_columns=table_row.split(/#{Mx[:tc_p]}/) table << table_row_with_columns end table end def meta_heading(h) h={ lv: h[:lv], ln: h[:ln], name: h[:name], obj: h[:obj], ocn: '0', } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) end def meta_para(str) h={ obj: str, ocn_: false, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end def build_lines(type=:none) lines=@data lines.each.map do |line| line=if line =~/\S/ \ and line !~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|\}code)/ \ and line !~/^(?:```[ ]+code(?:\.[a-z][0-9a-z_]+)?|```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$)/ \ and not line.is_a?(Hash) #watch @@counter+=1 if @per.code==:curls or @per.code==:tics line=line.gsub(/\s\s/,"#{Mx[:nbsp]*2}"). gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") line=line.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type==:code # REMOVE try sort for texpdf special case line=if line =~/(?:https?|file|ftp):\/\/\S+$/ line.gsub(/\s*$/," #{Mx[:br_nl]}") else line.gsub(/\s*$/,"#{Mx[:br_nl]}") #unless type=='code' end elsif line =~/^\s*$/ line.gsub(/\s*$/,"#{Mx[:br_nl]}") else line end line end end end class Structure # this must happen early def initialize(md) @md=md end def structure(data) data.compact.each do |dob| structure_markup(dob) end end def structure_markup(dob) #build structure where structure provided only in meta header dob=if dob.is==:para \ && (((dob.hang !~/[1-9]/) && (dob.indent !~/[1-9]/)) \ || (dob.hang != dob.indent)) \ and not dob.bullet_ dob=case dob.obj when /^#{@md.lv0}/ h={ is: :heading, lv: 'A', ln: 0, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv1}/ h={ is: :heading, lv: 'B', ln: 1, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv2}/ h={ is: :heading, lv: 'C', ln: 2, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv3}/ h={ is: :heading, lv: 'D', ln: 3, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv4}/ h={ is: :heading, lv: '1', ln: 4, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv5}/ h={ is: :heading, lv: '2', ln: 5, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv6}/ h={ is: :heading, lv: '3', ln: 6, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) else dob end else dob end dob end end class OCN def initialize(md,data,fnx,process) @md,@data,@fnx,@process=md,data,fnx,process end def structure_info def lv %w[A~ B~ C~ D~ 1 2 3 4] end def possible_parents(child) case child when /A~/ then 'none' when /B~/ then 'A~' when /C~/ then 'B~' when /D~/ then 'C~' when /1/ then 'A~, B~, C~, D~' when /2/ then '1' when /3/ then '2' when /4/ then '3' end end def possible_children(parent) case parent when /A~/ then 'B~, 1' when /B~/ then 'C~, 1' when /C~/ then 'D~, 1' when /D~/ then '1' when /1/ then '2' when /2/ then '3' when /3/ then '4' when /4/ then 'none' end end self end def document_structure_check_info(node,node_parent,status=:ok) node_ln=/^([0-7])/.match(node)[1].to_i node_parent_ln=/^([0-7])/.match(node_parent)[1].to_i if status==:error \ or @md.opt.act[:maintenance][:set]==:on puts %{node: #{node}, parent node: #{node_parent} #{status.upcase}} if status==:error node_ln=/^([0-7])/.match(node)[1].to_i node_parent_ln=/^([0-7])/.match(node_parent)[1].to_i STDERR.puts %{current level: #{structure_info.lv[node_ln]} (possible parent levels: #{structure_info.possible_parents(structure_info.lv[node_ln])}) parent level: #{structure_info.lv[node_parent_ln]} (possible child levels: #{structure_info.possible_children(structure_info.lv[node_parent_ln])}) SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end end end def warning_incorrect_parent_level_or_level(txt) puts %{ERROR. There is an error in markup of heading levels either here or in the parent heading. The current header reads: "#{txt}" has incorrect level and/or parent level --} end def required_headers_present? if @process == :complete unless (defined? @md.title \ and @md.title.full) STDERR.puts %{required header missing: @title: SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}" } if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end unless (defined? @md.creator.author \ and @md.creator.author) STDERR.puts %{required header missing: @creator: :author: anonymous? SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}" } if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end end end def ocn #and auto segment numbering increment required_headers_present? data=@data @o_array=[] node=ocn=ocn_dv=ocn_sp=ocnh=ocnh0=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocnh7=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnu=0 # h heading, o other, t table, g group, i image regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^<:\#|<:- |<[:!]!4|