about summary refs log tree commit diff homepage
path: root/lib/sisu/v5/ao_doc_str.rb
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2014-05-25 10:01:43 -0400
committer Ralph Amissah <ralph@amissah.com> 2014-05-25 10:01:43 -0400
commit 16c30474f06ed3774ad524a38b55b7840de057d0 (patch)
tree 03f20a30c1853b3d5cb893556aa633be2407a4d8 /lib/sisu/v5/ao_doc_str.rb
parent v5 v6: version & changelog (& rakefile) (diff)
v5: merge v6 to v5, version bump to 5.4.*, reason adds structure check (tag: sisu_5.4.0)
* ao, document structure check, stop processing on major error (with error message & text at location of failure) [reason for version bump: the addition of structure check] * utils, add ok code marker * ao, heading with no ocn, distinguish ~# from -# * ~# is general & means no ocn (for any object to which it is applied) * -# is relevant only for 1~ dummy headings & instructs that they should be removed from output where possible * applied so far to pdf, odt & plaintext * ao_images, reduce warnings when ruby RMagick absent as program used directly * reduced dependency on ruby RMagick library (as some time way back had issues) * ao, document markup structure check, skip processing file on major error * with error message & text at location of failure * texpdf, urls in creator cause breakage * texpdf, mailto markup links set for normal text objects * texpdf, '&' in heading breaks toc (now removed from toc (not heading)), bug * revisit, bug * texpdf, for urls switch to sans serif (small fontsize) * instead of typewriter, latex default * texpdf, pdf colored hyperlinks configurable * --pdf-hyperlinks-color --pdf-hyperlinks-no-color or --pdf-hyperlinks-monochrome * ['default']['pdf_hyperlinks']='color' (other options switch hyperlink color off 'no-color' 'color-off' 'monochrome') * texpdf, pdf default font size configurable (cli & sisurc.yml) (no fractions) * --pdf-fontsize-12 --pdf-fontsize-8 * default: texpdf_fontsize: 12 * texpdf, headings and table of contents representation * fixes 1~ and 2~ result in the same formatting 1~ 2~ & 3~ now differentiated, see discussion in sisu.org under #744383 * fixes :A smaller formatted than :B A~ B~ C~ now the same size, see discussion in sisu.org under #744383 * :B and :C result in the same formatting, issue explained see sisu.org * texpdf, (internal coding) fontface, rename texpdf_font texpdf_fontface * texpdf, (internal coding) use symbols to identify page orientation * param, metadata rights, line-breaks instead of semicolons 
separating rights * fixes remove trailing semicolon after :copyright: * digests sha512 option implemented * options sha512 sha256 md5 * command line --sha512 * rcconf.yml ['default']['digest'] = sha512 * xml object citation numbering (docbook fictionbook)
Diffstat (limited to 'lib/sisu/v5/ao_doc_str.rb')
-rw-r--r-- lib/sisu/v5/ao_doc_str.rb 168
1 files changed, 119 insertions, 49 deletions
diff --git a/lib/sisu/v5/ao_doc_str.rb b/lib/sisu/v5/ao_doc_str.rb
index dd7f32f3..b66f01d1 100644
--- a/lib/sisu/v5/ao_doc_str.rb
+++ b/lib/sisu/v5/ao_doc_str.rb
@@ -234,10 +234,10 @@ module SiSU_AO_DocumentStructureExtract
@@flag[:ocn]=:on
{flag: :ocn_on}
when /[~]/
- @@flag[:ocn]=:off_headings_substantive
- {flag: :ocn_off, mod: :headings_substantive}
- when /[-]/
- @@flag[:ocn]=:off_headings_exclude
+ @@flag[:ocn]=:ocn_off_headings_keep
+ {flag: :ocn_off, mod: :headings_keep}
+ when /[-]/ #of particular relevance with level 1~ which is required to precede substantive text & used e.g. in html segmented text
+ @@flag[:ocn]=:ocn_off_headings_dummy_lev1
{flag: :ocn_off, mod: :headings_exclude}
else
@@flag[:ocn]=:on
@@ -286,12 +286,14 @@ module SiSU_AO_DocumentStructureExtract
obj=$1
note=endnote_test?(obj)
obj,tags=extract_tags(obj)
- if @@flag[:ocn]==:off_headings_exclude \
- or @@flag[:ocn]==:off_headings_substantive
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
unless obj =~ /[~-][#]\s*$/
- if @@flag[:ocn]==:off_headings_exclude
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ and t_o =~/^1\~\S*\s+/m
obj << ' -#'
- elsif @@flag[:ocn]==:off_headings_substantive
+ elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
obj << ' ~#'
end
end
@@ -302,12 +304,14 @@ module SiSU_AO_DocumentStructureExtract
name,obj=$1,$2
note=endnote_test?(obj)
obj,tags=extract_tags(obj)
- if @@flag[:ocn]==:off_headings_exclude \
- or @@flag[:ocn]==:off_headings_substantive
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
unless obj =~ /[~-][#]\s*$/
- if @@flag[:ocn]==:off_headings_exclude
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ and t_o =~/^1\~\S*\s+/m
obj << ' -#'
- elsif @@flag[:ocn]==:off_headings_substantive
+ elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
obj << ' ~#'
end
end
@@ -318,12 +322,14 @@ module SiSU_AO_DocumentStructureExtract
name,obj=$1,$2
note=endnote_test?(obj)
obj,tags=extract_tags(obj,name)
- if @@flag[:ocn]==:off_headings_exclude \
- or @@flag[:ocn]==:off_headings_substantive
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
unless obj =~ /[~-][#]\s*$/
- if @@flag[:ocn]==:off_headings_exclude
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ and t_o =~/^1\~\S*\s+/m
obj << ' -#'
- elsif @@flag[:ocn]==:off_headings_substantive
+ elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
obj << ' ~#'
end
end
@@ -345,8 +351,8 @@ module SiSU_AO_DocumentStructureExtract
note=endnote_test?(obj)
obj,tags=extract_tags(obj)
unless obj=~/\A\s*\Z/m
- if @@flag[:ocn]==:off_headings_exclude \
- or @@flag[:ocn]==:off_headings_substantive
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
unless obj =~ /[~-][#]\s*$/
obj << ' ~#'
end
@@ -368,8 +374,8 @@ module SiSU_AO_DocumentStructureExtract
note=endnote_test?(obj)
obj,tags=extract_tags(obj)
unless obj=~/\A\s*\Z/m
- if @@flag[:ocn]==:off_headings_exclude \
- or @@flag[:ocn]==:off_headings_substantive
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
unless obj =~ /[~-][#]\s*$/
obj << ' ~#'
end
@@ -391,8 +397,8 @@ module SiSU_AO_DocumentStructureExtract
image=image_test(t_o)
note=endnote_test?(t_o)
obj,tags=extract_tags(t_o)
- if @@flag[:ocn]==:off_headings_exclude \
- or @@flag[:ocn]==:off_headings_substantive
+ if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
+ or @@flag[:ocn]==:ocn_off_headings_keep
unless obj =~ /[~-][#]\s*$/
obj << ' ~#'
end
@@ -847,18 +853,43 @@ module SiSU_AO_DocumentStructureExtract
if status==:error
node_ln=/^([0-6])/.match(node)[1].to_i
node_parent_ln=/^([0-6])/.match(node_parent)[1].to_i
- puts %{current level: #{structure_info.lv[node_ln]} (possible parent levels: #{structure_info.possible_parents(structure_info.lv[node_ln])})
+ STDERR.puts %{current level: #{structure_info.lv[node_ln]} (possible parent levels: #{structure_info.possible_parents(structure_info.lv[node_ln])})
parent level: #{structure_info.lv[node_parent_ln]} (possible child levels: #{structure_info.possible_children(structure_info.lv[node_parent_ln])})
--- }
+SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"}
+ $process_document = :skip
end
end
end
def warning_incorrect_parent_level_or_level(txt)
- puts %{warning,
-#{txt}
-has incorrect level and/or parent level}
+ puts %{ERROR. There is an error in markup of heading levels either here or in the parent heading.
+The current header reads:
+"#{txt}"
+has incorrect level and/or parent level
+--}
+ end
+ def required_headers_present?
+ unless (defined? @md.title \
+ and @md.title.full)
+ STDERR.puts %{required header missing:
+
+@title:
+SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"
+}
+ $process_document = :skip
+ end
+ unless (defined? @md.creator.author \
+ and @md.creator.author)
+ STDERR.puts %{required header missing:
+
+@creator:
+ :author: anonymous?
+SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"
+}
+ $process_document = :skip
+ end
end
def ocn #and auto segment numbering increment
+ required_headers_present?
data=@data
@o_array=[]
node=ocn=ocn_dv=ocn_sp=ocnh=ocnh0=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnu=0 # h heading, o other, t table, g group, i image
@@ -866,6 +897,7 @@ has incorrect level and/or parent level}
parent=node1=node2=node3=node4=node5=node6=nil
node0='0:0;0'
@collapsed_lv0=0
+ @lev_occurences={ a: 0, b: 0, c: 0, d: 0, l1: 0, l2: 0, l3: 0 }
data.each do |dob|
h={}
if (dob.obj !~ regex_exclude_ocn_and_node || dob.is==:code) \
@@ -875,7 +907,7 @@ has incorrect level and/or parent level}
&& dob.ocn_
#dob.ln now is determined, and set earlier, check how best to remove this -->
if dob.is==:heading
- ln=case dob.lv
+ @ln=ln=case dob.lv
when 'A' then 0
when 'B' then 1
when 'C' then 2
@@ -905,11 +937,13 @@ has incorrect level and/or parent level}
end
if ln==0 \
or ln=~@md.lv0
+ @lev_occurences[:a] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh0+=1 #heading
node0="0:#{ocnh0};#{ocn}"
else
+ #document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node0="0:0;0"
end
@@ -919,11 +953,13 @@ has incorrect level and/or parent level}
node,ocn_sp,parent=node0,"h#{ocnh}",'ROOT'
elsif ln==1 \
or ln=~@md.lv1
+ @lev_occurences[:b] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh1+=1 #heading
node1="1:#{ocnh1};#{ocn}"
else
+ #document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node1="1:0;0"
end
@@ -932,6 +968,7 @@ has incorrect level and/or parent level}
@collapsed_lv1=@collapsed_lv0+1
node0
else
+ warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node0,node0,:error)
node0
end
@@ -939,11 +976,13 @@ has incorrect level and/or parent level}
node,ocn_sp,parent=node1,"h#{ocnh}",node0 #FIX
elsif ln==2 \
or ln=~@md.lv2
+ @lev_occurences[:c] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh2+=1
node2="2:#{ocnh2};#{ocn}"
else
+ #document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node2="2:0;0"
end
@@ -952,6 +991,7 @@ has incorrect level and/or parent level}
@collapsed_lv2=@collapsed_lv1+1
node1
else
+ warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node2,node0,:error)
node0
end
@@ -959,11 +999,13 @@ has incorrect level and/or parent level}
node,ocn_sp=node2,"h#{ocnh}"
elsif ln==3 \
or ln=~@md.lv3
+ @lev_occurences[:d] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh3+=1
node3="3:#{ocnh3};#{ocn}"
else
+ #document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node3="3:0;0"
end
@@ -988,6 +1030,7 @@ or this level should be level :B~ rather than #{dob.lv}}
node,ocn_sp=node3,"h#{ocnh}"
elsif ln==4 \
or ln=~@md.lv4
+ @lev_occurences[:l1] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh4+=1
@@ -1021,6 +1064,7 @@ or this level should be level :B~ rather than #{dob.lv}}
node,ocn_sp=node4,"h#{ocnh}"
elsif ln==5 \
or ln=~@md.lv5
+ @lev_occurences[:l2] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh5+=1
@@ -1049,6 +1093,7 @@ or this level should be level :B~ rather than #{dob.lv}}
@collapsed_lv5=@collapsed_lv1+1
node1
else
+ warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node5,node0,:error)
node0
end
@@ -1056,6 +1101,7 @@ or this level should be level :B~ rather than #{dob.lv}}
node,ocn_sp=node5,"h#{ocnh}"
elsif ln==6 \
or ln=~@md.lv6
+ @lev_occurences[:l3] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh6+=1
@@ -1092,6 +1138,7 @@ or this level should be 5~ rather #{dob.lv}" #level 6
@collapsed_lv6=@collapsed_lv1+1
node1
else
+ warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node6,node0,:error)
node0
end
@@ -1099,6 +1146,29 @@ or this level should be 5~ rather #{dob.lv}" #level 6
node,ocn_sp=node6,"h#{ocnh}"
end
else
+ unless @lev_occurences[:l1] > 0
+ STDERR.puts %{Substantive text objects must follow a level 1~ heading and there are none at this point in processing: #{@lev_occurences[:l1]}
+}
+ end
+ unless @ln >= 4
+ lev=case @ln
+ when 0 then 'A'
+ when 1 then 'B'
+ when 2 then 'C'
+ when 3 then 'D'
+ when 4 then '1'
+ when 5 then '2'
+ when 6 then '3'
+ when 7 then '4'
+ when 8 then '5'
+ when 9 then '6'
+ end
+ STDERR.puts %{Substantive text objects must follow a level 1~ 2~ or 3~ heading: #{lev}~
+SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"}
+ puts dob.obj.gsub(/^(.{1,80})/,'"\1"')
+ $process_document = :skip
+ break
+ end
if not dob.obj =~/~#|-#/
ocn_flag=true
else
@@ -1129,9 +1199,15 @@ or this level should be 5~ rather #{dob.lv}" #level 6
dob.ln,dob.node,dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent,dob.lc=ln,node,ocn,ocn_flag,ocn_dv,ocn_sp,parent,collapsed_level
else
ocnu+=1
- dob.obj=dob.obj.gsub(/#{Mx[:fa_o]}[~-]##{Mx[:fa_c]}/,'') if dob.obj
- ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
- dob.ln,dob.node,dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent,dob.lc=ln,node,nil,ocn_flag,ocn_dv,ocn_sp,parent,collapsed_level
+ heading_use=:ok
+ if dob.obj=~/#{Mx[:pa_non_object_no_heading]}/
+ dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'')
+ heading_use=:ok
+ elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/
+ dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'')
+ heading_use=:dummy
+ end
+ dob.ln,dob.node,dob.ocn,dob.ocn_,dob.use_,dob.odv,dob.osp,dob.parent,dob.lc=ln,node,nil,ocn_flag,heading_use,ocn_dv,ocn_sp,parent,collapsed_level
end
else
if dob.of !=:meta \
@@ -1148,24 +1224,6 @@ or this level should be 5~ rather #{dob.lv}" #level 6
end
end
h
- elsif dob.obj=~/#{Mx[:pa_non_object_no_heading]}/
- dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'')
- if dob.is==:para
- h={ obj: dob.obj, ocn_: false, ocn: nil, hang: dob.hang, indent: dob.indent, bullet_: dob.bullet_, tags: dob.tags, parent: dob.parent }
- dob=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h,dob)
- elsif dob.is==:heading
- h={ obj: dob.obj, ocn_: false, ocn: nil, toc_: true, parent: dob.parent }
- dob=SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob)
- end
- elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/
- dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'')
- if dob.is==:para
- h={ obj: dob.obj, ocn_: false, ocn: nil, hang: dob.hang, indent: dob.indent, bullet_: dob.bullet_, tags: dob.tags, parent: dob.parent }
- dob=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h,dob)
- elsif dob.is==:heading
- h={ obj: dob.obj, ocn_: false, ocn: nil, toc_: false, parent: dob.parent }
- dob=SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob)
- end
else dob
end
if dob.is==:code \
@@ -1177,6 +1235,18 @@ or this level should be 5~ rather #{dob.lv}" #level 6
end
@o_array << dob
end
+ unless @lev_occurences[:a] == 1
+ STDERR.puts %{The number of level A~ in this document: #{@lev_occurences[:a]}
+There must be one level A~ (no more and no less)
+SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"}
+ $process_document = :skip
+ end
+ unless @lev_occurences[:l1] > 0
+ STDERR.puts %{The number of level 1~ in this document: #{@lev_occurences[:l1]}
+There must be at least one level 1~ (and as many as required)
+SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"}
+ $process_document = :skip
+ end
@o_array
end
end