# coding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: xml (dom style) output processing
** Notes: tidy -xml dom.xml >> index.tidy
=end
module SiSU_XML_DOM
require "#{SiSU_lib}/defaults"
include SiSU_Viz
require "#{SiSU_lib}/particulars"
include SiSU_Particulars
require "#{SiSU_lib}/sysenv"
include SiSU_Env
require "#{SiSU_lib}/dal"
require "#{SiSU_lib}/shared_xml"
require "#{SiSU_lib}/xml_format"
include SiSU_XML_format
include SiSU_XML_munge
require "#{SiSU_lib}/rexml"
include SiSU_Rexml
# Module-level class variables: three counters initialised to 0 and an empty table-foot string.
# None of them is read or written by the code visible in this file.
# NOTE(review): presumably kept for table handling elsewhere — confirm before removing.
@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
@@tablefoot=''
class Source
# Keep the command options and look up the combined document particulars for them.
# opt:: options object; stored in @opt and handed to Combined_singleton#get_all.
def initialize(opt)
  @opt=opt
  registry=SiSU_Particulars::Combined_singleton.instance
  @particulars=registry.get_all(opt)
end
# Report the XML DOM run on screen and hand the document over to Songsheet.
# Unpacks env, md and dal_array from @particulars, prints the viewer hint unless the
# command string contains "q", prints the output path when it contains "M" or "V",
# then runs Songsheet#songsheet.
# Any StandardError is passed to SiSU_Errors::Info_error instead of propagating.
# Returns the value of Songsheet#songsheet, or of Info_error#error after a failure.
def read
  @env=@particulars.env
  @md=@particulars.md
  @dal_array=@particulars.dal_array
  path=@env.path.output_tell
  loc=@env.url.output_tell
  # Hint shown to the user: browser (and xml viewer in the verbose modes) plus target file.
  tool=case @opt.cmd
  when /[MV]/
    "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:dom]}\n\t#{@env.program.xml_viewer} #{path}/#{@md.fnb}/#{@md.fn[:dom]}"
  when /v/
    "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:dom]}"
  else
    ''
  end
  unless @opt.cmd =~/q/
    SiSU_Screen::Ansi.new(@opt.cmd,'invert','XML DOM',tool).colorize
  end
  if @opt.cmd =~/[MV]/
    SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:dom]}").flow
  end
  # the output file is closed in songsheet
  SiSU_XML_DOM::Source::Songsheet.new(@particulars).songsheet
rescue
  SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
end
private
# Runs the XML DOM generation steps for one document, in order.
class Songsheet
  # particulars:: object exposing env, md and dal_array; also kept whole for Scroll.
  def initialize(particulars)
    @particulars=particulars
    @env=particulars.env
    @md=particulars.md
    @dal_array=particulars.dal_array
  end
  # Build the document with Scroll, then optionally check it:
  # Tidy when the command string contains v, V or M; Rexml when it contains M.
  # Any StandardError is reported through SiSU_Errors::Info_error rather than raised.
  def songsheet
    SiSU_XML_DOM::Source::Scroll.new(@particulars).songsheet
    if @md.cmd =~/[vVM]/ # test wellformedness, comment out when not in use
      SiSU_XML_DOM::Source::Tidy.new(@md,@env).xml
    end
    if @md.cmd =~/M/ # test rexml parsing, comment out when not in use #debug
      SiSU_Rexml::Rexml.new(@md,@md.fn[:dom]).xml
    end
  rescue
    SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error
  end
end
class Scroll
require "#{SiSU_lib}/shared_txt"
include SiSU_text_utils
# Cached digest pattern: set on first use in Scroll#initialize (via ||=) and reused afterwards.
@@dp=nil
# Accumulator for the parts of the output document, shared by all Scroll instances.
# pre, xml_head, xml_sc, post and the structure emitters write into it; publish reads it
# and then resets :head, :body and :tail.
@@xml={ :body=>[],:open=>[],:close=>[],:head=>[],:sc=>[] }
# Set up per-document state for building the XML DOM output.
# particulars:: object exposing env, md and dal_array.
# Also prepares the skin (@vz), the digest pattern (@dp, cached class-wide in @@dp),
# the object-matching regex (@regx), the indent unit (@tab), the text translator (@trans)
# and a system-call helper (@sys).
def initialize(particulars)
  @env=particulars.env
  @md=particulars.md
  @dal_array=particulars.dal_array
  @vz=SiSU_Env::Get_init.instance.skin
  # Look the digest pattern up once per process; later instances reuse the cached value.
  @@dp ||=SiSU_Env::Info_env.new.digest.pattern
  @dp=@@dp
  # Captures: 1 = heading name (when a level marker is present), 2 = object text, 3 = number following the "~".
  @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*)#{Mx[:lv_c]}\s*)?(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
  @tab="\t"
  @trans=SiSU_XML_munge::Trans.new(@md)
  @sys=SiSU_Env::System_call.new
end
# Drive the four output stages in order: pre, markup, post, publish.
# The value returned by markup is kept in @data; the method returns whatever publish returns.
def songsheet
  pre()
  @data = markup(@dal_array)
  post()
  publish()
end
protected
# Rewrite endnote markers inside +para+ in place.
# Three substitutions run in a fixed order: numbered notes in the "a" delimiters,
# "*"/"+"-prefixed numbered notes in the "b" delimiters, then symbol-only notes in the
# "a" delimiters. Each match is replaced by its two captures followed by a space.
# para:: String, mutated by gsub!.
# Returns the result of the last gsub! (nil when that pattern did not match).
# NOTE(review): the replacement text is only '\1\2 ' in this copy; any surrounding
# markup it may once have carried is not visible here — verify against upstream.
def xml_markup(para='')
  replacement='\1\2 '
  patterns=[
    /#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,
    /#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,
    /#{Mx[:en_a_o]}([*+]+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/
  ]
  outcome=nil
  patterns.each { |pattern| outcome=para.gsub!(pattern,replacement) }
  outcome
end
# Add one metadata entry to the document head (@@xml[:head]).
# meta:: object responding to text, el and type.
# An entry is built only when meta.type == 'meta'; otherwise an empty string is appended.
# NOTE(review): the heredoc below is damaged in this copy (its opener is reduced to "<"
# and the tag text looks partly stripped); restore it from upstream before changing this method.
def xml_head(meta)
txt=meta.text
# Line-break markers become spaces; gsub! works on the string returned by meta.text.
txt.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,' ')
txt.gsub!(/ & /,' and ')
# Element name: dots replaced by underscores; display label: dots replaced by spaces.
el=meta.el.gsub(/\./,'_')
el_txt=meta.el.gsub(/\./,' ')
@@xml[:head] <<= if meta.type == 'meta'
<
#{@tab*2}#{el_txt.capitalize}:
#{@tab*2}<#{el}>
#{@tab*3}#{txt}
#{@tab*2}#{el}>
#{@tab}
WOK
else ''
end
end
# Record source-control details for the document head.
# When @md.sc_info is truthy the text is built from @md.sc_filename, @md.sc_number and
# @md.sc_date; otherwise an empty string is used. The result is stored in @@xml[:sc].
# The +md+ parameter is not read; the instance's @md is used instead.
# NOTE(review): this copy looks damaged. The heredoc opener is reduced to "<", and from the
# "#<<<" remnant onwards the lines use lv, n1, n2 and xml_content, none of which are
# defined in this method. They presumably belong to the xml_element method that
# xml_structure calls with seven arguments and whose "def" line is not visible here.
# Restore from upstream before editing.
def xml_sc(md='')
sc=if @md.sc_info
<
filename:
#{@md.sc_filename}
version number:
#{@md.sc_number}
version date:
#{@md.sc_date}
WOK
else ''
end
@@xml[:sc]=sc #<<<
#{@tab*n2}
#{@tab*n1}#{xml_content}
WOK
if lv == 4
@copen[1]=true
@copen[2]=@copen[3]=false
elsif lv == 5
@copen[2]=true
@copen[3]=false
elsif lv == 6
@copen[3]=true
end
end
# Emit the structural markup for a heading at level +lv+ into @@xml[:body].
# Levels that are still open are closed first, then the heading itself is handed to
# xml_element, and finally @level is updated so that +lv+ is open and all deeper levels are closed.
# lv::    heading level; converted with to_i (1..3 and 4..6 are handled differently)
# ocn::   passed through to xml_element unchanged
# para::  passed through to xml_element unchanged
# hname:: heading name; unless nil it is wrapped in tabs/newlines and passed on as +tag+
# State used: @cont (content flags 1..3), @copen (open content groups 1..3), @level (open levels).
# NOTE(review): the strings appended here contain only tabs and newlines in this copy; the
# tag text they presumably carried is not visible — verify against upstream.
# NOTE(review): n1, n2, v, u and cv (and y in the 4..6 loop) are assigned but never read here.
def xml_structure(lv='',ocn='',para='',hname='' )
lv=lv.to_i
n=lv - 1
n1=lv
n2=lv + 1
n3=lv + 2
v=lv - 3
tag=''
tag="\n#{@tab*n3}#{hname}\n" if hname !=nil
#if para[@regx]
# paragraph="#{para[@regx,2]}"
# util=SiSU_text_utils::Paragraph.new(paragraph,70)
# wrapped=util.line_wrap
#end
case lv
when 1..3
xml_element=""
# Walk from level 3 down to the new level, closing whatever is still open on the way.
3.downto(lv) do |x|
y=x - 1
if @cont[1] \
or @cont[2] \
or @cont[3]
@@xml[:body] << "#{@tab*5}\n"
end
#@@xml[:body] << "#{@tab*5}\n" if @cont[1] == true or @cont[2] == true or @cont[3] == true
@cont[1]=false if @cont[1]
@cont[2]=false if @cont[2]
@cont[3]=false if @cont[3]
####### attempt to close contents
# One line is appended per open content group (deepest flag decides how many), then all flags are reset.
if @copen[3] # 6~
[3,2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" }
@copen[1]=@copen[2]=@copen[3]=false
elsif @copen[2] # 5~
[2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" }
@copen[1]=@copen[2]=@copen[3]=false
elsif @copen[1] # 4~
[1].each { |v| @@xml[:body] << "#{@tab*n}\n" }
@copen[1]=@copen[2]=@copen[3]=false
end
@@xml[:body] << "#{@tab*y}\n" if @level[x]
@level[x]=false
end
when 4..6
6.downto(lv) do |x|
y=x - 1
if @level[x] == true
u=x - 3;
@xml_contents_close[x]=''
end
end
cv=lv - 3
xml_element=""
# xml_content is only assigned in this branch; for levels 1..3 it reaches xml_element as nil.
xml_content="\n#{@tab*5}"
case lv
when 4
@@xml[:body] << "#{@tab*5}\n" if @cont[1]
if @copen[3] == true # 6~
[3,2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" }
elsif @copen[2] == true # 5~
[2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" }
elsif @copen[1] == true # 4~
[1].each { |v| @@xml[:body] << "#{@tab*n}\n" }
end
@cont[1]=true
when 5
if @cont[2] \
or @cont[1]
@@xml[:body] << "#{@tab*5}\n"
end
if @copen[3] == true #6~
[3,2].each { |v| @@xml[:body] << "#{@tab*n}\n" }
elsif @copen[2] == true #5~
[2].each { |v| @@xml[:body] << "#{@tab*n}\n" }
end
@cont[2]=true
when 6
if @cont[3] \
or @cont[2] \
or @cont[1]
@@xml[:body] << "#{@tab*5}\n"
end
if @copen[3] #6{
[3].each { |v| @@xml[:body] << "#{@tab*n}\n" }
end
@cont[3]=true
end
end
xml_element(lv,ocn,para,hname,tag,xml_element,xml_content)
# Mark this level open and every deeper level closed.
@level[lv]=true
((lv+1)..6).each { |x| @level[x]=false }
end
# Emit a grouped text block into @@xml[:body].
# para:: text of the block; the group open/close markers are removed in place, the text is
#        passed through @trans.markup_group, numbered endnote markers are rewritten
#        (multi-line match) and surrounding whitespace is stripped
# ocn::  presumably the object citation number; its use is not visible in this copy
# NOTE(review): the final append line is truncated in this copy (the %{...} literal is
# unterminated); restore it from upstream before changing this method.
def group_structure(para='',ocn='')
para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/,'')
para=@trans.markup_group(para)
para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/m,
'\1\2 ')
para.strip!
@@xml[:body] << %{#{@tab*6}" << "\n"
end
# Emit a verse block into @@xml[:body].
# para:: text of the block; the verse open/close markers are removed in place, the text is
#        passed through @trans.markup_group and surrounding whitespace is stripped
# ocn::  presumably the object citation number; its use is not visible in this copy
# NOTE(review): the final append line is truncated in this copy (the %{...} literal is
# unterminated); restore it from upstream before changing this method.
def poem_structure(para='',ocn='')
para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/,'')
para=@trans.markup_group(para)
para.strip!
@@xml[:body] << %{#{@tab*6}" << "\n"
end
# Emit a code block into @@xml[:body].
# para:: text of the block; the code open/close markers are removed in place, the text is
#        passed through @trans.markup_group, each pair of whitespace characters is replaced
#        by a single space, and surrounding whitespace is stripped
# ocn::  presumably the object citation number; its use is not visible in this copy
# NOTE(review): the final append line is truncated in this copy (the %{...} literal is
# unterminated); restore it from upstream before changing this method.
def code_structure(para='',ocn='')
para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/,'')
para=@trans.markup_group(para)
para.gsub!(/\s\s/,' ')
para.strip!
@@xml[:body] << %{#{@tab*6}" << "\n"
end
# Emit a table into @@xml[:body] and reset @endnotes to an empty array.
# table:: table text (markup calls this with the result of Table_xml#table_split)
# ocn::   presumably the object citation number; its use is not visible in this copy
# NOTE(review): the append line is truncated in this copy (the %{...} literal is
# unterminated); restore it from upstream before changing this method.
# NOTE(review): markup also appends this method's return value to @@xml[:body]; as written
# that return value is the empty @endnotes array — confirm this is intended.
def table_structure(table='',ocn='') #tables
@@xml[:body] << %{#{@tab*0}" << "\n" #if para[@regx]
@endnotes=[]
end
# Walk the document objects in +data+ and accumulate the XML body in @@xml[:body].
# For each entry: call @trans.char_enc.utf8 on it when the system locale is UTF-8, run it
# through @trans.markup, and pass header lines (matching Rx[:meta]) to xml_head. Until a
# metadata section has been seen (@rcdc), entries are dispatched on @sto.format: heading
# levels 1-6 go to xml_structure, block objects go to the poem/group/code/table emitters.
# After the loop, levels still flagged in @level are closed, deepest first.
# data:: enumerable of paragraph strings (Scroll#songsheet passes @dal_array)
# Returns the value of the final 3.downto(1) call, i.e. the Integer 3; Scroll#songsheet
# stores that in @data. The document itself is accumulated in @@xml.
# NOTE(review): this copy is damaged in places (an unterminated %{...} literal in the plain
# paragraph branch, an empty regex in the endnote check further down); the nesting of the
# later branches cannot be trusted as shown. Restore from upstream before changing logic.
def markup(data)
xml_sc(@md)
# Per-run state: open levels, content flags, open content groups, pending close strings.
@level,@cont,@copen,@xml_contents_close=[],[],[],[]
@rcdc=false
(0..6).each { |x| @cont[x]=@level[x]=false }
(4..6).each { |x| @xml_contents_close[x]='' }
data.each do |para|
@trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
para=@trans.markup(para)
if para =~/^#{Rx[:meta]}\s*(.+?)$/ # for headers
d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
if d_meta; xml_head(d_meta)
end
end
# Once the metadata / "Document Information" section starts, body processing stops for the rest of the run.
if @rcdc==false \
and (para =~/~metadata/ \
or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}\s*Document Information/)
@rcdc=true
end
if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
if para =~/.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
paranum=para[@regx,3]
@p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum)
end
@sto=SiSU_text_parts::Split_text_object.new(@md,para).xml
### problem in scroll, it appears tables are getting paragraph numbers
unless @rcdc
m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
if para =~m
txt_obj={:txt =>@sto.text}
format_scroll=SiSU_XML_format::Format_scroll.new(@md,txt_obj) if @sto.format =~/i[1-9]|ordinary/
# Headings: $1 is the level and $2 the heading name taken from @sto.format.
case @sto.format
when /^(1):(\S*)/
xml_markup(para)
xml_structure($1,@sto.ocn,para,$2)
para=@sto.lev_para_ocn.heading_body1 #if para =~m
when /^(2):(\S*)/
xml_markup(para)
xml_structure($1,@sto.ocn,para,$2)
para=@sto.lev_para_ocn.heading_body2 #if para =~m
when /^(3):(\S*)/
xml_markup(para)
xml_structure($1,@sto.ocn,para,$2)
para=@sto.lev_para_ocn.heading_body3 #if para =~m
when /^(4):(\S+)/ # work on see SiSU_text_parts::Split_text_object
xml_markup(para)
xml_structure($1,@sto.ocn,para,$2)
para=@sto.lev_para_ocn.heading_body4 #if para =~m
when /^(5):(\S*)/
xml_markup(para)
xml_structure($1,@sto.ocn,para,$2)
para=@sto.lev_para_ocn.heading_body5 #if para =~m
when /^(6):(\S*)/
xml_markup(para)
xml_structure($1,@sto.ocn,para,$2)
para=@sto.lev_para_ocn.heading_body6 #if para =~m
else
# Non-heading objects: stamp is the whole trailing id/digest marker, ocn the number inside it.
matched=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/mi.match(para)
stamp,ocn=matched[0],matched[1]
# NOTE(review): stamp is interpolated into a regex unescaped in the gsub! calls below.
if para =~ /#{Mx[:gr_o]}verse#{Mx[:gr_c]}/
para.gsub!(/#{stamp}/,'')
poem_structure(para,ocn)
elsif para =~ /#{Mx[:gr_o]}group#{Mx[:gr_c]}/
para.gsub!(/#{stamp}/,'')
group_structure(para,ocn)
elsif para =~ /#{Mx[:gr_o]}code#{Mx[:gr_c]}/
para.gsub!(/#{stamp}/,'')
code_structure(para,ocn)
elsif para =~/#{Mx[:gr_o]}Th?.+/ # tables come as single block #work area 2005w13
table=SiSU_Tables::Table_xml.new(para,ocn)
para=table.table_split
@@xml[:body] << table_structure(para,ocn)
else #xml_structure(para, nil, nil, nil)
# Classify plain paragraphs as bulleted-indent, indent or normal; the bullet marker is removed from para.
type=case para
when /^\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}\s*)?#{Mx[:gl_bullet]}/
m=$1
para.gsub!(/^(\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}\s*)?)#{Mx[:gl_bullet]}/,'\1')
"indent_bullet#{m}"
when /^\s*#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/; "indent#{$1}"
else 'norm'
end
xml_markup(para)
if para[@regx] \
and para[@regx,3]
@@xml[:body] << %{#{@tab*6}" << "\n" if para[@regx]
end
end
elsif para =~/(#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ \
and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
#format_scroll=MonoSiSU.new(' Note')
#para=format_scroll.boldPara
elsif para =~/(MetaData)/ \
and para =~/#{Mx[:id_o]}~\d+;[m]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info
txt_obj={:txt =>' MetaData'}
format_scroll=Format_scroll.new(@md,txt_obj)
para=format_scroll.bold_para
elsif para =~/(Owner Details)/ \
and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
txt_obj={:txt =>' Owner Details'}
format_scroll=Format_scroll.new(@md,txt_obj)
@@xml[:owner_details]=format_scroll.bold_para
para=''
#elsif para =~/(.*)<:#>(.*)/
# one,two=$1,$2
# format_text=Format_text_object.new(one,two)
# para=format_text.seg_no_paranum
end
if para =~// \
and para =~/^(-\{{2}~\d+|)/ # -endnote
para=''
end
if para =~/.*<:#>.*$/
para=if para =~ /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/
txt_obj={:txt =>para}
format_text=Format_text_object.new(@md,txt_obj)
format_text.scr_inden_ocn_e_no_paranum
end
end
if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/
# i don't get the condition for no paranum
end
else #
end
para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') if para
end
end
@content_flag=true
6.downto(4) do |x|
y=x - 1; v=x - 3
if @level[x] == true #2004w36 bug fix? watch/test previous logic broke on free.for.all @coontent_flag introduced
if @content_flag==true
@@xml[:body] << "#{@tab*5}\n#{@tab*y}\n"
@content_flag=false
else
@@xml[:body] << "\n#{@tab*y}\n"
end
end
end
3.downto(1) do |x|
y=x - 1
@@xml[:body] << "#{@tab*y}\n" if @level[x] == true
end
#6.downto(1) { |x| y=x - 1; @@xml[:body] << "#{@tab*y}\n" if @level[x] == true }
end
# Prepare the opening part of the output document.
# Creates an RDF helper, an Info_env instance, the CSS selection and an encoding string,
# assigns @@xml[:open], and appends an opening line to @@xml[:head] and @@xml[:body].
# NOTE(review): the heredoc assigned to @@xml[:open] is damaged in this copy (its opener is
# reduced to "<"), both branches of the encoding conditional yield '', and the strings
# appended to :head/:body contain only a newline. The original markup (and the uses of
# dir, css and encoding) are presumably in the stripped text — restore from upstream.
def pre
rdf=SiSU_XML_tags::RDF.new(@md)
dir=SiSU_Env::Info_env.new
css=SiSU_Env::CSS_select.new(@md).xml_dom
encoding=if @sys.locale =~/utf-?8/i; ''
else ''
end
@@xml[:open] =<
#{rdf.comment_xml}
WOK
@@xml[:head] << "\n"
@@xml[:body] << "\n"
end
# Finish the head and body sections and set the closing part of the document.
# Appends the source-control text (@@xml[:sc]) and a terminating line to the head,
# a terminating line to the body, and assigns @@xml[:close].
# NOTE(review): the terminating strings are bare newlines in this copy; the closing
# tags they presumably carried are not visible here — verify against upstream.
def post
  parts=@@xml
  parts[:head] << parts[:sc] << "\n"
  parts[:body] << "\n"
  parts[:close]="\n"
end
# Assemble the accumulated document parts and write them out via Output.
# Order: :open, :head, :body, :metadata, then :owner_details (only when @md.stmp contains
# two consecutive word characters), then :tail and :close. Nested arrays are flattened and
# nil entries dropped before writing. Afterwards :head, :body and :tail are reset to empty
# arrays for the next document.
# Returns the array produced by that reset assignment.
# NOTE(review): :metadata and :tail are never given content by code visible in this file,
# and :open, :close, :sc and :owner_details are not reset here — confirm that is intended.
def publish
  content=[@@xml[:open],@@xml[:head],@@xml[:body],@@xml[:metadata]]
  content << @@xml[:owner_details] if @md.stmp =~/\w\w/
  content << @@xml[:tail] << @@xml[:close]
  # Non-destructive calls: flatten! returns nil when there is nothing to flatten, so the
  # previous flatten!.compact! chain could fail with NoMethodError on nil.
  content=content.flatten.compact
  Output.new(content,@md).xml
  @@xml[:head],@@xml[:body],@@xml[:tail]=[],[],[] # check whether should be nil
end
end
# Writes the assembled document parts to the DOM output file.
class Output
  include SiSU_Param
  # data:: enumerable of strings to write, in order
  # md::   document metadata; md.fn[:dom] names the output file
  def initialize(data,md)
    @data,@md=data,md
  end
  # Create the output directory and file, then write each entry of @data.
  # Paragraph attribute markers (Mx[:pa_o]:...Mx[:pa_c]) are removed from each entry in place,
  # and a newline is appended to non-empty entries before they are written with puts.
  # The file handle is closed even when a write raises. Returns nil.
  def xml
    SiSU_Env::SiSU_file.new(@md).mkdir
    filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:dom]).mkfile
    begin
      # Built once; the pattern does not depend on the entry being written.
      pa_marker=/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/
      @data.each do |para|
        #para.strip!
        para.gsub!(pa_marker,'') #; para.gsub!(/<:\S+?>|/,'')
        para="#{para}\n" unless para.empty?
        filename_xml.puts para
      end
    ensure
      # Previously the handle stayed open if gsub!/puts raised part-way through.
      filename_xml.close
    end
    nil
  end
end
# Optional well-formedness check of the generated DOM file using the external tidy tool.
class Tidy
  # md::  document metadata (cmd, fnb, fn are read)
  # dir:: environment object; dir.path.output is the output root
  def initialize(md,dir)
    @md=md
    @env=dir
    @prog=SiSU_Env::Info_program.new
  end
  # Run the check when tidy is available and the command string contains V or M.
  # Screen messages are skipped when the command string contains q.
  # Returns the result of the final screen call, or nil when nothing was run or printed.
  def xml
    return unless @prog.tidy !=false
    return unless @md.cmd =~/[VM]/
    announce=SiSU_Screen::Ansi.new(@md.cmd,'invert','Using XML Tidy','check document structure')
    unless @md.cmd =~/q/
      announce.colorize
    end
    unless @md.cmd =~/q/
      announce.grey_open
    end
    sink='/dev/null' #don't want one or screen output, check for alternative flags
    checker=SiSU_Env::System_call.new("#{@env.path.output}/#{@md.fnb}/#{@md.fn[:dom]}",sink)
    checker.well_formed?
    announce.p_off unless @md.cmd =~/q/
  end
end
end
end
__END__