diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2007-06-02 11:25:19 +0100 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2007-06-02 11:25:19 +0100 |
commit | d0f1974a7b93db754f70d013738e6ad7d16b4d24 (patch) | |
tree | 252594de9ed0f78fd398609b9a0f298eb5b56ccd /lib/sisu/v0/docbook.rb | |
parent | sisu-0.52.7 + md5s (diff) |
0.53.0, pre-build, see changelog, library naming changed for scm, placed under v0 (instead of 0.53)
Diffstat (limited to 'lib/sisu/v0/docbook.rb')
-rw-r--r-- | lib/sisu/v0/docbook.rb | 561 |
1 files changed, 561 insertions, 0 deletions
# diff --git a/lib/sisu/v0/docbook.rb b/lib/sisu/v0/docbook.rb (new file mode 100644, index 00000000..e8675f45, --- /dev/null, +++ b/lib/sisu/v0/docbook.rb, @@ -0,0 +1,561 @@)
=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
 * http://www.jus.uio.no/sisu
 * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: xml (dom style) output processing

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

 Summary of GPL 2

 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 2 of the License, or (at your option)
 any later version.

 This program is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 more details.

 You should have received a copy of the GNU General Public License along
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

 If you have Internet connection, the latest version of the GPL should be
 available at these locations:
 http://www.fsf.org/licenses/gpl.html
 http://www.gnu.org/copyleft/gpl.html
 http://www.jus.uio.no/sisu/gpl2.fsf

 SiSU was first released to the public on January 4th 2005

 SiSU uses:

 * Standard SiSU markup syntax,
 * Standard SiSU meta-markup syntax, and the
 * Standard SiSU object citation numbering and system

 © Ralph Amissah 1997, current 2007.
 All Rights Reserved.

 * Ralph Amissah: ralph@amissah.com
 ralph.amissah@gmail.com

 * Notes: tidy -xml dom.xml >> index.tidy
=end
# DocBook output for SiSU: reads the document abstraction ("dal") array for a
# source file and writes a <book> XML file built from it.
module SiSU_Docbook
  require "#{SiSU_lib}/defaults"
  require "#{SiSU_lib}/param"
  include SiSU_Param
  include SiSU_Viz
  require "#{SiSU_lib}/sysenv"
  include SiSU_Env
  require "#{SiSU_lib}/dal"
  require "#{SiSU_lib}/shared_xml"
  require "#{SiSU_lib}/xml_format"
  include SiSU_XML_format
  include SiSU_XML_munge
  require "#{SiSU_lib}/rexml"
  include SiSU_Rexml
  @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
  @@tablefoot=''
  # Entry point: wraps the command options, reports progress on screen and
  # hands the dal array to Songsheet.
  class Source
    # opt:: options object; this class reads its #cmd and #fns.
    def initialize(opt)
      @opt=opt
    end
    # Loads document parameters and environment, prints the viewer hints
    # (depending on the flags in @opt.cmd), then generates the DocBook file.
    # Any StandardError is reported through SiSU_Errors::Info_error.
    def read
      begin
        @md=SiSU_Param::Parameters.new(@opt).get
        @env=SiSU_Env::Info_env.new(@opt.fns)
        path=@env.path.output_tell
        loc=@env.url.output_tell
        tool=if @opt.cmd =~/[MV]/; "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:docbook]}\n\t#{@env.program.xml_viewer} #{path}/#{@md.fnb}/#{@md.fn[:docbook]}"
        elsif @opt.cmd =~/v/; "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:docbook]}"
        else ''
        end
        tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','XML DOM',tool)
        tell.colorize unless @opt.cmd =~/q/
        tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:docbook]}")
        tell.flow if @opt.cmd =~/[MV]/
        @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
        SiSU_Docbook::Source::Songsheet.new(@dal_array,@md,@env).songsheet
      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      ensure #file closed in songsheet
      end
    end
    private
    # Runs the generation steps in order: build/write the XML, then the
    # optional well-formedness (tidy) and REXML parse checks.
    class Songsheet
      # data:: dal array; md:: document parameters; dir:: environment object.
      def initialize(data,md='',dir='')
        @data,@md,@env=data,md,dir
      end
      # Generates the DocBook file; errors are reported via SiSU_Errors::Info_error.
      def songsheet
        begin
          SiSU_Docbook::Source::Scroll.new(@data,@md).songsheet
          SiSU_Docbook::Source::Tidy.new(@md,@env).xml if @md.cmd =~/[vVM]/i # test wellformedness, comment out when not in use
          SiSU_Rexml::Rexml.new(@md,@md.fn[:docbook]).xml if @md.cmd =~/M/ # test rexml parsing, comment out when not in use #debug
        rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error
        ensure
        end
      end
    end
    # Builds the DocBook document: accumulates head/body fragments in the
    # class-level @@xml hash and publishes them through Output.
    class Scroll
      # Element names per heading level: levels 1-3 map to <part>, levels 4-6
      # map to Contents[level - 3].
      Heading,Heading_close,Contents=[],[],[]
      Heading[0]='part'
      Heading[1]='part level="1"'
      Heading[2]='part level="2"'
      Heading[3]='part level="3"'
      Heading_close[1]=Heading[0]
      Heading_close[2]=Heading[0]
      Heading_close[3]=Heading[0]
      #Contents[0]='preface'
      Contents[1]='chapter'
      Contents[2]='sect1'
      Contents[3]='sect2'
      # Fix: the last entry was written ":sc=[]" (missing ">"), which is not a
      # valid key=>value pair.
      @@xml={ :body=>[],:open=>[],:close=>[],:head=>[],:sc=>[] }
      @@dp=nil
      require "#{SiSU_lib}/shared_txt"
      include SiSU_text_utils
      # data:: dal array (one string per text object); md:: document parameters.
      def initialize(data='',md='')
        @data,@md=data,md
        @vz=SiSU_Env::Get_init.instance.skin
        @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern # digest pattern, looked up once per process
        # Captures: 1 = optional heading name, 2 = text, 3 = number in the trailing <~N;..;..> stamp.
        @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
        @tab="\t"
        @trans=SiSU_XML_munge::Trans.new(@md)
        @sys=SiSU_Env::System_call.new
      end
      # Runs the four build phases in order.
      def songsheet
        pre
        markup
        post
        publish
      end
      protected
      # Rewrites inline endnote markup ~{N text <digest>}~ into a DocBook
      # <footnote>, modifying para in place.
      def xml_markup(para='')
        para.gsub!(/~\{(\d+)\s+(.+?)\s*<#@dp>\}~/,
          '<footnote><para>\1 \2</para></footnote> ')
      end
      # Appends one metadata element to the head when meta.type is 'meta';
      # otherwise appends an empty string.
      def xml_head(meta)
        txt=meta.text
        txt.gsub!(/<br(?: \/)?>/,'')
        txt.gsub!(/ & /,' and ')
        @@xml[:head] <<=if meta.type == 'meta'
          <<WOK
#{@tab}<#{meta.el}>
#{@tab*2}#{txt}
#{@tab}</#{meta.el}>
WOK
        else ''
        end
      end
      # Stores the source-control fragment (or '' when @md.sc_info is unset)
      # in @@xml[:sc]. The md argument is unused; @md is read instead.
      def xml_sc(md='')
        sc=if @md.sc_info
          <<WOK
 <source_control>
 <sc class="sourcefile">
 #{@md.sc_filename}
 </sc>
 <sc class="number">
 #{@md.sc_number}
 </sc>
 <sc class="date">
 #{@md.sc_date}
 </sc>
 </source_control>
WOK
        else ''
        end
        @@xml[:sc]=sc
      end
      # Emits an opening element plus its <title> and records which contents
      # level (chapter/sect1/sect2) is now open.
      # ocn, hname and tag are accepted for interface compatibility; this
      # method does not emit them.
      def xml_element(lv='',ocn='',para='',hname='',tag='',xml_element='')
        lv=lv.to_i
        n=lv - 1
        n1=lv
        @@xml[:body] << <<WOK
#{@tab*n}#{xml_element}
#{@tab*n1}<title>#{para[@regx, 2]}</title>
WOK
        if lv == 4
          @copen[1]=true
          @copen[2]=@copen[3]=false
        elsif lv == 5
          @copen[2]=true
          @copen[3]=false
        elsif lv == 6
          @copen[3]=true
        end
      end
      # Appends a closing tag for each given Contents index at the given
      # tab depth.
      def close_contents(levels,indent)
        levels.each { |c| @@xml[:body] << "#{@tab*indent}</#{Contents[c]}>\n" }
      end
      # Closes whatever elements the new heading supersedes, then opens the
      # element for heading level lv (1-3: part, 4-6: chapter/sect1/sect2).
      def xml_structure(lv='',ocn='',para='',hname='' )
        lv=lv.to_i
        n=lv - 1
        n3=lv + 2
        tag=''
        tag="\n#{@tab*n3}<nametag>#{hname}</nametag>\n" unless hname.nil?
        #if para[@regx]
        #  paragraph="#{para[@regx, 2]}"
        #  util=SiSU_text_utils::Paragraph.new(paragraph, 70)
        #  wrapped=util.line_wrap
        #end
        case lv
        when 1..3
          xml_element="<#{Heading[lv]}>"
          3.downto(lv) do |x|
            y=x - 1
            @cont[1]=false if @cont[1]
            @cont[2]=false if @cont[2]
            @cont[3]=false if @cont[3]
            ####### attempt to close contents
            if @copen[3] # 6~
              close_contents([3,2,1],n)
              @copen[1]=@copen[2]=@copen[3]=false
            elsif @copen[2] # 5~
              close_contents([2,1],n)
              @copen[1]=@copen[2]=@copen[3]=false
            elsif @copen[1] # 4~
              close_contents([1],n)
              @copen[1]=@copen[2]=@copen[3]=false
            end
            @@xml[:body] << "#{@tab*y}</#{Heading_close[x]}>\n" if @level[x]
            @level[x]=false
          end
        when 4..6
          6.downto(lv) do |x|
            @xml_contents_close[x]='' if @level[x] == true
          end
          cv=lv - 3
          # id stays empty when the heading carries no name after "N~".
          id=''
          if para =~/^4~\S+/
            m=/^4~(\S+)/.match(para)[1]
            id=if m =~/^\d+$/; 'ch' + m
            else 'ch_' + m
            end
          elsif para =~/^5~\S+/
            m=/^5~(\S+)/.match(para)[1]
            id= 'sec_' + m
          elsif para =~/^6~\S+/
            m=/^6~(\S+)/.match(para)[1]
            id= 'subsec_' + m
          end
          xml_element=%{<#{Contents[cv]} id="#{id}">} #hmmm gsub were it possible
          case lv
          when 4
            if @copen[3] == true # 6~
              close_contents([3,2,1],n)
            elsif @copen[2] == true # 5~
              close_contents([2,1],n)
            elsif @copen[1] == true # 4~
              close_contents([1],n)
            end
            @cont[1]=true
          when 5
            if @copen[3] == true #6~
              close_contents([3,2],n)
            elsif @copen[2] == true #5~
              close_contents([2],n)
            end
            @cont[2]=true
          when 6
            close_contents([3],n) if @copen[3] #watch should possibly be outside...
            @cont[3]=true
          end
        end
        xml_element(lv,ocn,para,hname,tag,xml_element)
        @level[lv]=true
        ((lv+1)..6).each { |x| @level[x]=false }
      end
      # Shared emitter for group/verse/code blocks: strips the <:marker> and
      # <:marker-end> tags from para (in place) and appends it as
      # <para class="css_class">.
      def classed_para_structure(para,marker,css_class)
        para.gsub!(/<:#{marker}(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*7}<para class="#{css_class}">#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*8}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*7}</para>\n}
      end
      # Emits a <:group> block. ocn is accepted but not emitted.
      def group_structure(para='',ocn='')
        classed_para_structure(para,'group','group')
      end
      # Emits a <:verse> block. ocn is accepted but not emitted.
      def poem_structure(para='',ocn='')
        classed_para_structure(para,'verse','verse')
      end
      # Emits a <:code> block. ocn is accepted but not emitted.
      def code_structure(para='',ocn='')
        classed_para_structure(para,'code','code')
      end
      #def table_structure(table='',ocn='') #tables
      #  @@xml[:body] << %{#{@tab*1}#{table}\n#{@tab*1}\n} # unless lv # main text, contents, body KEEP #{ocn}
      #  @endnotes=[]
      #end
      # Escapes bare ampersands in each word (in place) and returns wordlist.
      # Words that already contain an entity (&name;) are left alone.
      # Fix: the replacement was a literal "&", which made the call a no-op.
      def tidywords(wordlist)
        wordlist.each do |x|
          x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
        end
      end
      # Walks the dal array and appends the body XML: headings via
      # xml_structure, grouped/verse/code/table blocks, and plain paragraphs.
      # Finally closes any elements still open.
      def markup
        data=@data
        dir=SiSU_Env::Info_env.new(@md.fns)
        xml_sc(@md)
        @rcdc=false
        @level,@cont,@copen,@xml_contents_close=[],[],[],[]
        (0..6).each { |x| @cont[x]=@level[x]=false }
        (4..6).each { |x| @xml_contents_close[x]='' }
        data.each do |para|
          wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
          para=tidywords(wordlist).join(' ').strip
          para.gsub!(/<[-~]#>/,'')
          para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'')
          para.gsub!(/<:pb>\s*/,'')
          para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|ftp):\/\/\S+|image)/,
            %{<imagedata fileref="#{dir.url.images_local}\/\\1"/>})
          #para.gsub!(/\{(\S+?\.png) \d+x\d+ \".+?\" \}(?:http:\/\/\S+|image)/,'<image>\1</image>')
          # NOTE(review): as visible here this replaces a space with a space;
          # pattern and/or replacement were probably a non-breaking space or an
          # entity lost in extraction -- confirm against the repository.
          para.gsub!(/ /,' ')
          @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
          if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers
            d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
            if d_meta; xml_head(d_meta)
            end
          end
          # Once the metadata section starts, no further body text is emitted.
          @rcdc=true if @rcdc ==false and (para =~/^\d~metadata/ or para =~/^1~\s+Document Information/)
          if para !~/(^0~|<ENDNOTES>|<EOF>)/
            if para =~/.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
              paranum=para[@regx, 3]
              @p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum)
            end
            @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn
            ### problem in scroll, it appears tables are getting paragraph numbers
            unless @rcdc
              m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
              if para =~m
                format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/
                case @sto.format
                when /^(1)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body1
                when /^(2)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body2
                when /^(3)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body3
                when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body4
                when /^(5)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body5
                when /^(6)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body6
                #when /^(i1)$/
                #  #format_scroll.gsubBody
                #  #para=@sto.lev_para_ocn.scrIndent1
                #when /^(i2)$/
                #  format_scroll.gsubBody
                #  para=@sto.lev_para_ocn.scrIndent2
                #when /^(center)$/
                #  para.gsub!(/(.+)/,
                #    %{<center>(\\1)</center>})
                #  para=@sto.lev_para_ocn.scrPara
                #when /^(b|bold)$/
                #  para.gsub!(/(.+)/,
                #    %{<b>(\\1)</b>})
                #  para=@sto.lev_para_ocn.scrPara
                #when /null/ # see whether u can improve
                #  if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
                #    #format_scroll.gsubBody
                #    #para=@sto.lev_para_ocn.scrPara
                #  end
                else
                  matched=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/mi.match(para)
                  stamp,ocn=matched[0],matched[1]
                  if para =~ /<:verse>/
                    para.gsub!(/#{stamp}/,'')
                    poem_structure(para,ocn)
                  elsif para =~ /<:group>/
                    para.gsub!(/#{stamp}/,'')
                    group_structure(para,ocn)
                  elsif para =~ /<:code>/
                    para.gsub!(/#{stamp}/,'')
                    code_structure(para,ocn)
                  elsif para =~/<!Th?.+/ # tables come as single block #work area 2005w13
                    table=SiSU_Tables::Table_xml.new(para,ocn)
                    para=table.table_split
                    @@xml[:body] << para
                    #@@xml[:body] << table_structure(para,ocn)
                  else #xml_structure(para, nil, nil, nil)
                    xml_markup(para)
                    @@xml[:body] << "#{@tab*7}<para>#{para[@regx, 2]}</para>\n" if para[@regx, 2] # main text, contents, body KEEP ocn = #{para[@regx, 3]} == #{ocn}
                  end
                end
              elsif para =~/(Note|Endnotes?)/ and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
                #format_scroll=MonoSiSU.new('<br /><a name="notes">Note</a>')
                #para=format_scroll.boldPara
              elsif para =~/(MetaData)/ and para =~/<~\d+;[m]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info
                format_scroll=Format_scroll.new(@md,'<br /><a name="metadata">MetaData</a>')
                para=format_scroll.bold_para
              elsif para =~/(Owner Details)/ and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
                format_scroll=Format_scroll.new(@md,'<br /><a name="owner.details">Owner Details</a>')
                @@xml[:owner_details]=format_scroll.bold_para
                para=''
              elsif para =~/(.*)<:#>(.*)/
                one, two=$1,$2
                format_text=Format_text_object.new(one,two)
                para=format_text.seg_no_paranum
              end
              para='' if para =~/<a name="n\d+">/ and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/ # -endnote
              if para =~/.*<:#>.*$/
                para=case para
                when /<:i1>/
                  format_text=Format_text_object.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                when /<:i2>/
                  format_text=Format_text_object.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                else para # fix: without this branch para became nil and the gsub! below raised
                end
              end
              if para =~/<:center>/
                one, two=/(.*)<:center>(.*)/.match(para)[1,2]
                format_text=Format_text_object.new(one, two)
                para=format_text.center
              end
            else
            end
            para.gsub!(/<:\S+?>/,'')
            para.gsub!(/<!.+!>/,' ')
          end
        end
        # Close contents elements (levels 6..4) that are still flagged open.
        @content_flag=true
        6.downto(4) do |x|
          y=x - 1; c=x - 3
          if @level[x] == true #2004w36 bug fix? watch/test previous logic broke on free.for.all @coontent_flag introduced
            @@xml[:body] << "\n#{@tab*y}</#{Contents[c]}>\n"
            @content_flag=false
          end
        end
        # Then close the enclosing <part> elements (levels 3..1).
        3.downto(1) do |x|
          y=x - 1
          @@xml[:body] << "#{@tab*y}</#{Heading_close[x]}>\n" if @level[x] == true
        end
      end
      # Sets the document prologue (XML declaration, DOCTYPE, stylesheet
      # processing instruction, RDF comment, <book>) and opens <bookinfo>.
      def pre
        rdf=SiSU_XML_tags::RDF.new(@md)
        dir=SiSU_Env::Info_env.new
        css=SiSU_Env::CSS_select.new(@md).docbook_xml
        encoding='<?xml version="1.0"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V5.0//EN"
 "http://www.oasis-open.org/docbook/xml/5.0/docbook.dtd">'
        #encoding='<?xml version="1.0" encoding="utf-8"?>'
        @@xml[:open] =<<WOK
#{encoding}
<?xml-stylesheet type="text/css" href="../#{dir.path.style}/#{css}"?>
#{rdf.comment_xml}
<book>
WOK
        @@xml[:head] << "<bookinfo>\n"
      end
      # Appends source-control info, closes <bookinfo> and sets the closing </book>.
      def post
        @@xml[:head] << @@xml[:sc]
        @@xml[:head] << "</bookinfo>\n"
        @@xml[:close] = "</book>\n"
      end
      # Concatenates all fragments, writes them via Output, then resets the
      # head/body/tail accumulators for the next document.
      def publish
        content=[]
        content << @@xml[:open] << @@xml[:head] << @@xml[:body] << @@xml[:metadata]
        content << @@xml[:owner_details] if @md.stmp =~/\w\w/
        content << @@xml[:tail] << @@xml[:close]
        # Fix: Array#to_s only concatenates on Ruby 1.8; join (after dropping
        # unset fragments) gives the same text on every Ruby version.
        Output.new(content.flatten.compact.join,@md).xml
        @@xml[:head],@@xml[:body],@@xml[:tail]=[],[],[]
      end
    end
    # Writes the assembled XML text to the document's DocBook output file.
    class Output
      include SiSU_Param
      # data:: the complete XML document as one String; md:: document parameters.
      def initialize(data,md)
        @data,@md=data,md
      end
      # Strips leftover <:...> and <!...!> markers line by line, drops empty
      # lines and writes the result.
      def xml
        @sisu=[]
        # Fix: String#each was removed in Ruby 1.9; each_line behaves the same on 1.8.
        @data.each_line do |para|
          para.gsub!(/<:\S+?>/,'')
          para.gsub!(/<!.+?!>/,'')
          para="#{para}\n" unless para.empty?
          @sisu << para
        end
        new_file_data=@sisu.join # fix: was Array#to_s (see publish)
        @sisu=new_file_data.scan(/.+/)
        SiSU_Env::SiSU_file.new(@md).mkdir
        filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:docbook]).mkfile
        begin
          @sisu.each {|para| filename_xml.puts para}
        ensure
          filename_xml.close # close even when a write fails
        end
      end
    end
    # Optional well-formedness check of the generated file with the external
    # tidy program.
    class Tidy
      # md:: document parameters; dir:: environment object (its path.output is read).
      def initialize(md,dir)
        @md,@env=md,dir
        @prog=SiSU_Env::Info_program.new
      end
      # Runs the check only when tidy is not disabled (@prog.tidy != false)
      # and the command flags include V or M; screen output is suppressed by q.
      def xml
        if @prog.tidy !=false
          if @md.cmd =~/[VM]/
            tell=SiSU_Screen::Ansi.new(@md.cmd,'invert','Using XML Tidy','check document structure')
            tell.colorize unless @md.cmd =~/q/
            tell.grey_open unless @md.cmd =~/q/
            tidyfile='/dev/null' #don't want one or screen output, check for alternative flags
            tidy=SiSU_Env::System_call.new("#{@env.path.output}/#{@md.fnb}/#{@md.fn[:docbook]}",tidyfile)
            tidy.well_formed?
            tell.p_off unless @md.cmd =~/q/
          end
        end
      end
    end
  end
end
__END__