diff options
| author | Ralph Amissah <ralph@amissah.com> | 2007-10-19 01:01:26 +0100 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2007-10-19 01:01:26 +0100 | 
| commit | db15c23ad882d0a61a7244d381544c653accf659 (patch) | |
| tree | 4cdfa57e9db90e43535e196ad63cfb68c7e27b91 /lib | |
| parent | minor detail (diff) | |
first pass at fictionbook output; images posted with document rsync; also ...
* first pass at FictionBook output - FictionBook is interesting (develop over
  time, or drop support)
* images used in a document are now posted along with the document by rsync
* ruby 1.9 String#strip bug re-encountered in the Debian Sid build (unless it
  is my setup)
see changelog
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/sisu/v0/hub.rb | 3 | ||||
| -rw-r--r-- | lib/sisu/v0/options.rb | 9 | ||||
| -rw-r--r-- | lib/sisu/v0/plaintext.rb | 8 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_xml.rb | 23 | ||||
| -rw-r--r-- | lib/sisu/v0/sysenv.rb | 6 | ||||
| -rw-r--r-- | lib/sisu/v0/xml_fictionbook.rb | 311 | 
6 files changed, 355 insertions, 5 deletions
| diff --git a/lib/sisu/v0/hub.rb b/lib/sisu/v0/hub.rb index 3b145dac..029c99c5 100644 --- a/lib/sisu/v0/hub.rb +++ b/lib/sisu/v0/hub.rb @@ -169,6 +169,7 @@ module SiSU                    when /^html$/;            SiSU_HTML::Source.new(@opt).read            # -h -H                    when /^xml$/;             SiSU_XML_SAX::Source.new(@opt).read         # -x                    when /^xml_dom$/;         SiSU_XML_DOM::Source.new(@opt).read         # -X +                  when /^xml_fictionbook$/; SiSU_XML_Fictionbook::Source.new(@opt).read # -f                    when /^xhtml$/;           SiSU_XHTML::Source.new(@opt).read           # -b                    when /^embedded$/;        SiSU_Embedded::Source.new(@opt).read        # -m (image and other content)                    when /^manifest$/;        SiSU_Manifest::Source.new(@opt).read        # -y @@ -442,6 +443,8 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/          end          if @opt.cmd =~/X/; op('xml_dom','xml dom')             #% -X xml dom type          end +        if @opt.cmd =~/f/; op('xml_fictionbook','xml fictionbook') #% -f xml fictionbook +        end          if @opt.cmd =~/b/; op('xhtml','xhtml sax')             #% -b xhtml sax type          end          #if @opt.cmd =~/B/; op('docbook','docbook xml')         #% -B docbook xml diff --git a/lib/sisu/v0/options.rb b/lib/sisu/v0/options.rb index 39de0530..4c8afb58 100644 --- a/lib/sisu/v0/options.rb +++ b/lib/sisu/v0/options.rb @@ -94,7 +94,14 @@ module SiSU_commandline          end          s << " #{y}" unless y.empty?        end -      s.strip! 
# String.strip is broken in ruby 1.9.0 (2007-09-10 patchlevel 0) [i486-linux], 2007-09-18:38/2 +if RUBY_VERSION > '1.9' +#debug strip +p 'problem with ruby 1.9.0 (2007-09-10 patchlevel 0) [i486-linux] on Debian Sid' +p 'p content.class  == String                    == String           == ' + s.class.to_s +p 'p s              ==  -mv gpl.fsf.sst          ==  -mv gpl.fsf.sst == ' + s +p 'p s.strip        == -mv gpl.fsf.sst #expected == -mv #i get       == ' + s.strip + ' #actual, you get' +end +      s.strip!  # String.strip is broken in ruby 1.9.0 (2007-09-10 patchlevel 0) [i486-linux], 2007-09-18:38/2        a=s.split(/\s+/)        a.each do |x|          if x =~/^-[a-z0-5]+/i \ diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb index 1a0303cd..56f2c253 100644 --- a/lib/sisu/v0/plaintext.rb +++ b/lib/sisu/v0/plaintext.rb @@ -486,19 +486,19 @@ WOK        end        def plaintext                                                            #%plaintext output          SiSU_Env::SiSU_file.new(@md).mkdir -        filename_plaintext=SiSU_Env::SiSU_file.new(@md,@md.fn[:plain]).mkfile +        file_plaintext=SiSU_Env::SiSU_file.new(@md,@md.fn[:plain]).mkfile          @sisu=[]          @content.each do |para|                                                # this is a hack            if para.class == Array \            and para.length > 0              para.each do |line|                line.gsub!(/\s+$/m,'') -              filename_plaintext.puts line           #unix plaintext +              file_plaintext.puts line           #unix plaintext              end -          else filename_plaintext.puts para           #unix plaintext # /^([*=-]|\.){5}/ +          else file_plaintext.puts para           #unix plaintext # /^([*=-]|\.){5}/            end          end -        filename_plaintext.close +        file_plaintext.close        end      end    end diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index d2897a1f..5d427782 100644 --- 
a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -389,6 +389,29 @@ module SiSU_XML_munge        para=tidywords(wordlist).join(' ').strip        para      end +    def markup_fictionbook(para='') +      para.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]') +      para.gsub!(/\/\{(.+?)\}\//,'<i>\1</i>') +      para.gsub!(/[*!]\{(.+?)\}[*!]/,'<b>\1</b>') +      para.gsub!(/_\{(.+?)\}_/,'<u>\1</u>') +      para.gsub!(/-\{(.+?)\}-/,'<del>\1</del>') +      para.gsub!(/<br(\s*\/)?>/,'<br />') +      para.gsub!(/<:pb>\s*/,'') +      para.gsub!(/<[-~]#>/,'') +      #temporary --> +      para.gsub!(/<:\S+?>/,'') +      #<-- temporary +      para.gsub!(/<[-~]#>/,'') +      para.gsub!(/(^|\s)&\s+/,'\1& ') #sort +      para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax +      para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/, +        "<image.path>#{@dir.url.images_local}\/\\1</image.path>") +      para.gsub!(/ /,' ') +      #para.gsub!(/ /,' ') #clean +      wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 +      para=tidywords(wordlist).join(' ').strip +      para +    end      def markup_group(para='')        para.gsub!(/</,'<'); para.gsub!(/>/,'>')        para.gsub!(/<:?br(?:\s+\/)?>/,'<br />') diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb index e5c2c394..3f92e9b7 100644 --- a/lib/sisu/v0/sysenv.rb +++ b/lib/sisu/v0/sysenv.rb @@ -2315,7 +2315,9 @@ WOK      def rsync        self.remote_host_base.each do |remote_conn|          local_gen=@source_path +        local_gen_image="#{@env.path.webserv}/#{@env.path.stub_pwd}/_sisu/image"          remote_gen="#{remote_conn[:name]}/#{@env.path.stub_pwd}/." +        remote_images="#{remote_conn[:name]}/#{@env.path.stub_pwd}/_sisu/image/."          local_src=@source_path_src          remote_src="#{remote_conn[:name]}/#{@env.path.stub_src}/."          
src_txt=@opt.fnc @@ -2330,6 +2332,10 @@ WOK            if FileTest.file?("#{local_src}/#{src_txt}") \            or FileTest.file?("#{local_src}/#{src_pod}")              System_call.new("#{local_src}/#{src_txt} #{local_src}/#{src_pod}",remote_src,@opt.cmd).rsync +            if defined? @md.ec[:image] +              images="#{local_gen_image}/" + @md.ec[:image].join(" #{local_gen_image}/") +              System_call.new(images,remote_images,@opt.cmd).rsync +            end            end          elsif  @opt.cmd =~/U/            puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/ diff --git a/lib/sisu/v0/xml_fictionbook.rb b/lib/sisu/v0/xml_fictionbook.rb new file mode 100644 index 00000000..8722f1d5 --- /dev/null +++ b/lib/sisu/v0/xml_fictionbook.rb @@ -0,0 +1,311 @@ +=begin + + * Name: SiSU + + * Description: extract and print an XML rendition of document structure to screen + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + +   SiSU, a framework for document structuring, publishing and search + +   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007 Ralph Amissah + +   This program is free software: you can redistribute it and/or modify it +   under the terms of the GNU General Public License as published by the Free +   Software Foundation, either version 3 of the License, or (at your option) +   any later version. + +   This program is distributed in the hope that it will be useful, but WITHOUT +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   more details. + +   You should have received a copy of the GNU General Public License along with +   this program. If not, see <http://www.gnu.org/licenses/>. 
+ +   If you have Internet connection, the latest version of the GPL should be +   available at these locations: +   <http://www.fsf.org/licenses/gpl.html> +   <http://www.gnu.org/copyleft/gpl.html> +   <http://www.jus.uio.no/sisu/gpl.fsf> + + * SiSU uses: +   * Standard SiSU markup syntax, +   * Standard SiSU meta-markup syntax, and the +   * Standard SiSU object citation numbering and system + + * Hompages: +   <http://www.jus.uio.no/sisu> +   <http://www.sisudoc.org> + + * Download: +   <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah +   <ralph@amissah.com> +   <ralph.amissah@gmail.com> + + ** Description: SiSU information Structuring Universe, processing + +=end +module SiSU_XML_Fictionbook +  require "#{SiSU_lib}/dal" +  require "#{SiSU_lib}/sysenv" +  require "#{SiSU_lib}/shared_txt" +  require "#{SiSU_lib}/shared_xml" +  include SiSU_Env +  include SiSU_Param +  include SiSU_text_utils +  include SiSU_XML_munge +  pwd=Dir.pwd +  class Source +    def initialize(opt) +      @opt=opt +      @sp='  ' +    end +    def read +      begin +        @md=SiSU_Param::Parameters.new(@opt).get +        @dal_array=SiSU_DAL::Source.new(@opt).get +        SiSU_XML_Fictionbook::Source::Scroll.new(@dal_array,@md).songsheet +      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error +      ensure +      end +    end +    private +    class Scroll <Source +      def initialize(data='',md='') +        @data,@md=data,md +        @trans=SiSU_XML_munge::Trans.new(@md) +        @dp=SiSU_Env::Info_env.new.digest.pattern +        @file_fictionbook=SiSU_Env::SiSU_file.new(@md,'fictionbook.xml').mkfile +        #@file_fictionbook=SiSU_Env::SiSU_file.new(@md,@md.fn[:fictionbook]).mkfile +      end +      def songsheet +        @t='fictionbook' +        @s=['section',          #@s=['body', +          'section', +          'section', +          'section', +          'section', +          'section', +          'section' +        ] +        head +        
extract_endnotes +        structure +        #endnotes +        tail +      end +      def head +        version=SiSU_Env::Info_version.new.get_version +        rb_ver=SiSU_Env::Info_version.new.rbversion +        date_available=if defined? @md.dc_date_available; "\n     <p>#{@md.dc_date_available} Initial version</p>" +        else '' +        end +        date_modified=if defined? @md.dc_date_available; "\n      <p>#{@md.dc_date_modified} Last Modified</p>" +        else '' +        end +        head=<<WOK +<?xml version="1.0" encoding="UTF-8"?> +<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"> +<description> +  <title-info> +    <genre match="100">***</genre> +    <author> +      <first-name>***</first-name> +      <middle-name>***</middle-name> +      <last-name>***</last-name> +    </author> +    <book-title>#{@md.dc_title}</book-title> +    <annotation> +    </annotation> +    <date value="#{@md.dc_date}">yyyy</date> +  </title-info> +     <document-info> +    <author> +      <first-name/> +      <last-name/> +      <nickname/> +    </author> +    <program-used>#{version[:project]} #{version[:version]} and #{rb_ver}</program-used> +    <date value="#{version[:date]}">#{version[:date]}</date> +    <src-ocr/> +    <version>1.0</version> +    <history>#{date_available}#{date_modified} +    </history> +  </document-info> +</description> +<body> +WOK +        put(head) +      end +      def extract_endnotes +        @endnotes=[] +        @data.each do |para| +          @endnotes << para.scan(/~\{(.+?)<#{@dp}>\}~/m) +        end +        @endnotes.flatten! +      end +      def endnotes +        @endnotes.each do |endnote| +          endnote.strip! 
+          endnote=@trans.markup_fictionbook(endnote) +          endnote="<p>#{endnote}</p>" +          util=SiSU_text_utils::Wrap.new(endnote,80,10) +          endnote=util.line_wrap +          put(endnote) +        end +      end +      def tail +        tail=<<WOK +</body> +</FictionBook> +WOK +        put(tail) +      end +      def markup(para,type='') +        para.strip! +        para=para.gsub(/^[1-9]~(?:\S+)?\s+/,'') +        para=para.gsub(/<~(\d+);(?:\d:\d+|\S\d+);\S\d+><#@dp:#@dp>/,'[(\1)]') +        para=@trans.markup_fictionbook(para) +        para=if type.empty?; "<p>#{para}</p>" +        else "<#{type}><p>#{para}</p></#{type}>" +        end +        util=SiSU_text_utils::Wrap.new(para,80,10) +        wrap=util.line_wrap +      end +      def put(line) +        @file_fictionbook.puts line +        puts line if @md.cmd =~/V/ +      end +      def structure_build_tag_close(lev,h) +        @sp='  ' +        case h[0] +        when 1 +          put("#{@sp*1}</#{@s[1]}>") if (lev <= 1) and h[1] +          put("</#{@s[0]}>")         if (lev == 0) +        when 2 +          put("#{@sp*2}</#{@s[2]}>") if (lev <= 2) and h[2] +          put("#{@sp*1}</#{@s[1]}>") if (lev <= 1) and h[1] +          put("</#{@s[0]}>")         if (lev == 0) +        when 3 +          put("#{@sp*3}</#{@s[3]}>") if (lev <= 3) and h[3] +          put("#{@sp*2}</#{@s[2]}>") if (lev <= 2) and h[2] +          put("#{@sp*1}</#{@s[1]}>") if (lev <= 1) and h[1] +          put("</#{@s[0]}>")         if (lev == 0) +        when 4 +          put("#{@sp*4}</#{@s[4]}>") if (lev <= 4) +          put("#{@sp*3}</#{@s[3]}>") if (lev <= 3) and h[3] +          put("#{@sp*2}</#{@s[2]}>") if (lev <= 2) and h[2] +          put("#{@sp*1}</#{@s[1]}>") if (lev <= 1) and h[1] +          put("</#{@s[0]}>")         if (lev == 0) +        when 5 +          put("#{@sp*5}</#{@s[5]}>") if (lev <= 5) +          put("#{@sp*4}</#{@s[4]}>") if (lev <= 4) +          put("#{@sp*3}</#{@s[3]}>") if (lev <= 3) and h[3] +  
        put("#{@sp*2}</#{@s[2]}>") if (lev <= 2) and h[2] +          put("#{@sp*1}</#{@s[1]}>") if (lev <= 1) and h[1] +          put("</#{@s[0]}>")         if (lev == 0) +        when 6 +          put("#{@sp*6}</#{@s[6]}>") if (lev <= 6) +          put("#{@sp*5}</#{@s[5]}>") if (lev <= 5) +          put("#{@sp*4}</#{@s[4]}>") if (lev <= 4) +          put("#{@sp*3}</#{@s[3]}>") if (lev <= 3) and h[3] +          put("#{@sp*2}</#{@s[2]}>") if (lev <= 2) and h[2] +          put("#{@sp*1}</#{@s[1]}>") if (lev <= 1) and h[1] +          put("</#{@s[0]}>")         if (lev == 0) +        end +      end +      def structure_build(ds) +        @h=[0,false,false,false] +        put("<#{@s[0]}>") +        ds.each_with_index do |x,i| +          @ef=false +          case x[:lev] +          when /^1/ +            structure_build_tag_close(1,@h) +            #put("#{@sp*1}<#{@s[1]}>\n#{x[:para]}\n#{@sp*1}#{x[:ocn]} #{x[:lev]} #{x[:hdr]}") +            y="#{@sp*1}<#{@s[1]}>\n#{x[:para]}" +            @h=[1,true,false,false] +          when /^2/ +            structure_build_tag_close(2,@h) +            y="#{@sp*2}<#{@s[2]}>\n#{x[:para]}" +            @h=[2,true,true,false] +          when /^0:0/ #endnotes and metadata +            structure_build_tag_close(2,@h) +            y="#{@sp*2}<#{@s[2]}>\n#{x[:para]}" +            @h=[2,true,true,false] +            @ef=true if x[:hdr] =~/u0/ +          when /^3/ +            structure_build_tag_close(3,@h) +            y="#{@sp*3}<#{@s[3]}>\n#{x[:para]}" +            @h=[3,true,true,true] +          when /^4/ +            structure_build_tag_close(4,@h) +            y="#{@sp*4}<#{@s[4]}>\n#{x[:para]}" +            @h[0]=4 +          when /^m2/ #metadata +            structure_build_tag_close(4,@h) +            y="#{@sp*4}<#{@s[4]}>\n#{x[:para]}" +            @h[0]=4 +          #when /^0:0/ #endnotes +          #  structure_build_tag_close(4,@h) +          #  y="#{@sp*4}<#{@s[4]}>\n#{x[:para]}" +          #  @h[0]=4 +          #  @ef=true + 
         when /^5/ +            structure_build_tag_close(5,@h) +            y="#{@sp*5}<#{@s[5]}>\n#{x[:para]}" +            @h[0]=5 +          when /^6/ +            structure_build_tag_close(6,@h) +            y="#{@sp*6}<#{@s[6]}>\n#{x[:para]}" +            @h[0]=6 +          else +            y=if @md.cmd =~/V/; "#{x[:para]}" +            else nil +            end +          end +          put(y) if y +          endnotes if @ef +        end +        structure_build_tag_close(0,@h) +      end +      def structure +        data=@data +        @ds=[] +        c=0 +        data.each do |para| +          rgx_headers=/<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)>/ +          if para =~rgx_headers +            x=(rgx_headers).match(para) +            if x[3] =~/^[hum]\d+/ +              @ds[c]={} +              @ds[c][:ocn]=x[1] +              @ds[c][:lev]=x[2] +              @ds[c][:hdr]=x[3] +              @ds[c][:para]=markup(para,'title') +              #puts "#{x[1]} #{x[2]} #{x[3]}" +            else +              @ds[c]={} +              @ds[c][:para]=markup(para) if @md.cmd =~/V/ +            end +            c+=1 +            #puts "#{para} #{x[1]} #{x[2]} #{x[3]}" +          end +        end +        structure_build(@ds) +        @ds +      end +    end +  end +end +__END__ | 
