diff options
| author | Ralph Amissah <ralph@amissah.com> | 2007-07-27 12:26:02 +0100 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2007-07-27 12:26:02 +0100 | 
| commit | f4b6f637ab695ab1e7ff879f5d10d4f4bceaa603 (patch) | |
| tree | 7f3d3ebba89ce94f61b99d4e114a4c202d21c6d9 | |
| parent | Merge branch 'upstream' into debian/sid (diff) | |
| parent | multiple url matching refinements, open archive initiative (diff) | |
Merge branch 'upstream' into debian/sid
| -rw-r--r-- | CHANGELOG | 10 | ||||
| -rw-r--r-- | lib/sisu/v0/db_import.rb | 2 | ||||
| -rw-r--r-- | lib/sisu/v0/html_tune.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/hub.rb | 33 | ||||
| -rw-r--r-- | lib/sisu/v0/manifest.rb | 1 | ||||
| -rw-r--r-- | lib/sisu/v0/odf.rb | 12 | ||||
| -rw-r--r-- | lib/sisu/v0/param.rb | 3 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_html_lite.rb | 8 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_xml.rb | 10 | ||||
| -rw-r--r-- | lib/sisu/v0/sysenv.rb | 5 | ||||
| -rw-r--r-- | lib/sisu/v0/texpdf_format.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/xml_md_oai_pmh_dc.rb | 209 | 
12 files changed, 264 insertions, 37 deletions
| @@ -12,9 +12,18 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz    sisu_0.55.6-1.dsc    sisu_0.55.6-1.diff.gz +  * matching of multiple urls within a paragraph +    * db html (html_lite), bug fix +    * multiple uls listed, refinement: html, html_lite, xml, odf, texpdf + +  * open archive initiative for metadata harvesting, initial implementation, +    Dublin Core, XML output available (-O), decide use later (filenames, output +    dir etc.) +    * debian vim      * moved vim install back to addons      * added recommends vim-addon-manager +    (thanks zack)  %% sisu_0.55.5.orig.tar.gz (2007-07-22:29/7)  http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz @@ -36,6 +45,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz      open standards    * debian vim, syntax and ftplugin install moved to /usr/share/vim-scripts +    (syntax file synced with Bram, thanks)  %% sisu_0.55.4.orig.tar.gz (2007-07-20:29/5)  http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.4.orig.tar.gz diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb index 91360613..5ef26466 100644 --- a/lib/sisu/v0/db_import.rb +++ b/lib/sisu/v0/db_import.rb @@ -125,7 +125,7 @@ module SiSU_DB_import        string.gsub!(/<:(?:code|alt|group|verse)(?:-end)?>/,'')        string.gsub!(/<:name#\S+?>/,'')        string.gsub!(/\{\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)\}\S+/,'[image: \1] \2') -      string.gsub!(/\{\s*(.+?)\s*\}http:\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') +      string.gsub!(/\{\s*(.+?)\s*\}https?:\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')      end      def unicode_special_character_escape(string)        #string.gsub!(/(["';:,])/, %{\\\\\\1}) diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index 1d3461c3..66c45aed 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -325,9 +325,9 @@ module SiSU_Tune            if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/)              para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<<a href="mailto:\1">\1</a>>\2')            end -          para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration +          para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration            para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url -          para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +          para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration            if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/)              para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>')            end diff --git a/lib/sisu/v0/hub.rb b/lib/sisu/v0/hub.rb index 35160d23..125a0500 100644 --- a/lib/sisu/v0/hub.rb +++ b/lib/sisu/v0/hub.rb @@ -150,6 +150,7 @@ module SiSU                    when /^plaintext$/;       SiSU_Plaintext::Source.new(@opt).read     # -a -A -e -E -f                    when /^wikispeak$/;       SiSU_Wikispeak::Source.new(@opt).read     # -g                    when /^odf$/;             SiSU_ODF::Source.new(@opt).read           # -o +                  when /^xml_md_oai_pmh_dc$/; SiSU_XML_metadata::OAI_PMH.new(@opt).read # -O                    when /^texpdf$/;          SiSU_TeX::Source.new(@opt).read           # -p                    when /^texinfo$/;         SiSU_TexInfo::Source.new(@opt).read       # -I                    #when /^docbook$/;         SiSU_Docbook::Source.new(@opt).read       # -B @@ -283,7 +284,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/      def actions        if @opt.mod.inspect =~/--convert|--to|--from/; require "#{SiSU_lib}/sst_convert_markup"        end -      if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnoprRSsTtQqUuVvwWXxYyZ_0-9])/ and +      if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnOoprRSsTtQqUuVvwWXxYyZ_0-9])/ and           @opt.cmd =~/^-/ and           @opt.mod.inspect !~/--(?:sitemaps|query|identify)/ or           @opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/ #and @@ -292,7 +293,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/          flag=SiSU_Env::Info_processing_flag.new          extra=''          if @opt.cmd !~/[mn]/ -          extra+=if @opt.cmd =~/[abeghHhINoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm'           #% add dal +          extra+=if @opt.cmd =~/[abeghHhINOoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm'           #% add dal            elsif ((@opt.cmd =~/[Dd]/ or (@opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/)) \            and @opt.mod.inspect !~/(?:remove|(?:(?:re)?create(?:all)?|dropall|drop)$)/) \            and @opt.cmd !~/[mn]/ @@ -447,32 +448,34 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/          end          @opt.files=@opt.files.collect {|x| x=x.gsub(/\.ssm$/,'._sst') }          if @opt.cmd =~/S/ -          op('sisupod_make','sisupod (zip)')                         #% -S make sisupod +          op('sisupod_make','sisupod (zip)')                   #% -S make sisupod            if @opt.fns=~/\.kdi._sst/ -            op('share_src_kdissert','kdissert (kdi)')                #% -S share kdissert source +            op('share_src_kdissert','kdissert (kdi)')          #% -S share kdissert source            end          end -        if @opt.cmd =~/N/; op('digests','digests')                   #% -N digest tree +        if @opt.cmd =~/N/; op('digests','digests')             #% -N digest tree          end -        if @opt.cmd =~/[hHz]/; op('html','html')                     #% -h -H -z html css +        if @opt.cmd =~/[hHz]/; op('html','html')               #% -h -H -z html css          end -        if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext')         #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file +        if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext')   #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file          end -        if @opt.cmd =~/g/; op('wikispeak','wikispeak')             #% -g wiki +        if @opt.cmd =~/g/; op('wikispeak','wikispeak')         #% -g wiki          end -        if @opt.cmd =~/o/; op('odf','OpenDocument')                  #% -o opendocument +        if @opt.cmd =~/o/; op('odf','OpenDocument')            #% -o opendocument          end -        if @opt.cmd =~/x/; op('xml','xml sax')                       #% -x xml sax type +        if @opt.cmd =~/x/; op('xml','xml sax')                 #% -x xml sax type          end -        if @opt.cmd =~/X/; op('xml_dom','xml dom')                   #% -X xml dom type +        if @opt.cmd =~/X/; op('xml_dom','xml dom')             #% -X xml dom type          end -        if @opt.cmd =~/b/; op('xhtml','xhtml sax')                   #% -b xhtml sax type +        if @opt.cmd =~/b/; op('xhtml','xhtml sax')             #% -b xhtml sax type          end -        #if @opt.cmd =~/B/; op('docbook','docbook xml')               #% -B docbook xml +        #if @opt.cmd =~/B/; op('docbook','docbook xml')         #% -B docbook xml          #end -        if @opt.cmd =~/w/; op('concordance','Concordance')           #% -w concordance +        if @opt.cmd =~/w/; op('concordance','Concordance')     #% -w concordance          end -        if @opt.cmd =~/t/                                            #% -t termsheet/standard form +        if @opt.cmd =~/O/; op('xml_md_oai_pmh_dc','OAI PMH')   #% -O open archive initiative, metadata harvesting +        end +        if @opt.cmd =~/t/                                      #% -t termsheet/standard form            SiSU_Help::Help.new('termsheet').help_request          	@opt.files.each do |fns|              if FileTest.file?(fns) diff --git a/lib/sisu/v0/manifest.rb b/lib/sisu/v0/manifest.rb index 931ea96b..998d3c59 100644 --- a/lib/sisu/v0/manifest.rb +++ b/lib/sisu/v0/manifest.rb @@ -445,7 +445,6 @@ module SiSU_Manifest          begin            id,file='',''            vz=SiSU_Env::Get_init.instance.skin -          #vz=SiSU_Viz::Skin.new            banner_table=if vz.banner_home_button_only !~ /http:\/\/www\.jus\.uio\.no\/sisu/ and vz.banner_home_button_only !~  /sisu\.home\.png/  <<WOK  <table summary="band" width="100%" border="0" cellpadding="3" cellspacing="0"> diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index cff57888..fbd4cc62 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -295,12 +295,12 @@ module SiSU_ODF        end        def normal(para)                                                           #P1 - P3          para.gsub!(@serial,'') -        para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -          %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration +        para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +          %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration          para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,            %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. \}http://url -        para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -          %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +        para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +          %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration          para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,            %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}})          para=case para @@ -375,8 +375,8 @@ module SiSU_ODF          parray=[]          para.split(/<:?br(?: \/)?>/).each do |parablock|            parablock=group_clean(parablock) -          parablock.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -            %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration +          parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +            %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration            parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/          end          para=parray.join + '<text:p text:style-name="Standard"/>' diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb index 2ab37afb..2264c48a 100644 --- a/lib/sisu/v0/param.rb +++ b/lib/sisu/v0/param.rb @@ -338,7 +338,7 @@ module SiSU_Param                end              when /^(?:0~type|@type:)\s+(.+?)$/m;             @dc_type=$1                            #% metainfo DC              when /^(?:0~format|@format:)\s+(.+?)$/m;         @dc_format=$1                          #% metainfo DC -            when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1                      #% metainfo DC +            #when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1                      #% metainfo DC              when /^(?:0~source|@source:)\s+(.+?)$/m;         @dc_source=$1                          #% metainfo DC              when /^(?:0~language(?:\.document)?|@language(?:\.document)?:)\s+(.+?)$/m               #% metainfo DC                x=$1.strip @@ -655,6 +655,7 @@ module SiSU_Param          end if @flv          @lang.uniq!          @fn=SiSU_Env::Env_call.new(@fns).lang(fn_set_lang[:c]) +        @dc_identifier="#{@env.url.root}/#@fnb/#{@fn[:toc]}" #DC note constructed dc identifier          if @en[:note] > 0 and @en[:sum] > 0            if @en[:sum] > 0            else tell=SiSU_Screen::Ansi.new(@cmd,'both endnote styles used',"~{ #{@en[:sum]} }~ and ^~ #{@en[:mark]}") diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 76def67d..16491ebf 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -126,14 +126,14 @@ module SiSU_Format_Shared      end      def markup(para)        if para =~/\{.+?\}((?:http|ftp)\S+|image)/ -        @word_mode=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/) -        word_mode=urls(para) +        wm=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/) +        word_mode=urls(wm)          words=word_mode.join(' ')          para.gsub!(/.+/,words)        end -      para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration +      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration        para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url -      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +      para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration        para      end      def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 995044db..249085a1 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -356,11 +356,11 @@ module SiSU_XML_munge        #para.gsub!(/^_\*\s+/,'<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="/usr/share/sisu/image/bullet_red.png" width="12" height="12" alt="*" /> ')        para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2" width="\\3" height="\\4" />[\\2] \\5})        para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2"/>\\2}) -      para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, -        '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune -      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -        %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3}) -      para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later +      para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, +        '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune +      para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +        %{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2}) +      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later        #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later        para.gsub!(/ /,' ') #clean        para diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb index e9e39268..91e03459 100644 --- a/lib/sisu/v0/sysenv.rb +++ b/lib/sisu/v0/sysenv.rb @@ -454,6 +454,7 @@ module SiSU_Env          :digest          => filename(code,'digest','.txt'),          :metadata        => filename(code,'metadata','.html'), #chk          :manifest        => filename(code,'sisu_manifest','.html'), +        :oai_pmh         => filename(code,'oai_pmh','.xml'),          :sitemap         => filename(code,'sitemap','.xml'),          :sitemap_touch   => filename(code,"sitemap_#@fnb",'.xml'),          :sxs             => filename(code,@fnb,'.sxs.xml'), @@ -1780,6 +1781,8 @@ module SiSU_Env          end          if @md.cmd =~ /o/;                         ft << @md.fn[:odf]          end +        if @md.cmd =~ /O/;                         ft << @md.fn[:oai_pmh] +        end          if @md.cmd =~ /s/;                         ft << @md.fns          end          if @md.cmd =~ /S/;                         ft << 'sisupod.zip' << '.kdi' @@ -1808,6 +1811,8 @@ module SiSU_Env          end          if @opt.cmd =~ /o/;                        ft << 'opendocument.odt' << '??.opendocument.odt' << 'opendocument.??.odt'          end +        if @opt.cmd =~ /O/;                        ft << 'oai_pmh.xml' +        end          if @opt.cmd =~ /s/;                        ft << '.sst' << '.ssi' << '.ssm'          end          if @opt.cmd =~ /S/;                        ft << 'sisupod.zip' << '.kdi' diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 4a8d2cb5..81646f23 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -501,8 +501,8 @@ WOK        @string.gsub!(/<\/a>/,' ')        @string.gsub!(/[^\}>_]((?:https?|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case        @string.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url -      @string.gsub!(/(^|\s)(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #specially escaped url no decoration -      @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> +      @string.gsub!(/\B(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration +      @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start        @string.gsub!(/<:ee>/,'')        @string.gsub!(/<!>/,' ')        #proposed change, insert, but may be redundant diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb new file mode 100644 index 00000000..1d7008a1 --- /dev/null +++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb @@ -0,0 +1,209 @@ +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + +   SiSU, a framework for document structuring, publishing and search + +   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007 Ralph Amissah + +   This program is free software: you can redistribute it and/or modify it +   under the terms of the GNU General Public License as published by the Free +   Software Foundation, either version 3 of the License, or (at your option) +   any later version. + +   This program is distributed in the hope that it will be useful, but WITHOUT +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   more details. + +   You should have received a copy of the GNU General Public License along with +   this program. If not, see <http://www.gnu.org/licenses/>. + +   If you have Internet connection, the latest version of the GPL should be +   available at these locations: +   <http://www.fsf.org/licenses/gpl.html> +   <http://www.gnu.org/copyleft/gpl.html> +   <http://www.jus.uio.no/sisu/gpl.fsf> + + * SiSU uses: +   * Standard SiSU markup syntax, +   * Standard SiSU meta-markup syntax, and the +   * Standard SiSU object citation numbering and system + + * Hompages: +   <http://www.jus.uio.no/sisu> +   <http://www.sisudoc.org> + + * Download: +   <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah +   <ralph@amissah.com> +   <ralph.amissah@gmail.com> + + ** Description: summary of generated outputs and metadata + +=end +module SiSU_XML_metadata +  require "#{SiSU_lib}/sysenv" +  include SiSU_Env +  require "#{SiSU_lib}/param" +  include SiSU_Param +  class OAI_PMH +    def initialize(opt) +      @md=SiSU_Param::Parameters.new(opt).get +      @oai_pmh=[] +    end +    def read +      output +    end +    def pre +<<WOK +<?xml version="1.0" encoding="UTF-8"?> +<oai_dc:dc +  xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" +  xmlns:dc="http://purl.org/dc/elements/1.1/" +  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +  xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ +  http://www.openarchives.org/OAI/2.0/oai_dc.xsd"> +WOK +    end +    def body +      if @md.dc_title                                                            # DublinCore 1 - title +        @oai_pmh << %{  <dc:title xml:lang="en">#{@md.dc_title}</dc:title>\n} +        #@oai_pmh << %{  <dc:title xml:lang="en">#{seg_name}#{@md.dc_title}</dc:title>\n} +      end +      if @md.dc_creator                                                          # DublinCore 2 - creator/author (author) +        txt=meta_content_clean(@md.dc_creator) +        @oai_pmh << %{  <dc:creator>#{txt}</dc:creator>\n} +      end +      if @md.dc_subject                                                          # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) +        txt=meta_content_clean(@md.dc_subject) +        @oai_pmh << %{  <dc:subject>#{txt}</dc:subject>\n} +      end +      if @md.dc_description                                                      # DublinCore 4 - description +        txt=meta_content_clean(@md.dc_description) +        @oai_pmh << %{  <dc:description>#{txt}</dc:description>\n} +      end +      if @md.dc_publisher                                                        # DublinCore 5 - publisher (current copy published by) +        txt=meta_content_clean(@md.dc_publisher) +        @oai_pmh << %{  <dc:publisher>#{txt}</dc:publisher>\n} +      end +      if @md.dc_contributor                                                      # DublinCore 6 - contributor +        txt=meta_content_clean(@md.dc_contributor) +        @oai_pmh << %{  <dc:contributor>#{txt}</dc:contributor>\n} +      end +      if @md.dc_date                                                             # DublinCore 7 - date year-mm-dd +        @oai_pmh << %{  <dc:date>#{@md.dc_date}</dc:date>\n} +      end +      if @md.dc_date_created                                                     # DublinCore 7 - date.created +        @oai_pmh << %{  <dc:date_created>#{@md.dc_date_created}</dc:date_created>\n} +      end +      if @md.dc_date_issued                                                      # DublinCore 7 - date.issued +        @oai_pmh << %{  <dc:date_issued>#{@md.dc_date_issued}</dc:date_issued>\n} +      end +      if @md.dc_date_available                                                   # DublinCore 7 - date.available +        @oai_pmh << %{  <dc:date_available>#{@md.dc_date_available}</dc:date_available>\n} +      end +      if @md.dc_date_valid                                                       # DublinCore 7 - date.valid +        @oai_pmh << %{  <dc:date_valid>#{@md.dc_date_valid}</dc:date_valid>\n} +      end +      if @md.dc_date_modified                                                    # DublinCore 7 - date.modified +        @oai_pmh <<  %{  <dc:date_modified>#{@md.dc_date_modified}</dc:date_modified>\n} +      end +      if @md.dc_type                                                             # DublinCore 8 - type +        txt=meta_content_clean(@md.dc_type) +        @oai_pmh << %{  <dc:type>#{txt}</dc:type>\n} +      end +      if @md.dc_format                                                           # DublinCore 9 - format +        txt=meta_content_clean(@md.dc_format) +        @oai_pmh << %{  <dc:format>#{txt}</dc:format>\n} +      end +      if @md.dc_identifier                                                       # DublinCore 10 - identifier +        txt=meta_content_clean(@md.dc_identifier) +        @oai_pmh << %{  <dc:identifier>#{txt}</dc:identifier>\n} +      end +      if @md.dc_source                                                           # DublinCore 11 - source +        txt=meta_content_clean(@md.dc_source) +        @oai_pmh << %{  <dc:source>#{txt}</dc:source>\n} +      end +      if @md.dc_language[:name]                                                  # DublinCore 12 - language (English) +        @oai_pmh << %{  <dc:language>#{@md.dc_language[:name]}</dc:language>\n} +      end +      if @md.language_original[:name] +        @oai_pmh << %{  <dc:language>#{@md.language_original[:name]}</dc:language>\n} +      end +      if @md.dc_relation                                                         # DublinCore 13 - relation +        txt=meta_content_clean(@md.dc_relation) +        @oai_pmh << %{  <dc:relation>#{txt}</dc:relation>\n} +      end +      if @md.dc_coverage                                                         # DublinCore 14 - coverage +        txt=meta_content_clean(@md.dc_coverage) +        @oai_pmh << %{  <dc:coverage>#{txt}</dc:coverage>\n} +      end +      if @md.dc_rights                                                           # DublinCore 15 - rights +        txt=meta_content_clean(@md.dc_rights) +        @oai_pmh << %{  <dc:rights>#{txt}</dc:rights>\n} +      end +      if @md.keywords +        txt=meta_content_clean(@md.keywords) +        @oai_pmh << %{  <dc:keywords>#{txt}</dc:keywords>\n} +      end +      @oai_pmh +    end +    def meta_content_clean(content='') +      unless content.nil? +        content.tr!('"',"'") +      end +      content +    end +    def post +      '</oai_dc:dc>' +    end +    def output +      SiSU_Env::SiSU_file.new(@md).mkdir +      oai_pmh=SiSU_Env::SiSU_file.new(@md,@md.fn[:oai_pmh]).mkfile #implement in param +      oai_pmh << pre +      body.each do |x| +        oai_pmh << x +      end +      oai_pmh << post +    end +  end +end +__END__ +http://www.openarchives.org/pmh/ +http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore +http://es.dublincore.org/documents/usageguide/elements.shtml +http://dublincore.org/documents/dces/ +see also http://dublincore.org/documents/dcmes-xml/ +#http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore +#sample implementation, e.g. 2 +<?xml version="1.0" encoding="UTF-8"?> +<oai_dc:dc +    xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" +    xmlns:dc="http://purl.org/dc/elements/1.1/" +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ +    http://www.openarchives.org/OAI/2.0/oai_dc.xsd"> +  <dc:title xml:lang="en">Grassmann's space analysis</dc:title> +  <dc:creator>Hyde, E. W. (Edward Wyllys)</dc:creator> +  <dc:subject>LCSH:Ausdehnungslehre; LCCN QA205.H99</dc:subject> +  <dc:publisher>J. Wiley & Sons</dc:publisher> +  <dc:date>Created: 1906; Available: 1991</dc:date> +  <dc:type>text</dc:type> +  <dc:identifier>http://resolver.library.cornell.edu/math/1796949 +     </dc:identifier> +  <dc:language>english</dc:language> +  <dc:rights xml:lang="en">Public Domain</dc:rights> +</oai_dc:dc> | 
