diff options
| author | Ralph Amissah <ralph@amissah.com> | 2007-07-30 09:06:51 +0100 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2007-07-30 09:06:51 +0100 | 
| commit | a99e0de5885441989c2ae9ae6fad15fd35d0bb97 (patch) | |
| tree | c78f7a8ef21b06edd5e69b4396e45edc7b59aeb2 | |
| parent | using positive lookahead for url matching, test if to change (diff) | |
url matching, semi-colon as possible terminator, in dal match https
| -rw-r--r-- | CHANGELOG | 10 | ||||
| -rw-r--r-- | lib/sisu/v0/dal_doc_str_code.rb | 2 | ||||
| -rw-r--r-- | lib/sisu/v0/dal_syntax.rb | 6 | ||||
| -rw-r--r-- | lib/sisu/v0/html_tune.rb | 10 | ||||
| -rw-r--r-- | lib/sisu/v0/odf.rb | 20 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_html_lite.rb | 10 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_xml.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/texpdf_format.rb | 9 | 
8 files changed, 40 insertions, 31 deletions
| @@ -6,6 +6,16 @@ Reverse Chronological:  %% STABLE MANIFEST +%% sisu_0.55.7.orig.tar.gz (2007-07-30:31/1) +http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.7.orig.tar.gz +  sisu_0.55.7.orig.tar.gz +  sisu_0.55.7-1.dsc +  sisu_0.55.7-1.diff.gz + +  * url matching refinement +    * add semi-colon as possible url terminator +    * dal match https +  %% sisu_0.55.6.orig.tar.gz (2007-07-28:30/6)  http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz    69368f8eb4da28d07f3a1ee1ea5b89f3 1271022 sisu_0.55.6.orig.tar.gz diff --git a/lib/sisu/v0/dal_doc_str_code.rb b/lib/sisu/v0/dal_doc_str_code.rb index e6a3ae1e..18ac03d8 100644 --- a/lib/sisu/v0/dal_doc_str_code.rb +++ b/lib/sisu/v0/dal_doc_str_code.rb @@ -147,7 +147,7 @@ module SiSU_document_structure_code          if line =~/\S/ and line !~/^(?:alt|code|group|poem)\{|^\}(?:alt|code|group|poem)|<:(?:code|verse|alt|group).+/            line.gsub!(/\s\s/,'  ')            line.gsub!(/^/,'<:codeline>') if type=='code' # try sort for texpdf special case -          if line =~/http:\/\/\S+$/ +          if line =~/https?:\/\/\S+$/              line.gsub!(/$/,' <:br>')            else              line.gsub!(/$/,'<:br>') #unless type=='code' diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb index ce5fdc72..4fb0f5d3 100644 --- a/lib/sisu/v0/dal_syntax.rb +++ b/lib/sisu/v0/dal_syntax.rb @@ -65,7 +65,7 @@ module Syntax        @data,@md=data,md        @vz=SiSU_Env::Get_init.instance.skin        @data_new=[] -      @http_m='\{.+?\}http://\S+|http:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>' +      @http_m='\{.+?\}https?://\S+|https?:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>'        @manmkp_ital='[i/]\\{.+?\\}[i/]'        tail_m_ital=%q{(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$)}        tail_m_bold=%q{(?:(?:<\/i>)?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?} @@ -283,8 +283,8 @@ module Syntax          else line.gsub!(/(<br \/>)/i,"\\1\n")          end        
else #code blocks -        line.gsub!(/(^|\s)(http:\/\/\S+)/,'\1_\2')              #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls -        line.gsub!(/(^|\s)<(http:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later +        line.gsub!(/(^|\s)(https?:\/\/\S+)/,'\1_\2')              #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls +        line.gsub!(/(^|\s)<(https?:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later          line.gsub!(/<:codeline>/,"\n")        end        line diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index ac8d6594..cca41056 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -245,8 +245,8 @@ module SiSU_Tune        @words=[]        data.each do |word|          @words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/ -          if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/ -            m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures +          if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/ +            m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures            else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures              d=''            end @@ -325,9 +325,9 @@ module SiSU_Tune            if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/)              para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<<a href="mailto:\1">\1</a>>\2')            end -          para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration -          para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. 
\}http://url -          para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +          para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration +          para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url +          para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration            if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/)              para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>')            end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index ff788116..6025dfb2 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -272,21 +272,21 @@ module SiSU_ODF          end          para        end -      def text_link_odf(txt,url) +      def text_link_odf(txt,url,trail)          txt.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-(          url.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( -        %{<text:a xlink:type="simple" xlink:href="#{url}">#{txt}</text:a>} +        %{<text:a xlink:type="simple" xlink:href="#{url}">#{txt.strip}</text:a>#{trail}}        end        def text_link(para)          para.gsub!(@serial,'') -        m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+))/) #sort +        m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+?))([;.,]?$)/) #sort          if m            m.each do |i| -            txt,url=i[1],i[2] +            txt,url,trail=i[1],i[2]              txt.gsub!(/([)(\]\[])/,"\\\\\\1")              txt.gsub!(/([+?])/,"\\\\\\1") # problems with +              url.gsub!(/([+?])/,"\\\\\\1") # problems with + -            
para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url)) #make sure trailing ']' are not caught in url +            para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url              para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix            end            m=nil @@ -295,13 +295,13 @@ module SiSU_ODF        end        def normal(para)                                                           #P1 - P3          para.gsub!(@serial,'') -        para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +        para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,            %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration -        para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +        para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,            %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. 
\}http://url -        para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, +        para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,            %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration -        #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, also works +        #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works            #%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration          para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,            %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) @@ -377,7 +377,7 @@ module SiSU_ODF          parray=[]          para.split(/<:?br(?: \/)?>/).each do |parablock|            parablock=group_clean(parablock) -          parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +          parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,              %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration            parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/          end diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 43fb4446..1218aa79 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -88,8 +88,8 @@ module SiSU_Format_Shared        @words=[]        data.each do |word|          @words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/ -          if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/ -            m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures +          if word 
=~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/ +            m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures            else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures              d=''            end @@ -131,9 +131,9 @@ module SiSU_Format_Shared          words=word_mode.join(' ')          para.gsub!(/.+/,words)        end -      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration -      para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url -      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration +      para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. 
\}http://url +      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration        para      end      def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index c30dc5db..c54ab42d 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -360,11 +360,11 @@ module SiSU_XML_munge          '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune        #para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,        #  '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune -      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, +      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,          %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})        #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, #also works          #%{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2}) -      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later +      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later        #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" 
xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later        para.gsub!(/ /,' ') #clean        para diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 81646f23..92333d28 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -423,8 +423,7 @@ WOK        end        @string      end -    def special_characters_1(para) -      # ~ ^ $ & % _ { }                                                          #LaTeX special characters - KEEP list +    def special_characters_1(para)             # ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list        #p @@utf_8.list        #@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string)        word=@string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ @@ -791,9 +790,9 @@ WOK        @words=[]        @string.each do |word|          @words << if word=~/\{.+?\}(?:https?|ftp):\S+/ -          if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([.,](?:\s|$))/ -            r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[.,](?:\s|$)|(?:\s|$))/ -            d=/\\\{.+?\\?\}(?:https?|ftp):\S+([.,](?:\s|$))/.match(word).captures.to_s +          if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([;.,](?:\s|$))/ +            r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[;.,](?:\s|$)|(?:\s|$))/ +            d=/\\\{.+?\\?\}(?:https?|ftp):\S+([;.,](?:\s|$))/.match(word).captures.to_s            else              r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+)/              d='' | 
