# coding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
metadata harvest, extract topics and associated writings from document set
(topics use topic_register header)
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008 Ralph Amissah All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: simple xml representation (sax style)
=end
module HARVEST_topics
require "#{SiSU_lib}/author_format"
class Songsheet
  # opt: options object; this class calls #files on it (list of file names)
  # and #cmd (its inspect string is checked for "-M").
  def initialize(opt)
    @opt=opt
    @file_list=opt.files
    @env=SiSU_Env::Info_env.new
  end
  # Drives the whole harvest:
  # 1. keep only file names ending in .sst or .ssm (others are reported on stdout),
  # 2. read the leading "@field: " paragraphs of each file and hand them to Harvest,
  # 3. build the topic tree with Index and write it out through Output_index,
  # 4. print the file:// location(s) of the generated page.
  # NOTE(review): @@the_idx_topics is read here but not assigned anywhere in
  # this class — presumably initialised elsewhere; confirm.
  def songsheet
    idx_array=[]
    markup_files=[]
    @file_list.each do |name|
      matched=name[/(.+?\.ss[tm])$/,1]
      if matched
        markup_files << matched
      else
        print "not .sst or .ssm ? << #{name} >> "
      end
    end
    markup_files.each do |filename|
      header_paras=[]
      File.open(filename,'r') do |fh|
        # paragraphs are separated by a blank line
        fh.each_line("\n\n") do |para|
          if para =~/^@\S+?: /
            header_paras << para
          else
            # blank paragraphs and "% " comment paragraphs are skipped;
            # anything else ends the scan of this file
            break unless para =~/^(?:\s*\n|%+ )/
          end
        end
      end
      harvester=HARVEST_topics::Harvest.new(header_paras,filename,idx_array)
      idx_array=harvester.extract_harvest
    end
    indexer=HARVEST_topics::Index.new(idx_array,@@the_idx_topics)
    the_idx=indexer.construct_book_topic_index
    HARVEST_topics::Output_index.new(@opt,the_idx).html_print.html_songsheet
    puts "file://#{@env.path.output_md_harvest}/harvest_topics.html"
    if @opt.cmd.inspect =~/-M/
      puts "file://#{@env.path.pwd}/harvest_topics.html"
    end
  end
end
class Harvest
  # data:      array of header paragraphs (strings) read from one source file
  # filename:  name of that source file (expected to end in .sst or .ssm)
  # idx_array: entries collected so far; new entries are appended to it
  def initialize(data,filename,idx_array)
    @data,@filename,@idx_array=data,filename,idx_array
  end
  # Pulls @title, @subtitle, @author/@creator and @topic_register out of the
  # header paragraphs and appends one entry hash per ';'-separated topic
  # register item to idx_array. Each entry has the keys :filename, :file
  # (filename without the .sst/.ssm suffix), :rough_idx, :title and :author
  # (the value returned by FORMAT::Author#author_details).
  # When title, author or topic register is missing a diagnostic is printed
  # and nothing is appended.
  # Returns idx_array (flattened in place).
  def extract_harvest
    data,filename,idx_array=@data,@filename,@idx_array
    @idx_list,@title,@subtitle,@fulltitle,@author_format=nil,nil,nil,nil,nil
    rgx={
      :author   => /^@(?:author|creator):\s+(.+)/,
      :title    => /^@title:\s+(.+)/,
      :subtitle => /^@subtitle:\s+(.+)/,
      :idx      => /^@topic_register:\s+(.+)/,
    }
    data.each do |para|
      found=rgx[:idx].match(para)
      @idx_list=found[1] if found
      found=rgx[:title].match(para)
      @title=found[1] if found
      found=rgx[:subtitle].match(para)
      @subtitle=found[1] if found
      found=rgx[:author].match(para)
      @author_format=found[1] if found
      # stop once every field has been seen; this test used to read two
      # variables that were never assigned, so it could never fire
      break if @title and @subtitle and @author_format and @idx_list
    end
    # a subtitle without a title must not raise; the entry is rejected below
    @fulltitle=if @title and @subtitle
      @title + ' - ' + @subtitle
    else @title
    end
    if @title and @author_format and @idx_list
      creator=FORMAT::Author.new(@author_format.strip).author_details
      @authors,@authorship=creator[:authors],creator[:authorship]
      file=filename.sub(/\.ss[mt]$/,'')
      idx_array <<=if @idx_list =~/;/
        # several register entries: one index record per entry
        @idx_list.scan(/[^;]+/).map do |i|
          { :filename => filename, :file => file, :rough_idx => i.strip, :title => @fulltitle, :author => creator }
        end
      else { :filename => filename, :file => file, :rough_idx => @idx_list, :title => @fulltitle, :author => creator }
      end
    else
      p "missing author field: #@filename title: #@title; author: #@author_format; idx: #@idx_list"
    end
    # the multi-entry branch pushes a nested array; flatten it into the list
    idx_array.flatten!
    idx_array
  end
end
class Index
  # idx_array: entry hashes with at least :rough_idx, :filename, :file, :title
  #            and :author (indexable by :last_first_format_a)
  # the_idx:   hash that receives the topic tree; it is filled in place
  def initialize(idx_array,the_idx)
    @idx_array,@the_idx=idx_array,the_idx
    @@the_idx_topics=@the_idx
  end
  # Upper-cases the first character of txt and leaves the rest untouched.
  def capital(txt)
    return txt if txt.empty?
    txt[0,1].capitalize + txt[1..-1]
  end
  # Appends a work record for idx to the list passed as hash (despite its
  # name it is the :md array of a topic node). The author names are joined
  # with ", " and keep a trailing ", ".
  def contents(hash,idx)
    names=idx[:author][:last_first_format_a].map { |n| %{#{n}, } }.join
    hash << { :filename => idx[:filename], :file => idx[:file], :author => names, :title => idx[:title] }
  end
  # Files every entry of idx_array under its topic path.
  # :rough_idx is split on ':' into levels and each level on '|' into
  # alternative names. Every topic node is a hash holding an :md list of
  # works plus one key per sub-topic. The work is added to the :md list of
  # each alternative of the last level. Paths of any depth are handled.
  # Entries without :rough_idx are reported on stdout and skipped.
  # Returns the topic tree.
  def construct_book_topic_index
    @idx_array.each do |idx|
      unless idx[:rough_idx]
        puts "no topic register in: << #{idx[:filename]} >>"
        next
      end
      levels=idx[:rough_idx].scan(/[^:]+/).map { |lev| lev.scan(/[^|]+/) }
      file_under_topics(levels,idx)
    end
    @the_idx
  end
  private
  # Walks one topic path, creating missing nodes on the way down.
  def file_under_topics(levels,idx)
    parent=@the_idx
    deepest=levels.length - 1
    levels.each_with_index do |alternatives,depth|
      if parent.nil?
        # the previous level did not name exactly one topic, so there is no
        # single node to descend into
        puts "alternative topics are only followed at the last level, rest of entry skipped: << #{idx[:rough_idx]} >> in << #{idx[:filename]} >>"
        break
      end
      node=nil
      alternatives.each do |name|
        name=capital(name)
        parent[name] ||= {:md => []}
        node=parent[name]
        contents(node[:md],idx) if depth == deepest
      end
      parent=(alternatives.length == 1) ? node : nil
    end
  end
end
# Writes the topic index out: html_print emits harvest_topics.html, while
# screen_print and screen_print_unsorted print an indented listing on stdout.
# Each of those three methods defines its helper methods with nested "def"s
# and returns self, so a call is chained, e.g. html_print.html_songsheet.
# NOTE(review): several string/heredoc bodies in this class (html_head_adjust,
# html_alph, html_tail, do_string*, do_string_name, do_hash_md*) look as if
# their HTML markup has been stripped, and do_string_name opens more blocks
# than it closes; the class does not appear parseable as shown — compare with
# the upstream file before relying on it.
class Output_index
# opt: options object (its #cmd is inspected for "-M"); the_idx: topic tree.
# Opens harvest_topics.html for writing in @env.path.output_md_harvest and,
# when -M is given, a second copy in @env.path.pwd.
def initialize(opt,the_idx)
@opt,@the_idx=opt,the_idx
@env=SiSU_Env::Info_env.new
# @rc and @page are assigned here but not read in the visible code
@rc=Get_init.instance.yamlrc
@page='sisu_manifest.html'
@output={}
@output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_topics.html",'w')
if @opt.cmd.inspect =~/-M/
@output[:html_mnt]=File.new("#{@env.path.pwd}/harvest_topics.html",'w')
end
# '9' is the first bucket, followed by A-Z; @letter starts out as '9'
@alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
@letter=@alph.shift
end
# Defines the HTML writer helpers below and returns self.
def html_print
# Emits the page parts in order: head, alphabet line, body, tail.
def html_songsheet
html_head
html_alph
html_body
html_tail
end
# Chooses the stylesheet path: 'harvest.css' when type matches /maintenance/,
# '../_sisu/css/harvest.css' otherwise.
# NOTE(review): css_path is not used in the visible lines and the two lines
# after the assignment look like the remains of a heredoc ("WOK" terminator
# without an opener) — TODO restore.
def html_head_adjust(type='')
css_path=if type !~/maintenance/
'../_sisu/css/harvest.css'
else 'harvest.css'
end
<SiSU Metadata Harvest - Topics
WOK
end
# Writes the page head to the main file and, with -M, to the maintenance file.
def html_head
@output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/-M/
@output[:html] << html_head_adjust
end
# Writes one item per letter of @alph (digits yield an empty string).
# NOTE(review): the string literals here presumably held link markup — confirm.
def html_alph
a=[]
a << '
'
@alph.each do |x|
a << if x =~/[0-9]/; ''
else
%{#{x}, }
end
end
# the maintenance file receives the array itself, the main file the joined string
@output[:html_mnt] << a if @opt.cmd.inspect =~/-M/
@output[:html] << a.join
end
# Writes the closing part of the page to the open output file(s).
# NOTE(review): "a <<<" followed by "WOK" looks like a damaged heredoc — TODO restore.
def html_tail
a=[]
a <<<
WOK
@output[:html_mnt] << a if @output[:html_mnt].class == File
@output[:html] << a
end
# Appends html to the main output file.
def do_html(html)
@output[:html] << html
end
# Appends html to the maintenance file when one was opened.
def do_html_maintenance(html)
@output[:html_mnt] << html if @output[:html_mnt].class == File
end
# Writes string to the main file and, if open, the maintenance file.
# attrib is not used in the visible template — presumably it was part of the
# stripped markup; confirm.
def do_string(attrib,string)
html=%{
#{string}
}
do_html(html)
do_html_maintenance(html) if @output[:html_mnt].class == File
end
# Same template as do_string, written to the main file only.
def do_string_default(attrib,string)
html=%{
#{string}
}
do_html(html)
end
# Same template as do_string, written to the maintenance file only.
def do_string_maintenance(attrib,string)
html=%{
#{string}
}
do_html_maintenance(html) if @output[:html_mnt].class == File
end
# Takes the first non-space character of string and advances @letter through
# @alph while it sorts before that character.
# NOTE(review): this method is incomplete as shown — the if/while/if/if blocks
# are not closed and "html" is used without being assigned; TODO restore.
def do_string_name(attrib,string)
f=/^(\S)/.match(string)[1]
if @letter < f
while @letter < f
if @alph.length > 0
@letter=@alph.shift
if @output[:html_mnt].class == File
@output[:html_mnt] << %{\n
}
do_html(html)
do_html_maintenance(html) if @output[:html_mnt].class == File
end
# Dispatches every element of array one level deeper.
def do_array(lv,array)
lv+=1
array.each do |b|
do_case(lv,b)
end
end
# Writes "title - author" of a work record to the main file.
def do_hash_md(attrib,hash)
html=%{#{hash[:title]} - #{hash[:author]}}
do_string_default(attrib,html)
end
# Writes "[src] title - author" of a work record to the maintenance file.
def do_hash_md_maintenance(attrib,hash)
if @output[:html_mnt].class == File #should not be run for presentation output
html=%{[src] #{hash[:title]} - #{hash[:author]}}
do_string_maintenance(attrib,html)
end
end
# Handles one hash: the :md value is dispatched, a hash carrying :title is
# written as a work record, and all keys other than :md, :title, :author,
# :filename, :file and :rough_idx are treated as sub-topics, written in
# sorted order and followed by their own content.
def do_hash(lv,hash)
lv+=1
key=[]
hash.each_key do |m|
if m == :md
do_case(lv,hash[m])
elsif m != :title and m != :author and m != :filename and m != :file and m != :rough_idx
key << m
elsif m == :title
do_hash_md('work',hash)
do_hash_md_maintenance('work',hash)
end
end
if key.length > 0
key.sort.each do |m|
attrib="lev#{lv}"
if lv == 0
do_string_name(attrib,m)
else do_string(attrib,m)
end
do_case(lv,hash[m])
end
end
end
# Dispatches on the class of a: String, Array or Hash; anything else is ignored.
# Level-0 strings go through do_string_name, deeper ones through do_string.
def do_case(lv,a)
y = a.class
case
when y == String
attrib="lev#{lv}"
if lv == 0
do_string_name(attrib,a)
else do_string(attrib,a)
end
#do_string_name(attrib,a)
when y == Array
do_array(lv,a)
when y == Hash
do_hash(lv,a)
end
end
# Walks the sorted topic index, starting at level -1 so that the first
# nested level becomes 0.
def html_body
the_idx=@the_idx
the_idx.sort.each do |a|
do_case(-1,a)
end
end
self
end
# Defines plain-text helpers (same names as some of the HTML ones, so they
# replace those once this method has run) and returns self; sub-topic keys
# are printed in sorted order. Start the output with #cycle.
def screen_print
# Prints string indented by four spaces per level.
def do_string(lv,string)
s=' '*4
puts s*lv + string
end
def do_array(lv,array)
lv+=1
array.each do |b|
do_case(lv,b)
end
end
# Prints "title - author" of a work record.
def do_hash_md(lv,hash)
string=hash[:title] + ' - ' + hash[:author]
do_string(lv,string)
end
def do_hash(lv,hash)
lv+=1
key=[]
hash.each_key do |m|
if m == :md
do_case(lv,hash[m])
elsif m != :title and m != :author and m != :filename and m != :file and m != :rough_idx
key << m
elsif m == :title
do_hash_md(lv,hash)
end
end
if key.length > 0
key.sort.each do |m|
do_string(lv,m)
do_case(lv,hash[m])
end
end
end
# Dispatches on the class of a; the local s is assigned but not used.
def do_case(lv,a)
s=' '*4
y = a.class
case
when y == String
do_string(lv,a)
when y == Array
do_array(lv,a)
when y == Hash
do_hash(lv,a)
end
end
# Prints the whole index in its stored order.
def cycle
the_idx=@the_idx
the_idx.each do |a|
do_case(-1,a)
end
end
self
end
# Like screen_print, except that do_hash prints sub-topic keys in the order
# the hash yields them instead of sorting them. Returns self.
def screen_print_unsorted
def do_string(lv,string)
s=' '*4
puts s*lv + string
end
def do_array(lv,array)
lv+=1
array.each do |b|
do_case(lv,b)
end
end
def do_hash_md(lv,hash)
string=hash[:title] + ' - ' + hash[:author]
do_string(lv,string)
end
def do_hash(lv,hash)
lv+=1
hash.each_key do |m|
if m == :md
do_case(lv,hash[m])
else
if m != :title and m != :author and m != :filename and m != :file and m != :rough_idx
do_string(lv,m)
do_case(lv,hash[m])
elsif m == :title
do_hash_md(lv,hash)
else
end
end
end
end
# Dispatches on the class of a; the local s is assigned but not used.
def do_case(lv,a)
s=' '*4
y = a.class
case
when y == String
do_string(lv,a)
when y == Array
do_array(lv,a)
when y == Hash
do_hash(lv,a)
end
end
def cycle
the_idx=@the_idx
the_idx.each do |a|
do_case(-1,a)
end
end
self
end
end
end
__END__
terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details}
| |_ {tl2} -|_ {fa}[fa]{filenames and other details}
| | |_{tl3} -|_ {fa}[fa]{filenames and other details}
| | | |_{tl4} - {fa}[fa]{filenames and other details}
| | | |
| | | |_{tl4a} - {fa}[fa]{filenames and other details}
| | | |
| | | |_{tl4b} - {fa}[fa]{filenames and other details}
| | | |
| | | |_ ...
| | |
| | |_{tl3a} - {fa}[fa]{filenames and other details}
| |
| |_{tl2a} - {fa}[fa]{filenames and other details}
|
|_ t{tl1a} -|_ {fa}[fa]{filenames and other details}
|_ ...