diff options
| author | Ralph Amissah <ralph@amissah.com> | 2010-04-20 19:01:55 -0400 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2010-04-20 19:01:55 -0400 | 
| commit | 63c5a3cead1fb5cbd9b1bff653f269dce8d8052c (patch) | |
| tree | eb3e09a1199ae2dc79b3f7db97ff1024b25cfb0c | |
| parent | dal, minor cosmetic re-arrangement (diff) | |
db name, tables, columns, indexes changes, review (need another version bump 2.2.0)
* db (sql) table structure, further review and changes (hence breakage & version bump)
  * new pgsql db name prefix "sisu_v2b_"
  * new table column "words" in doc_objects & endnotes tables, VARCHAR(3000), to contain
    a list of the unique sorted words in each object
  * increase use of VARCHAR
  * constants.rb takes on related additions
* param, extensive db column size checks for metadata
| -rw-r--r-- | lib/sisu/v2/constants.rb | 7 | ||||
| -rw-r--r-- | lib/sisu/v2/db_columns.rb | 158 | ||||
| -rw-r--r-- | lib/sisu/v2/db_create.rb | 27 | ||||
| -rw-r--r-- | lib/sisu/v2/db_drop.rb | 72 | ||||
| -rw-r--r-- | lib/sisu/v2/db_import.rb | 120 | ||||
| -rw-r--r-- | lib/sisu/v2/db_indexes.rb | 52 | ||||
| -rw-r--r-- | lib/sisu/v2/db_load_tuple.rb | 32 | ||||
| -rw-r--r-- | lib/sisu/v2/db_remove.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v2/db_sqltxt.rb | 34 | ||||
| -rw-r--r-- | lib/sisu/v2/param.rb | 57 | 
10 files changed, 349 insertions, 214 deletions
| diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb index 3fcb1e3a..9a24736c 100644 --- a/lib/sisu/v2/constants.rb +++ b/lib/sisu/v2/constants.rb @@ -131,8 +131,8 @@ Px[:lv4]=     '-'  Px[:lv5]=     '.'  Px[:lv6]=     '.'  #Px[:lv5_6]=   '.' -Db[:name_prefix]="SiSU#{SiSU_version_dir}a_" -Db[:name_prefix_db]="sisu_#{SiSU_version_dir}a_" +Db[:name_prefix]="SiSU#{SiSU_version_dir}b_" +Db[:name_prefix_db]="sisu_#{SiSU_version_dir}b_"  Db[:col_title]=800  Db[:col_title_part]=400  Db[:col_title_edition]=10 @@ -148,6 +148,9 @@ Db[:col_classify_identify]=256  Db[:col_classify_library]=30  Db[:col_classify_small]=16  Db[:col_filename]=256 +Db[:col_digest]=64 +Db[:col_filesize]=10 +Db[:col_info_note]=3000  __END__  consider:    〔comment〕 diff --git a/lib/sisu/v2/db_columns.rb b/lib/sisu/v2/db_columns.rb index ee66c59e..0c2eb367 100644 --- a/lib/sisu/v2/db_columns.rb +++ b/lib/sisu/v2/db_columns.rb @@ -208,7 +208,7 @@ module SiSU_DB_columns            'title_note'          end          def create_column -          "#{name}                TEXT NULL," +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1119,7 +1119,7 @@ module SiSU_DB_columns            'rights'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1141,7 +1141,7 @@ module SiSU_DB_columns            'rights_copyright_text'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1163,7 +1163,7 @@ module SiSU_DB_columns            'rights_copyright_translation'      
    end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1185,7 +1185,7 @@ module SiSU_DB_columns            'rights_copyright_illustrations'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1207,7 +1207,7 @@ module SiSU_DB_columns            'rights_copyright_photographs'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1229,7 +1229,7 @@ module SiSU_DB_columns            'rights_copyright_preparation'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1251,7 +1251,7 @@ module SiSU_DB_columns            'rights_copyright_digitization'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1273,7 +1273,7 @@ module SiSU_DB_columns            'rights_copyright_audio'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ 
-1295,7 +1295,7 @@ module SiSU_DB_columns            'rights_copyright_video'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1317,7 +1317,7 @@ module SiSU_DB_columns            'rights_license'          end          def create_column -          "#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1355,8 +1355,7 @@ module SiSU_DB_columns            'classify_topic_register'          end          def create_column -          "#{name}                VARCHAR(#{Db[:col_classify_txt_long]}) NULL," -          #"#{name}                  TEXT NULL,"  +          "#{name}                VARCHAR(#{Db[:col_info_note]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} @@ -1804,21 +1803,18 @@ module SiSU_DB_columns          self        end  =begin -#% misc -@make: - :skin: -@links: +#% src  =end -      def filename +      def src_filename          def name -          'filename' +          'src_filename'          end          def create_column            "#{name}                VARCHAR(#{Db[:col_filename]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} -           IS 'metadata document filename';} +           IS 'sisu markup source text filename';}          end          def tuple            t=if defined? 
@md.fns \ @@ -1831,56 +1827,61 @@ module SiSU_DB_columns          end          self        end -      def sisutxt                      # consider naming sisusrc +      def src_fingerprint          def name -          'sisutxt' +          'src_fingerprint' #hash/digest, sha256 or md5          end          def create_column -          "#{name}                TEXT NULL," +          "#{name}                VARCHAR(#{Db[:col_digest]}) NULL," +          #"#{name}                TEXT NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} -           IS 'sisu markup text (if shared)';} +            IS 'sisu markup source text fingerprint, hash digest sha256 (or md5)';}          end          def tuple -          t=if @md.mod.inspect=~/import|update/ \ -          and FileTest.exist?(@md.fns) -            ["#{name}, ","'#{@sisutxt}', "] +          t=if defined? @md.dgst \ +          and @md.dgst.class==Array \ +          and @md.dgst[1]=~/\S+/ +            txt=@md.dgst[1] +            ["#{name}, ","'#{txt}', "]            else ['','']            end          end          self        end -      def fulltext +      def src_filesize          def name -          'fulltext' +          'src_filesize'          end          def create_column -          "#{name}                TEXT NULL," +          "#{name}                VARCHAR(#{Db[:col_filesize]}) NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} -            IS 'document full text clean, searchable';} +            IS 'sisu markup source text file size';}          end          def tuple -          t=if @md.mod.inspect=~/import|update/ \ -          and  FileTest.exist?(@md.fns) -            ["#{name}, ","'#{@fulltext}', "] -          else ['',''] -          end +         t=if defined? 
@md.filesize \ +         and @md.filesize=~/\S+/ +           txt=@md.filesize +           special_character_escape(txt) +           ["#{name}, ","'#{txt}', "] +         else ['',''] +         end          end          self        end -      def word_count +      def src_word_count          def name -          'word_count' +          'src_word_count'          end          def create_column            "#{name}                TEXT NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} -            IS 'document word count';} +            IS 'sisu markup source text word count';}          end          def tuple            t=if defined? @md.wc_words \ @@ -1893,23 +1894,47 @@ module SiSU_DB_columns          end          self        end -      def digest +      def src_txt                      # consider naming sisusrc          def name -          'dgst' +          'src_text'          end          def create_column            "#{name}                TEXT NULL,"          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} -            IS 'document hash digest sha256 (or md5)';} +           IS 'sisu markup source text (if shared)';}          end          def tuple -          t=if defined? 
@md.dgst \ -          and @md.dgst=~/\S+/ -            txt=@md.dgst -            special_character_escape(txt) -            ["#{name}, ","'#{txt}', "] +          t=if @md.mod.inspect=~/import|update/ \ +          and FileTest.exist?(@md.fns) +            ["#{name}, ","'#{@sisutxt}', "] +          else ['',''] +          end +        end +        self +      end +=begin +#% misc +@make: + :skin: +@links: +=end +      def fulltext +        def name +          'fulltext' +        end +        def create_column +          "#{name}                TEXT NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +            IS 'document full text clean, searchable';} +        end +        def tuple +          t=if @md.mod.inspect=~/import|update/ \ +          and  FileTest.exist?(@md.fns) +            ["#{name}, ","'#{@fulltext}', "]            else ['','']            end          end @@ -1924,12 +1949,35 @@ module SiSU_DB_columns          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} -           IS 'metadata document skin name';} +           IS 'source text skin name';} +        end +        def tuple +          t=if defined? @md.skin_name \ +          and @md.skin_name=~/\S+/ +            txt=@md.skin_name +            special_character_escape(txt) +            ["#{name}, ","'#{txt}', "] +          else ['',''] +          end +        end +        self +      end +      def skin_fingerprint                      #check +        def name +          'skin_fingerprint' +        end +        def create_column +          "#{name}                VARCHAR(#{Db[:col_digest]}) NULL," +        end +        def column_comment +          %{COMMENT ON COLUMN metadata_and_text.#{name} +           IS 'source text skin fingerprint';}          end          def tuple -          t=if defined? 
@md.notes.skin_name \ -          and @md.notes.skin_name=~/\S+/ -            txt=@md.notes.skin_name +          t=if defined? @md.dgst_skin \ +          and @md.dgst_skin.class==Array \ +          and @md.dgst_skin[1]=~/\S+/ +            txt=@md.dgst_skin[1]              special_character_escape(txt)              ["#{name}, ","'#{txt}', "]            else ['',''] @@ -1946,7 +1994,7 @@ module SiSU_DB_columns          end          def column_comment            %{COMMENT ON COLUMN metadata_and_text.#{name} -           IS 'metadata document skin';} +           IS 'source text skin';}          end          def tuple            t=if defined? @md.skin \ @@ -1972,9 +2020,9 @@ module SiSU_DB_columns             IS 'metadata document links';}          end          def tuple -          t=if defined? @md.notes.links \ -          and @md.notes.links=~/\S+/ -            txt=@md.notes.links +          t=if defined? @md.links \ +          and @md.links=~/\S+/ +            txt=@md.links              special_character_escape(txt)              ["#{name}, ","'#{txt}', "]            else ['',''] diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index 1fc00168..c1fed045 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -182,13 +182,16 @@ module SiSU_DB_create              #{column.notes_prefix_a.create_column}              #{column.notes_prefix_b.create_column}              #{column.notes_suffix.create_column} +            /* src */ +            #{column.src_filename.create_column} +            #{column.src_fingerprint.create_column} +            #{column.src_filesize.create_column} +            #{column.src_word_count.create_column} +            #{column.src_txt.create_column}              /* misc */ -            #{column.filename.create_column} -            #{column.sisutxt.create_column}              #{column.fulltext.create_column} -            #{column.word_count.create_column} -            #{column.digest.create_column}              
#{column.skin_name.create_column} +            #{column.skin_fingerprint.create_column}              #{column.skin.create_column}              #{column.links.create_column.gsub(/,$/,'')}  /*          subj                 VARCHAR(64) NULL, */ @@ -215,7 +218,8 @@ module SiSU_DB_create              ocns            VARCHAR(6),              clean           TEXT NULL,              body            TEXT NULL, -            seg             VARCHAR(120) NULL, +            words           VARCHAR(3000) NULL, +            seg             VARCHAR(256) NULL,              lev_an          VARCHAR(1),              lev             SMALLINT NULL,              lev1            SMALLINT, @@ -254,6 +258,7 @@ module SiSU_DB_create              nr              SMALLINT,              clean           TEXT NULL,              body            TEXT NULL, +            words           VARCHAR(3000) NULL,              ocn             SMALLINT,              ocnd            VARCHAR(6),              ocns            VARCHAR(6), @@ -276,6 +281,7 @@ module SiSU_DB_create              nr              SMALLINT,              clean           TEXT NULL,              body            TEXT NULL, +            words           VARCHAR(3000) NULL,              ocn             SMALLINT,              ocnd            VARCHAR(6),              ocns            VARCHAR(6), @@ -298,6 +304,7 @@ module SiSU_DB_create              nr              SMALLINT,              clean           TEXT NULL,              body            TEXT NULL, +            words           VARCHAR(3000) NULL,              ocn             SMALLINT,              ocnd            VARCHAR(6),              ocns            VARCHAR(6), @@ -430,12 +437,14 @@ module SiSU_DB_create            %{#{column.notes_prefix_a.column_comment}},            %{#{column.notes_prefix_b.column_comment}},            %{#{column.notes_suffix.column_comment}}, -          %{#{column.filename.column_comment}}, -          %{#{column.sisutxt.column_comment}}, +          
%{#{column.src_filename.column_comment}}, +          %{#{column.src_fingerprint.column_comment}}, +          %{#{column.src_filesize.column_comment}}, +          %{#{column.src_word_count.column_comment}}, +          %{#{column.src_txt.column_comment}},            %{#{column.fulltext.column_comment}}, -          %{#{column.word_count.column_comment}}, -          %{#{column.digest.column_comment}},            %{#{column.skin_name.column_comment}}, +          %{#{column.skin_fingerprint.column_comment}},            %{#{column.skin.column_comment}},            %{#{column.links.column_comment}},          ] diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 7189da56..35d971af 100644 --- a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -133,37 +133,49 @@ module SiSU_DB_drop          ensure          end        end +      def conn_execute_array(sql_arr) +        @conn.transaction do |conn| +          sql_arr.each do |sql| +            conn.execute(sql) +          end +        end +      end        def indexes                                                             #% drop all indexes -        #@conn.do(%{ -        #  DROP INDEX object_nr ON doc_objects(ocn); -        #  DROP INDEX body ON doc_objects(body); -        #  DROP INDEX clean ON doc_objects(clean); -        #  DROP INDEX lev1 ON doc_objects(lev1); -        #  DROP INDEX lev2 ON doc_objects(lev2); -        #  DROP INDEX lev3 ON doc_objects(lev3); -        #  DROP INDEX lev4 ON doc_objects(lev4); -        #  DROP INDEX lev5 ON doc_objects(lev5); -        #  DROP INDEX lev6 ON doc_objects(lev6); -        #  DROP INDEX endnote_nr ON endnotes(nr); -        #  DROP INDEX endnote ON endnotes(body); -        #  DROP INDEX title ON metadata_and_text(title); -        #  DROP INDEX filename ON metadata_and_text(filename) -        #  /* -        #  DROP INDEX object_nr ON doc_objects(ocn) CASCADE; -        #  DROP INDEX body ON doc_objects(body) CASCADE; -        #  DROP INDEX clean ON 
doc_objects(clean) CASCADE; -        #  DROP INDEX lev1 ON doc_objects(lev1) CASCADE; -        #  DROP INDEX lev2 ON doc_objects(lev2) CASCADE; -        #  DROP INDEX lev3 ON doc_objects(lev3) CASCADE; -        #  DROP INDEX lev4 ON doc_objects(lev4) CASCADE; -        #  DROP INDEX lev5 ON doc_objects(lev5) CASCADE; -        #  DROP INDEX lev6 ON doc_objects(lev6) CASCADE; -        #  DROP INDEX endnote_nr ON endnotes(nr) CASCADE; -        #  DROP INDEX endnote ON endnotes(body) CASCADE; -        #  DROP INDEX title ON metadata_and_text(title) CASCADE; -        #  DROP INDEX filename ON metadata_and_text(filename) CASCADE -        #  */ -        #}) +        print "\n          drop documents common indexes\n" unless @opt.cmd =~/q/ +        sql_arr=[ +          %{DROP INDEX idx_text_words;}, +          %{DROP INDEX idx_title;}, +          %{DROP INDEX idx_author;}, +          %{DROP INDEX idx_filename;}, +          %{DROP INDEX idx_topics;}, +          %{DROP INDEX idx_ocn;}, +          %{DROP INDEX idx_digest_clean;}, +          %{DROP INDEX idx_digest_all;}, +          %{DROP INDEX idx_lev1;}, +          %{DROP INDEX idx_lev2;}, +          %{DROP INDEX idx_lev3;}, +          %{DROP INDEX idx_lev4;}, +          %{DROP INDEX idx_lev5;}, +          %{DROP INDEX idx_lev6;}, +          %{DROP INDEX idx_endnote_words;}, +          %{DROP INDEX idx_endnote_nr;}, +          %{DROP INDEX idx_digest_en;}, +          %{DROP INDEX idx_endnote_words_asterisk;}, +          %{DROP INDEX idx_endnote_nr_asterisk;}, +          %{DROP INDEX idx_endnote_asterisk;}, +          %{DROP INDEX idx_digest_en_asterisk;}, +          %{DROP INDEX idx_endnote_words_plus;}, +          %{DROP INDEX idx_endnote_nr_plus;}, +          %{DROP INDEX idx_endnote_plus;}, +          %{DROP INDEX idx_digest_en_plus}, +        ] +        conn_execute_array(sql_arr) +        print "\n          drop documents TEXT indexes\n" unless @opt.cmd =~/q/ +        sql_arr=[ +          %{DROP INDEX idx_clean;}, +     
     %{DROP INDEX idx_endnote}, +        ] +        conn_execute_array(sql_arr)        end        self      end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 45aca11b..e351f6fc 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -122,7 +122,7 @@ module SiSU_DB_import        tell.puts_blue unless @opt.cmd =~/q/        tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc)        tell.print_grey if @opt.cmd =~/v/ -      select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; } +      select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; }        file_exist=@sql_type=~/sqlite/ \        ? @conn.get_first_value(select_first_match) \        : @conn.select_one(select_first_match) @@ -265,7 +265,10 @@ module SiSU_DB_import                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup -              strip_markup(@col[:plaintext]) +              @col[:plaintext]=strip_markup(@col[:plaintext]) +              @col[:plaintext]=clean_searchable_text(@col[:plaintext]) +              @col[:words]=@col[:plaintext].dup +              @col[:words]=unique_words(@col[:words])                if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last                end                if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last @@ -298,7 +301,10 @@ module SiSU_DB_import                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup -              strip_markup(@col[:plaintext]) +              @col[:plaintext]=strip_markup(@col[:plaintext]) +              @col[:plaintext]=clean_searchable_text(@col[:plaintext]) +              
@col[:words]=@col[:plaintext].dup +              @col[:words]=unique_words(@col[:words])                @en_a,@en_z=@en[0].first,@en[0].last if @en[0]                @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]                @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -324,7 +330,10 @@ module SiSU_DB_import                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup -              strip_markup(@col[:plaintext]) +              @col[:plaintext]=strip_markup(@col[:plaintext]) +              @col[:plaintext]=clean_searchable_text(@col[:plaintext]) +              @col[:words]=@col[:plaintext].dup +              @col[:words]=unique_words(@col[:words])                @en_a,@en_z=@en[0].first,@en[0].last if @en[0]                @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]                @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -350,7 +359,10 @@ module SiSU_DB_import                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup -              strip_markup(@col[:plaintext]) +              @col[:plaintext]=strip_markup(@col[:plaintext]) +              @col[:plaintext]=clean_searchable_text(@col[:plaintext]) +              @col[:words]=@col[:plaintext].dup +              @col[:words]=unique_words(@col[:words])                @en_a,@en_z=@en[0].first,@en[0].last if @en[0]                @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]                @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -389,12 +401,15 @@ module SiSU_DB_import                end                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup -          
    strip_markup(@col[:plaintext]) +              @col[:plaintext]=strip_markup(@col[:plaintext]) +              @col[:plaintext]=clean_searchable_text(@col[:plaintext]) +              @col[:words]=@col[:plaintext].dup +              @col[:words]=unique_words(@col[:words])                t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)                @tuple_array << t.tuple                @en,@en_ast,@en_pls=[],[],[]                @col[:en_a]=@col[:en_z]=nil -              @col[:lev]=@col[:plaintext]=@col[:body]='' +              @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''              end              if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/                                         #% import into database endnotes tables                endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) @@ -406,9 +421,9 @@ module SiSU_DB_import                    @id_n+=1                    special_character_escape(txt)                    body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) -                  #special_character_escape(body) -                  #special_character_escape(txt)                    strip_markup(txt) +                  words=txt.dup +                  words=unique_words(words)                    if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -418,16 +433,17 @@ module SiSU_DB_import                    end                    if txt                      en={ :type => 'endnotes', -                      :id   => @id_n, -                      :lid  => @col[:lid], -                      :nr   => nr, -                      :txt  => txt, -                      :body => body, -                      :ocn  => @col[:ocn], -                      :ocnd => @col[:ocnd], -                      :ocns => @col[:ocns], -                      :id_t => 
@@id_t, -                      :hash => digest_clean +                      :id      => @id_n, +                      :lid     => @col[:lid], +                      :nr      => nr, +                      :txt     => txt, +                      :body    => body, +                      :words   => words, +                      :ocn     => @col[:ocn], +                      :ocnd    => @col[:ocnd], +                      :ocns    => @col[:ocns], +                      :id_t    => @@id_t, +                      :hash    => digest_clean                      }                      t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)                      @tuple_array << t.tuple @@ -447,6 +463,8 @@ module SiSU_DB_import                    special_character_escape(txt)                    body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)                    strip_markup(txt) +                  words=txt.dup +                  words=unique_words(words)                    if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -456,16 +474,17 @@ module SiSU_DB_import                    end                    if txt                      en={ :type => 'endnotes_asterisk', -                      :id   => @id_n, -                      :lid  => @col[:lid], -                      :nr   => nr, -                      :txt  => txt, -                      :body => body, -                      :ocn  => @col[:ocn], -                      :ocnd => @col[:ocnd], -                      :ocns => @col[:ocns], -                      :id_t => @@id_t, -                      :hash => digest_clean +                      :id      => @id_n, +                      :lid     => @col[:lid], +                      :nr      => nr, +                      :txt     => txt, +                      :body    => body, +                  
    :words   => words, +                      :ocn     => @col[:ocn], +                      :ocnd    => @col[:ocnd], +                      :ocns    => @col[:ocns], +                      :id_t    => @@id_t, +                      :hash    => digest_clean                      }                      t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)                      @tuple_array << t.tuple @@ -485,6 +504,8 @@ module SiSU_DB_import                    special_character_escape(txt)                    body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)                    strip_markup(txt) +                  words=txt.dup +                  words=unique_words(words)                    if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -494,16 +515,17 @@ module SiSU_DB_import                    end                    if txt                      en={ :type => 'endnotes_plus', -                      :id   => @id_n, -                      :lid  => @col[:lid], -                      :nr   => nr, -                      :txt  => txt, -                      :body => body, -                      :ocn  => @col[:ocn], -                      :ocnd => @col[:ocnd], -                      :ocns => @col[:ocns], -                      :id_t => @@id_t, -                      :hash => digest_clean +                      :id      => @id_n, +                      :lid     => @col[:lid], +                      :nr      => nr, +                      :txt     => txt, +                      :body    => body, +                      :words   => words, +                      :ocn     => @col[:ocn], +                      :ocnd    => @col[:ocnd], +                      :ocns    => @col[:ocns], +                      :id_t    => @@id_t, +                      :hash    => digest_clean                      }      
                t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)                      @tuple_array << t.tuple @@ -526,25 +548,25 @@ module SiSU_DB_import            endnotes(@txt).range            @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/            @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -          @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ +          @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/            @txt=endnotes(@txt).clean_text          end          @txt        end        def standard -        x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) -        else nil -        end +        x=(@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \ +        ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) \ +        : nil        end        def asterisk -        x=if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) -        else nil -        end +        x=(@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \ +        ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) \ +        : nil        end        def plus -        x=if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) -        else nil -        end +        x=(@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \ +        ? 
@txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) \ +        : nil        end        def clean_text(base_url=nil)          if base_url diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index 3cbcc20c..fb5ddd76 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,38 +73,44 @@ module SiSU_DB_index        def base          print "\n          create documents common indexes\n" unless @opt.cmd =~/q/          sql_arr=[ -          %{CREATE INDEX object_nr ON doc_objects(ocn);}, -          %{CREATE INDEX digest_clean ON doc_objects(digest_clean);}, -          %{CREATE INDEX digest_all ON doc_objects(digest_all);}, -          %{CREATE INDEX lev1 ON doc_objects(lev1);}, -          %{CREATE INDEX lev2 ON doc_objects(lev2);}, -          %{CREATE INDEX lev3 ON doc_objects(lev3);}, -          %{CREATE INDEX lev4 ON doc_objects(lev4);}, -          %{CREATE INDEX lev5 ON doc_objects(lev5);}, -          %{CREATE INDEX lev6 ON doc_objects(lev6);}, -          %{CREATE INDEX endnote_nr ON endnotes(nr);}, -          %{CREATE INDEX digest_en ON endnotes(digest_clean);}, -          %{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);}, -          %{CREATE INDEX endnote_asterisk ON endnotes_asterisk(clean);}, -          %{CREATE INDEX digest_en_asterisk ON endnotes_asterisk(digest_clean);}, -          %{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);}, -          %{CREATE INDEX endnote_plus ON endnotes_plus(clean);}, -          %{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);}, -          %{CREATE INDEX title ON metadata_and_text(title);}, -          %{CREATE INDEX filename ON metadata_and_text(filename)}, +          %{CREATE INDEX idx_text_words ON doc_objects(words);}, +          %{CREATE INDEX idx_ocn ON doc_objects(ocn);}, +          %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);}, +          %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);}, +          %{CREATE INDEX idx_lev1 ON 
doc_objects(lev1);}, +          %{CREATE INDEX idx_lev2 ON doc_objects(lev2);}, +          %{CREATE INDEX idx_lev3 ON doc_objects(lev3);}, +          %{CREATE INDEX idx_lev4 ON doc_objects(lev4);}, +          %{CREATE INDEX idx_lev5 ON doc_objects(lev5);}, +          %{CREATE INDEX idx_lev6 ON doc_objects(lev6);}, +          %{CREATE INDEX idx_endnote_words ON endnotes(words);}, +          %{CREATE INDEX idx_endnote_nr ON endnotes(nr);}, +          %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);}, +          %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);}, +          %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);}, +          %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);}, +          %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);}, +          %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);}, +          %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);}, +          %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);}, +          %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);}, +          %{CREATE INDEX idx_title ON metadata_and_text(title);}, +          %{CREATE INDEX idx_author ON metadata_and_text(creator_author);}, +          %{CREATE INDEX idx_filename ON metadata_and_text(src_filename);}, +          %{CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register)},          ]          conn_execute_array(sql_arr)        end        def text -        print "\n          create documents text indexes\n" unless @opt.cmd =~/q/ +        print "\n          create documents TEXT indexes\n" unless @opt.cmd =~/q/          sql_arr=[ -          %{CREATE INDEX clean ON doc_objects(clean);}, -          %{CREATE INDEX endnote ON endnotes(clean);} +          %{CREATE INDEX idx_clean ON doc_objects(clean);}, +          %{CREATE INDEX idx_endnote ON endnotes(clean);}          ]          conn_execute_array(sql_arr)        end        
base -      @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) +      text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text)      end    end  end diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index cc00b74a..23b6249d 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -79,11 +79,11 @@ module SiSU_DB_tuple      end      def tuple                                                                    #% import line        sql_entry=if @col[:en_a] -        "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + -        "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" +        "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + +        "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"        else -        "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, 
lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + -        "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" +        "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + +        "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"        end        if @opt.cmd =~/M/          if @opt.cmd =~/V/ @@ -191,12 +191,14 @@ module SiSU_DB_tuple  #{@tp.column.notes_prefix_a.tuple[0]}  #{@tp.column.notes_prefix_b.tuple[0]}  #{@tp.column.notes_suffix.tuple[0]} -#{@tp.column.filename.tuple[0]} -#{@tp.column.sisutxt.tuple[0]} +#{@tp.column.src_filename.tuple[0]} +#{@tp.column.src_fingerprint.tuple[0]} +#{@tp.column.src_filesize.tuple[0]} +#{@tp.column.src_word_count.tuple[0]} +#{@tp.column.src_txt.tuple[0]}  #{@tp.column.fulltext.tuple[0]} -#{@tp.column.word_count.tuple[0]} -#{@tp.column.digest.tuple[0]}  #{@tp.column.skin_name.tuple[0]} +#{@tp.column.skin_fingerprint.tuple[0]}  #{@tp.column.skin.tuple[0]}  #{@tp.column.links.tuple[0]}  tid) @@ -272,12 +274,14 @@ tid)  #{@tp.column.notes_prefix_a.tuple[1]}  #{@tp.column.notes_prefix_b.tuple[1]}  #{@tp.column.notes_suffix.tuple[1]} 
-#{@tp.column.filename.tuple[1]} -#{@tp.column.sisutxt.tuple[1]} +#{@tp.column.src_filename.tuple[1]} +#{@tp.column.src_fingerprint.tuple[1]} +#{@tp.column.src_filesize.tuple[1]} +#{@tp.column.src_word_count.tuple[1]} +#{@tp.column.src_txt.tuple[1]}  #{@tp.column.fulltext.tuple[1]} -#{@tp.column.word_count.tuple[1]} -#{@tp.column.digest.tuple[1]}  #{@tp.column.skin_name.tuple[1]} +#{@tp.column.skin_fingerprint.tuple[1]}  #{@tp.column.skin.tuple[1]}  #{@tp.column.links.tuple[1]}  #{@id} @@ -311,8 +315,8 @@ tid)        @conn,@en,@opt,@file=conn,en,opt,file      end      def tuple -      sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " + -      "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" +      sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " + +      "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"        if @opt.cmd =~/M/          @file.puts sql_entry        else diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb index e7942a15..5a7f1244 100644 --- a/lib/sisu/v2/db_remove.rb +++ b/lib/sisu/v2/db_remove.rb @@ -72,9 +72,9 @@ module SiSU_DB_remove          : false        end        del_id=if driver_sqlite3 -        @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i +        @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE src_filename = '#{@opt.fns}'; }).to_i        else -        x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) +        x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM 
metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; })          x ? (x.join.to_i) : nil        end        if del_id diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb index 68e9ef8a..17a92683 100644 --- a/lib/sisu/v2/db_sqltxt.rb +++ b/lib/sisu/v2/db_sqltxt.rb @@ -72,9 +72,10 @@ module SiSU_DB_text      end      def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source        txt_arr,en=[],[] +      arr=arr.class==String ? arr.split(/\n+/m) : arr        arr.each do |s| -        s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2') -        s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'') +        s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2') +        s.gsub!(/^(?:group|poem|code)\{/m,''); s.gsub!(/^\}(?:group|poem|code)/m,'')          s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')          if s =~/^:A~/            if defined? @md.creator \ @@ -82,26 +83,26 @@ module SiSU_DB_text            and not @md.creator.author.empty?              s.gsub!(/@author/,@md.creator.author)            else -            tell=SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:') -            tell.warn +            tell=SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:',@md.fnb) +            tell.warn unless @md.cmd.inspect =~/q/            end            if defined? @md.title \            and defined? @md.title.full \            and not @md.title.full.empty?              
s.gsub!(/@title/,@md.title.full)            else -            tell=SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:') -            tell.warn +            tell=SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:',@md.fnb) +            tell.warn unless @md.cmd.inspect =~/q/            end          end -        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'') -        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'') -        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'') -        s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block -        s.gsub!(/<br>/,' ') -        en << s.scan(/~\{\s*(.+?)\s*\}~/) -        s.gsub!(/~\{.+?\}~/,'') -        s.gsub!(/ \s+/,' ') +        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'') +        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'') +        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'') +        s.gsub!(/^%{1,3} .+/m,'') #removed even if contained in code block +        s.gsub!(/<br>/m,' ') +        en << s.scan(/~\{\s*(.+?)\s*\}~/m) +        s.gsub!(/~\{.+?\}~/m,'') +        s.gsub!(/ \s+/m,' ')          #special_character_escape(s)          s        end @@ -124,6 +125,11 @@ module SiSU_DB_text        str.strip!        
str      end +    def unique_words(str) +      a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/) +      str=a.uniq.sort.join(' ') +      str +    end    end  end  __END__ diff --git a/lib/sisu/v2/param.rb b/lib/sisu/v2/param.rb index ef91f10d..87dd9aab 100644 --- a/lib/sisu/v2/param.rb +++ b/lib/sisu/v2/param.rb @@ -162,8 +162,8 @@ module SiSU_Param            puts "#{n} is #{s.class}: programming error, String expected #{__FILE__}:#{__LINE__}"            s          else  -          tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped") -          tell.warn +          tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped",@opt.fns) +          tell.warn unless @opt.cmd =~/q/            nil           end        end @@ -244,7 +244,9 @@ module SiSU_Param            validate_length(s,l,n)          end          def note -          @h['note']                   #TEXT +          s=@h['note'] +          l,n=Db[:col_info_note],'title.note' +          validate_length(s,l,n)          end          def short            s=(@h['short'] ? @h['short'] : @h['main']) @@ -380,9 +382,9 @@ module SiSU_Param        def rights          a=@s.split(/[ ]*\n[ ]*/m)          @h=build_hash(a) -        def copyright                  # TEXT used db sql  +        def copyright            def text #you may wish to expand to take from all -            r=if @h['copyright'] +            s=if @h['copyright']                @h['copyright']              elsif @h['text']                @h['text'] @@ -393,26 +395,40 @@ module SiSU_Param                tell.warn unless @opt.cmd =~/q/                ''              end +            l,n=Db[:col_info_note],'rights.copyright.text' +            validate_length(s,l,n)            end            def translation -            r=(@h['translation'] ? @h['translation'] : nil) +            s=(@h['translation'] ? 
@h['translation'] : nil) +            l,n=Db[:col_info_note],'rights.copyright.translation' +            validate_length(s,l,n)            end            def illustrations -            r=(@h['illustrations'] ? @h['illustrations'] : nil) +            s=(@h['illustrations'] ? @h['illustrations'] : nil) +            l,n=Db[:col_info_note],'rights.copyright.illustrations' +            validate_length(s,l,n)            end            def photographs -            r=(@h['photographs'] ? @h['photographs'] : nil) +            s=(@h['photographs'] ? @h['photographs'] : nil) +            l,n=Db[:col_info_note],'rights.copyright.photographs' +            validate_length(s,l,n)            end            def digitiztion -            r=(@h['digitization'] ? @h['digitization'] : nil) +            s=(@h['digitization'] ? @h['digitization'] : nil) +            l,n=Db[:col_info_note],'rights.copyright.digitization' +            validate_length(s,l,n)            end            def audio -            r=(@h['audio'] ? @h['audio'] : nil) +            s=(@h['audio'] ? @h['audio'] : nil) +            l,n=Db[:col_info_note],'rights.copyright.audio' +            validate_length(s,l,n)            end            self          end          def license -          r=(@h['license'] ? @h['license'] : nil) +          s=(@h['license'] ? @h['license'] : nil) +          l,n=Db[:col_info_note],'rights.license' +          validate_length(s,l,n)          end          def all            s=if @h['all']; @h['all'] @@ -445,6 +461,9 @@ module SiSU_Param              if s.empty?                
tell=SiSU_Screen::Ansi.new(@cmd,'WARNING Document Rights information missing; provide @rights: :copyright:')                tell.warn unless @opt.cmd =~/q/ +            else +              l,n=Db[:col_info_note],'rights.all' +              validate_length(s,l,n)              end              s            end @@ -472,7 +491,7 @@ module SiSU_Param          end          def topic_register            s=@h['topic_register'] -          l,n=Db[:col_classify_txt_long],'classify.topic_register' +          l,n=Db[:col_info_note],'classify.topic_register'            validate_length(s,l,n)          end          def type @@ -743,9 +762,9 @@ module SiSU_Param        @doc={ :lv=>[] }        @doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','',''        @@publisher='SiSU scribe' -      attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_pn
g,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy +      attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc
_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:filesize,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy        def initialize(fns_array,opt) -        @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_re
gister=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil +        @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@filesize=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil          @data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data          
@flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_idx=false,false,false,false,false,false,false          @seg_autoname_safe=true @@ -1107,16 +1126,22 @@ module SiSU_Param            @papersize=determine_papersize(@mod.inspect)          end          @papersize_array=@papersize.scan(/(?:a4|letter|legal|book|a5|b5)/i) +        fn=@fns=~/\.ssm\.sst$/ ? @fns.gsub(/.sst/,'') : @fns #decide what to do a filesize on .ssm tells very little about actual document size +        @filesize=(File.size(fn)).to_s          if @sys.openssl !=false            skin=@doc_skin \            ? (SiSU_Env::Info_skin.new(@opt,@doc_skin).select) \            : SiSU_Env::Info_skin.new(@opt).select            @dgst,@dgst_skin=[],[]            if @env.digest.type =~/sha256/ -            @dgst=@sys.sha256(@env.source_file_with_path) +            dgst=@sys.sha256(@env.source_file_with_path) +            @dgst=dgst[1].length==64 ? dgst : nil +            puts 'check document (sha256) digest' if not @dgst              @dgst_skin=skin ? (@sys.sha256(skin)) : nil             else -            @dgst=@sys.md5(@env.source_file_with_path) +            dgst=@sys.md5(@env.source_file_with_path) +            @dgst=dgst[1].length==32 ? dgst : nil +            puts 'check document (md5) digest' if not @dgst              @dgst_skin=skin ? (@sys.md5(skin)) : nil            end          end | 
