#!/usr/bin/env rdmd
/+
- Name: Spine, Doc Reform
- Description: documents, structuring, processing, publishing, search
- static content generator
- Author: Ralph Amissah
[ralph.amissah@gmail.com]
- Copyright: (C) 2015 - 2021 Ralph Amissah, All Rights
Reserved.
- License: AGPL 3 or later:
Spine (SiSU), a framework for document structuring, publishing and
search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Affero General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see [https://www.gnu.org/licenses/].
If you have Internet connection, the latest version of the AGPL should be
available at these locations:
[https://www.fsf.org/licensing/licenses/agpl.html]
[https://www.gnu.org/licenses/agpl.html]
- Spine (by Doc Reform, related to SiSU) uses standard:
- docReform markup syntax
- standard SiSU markup syntax with modified headers and minor modifications
- docReform object numbering
- standard SiSU object citation numbering & system
- Homepages:
[https://www.doc_reform.org]
[https://www.sisudoc.org]
- Git
[https://git.sisudoc.org/projects/?p=software/spine.git;a=summary]
+/
/+ dub.sdl
name "spine search"
description "spine cgi search"
/+ obt - org-mode generated file +/
+/
import std.format;
import std.range;
import std.regex;
import arsd.cgi;
import d2sqlite3;
import std.process : environment;
void cgi_function_intro(Cgi cgi) {
mixin(import("configuration.txt"));
string header;
string table;
string form;
// Per-request settings for the search CGI. The fields are filled by the
// assignments that follow this declaration, from CGI environment variables
// and/or the compiled-in _cfg values noted on each line.
struct Config {
string http_request_type; // REQUEST_SCHEME, else _cfg.http_request_type
string http_host; // SERVER_NAME, else _cfg.http_host
// string server_name;
string web_doc_root_path; // DOCUMENT_ROOT, else _cfg.www_doc_root
string doc_collection_sub_root; // set to the literal "/spine/static"
string cgi_root; // CONTEXT_DOCUMENT_ROOT, else _cfg.cgi_bin_subpath
string cgi_script_name; // SCRIPT_NAME, else _cfg.cgi_filename
string cgi_search_form_title; // _cfg.cgi_search_form_title
string cgi_search_form_html_title; // built from _cfg.cgi_search_form_title
string data_path_html; // not assigned by the visible set-up code
string http_url; // HTTP_REFERER, else query_base_url ~ query_string
string query_base_url; // request type ~ "://" ~ host ~ "/" ~ script name ~ "?"
string query_string; // QUERY_STRING, else ""
string request_method; // REQUEST_METHOD, else "POST"
string db_path; // _cfg.db_sqlite_path
}
// Populate the request configuration. Environment variables set by the web
// server take precedence; the compiled-in _cfg values are the fallbacks.
auto conf = Config();
conf.http_request_type = environment.get("REQUEST_SCHEME", _cfg.http_request_type);
conf.http_host = environment.get("SERVER_NAME", _cfg.http_host);
// conf.http_host = environment.get("HTTP_HOST", _cfg.http_host);
conf.web_doc_root_path = environment.get("DOCUMENT_ROOT", _cfg.www_doc_root);
conf.doc_collection_sub_root = "/spine/static"; // (output_path - web_doc_root_path)
conf.cgi_root = environment.get("CONTEXT_DOCUMENT_ROOT", _cfg.cgi_bin_subpath);
conf.cgi_script_name = environment.get("SCRIPT_NAME", _cfg.cgi_filename);
conf.cgi_search_form_title = _cfg.cgi_search_form_title; // ≅ SiSU spine search form
conf.cgi_search_form_html_title = ""
~ _cfg.cgi_search_form_title
~ "";
// NOTE(review): a "/" is inserted before cgi_script_name here; if SCRIPT_NAME
// already begins with "/" the result contains "//" - confirm against the
// server's actual SCRIPT_NAME value.
conf.query_base_url = conf.http_request_type ~ "://" ~ conf.http_host ~ "/" ~ conf.cgi_script_name ~ "?";
conf.query_string = environment.get("QUERY_STRING", "");
// must follow the two assignments above: its fallback is built from them
conf.http_url = environment.get("HTTP_REFERER", conf.query_base_url ~ conf.query_string);
conf.request_method = environment.get("REQUEST_METHOD", "POST");
conf.db_path = _cfg.db_sqlite_path;
// Values taken from the submitted search form. The defaults declared here are
// replaced later in this function by the POST/GET values of the form fields
// named on each line.
struct CGI_val {
string db_selected = ""; // form field "selected_db"
string sql_match_limit = ""; // form field "sml"; radio: ( 1000 | 2500 )
string sql_match_offset = ""; // form field "smo"
string search_text = ""; // form field "sf"
string results_type = ""; // form field "rt"; index
bool checked_echo = false; // form field "ec"
bool checked_stats = false; // form field "sts"
bool checked_url = false; // form field "url"
bool checked_searched = false; // form field "se"
bool checked_tip = false; // form field "tip"
bool checked_sql = false; // form field "sql"
}
auto cv = CGI_val();
cv.db_selected = _cfg.db_sqlite_filename;
auto text_fields() {
string canned_query_str = environment.get("QUERY_STRING", "");
if ("query_string" in cgi.post) {
canned_query_str = environment.get("QUERY_STRING", "");
}
string[string] canned_query;
// For a GET request, split the raw query string into key/value pairs and
// store them in canned_query. Other request methods add nothing here.
if (conf.request_method == "GET") {
  foreach (pair_str; canned_query_str.split("&")) {
    // debug: cgi.write(pair_str ~ "\n");
    string[] pair = pair_str.split("=");
    // A segment without "=" (e.g. "?foo", or the empty segment produced by
    // "&&" or a trailing "&") yields fewer than two parts; indexing pair[1]
    // on it would raise a RangeError, so such segments are skipped.
    if (pair.length < 2) {
      continue;
    }
    canned_query[pair[0]] = pair[1];
  }
  // debug:
  // foreach (field, content; canned_query) {
  //   cgi.write(field ~ ": " ~ content ~ "\n");
  // }
}
static struct Rgx {
// static canned_query = ctRegex!(`\A(?P
");
} else if (environment.get("REQUEST_METHOD", "POST") == "GET") {
got.canned_query = environment.get("QUERY_STRING", "");
// cgi.write("f.canned_query: " ~ got.canned_query ~ "
");
got.search_text_area = "";
if ("sf" in canned_query && !(canned_query["sf"]).empty) {
got.text = canned_query["sf"].split("%20").join(" ");
got.search_text_area ~= "text: " ~ got.text ~ "\n";
}
if ("au" in canned_query && !(canned_query["au"]).empty) {
got.author = canned_query["au"].split("%20").join(" ");
got.search_text_area ~= "author: " ~ got.author ~ "\n";
}
if ("ti" in canned_query && !(canned_query["ti"]).empty) {
got.title = canned_query["ti"].split("%20").join(" ");
got.search_text_area ~= "title: " ~ got.title ~ "\n";
}
if ("uid" in canned_query && !(canned_query["uid"]).empty) {
got.uid = canned_query["uid"].split("%20").join(" ");
got.search_text_area ~= "uid: " ~ got.uid ~ "\n";
}
if ("fn" in canned_query && !(canned_query["fn"]).empty) {
got.fn = canned_query["fn"].split("%20").join(" ");
got.search_text_area ~= "fn: " ~ got.fn ~ "\n";
}
if ("kw" in canned_query && !(canned_query["kw"]).empty) {
got.keywords = canned_query["kw"].split("%20").join(" ");
got.search_text_area ~= "keywords: " ~ got.keywords ~ "\n";
}
if ("tr" in canned_query && !(canned_query["tr"]).empty) {
got.topic_register = canned_query["tr"].split("%20").join(" ");
got.search_text_area ~= "topic_register: " ~ got.topic_register ~ "\n";
}
if ("su" in canned_query && !(canned_query["su"]).empty) {
got.subject = canned_query["su"].split("%20").join(" ");
got.search_text_area ~= "subject: " ~ got.subject ~ "\n";
}
if ("de" in canned_query && !(canned_query["de"]).empty) {
got.description = canned_query["de"].split("%20").join(" ");
got.search_text_area ~= "description: " ~ got.description ~ "\n";
}
if ("pb" in canned_query && !(canned_query["pb"]).empty) {
got.publisher = canned_query["pb"].split("%20").join(" ");
got.search_text_area ~= "publisher: " ~ got.publisher ~ "\n";
}
if ("ed" in canned_query && !(canned_query["ed"]).empty) {
got.editor = canned_query["ed"].split("%20").join(" ");
got.search_text_area ~= "editor: " ~ got.editor ~ "\n";
}
if ("ct" in canned_query && !(canned_query["ct"]).empty) {
got.contributor = canned_query["ct"].split("%20").join(" ");
got.search_text_area ~= "contributor: " ~ got.contributor ~ "\n";
}
if ("dt" in canned_query && !(canned_query["dt"]).empty) {
got.date = canned_query["dt"].split("%20").join(" ");
got.search_text_area ~= "date: " ~ got.date ~ "\n";
}
if ("rt" in canned_query && !(canned_query["rt"]).empty) {
got.results_type = canned_query["rt"].split("%20").join(" ");
// got.search_text_area ~= "results_type: " ~ got.results_type ~ "\n";
}
if ("fmt" in canned_query && !(canned_query["fmt"]).empty) {
got.format = canned_query["fmt"].split("%20").join(" ");
got.search_text_area ~= "format: " ~ got.format ~ "\n";
}
if ("src" in canned_query && !(canned_query["src"]).empty) {
got.source = canned_query["src"].split("%20").join(" ");
got.search_text_area ~= "source: " ~ got.source ~ "\n";
}
if ("lng" in canned_query && !(canned_query["lng"]).empty) {
got.language = canned_query["lng"].split("%20").join(" ");
got.search_text_area ~= "language: " ~ got.language ~ "\n";
}
if ("rl" in canned_query && !(canned_query["rl"]).empty) {
got.relation = canned_query["rl"].split("%20").join(" ");
got.search_text_area ~= "relation: " ~ got.relation ~ "\n";
}
if ("cv" in canned_query && !(canned_query["cv"]).empty) {
got.coverage = canned_query["cv"].split("%20").join(" ");
got.search_text_area ~= "coverage: " ~ got.coverage ~ "\n";
}
if ("rgt" in canned_query && !(canned_query["rgt"]).empty) {
got.rights = canned_query["rgt"].split("%20").join(" ");
got.search_text_area ~= "rights: " ~ got.rights ~ "\n";
}
if ("cmt" in canned_query && !(canned_query["cmt"]).empty) {
got.comment = canned_query["cmt"].split("%20").join(" ");
got.search_text_area ~= "comment: " ~ got.comment ~ "\n";
}
// if ("abstract" in canned_query && !(canned_query["abstract"]).empty) {
// got.abstract = canned_query["abstract"];
// }
if ("bfn" in canned_query && !(canned_query["bfn"]).empty) { // search_field
got.src_filename_base = canned_query["bfn"].split("%20").join(" ");
got.search_text_area ~= "src_filename_base: " ~ got.src_filename_base ~ "\n";
}
if ("sml" in canned_query && !(canned_query["sml"]).empty) {
got.sql_match_limit = canned_query["sml"].split("%20").join(" ");
// got.search_text_area ~= "sql_match_limit: " ~ got.sql_match_limit ~ "\n";
}
// cgi.write("f.search_text_area: " ~ got.search_text_area ~ "
");
}
return got;
}
auto tf = text_fields; //
// Holder for the SQL text of the search.
struct SQL_select {
string the_body = ""; // statement text; passed to db.execute further down
string the_range = ""; // NOTE(review): no use visible in this part of the file - confirm purpose
}
auto sql_select = SQL_select();
// Returns the URL that reproduces the current search: conf.query_base_url
// followed by tf.canned_query for a POST request, or by the raw QUERY_STRING
// for a GET request. Any other request method yields an empty string.
string canned_url () {
  string method = environment.get("REQUEST_METHOD", "POST");
  switch (method) {
  case "POST":
    return conf.query_base_url ~ tf.canned_query;
  case "GET":
    return conf.query_base_url ~ environment.get("QUERY_STRING", "");
  default:
    return "";
  }
}
auto regex_canned_search () {
static struct RgxCS {
static track_offset = ctRegex!(`(?P
" ~ arrow_previous ~ arrow_next;
return _previous_next;
}
{
header = format(q"┃
┃",
conf.cgi_search_form_html_title,
);
}
{
// Returns the state of a search-form control for re-rendering the form.
//
// Params:
//   field_name = name of the form field to look up in cgi.post / cgi.get
//   type       = "field", "box", "radio" or "checkbox"
//   set        = value that counts as selected ("on" for a checkbox, the
//                option value for a radio button)
//
// Returns:
//   "field"              - tf.search_text_area (field_name and set are not
//                          consulted; the previous POST/GET lookup in this
//                          branch was always overwritten and has been dropped)
//   "box"                - "checked" or "off"; "off" when the field is absent
//   "radio" / "checkbox" - "checked" or "off"; "checked" when the field is absent
//   any other type       - ""
string post_value(string field_name, string type="box", string set="on") {
  // A non-empty POST value takes precedence, then any GET value; `absent` is
  // returned when the field was submitted by neither.
  string checked_state(string absent) {
    if (field_name in cgi.post && !(cgi.post[field_name]).empty) {
      return (cgi.post[field_name] == set) ? "checked" : "off";
    }
    if (field_name in cgi.get) {
      return (cgi.get[field_name] == set) ? "checked" : "off";
    }
    return absent;
  }
  switch (type) {
  case "field":
    return tf.search_text_area;
  case "box": // generic for checkbox or radio; checkbox set == "on" radio set == "name set"
    return checked_state("off");
  case "radio", "checkbox": // same lookup as "box", but selected by default
    return checked_state("checked");
  default:
    return "";
  }
}
// Builds the text that shows the "canned" (reproducible) search URL.
// Returns "" unless the "url" form box is checked. The parameter fv is not
// used in the body.
// Side effects when the box is checked: tf is refreshed from text_fields and
// conf.http_url is overwritten.
string the_can(string fv) {
string show_the_can = post_value("url");
string _the_can = "";
if (show_the_can == "checked") {
tf = text_fields;
string method_get_url = conf.query_base_url ~ environment.get("QUERY_STRING", "");
string method_post_url_construct = conf.query_base_url ~ tf.canned_query;
// assert(method_get_url == environment.get("HTTP_REFERER", conf.query_base_url ~ conf.query_string));
// NOTE(review): the POST text ends with a literal "%s" that nothing in this
// function substitutes - confirm how the caller is meant to treat it.
if (conf.request_method == "POST") {
_the_can =
""
~ "POST: "
~ ""
~ method_post_url_construct
~ ""
~ "
%s
";
} else if (conf.request_method == "GET") {
_the_can =
""
~ "GET: "
~ ""
~ method_get_url
~ "";
}
// NOTE(review): unlike conf.query_base_url, this is joined without "/" or "?"
// before the query - confirm that is intended.
conf.http_url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script_name ~ tf.canned_query;
}
return _the_can;
}
// Builds the summary of what was searched: the selected database followed by
// the non-empty text, title, author, date, uid and fn search fields.
// Returns "" unless the "se" form box is checked.
// Side effect when the box is checked: tf is refreshed from text_fields.
string provide_tip() {
string searched_tip = post_value("se");
string tip = "";
if (searched_tip == "checked") {
// search_field is assigned but not read again in this function
string search_field = post_value("sf", "field");
tf = text_fields;
// the template has 13 %s placeholders: the database name, then the six
// fields listed twice (an empty field contributes an empty string)
tip = format(q"┃
database: %s; selected view: index
search string: %s %s %s %s %s %s
%s %s %s %s %s %s
┃",
cv.db_selected,
(tf.text.empty ? "" : "\"text: " ~ tf.text ~ "; "),
(tf.title.empty ? "" : "\"title: " ~ tf.title ~ "; "),
(tf.author.empty ? "" : "\"author: " ~ tf.author ~ "; "),
(tf.date.empty ? "" : "\"date " ~ tf.date ~ "; "),
(tf.uid.empty ? "" : "\"uid: " ~ tf.uid ~ "; "),
(tf.fn.empty ? "" : "\"fn: " ~ tf.fn ~ "; "),
(tf.text.empty ? "" : "text: " ~ tf.text ~ "
"),
(tf.title.empty ? "" : "title: " ~ tf.title ~ "
"),
(tf.author.empty ? "" : "author: " ~ tf.author ~ "
"),
(tf.date.empty ? "" : "date: " ~ tf.date ~ "
"),
(tf.uid.empty ? "" : "\"uid: " ~ tf.uid ~ "; "),
(tf.fn.empty ? "" : "\"fn: " ~ tf.fn ~ "; "),
);
}
return tip;
}
form = format(q"┃
┃",
_cfg.cgi_filename,
(post_value("ec") == "checked") ? post_value("sf", "field") : "",
provide_tip,
search_note,
the_can(post_value("sf", "field")),
cv.db_selected,
post_value("rt", "box", "idx"),
post_value("rt", "box", "txt"),
post_value("sml", "box", "1000"),
post_value("sml", "box", "2500"),
post_value("ec"),
post_value("url"),
post_value("se"),
post_value("sql"),
);
{
// Returns the submitted value of field_name: the POST value when the field
// was posted, otherwise the GET value, otherwise default_val.
string set_value(string field_name, string default_val) {
  if (auto posted = field_name in cgi.post) {
    return *posted;
  }
  if (auto queried = field_name in cgi.get) {
    return *queried;
  }
  return default_val;
}
// True when field_name was submitted with the value "on", checking the POST
// fields first and then the GET fields; false otherwise.
bool set_bool(string field_name) {
  auto posted = field_name in cgi.post;
  if (posted !is null && *posted == "on") {
    return true;
  }
  auto queried = field_name in cgi.get;
  return queried !is null && *queried == "on";
}
cv.db_selected = set_value("selected_db", _cfg.db_sqlite_filename); // selected_db_name == db (spine.search.db or whatever)
cv.sql_match_limit = set_value("sml", "1000");
cv.sql_match_offset = set_value("smo", "0");
cv.search_text = set_value("sf", "");
cv.results_type = set_value("rt", "idx");
cv.checked_echo = set_bool("ec");
cv.checked_stats = set_bool("sts");
cv.checked_url = set_bool("url");
cv.checked_searched = set_bool("se");
cv.checked_tip = set_bool("tip");
cv.checked_sql = set_bool("sql");
tf = text_fields;
}
}
{
cgi.write(header);
cgi.write(table);
cgi.write(form);
// cgi.write(previous_next);
{ // debug environment
// foreach (k, d; environment.toAA) {
// cgi.write(k ~ ": " ~ d ~ "\n");
// }
}
{ // debug cgi info:
// cgi.write("db with path: " ~ conf.db_path ~ "/" ~ cv.db_selected ~ "\n");
// cgi.write("http url: " ~ conf.http_url ~ "\n");
// cgi.write("query base url: " ~ conf.query_base_url ~ "\n");
// cgi.write("db_selected: " ~ cv.db_selected ~ "\n");
// cgi.write("search_text: " ~ cv.search_text ~ "\n");
// cgi.write("sql_match_limit: " ~ cv.sql_match_limit ~ ";\n");
// cgi.write("sql_match_offset: " ~ cv.sql_match_offset ~ ";\n");
// cgi.write("results_type: " ~ cv.results_type ~ "\n");
// cgi.write("cv.checked_echo: " ~ (cv.checked_echo ? "checked" : "off") ~ "; \n");
// cgi.write("cv.checked_stats: " ~ (cv.checked_stats ? "checked" : "off") ~ "; \n");
// cgi.write("cv.checked_url: " ~ (cv.checked_url ? "checked" : "off") ~ "; \n");
// cgi.write("cv.checked_searched: " ~ (cv.checked_searched ? "checked" : "off") ~ ";\n");
// cgi.write("cv.checked_tip: " ~ (cv.checked_tip ? "checked" : "off") ~ "; \n");
// cgi.write("cv.checked_sql: " ~ (cv.checked_sql ? "checked" : "off") ~ "\n");
}
}
auto db = Database(conf.db_path ~ "/" ~ cv.db_selected);
{
// Adds cv.sql_match_limit (converted to uint) to sql_match_offset_count, a
// counter declared outside this function, and returns the updated counter.
uint sql_match_offset_counter(T)(T cv) {
  immutable uint step = to!uint(cv.sql_match_limit);
  sql_match_offset_count += step;
  return sql_match_offset_count;
}
void sql_search_query() {
string highlight_text_matched(string _txt, string search_field) {
string _mark_open = "┤";
string _mark_close = "├";
string _span_match = "";
string _span_close = "";
string _sf_str = search_field.strip.split("%20").join(" ").strip;
string[] _sf_arr = _sf_str.split(regex(r"\s+AND\s+|\s+OR\s+"));
auto rgx_url = regex(r"]+?>");
foreach (_sf; _sf_arr) {
auto rgx_matched_text = regex(_sf, "i");
auto rgx_marked_pair = regex(r"┤(?P
"
~ sql_select.the_body.strip.split("\n ").join(" ").split("\n").join("
")
~ "\n"
)
: "";
cgi.write(previous_next);
auto select_query_results = db.execute(sql_select.the_body).cached;
string _old_uid = "";
if (!select_query_results.empty) {
string _date_published = "0000";
string _close_para = "";
string _matched_ocn_open = "";
foreach (idx, row; select_query_results) {
if (row["uid"].as!string != _old_uid) {
_close_para = (idx == 1) ? "" : "
"; _old_uid = row["uid"].as!string; _date_published = (row["date_published"].as!string.match(regex(r"^([0-9]{4})"))) ? row["date_published"].as!string : "0000"; // used in regex that breaks if no match auto m = _date_published.match(regex(r"^([0-9]{4})")); string _date = (m.hit == "0000") ? "(year?) " : "(" ~ m.hit ~ ") "; cgi.write( _close_para ~ "
\"" ~ row["title"].as!string ~ "\"" ~ " " ~ _date ~ "[" ~ row["language_document_char"].as!string ~ "] " ~ row["creator_author_last_first"].as!string ~ " " ~ show_matched_objects(row["src_filename_base"].as!string) ~ "
" ~ "\n");
}
cgi.write("