#!/usr/bin/perl # # January 11, 1999: Added a README, and some minor changes. # February 14, 1998: Uses "templates", and handles previews for different # file formats (html, pdf, txt) # November 17, 1997: Better abstractions. # November 11, 1997: rewrote "head" bits in perl. Uses GET instead of POST. # Spring (?), 1997: First release. Simplistic interface to Swish. # The user should only need to modify this line: # $template_file = "/data/www/jswishi/example.html"; $template_file = "/home/zbohy/zatma.zbohy/English/search/search.html"; use CGI qw(:standard); ## BEGINING of template processing: # Read the template. $template = ""; if (!open(INPUT, "< $template_file")) { print STDERR "Can't open $template_file as input.\n"; exit(0); } else { while () { $template .= $_; } close (INPUT); } # Parse the template for categories. while ($template =~ m/)//i; push @category, $8; push @category_type, $2; push @category_file, $6; push @category_total, 0; $_ = $4; if (m/UNSELECTED/i) { push @category_selection, 0; # false (0) == UNSELECTED } else { push @category_selection, 1; # true (1) == SELECTED } } # Parse the template for replacement rules. while ($template =~ m/)//i; push @replace, $2; push @replacement, $4; } # Parse the template for page size (number of results per page) $template =~ s/()//i; $page_size = $2; # Parse the template for the maximum number of matches to find. $template =~ s/()//i; $maximum_matches = $2; # Parse the template for preview read size (how big a file chunk to read for preview) if ($template =~ m//i) { $template =~ s/()//i; $preview_read_size = $2; } else { $preview_read_size = 2000; } # Parse the template for preview size (how long the preview should be) if ($template =~ m//i) { $template =~ s/()//i; $preview_size = $2; } else { $preview_size = 320; } # Parse the template for the path to this script. $template =~ s/()//i; $searchcgi = $2; # Parse the template for the swish command. 
$template =~ s/()//i; $swish = $2; # The template is divided into several sections described in the README; # the next regular expressions splice the template into these sections. $template =~ s/.*?//s; $document_head = $&; $template =~ s/.*?//s; $document_tail = $&; $template =~ s/.*?//s; $search_summary_head = $&; $template =~ s/.*?//s; $search_summary_tail = $&; $template =~ s/.*?//s; $search_summary_body = $&; $template =~ s/.*?//s; $search_results_head = $&; $template =~ s/.*?//s; $search_results_head2 = $&; $template =~ s/.*?//s; $search_results_html = $&; $template =~ s/.*?//s; $search_results_pdf = $&; $template =~ s/.*?//s; $search_results_calendar = $&; $template =~ s/.*?//s; $search_results_tail = $&; $template =~ s/.*?//s; $search_box_head = $&; $template =~ s/.*?//s; $search_box_tail = $&; $template =~ s/.*?//s; $search_box_body_A = $&; $template =~ s/.*?//s; $search_box_body_B = $&; ## END of template processing. print header; print $document_head; ## BEGINING of search results processing if (param()) { $i=0; @scope = param("scope"); foreach $category_entry (@category) { $category_selection[$i] = 0; # unselected foreach $scope_entry (@scope) { if ($category_entry eq $scope_entry) { $category_selection[$i] = 1; # selected } } $i++; } if (param("start")) { $start = param("start"); $end = param("end"); } else { $start = 1; $end = $page_size; } $words = param("words"); $words =~ s/\W/ /g; if (@category) { my ($file, $command, $return_value, $results_complete, $results_partial); print $search_summary_head; $i=0; foreach $category_entry (@category) { if ($category_selection[$i] == 1) { $results_partial = ""; # "swish-e" defines the normal swish search. if ($category_type[$i] =~ m/swish\-e/i) { $file = "-f " . $category_file[$i]; $file =~ s/\,/ /g; # commas -> spaces $command = "$swish -m $maximum_matches $file -w \"" . $words . 
"\""; # DEBUGGING:: # print $command; $return_value = `$command`; $results_partial .= &swish_parse($return_value, $start, $end); $category_total[$i] = $total; } # "calendar" defines the calendar backend, a mysql example. if ($category_type[$i] =~ m/calendar/i) { $results_partial .= &process_calendar($category_file[$i], $words, $start, $end); $category_total[$i] = $total; } $results_partial .= $search_results_tail; $results_partial_heading = $search_results_head; if ($start > 1) { $newstart = $start - $page_size; $newend = $newstart + $page_size - 1; $previous_page = "Previous Page"; $results_partial_heading =~ s/Previous Page/$previous_page/; } if ($end < $category_total[$i]) { $newstart = $start + $page_size; $newend = $newstart + $page_size - 1; $next_page = "Next Page"; $results_partial_heading =~ s/Next Page/$next_page/; } $results_partial_heading =~ s/Entry Title/$category_entry/g; if ($category_total[$i] != 0) { $results_complete = $results_complete . $results_partial_heading . $results_partial; } } $i++; } $i=0; foreach $category_entry (@category) { if ($category_selection[$i] == 1) { $results_search_summary = $search_summary_body; if ($category_total[$i] == 0) { $results_search_summary =~ s/Entry Title/$category_entry/g; $results_search_summary =~ s/Entry Range//; } else { $results_search_summary =~ s/Entry Title/$category_entry<\/a>/g; if ($start <= $category_total[$i]) { if ($end > $category_total[$i]) { $results_search_summary =~ s/Entry Range/$start - $category_total[$i]/; } else { $results_search_summary =~ s/Entry Range/$start - $end/; } } } $results_search_summary =~ s/Entry Total/$category_total[$i] Total/; print $results_search_summary; } $i++; } print $search_summary_tail; print $results_complete; } } # Print the search box. 
# Print the search box and finish the page.
# DEFAULT_VALUE in the box header is replaced by the current value of $words.
$search_box_head =~ s/DEFAULT\_VALUE/$words/;
print $search_box_head;

# One search-box row per category: the first row is built from template
# body A, every later row from template body B.
for my $idx (0 .. $#category) {
    my $box = ($idx == 0) ? $search_box_body_A : $search_box_body_B;

    # Rows for unselected categories lose their CHECKED marker.
    if ($category_selection[$idx] == 0) {
        $box =~ s/CHECKED//i;
    }
    $box =~ s/Entry Title/$category[$idx]/g;
    print $box;
}

print $search_box_tail;
print $document_tail;
exit(0);

### END OF MAIN PROCEDURE

### Use the replacement rules to manipulate the links.
#
# Takes one argument, the link text, and returns a copy with every rule
# applied in order: the first case-insensitive match of $replace[$n] is
# replaced by $replacement[$n].  Rules are interpolated into the pattern as
# regular expressions; the replacement text is inserted as-is.
sub link_replacement {
    my ($link) = @_;

    for my $n (0 .. $#replace) {
        my $replace_this = $replace[$n];
        my $with_this    = $replacement[$n];

        # Skip blank rules: Perl treats an empty pattern as "reuse the last
        # successfully matched regex", which would rewrite the link using an
        # unrelated pattern.
        next if !defined $replace_this || $replace_this eq '';

        $link =~ s/$replace_this/$with_this/i;
    }

    return $link;
}

# NOTE(review): the line below is the opening of process_html, which continues
# beyond this line.  It is kept verbatim (still a single comment line) because
# the remainder of that sub is not part of this block.
### Create a result entry for an html or text file. sub process_html { my($link, $title, $size, $score, $preview, $rvalue, $number); $link = $_[0]; $title = $_[1]; $size = $_[2]; $score = $_[3]; $number = $_[4]; $preview = &head($link); # Kill html tags. Change these expressions to your preference. # 1) Look for tag and, if found, # delete everything except the inside text $preview =~ s|\n| |gsx; $preview =~ s|<\!\-\- page description\s*(.*?)\s*end page description \-\->.*?$|$1|gsi; # 2) Remove all info until an "" tag is found $preview =~ s|<\!\-\- begin document \-\->|8763023|g; $preview =~ s|.*?8763023||gsi; # 3) Remove standard tags $preview =~ s|||gsi; $preview =~ s|||gsi; $preview =~ s|.*?||gsi; $preview =~ s|

.*?

||gsi; $preview =~ s|

.*?

||gsi; $preview =~ s|

.*?