#!/usr/bin/perl
require 5;
use strict;

=item header

Fluid Dynamics Search Engine, Version 2.x
Copyright 1997-2000 by Zoltan Milosevic. Please adhere to the copyright
notice and conditions of use, described in the attached help file and hosted
at the URL below. For the latest version and help files, visit:

    http://www.xav.com/scripts/search/

This search engine is managed from the web, and it comes with a password to
keep it secure. You can set the password when you first visit this script
using the special "Mode=Admin" query string - for example:

    http://my.host.com/search.pl?Mode=Admin

=cut

# The whole program is kept in a string and run through eval() at the bottom
# of this file, so that a compile or runtime failure can be reported to the
# browser as an HTML page.
my $all_code = <<'END_OF_FILE';

use vars qw( $VERSION $realms %const %FORM %Rules @MonthNames @SearchTerms );
$VERSION = '2.0.0.0009';

# Give location relative to this script:
my $DataFilesDir = 'searchdata';

%const = (
    'help_file'         => 'http://www.xav.com/scripts/search/admin_help.html',
    'script_start_time' => time,
    'script_name'       => $ENV{'SCRIPT_NAME'},
    'admin_url'         => $ENV{'SCRIPT_NAME'} . '?Mode=Admin',
    'form_password'     => '',
    'request_method'    => 'POST',
    'log_file'          => 'search.log.txt',
    'temp_file'         => 'search.temp.txt',
    'back_file'         => 'search.back.txt',
    'pending_file'      => 'search.pending.txt',
    'realm_file'        => 'search.realms.txt',
    'file_mask'         => 0766,
);

@MonthNames = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');

my $No_Documents_Found = <<"EOM";
Results: No documents were found.
EOM

my ($is_error, $error_message, $Header, $Footer);

# MainBlock is a bare block so that any branch can abandon the request early
# with "last MainBlock".
MainBlock: {

    ($is_error, $error_message, $Header, $Footer) = load_files($DataFilesDir);
    if ($is_error) {
        print "Content-Type: text/html\015\012\015\012";
        print "\n\n$error_message\n\n";
        last MainBlock;
    }

    %FORM = ReadInput();

    # Normalised copy of the Mode parameter, used only for dispatch below.
    my $mode = defined($FORM{'Mode'}) ? $FORM{'Mode'} : '';

    if ($FORM{'NextLink'}) {

        # security re-director from admin screen (prevents query-string-based
        # password from showing up in referer logs of remote systems:
        print "Content-Type: text/html\015\012\015\012";

        # NextLink comes straight from the request, so it is HTML-escaped
        # before being echoed back into the page.
        print _html_escape($FORM{'NextLink'}) . "\n";

    }
    elsif ($mode eq 'AnonAdd') {

        print "Content-Type: text/html\015\012\015\012";
        print $Header;
        if (defined($FORM{'Realm'}) and defined($FORM{'URL'})) {
            AddURL(0, 1, $FORM{'Realm'}, $FORM{'URL'});
        }
        PrintAddRemoteSiteForm('Add Your Own Website', '', $FORM{'Realm'}, 1);
        PrintFooter($Footer, $Rules{'allowanonadd'});

    }
    elsif (($mode ne 'Admin') and (not $FORM{'Terms'})) {

        # No search terms supplied: show the search form and the tips page.
        print "Content-Type: text/html\015\012\015\012";
        print $Header;
        SearchForm();
        my ($ErrorMsg, $FileText) = ReadFile("templates/tips.htm");
        if ($ErrorMsg) {
            print "\n\n$ErrorMsg\n\n";
        }
        else {
            print $FileText;
        }
        PrintFooter($Footer, $Rules{'allowanonadd'});

    }
    elsif ($mode ne 'Admin') {

        print "Content-Type: text/html\015\012\015\012";

        # Anonymous search engine code:

        # Idea: add all non-forbidden terms as a string and add that as a phrase
        # with highest priority. Multiply hit relevance by length of string to
        # give longer search terms and phrases more weight.

        # This controls the display, so these extra terms aren't shown to the user:

        # Rank is the 1-based position of the first hit to display. It is
        # request data that is later used as a list-slice index and echoed
        # into the page, so only a positive integer is accepted; anything
        # else falls back to 1.
        my $Rank = 1;
        if (defined($FORM{'Rank'}) and ($FORM{'Rank'} =~ m!^(\d+)$!) and ($1 > 0)) {
            $Rank = $1;
        }

        my ($bTermsExist, $Ignored_Terms, $Important_Terms, $DocSearch, $RealmSearch, @search_terms)
            = parse_search_terms($FORM{'Terms'}, $FORM{'Match'});

        @SearchTerms = @search_terms;

        my $Realm = $FORM{'Realm'} ? $FORM{'Realm'} : 'All';

        my $NumPagesSearched = 0;
        my @HITS = ();

        if ($bTermsExist) {

            # Search terms have been formatted. Now search the database(s):
            # each sub populates @HITS as needed.

            # If Realm is specific, search it - otherwise search all:
            if ($Realm ne 'All') {
                my ($type, $file) = $realms->lookup($Realm, 'type', 'file');
                if ($type == 1) {
                    ($NumPagesSearched, @HITS) = SearchRunTime($Realm, $DocSearch);
                }
                else {
                    ($NumPagesSearched, @HITS) = SearchIndexFile($file, $RealmSearch);
                }
            }
            else {
                my $RH;
                my ($pages_searched, @hits) = (0);
                foreach $RH ($realms->list('has_file')) {
                    ($pages_searched, @hits) = SearchIndexFile($$RH{'file'}, $RealmSearch);
                    push(@HITS, @hits);
                    $NumPagesSearched += $pages_searched;
                }
                foreach $RH ($realms->list('is_runtime')) {
                    ($pages_searched, @hits) = SearchRunTime($$RH{'name'}, $DocSearch);
                    push(@HITS, @hits);
                    $NumPagesSearched += $pages_searched;
                }
            }
        }
        else {
            print "";
        }

        my ($HitCount, $PerPage, $Next, $summary) = (scalar @HITS, $Rules{'hits per page'});

        # A positive integer "maxhits" parameter overrides the configured
        # page size.
        if (($FORM{'maxhits'} =~ m!^(\d+)$!)
            and ($FORM{'maxhits'} > 0)) {
            $PerPage = $1;
        }

        # $Remaining is the number of hits left after this page; $Next is
        # how many of them the "Next" link will show.
        my $Remaining  = $HitCount - $Rank - $PerPage + 1;
        my $RangeUpper = $Rank + $PerPage - 1;

        if ($Remaining >= $PerPage) {
            $Next = $PerPage;
        }
        elsif ($Remaining > 0) {
            $Next = $Remaining;
        }
        else {
            $RangeUpper = $HitCount;
        }

        if ($Ignored_Terms) {
            $summary = "\nIgnored: $Ignored_Terms.\n";
        }
        else {
            $summary = "\n\n";
        }
        $summary .= "Your search for $Important_Terms found the following documents (of $NumPagesSearched documents searched):\n"
                  . "Displaying documents $Rank-$RangeUpper of $HitCount, with best matches first.\n\n";

        print $Header;
        print SelectAd(1, @SearchTerms);
        print "Search Results$summary";
        print SelectAd(2, @SearchTerms);

        PrintHits: {
            if ($HitCount < 1) {
                print $No_Documents_Found;
                last PrintHits;
            }
            print "\n\n";

            # Hits are plain strings; a reverse string sort puts the best
            # matches first. Each record is then split into its fields.
            my $i = $Rank;
            foreach ((reverse sort @HITS)[($Rank-1)..($RangeUpper-1)]) {
                next unless (m!^(\d+)\.(\d+) u= (.+) t= (.*?) d= (.*?) $!);
                my ($relevance, $URL, $Title, $Description) = ($1, $3, $4, $5);
                my ($DD, $MM, $YYYY, $FBYTES) = (unpack('A2A2A2A4A*', $2))[1..4];
                print StandardVersion($i, $URL, $Title, $Description, $FBYTES, $DD, $MonthNames[$MM], $YYYY);
                $i++;
            }
            print "Documents $Rank-$RangeUpper of $HitCount displayed.\n\n";
            print "\n\n";

            last PrintHits if ($Remaining < 1);

            my ($url_realm, $url_terms) = (url_encode($FORM{'Realm'}), url_encode($FORM{'Terms'}));
            print "\n\n Next ";
            print(($Next == 1) ? 'Match' : "$Next Matches");
            print " \n\n";
        }

        print SelectAd(3, @SearchTerms);
        SearchForm();
        print SelectAd(4, @SearchTerms);
        PrintFooter($Footer, $Rules{'allowanonadd'});
        log_search($FORM{'Terms'}, $HitCount);

    }
    elsif ($mode eq 'Admin') {

        my ($is_auth, $form_password, $url_password) = Authenticate($Rules{'password'});
        last MainBlock unless ($is_auth);

        $const{'form_password'} = $form_password;
        $const{'admin_url'} .= $url_password;

        print "Content-Type: text/html\015\012\015\012";
        print $Header;

        # is the $DataFilesDir writable?
        # (load_files() has already changed into it, so it is tested as '.')
        unless ((-R '.') and (-W '.') and (-d '.')) {
            print <<"EOM";

Data Folder Required

This script requires a writable folder named "$DataFilesDir".

A folder exists with that name, but it isn't readable and writable.

Give this folder RWX permissions for Everyone. Your ISP can usually assist with this.

Need help? Visit $const{'help_file'}.

EOM
            last MainBlock;
        }

        # Normalised copy of the Action parameter, used only for dispatch.
        my $action = defined($FORM{'Action'}) ? $FORM{'Action'} : '';

        if (not $action) {
            HTML_UI();
        }
        elsif ($action =~ m!^Add\s?URL$!) {

            # allow for single URL, this will need to be cleaned up.
            my @addresses_to_index = ();
            if (defined $FORM{'URL'}) {
                push(@addresses_to_index, $FORM{'URL'});
            }
            else {
                foreach (keys %FORM) {
                    next unless (m!^AddLink!);
                    push(@addresses_to_index, $FORM{$_});
                }
            }

            if (($FORM{'EntireSite'}) and ('1' eq $FORM{'EntireSite'})) {
                $FORM{'StartTime'} = $const{'script_start_time'} - 15;
                my $LimitSite = $FORM{'URL'};

                # turns http://io.com to http://io.com/
                $LimitSite .= '/' if ($LimitSite =~ m!^http://([^\/]+)$!i);

                # turns http://www.io.com/~bob to http://www.io.com/~bob/
                $LimitSite .= '/' if ($LimitSite =~ m!/([^\/\.]+)$!i);

                # turns http://io.com/index.html to http://io.com/
                $LimitSite = $1 if ($LimitSite =~ m!^(.*?)(\w+)\.(\w+)$!);

                $FORM{'Action'}    = 'CrawlEntireSite';
                $FORM{'LimitSite'} = $LimitSite;
            }
            AddURL(0, 0, $FORM{'Realm'}, @addresses_to_index);
        }
        elsif ($action eq 'Build') {
            my $StartFile = 0;
            if (($FORM{'StartFile'}) and ($FORM{'StartFile'} =~ m!^\d+$!)) {
                $StartFile = $FORM{'StartFile'};
            }
            BuildIndex($FORM{'Realm'}, $StartFile);
        }
        elsif ($action eq 'Review') {
            ReviewIndex($FORM{'Realm'});
        }
        elsif ($action eq 'ReCrawlRealm') {
            unless ($FORM{'StartTime'}) {
                $FORM{'StartTime'} = $const{'script_start_time'} - 5;
            }
            ReCrawlRealm($FORM{'Realm'});
        }
        elsif ($action eq 'CrawlEntireSite') {
            CrawlEntireSite($FORM{'Realm'});
        }
        elsif ($action eq 'MaintainRealm') {
            MaintainRealm($FORM{'Realm'});
        }
        elsif ($action eq 'ViewLog') {
            ViewLog($const{'log_file'});
        }
        elsif ($action eq 'Edit') {
            PrintEditRecordForm($FORM{'Realm'}, $FORM{'URL'});
        }
        elsif ($action eq 'SaveEditedRecord') {
            SaveEditedRecord();
        }
        elsif ($action eq 'DeleteRecord') {
            DeleteRecord();
        }
        elsif ($action eq 'forceUnLock') {
            forceUnLock();
        }
        elsif ($action eq 'CreateRealmForm') {
            CreateRealmForm($FORM{'Realm'});
        }
        elsif ($action eq 'CreateRealm') {
            CreateRealm($FORM{'Realm'}, $FORM{'File'}, $FORM{'BaseDir'}, $FORM{'BaseURL'});
        }
        elsif ($action eq 'DeleteRealm') {
            DeleteRealm($FORM{'Realm'});
        }
        elsif ($action eq 'AdPage') {
            AdPage($const{'request_method'}, $const{'script_name'}, $const{'form_password'}, %FORM);
        }
        elsif ($action eq 'AddForbidSite') {
            # Append the submitted URL to the existing space-separated rule.
            my ($write_failed, $write_message)
                = WriteRule('settings.txt', 'forbidsites', "$Rules{'forbidsites'} $FORM{'URL'}");
            if ($write_failed) {
                print "\n\nError: could not save Forbid Sites array. Function WriteRule returned: $write_message\n\n";
            }
            else {
                # The URL is request data, so it is escaped before display.
                my $shown_url = _html_escape($FORM{'URL'});
                print "\n\nURL '$shown_url' is now forbidden.\n\n";
            }
        }
        else {
            HTML_UI();
        }
        Admin_HTML_Footer();

    }
    else {
        # Defensive only: the branches above already cover every Mode value.
        print "\n\nNo action path followed; unhandled error\n\n";
    }
}

=item load_files

Usage:

    my ($is_error, $error_message, $header, $footer) = load_files($data_files_dir);

This function attempts to load all the script-specific data from files;
specifically, it:

    changes directory to the folder that contains this script

    requires "common.pl", "fdse_realms.pl" and "search_ads.pl" from searchmods/

    changes directory to $data_files_dir

    parses settings.txt and puts the contents into the global %Rules hash

    reads templates/header.htm and stores contents in $header

    reads templates/footer.htm and stores contents in $footer

    creates the global $realms object and loads the realm file into it

A failure in any step up to and including the footer is a fatal error:
$is_error is returned as 1 and $error_message describes the problem. The
result of loading the realm file is not checked.

=cut

sub load_files {
    my ($data_files_dir) = @_;
    my ($is_error, $error_message, $header, $footer) = (0, '', '', '');

    # A bare block runs once as a loop: "next" leaves it through the
    # continue block below (flagging the error), "last" leaves it without
    # running the continue block (success).
    FileFolderCheck: {

        # This manually sets the current working directory to the directory that
        # contains this script. This is necessary in case people have used a
        # relative path to the $DataFilesDir:
        if ($0 =~ m!^(.*)(\\|/)!) {
            my $script_dir = $1;
            chdir($script_dir);
            push(@INC, "$script_dir/searchmods");
        }

        # The modules are required by a path relative to the script folder.
        # Perl 5.26 and later no longer search '.' by default, so restore it
        # (at the end of @INC, where older perls kept it) when it is absent.
        push(@INC, '.') unless (grep { $_ eq '.' } @INC);

        foreach my $module ('common.pl', 'fdse_realms.pl', 'search_ads.pl') {
            eval { require "searchmods/$module"; };
            if ($@) {
                $error_message = "Failed to require '$module' - $@.";
                next FileFolderCheck;
            }
        }

        # Next we chdir() into the $DataFilesDir. All lookups on data files will
        # use their relative names so we have to be able to access that directory.
        # If chdir fails then this script will fail right away.
        unless (chdir($data_files_dir)) {
            $error_message = <<"EOM";

Error - Data Folder Required

This script requires a writable folder named "$data_files_dir". Received error "$!" when trying to cd to that folder.

Need help? Visit $const{'help_file'}.

EOM
            next;
        }

        # Can we load the rules?
        ($error_message, %Rules) = LoadRules('settings.txt');
        next if ($error_message);

        ($error_message, $header) = ReadFile("templates/header.htm");
        next if ($error_message);

        ($error_message, $footer) = ReadFile("templates/footer.htm");
        next if ($error_message);

        $realms = fdse_realms->new;
        $realms->load_from_file($const{'realm_file'});

        last;
    }
    continue {
        $is_error = 1;
    }
    return ($is_error, $error_message, $header, $footer);
}

# Private helper: returns $text with the HTML metacharacters & < > "
# replaced by entities, so request data can be shown in a page safely.
# An undefined argument is treated as the empty string.
sub _html_escape {
    my ($text) = @_;
    $text = '' unless (defined($text));
    $text =~ s!&!&amp;!g;
    $text =~ s!<!&lt;!g;
    $text =~ s!>!&gt;!g;
    $text =~ s!"!&quot;!g;
    return $text;
}

END_OF_FILE

# Run the program; if it died (compile or runtime), report the reason to the
# browser as an HTML page.
undef($@);
eval $all_code;
if ($@) {
    print "Content-Type: text/html\015\012\015\012";
    print "\n\nError in $0:\n\n$@";
}