#!/usr/bin/perl
require 5;
use strict;

=head1 copyright

Fluid Dynamics Search Engine

Copyright 1997-2001 by Zoltan Milosevic.  Please adhere to the copyright
notice and conditions of use, described in the attached help file and hosted
at the URL below.  For the latest version and help files, visit:

	http://www.xav.com/scripts/search/

This search engine is managed from the web, and it comes with a password to
keep it secure.  You can set the password when you first visit this script
using the special "Mode=Admin" query string - for example:

	http://my.host.com/search.pl?Mode=Admin

If you edit the source code, you'll find it useful to restore the function comments and #&Assert checks:

	cd "search/searchmods/powerusr/"
	hacksubs.pl build_map
	hacksubs.pl restore_comments
	hacksubs.pl assert_on

=cut

# Package globals. They are declared with "use vars" so that the program text
# held in $all_code (compiled later by a string eval in this same package) can
# use them under "use strict" without further declaration.
use vars qw( $VERSION %FORM $realms %const %Rules $global_lockfile_count @lang_strings );

# Release identifier. load_files() refuses to continue unless the first line
# of the language strings file reads "VERSION <this value>".
$VERSION = '2.0.0.0037';

# The whole program is kept in a non-interpolating here-document and executed
# through eval near the bottom of the file, so that compile-time and run-time
# failures alike end up in $@ and can be shown as an HTML error page.
my $all_code = <<'END_OF_FILE';
# Protect the caller's $_ for the duration of the eval'd program.
local $_;

$global_lockfile_count = 1;

# Guarantee SCRIPT_NAME is at least an empty string so that the string
# operations below never see an undefined value (e.g. when run from a shell).
$ENV{'SCRIPT_NAME'} ||= '';

# Script-wide settings.  All URLs are derived from SCRIPT_NAME so the script
# keeps working when it is renamed or moved.
%const = (
	# where and when
	'script_start_time' => time(),
	'script_name'       => $ENV{'SCRIPT_NAME'},
	'search_url'        => $ENV{'SCRIPT_NAME'},
	'admin_url'         => $ENV{'SCRIPT_NAME'} . '?Mode=Admin',
	'help_file'         => 'http://www.xav.com/scripts/search/admin_help.html',

	# form handling
	# warning - if you change 'request_method' to GET, most admin actions will
	# fail because browsers don't transfer more than 2000 bytes per GET request
	'request_method'    => 'POST',
	'form_password'     => '',

	# data files
	'log_file'          => 'search.log.txt',
	'pending_file'      => 'search.pending.txt',
	'file_mask'         => 0766,
	);

my $err_msg = '';

# Set once the HTTP header of the regular search page has been printed, so that
# the error handler at the bottom does not write a second header into the body.
my $header_sent = 0;

# Main request handler.
#
# Loads the configuration, then serves exactly one of: a redirect for
# URL-shaped search terms, the NextLink re-director, the admin screens, the
# anonymous-add screen, the empty search form, or a page of search results.
#
# Control flow: a bare block is a loop that runs once.  Every successful path
# leaves with "last Err", which skips the continue block.  A failure stores a
# message in $err_msg and does "next Err", which runs the continue block and
# so prints the error.
Err: {

	# Give the folder where all data files are located:
	$err_msg = &load_files( 'searchdata' );
	next Err if ($err_msg);


	my $address_offer = '';

	#reverse compat - 0035
	my $terms = $FORM{'Terms'} || $FORM{'terms'} || $FORM{'q'} || '';
	$FORM{'Terms'} = $FORM{'terms'} = $FORM{'q'} = $terms;
	#/reverse compat

	# When the rule is enabled (> 1) and the query is a single token that looks
	# like a URL, format language string 3 with that address; when the rule is
	# exactly 3, redirect the visitor there instead of searching.
	AddressAsTerm: {
		last unless ($Rules{'handling url search terms'} > 1);
		# the whitespace test also keeps CR/LF out of the Location header below
		last if ($terms =~ m!\s!);
		my $address = '';
		if ($terms =~ m!^(http|ftp|https|telnet)://(\w+)\.(\w+)(.*)$!) {
			$address = $terms;
			}
		elsif ($terms =~ m!^www\.(\w+)\.(\w+)(.*)$!i) {
			$address = "http://$terms";
			}
		if ($address) {
			$address_offer = sprintf( $lang_strings[3], &html_encode($address), &html_encode($address) );
			if ($Rules{'handling url search terms'} == 3) {
				print "Status: 302 Moved Temporarily\015\012";
				print "Location: $address\015\012";
				print "Content-Type: text/html\015\012\015\012";
				print $address_offer;
				last Err;
				}
			}
		}



	if ($FORM{'NextLink'}) {
		#changed 0034 - fixes bug where NextLink contains &
		if (($ENV{'QUERY_STRING'}) and ($ENV{'QUERY_STRING'} =~ m!^NextLink=(.*)$!)) {
			$FORM{'NextLink'} = $1;
			}
		my $html_link = &html_encode($FORM{'NextLink'});
		# security re-director from admin screen (prevents query-string-based
		# password from showing up in referer logs of remote systems:
		print "Content-Type: text/html\015\012\015\012";
		print "<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;URL=$html_link\"></HEAD><A HREF=\"$html_link\">$html_link</A>";
		last Err;
		}

	if ($FORM{'Mode'} eq 'Admin') {
		$err_msg = &admin_main();
		next Err if ($err_msg);
		last Err;
		}

	# improve perceived snappiness
	$| = 1;
	print "Content-Type: text/html\015\012\015\012";
	$header_sent = 1;
	&PrintTemplate( 0, 'header.htm', $Rules{'language'} );
	$| = 0;

	if ($FORM{'Mode'} eq 'AnonAdd') {
		&anonadd_main();
		}
	elsif (not ($terms)) {
		print &str_search_form( $const{'search_url'} );
		&PrintTemplate( 0, 'tips.htm', $Rules{'language'}, \%const );
		}
	else {

		if ($address_offer) {
			&pprintf( $address_offer );
			}

		# 1-based position of the first hit to show.  Anything that is not a
		# positive integer falls back to 1; otherwise a negative or textual
		# value would produce negative slice indexes below and would be
		# echoed back to the browser.
		my $Rank = 1;
		if ((defined($FORM{'Rank'})) and ($FORM{'Rank'} =~ m!^(\d+)$!) and ($1 > 0)) {
			$Rank = $1;
			}

		my ($bTermsExist, $Ignored_Terms, $Important_Terms, $DocSearch, $RealmSearch, $where_clause, @SearchTerms) = &parse_search_terms($terms, $FORM{'Match'});

		my $Realm = $FORM{'Realm'} || 'All';

		my $NumPagesSearched = 0;
		my @HITS = ();

		# Collect matching records into @HITS, either from the SQL back end or
		# from the per-realm index files / runtime realms.
		Search: {
			next Search unless ($bTermsExist);

			my $p_realm_data;

			if ($Rules{'sql: enable'}) {
				if ($Realm ne 'All') {
					($err_msg, $p_realm_data) = &hashref($realms, $Realm );
					next Err if ($err_msg);
					$where_clause .= " AND realm_id = $$p_realm_data{'realm_id'}";
					$NumPagesSearched = $$p_realm_data{'pagecount'};
					}
				else {
					foreach $p_realm_data (&listrealms($realms, 'all')) {
						$NumPagesSearched += $$p_realm_data{'pagecount'};
						}
					}
				# SearchDatabase wants a counter reference; the page count was
				# already taken from the realm data above, so this one is a dummy
				my $ignored_count = 0;
				&SearchDatabase($where_clause, $DocSearch, \$ignored_count, \@HITS);
				}
			else {
				if ($Realm ne 'All') {
					($err_msg, $p_realm_data) = &hashref($realms, $Realm );
					next Err if ($err_msg);
					if ($$p_realm_data{'is_runtime'}) {
						&SearchRunTime($Realm, $DocSearch, \$NumPagesSearched, \@HITS);
						}
					else {
						&SearchIndexFile($$p_realm_data{'file'}, $RealmSearch, \$NumPagesSearched, \@HITS);
						}
					}
				else {
					foreach $p_realm_data (&listrealms($realms,'has_file')) {
						&SearchIndexFile($$p_realm_data{'file'}, $RealmSearch, \$NumPagesSearched, \@HITS);
						}
					foreach $p_realm_data (&listrealms($realms,'is_runtime')) {
						&SearchRunTime($$p_realm_data{'name'}, $DocSearch, \$NumPagesSearched, \@HITS);
						}
					}
				}
			}

		my ($HitCount, $PerPage, $Next) = (scalar @HITS, $Rules{'hits per page'}, 0);

		# a positive integer "maxhits" form value overrides the configured page size
		if (($FORM{'maxhits'} =~ m!^(\d+)$!) and ($FORM{'maxhits'} > 0)) {
			$PerPage = $1;
			}

		# $Remaining = hits left after this page; $RangeUpper = rank of the
		# last hit on this page (clamped to $HitCount on the final page)
		my $Remaining = $HitCount - $Rank - $PerPage + 1;
		my $RangeUpper = $Rank + $PerPage - 1;


		if ($Remaining >= $PerPage) {
			$Next = $PerPage;
			}
		elsif ($Remaining > 0) {
			$Next = $Remaining;
			}
		else {
			$RangeUpper = $HitCount;
			}
		my $p_Ads = &SelectAdEx( \@SearchTerms );
		print $$p_Ads[0];

		print '<B>' . $lang_strings[10] . '</B><BR>';

		if ($Ignored_Terms) {
			printf( $lang_strings[11], &html_encode($Ignored_Terms));
			}

		if ($HitCount) {
			printf( $lang_strings[12], &html_encode($Important_Terms), $NumPagesSearched);
			}
		else {
			printf( $lang_strings[13], &html_encode($Important_Terms), $NumPagesSearched);
			}

		print '<BR>';
		print $$p_Ads[1];

		PrintHits: {
			if ($HitCount < 1) {
				print "<P>$lang_strings[19]</P>\n";
				last PrintHits;
				}

			printf( $lang_strings[14], $Rank, $RangeUpper, $HitCount );

			# Hits are sorted as strings, highest first, and the slice picks
			# out the current page.
			my $i = $Rank;
			foreach ((reverse sort @HITS)[($Rank-1)..($RangeUpper-1)]) {
				# Record layout: <relevance>.<packed date+size> ... u= URL t= title d= description c= context
				# (records that do not parse are skipped and do not consume a rank number)
				next unless (m!^(\d+)\.(\d+)\s*\d*\s*\d* u= (.+) t= (.*?) d= (.*?) c= (.*?)$!);
				my ($packed, $URL, $Title, $Description, $context) = ($2, $3, $4, $5, $6);
				# the first two characters of the packed field are skipped; then 2+2+4 date characters, remainder is the size
				my ($DD, $MM, $YYYY, $FBYTES) = (unpack('A2A2A2A4A*', $packed))[1..4];
				print &StandardVersion(
					\@SearchTerms,
					'redirector' => $Rules{'redirector'},
					'rank' => $i,
					'url' => $URL,
					'title' => $Title,
					'description' => $Description,
					'size' => $FBYTES,
					'dd' => $DD,
					'mm' => $MM,
					'yyyy' => $YYYY,
					'context' => $context,
					);
				$i++;
				}

			# Every user-supplied value that goes into the paging link is
			# URL-encoded, including Match, so it cannot break out of the link.
			my ($url_realm, $url_terms, $url_match) = (&url_encode($FORM{'Realm'}), &url_encode($terms), &url_encode($FORM{'Match'}));
			my $linkhits = "$const{'search_url'}?Realm=$url_realm&Match=$url_match&Terms=$url_terms";

			#changed 0035 - optimize against loading common_parse_page.pl
			unless (&realm_count($realms,'is_runtime')) {
				$linkhits .= '&nocpp=1';
				}

			print &str_jumptext( $Rank, $PerPage, $HitCount, "$linkhits&Rank=", 1 );
			}
		print $$p_Ads[2];
		print &str_search_form( $const{'search_url'} );
		print $$p_Ads[3];
		&log_search( $Realm, $terms, $Rank, $HitCount, $NumPagesSearched );
		}

	if (($Rules{'allowanonadd'}) and (&realm_count($realms,'has_no_base_url'))) {
		&PrintTemplate( 0, 'linkline2.txt', $Rules{'language'}, \%const );
		}
	else {
		&PrintTemplate( 0, 'linkline1.txt', $Rules{'language'}, \%const );
		}
	&PrintTemplate( 0, 'footer.htm', $Rules{'language'} );
	# must leave explicitly: falling off the end of the block would run the
	# continue block and print a bogus error
	last Err;
	}
continue {
	# Reached only through "next Err".  The header is printed here unless the
	# search page already sent one.
	print "Content-Type: text/html\015\012\015\012" unless ($header_sent);
	print "<P><B>Error:</B> $err_msg.</P>\n";
	}





# load_files( $data_files_dir )
#
# Prepares the environment for a request:
#   - changes into the script's own directory and extends @INC so the
#     'searchmods' libraries can be required;
#   - reads the form input into %FORM and applies the backward-compatible
#     parameter rewrites;
#   - requires the admin libraries when the request needs them;
#   - changes into $data_files_dir and loads %Rules, @lang_strings and $realms.
#
# Returns a false value on success, or an error message string on failure.
sub load_files {
	my ($data_files_dir) = @_;

	my $err_msg = '';
	local $_;

	# Work from the directory that contains this script, in case a relative
	# path was given for $data_files_dir.
	my @lib_dirs = ('./searchmods', '../searchmods');
	if ($0 =~ m!^(.*)(\\|/)!) {
		my $script_dir = $1;
		chdir($script_dir);
		unshift(@lib_dirs, "$script_dir/searchmods");
		}
	push(@INC, @lib_dirs);

	if (not -e 'searchmods') {
		return "directory 'searchmods' does not exist";
		}

	require 'common.pl';
	&ReadInput(\%FORM);

	unless ($FORM{'nocpp'}) {
		require 'common_parse_page.pl';
		}

	# An ApproveRealm request is shorthand for the admin "show pending" screen.
	if ($FORM{'ApproveRealm'}) {
		@FORM{'Realm', 'Mode', 'Action', 'subaction'} =
			($FORM{'ApproveRealm'}, 'Admin', 'FilterRules', 'ShowPending');
		}

	#reverse compat - pre-0010: AddSite=<url> used to mean "anonymous add"
	if ((not $FORM{'Mode'}) and ($FORM{'AddSite'})) {
		$FORM{'Mode'} = 'AnonAdd';
		$FORM{'URL'} = delete $FORM{'AddSite'};
		}
	#/reverse compat

	#reverse compat 0030: the ReCrawlRealm action was renamed to rebuild
	if (($FORM{'Action'}) and ($FORM{'Action'} eq 'ReCrawlRealm')) {
		$FORM{'Action'} = 'rebuild';
		}
	#/reverse compat

	# Admin and AnonAdd requests (and any run with FDSE_NO_EXEC set) need the
	# admin libraries and the complete strings file.
	my %full_load_mode = ('Admin' => 1, 'AnonAdd' => 1);
	my $wants_full_load = 0;
	if (($FORM{'Mode'}) and ($full_load_mode{$FORM{'Mode'}})) {
		$wants_full_load = 1;
		}
	if ($ENV{'FDSE_NO_EXEC'}) {
		$wants_full_load = 1;
		}

	if ($wants_full_load) {
		require 'common_admin.pl';
		require 'common_parse_page.pl';
		}

	if (not chdir($data_files_dir)) {
		return "unable to chdir to data files directory '$data_files_dir' - $!";
		}
	$const{'bypass_file_locking'} = (-e 'bypass_file_locking.txt') ? 1 : 0;

	# Can we load the rules?
	($err_msg, %Rules) = &LoadRules();
	return $err_msg if ($err_msg);

	# Language strings: one per line, stored 1-based (slot 0 is a placeholder).
	my $strings_path = join('/', 'templates', $Rules{'language'}, 'strings.txt');

	my $lock = &LockFile_new();
	my $p_rhandle;
	($err_msg, $p_rhandle) = &Read($lock, $strings_path);
	return $err_msg if ($err_msg);

	@lang_strings = (0);
	my $line_no = 0;
	while (defined($_ = readline($$p_rhandle))) {
		$line_no++;
		s![\r\n\015\012]!!g;
		push(@lang_strings, $_);
		# ordinary searches stop reading after the first 70 lines
		last if ((not $wants_full_load) and ($line_no > 69));
		}
	$err_msg = $lock->Close();
	return $err_msg if ($err_msg);

	if (&Trim($lang_strings[1]) ne "VERSION $VERSION") {
		return "strings file is not version $VERSION ($lang_strings[1])";
		}

	$realms = &fdse_realms_new();
	$realms->use_database( $Rules{'sql: enable'} );
	$realms->load();

	if (-e 'is_demo') {
		$const{'is_demo'} = 1;
		}

	return $err_msg;
	}

END_OF_FILE

# Run the program text.  A string eval is used so that syntax errors in
# $all_code are trapped in $@ just like run-time die()s, and can be reported
# to the browser instead of producing a bare server error.
undef($@);
eval $all_code;
if ($@) {
	# Everything echoed into the page is escaped first.  The error text and
	# the submitted form fields can contain visitor-supplied data, so printing
	# them raw would allow markup injection into this error page.
	my $errstr = &fdse_bug_report_escape($@);
	my $script = &fdse_bug_report_escape($0);
	print "Content-Type: text/html\015\012\015\012";
	print "<HR><P><B>Perl Execution Error</B> in $script:</P><BLOCKQUOTE><PRE>$errstr</PRE></BLOCKQUOTE>";
print <<"EOM";

<FORM METHOD="post" ACTION="http://www.xav.com/bug.pl">
<INPUT TYPE=hidden NAME="product" VALUE="search">
<INPUT TYPE=hidden NAME="version" VALUE="$VERSION">
<INPUT TYPE=hidden NAME="Perl Version" VALUE="$]">
<INPUT TYPE=hidden NAME="Script Path" VALUE="$script">
<INPUT TYPE=hidden NAME="Perl Error" VALUE="$errstr">
EOM

	# Pass the submitted form along with the bug report, one hidden field per
	# form parameter.
	foreach my $name (sort keys %FORM) {
		my $safe_name = &fdse_bug_report_escape($name);
		my $safe_value = &fdse_bug_report_escape($FORM{$name});
		print "<INPUT TYPE=hidden NAME=\"Form: $safe_name\" VALUE=\"$safe_value\">\n";
		}
print <<"EOM";

<P>Please report this error to the script author:</P>
<BLOCKQUOTE><INPUT TYPE="submit" VALUE="Report Error"></BLOCKQUOTE>
</FORM><HR>

EOM

	}

# fdse_bug_report_escape( $text )
#
# Returns $text made safe for use in HTML body text and in double-quoted
# attribute values: & " < > are replaced by their entities.  An undefined
# argument yields the empty string.  Defined here rather than taken from the
# libraries because this code must work even when the libraries failed to load.
sub fdse_bug_report_escape {
	my ($text) = @_;
	$text = '' unless (defined($text));
	# the ampersand must be handled first so the entities added below survive
	$text =~ s!\&!\&amp;!g;
	$text =~ s!\"!\&quot;!g;
	$text =~ s!\<!\&lt;!g;
	$text =~ s!\>!\&gt;!g;
	return $text;
	}

1;
