#!/usr/bin/perl -w
# -T can only be given on the command line (taint checking)
use strict; # homework by Eric Auer...

require "query-read.pl"; # read the index into a %hash
# usage: %{$hsh{$word}} has elements $hsh{$word}->{$file}
# which are position strings like "2,5,11". We also have special
# words: "title:word" and "wordcount:"

# Script-wide state used by the statements below.
my $oneword;    # word currently being examined / the user's search term
my %results;    # file name => relevance score for the current query
my %stopwords = map { $_ => 1 } qw(and und en);   # terms refused as queries
my $adjust;     # log of the largest per-file word count

# reftoindex() comes from query-read.pl and returns a reference to the
# index hash; a shallow copy of it is kept in %hsh.
my %hsh = %{ reftoindex() };


# print "$hsh{'ping'}->{'pong'}";


=pod
foreach $oneword (sort keys %hsh) {
  print "$oneword(";
  my $entry = join(",", keys %{$hsh{$oneword}});
  print "$entry) \n";
}
=cut


# Find the largest per-file word count stored under the special index
# word "wordcount:". Its natural logarithm is kept in $adjust and is
# added to every relevance score computed later on.
my $wordcount_of = $hsh{"wordcount:"} || {};
my $max_words = 0;
foreach my $count (values %{$wordcount_of}) {
  next unless defined $count;
  $max_words = $count if $count > $max_words;
}
# log() dies for arguments <= 0, so it is taken once, on the maximum,
# and only when at least one file is non-empty. Otherwise $adjust
# stays at 0.0.
$adjust = $max_words > 0 ? log($max_words) : 0.0;
# Print the count itself rather than exp(log(count)), so an empty index
# reports 0 words and no floating-point round trip is involved.
print "The longest file contains $max_words words\n";


# Interactive query loop.
#
# Repeatedly reads one search term from STDIN, lowercases it and:
#   * refuses it if it is a stopword,
#   * otherwise, if the index contains it, prints every file it occurs
#     in together with a score
#         $adjust + log(hits in file / words in file)
#     i.e. the log relative frequency shifted by $adjust,
#   * otherwise reports that it was not found.
# The loop ends on an empty line or on end of input.
do {
  %results = ();              # forget the scores of the previous query
  print "Search for which term?\n";
  $oneword = <STDIN>;
  # At EOF <STDIN> yields undef; treat that like an empty line so the
  # loop ends without "uninitialized value" warnings.
  $oneword = "" unless defined $oneword;
  chomp $oneword;
  $oneword = lc($oneword);    # only lowercase terms

  if (defined $stopwords{$oneword}) {
    print "You may not search for <$oneword>,"
     .    " that could lead to a huge list of hits\n";
  } elsif (defined $hsh{$oneword}) {
    print "$oneword was found. Results by relevance:\n";
    foreach my $onefile (keys %{$hsh{$oneword}}) {
      # Position strings look like "2,5,11": one field per occurrence.
      my @posn  = split(/,/, $hsh{$oneword}->{$onefile});
      # Number of occurrences is the element count, not the last index
      # ($#posn), which would be 0 for a single hit and make log() die.
      my $hits  = scalar @posn;
      my $total = $hsh{"wordcount:"}->{$onefile};
      # Skip files for which no score can be computed (no positions, or
      # missing/zero word count) instead of dying in log() or the division.
      next unless $hits > 0 && $total && $total > 0;
      $results{$onefile} = $adjust + log($hits / $total);
    }
    # Ascending by score, so the highest-scoring file is printed last;
    # ties are ordered by file name to keep the output deterministic.
    # NOTE(review): confirm ascending order is the intended presentation.
    foreach my $res (sort { $results{$a} <=> $results{$b} or $a cmp $b }
                     keys %results) {
      printf "%-20s (%3.3f)\n", $res, $results{$res};
    }
  } elsif (length $oneword) {
    print "$oneword was not found in any indexed file\n";
  }

  print "\n";
} while (length $oneword);    # "0" is a valid term, only "" ends the loop


print "\n";

