#!/usr/bin/perl -w
# -T can only be given on the command line (taint checking)
use strict; # homework by Eric Auer...

# I use "X" as the special char
my $a = "aX*X*X*b"; # should become "aX*b"...
my $b = "aX*XXX*b";  # should become "aXXX*b"...

# possible occurances: X*, X+, X?, but also plain X
# idea: X* or X+ -> max is infinite
#       X or X?  -> increase max by one
#       X+ or X  -> increase min by one
# I am not using "while ($x =~ s/#//)" because that
# would be even more confusing ;-)

sub regexptune {
  my $a = $_[0]; # copy arg (argh! using @_ would trigger
                 # converting to num-of-args. Big yuck!
  my $min = 0;
  my $max = 0;
  my $out = "";
  my $i;
  my @aa = split(//,$a); # string to array (not using substr)
  for ($i=0; $i < length($a) ; ) {
    my $ch = $aa[$i];
    if ($ch ne "X") { # no longer busy with this part

#      print STDERR "char $ch, min $min, max $max\n";
      $out .= "X" x $min; # force the minimum number
      $max -= $min; # how many more are allowed?
      if ($max < 0) { # we have something with * in it
        $out .= "X*"; # add the infinity
      } else {
        $out .= "X?" x $max; # allow some more...
      }
      $out .= $ch; # add the current char, of course!
      $min = 0; # reset...
      $max = 0; # ...the count
      $i++; # we made it :-)

    } else { # start of an optimizeable area

      $i++;
      if ($i == length($a)) {
        if ($aa[$i] eq "X") {
          $min++; $max = ($max < 0) ? $max : ($max+1);
          # do not ignore X if it is the last char...
        }
        last;
      } # we fell off the loop
      $ch = $aa[$i]; # check the next: possibly * + or ?
#      print STDERR "X+char $ch, min $min, max $max\n";

      if ($ch eq "?") { $max = ($max < 0) ? $max : ($max+1); }
         # $min++ here was wrong
      elsif ($ch eq "*") { $max = -1; } # -1 is for infinity
      elsif ($ch eq "+") { $max = -1; $min++; }
      else { $min++; $max = ($max < 0) ? $max : ($max+1); $i--; }
      # the last case decrements $i because we started by assuming
      # to have a XY type two-char subexpression, but the else
      # means that we have found a single "X"...
      $i++; # we are done with another part :-)

    }
  }

#  print STDERR "finalizing: min $min, max $max\n";
  # work out remaining counter stuff...
  $out .= "X" x $min; # force the minimum number
  $max -= $min; # how many more are allowed?
  if ($max < 0) { # we have something with * in it
    $out .= "X*"; # add the infinity
  } else {
    $out .= "X?" x $max; # allow some more...
  }

  return $out; # return our optimized-to-the-max string!
}


print "$a optimizes to " . &regexptune($a) . "\n";
print "$b optimizes to " . &regexptune($b) . "\n";

