# hepburn7.pl
# Attempts to rearrange order of rules until it finds a consistent one

# Number of independent random-search trials to run (and average over).
$number_of_trials = 100;

# Open the three data files for reading.  The three-argument form of open
# keeps the mode ('<') separate from the file name, so the name is always
# taken literally and can never be misread as a mode or a pipe.  The
# bareword handle names are kept because the rest of the script reads from
# them.

# Forms to be converted, one per line.
$input_file = "Japanese-ToConvert.txt";
open(INFILE, '<', $input_file) or die "Warning! Can't open input file: $!\n";

# Expected conversions; read line-for-line alongside the input file.
$check_file = "Japanese-Converted.txt";
open(CHECKFILE, '<', $check_file) or die "Warning! Can't open check file: $!\n";

# Rewrite rules, one per line, with the two fields separated by a tab.
$rules_file = "JapaneseRules.txt";
open(RULESFILE, '<', $rules_file) or die "Warning! Can't open rule file: $!\n";

# Read the rules file and store each rule as a [ pattern, replacement ] pair
# at the end of the @rules array of arrays.  Each line holds the two fields
# separated by a tab.
while (my $line = <RULESFILE>) {
    chomp($line);
    my ($kunrei, $hepburn) = split("\t", $line);

    # A blank line (or one that starts with a tab) has no left-hand side.
    # An empty pattern in s/// does not mean "match nothing": Perl reuses the
    # last successfully matched pattern instead, so such a line must not
    # become a rule.
    next unless defined($kunrei) && length($kunrei);

    # A rule with nothing after the tab rewrites its match to the empty string.
    $hepburn = '' unless defined $hepburn;

    push(@rules, [ $kunrei, $hepburn ]);
}

# Every rule has been read; release the handle.
close(RULESFILE);

# Read the input forms and their expected outputs so we can learn from them.
# The two files are read in lockstep: line N of the check file is stored as
# the answer for line N of the input file.
my $missing_answers = 0;
while (my $line = <INFILE>) {
    chomp($line);
    push(@inputs, $line);

    my $check_line = <CHECKFILE>;
    if (defined $check_line) {
        chomp($check_line);
    }
    else {
        # The check file ran out before the input file did.  Store an empty
        # answer (an undefined value compares as the empty string under "eq"
        # anyway) and remember that it happened so it can be reported.
        $check_line = '';
        $missing_answers++;
    }
    push(@answers, $check_line);
}

warn "Warning! Check file has $missing_answers fewer line(s) than the input file\n"
    if $missing_answers;

# Both files have been consumed; release the handles.
close(INFILE);
close(CHECKFILE);

# The hypothesis space of rule orders is the set of all possible permutations
# of the rules (n! orderings for n rules).
# One way to explore them would be to go about it systematically (trying every
# possible permutation).
# Another possibility is to try permutations out randomly until you hit on one
# that works.
# (Neither is optimal, of course -- but given no better options, when would the
# first be sensible, and when might you prefer the second?)
# Here, we'll do the random "stabs in the dark" approach:

# Keep a copy of the starting rule order so every trial can go back to it.
# Only the order is copied: each element is the same [ pattern, replacement ]
# reference that @rules holds, which is enough because the search only ever
# reorders the rules.
for my $i (0 .. $#rules) {
    print "keeping original copy of rule $i\n";

    # Element access is $rules[$i]; the old "@rules->[$i]" spelling treats an
    # array as a reference and is a fatal error in modern Perl.
    push(@original_rules, $rules[$i]);
}

# Run the random search $number_of_trials times.  Each trial starts from the
# original rule order and keeps swapping two randomly chosen rules until the
# current order converts every input to its expected answer.  The number of
# swaps each trial needed is printed and added to $total_iterations.
for my $t (1 .. $number_of_trials) {

    # For each trial, we start at the start state and try solving it again.
    # Assigning the saved list resets @rules exactly; assigning undef would
    # leave a one-element array holding an undefined "rule".
    @rules = @original_rules;

    my $iterations     = 0;
    my $number_correct = 0;

    # NOTE: if no ordering of the rules converts every input correctly, this
    # loop never terminates.
    while ($number_correct != scalar(@inputs)) {
        $number_correct = 0;
        $iterations++;

        # Try flipping two rules.  rand() returns a fractional number, so it
        # is truncated with int() to get a whole array index.  With fewer
        # than two rules there is nothing to swap (and swapping in an empty
        # array would create a bogus undefined rule), so skip it.
        if (@rules > 1) {
            my $r1 = int(rand(scalar @rules));
            my $r2 = int(rand(scalar @rules));

            # Debugging aid: "%d" is a placeholder for a whole number; the
            # variables that fill the placeholders are listed, in order,
            # after the string.
            # printf "Flipping rules %d ($rules[$r1][0] -> $rules[$r1][1]) and %d ($rules[$r2][0] -> $rules[$r2][1])\n", $r1, $r2;
            @rules[$r1, $r2] = @rules[$r2, $r1];
        }

        for my $i (0 .. $#inputs) {
            # We'll start with the current input, and transform it by
            # applying every rule, in the current order.
            my $output = $inputs[$i];
            for my $rule (@rules) {
                my ($from, $to) = @$rule;
                $output =~ s/$from/$to/g;
            }

            # Now check the result against the "real" answer from the checkfile
            $number_correct++ if $output eq $answers[$i];
        }
    }

    $total_iterations += $iterations;
    print "Trial $t took $iterations iterations\n";
}

# Now that we're done, the average iterations is the total over the number of
# trials.  If $number_of_trials has been set to zero there is nothing to
# average, and dividing by it would abort the script with "Illegal division
# by zero", so that case is reported separately.
if ($number_of_trials > 0) {
    $average_iterations = $total_iterations / $number_of_trials;
    printf "\nAfter $number_of_trials trials, the average solution time is %.2f iterations\n", $average_iterations;
}
else {
    print "\nNo trials were run, so there is no average solution time to report\n";
}