#! /usr/bin/perl -w

# Exactly one command line argument (the order file) is required.
# Anything else prints the usage text and terminates with a failure status.
if (@ARGV != 1) {
    print <<"EOF";
$0 - convert statistical data (mscore) output files to gnuplot input files

Usage: $0 orderfile < timestats-file

Orderfile should be a file containing the filenames in the desired order.
Output is written to the files numseqs.gnuplot, shortest.gnuplot and
longest.gnuplot in the current directory.

(c) Goetz Schwandtner 2002
EOF
    exit -1;
}

######################################################################
# read in order file
######################################################################
# The order file lists one filename per line; its line order defines the
# index each file gets in the gnuplot output.
open(my $order_fh, '<', $ARGV[0])
    or die "Cannot open order file '$ARGV[0]': $!";

# array containing all filenames read (index -> filename)
@numtoname = ();
# hash for back conversion (filename -> index)
%nametonum = ();

while (my $line = <$order_fh>) {
    # chomp only strips a trailing newline, so a last line without one
    # keeps its final character (chop would cut it off)
    chomp($line);

    # a line that consists entirely of whitespace carries no filename
    next if $line =~ /^\s*$/;

    push @numtoname, $line;
    # if a name occurs twice, the later index wins
    $nametonum{$line} = $#numtoname;
}

close($order_fh);

# now read the statistics from STDIN
# An "Alignment: <name>" line starts the record for the file <name>; the
# "Num_seq:", "shortest_orig:" and "longest_orig:" lines that follow it
# are stored for that file. All other lines are ignored.

# index (into @numtoname) of the record currently being read;
# -1 means no "Alignment:" header has been seen yet
$entrynum = -1;

# arrays to store the values, indexed like @numtoname
@numseq = ();
@shortest = ();
@longest = ();

while (my $line = <STDIN>) {
    # chomp only strips a trailing newline; chop would eat the last digit
    # of a value if the final input line has no newline
    chomp($line);

    if ($line =~ /^Alignment: (.*)$/) {
        # found new header line; the name must be known from the order file
        my $name = $1;
        if (!exists($nametonum{$name})) {
            die "Found undefined file name: $line";
        }
        $entrynum = $nametonum{$name};
    } elsif ($line =~ /^Num_seq: (\d*)$/) {
        # got the number of sequences of the current alignment
        die "Input file format error: Num_seq!" if $entrynum == -1;
        $numseq[$entrynum] = $1;
    } elsif ($line =~ /^shortest_orig: (\d*)$/) {
        # got the shortest_orig value of the current alignment
        die "Input file format error: shortest_orig!" if $entrynum == -1;
        $shortest[$entrynum] = $1;
    } elsif ($line =~ /^longest_orig: (\d*)$/) {
        # got the longest_orig value of the current alignment
        die "Input file format error: longest_orig!" if $entrynum == -1;
        $longest[$entrynum] = $1;
    } # else ignore line
}

######################################################################
# print gnuplot output
######################################################################

# Write one "<index> <value>" line per order-file entry to $filename.
#   $filename - name of the output file (created or truncated)
#   $values   - reference to an array indexed like @numtoname
# Dies if the file cannot be opened or closed. An entry without a value
# is still written (with an empty value column) and reported on STDERR.
sub write_gnuplot_series {
    my ($filename, $values) = @_;

    open(my $out, '>', $filename)
        or die "Cannot open '$filename' for writing: $!";

    # $#numtoname is the LAST index, so the range must include it;
    # the former "$i < $#numtoname" silently dropped the final entry
    for my $idx (0 .. $#numtoname) {
        my $value = $values->[$idx];
        if (!defined($value)) {
            warn "No value for '$numtoname[$idx]' while writing $filename\n";
            $value = '';
        }
        print $out $idx . " " . $value . "\n";
    }

    # buffered write errors only show up on close
    close($out) or die "Cannot close '$filename': $!";
}

write_gnuplot_series('numseqs.gnuplot',  \@numseq);
write_gnuplot_series('shortest.gnuplot', \@shortest);
write_gnuplot_series('longest.gnuplot',  \@longest);
