#! /usr/bin/perl 

# Exactly one command line argument (the MSF file used as header) is
# required; otherwise show the usage text and exit with a failure status.
if (@ARGV != 1) {
    # The usage text goes to STDERR: STDOUT is normally redirected into the
    # MSF file being produced and should not receive diagnostics.
    print STDERR <<"EOF";
Usage: $0 header-file < hmma-output-file > msf-file

    Converts a hmma program output type alignment file to a msf-file
	using a msf-file as header file for the output file

(c) Goetz Schwandtner 2001
EOF

    exit -1;
}

# Read the header file: copy it to STDOUT up to (but excluding) the "//"
# separator line and collect the sequence names it declares.

# Three-argument open keeps characters in the file name from being
# interpreted as an open mode. Abort if the file cannot be read instead of
# silently producing an output file without header and without names.
open(my $header_fh, '<', $ARGV[0])
    or die "$0: cannot open header file '$ARGV[0]': $!\n";

$numofseq = 0;     # number of sequence names found so far
@names    = ();    # index -> sequence name
%numbers  = ();    # sequence name -> index (reverse lookup)

# Consume lines until the double-slash ("//") line is found or the file
# ends. defined() makes sure a last line consisting of "0" is not mistaken
# for end of file.
while (defined($line = <$header_fh>) && $line !~ m{//}) {
    # A "Name: xyz" entry declares a sequence; the name consists of word
    # characters (including "_") and "-".
    if ($line =~ /Name:\s*([\w_-]+)/) {
        $names[$numofseq] = $1;
        # hash for backward conversion: name -> index
        $numbers{$1} = $numofseq;
        $numofseq++;
    }
    # Every header line before the separator is passed through unchanged.
    print $line;
}

close($header_fh);

# Terminate the header part of the output file.
print "//\n";

# Skip everything on STDIN up to and including the first line that
# contains "alignment" ...
while (defined($line = <STDIN>) && $line !~ /alignment/) {
}

# ... and drop the single line that follows it as well.
$line = <STDIN>;

# One (initially empty) accumulator string per sequence named in the header.
# The extra slot keeps $align[0] defined even when no names were found.
@align = ('') x ($numofseq + 1);

# Collect the alignment rows until a line starting with "Alignment" is
# reached or the input ends.
while (defined($line = <STDIN>) && $line !~ /^Alignment/) {
    # Remove only the line terminator (LF or CRLF). chop() would also eat
    # the last residue of a final line that has no trailing newline, and a
    # left-over "\r" would keep the row from matching below.
    $line =~ s/\r?\n\z//;

    # A row consists of a name followed by a piece of its sequence.
    if ($line =~ /^(\S*)\s*(\S*)$/) {
        my ($name, $part) = ($1, $2);
        # Only rows whose first field is a name known from the header
        # contribute; everything else is ignored.
        if (defined($numbers{$name})) {
            $align[$numbers{$name}] .= $part;
        }
    }
}

# Write the sequences in blocks of at most 60 characters per row, one row
# per sequence and block, each block preceded by two empty lines.
# Continue while ANY sequence has data left: the sequences are expected to
# have the same length, but stopping on the first one alone would truncate
# longer sequences if they do not.
while (grep { defined($_) && $_ ne "" } @align[0 .. $numofseq - 1]) {
    print "\n\n";
    for ($i = 0; $i < $numofseq; $i++) {
        $align[$i] = "" unless defined($align[$i]);
        # Four-argument substr cuts the next chunk off the front. Unlike a
        # substitution whose $1 is read without checking for a match, an
        # exhausted sequence yields an empty chunk here instead of the
        # previous sequence's data.
        my $chunk = substr($align[$i], 0, 60, "");
        print $names[$i]."           ".$chunk."\n";
    }
}
