#! /usr/bin/perl

# Converts the FASTA section of an MSA-program alignment output (read from
# STDIN) into an MSF-style alignment on STDOUT.
#
# The single command-line argument names an existing MSF file.  Everything in
# that file up to (but not including) the first line containing "//" is copied
# to the output unchanged, and the sequence names are collected from its
# "Name:" entries.  The alignment rows are then taken from the FASTA block of
# the input and written in chunks of at most 60 characters per sequence.

use strict;
use warnings;

if (@ARGV != 1) {
    # The usage text goes to STDERR: in the documented invocation STDOUT is
    # redirected into the msf-file, where the user would never see it.
    print STDERR <<"EOF";
Usage: $0 headerfile < msa-output-file > msf-file

    Converts a MSA program output type alignment file to a msf-file
	using a msf-file as header file for the output file

(c) Goetz Schwandtner 2001
EOF

    exit -1;
}

# --- Header: copy it through and collect the sequence names ---------------

my $header_file = $ARGV[0];
open(my $header, '<', $header_file)
    or die "Cannot open header file '$header_file': $!\n";

my @names;

# Copy lines until the "//" separator line is found; the alignment starts
# after it.  defined() keeps a final line of "0" without a newline from
# ending the loop early.
while (defined(my $line = <$header>)) {
    last if $line =~ m{//};

    # A "Name:" entry followed by a token of word characters, "_" and "-"
    # declares one sequence; the order of appearance is the output order.
    if ($line =~ /Name:\s*([\w_-]+)/) {
        push @names, $1;
    }
    print $line;
}
close($header);

my $numofseq = scalar @names;

print "//\n";

# --- Input: locate the FASTA block ----------------------------------------

# Skip all leading lines up to and including the one mentioning "FASTA".
while (defined(my $line = <STDIN>)) {
    last if $line =~ /FASTA/;
}

# The line directly after the FASTA marker is discarded as well.
my $ignored = <STDIN>;

# --- Input: read one alignment row per sequence ---------------------------

# One extra slot beyond the named sequences, mirroring the index range the
# parser below can reach.
my @align = ('') x ($numofseq + 1);

# Index of the sequence currently being read; -1 means no ">" line seen yet.
my $i = -1;

while (defined(my $line = <STDIN>)) {
    last if $i >= $numofseq;

    # Strip the line terminator only (LF or CRLF).  Unlike chop, this never
    # removes a residue from a final line that lacks a newline.
    $line =~ s/\r?\n\z//;

    if ($line =~ /^>/) {
        # A ">" name line starts the next sequence.
        $i++;
    }
    elsif ($line =~ /^\s*$/) {
        # Empty lines are skipped, except that an empty line after the last
        # expected sequence marks the end of the FASTA block.
        $i++ if $i == $numofseq - 1;
    }
    elsif ($i >= 0) {
        # Sequence data: append this piece to the current alignment row.
        $align[$i] .= $line;
    }
}

die "Format error: Input file truncated?" if ($i < $numofseq);

# --- Output: write the rows in blocks of up to 60 columns -----------------

# All rows are expected to have the same length, so the first row decides
# when the output is complete.
while ($align[0] ne '') {
    print "\n\n";
    for my $seq (0 .. $numofseq - 1) {
        # Four-argument substr removes and returns the next chunk.  For a row
        # that is already exhausted it yields an empty string, so no chunk
        # left over from another sequence can be printed (a failed s///
        # match would leave a stale $1 behind).
        my $chunk = substr($align[$seq], 0, 60, '');
        print $names[$seq] . "           " . $chunk . "\n";
    }
}
