#! /usr/bin/perl

use strict;
use warnings;

# msf2fasta - convert an MSF-style alignment read from standard input into
# unaligned FASTA records written to standard output.
#
# The input is processed in two phases:
#   1. Header: every "Name: <id>" line declares one sequence; the header
#      ends at the "//" divider line.
#   2. Alignment: each line starting with a non-blank token is taken as one
#      alignment row.  Rows are assigned to the declared sequences in
#      round-robin order, i.e. the n-th row of every alignment block belongs
#      to the n-th declared name.
#
# Gap characters are removed and residues are upper-cased, so the output
# holds plain sequences, one ">name" record per declared sequence.

# The script takes no arguments; any argument is answered with the usage
# text.  It goes to STDERR because STDOUT is normally redirected into the
# output file and must not be polluted with help text.
if (@ARGV) {
    print STDERR <<'EOF';
Usage: msf2fasta < infile > outfile
 
Converts a MSF-format style alignment file to a fasta file

(c) by Goetz Schwandtner <goetz@informatik.uni-mainz.de> 09/2001
EOF

    exit -1;
}

my @names;    # sequence names, in the order of their "Name:" header lines

# Phase 1: eat up header lines until the double-slash divider is found.
# The divider must start its line (optionally indented); a "//" inside free
# header text, e.g. a URL, does not end the header.
while (defined(my $line = <>)) {
    last if $line =~ m{^\s*//};

    # collect the name from every line that describes a sequence
    if ($line =~ /Name: *(\S*)/) {
        push @names, $1;
    }
}

my $numofseq = scalar @names;
warn "msf2fasta: no 'Name:' entries found in the MSF header\n"
    unless $numofseq;

# One accumulator per declared sequence.  Pre-filling with empty strings
# lets a sequence without any alignment rows print as an empty record.
my @align = ('') x $numofseq;

# Phase 2: read the alignment.
# $next is the index of the sequence the next alignment row belongs to.
my $next = 0;

while (defined(my $line = <>)) {
    # chomp, not chop: a final line without a newline must keep its last
    # residue.
    chomp $line;

    # An alignment row starts in column one with the sequence name.  The
    # whole first token is the name (names may contain '|', '/', '.', ...),
    # everything after it is alignment text.  Blank lines and the indented
    # position-ruler lines do not match and are skipped.
    next unless $line =~ /^(\S+)\s*(.*)$/;
    my $residues = $2;

    # remove internal whitespace (block spacing, stray CR) if necessary
    $residues =~ s/\s//g;
    # remove gap characters since we want pure sequences:
    # '.' internal gap, '~' terminal gap, '-' gap as written by some tools
    $residues =~ tr/.~-//d;
    # make sequences uppercase
    $residues =~ tr/a-z/A-Z/;

    $align[$next] .= $residues;

    # advance round-robin through the declared sequences
    $next++;
    $next = 0 if $next >= $numofseq;
}

# now everything is collected - print one FASTA record per sequence

for my $idx (0 .. $#names) {
    print ">", $names[$idx], "\n", $align[$idx], "\n";
}
