#!/usr/bin/perl
#
# Copyright 2007, M.L. Hekkelman, CMBI
#
# This is a simple script that takes from the commandline
# a databank name and a sequence (or a filename for a file
# containing a fasta formatted sequence).
# The script then performs a blast against the databank
# on a blast soap server and waits for the job to finish.
# Then it prints out in fasta format each sequence producing
# a hit.
#
# Example:
# /home/vriend/whatif/blast/gerts-blast.pl -d sprot -f <file> -o file_ids.list -l 100
# /home/vriend/whatif/blast/gerts-blast.pl -d sprot -s SCPP...LLKH -o file_ids.list -l 2500
#
# Parameters
# -d   databank         (see http://mrs.cmbi.ru.nl/mrs-3/status.do under ID, check in
#                        BLAST under 'Choose the databank to search' for indexed ones)
#                       (e.g. sprot, uniprot, etc.)
# -f   <file>           (<file> is name of file (including path) that holds sequence or sequences
#                        in FASTA format. When multiple sequences are given, multiple BLASTs will
#                        run and results will be concatenated in the -o output file. -s has priority
#                        over -f)
# -s   SCPP...LLKH      (give the whole sequence (without ... of course). Don't use funny characters
#                        or Windows symbols.)
# -a                    (add sequence to output list (sequence is in FastA format))
# -o   output file name (e.g. blast_hits.list)
# -e   expect cut off	
#
# Example input file (without # of course):
#      >cram_craab sequence 1 for example
#      TTCCPSIVARSNFNVCRLPGTPEALCATYTGCIIIPGATCPGDYAN
#      >thn_brarp sequence 2 for example
#      MEGKTVILGVIIMSLVMAQNQVEAKICCPR 
#      TIDRNIYNACRLTGASMTNCANLSGCKIVSTHK
#
# Example output file (... means lines deleted):
#      - hits for cram_craab
#      cram_craab
#      thn_dencl
#      thn3_visal
#      thna_pholi
#      ....
#      thn22_arath
#      - hits for thn_brarp
#      thn_brarp
#      thn3_visal
#      thn5_horvu
#      ....
#


use warnings;
use Data::Dumper;
use Getopt::Std;

$| = 1;

my %opts;

getopts('d:f:s:o:l:e:a', \%opts);

# read in the parameters

my $db = $opts{'d'} or die "No databank specified\n";
my $out = $opts{'o'} or die "No output file specified\n";
my $limit = $opts{'l'}; $limit = 250 unless defined $limit;
my $cut_off_expect = $opts{'e'}; $cut_off_expect = 1e-6 unless defined $cut_off_expect;
my $add_seq = $opts{'a'};

my %seq;
my $query = $opts{'s'};

if (defined $query) {
	$seq{'cmdline'} = $query;
}
elsif (defined $opts{'f'}) {
	my $filename = $opts{'f'};
	my ($seq, $id);

	open IN, "<${filename}" or die "Could not open file ${filename}: $!";
	while (my $line = <IN>) {
		$line =~ s/\s//g;
		
		if ($line =~ m/>(\S+)/) {
			if (defined $id and defined $seq) {
				$seq{$id} = $seq;
			}
			
			$id = $1;
			$seq = "";
		}
		else {
			chomp($line);
			$seq .= $line;
		}
	}
	
	if (defined $id and defined $seq) {
		$seq{$id} = $seq;
	}
	
	close IN;
}
else {
	die "No sequence specified\n";
}

# build our SOAP objects
# Note that we're using objects defined in the package MySoap defined below.

my $blast_soap =
	new MySoap(
		ns_url => 'http://mrs.cmbi.ru.nl/mrsws-blast',
		ns => 'service',
		url => 'http://cmbi8.cmbi.ru.nl:8082/mrsws-blast'
	);

my $regular_soap =
	new MySoap(
		ns_url => 'http://mrs.cmbi.ru.nl/mrsws',
		ns => 'service',
		url => 'http://cmbi8:8081/mrsws'
	);

# Now we're ready to do the real work, iterate over each sequence
# in %seq and print out the results.

open OUT, ">$out" or die "Could not open output file: $!\n";

foreach my $id (keys %seq) {
	
	print OUT "- hits for $id\n";
	
	# Tell the user we're about to blast the next sequence
	print "Blasting $id...";
	
	# Submit the next blast job
	my $job_id = $blast_soap->soapCall("BlastAsync",
		{db => $db, query => $seq{$id},
		'report-limit' => ['xsd:unsignedLong', $limit]
	});
	
	# and wait until it is finished (or exited with an error)
	my $status;
	while (1)
	{
		$status = $blast_soap->soapCall("BlastJobStatus", {"job-id" => $job_id});
		last if $status eq 'finished' or $status eq 'error';
		
		# indicate we're not hanging
		print ".";
		sleep 1;
	}
	
	# done, tell user what happened
	print "done, status is $status\n";
	
	# no use printing result when there aren't any
	next unless $status eq 'finished';
	
	# Fetch the blast results from the server
	# note that we receive the paramsall parameter of result
	# that's less than optimal, but I know of no other way to
	# access result arrays for now. Should perhaps investigate this a bit more...
	my @r = $blast_soap->soapCall("BlastJobResult", {"job-id" => $job_id});

	# for each hit	
	foreach my $hit (@r) {
		
		# skip over all the other data that was returned, we only want hits
		next unless ref($hit) eq 'HASH' and defined $hit->{id};
		
		my $expect;
		
		if (ref($hit->{hsps}) eq 'ARRAY') {
			$expect = @{$hit->{hsps}}[0]->{expect};
		}
		else {
			$expect = $hit->{hsps}->{expect};
		}
		
		last if $expect > $cut_off_expect;
		
		my $hit_id = $hit->{id};
		print OUT "$hit_id $expect\n";
		
		if ($add_seq) {
			my $fasta = $regular_soap->soapCall("GetEntry", {db => $db, id => $hit_id, 'format' => [ "Format", 'fasta' ] });
			print OUT "$fasta//\n";
		}
	}
}

close OUT;

# done
exit;	

package MySoap;

# This is a wrapper to simplify the use of SOAP::Lite a little bit.

use strict;
use warnings;
use SOAP::Lite; # +trace => [qw(debug)];
use Data::Dumper;

sub new
{
	my $invocant = shift;
	
	my $self = {
		@_
	};
	
	$self->{soap} = SOAP::Lite->uri($self->{ns_url})->proxy($self->{url});
	
	return bless $self, "MySoap";
}

# the soapCall method takes a function name (the one at the remote SOAP server)
# and a hash of parameters. The parameters can be simple name, value pairs but
# for parameters that are not of type xsd:string you can pass in the more complex
# name, array pair where array contains a type and the value to use.
# E.g. the format parameter of GetEntry is of type $ns:Format and so we pass in
# the value as:  'format' => ['Format', 'fasta']

sub soapCall()
{
	my ($self, $func, $param) = @_;

	my $ns = $self->{ns};
	my $ns_url = $self->{ns_url};
	my $soap = $self->{soap};

	my @args;

	foreach my $k (keys %{$param}) {
		if (ref($param->{$k}) eq 'ARRAY') {
			my $f = @{$param->{$k}}[0];
			my $v = @{$param->{$k}}[1];
			
			$f = "$ns:$f" unless substr($f, 0, 4) eq 'xsd:';
			
			push @args, SOAP::Data->name("$ns:$k")->type($f => $v);
		}
		else {
			push @args, SOAP::Data->name("$ns:$k")->type('xsd:string' => $param->{$k});
		}
	}

    my $result = $soap->call(SOAP::Data->name("$ns:$func")->attr({"xmlns:$ns" => $ns_url}) => @args);
    
    my $err;
    if ($result->fault)
    {
        $err .= $result->faultdetail."\n"   if defined $result->faultdetail;
        $err .= $result->faultcode."\n"     if defined $result->faultcode;
        $err .= $result->faultstring."\n"   if defined $result->faultstring;
        $err .= $result->faultactor."\n"    if defined $result->faultactor;
    }

    return ($result->paramsall, $err) if defined wantarray and wantarray;
    die $err if defined $err;
    return $result->result;
};

1;
