breseq

diff extern/bioperl-live/Bio/SearchIO/Writer/HTMLResultWriter.pm @ 297:082829bf459b

Added BioPerl
author Jeffrey Barrick
date Mon Sep 27 23:42:36 2010 -0400 (2010-09-27)
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extern/bioperl-live/Bio/SearchIO/Writer/HTMLResultWriter.pm	Mon Sep 27 23:42:36 2010 -0400
     1.3 @@ -0,0 +1,988 @@
     1.4 +#
     1.5 +# BioPerl module for Bio::SearchIO::Writer::HTMLResultWriter
     1.6 +#
     1.7 +# Please direct questions and support issues to <bioperl-l@bioperl.org> 
     1.8 +#
     1.9 +# Cared for by Jason Stajich <jason@bioperl.org>
    1.10 +#
    1.11 +# Copyright Jason Stajich
    1.12 +#
    1.13 +# You may distribute this module under the same terms as perl itself
    1.14 +
    1.15 +# Changes 2003-07-31 (jason)
    1.16 +# Gary has cleaned up the code a lot to produce better looking HTML
    1.17 +
    1.18 +# POD documentation - main docs before the code
    1.19 +
    1.20 +=head1 NAME
    1.21 +
    1.22 +Bio::SearchIO::Writer::HTMLResultWriter - write a Bio::Search::ResultI in HTML
    1.23 +
    1.24 +=head1 SYNOPSIS
    1.25 +
    1.26 +  use Bio::SearchIO;
    1.27 +  use Bio::SearchIO::Writer::HTMLResultWriter;
    1.28 +
    1.29 +  my $in = Bio::SearchIO->new(-format => 'blast',
    1.30 +			     -file   => shift @ARGV);
    1.31 +
    1.32 +  my $writer = Bio::SearchIO::Writer::HTMLResultWriter->new();
    1.33 +  my $out = Bio::SearchIO->new(-writer => $writer);
    1.34 +  $out->write_result($in->next_result);
    1.35 +
    1.36 +
    1.37 +  # to filter your output
    1.38 +  my $MinLength = 100; # need a variable with scope outside the method
    1.39 +  sub hsp_filter { 
    1.40 +      my $hsp = shift;
    1.41 +      return 1 if $hsp->length('total') > $MinLength;
    1.42 +  }
    1.43 +  sub result_filter { 
    1.44 +      my $result = shift;
    1.45 +      return $hsp->num_hits > 0;
    1.46 +  }
    1.47 +
    1.48 +  my $writer = Bio::SearchIO::Writer::HTMLResultWriter->new
    1.49 +                     (-filters => { 'HSP' => \&hsp_filter} );
    1.50 +  my $out = Bio::SearchIO->new(-writer => $writer);
    1.51 +  $out->write_result($in->next_result);
    1.52 +
    1.53 +  # can also set the filter via the writer object
    1.54 +  $writer->filter('RESULT', \&result_filter);
    1.55 +
    1.56 +=head1 DESCRIPTION
    1.57 +
    1.58 +This object implements the SearchWriterI interface which will produce
    1.59 +a set of HTML for a specific L<Bio::Search::Report::ReportI> interface.
    1.60 +
    1.61 +See L<Bio::SearchIO::SearchWriterI> for more info on the filter method.
    1.62 +
    1.63 +=head1 FEEDBACK
    1.64 +
    1.65 +=head2 Mailing Lists
    1.66 +
    1.67 +User feedback is an integral part of the evolution of this and other
    1.68 +Bioperl modules. Send your comments and suggestions preferably to
    1.69 +the Bioperl mailing list.  Your participation is much appreciated.
    1.70 +
    1.71 +  bioperl-l@bioperl.org                  - General discussion
    1.72 +  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
    1.73 +
    1.74 +=head2 Support 
    1.75 +
    1.76 +Please direct usage questions or support issues to the mailing list:
    1.77 +
    1.78 +I<bioperl-l@bioperl.org>
    1.79 +
    1.80 +rather than to the module maintainer directly. Many experienced and 
    1.81 +reponsive experts will be able look at the problem and quickly 
    1.82 +address it. Please include a thorough description of the problem 
    1.83 +with code and data examples if at all possible.
    1.84 +
    1.85 +=head2 Reporting Bugs
    1.86 +
    1.87 +Report bugs to the Bioperl bug tracking system to help us keep track
    1.88 +of the bugs and their resolution. Bug reports can be submitted via the
    1.89 +web:
    1.90 +
    1.91 +  http://bugzilla.open-bio.org/
    1.92 +
    1.93 +=head1 AUTHOR - Jason Stajich
    1.94 +
    1.95 +Email jason-at-bioperl-dot-org
    1.96 +
    1.97 +=head1 CONTRIBUTORS
    1.98 +
    1.99 +Gary Williams G.Williams@hgmp.mrc.ac.uk
   1.100 +
   1.101 +=head1 APPENDIX
   1.102 +
   1.103 +The rest of the documentation details each of the object methods.
   1.104 +Internal methods are usually preceded with a _
   1.105 +
   1.106 +=cut
   1.107 +
   1.108 +
   1.109 +package Bio::SearchIO::Writer::HTMLResultWriter;
   1.110 +use strict;
   1.111 +use vars qw(%RemoteURLDefault
   1.112 +            $MaxDescLen $DATE $AlignmentLineWidth $Revision);
   1.113 +
   1.114 +# Object preamble - inherits from Bio::Root::RootI
   1.115 +
   1.116 +BEGIN {
   1.117 +    $Revision = '$Id$';
   1.118 +    $DATE = localtime(time);
   1.119 +    %RemoteURLDefault = ( 
   1.120 +      'PROTEIN' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=protein&cmd=search&term=%s',			  
   1.121 +      'NUCLEOTIDE' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=nucleotide&cmd=search&term=%s'
   1.122 +    );
   1.123 +    $MaxDescLen = 60;
   1.124 +    $AlignmentLineWidth = 60;
   1.125 +}
   1.126 +
   1.127 +
   1.128 +use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
   1.129 +
   1.130 +=head2 new
   1.131 +
   1.132 + Title   : new
   1.133 + Usage   : my $obj = Bio::SearchIO::Writer::HTMLResultWriter->new();
   1.134 + Function: Builds a new Bio::SearchIO::Writer::HTMLResultWriter object 
   1.135 + Returns : Bio::SearchIO::Writer::HTMLResultWriter
   1.136 + Args    : -filters => hashref with any or all of the keys (HSP HIT RESULT)
   1.137 +           which have values pointing to a subroutine reference
   1.138 +           which will expect to get a 
   1.139 +           -nucleotide_url => URL sprintf string base for the nt sequences
   1.140 +           -protein_url => URL sprintf string base for the aa sequences
   1.141 +           -no_wublastlinks => boolean. Do not display WU-BLAST lines 
   1.142 +                               even if they are parsed out.
   1.143 +                               Links = (1) 
   1.144 +
   1.145 +=cut
   1.146 +
   1.147 +sub new {
   1.148 +  my($class,@args) = @_;
   1.149 +
   1.150 +  my $self = $class->SUPER::new(@args);
   1.151 +  my ($p,$n,$filters,
   1.152 +      $nowublastlinks) = $self->_rearrange([qw(PROTEIN_URL 
   1.153 +					       NUCLEOTIDE_URL 
   1.154 +					       FILTERS
   1.155 +					       NO_WUBLASTLINKS)],@args);
   1.156 +  $self->remote_database_url('p',$p || $RemoteURLDefault{'PROTEIN'});
   1.157 +  $self->remote_database_url('n',$n || $RemoteURLDefault{'NUCLEOTIDE'});
   1.158 +  $self->no_wublastlinks(! $nowublastlinks);
   1.159 +  if( defined $filters ) {
   1.160 +      if( !ref($filters) =~ /HASH/i ) { 
   1.161 +	  $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
   1.162 +      } else { 
   1.163 +	  while( my ($type,$code) = each %{$filters} ) {
   1.164 +	      $self->filter($type,$code);
   1.165 +	  }
   1.166 +      }
   1.167 +  }
   1.168 +
   1.169 +  return $self;
   1.170 +}
   1.171 +
   1.172 +=head2 remote_database_url
   1.173 +
   1.174 + Title   : remote_database_url
   1.175 + Usage   : $obj->remote_database_url($type,$newval)
   1.176 + Function: This should return or set a string that contains a %s which can be
   1.177 +           filled in with sprintf.
   1.178 + Returns : value of remote_database_url
   1.179 + Args    : $type - 'PROTEIN' or 'P' for protein URLS
   1.180 +                   'NUCLEOTIDE' or 'N' for nucleotide URLS
   1.181 +           $value - new value to set [optional]
   1.182 +
   1.183 +
   1.184 +=cut
   1.185 +
   1.186 +sub remote_database_url{
   1.187 +   my ($self,$type,$value) = @_;
   1.188 +   if( ! defined $type || $type !~ /^(P|N)/i ) { 
   1.189 +       $self->warn("Must provide a type (PROTEIN or NUCLEOTIDE)");
   1.190 +       return '';
   1.191 +   }
   1.192 +   $type = uc $1;
   1.193 +   if( defined $value) {
   1.194 +      $self->{'remote_database_url'}->{$type} = $value;
   1.195 +    }
   1.196 +   return $self->{'remote_database_url'}->{$type};
   1.197 +}
   1.198 +
   1.199 +=head2 to_string
   1.200 +
   1.201 + Purpose   : Produces data for each Search::Result::ResultI in a string.
   1.202 +           : This is an abstract method. For some useful implementations,
   1.203 +           : see ResultTableWriter.pm, HitTableWriter.pm, 
   1.204 +           : and HSPTableWriter.pm.
   1.205 + Usage     : print $writer->to_string( $result_obj, @args );
   1.206 + Argument  : $result_obj = A Bio::Search::Result::ResultI object
   1.207 +           : @args = any additional arguments used by your implementation.
   1.208 + Returns   : String containing data for each search Result or any of its
   1.209 +           : sub-objects (Hits and HSPs).
   1.210 + Throws    : n/a
   1.211 +
   1.212 +=cut
   1.213 +
   1.214 +sub to_string {
   1.215 +    my ($self,$result,$num) = @_; 
   1.216 +    $num ||= 0;
   1.217 +    return unless defined $result;
   1.218 +    my $links = $self->no_wublastlinks;
   1.219 +    my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'),
   1.220 +						  $self->filter('HIT'),
   1.221 +						  $self->filter('HSP') );
   1.222 +    return '' if( defined $resultfilter && ! &{$resultfilter}($result) );    
   1.223 +
   1.224 +    my ($qtype,$dbtype,$dbseqtype,$type);
   1.225 +    my $alg = $result->algorithm;
   1.226 +    # This is actually wrong for the FASTAs I think
   1.227 +    if(  $alg =~ /T(FAST|BLAST)([XY])/i ) {
   1.228 +	$qtype      = $dbtype = 'translated';
   1.229 +	$dbseqtype = $type       = 'PROTEIN';
   1.230 +    } elsif( $alg =~ /T(FAST|BLAST)N/i ) {
   1.231 +	$qtype      = '';
   1.232 +	$dbtype     = 'translated';
   1.233 +	$type       = 'PROTEIN';
   1.234 +	$dbseqtype  = 'NUCLEOTIDE';
   1.235 +    } elsif( $alg =~ /(FAST|BLAST)N/i || 
   1.236 +	     $alg =~ /(WABA|EXONERATE)/i ) {
   1.237 +	$qtype      = $dbtype = '';
   1.238 +	$type = $dbseqtype  = 'NUCLEOTIDE';
   1.239 +    } elsif( $alg =~ /(FAST|BLAST)P/  || 
   1.240 +	     $alg =~ /SSEARCH|HMM(PFAM|SEARCH)/i ) {
   1.241 +	$qtype      = $dbtype = '';
   1.242 +	$type = $dbseqtype  = 'PROTEIN';
   1.243 +    } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) {
   1.244 +	$qtype      = 'translated';
   1.245 +        $dbtype     = 'PROTEIN';
   1.246 +	$dbseqtype  = $type      = 'PROTEIN';
   1.247 +    } else { 
   1.248 +	$self->warn("algorithm was ", $result->algorithm, " couldn't match\n");
   1.249 +    }
   1.250 +    
   1.251 +    
   1.252 +    my %baselens = ( 'Sbjct:'   => ( $dbtype eq 'translated' )  ? 3 : 1,
   1.253 +		     'Query:'   => ( $qtype  eq 'translated' )  ? 3 : 1);
   1.254 +
   1.255 +    my $str;
   1.256 +    if( $num <= 1 ) { 
   1.257 +	$str = &{$self->start_report}($result);
   1.258 +    }
   1.259 +
   1.260 +    $str .= &{$self->title}($result);
   1.261 +
   1.262 +    $str .= $result->algorithm_reference || $self->algorithm_reference($result);
   1.263 +    $str .= &{$self->introduction}($result);
   1.264 +
   1.265 +    $str .= "<table border=0>
   1.266 +            <tr><th>Sequences producing significant alignments:</th>
   1.267 +            <th>Score<br>(bits)</th><th>E<br>value</th></tr>";
   1.268 +
   1.269 +    my $hspstr = '<p><p>';
   1.270 +    if( $result->can('rewind')) {
   1.271 +        $result->rewind(); # support stream based parsing routines
   1.272 +    }
   1.273 +
   1.274 +    while( my $hit = $result->next_hit ) {
   1.275 +	next if( $hitfilter && ! &{$hitfilter}($hit) );
   1.276 +	my $nm = $hit->name();
   1.277 +	
   1.278 +	$self->debug( "no $nm for name (".$hit->description(). "\n") 
   1.279 +	    unless $nm;
   1.280 +	my ($gi,$acc) = &{$self->id_parser}($nm);
   1.281 +	my $p = "%-$MaxDescLen". "s";
   1.282 +	my $descsub;
   1.283 +	if( length($hit->description) > ($MaxDescLen - 3) ) {
   1.284 +	    $descsub = sprintf($p,
   1.285 +		substr($hit->description,0,$MaxDescLen-3) . "...");
   1.286 +	} else { 
   1.287 +	    $descsub = sprintf($p,$hit->description);
   1.288 +	}
   1.289 +
   1.290 +	my $url_desc  = &{$self->hit_link_desc()}($self,$hit, $result);
   1.291 +	my $url_align = &{$self->hit_link_align()}($self,$hit, $result);
   1.292 +
   1.293 +	my @hsps = $hit->hsps;
   1.294 +	
   1.295 +	if( ! @hsps ) {
   1.296 +	    # no HSPs so no link 
   1.297 +	    $str .= sprintf('<tr><td>%s %s</td><td>%s</td><td>%.2g</td></tr>'."\n",
   1.298 +			    $url_desc, $descsub, 
   1.299 +			    ($hit->bits ? $hit->bits : 
   1.300 +			     (defined $hsps[0] ? $hsps[0]->bits : ' ')),
   1.301 +			    ( $hit->significance ? $hit->significance :
   1.302 +			      (defined $hsps[0] ? $hsps[0]->evalue : ' ')) 
   1.303 +			    );
   1.304 +	} else { 
   1.305 +	    # failover to first HSP if the data does not contain a 
   1.306 +	    # bitscore/significance value for the Hit (NCBI XML data for one)
   1.307 +
   1.308 +	    $str .= sprintf('<tr><td>%s %s</td><td>%s</td><td><a href="#%s">%.2g</a></td></tr>'."\n",
   1.309 +			    $url_desc, $descsub, 
   1.310 +			    ($hit->bits ? $hit->bits : 
   1.311 +			     (defined $hsps[0] ? $hsps[0]->bits : ' ')),
   1.312 +			    $acc,
   1.313 +			    ( $hit->significance ? $hit->significance :
   1.314 +			      (defined $hsps[0] ? $hsps[0]->evalue : ' ')) 
   1.315 +			    );
   1.316 +        my $dline = &{$self->hit_desc_line}($self, $hit, $result);
   1.317 +	    $hspstr .= "<a name=\"$acc\">\n".
   1.318 +		sprintf("><b>%s</b> %s</br><dd>Length = %s</dd><p>\n\n", $url_align, 
   1.319 +			$dline , &_numwithcommas($hit->length));
   1.320 +	    my $ct = 0;
   1.321 +	    foreach my $hsp (@hsps ) {
   1.322 +		next if( $hspfilter && ! &{$hspfilter}($hsp) );
   1.323 +		$hspstr .= sprintf(" Score = %s bits (%s), Expect = %s",
   1.324 +				   $hsp->bits || $hsp->score, 
   1.325 +				   $hsp->score || $hsp->bits, 
   1.326 +				   $hsp->evalue || '');
   1.327 +		if( defined $hsp->pvalue ) {
   1.328 +		    $hspstr .= ", P = ".$hsp->pvalue;
   1.329 +		}
   1.330 +		$hspstr .= "<br>\n";
   1.331 +		$hspstr .= sprintf(" Identities = %d/%d (%d%%)",
   1.332 +				   ( $hsp->frac_identical('total') * 
   1.333 +				     $hsp->length('total')),
   1.334 +				   $hsp->length('total'),
   1.335 +				   $hsp->frac_identical('total') * 100);
   1.336 +
   1.337 +		if( $type eq 'PROTEIN' ) {
   1.338 +		    $hspstr .= sprintf(", Positives = %d/%d (%d%%)",
   1.339 +				       ( $hsp->frac_conserved('total') * 
   1.340 +					 $hsp->length('total')),
   1.341 +				       $hsp->length('total'),
   1.342 +				       $hsp->frac_conserved('total') * 100);
   1.343 +		}
   1.344 +		if( $hsp->gaps ) {
   1.345 +		    $hspstr .= sprintf(", Gaps = %d/%d (%d%%)",
   1.346 +				       $hsp->gaps('total'),
   1.347 +				       $hsp->length('total'),
   1.348 +				       (100 * $hsp->gaps('total') / 
   1.349 +					$hsp->length('total')));
   1.350 +		}
   1.351 +
   1.352 +		my ($hframe,$qframe)   = ( $hsp->hit->frame, $hsp->query->frame);
   1.353 +		my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand);
   1.354 +		# so TBLASTX will have Query/Hit frames
   1.355 +		#    BLASTX  will have Query frame
   1.356 +		#    TBLASTN will have Hit frame
   1.357 +		if( $hstrand || $qstrand ) {
   1.358 +		    $hspstr .= ", Frame = ";
   1.359 +		    my ($signq, $signh);
   1.360 +		    unless( $hstrand ) {
   1.361 +			$hframe = undef;
   1.362 +			# if strand is null or 0 then it is protein
   1.363 +			# and this no frame
   1.364 +		    } else { 
   1.365 +			$signh = $hstrand < 0 ? '-' : '+';
   1.366 +		    }
   1.367 +		    unless( $qstrand  ) {
   1.368 +			$qframe = undef;
   1.369 +			# if strand is null or 0 then it is protein
   1.370 +		    } else { 
   1.371 +			$signq =$qstrand < 0 ? '-' : '+';
   1.372 +		    }
   1.373 +		    # remember bioperl stores frames as 0,1,2 (GFF way)
   1.374 +		    # BLAST reports reports as 1,2,3 so
   1.375 +		    # we have to add 1 to the frame values
   1.376 +		    if( defined $hframe && ! defined $qframe) {  
   1.377 +			$hspstr .= "$signh".($hframe+1);
   1.378 +		    } elsif( defined $qframe && ! defined $hframe) {  
   1.379 +			$hspstr .= "$signq".($qframe+1);
   1.380 +		    } else { 
   1.381 +			$hspstr .= sprintf(" %s%d / %s%d",
   1.382 +					   $signq,$qframe+1,
   1.383 +					   $signh, $hframe+1);
   1.384 +		    }
   1.385 +		}
   1.386 +		if($links && 
   1.387 +		   $hsp->can('links') && defined(my $lnks = $hsp->links) ) {
   1.388 +		    $hspstr .= sprintf("<br>\nLinks = %s\n",$lnks);
   1.389 +		}
   1.390 +
   1.391 +		$hspstr .= "</a><p>\n<pre>";
   1.392 +
   1.393 +		my @hspvals = ( {'name' => 'Query:',
   1.394 +				 'seq'  => $hsp->query_string,
   1.395 +				 'start' => ($qstrand >= 0 ? 
   1.396 +					     $hsp->query->start : 
   1.397 +					     $hsp->query->end),
   1.398 +					     'end'   => ($qstrand >= 0 ? 
   1.399 +							 $hsp->query->end : 
   1.400 +							 $hsp->query->start),
   1.401 +							 'index' => 0,
   1.402 +							 'direction' => $qstrand || 1
   1.403 +						     },
   1.404 +				{ 'name' => ' 'x6,
   1.405 +				  'seq'  => $hsp->homology_string,
   1.406 +				  'start' => undef,
   1.407 +				  'end'   => undef,
   1.408 +				  'index' => 0,
   1.409 +				  'direction' => 1
   1.410 +				  },
   1.411 +				{ 'name'  => 'Sbjct:',
   1.412 +				  'seq'   => $hsp->hit_string,
   1.413 +				  'start' => ($hstrand >= 0 ? 
   1.414 +					      $hsp->hit->start : 
   1.415 +					      $hsp->hit->end),
   1.416 +					      'end'   => ($hstrand >= 0 ? 
   1.417 +							  $hsp->hit->end : 
   1.418 +							  $hsp->hit->start),
   1.419 +							  'index' => 0, 
   1.420 +							  'direction' => $hstrand || 1
   1.421 +						      }
   1.422 +				);	    
   1.423 +
   1.424 +
   1.425 +		# let's set the expected length (in chars) of the starting number
   1.426 +		# in an alignment block so we can have things line up
   1.427 +		# Just going to try and set to the largest
   1.428 +
   1.429 +		my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}),
   1.430 +						    length($hspvals[0]->{'end'}),
   1.431 +						    length($hspvals[2]->{'start'}),
   1.432 +						    length($hspvals[2]->{'end'}));
   1.433 +		my $count = 0;
   1.434 +		while ( $count < $hsp->length('total') ) {
   1.435 +		    foreach my $v ( @hspvals ) {
   1.436 +			my $piece = substr($v->{'seq'}, $v->{'index'} + $count,
   1.437 +					   $AlignmentLineWidth);
   1.438 +			my $cp = $piece;
   1.439 +			my $plen = scalar ( $cp =~ tr/\-//);
   1.440 +			my ($start,$end) = ('','');
   1.441 +			if( defined $v->{'start'} ) { 
   1.442 +			    $start = $v->{'start'};
   1.443 +			    # since strand can be + or - use the direction
   1.444 +			    # to signify which whether to add or substract from end
   1.445 +			    my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )*
   1.446 +				$baselens{$v->{'name'}};
   1.447 +			    if( length($piece) < $AlignmentLineWidth ) {
   1.448 +				$d = (length($piece) - $plen) * $v->{'direction'} * 
   1.449 +				    $baselens{$v->{'name'}};
   1.450 +			    }
   1.451 +			    $end   = $v->{'start'} + $d - $v->{'direction'};
   1.452 +			    $v->{'start'} += $d;
   1.453 +			}
   1.454 +			$hspstr .= sprintf("%s %-".$numwidth."s %s %s\n",
   1.455 +					   $v->{'name'},
   1.456 +					   $start,
   1.457 +					   $piece,
   1.458 +					   $end
   1.459 +					   );
   1.460 +		    }
   1.461 +		    $count += $AlignmentLineWidth;
   1.462 +		    $hspstr .= "\n\n";
   1.463 +		}
   1.464 +		$hspstr .= "</pre>\n";
   1.465 +	    }
   1.466 +	}
   1.467 +#	$hspstr .= "</pre>\n";
   1.468 +    }
   1.469 +
   1.470 +    $str .= "</table><p>\n".$hspstr;
   1.471 +    my ($pav, $sav) = ($result->available_parameters, $result->available_statistics);
   1.472 +    if ($pav || $sav) {
   1.473 +        # make table of search statistics and end the web page
   1.474 +        $str .= "<p><p><hr><h2>Search Parameters</h2>";
   1.475 +        if ($pav) {
   1.476 +        $str .= "<table border=1><tr><th>Parameter</th><th>Value</th>\n";
   1.477 +        foreach my $param ( sort $result->available_parameters ) {
   1.478 +            $str .= "<tr><td>$param</td><td>". $result->get_parameter($param) ."</td></tr>\n";
   1.479 +        }
   1.480 +        $str .= "</table>";
   1.481 +        }
   1.482 +        
   1.483 +        if ($sav) {
   1.484 +        $str .= "<p><h2>Search Statistics</h2><table border=1><tr><th>Statistic</th><th>Value</th></tr>\n";
   1.485 +        foreach my $stat ( sort $result->available_statistics ) {
   1.486 +            $str .= "<tr><td>$stat</td><td>". $result->get_statistic($stat). "</td>\n";
   1.487 +        }
   1.488 +        $str .=  "</tr></table>";
   1.489 +        }
   1.490 +    }
   1.491 +    $str .= $self->footer() . "<P>\n";
   1.492 +    return $str;
   1.493 +}
   1.494 +
   1.495 +=head2 hit_link_desc
   1.496 +
   1.497 + Title   : hit_link_desc
   1.498 + Usage   : $self->hit_link_desc(\&link_function);
   1.499 + Function: Get/Set the function which provides an HTML 
   1.500 +           link(s) for the given hit to be used
   1.501 +           within the description section at the top of the BLAST report.
   1.502 +           This allows a person reading the report within
   1.503 +           a web browser to go to one or more database entries for
   1.504 +           the given hit from the description section.
   1.505 + Returns : Function reference
   1.506 + Args    : Function reference
   1.507 + See Also: L<default_hit_link_desc()>
   1.508 +
   1.509 +=cut
   1.510 +
   1.511 +sub hit_link_desc{
   1.512 +    my( $self, $code ) = @_; 
   1.513 +    if ($code) {
   1.514 +        $self->{'_hit_link_desc'} = $code;
   1.515 +    }
   1.516 +    return $self->{'_hit_link_desc'} || \&default_hit_link_desc;
   1.517 +}
   1.518 +
   1.519 +=head2 default_hit_link_desc
   1.520 +
   1.521 + Title   : default_hit_link_desc
   1.522 + Usage   : $self->default_hit_link_desc($hit, $result)
   1.523 + Function: Provides an HTML link(s) for the given hit to be used
   1.524 +           within the description section at the top of the BLAST report.
   1.525 +           This allows a person reading the report within
   1.526 +           a web browser to go to one or more database entries for
   1.527 +           the given hit from the description section.
   1.528 + Returns : string containing HTML markup "<a href...")
   1.529 +
   1.530 +           The default implementation returns an HTML link to the
   1.531 +           URL supplied by the remote_database_url() method
   1.532 +           and using the identifier supplied by the id_parser() method.
   1.533 +           It will use the NCBI GI if present, and the accession if not.
   1.534 +
   1.535 + Args    : First argument is a Bio::Search::Hit::HitI
   1.536 +           Second argument is a Bio::Search::Result::ResultI
   1.537 +
   1.538 +See Also: L<hit_link_align>, L<remote_database>, L<id_parser>
   1.539 +
   1.540 +=cut
   1.541 +
   1.542 +sub default_hit_link_desc {
   1.543 +    my($self, $hit, $result) = @_;
   1.544 +    my $type = ( $result->algorithm =~ /(P|X|Y)$/i ) ? 'PROTEIN' : 'NUCLEOTIDE';
   1.545 +    my ($gi,$acc) = &{$self->id_parser}($hit->name);
   1.546 +
   1.547 +    my $url = length($self->remote_database_url($type)) > 0 ? 
   1.548 +              sprintf('<a href="%s">%s</a>',
   1.549 +                      sprintf($self->remote_database_url($type),$gi || $acc), 
   1.550 +                      $hit->name()) :  $hit->name();
   1.551 +
   1.552 +    return $url;
   1.553 +}
   1.554 +
   1.555 +
   1.556 +=head2 hit_link_align
   1.557 +
   1.558 + Title   : hit_link_align
   1.559 + Usage   : $self->hit_link_align(\&link_function);
   1.560 + Function: Get/Set the function which provides an HTML link(s) 
   1.561 +           for the given hit to be used
   1.562 +           within the HSP alignment section of the BLAST report.
   1.563 +           This allows a person reading the report within
   1.564 +           a web browser to go to one or more database entries for
   1.565 +           the given hit from the alignment section.
   1.566 + Returns : string containing HTML markup "<a href...")
   1.567 +
   1.568 +           The default implementation delegates to hit_link_desc().
   1.569 +
   1.570 + Args    : First argument is a Bio::Search::Hit::HitI
   1.571 +           Second argument is a Bio::Search::Result::ResultI
   1.572 +
   1.573 +See Also: L<hit_link_desc>, L<remote_database>, L<id_parser>
   1.574 +
   1.575 +=cut
   1.576 +
   1.577 +sub hit_link_align {
   1.578 +    my ($self,$code) = @_;
   1.579 +    if ($code) {
   1.580 +        $self->{'_hit_link_align'} = $code;
   1.581 +    }
   1.582 +    return $self->{'_hit_link_align'} || \&default_hit_link_desc;
   1.583 +}
   1.584 +
   1.585 +=head2 hit_desc_line
   1.586 +
   1.587 + Title   : hit_desc_line
   1.588 + Usage   : $self->hit_desc_line(\&link_function);
   1.589 + Function: Get/Set the function which provides HTML for the description
   1.590 +           information from a hit. This allows one to parse
   1.591 +           the rest of the description and split up lines, add links, etc.
   1.592 + Returns : Function reference
   1.593 + Args    : Function reference
   1.594 + See Also: L<default_hit_link_desc()>
   1.595 +
   1.596 +=cut
   1.597 +
   1.598 +sub hit_desc_line{
   1.599 +    my( $self, $code ) = @_; 
   1.600 +    if ($code) {
   1.601 +        $self->{'_hit_desc_line'} = $code;
   1.602 +    }
   1.603 +    return $self->{'_hit_desc_line'} || \&default_hit_desc_line;
   1.604 +}
   1.605 +
   1.606 +=head2 default_hit_desc_line
   1.607 +
   1.608 + Title   : default_hit_desc_line
   1.609 + Usage   : $self->default_hit_desc_line($hit, $result)
   1.610 + Function: Parses the description line information, splits based on the
   1.611 +           hidden \x01 between independent descriptions, checks the lines for
   1.612 +           possible web links, and adds HTML link(s) for the given hit to be
   1.613 +           used.
   1.614 +
   1.615 + Returns : string containing HTML markup "<a href...")
   1.616 +           The default implementation returns an HTML link to the
   1.617 +           URL supplied by the remote_database_url() method
   1.618 +           and using the identifier supplied by the id_parser() method.
   1.619 +           It will use the NCBI GI if present, and the accession if not.
   1.620 +
   1.621 + Args    : First argument is a Bio::Search::Hit::HitI
   1.622 +           Second argument is a Bio::Search::Result::ResultI
   1.623 +
   1.624 +See Also: L<hit_link_align>, L<remote_database>, L<id_parser>
   1.625 +
   1.626 +=cut
   1.627 +
   1.628 +sub default_hit_desc_line {
   1.629 +    my($self, $hit, $result) = @_;
   1.630 +    my $type = ( $result->algorithm =~ /(P|X|Y)$/i ) ? 'PROTEIN' : 'NUCLEOTIDE';
   1.631 +    my @descs = split /\x01/, $hit->description;
   1.632 +    #my $descline = join("</br>",@descs)."</br>";
   1.633 +    my $descline = '';
   1.634 +    #return $descline;
   1.635 +    for my $sec (@descs) {
   1.636 +        my $url = '';
   1.637 +        if ($sec =~ s/((?:gi\|(\d+)\|)?        # optional GI
   1.638 +                     (\w+)\|([A-Z\d\.\_]+) # main 
   1.639 +                     (\|[A-Z\d\_]+)?) # optional secondary ID//xms) {
   1.640 +            my ($name, $gi, $db, $acc) = ($1, $2, $3, $4);
   1.641 +            #$acc ||= ($rest) ? $rest : $gi;
   1.642 +            $acc =~ s/^\s+(\S+)/$1/;
   1.643 +            $acc =~ s/(\S+)\s+$/$1/;
   1.644 +            $url =
   1.645 +            length($self->remote_database_url($type)) > 0 ? 
   1.646 +              sprintf('<a href="%s">%s</a> %s',
   1.647 +                      sprintf($self->remote_database_url($type),
   1.648 +                      $gi || $acc || $db), 
   1.649 +                      $name, $sec) :  $sec;
   1.650 +        } else {
   1.651 +            $url = $sec;
   1.652 +        }
   1.653 +        $descline .= "$url</br>\n";
   1.654 +    }
   1.655 +    return $descline;
   1.656 +}
   1.657 +
   1.658 +=head2 start_report
   1.659 +
   1.660 +  Title   : start_report
   1.661 +  Usage   : $index->start_report( CODE )
   1.662 +  Function: Stores or returns the code to
   1.663 +            write the start of the <HTML> block, the <TITLE> block
   1.664 +            and the start of the <BODY> block of HTML.   Useful
   1.665 +            for (for instance) specifying alternative
   1.666 +            HTML if you are embedding the output in
   1.667 +            an HTML page which you have already started.
   1.668 +            (For example a routine returning a null string).
   1.669 +            Returns \&default_start_report (see below) if not
   1.670 +            set. 
   1.671 +  Example : $index->start_report( \&my_start_report )
   1.672 +  Returns : ref to CODE if called without arguments
   1.673 +  Args    : CODE
   1.674 +
   1.675 +=cut
   1.676 +
   1.677 +sub start_report {
   1.678 +    my( $self, $code ) = @_; 
   1.679 +    if ($code) {
   1.680 +        $self->{'_start_report'} = $code;
   1.681 +    }
   1.682 +    return $self->{'_start_report'} || \&default_start_report;
   1.683 +}
   1.684 +
   1.685 +=head2 default_start_report
   1.686 +
   1.687 + Title   : default_start_report
   1.688 + Usage   : $self->default_start_report($result)
   1.689 + Function: The default method to call when starting a report.
   1.690 + Returns : sting
   1.691 + Args    : First argument is a Bio::Search::Result::ResultI
   1.692 +
   1.693 +=cut
   1.694 +
   1.695 +sub default_start_report {
   1.696 +    my ($result) = @_;
   1.697 +    return sprintf(
   1.698 +    qq{<HTML>
   1.699 +      <HEAD> <CENTER><TITLE>Bioperl Reformatted HTML of %s output with Bioperl Bio::SearchIO system</TITLE></CENTER></HEAD>
   1.700 +      <!------------------------------------------------------------------->
   1.701 +      <!-- Generated by Bio::SearchIO::Writer::HTMLResultWriter          -->
   1.702 +      <!-- %s -->
   1.703 +      <!-- http://bioperl.org                                            -->
   1.704 +      <!------------------------------------------------------------------->
   1.705 +      <BODY BGCOLOR="WHITE">
   1.706 +    },$result->algorithm,$Revision);
   1.707 +    
   1.708 +}
   1.709 +
   1.710 +=head2 title
   1.711 +
   1.712 + Title   : title
   1.713 + Usage   : $self->title($CODE)
   1.714 +
   1.715 +  Function: Stores or returns the code to provide HTML for the given
   1.716 +            BLAST report that will appear at the top of the BLAST report
   1.717 +            HTML output.  Useful for (for instance) specifying
   1.718 +            alternative routines to write your own titles.
   1.719 +            Returns \&default_title (see below) if not
   1.720 +            set. 
   1.721 +  Example : $index->title( \&my_title )
   1.722 +  Returns : ref to CODE if called without arguments
   1.723 +  Args    : CODE
   1.724 +
   1.725 +=cut
   1.726 +
   1.727 +sub title {
   1.728 +    my( $self, $code ) = @_; 
   1.729 +    if ($code) {
   1.730 +        $self->{'_title'} = $code;
   1.731 +    }
   1.732 +    return $self->{'_title'} || \&default_title;
   1.733 +}
   1.734 +
   1.735 +=head2 default_title
   1.736 +
   1.737 + Title   : default_title
   1.738 + Usage   : $self->default_title($result)
   1.739 + Function: Provides HTML for the given BLAST report that will appear
   1.740 +           at the top of the BLAST report HTML output.
   1.741 + Returns : string containing HTML markup
   1.742 +           The default implementation returns <CENTER> <H1> HTML
   1.743 +           containing text such as:
   1.744 +           "Bioperl Reformatted HTML of BLASTP Search Report
   1.745 +                     for gi|1786183|gb|AAC73113.1|"
   1.746 + Args    : First argument is a Bio::Search::Result::ResultI
   1.747 +
   1.748 +=cut
   1.749 +
   1.750 +sub default_title {
   1.751 +    my ($result) = @_;
   1.752 +
   1.753 +    return sprintf(
   1.754 +        qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>},
   1.755 +		    $result->algorithm,
   1.756 +		    $result->query_name());
   1.757 +}
   1.758 +
   1.759 +
   1.760 +=head2 introduction
   1.761 +
   1.762 + Title   : introduction
   1.763 + Usage   : $self->introduction($CODE)
   1.764 +
   1.765 +  Function: Stores or returns the code to provide HTML for the given
   1.766 +            BLAST report detailing the query and the
   1.767 +            database information.
   1.768 +            Useful for (for instance) specifying
   1.769 +            routines returning alternative introductions.
   1.770 +            Returns \&default_introduction (see below) if not
   1.771 +            set. 
   1.772 +  Example : $index->introduction( \&my_introduction )
   1.773 +  Returns : ref to CODE if called without arguments
   1.774 +  Args    : CODE
   1.775 +
   1.776 +=cut
   1.777 +
   1.778 +sub introduction {
   1.779 +    my( $self, $code ) = @_; 
   1.780 +    if ($code) {
   1.781 +        $self->{'_introduction'} = $code;
   1.782 +    }
   1.783 +    return $self->{'_introduction'} || \&default_introduction;
   1.784 +}
   1.785 +
   1.786 +=head2 default_introduction
   1.787 +
   1.788 + Title   : default_introduction
   1.789 + Usage   : $self->default_introduction($result)
   1.790 + Function: Outputs HTML to provide the query
   1.791 +           and the database information
   1.792 + Returns : string containing HTML
   1.793 + Args    : First argument is a Bio::Search::Result::ResultI
   1.794 +           Second argument is string holding literature citation
   1.795 +
   1.796 +=cut
   1.797 +
   1.798 +sub default_introduction {
   1.799 +    my ($result) = @_;
   1.800 +
   1.801 +    return sprintf(
   1.802 +    qq{
   1.803 +    <b>Query=</b> %s %s<br><dd>(%s letters)</dd>
   1.804 +    <p>
   1.805 +    <b>Database:</b> %s<br><dd>%s sequences; %s total letters<p></dd>
   1.806 +    <p>
   1.807 +  }, 
   1.808 +		   $result->query_name, 
   1.809 +		   $result->query_description, 
   1.810 +		   &_numwithcommas($result->query_length), 
   1.811 +		   $result->database_name(),
   1.812 +		   &_numwithcommas($result->database_entries()), 
   1.813 +		   &_numwithcommas($result->database_letters()),
   1.814 +		   );
   1.815 +}
   1.816 +
   1.817 +=head2 end_report
   1.818 +
   1.819 + Title   : end_report
   1.820 + Usage   : $self->end_report()
   1.821 + Function: The method to call when ending a report, this is
   1.822 +           mostly for cleanup for formats which require you to 
   1.823 +           have something at the end of the document (</BODY></HTML>)
   1.824 +           for HTML
   1.825 + Returns : string
   1.826 + Args    : none
   1.827 +
   1.828 +=cut
   1.829 +
   1.830 +sub end_report {
   1.831 +    return "</BODY>\n</HTML>\n";
   1.832 +}
   1.833 +
   1.834 +# copied from Bio::Index::Fasta
   1.835 +# useful here as well
   1.836 +
   1.837 +=head2 id_parser
   1.838 +
   1.839 +  Title   : id_parser
   1.840 +  Usage   : $index->id_parser( CODE )
   1.841 +  Function: Stores or returns the code used by record_id to
   1.842 +            parse the ID for record from a string.  Useful
   1.843 +            for (for instance) specifying a different
   1.844 +            parser for different flavours of FASTA file. 
   1.845 +            Returns \&default_id_parser (see below) if not
   1.846 +            set. If you supply your own id_parser
   1.847 +            subroutine, then it should expect a fasta
   1.848 +            description line.  An entry will be added to
   1.849 +            the index for each string in the list returned.
   1.850 +  Example : $index->id_parser( \&my_id_parser )
   1.851 +  Returns : ref to CODE if called without arguments
   1.852 +  Args    : CODE
   1.853 +
   1.854 +=cut
   1.855 +
   1.856 +sub id_parser {
   1.857 +    my( $self, $code ) = @_;
   1.858 +    
   1.859 +    if ($code) {
   1.860 +        $self->{'_id_parser'} = $code;
   1.861 +    }
   1.862 +    return $self->{'_id_parser'} || \&default_id_parser;
   1.863 +}
   1.864 +
   1.865 +
   1.866 +
   1.867 +=head2 default_id_parser
   1.868 +
   1.869 +  Title   : default_id_parser
   1.870 +  Usage   : $id = default_id_parser( $header )
   1.871 +  Function: The default Fasta ID parser for Fasta.pm
   1.872 +            Returns $1 from applying the regexp /^>\s*(\S+)/
   1.873 +            to $header.
   1.874 +  Returns : ID string
   1.875 +            The default implementation checks for NCBI-style
   1.876 +            identifiers in the given string ('gi|12345|AA54321').
   1.877 +            For these IDs, it extracts the GI and accession and
   1.878 +            returns a two-element list of strings (GI, acc).
   1.879 +  Args    : a fasta header line string
   1.880 +
   1.881 +=cut
   1.882 +
   1.883 +sub default_id_parser {    
   1.884 +    my ($string) = @_;
   1.885 +    my ($gi,$acc);
   1.886 +    if( $string =~ s/gi\|(\d+)\|?// ) 
   1.887 +    { $gi = $1; $acc = $1;}
   1.888 +    
   1.889 +    if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) {
   1.890 +	$acc = defined $2 ? $2 : $1;
   1.891 +    } else {
   1.892 +        $acc = $string;
   1.893 +	$acc =~ s/^\s+(\S+)/$1/;
   1.894 +	$acc =~ s/(\S+)\s+$/$1/;	
   1.895 +    } 
   1.896 +    return ($gi,$acc);
   1.897 +}
   1.898 +	
   1.899 +sub MIN { $a <=> $b ? $a : $b; }
   1.900 +sub MAX { $a <=> $b ? $b : $a; }
   1.901 +
   1.902 +sub footer { 
   1.903 +    my ($self) = @_;
   1.904 +    return "<hr><h5>Produced by Bioperl module ".ref($self)." on $DATE<br>Revision: $Revision</h5>\n"
   1.905 +    
   1.906 +}
   1.907 +
   1.908 +=head2 algorithm_reference
   1.909 +
   1.910 + Title   : algorithm_reference
   1.911 + Usage   : my $reference = $writer->algorithm_reference($result);
   1.912 + Function: Returns the appropriate Bibliographic reference for the 
   1.913 +           algorithm format being produced
   1.914 + Returns : String
   1.915 + Args    : L<Bio::Search::Result::ResultI> to reference
   1.916 +
   1.917 +
   1.918 +=cut
   1.919 +
   1.920 +sub algorithm_reference {
   1.921 +   my ($self,$result) = @_;
   1.922 +   return '' if( ! defined $result || !ref($result) ||
   1.923 +		 ! $result->isa('Bio::Search::Result::ResultI')) ;   
   1.924 +   if( $result->algorithm =~ /BLAST/i ) {
   1.925 +       my $res = $result->algorithm . ' ' . $result->algorithm_version . "<p>";
   1.926 +       if( $result->algorithm_version =~ /WashU/i ) {
   1.927 +	   return $res .
   1.928 +"Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.<br>
   1.929 +All Rights Reserved.<p>
   1.930 +<b>Reference:</b>  Gish, W. (1996-2000) <a href=\"http://blast.wustl.edu\">http://blast.wustl.edu</a><p>";	   
   1.931 +       } else {
   1.932 +	   return $res . 
   1.933 +"<b>Reference:</b> Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,<br>
   1.934 +Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),<br>
   1.935 +\"Gapped BLAST and PSI-BLAST: a new generation of protein database search<br>
   1.936 +programs\",  Nucleic Acids Res. 25:3389-3402.<p>";
   1.937 +
   1.938 +       }       
   1.939 +   } elsif( $result->algorithm =~ /FAST/i ) {
   1.940 +       return $result->algorithm . " " . $result->algorithm_version . "<br>" .
   1.941 +	   "\n<b>Reference:</b> Pearson et al, Genomics (1997) 46:24-36<p>";
   1.942 +   } else { 
   1.943 +       return '';
   1.944 +   }
   1.945 +}
   1.946 +
   1.947 +# from Perl Cookbook 2.17
   1.948 +sub _numwithcommas {
   1.949 +    my $num = reverse( $_[0] );
   1.950 +    $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g;
   1.951 +    return scalar reverse $num;
   1.952 +}
   1.953 +
   1.954 +=head2 Methods Bio::SearchIO::SearchWriterI
   1.955 +
   1.956 +L<Bio::SearchIO::SearchWriterI> inherited methods.
   1.957 +
   1.958 +=head2 filter
   1.959 +
   1.960 + Title   : filter
   1.961 + Usage   : $writer->filter('hsp', \&hsp_filter);
   1.962 + Function: Filter out either at HSP,Hit,or Result level
   1.963 + Returns : none
   1.964 + Args    : string => data type,
   1.965 +           CODE reference
   1.966 +
   1.967 +
   1.968 +=cut
   1.969 +
   1.970 +
   1.971 +=head2 no_wublastlinks
   1.972 +
   1.973 + Title   : no_wublastlinks
   1.974 + Usage   : $obj->no_wublastlinks($newval)
   1.975 + Function: Get/Set boolean value regarding whether or not to display
   1.976 +           Link = (1) 
   1.977 +           type output in the report output (WU-BLAST only)
   1.978 + Returns : boolean
   1.979 + Args    : on set, new boolean value (a scalar or undef, optional)
   1.980 +
   1.981 +
   1.982 +=cut
   1.983 +
   1.984 +sub no_wublastlinks{
   1.985 +    my $self = shift;
   1.986 +
   1.987 +    return $self->{'no_wublastlinks'} = shift if @_;
   1.988 +    return $self->{'no_wublastlinks'};
   1.989 +}
   1.990 +
   1.991 +1;