1
# $Id: LinkSet.pm 15163 2008-12-15 18:39:09Z cjfields $
3
# BioPerl module for Bio::Tools::EUtilities::Link::LinkSet
5
# Cared for by Chris Fields
7
# Copyright Chris Fields
9
# You may distribute this module under the same terms as perl itself
11
# POD documentation - main docs before the code
13
# Part of the EUtilities BioPerl package
17
Bio::Tools::EUtilities::Link::LinkSet
31
User feedback is an integral part of the
32
evolution of this and other Bioperl modules. Send
33
your comments and suggestions preferably to one
34
of the Bioperl mailing lists. Your participation
37
bioperl-l@lists.open-bio.org - General discussion
38
http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
42
Report bugs to the Bioperl bug tracking system to
43
help us keep track the bugs and their resolution.
44
Bug reports can be submitted via the web.
46
http://bugzilla.open-bio.org/
50
Email cjfields at uiuc dot edu
54
The rest of the documentation details each of the
55
object methods. Internal methods are usually
60
# Let the code begin...
62
package Bio::Tools::EUtilities::Link::LinkSet;
66
use base qw(Bio::Root::Root Bio::Tools::EUtilities::HistoryI);
67
use Bio::Tools::EUtilities::Link::UrlLink;
68
use Bio::Tools::EUtilities::Info::LinkInfo;
71
my ($class,@args) = @_;
72
my $self = $class->SUPER::new(@args);
73
my ($type) = $self->_rearrange([qw(DATATYPE)],@args);
75
$self->eutil('elink');
76
$self->datatype($type);
83
Usage : my @ids = $linkset->get_ids
84
Function : returns list of retrieved IDs
85
Returns : array of IDs
87
Notes : Cmd Description
88
acheck same as get_submitted_ids
89
lcheck same as get_submitted_ids
90
ncheck same as get_submitted_ids
91
prlinks same as get_submitted_ids
92
llinks same as get_submitted_ids
93
llinkslib same as get_submitted_ids
94
neighbor linked IDs for database in get_database
95
neighbor_history linked IDs for database in get_database
101
unless ($self->{'_sorted_id'}) {
102
@{$self->{'_sorted_id'}} =
104
$self->{'_id'}->{$a}->[0] <=>
105
$self->{'_id'}->{$b}->[0]
106
} keys %{$self->{'_id'}};
108
return @{$self->{'_sorted_id'}};
114
Usage : my $db = $info->get_database;
115
Function : returns single database name (eutil-compatible). This is the
116
queried database. For elinks (which have 'db' and 'dbfrom')
117
this is equivalent to db/dbto (use get_dbfrom() to for the latter).
118
Note that this only returns the first db; in some cases this may
119
not be what you want (when multiple dbs are queried, for instance)
122
Notes : with all elink cmd arguments
127
return ($_[0]->get_databases)[0];
130
=head2 get_db (alias for get_database)
135
return shift->get_database;
138
=head2 get_dbto (alias for get_database)
143
return shift->get_database;
148
Title : get_databases
149
Usage : my $string = $linkset->get_databases;
150
Function : retrieve databases referred to for this linkset
151
these may be present as a single database or embedded in
152
Returns : array of strings
160
my @dbs = sort map {$_->get_database}
161
grep {!$tmp{$_->get_database}++} ($self->get_LinkInfo);
162
unshift @dbs, $self->{'_dbto'} if $self->{'_dbto'} && !$tmp{$self->{'_dbto'}}++;
166
=head2 get_dbs (alias for get_databases)
171
return shift->get_databases;
177
Usage : my $string = $linkset->get_dbfrom;
178
Function : retrieve originating database for this linkset
184
sub get_dbfrom { return shift->{'_dbfrom'} }
186
=head2 get_link_names
188
Title : get_link_names
189
Usage : my $string = $linkset->get_link_names;
190
Function : retrieve eutil-compatible link names
191
Returns : array of strings
193
Notes : Each LinkSet can hold multiple LinkInfo objects (each containing
194
a link name). Also, some LinkSets define a single link name. This
195
returns an array with all unique linknames globbed both sources, if
204
if ($self->{'_linkname'}) {
205
push @lns, $self->{'_linkname'};
206
$tmps{$self->{'_linkname'}}++;
208
push @lns, map {$_->get_link_name} $self->get_LinkInfo;
214
Title : get_link_name
215
Usage : my $string = $linkset->get_link_name;
216
Function : retrieve eutil-compatible link name
217
Returns : single link name
223
return ($_[0]->get_linknames)[0];
226
=head2 get_submitted_ids
228
Title : get_submitted_ids
229
Usage : my $string = $linkset->get_submitted_ids;
230
Function : retrieve original ID list
236
sub get_submitted_ids {
238
my $datatype = $self->datatype;
239
if ($datatype eq 'idcheck' || $datatype eq 'urllink') {
240
return $self->get_ids;
241
} elsif ($self->{'_submitted_ids'}) {
242
return @{$self->{'_submitted_ids'}};
251
Usage : if (my $linkset->has_scores) {...}
252
Function : returns TRUE if score data is present
260
return exists $self->{'_has_scores'} ? 1 : 0;
266
Usage : %scores = $linkset->get_scores;
267
Function : returns flattened list or hash ref containing ID => score pairs
268
Returns : hash or hash ref (based on list or scalar context)
275
# do we want to cache this or generate only when needed? Likely won't be
276
# called more than once...
277
return unless $self->has_scores;
278
my %scores = map {$_ => $self->{'_id'}->{$_}->[1]} keys %{$self->{'_id'}};
282
=head2 get_score_by_id
284
Title : get_score_by_id
285
Usage : $score = $linkset->get_score_by_id($id);
286
Function : returns the score for a particular primary ID
288
Args : [REQUIRED] Primary ID for the score lookup
292
sub get_score_by_id {
293
my ($self, $id) = @_;
294
($id && exists $self->{'_id'}->{$id}) ? return $self->{'_id'}->{$id}->[1] :
301
Usage : if ($linkset->has_linkout) {...}
302
Function : returns TRUE if the single ID present in this linkset has a linkout
305
Notes : this checks cmd=lcheck (boolean for a linkout) and also backchecks
306
cmd=acheck for databases with name 'LinkOut'
312
if (exists $self->{'_haslinkout'}) {
313
return $self->{'_haslinkout'} eq 'Y' ? 1 : 0;
315
return (grep {$_ eq 'LinkOut'} $self->get_databases) ? 1 : 0;
322
Usage : if ($linkset->has_neighbor) {...}
323
Function : returns TRUE if the single ID present in this linkset has a neighbor
327
Notes : this checks cmd=ncheck (boolean for a neighbor in same database); no
328
other checks performed at this time
334
if (exists $self->{'_hasneighbor'}) {
335
return $self->{'_hasneighbor'} eq 'Y' ? 1 : 0;
344
Usage : while (my $url = $linkset->next_UrlLink) {...}
345
Function : iterate through UrlLink objects
346
Returns : Bio::Tools::EUtilities::Link::UrlLink
353
unless ($self->{"_urllinks_it"}) {
354
my @ul = $self->get_UrlLinks;
355
$self->{"_urllinks_it"} = sub {return shift @ul}
357
$self->{'_urllinks_it'}->();
363
Usage : my @urls = $linkset->get_UrlLinks
364
Function : returns all UrlLink objects
365
Returns : list of Bio::Tools::EUtilities::Link::UrlLink
372
return ref $self->{'_urllinks'} ? @{ $self->{'_urllinks'} } : return;
377
Title : next_LinkInfo
378
Usage : while (my $info = $linkset->next_LinkInfo) {...}
379
Function : iterate through LinkInfo objects
380
Returns : Bio::Tools::EUtilities::Link::LinkInfo
387
unless ($self->{"_linkinfo_it"}) {
388
my @li = $self->get_LinkInfo;
389
$self->{"_linkinfo_it"} = sub {return shift @li}
391
$self->{'_linkinfo_it'}->();
397
Usage : my @links = $linkset->get_LinkInfo
398
Function : returns all LinkInfo objects
399
Returns : list of Bio::Tools::EUtilities::Link::LinkInfo
406
return ref $self->{'_linkinfo'} ? @{ $self->{'_linkinfo'} } : return ();
412
Usage : $info->rewind() # rewinds all (default)
413
$info->rewind('links') # rewinds only links
414
Function : 'rewinds' (resets) specified interators (all if no arg)
416
Args : [OPTIONAL] String:
417
'all' - all iterators (default)
418
'linkinfo' or 'linkinfos' - LinkInfo objects only
419
'urllinks' - UrlLink objects only
424
my %VALID_DATA = ('linkinfo' => 'linkinfo',
425
'linkinfos' => 'linkinfo',
426
'urllinks' => 'urllinks');
429
my ($self, $arg) = @_;
431
if (exists $VALID_DATA{$arg}) {
432
delete $self->{'_'.$arg.'_it'};
433
} elsif ($arg eq 'all') {
434
delete $self->{'_'.$_.'_it'} for values %VALID_DATA;
439
# private methods and handlers
443
'IdList' => \&_add_submitted_ids,
444
'Id' => \&_add_retrieved_ids,
445
'LinkInfo' => \&_add_linkinfo,
446
'Link' => \&_add_retrieved_ids,
447
'ObjUrl' => \&_add_objurls,
451
my ($self, $data) = @_;
452
for my $key (qw(IdList Link Id ObjUrl LinkInfo)) {
453
next if !exists $data->{$key};
454
my $handler = $DATA_HANDLER{$key};
455
$self->$handler($data);
456
delete $data->{$key};
459
if ($self->datatype eq 'idcheck' && exists $data->{content}) {
460
%{$self->{'_id'} } = ($data->{content} => [1]);
461
delete $data->{content}
463
map {$self->{'_'.lc $_} = $data->{$_}} keys %$data;
468
sub _add_submitted_ids {
469
my ($self, $data) = @_;
470
@{$self->{'_submitted_ids'}} = @{$data->{IdList}->{Id}} ;
473
sub _add_retrieved_ids {
474
my ($self, $data) = @_;
475
# map all IDs to deal with possible scores
476
# ID => {'count' = POSITION, 'score' => SCORE}
477
if (exists $data->{Link}) {
479
for my $link (@{$data->{Link}}) {
480
if (exists $link->{Score}) {
481
$self->{'_has_scores'}++;
482
$self->{'_id'}->{$link->{Id}->[0]} = [ $ct++,$link->{Score}];
484
$self->{'_id'}->{$link->{Id}->[0]} = [ $ct++ ];
488
elsif (exists $data->{Id}) { # urls
489
%{$self->{'_id'} } = ($data->{Id}->[0] => [1]);
494
my ($self, $data) = @_;
495
for my $urldata (@{$data->{ObjUrl}}) {
496
$urldata->{dbfrom} = $data->{DbFrom} if exists $data->{DbFrom};
497
my $obj = Bio::Tools::EUtilities::Link::UrlLink->new(-eutil => 'elink',
498
-datatype => 'urldata',
499
-verbose => $self->verbose
501
$obj->_add_data($urldata);
502
push @{$self->{'_urllinks'}}, $obj;
507
my ($self, $data) = @_;
508
for my $linkinfo (@{$data->{LinkInfo}}) {
509
$linkinfo->{dbfrom} = $data->{DbFrom} if exists $data->{DbFrom};
510
my $obj = Bio::Tools::EUtilities::Info::LinkInfo->new(-eutil => 'elink',
511
-datatype => 'linkinfo',
512
-verbose => $self->verbose
514
$obj->_add_data($linkinfo);
515
push @{$self->{'_linkinfo'}}, $obj;
522
Usage : $foo->to_string()
523
Function : converts current object to string
525
Args : (optional) simple data for text formatting
526
Note : Used generally for debugging and for various print methods
532
my $level = shift || 0;
533
my $pad = 20 - $level;
535
my %tags = (1 => ['get_databases' => 'DB'],
536
2 => ['get_ids' => 'ID'],
537
3 => ['get_link_names' => 'Link Names'],
538
5 => ['get_submitted_ids' => 'Submitted IDs'],
539
6 => ['has_scores' => 'Scores?'],
540
7 => ['has_linkout' => 'LinkOut?'],
541
8 => ['has_neighbor' => 'DB Neighbors?'],
542
9 => ['get_webenv' => 'WebEnv'],
543
10 => ['get_query_key' => 'Key'],
546
for my $tag (sort {$a <=> $b} keys %tags) {
547
my ($m, $nm) = ($tags{$tag}->[0], $tags{$tag}->[1]);
548
# using this awkward little construct to deal with both lists and scalars
549
my @content = grep {defined $_} $self->$m();
550
next unless @content;
551
$string .= sprintf("%-*s%-*s%s\n",
554
$self->_text_wrap(':',
556
join(', ',@content)));
558
while (my $li = $self->next_LinkInfo) {
559
$string .= $li->to_string(4);
561
while (my $ui = $self->next_UrlLink) {
562
$string .= $ui->to_string(4);
564
if ($self->has_scores) {
565
$string .= "Scores:\n";
566
my %scores = $self->get_scores;
567
$string .= sprintf("%-*s%-*s%s\n",
569
$pad - 4, 'ID', 'Score'
571
for my $id ($self->get_ids) {
572
$string .= sprintf("%-*s%-*s%s\n",
574
$pad - 4, $id, $scores{$id}