114
116
=head2 Annotations
116
118
LANL DB annotations have been organized into a number of natural
117
groupings, tagged C<Geo>, C<Patient>, C<Virus>, and <StdMap>. After a
119
groupings, tagged C<Geo>, C<Patient>, C<Virus>, and C<StdMap>. After a
118
120
successful query, each id is associated with a tree of
119
121
L<Bio::Annotation::SimpleValue> objects. These can be accessed with
120
methods C<get_value()> and C<put_value()> described in APPENDIX.
122
methods C<get_value> and C<put_value> described in APPENDIX.
122
124
=head2 Delayed/partial query runs
161
163
bioperl-l@bioperl.org - General discussion
162
164
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
168
Please direct usage questions or support issues to the mailing list:
170
I<bioperl-l@bioperl.org>
172
rather than to the module maintainer directly. Many experienced and
173
reponsive experts will be able look at the problem and quickly
174
address it. Please include a thorough description of the problem
175
with code and data examples if at all possible.
164
177
=head2 Reporting Bugs
166
179
Report bugs to the Bioperl bug tracking system to help us keep track
510
525
sub add_annotations_for_id{
511
526
my $self = shift;
512
527
my ($id, $ac) = @_;
528
$id = "" unless defined $id; # avoid warnings
513
529
$ac = new Bio::Annotation::Collection unless defined $ac;
514
530
$self->throw(-class=>'Bio::Root::BadParameter'
515
531
-text=>'Bio::Annotation::Collection required at arg 2',
623
Usage : $ac->get_keys($tagname_level_1, $tagname_level_2,...)
624
Function: Get an array of tagnames underneath the named tag nodes
625
Example : # prints the values of the members of Category 1...
626
print map { $ac->get_value($_) } $ac->get_keys('Category 1') ;
627
Returns : array of tagnames or empty list if the arguments represent a leaf
628
Args : [array of] tagname[s]
604
632
=head1 GenBank accession manipulation methods
606
634
=head2 get_accessions
745
773
return $self->{'_session_id'} = shift if @_;
746
774
return $self->{'_session_id'};
779
Usage : $obj->_run_level($newval)
780
Function: returns the level at which the query has so far been run
782
Returns : value of _run_level (a scalar)
783
Args : on set, new value (a scalar or undef, optional)
790
return $self->{'_RUN_LEVEL'} = shift if @_;
791
return $self->{'_RUN_LEVEL'};
749
794
=head2 _run_option
1252
1298
my $seqs_found_re = qr{Displaying$tags_re*(?:\s*[0-9-]*\s*)*$tags_re*of$tags_re*\s*([0-9]+)$tags_re*sequences found};
1253
1299
my $no_seqs_found_re = qr{Sorry.*no sequences found};
1254
1300
my $too_many_re = qr{too many records: $tags_re*([0-9]+)};
1301
my $sys_error_re = qr{[Ss]ystem error};
1302
my $sys_error_extract_re = qr{${tags_re}error:.*?<td[^>]+>${tags_re}(.*?)<br>};
1255
1303
# find something like:
1256
1304
# <strong>tables without join:</strong><br>SequenceAccessions<br>
1257
1305
my $tbl_no_join_re = qr{tables without join}i;
1287
1335
# squish fieldnames into hash keys
1288
1336
my %q = @query;
1289
1337
@interface = grep {defined} map {my ($tbl,$col) = /^(.*)\.(.*)$/} keys %q;
1338
my $err_val = ""; # to contain informative (ha!) value if error is parsed
1290
1340
eval { # encapsulate communication errors here, defer biothrows...
1292
1342
#mark the useragent should be setable from outside (so we can modify timeouts, etc)
1393
1450
my $self = shift;
1395
1452
my ($seqGet) = (@_);
1396
my (@data, @cols, %antbl, %antype,%anxlt, @ankeys );
1453
my (@data, @cols, %antbl, %antype);
1397
1454
my $numseq = 0;
1398
1455
my ($schema, @retseqs, %rec, $ac);
1400
'country' => 'sample_country',
1401
'coreceptor' => 'second_receptor',
1402
'patient health' => 'health_status',
1403
'year' => 'sample_year'
1406
1456
$schema = $self->_schema;
1408
1458
$self->_lanl_response ||
1412
1462
foreach my $rsp (@{$self->_lanl_response}) {
1413
1463
@data = split(/\r|\n/, $rsp->content);
1414
$numseq += ( shift(@data) =~ /Number.*:\s([0-9]+)/ )[0];
1415
@cols = split(/\t/, shift @data);
1467
} while ($l !~ /Number/);
1468
$numseq += ( $l =~ /Number.*:\s([0-9]+)/ )[0];
1469
@cols = split(/\t/, shift(@data));
1417
1470
# mappings from column headings to annotation keys
1418
1471
# squish into hash keys
1419
1472
my %q = @{ shift @{$self->_lanl_query} };
1420
1473
%antbl = $schema->ankh(keys %q);
1421
foreach (values %antbl) {
1423
my $k = $_->{ankey};
1426
$_->{ankey} = $k; #replace with normalized version
1427
$antype{$k} = $_->{antype};
1430
foreach (@cols) { #these are the data column headers
1434
### conversion kludge for specials
1435
### (i.e.,column headers that do not match the
1436
### true field names)
1437
$c = $specials{$c} if (grep /$c/, keys %specials);
1440
### following line grep: looks for a match of the
1441
### column name at the end of the true field names to
1442
### make the translation...
1443
### only captures the first match.
1444
my ($match_fld) = grep (/$c$/i, keys %antbl);
1445
$anxlt{$_} = $antbl{$match_fld}->{ankey} if $match_fld;
1474
# get the category for each annotation
1475
map { $antype{ $_->{ankey} } = $_->{antype} } values %antbl;
1476
# normalize column headers
1477
map { tr/ /_/; $_ = lc; } @cols;
1447
1478
foreach (@data) {
1448
1479
@rec{@cols} = split /\t/;
1449
my $id = $rec{'SE_id'};
1480
my $id = $rec{'se_id'};
1451
1481
$self->add_id($id);
1452
1482
$ac = new Bio::Annotation::Collection();
1454
1483
#create annotations
1455
# need to handle reference, comment, dblink annots
1456
1484
foreach (@cols) {
1457
#accession should be added in here as a matter of course
1458
my $k = $anxlt{$_}; # annot key
1460
my $t = $antype{$k}; # annot type
1486
my $t = $antype{$_} || "Unclassified";
1461
1487
my $d = $rec{$_}; # the data
1462
$ac->put_value(-KEYS=>[$t, $k], -VALUE=>$d) if $k;
1488
$ac->put_value(-KEYS=>[$t, $_], -VALUE=>$d);
1464
1490
$self->add_annotations_for_id($id, $ac);