242
242
for my $type (@types) {
243
my ($primary_tag,$source_tag);
244
if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
245
$primary_tag = $type->method;
246
$source_tag = $type->source;
248
($primary_tag,$source_tag) = split ':',$type,2;
250
if (defined $source_tag) {
251
my $id = $db->{lc "$primary_tag:$source_tag"};
252
$result{$id}++ if defined $id;
254
@all_types = $self->types unless @all_types;
255
$result{$db->{$_}}++ foreach grep {/^$primary_tag:/} @all_types;
243
my ($primary_tag,$source_tag);
244
if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
245
$primary_tag = $type->method;
246
$source_tag = $type->source;
248
($primary_tag,$source_tag) = split ':',$type,2;
250
if (defined $source_tag) {
251
my $id = $db->{lc "$primary_tag:$source_tag"};
252
$result{$id}++ if defined $id;
254
@all_types = $self->types unless @all_types;
255
$result{$db->{$_}}++ foreach grep {/^$primary_tag:/} @all_types;
261
261
sub _update_location_index {
263
my ($obj,$id,$delete) = @_;
265
my $db = $self->index_db('locations')
266
or $self->throw("Couldn't find 'locations' index file");
268
my $seq_id = $obj->seq_id || '';
269
my $start = $obj->start || '';
270
my $end = $obj->end || '';
271
my $strand = $obj->strand;
272
my $bin_min = int $start/BINSIZE;
273
my $bin_max = int $end/BINSIZE;
275
my $typeid = $self->add_typeid($self->_obj_to_type($obj));
276
my $seq_no = $self->add_seqid($seq_id);
278
for (my $bin = $bin_min; $bin <= $bin_max; $bin++ ) {
279
my $key = $seq_no * MAX_SEQUENCES + $bin;
280
$self->update_or_delete($delete,$db,$key,pack("i5",$id,$start,$end,$strand,$typeid));
263
my ($obj,$id,$delete) = @_;
265
my $db = $self->index_db('locations')
266
or $self->throw("Couldn't find 'locations' index file");
268
my $seq_id = $obj->seq_id || '';
269
my $start = $obj->start || '';
270
my $end = $obj->end || '';
271
my $strand = $obj->strand;
272
my $bin_min = int $start/BINSIZE;
273
my $bin_max = int $end/BINSIZE;
275
my $typeid = $self->add_typeid($self->_obj_to_type($obj));
276
my $seq_no = $self->add_seqid($seq_id);
278
for (my $bin = $bin_min; $bin <= $bin_max; $bin++ ) {
279
my $key = $seq_no * MAX_SEQUENCES + $bin;
280
$self->update_or_delete($delete,$db,$key,pack("i5",$id,$start,$end,$strand,$typeid));
287
my ($seq_id,$start,$end,$strand,
288
$name,$class,$allow_aliases,
293
) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
294
'NAME','CLASS','ALIASES',
295
['TYPES','TYPE','PRIMARY_TAG'],
296
['ATTRIBUTES','ATTRIBUTE'],
301
my (@from,@where,@args,@group);
302
$range_type ||= 'overlaps';
305
unless (defined $name or defined $seq_id or defined $types or defined $attributes) {
306
my $is_indexed = $self->index_db('is_indexed');
307
@result = $is_indexed ? grep {$is_indexed->{$_}} keys %{$self->db}
308
: grep { !/^\./ }keys %{$self->db};
314
if (defined($name)) {
315
# hacky backward compatibility workaround: a class of 'Sequence' is treated as no class, so the name gets no class prefix
316
undef $class if $class && $class eq 'Sequence';
317
$name = "$class:$name" if defined $class && length $class > 0;
318
$result &&= $self->filter_by_name($name,$allow_aliases,\%found);
321
if (defined $seq_id) { # location with or without types
322
my $typelist = defined $types ? $self->_matching_types($types) : undef;
323
$result &&= $self->filter_by_type_and_location($seq_id,$start,$end,$strand,$range_type,
327
elsif (defined $types) { # types without location
328
$result &&= $self->filter_by_type($types,\%found);
331
if (defined $attributes) {
332
$result &&= $self->filter_by_attribute($attributes,\%found);
335
push @result,keys %found if $result;
336
return $iterator ? Bio::DB::SeqFeature::Store::berkeleydb::Iterator->new($self,\@result)
337
: map {$self->fetch($_)} @result;
287
my ($seq_id,$start,$end,$strand,
288
$name,$class,$allow_aliases,
293
) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
294
'NAME','CLASS','ALIASES',
295
['TYPES','TYPE','PRIMARY_TAG'],
296
['ATTRIBUTES','ATTRIBUTE'],
301
my (@from,@where,@args,@group);
302
$range_type ||= 'overlaps';
305
unless (defined $name or defined $seq_id or defined $types or defined $attributes) {
306
my $is_indexed = $self->index_db('is_indexed');
307
@result = $is_indexed ? grep {$is_indexed->{$_}} keys %{$self->db}
308
: grep { !/^\./ }keys %{$self->db};
314
if (defined($name)) {
315
# hacky backward compatibility workaround: a class of 'Sequence' is treated as no class, so the name gets no class prefix
316
undef $class if $class && $class eq 'Sequence';
317
$name = "$class:$name" if defined $class && length $class > 0;
318
$result &&= $self->filter_by_name($name,$allow_aliases,\%found);
321
if (defined $seq_id) { # location with or without types
322
my $typelist = defined $types ? $self->_matching_types($types) : undef;
323
$result &&= $self->filter_by_type_and_location(
324
$seq_id, $start, $end, $strand, $range_type, $typelist, \%found
328
elsif (defined $types) { # types without location
329
$result &&= $self->filter_by_type($types,\%found);
332
if (defined $attributes) {
333
$result &&= $self->filter_by_attribute($attributes,\%found);
336
push @result,keys %found if $result;
337
return $iterator ? Bio::DB::SeqFeature::Store::berkeleydb::Iterator->new($self,\@result)
338
: map {$self->fetch($_)} @result;
340
341
sub filter_by_type {
342
my ($types,$filter) = @_;
343
my @types = ref $types eq 'ARRAY' ? @$types : $types;
345
my $index = $self->index_db('types');
346
my $db = tied(%$index);
350
for my $type (@types) {
351
my ($primary_tag,$source_tag);
352
if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
353
$primary_tag = $type->method;
354
$source_tag = $type->source;
356
($primary_tag,$source_tag) = split ':',$type,2;
359
$primary_tag = quotemeta($primary_tag);
360
$source_tag = quotemeta($source_tag);
361
my $match = length $source_tag ? "^$primary_tag:$source_tag\$" : "^$primary_tag:";
362
my $key = lc "$primary_tag:$source_tag";
365
# If filter is already provided, then it is usually faster to
368
for my $id (keys %$filter) {
369
my $obj = $self->_fetch($id) or next;
370
push @results,$id if $obj->type =~ /$match/i;
376
my $types = $self->typeid_db;
377
my @typeids = map {$types->{$_}} grep {/$match/} keys %$types;
378
for my $t (@typeids) {
380
for (my $status = $db->seq($k,$value,R_CURSOR);
381
$status == 0 && $k == $t;
382
$status = $db->seq($k,$value,R_NEXT)) {
383
next if %$filter && !$filter->{$value}; # don't even bother
384
push @results,$value;
389
$self->update_filter($filter,\@results);
343
my ($types,$filter) = @_;
344
my @types = ref $types eq 'ARRAY' ? @$types : $types;
346
my $index = $self->index_db('types');
347
my $db = tied(%$index);
351
for my $type (@types) {
352
my ($primary_tag,$source_tag);
353
if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
354
$primary_tag = $type->method;
355
$source_tag = $type->source;
357
($primary_tag,$source_tag) = split ':',$type,2;
360
$primary_tag = quotemeta($primary_tag);
361
$source_tag = quotemeta($source_tag);
362
my $match = length $source_tag ? "^$primary_tag:$source_tag\$" : "^$primary_tag:";
363
my $key = lc "$primary_tag:$source_tag";
366
# If filter is already provided, then it is usually faster to
369
for my $id (keys %$filter) {
370
my $obj = $self->_fetch($id) or next;
371
push @results,$id if $obj->type =~ /$match/i;
377
my $types = $self->typeid_db;
378
my @typeids = map {$types->{$_}} grep {/$match/} keys %$types;
379
for my $t (@typeids) {
381
for (my $status = $db->seq($k,$value,R_CURSOR);
382
$status == 0 && $k == $t;
383
$status = $db->seq($k,$value,R_NEXT)) {
384
next if %$filter && !$filter->{$value}; # don't even bother
385
push @results,$value;
390
$self->update_filter($filter,\@results);
392
393
sub filter_by_type_and_location {
394
my ($seq_id,$start,$end,$strand,$range_type,$typelist,$filter) = @_;
397
my $index = $self->index_db('locations');
398
my $db = tied(%$index);
400
my $binstart = defined $start ? int $start/BINSIZE : 0;
401
my $binend = defined $end ? int $end/BINSIZE : MAX_SEQUENCES-1;
406
$start = MININT if !defined $start;
407
$end = MAXINT if !defined $end;
409
my $seq_no = $self->seqid_id($seq_id);
410
return unless defined $seq_no;
412
if ($range_type eq 'overlaps' or $range_type eq 'contains') {
413
my $keystart = $seq_no * MAX_SEQUENCES + $binstart;
414
my $keystop = $seq_no * MAX_SEQUENCES + $binend;
417
for (my $status = $db->seq($keystart,$value,R_CURSOR);
418
$status == 0 && $keystart <= $keystop;
419
$status = $db->seq($keystart,$value,R_NEXT)) {
420
my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
421
next if $seenit{$id}++;
422
next if $strand && $fstrand != $strand;
423
next if $typelist && !$typelist->{$ftype};
424
if ($range_type eq 'overlaps') {
425
next unless $fend >= $start && $fstart <= $end;
427
elsif ($range_type eq 'contains') {
428
next unless $fstart >= $start && $fend <= $end;
430
next if %$filter && !$filter->{$id}; # don't bother
435
# for contained_in, we look for features that start at or before, and end at or after, the specified range
436
# this is incredibly inefficient, but fortunately the query is rare (?)
437
elsif ($range_type eq 'contained_in') {
438
my $keystart = $seq_no * MAX_SEQUENCES;
439
my $keystop = $seq_no * MAX_SEQUENCES + $binstart;
442
# do the left part of the range
443
for (my $status = $db->seq($keystart,$value,R_CURSOR);
444
$status == 0 && $keystart <= $keystop;
445
$status = $db->seq($keystart,$value,R_NEXT)) {
446
my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
447
next if $seenit{$id}++;
448
next if $strand && $fstrand != $strand;
449
next if $typelist && !$typelist->{$ftype};
450
next unless $fstart <= $start && $fend >= $end;
451
next if %$filter && !$filter->{$id}; # don't bother
455
# do the right part of the range
456
$keystart = $seq_no*MAX_SEQUENCES+$binend;
457
for (my $status = $db->seq($keystart,$value,R_CURSOR);
459
$status = $db->seq($keystart,$value,R_NEXT)) {
460
my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
461
next if $seenit{$id}++;
462
next if $strand && $fstrand != $strand;
463
next unless $fstart <= $start && $fend >= $end;
464
next if $typelist && !$typelist->{$ftype};
465
next if %$filter && !$filter->{$id}; # don't bother
471
$self->update_filter($filter,\@results);
395
my ($seq_id,$start,$end,$strand,$range_type,$typelist,$filter) = @_;
398
my $index = $self->index_db('locations');
399
my $db = tied(%$index);
401
my $binstart = defined $start ? int $start/BINSIZE : 0;
402
my $binend = defined $end ? int $end/BINSIZE : MAX_SEQUENCES-1;
407
$start = MININT if !defined $start;
408
$end = MAXINT if !defined $end;
410
my $seq_no = $self->seqid_id($seq_id);
411
return unless defined $seq_no;
413
if ($range_type eq 'overlaps' or $range_type eq 'contains') {
414
my $keystart = $seq_no * MAX_SEQUENCES + $binstart;
415
my $keystop = $seq_no * MAX_SEQUENCES + $binend;
418
for (my $status = $db->seq($keystart,$value,R_CURSOR);
419
$status == 0 && $keystart <= $keystop;
420
$status = $db->seq($keystart,$value,R_NEXT)) {
421
my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
422
next if $seenit{$id}++;
423
next if $strand && $fstrand != $strand;
424
next if $typelist && !$typelist->{$ftype};
425
if ($range_type eq 'overlaps') {
426
next unless $fend >= $start && $fstart <= $end;
428
elsif ($range_type eq 'contains') {
429
next unless $fstart >= $start && $fend <= $end;
431
next if %$filter && !$filter->{$id}; # don't bother
436
# for contained_in, we look for features that start at or before, and end at or after, the specified range
437
# this is incredibly inefficient, but fortunately the query is rare (?)
438
elsif ($range_type eq 'contained_in') {
439
my $keystart = $seq_no * MAX_SEQUENCES;
440
my $keystop = $seq_no * MAX_SEQUENCES + $binstart;
443
# do the left part of the range
444
for (my $status = $db->seq($keystart,$value,R_CURSOR);
445
$status == 0 && $keystart <= $keystop;
446
$status = $db->seq($keystart,$value,R_NEXT)) {
447
my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
448
next if $seenit{$id}++;
449
next if $strand && $fstrand != $strand;
450
next if $typelist && !$typelist->{$ftype};
451
next unless $fstart <= $start && $fend >= $end;
452
next if %$filter && !$filter->{$id}; # don't bother
456
# do the right part of the range
457
$keystart = $seq_no*MAX_SEQUENCES+$binend;
458
for (my $status = $db->seq($keystart,$value,R_CURSOR);
460
$status = $db->seq($keystart,$value,R_NEXT)) {
461
my ($id,$fstart,$fend,$fstrand,$ftype) = unpack("i5",$value);
462
next if $seenit{$id}++;
463
next if $strand && $fstrand != $strand;
464
next unless $fstart <= $start && $fend >= $end;
465
next if $typelist && !$typelist->{$ftype};
466
next if %$filter && !$filter->{$id}; # don't bother
472
$self->update_filter($filter,\@results);
474
475
sub build_summary_statistics {