190
191
my $self = shift;
191
192
my $indexed = shift;
192
my $data = $self->data;
194
my $data = $self->data;
195
my $primary_id = $obj->primary_id;
196
$primary_id = 1 + @{$data} unless $primary_id; # primary id of 0 causes a downstream bug
197
$self->data->[$primary_id] = $obj;
196
for my $obj (@objs) {
197
# Add unique ID to feature if needed
198
my $primary_id = $self->_autoid($obj);
199
# Store feature (overwriting any existing feature with the same primary ID
200
# as required by Bio::DB::SF::Store)
201
$data->{$primary_id} = $obj;
203
$self->{_index}{ids}{$primary_id} = undef;
204
$self->_update_indexes($obj);
213
# If a feature has no ID, assign it a unique ID
214
my ($self, $obj) = @_;
215
my $data = $self->data;
216
my $primary_id = $obj->primary_id;
217
if (not defined $primary_id) {
219
$primary_id = 1 + scalar keys %{$data};
220
while (exists $data->{$primary_id}) {
198
223
$obj->primary_id($primary_id);
199
$self->{_index}{ids}{$primary_id} = undef if $indexed;
200
$self->_update_indexes($obj) if $indexed;
230
my ($self, $id) = @_;
231
if (exists $self->{_index}{ids}{$id}) {
233
$self->_update_indexes( $self->fetch($id), 1 );
234
delete $self->{_index}{ids}{$id};
236
delete $self->data->{$id};
209
my $data = $self->data;
241
my ($self, $id) = @_;
242
return $self->data->{$id};
213
245
sub _add_SeqFeature {
217
my $parent_id = (ref $parent ? $parent->primary_id : $parent);
218
defined $parent_id or $self->throw("$parent should have a primary_id");
246
my ($self, $parent, @children) = @_;
248
my $parent_id = ref $parent ? $parent->primary_id : $parent;
249
defined $parent_id or $self->throw("Parent $parent should have a primary ID");
219
250
for my $child (@children) {
220
251
my $child_id = ref $child ? $child->primary_id : $child;
221
defined $child_id or $self->throw("no primary ID known for $child");
252
defined $child_id or $self->throw("Child $child should have a primary ID");
222
253
$self->{_children}{$parent_id}{$child_id}++;
226
259
sub _fetch_SeqFeatures {
260
my ($self, $parent, @types) = @_;
230
261
my $parent_id = $parent->primary_id;
231
defined $parent_id or $self->throw("$parent should have a primary_id");
262
defined $parent_id or $self->throw("Parent $parent should have a primary ID");
232
263
my @children_ids = keys %{$self->{_children}{$parent_id}};
233
264
my @children = map {$self->fetch($_)} @children_ids;
236
my $regexp = join '|',map {quotemeta($_)} $self->find_types(@types);
237
return grep {($_->primary_tag.':'.$_->source_tag) =~ /^$regexp$/i} @children;
268
for my $c (@children) {
269
push @{$$data{$c->primary_tag}{$c->source_tag||''}}, $c;
272
for my $type (@types) {
273
$type .= ':' if (not $type =~ m/:/);
274
my ($primary_tag, undef, $source_tag) = ($type =~ m/^(.*?)(:(.*?))$/);
276
if ($source_tag eq '') {
277
for my $source (keys %{$$data{$primary_tag}}) {
278
if (exists $$data{$primary_tag}{$source_tag}) {
279
push @children, @{$$data{$primary_tag}{$source_tag}};
283
if (exists $$data{$primary_tag}{$source_tag}) {
284
push @children, @{$$data{$primary_tag}{$source_tag}};
243
293
sub _update_indexes {
246
defined (my $id = $obj->primary_id) or return;
247
$self->_update_name_index($obj,$id);
248
$self->_update_type_index($obj,$id);
249
$self->_update_location_index($obj,$id);
250
$self->_update_attribute_index($obj,$id);
294
my ($self, $obj, $del) = @_;
295
defined (my $id = $obj->primary_id) or return;
297
$self->_update_name_index($obj,$id, $del);
298
$self->_update_type_index($obj,$id, $del);
299
$self->_update_location_index($obj, $id, $del);
300
$self->_update_attribute_index($obj,$id, $del);
253
303
sub _update_name_index {
256
my ($names,$aliases) = $self->feature_names($obj);
304
my ($self, $obj, $id, $del) = @_;
305
my ($names, $aliases) = $self->feature_names($obj);
257
306
foreach (@$names) {
258
$self->{_index}{name}{lc $_}{$id} = 1;
308
$self->{_index}{name}{lc $_}{$id} = 1;
310
delete $self->{_index}{name}{lc $_}{$id};
311
if (scalar keys %{ $self->{_index}{name}{lc $_} } == 0) {
312
delete $self->{_index}{name}{lc $_};
260
316
foreach (@$aliases) {
261
$self->{_index}{name}{lc $_}{$id} ||= 2;
318
$self->{_index}{name}{lc $_}{$id} ||= 2;
320
delete $self->{_index}{name}{lc $_}{$id};
321
if (scalar keys %{ $self->{_index}{name}{lc $_} } == 0) {
322
delete $self->{_index}{name}{lc $_};
265
328
sub _update_type_index {
269
my $primary_tag = $obj->primary_tag;
270
my $source_tag = $obj->source_tag || '';
271
return unless defined $primary_tag;
273
$primary_tag .= ":$source_tag";
274
$self->{_index}{type}{lc $primary_tag}{$id} = undef;
329
my ($self, $obj, $id, $del) = @_;
330
my $primary_tag = lc($obj->primary_tag) || return;
331
my $source_tag = lc($obj->source_tag || '');
333
$self->{_index}{type}{$primary_tag}{$source_tag}{$id} = undef;
335
delete $self->{_index}{type}{$primary_tag}{$source_tag}{$id};
336
if ( scalar keys %{$self->{_index}{type}{$primary_tag}{$source_tag}} == 0 ) {
337
delete $self->{_index}{type}{$primary_tag}{$source_tag};
338
if (scalar keys %{$self->{_index}{type}{$primary_tag}} == 0 ) {
339
delete $self->{_index}{type}{$primary_tag};
277
345
sub _update_location_index {
346
my ($self, $obj, $id, $del) = @_;
281
347
my $seq_id = $obj->seq_id || '';
282
348
my $start = $obj->start || 0;
283
349
my $end = $obj->end || 0;
284
350
my $strand = $obj->strand;
285
351
my $bin_min = int $start/BINSIZE;
286
352
my $bin_max = int $end/BINSIZE;
288
353
for (my $bin = $bin_min; $bin <= $bin_max; $bin++ ) {
289
$self->{_index}{location}{lc $seq_id}{$bin}{$id} = undef;
355
$self->{_index}{location}{lc $seq_id}{$bin}{$id} = undef;
357
delete $self->{_index}{location}{lc $seq_id}{$bin}{$id};
358
if (scalar keys %{$self->{_index}{location}{lc $seq_id}{$bin}{$id}} == 0) {
359
delete $self->{_index}{location}{lc $seq_id}{$bin}{$id};
361
if (scalar keys %{$self->{_index}{location}{lc $seq_id}{$bin}} == 0) {
362
delete $self->{_index}{location}{lc $seq_id}{$bin};
364
if (scalar keys %{$self->{_index}{location}{lc $seq_id}} == 0) {
365
delete $self->{_index}{location}{lc $seq_id};
294
371
sub _update_attribute_index {
372
my ($self, $obj, $id, $del) = @_;
298
373
for my $tag ($obj->get_all_tags) {
299
374
for my $value ($obj->get_tag_values($tag)) {
300
$self->{_index}{attribute}{lc $tag}{lc $value}{$id} = undef;
376
$self->{_index}{attribute}{lc $tag}{lc $value}{$id} = undef;
378
delete $self->{_index}{attribute}{lc $tag}{lc $value}{$id};
379
if ( scalar keys %{$self->{_index}{attribute}{lc $tag}{lc $value}} == 0) {
380
delete $self->{_index}{attribute}{lc $tag}{lc $value};
382
if ( scalar keys %{$self->{_index}{attribute}{lc $tag}} == 0) {
383
delete $self->{_index}{attribute}{lc $tag};
385
if ( scalar keys %{$self->{_index}{attribute}} == 0) {
386
delete $self->{_index}{attribute};
357
445
sub filter_by_type {
359
my ($types,$filter) = @_;
360
my @types = ref $types eq 'ARRAY' ? @$types : $types;
362
my $index = $self->{_index}{type};
364
my @types_found = $self->find_types(@types);
446
my ($self, $types_req, $filter) = @_;
447
my @types_req = ref $types_req eq 'ARRAY' ? @$types_req : $types_req;
449
my $types = $self->{_index}{type};
450
my @types_found = $self->find_types(\@types_req);
367
for my $type (@types_found) {
368
next unless exists $index->{$type};
369
push @results,keys %{$index->{$type}};
453
for my $type_found (@types_found) {
454
my ($primary_tag, undef, $source_tag) = ($type_found =~ m/^(.*?)(:(.*?))$/);
455
next unless exists $types->{$primary_tag}{$source_tag};
456
push @results, keys %{$types->{$primary_tag}{$source_tag}};
372
459
$self->update_filter($filter,\@results);
463
my ($self, $types_req) = @_;
380
my $index = $self->{_index}{type};
382
for my $type (@types) {
384
my ($primary_tag,$source_tag);
385
if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
386
$primary_tag = $type->method;
387
$source_tag = $type->source;
389
($primary_tag,$source_tag) = split ':',$type,2;
391
push @types_found,defined $source_tag ? lc "$primary_tag:$source_tag"
392
: grep {/^$primary_tag:/i} keys %{$index};
466
my $types = $self->{_index}{type};
468
for my $type_req (@$types_req) {
470
# Type is the primary tag and an optional source tag
471
my ($primary_tag, $source_tag);
472
if (ref $type_req && $type_req->isa('Bio::DB::GFF::Typename')) {
473
$primary_tag = $type_req->method;
474
$source_tag = $type_req->source;
476
($primary_tag, undef, $source_tag) = ($type_req =~ m/^(.*?)(:(.*))?$/);
478
($primary_tag, $source_tag) = (lc $primary_tag, lc($source_tag || ''));
480
next if not exists $$types{$primary_tag};
482
if ($source_tag eq '') {
483
# Match all sources for this primary_tag
484
push @types_found, map {"$primary_tag:$_"} (keys %{ $$types{$primary_tag} });
486
# Match only the requested source
487
push @types_found, "$primary_tag:$source_tag";
394
491
return @types_found;
399
return keys %{$self->{_index}{attribute}};
496
return keys %{$self->{_index}{attribute}};
402
499
sub filter_by_attribute {
404
my ($attributes,$filter) = @_;
500
my ($self, $attributes, $filter) = @_;
406
502
my $index = $self->{_index}{attribute};
613
706
return $self->{fasta_fh} = IO::File->new($self->{fasta_file},">");
713
my ($seq_name,$start,$end,$types,$bins) =
714
rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],
715
['TYPES','TYPE','PRIMARY_TAG'],'BINS'],@_);
717
my @features = $self->_features(-seq_id=> $seq_name,
722
my $binsize = ($end-$start+1)/$bins;
724
my @coverage_array = (0) x $bins;
726
for my $f (@features) {
727
$report_tag ||= $f->primary_tag;
730
my $start_bin = int(($fs-$start)/$binsize);
731
my $end_bin = int(($fe-$start)/$binsize);
732
$start_bin = 0 if $start_bin < 0;
733
$end_bin = $bins-1 if $end_bin >= $bins;
734
$coverage_array[$_]++ for ($start_bin..$end_bin);
736
return wantarray ? (\@coverage_array,$report_tag) : \@coverage_array;
742
if (my $fa = $self->{fasta_db}) {
743
if (my @s = eval {$fa->ids}) {
748
my $l = $self->{_index}{location} or return;
616
752
package Bio::DB::SeqFeature::Store::memory::Iterator;
755
my ($class, $store, $ids) = @_;
622
756
return bless {store => $store,
623
757
ids => $ids},ref($class) || $class;