66
67
# Let the code begin...
68
69
package Bio::Location::Fuzzy;
72
use Bio::Location::FuzzyLocationI;
73
use Bio::Location::Atomic;
75
@ISA = qw(Bio::Location::Atomic Bio::Location::FuzzyLocationI );
78
use vars qw( %FUZZYCODES %FUZZYPOINTENCODE %FUZZYRANGEENCODE
79
@LOCATIONCODESBSANE );
81
@LOCATIONCODESBSANE = (undef, 'EXACT', 'WITHIN', 'BETWEEN',
84
%FUZZYCODES = ( 'EXACT' => '..', # Position is 'exact
72
use base qw(Bio::Location::Atomic Bio::Location::FuzzyLocationI);
74
our @LOCATIONCODESBSANE = (undef, 'EXACT', 'WITHIN', 'BETWEEN', 'UNCERTAIN',
77
our %FUZZYCODES = ( 'EXACT' => '..', # Position is 'exact
85
78
# Exact position is unknown, but is within the range specified, ((1.2)..100)
94
89
# The following regular expressions map to fuzzy location types. Every
95
90
# expression must match the complete encoded point string, and must
96
91
# contain two groups identifying min and max. Empty matches are automatically
97
92
# converted to undef, except for 'EXACT', for which max is set to equal
100
'\>(\d+)(.{0})' => 'AFTER',
101
'\<(.{0})(\d+)' => 'BEFORE',
103
'(\d+)(.{0})\>' => 'AFTER',
104
'(.{0})(\d+)\<' => 'BEFORE',
105
'(\d+)\.(\d+)' => 'WITHIN',
106
'(\d+)\^(\d+)' => 'BETWEEN',
109
%FUZZYRANGEENCODE = ( '\.' => 'WITHIN',
95
our %FUZZYPOINTENCODE = (
96
'\>(\d+)(.{0})' => 'AFTER',
97
'\<(.{0})(\d+)' => 'BEFORE',
99
'\?(\d*)' => 'UNCERTAIN',
100
'(\d+)(.{0})\>' => 'AFTER',
101
'(.{0})(\d+)\<' => 'BEFORE',
102
'(\d+)\.(\d+)' => 'WITHIN',
103
'(\d+)\^(\d+)' => 'BETWEEN',
106
our %FUZZYRANGEENCODE = ( '\.' => 'WITHIN',
108
'\^' => 'IN-BETWEEN' );
121
116
Args : -start => value for start (initialize by superclass)
122
117
-end => value for end (initialize by superclass)
123
118
-strand => value for strand (initialize by superclass)
124
-location_type => either ('EXACT', 'WITHIN', 'BETWEEN') OR
119
-location_type => either ('EXACT','WITHIN','IN-BETWEEN',
120
'UNCERTAIN') OR ( 1,2,3,4)
126
121
-start_ext=> extension for start - defaults to 0,
127
122
-start_fuz=> fuzzy code for start can be
128
( 'EXACT', 'WITHIN', 'BETWEEN', 'BEFORE', 'AFTER') OR
123
('EXACT','WITHIN','BETWEEN','BEFORE','AFTER',
129
125
a value 1 - 5 corresponding to index+1 above
130
126
-end_ext=> extension for end - defaults to 0,
131
127
-end_fuz=> fuzzy code for end can be
132
( 'EXACT', 'WITHIN', 'BETWEEN', 'BEFORE', 'AFTER') OR
128
('EXACT','WITHIN','BETWEEN','BEFORE','AFTER',
133
130
a value 1 - 5 corresponding to index+1 above
164
161
sub location_type {
165
162
my ($self,$value) = @_;
166
163
if( defined $value || ! defined $self->{'_location_type'} ) {
167
$value = 'EXACT' unless defined $value;
168
if(! defined $FUZZYCODES{$value}) {
170
if( $value =~ /\.\./ ) {
172
} elsif( $value =~ /^\.$/ ) {
174
} elsif( $value =~ /\^/ ) {
178
$self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [". $self->start. "] and [". $self->end. "]")
179
if defined $self->start && defined $self->end && ($self->end - 1 == $self->start);
182
} elsif( $value ne 'EXACT' && $value ne 'WITHIN' &&
183
$value ne 'BETWEEN' ) {
184
$self->throw("Did not specify a valid location type");
187
$self->{'_location_type'} = $value;
164
$value = 'EXACT' unless defined $value;
165
if(! defined $FUZZYCODES{$value} ) {
167
if( $value =~ /\.\./ ) {
169
} elsif( $value =~ /^\.$/ ) {
171
} elsif( $value =~ /\^/ ) {
172
$value = 'IN-BETWEEN';
173
$self->throw("Use Bio::Location::Simple for IN-BETWEEN locations [".
174
$self->start. "] and [". $self->end. "]")
175
if defined $self->start && defined $self->end &&
176
($self->end - 1 == $self->start);
177
} elsif( $value =~ /\?/ ) {
178
$value = 'UNCERTAIN';
179
} elsif( $value ne 'EXACT' && $value ne 'WITHIN' &&
180
$value ne 'IN-BETWEEN' ) {
181
$self->throw("Did not specify a valid location type");
184
$self->{'_location_type'} = $value;
189
186
return $self->{'_location_type'};
450
456
sub to_FTstring {
452
458
my (%vals) = ( 'start' => $self->start,
453
'min_start' => $self->min_start,
454
'max_start' => $self->max_start,
455
'start_code' => $self->start_pos_type,
457
'min_end' => $self->min_end,
458
'max_end' => $self->max_end,
459
'end_code' => $self->end_pos_type );
459
'min_start' => $self->min_start,
460
'max_start' => $self->max_start,
461
'start_code' => $self->start_pos_type,
463
'min_end' => $self->min_end,
464
'max_end' => $self->max_end,
465
'end_code' => $self->end_pos_type );
461
467
my (%strs) = ( 'start' => '',
463
469
my ($delimiter) = $FUZZYCODES{$self->location_type};
470
$delimiter = $FUZZYCODES{'EXACT'} if ($self->location_type eq 'UNCERTAIN');
472
my $policy = ref($self->coordinate_policy);
464
474
# I'm lazy, let's do this in a loop since behaviour will be the same for
476
# The CoordinatePolicy now dictates start/end data here (bug 992) - cjf
466
477
foreach my $point ( qw(start end) ) {
467
if( $vals{$point."_code"} ne 'EXACT' ) {
469
if( (!defined $vals{"min_$point"} ||
470
!defined $vals{"max_$point"})
471
&& ( $vals{$point."_code"} eq 'WITHIN' ||
472
$vals{$point."_code"} eq 'BETWEEN')
474
$vals{"min_$point"} = '' unless defined $vals{"min_$point"};
475
$vals{"max_$point"} = '' unless defined $vals{"max_$point"};
477
$self->warn("Fuzzy codes for start are in a strange state, (".
478
join(",", ($vals{"min_$point"},
480
$vals{$point."_code"})). ")");
483
if( defined $vals{$point."_code"} &&
484
($vals{$point."_code"} eq 'BEFORE' ||
485
$vals{$point."_code"} eq 'AFTER')
487
$strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
489
if( defined $vals{"min_$point"} ) {
490
$strs{$point} .= $vals{"min_$point"};
492
if( defined $vals{$point."_code"} &&
493
($vals{$point."_code"} eq 'WITHIN' ||
494
$vals{$point."_code"} eq 'BETWEEN')
496
$strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
498
if( defined $vals{"max_$point"} ) {
499
$strs{$point} .= $vals{"max_$point"};
501
if(($vals{$point."_code"} eq 'WITHIN') ||
502
($vals{$point."_code"} eq 'BETWEEN')) {
503
$strs{$point} = "(".$strs{$point}.")";
506
$strs{$point} = $vals{$point};
478
if( ($vals{$point."_code"} ne 'EXACT') &&
479
($vals{$point."_code"} ne 'UNCERTAIN') ) {
481
# must have max and min defined to use 'WITHIN', 'BETWEEN'
482
if ((!defined $vals{"min_$point"} ||
483
!defined $vals{"max_$point"}) &&
484
( $vals{$point."_code"} eq 'WITHIN' ||
485
$vals{$point."_code"} eq 'BETWEEN'))
487
$vals{"min_$point"} = '' unless defined $vals{"min_$point"};
488
$vals{"max_$point"} = '' unless defined $vals{"max_$point"};
490
$self->warn("Fuzzy codes for start are in a strange state, (".
491
join(",", ($vals{"min_$point"},
493
$vals{$point."_code"})). ")");
497
if (defined $vals{$point."_code"} &&
498
($vals{$point."_code"} eq 'BEFORE' ||
499
$vals{$point."_code"} eq 'AFTER'))
501
$strs{$point} .= $FUZZYCODES{$vals{$point."_code"}};
502
$strs{$point} .= $vals{"$point"};
505
if( defined $vals{$point."_code"} &&
506
($vals{$point."_code"} eq 'WITHIN' ||
507
$vals{$point."_code"} eq 'BETWEEN'))
509
# Expect odd results with anything but WidestCoordPolicy for now
510
$strs{$point} .= ($point eq 'start') ?
512
$FUZZYCODES{$vals{$point."_code"}}.
515
$vals{'min_'.$point}.
516
$FUZZYCODES{$vals{$point."_code"}}.
518
$strs{$point} = "(".$strs{$point}.")";
521
} elsif ($vals{$point."_code"} eq 'UNCERTAIN') {
522
$strs{$point} = $FUZZYCODES{$vals{$point."_code"}};
523
$strs{$point} .= $vals{$point} if defined $vals{$point};
525
$strs{$point} = $vals{$point};
510
529
my $str = $strs{'start'} . $delimiter . $strs{'end'};
511
530
if($self->is_remote() && $self->seq_id()) {
512
$str = $self->seq_id() . ":" . $str;
531
$str = $self->seq_id() . ":" . $str;
514
533
if( defined $self->strand &&
515
$self->strand == -1 ) {
516
$str = "complement(" . $str . ")";
534
$self->strand == -1 &&
535
$self->location_type() ne "UNCERTAIN") {
536
$str = "complement(" . $str . ")";
517
537
} elsif($self->location_type() eq "WITHIN") {
540
560
# strip off leading and trailing space
541
561
$string =~ s/^\s*(\S+)\s*/$1/;
542
562
foreach my $pattern ( keys %FUZZYPOINTENCODE ) {
543
if( $string =~ /^$pattern$/ ) {
544
my ($min,$max) = ($1,$2);
545
if($FUZZYPOINTENCODE{$pattern} eq 'EXACT') {
548
$max = undef if(length($max) == 0);
549
$min = undef if(length($min) == 0);
551
return ($FUZZYPOINTENCODE{$pattern},$min,$max);
563
if( $string =~ /^$pattern$/ ) {
564
my ($min,$max) = ($1,$2) unless (($1 eq '') && (!defined $2));
565
if( ($FUZZYPOINTENCODE{$pattern} eq 'EXACT') ||
566
($FUZZYPOINTENCODE{$pattern} eq 'UNCERTAIN')
570
$max = undef if((defined $max) && (length($max) == 0));
571
$min = undef if((defined $min) && (length($min) == 0));
573
return ($FUZZYPOINTENCODE{$pattern},$min,$max);
554
576
if( $self->verbose >= 1 ) {
555
$self->warn("could not find a valid fuzzy encoding for $string");
577
$self->warn("could not find a valid fuzzy encoding for $string");