1
package Circos::Expression;
7
Circos::Expression - expression and text parsing routines for Geometry in Circos
11
This module is not meant to be used directly.
15
Circos is an application for the generation of publication-quality,
16
circularly composited renditions of genomic data and related
19
Circos is particularly suited for visualizing alignments, conservation
20
and intra and inter-chromosomal relationships. However, Circos can be
21
used to plot any kind of 2D data in a circular layout - its use is not
22
limited to genomics. Circos' use of lines to relate position pairs
23
(ribbons add a thickness parameter to each end) is effective to
24
display relationships between objects or positions on one or more
27
All documentation is in the form of tutorials at L<http://www.circos.ca>.
31
# -------------------------------------------------------------------
40
use Carp qw( carp confess croak );
42
use Params::Validate qw(:all);
44
use Math::VecStat qw(average);
45
use List::Util qw(min max);
46
use Text::Balanced qw(extract_bracketed);
48
use lib "$FindBin::RealBin";
49
use lib "$FindBin::RealBin/../lib";
50
use lib "$FindBin::RealBin/lib";
52
use Circos::Configuration;
53
use Circos::Constants;
60
for my $f (qw(format_condition)) {
64
# -------------------------------------------------------------------
65
sub format_condition {
67
# apply suffixes kb, Mb, Gb (case-insensitive) trailing any numbers
68
# and apply appropriate multiplier to the number
70
my $condition = shift;
71
$condition =~ s/([\d\.]+)kb/sprintf("%d",$1*1e3)/eig;
72
$condition =~ s/([\d\.]+)Mb/sprintf("%d",$1*1e6)/eig;
73
$condition =~ s/([\d\.]+)Gb/sprintf("%d",$1*1e9)/eig;
74
$condition =~ s/(\d+)bp/$1/ig;
78
# -------------------------------------------------------------------
80
my $expr = parse_expression(@_);
81
my $eval = eval format_condition($expr);
82
fatal_error("rules","parse_error",$expr,$@) if $@;
83
printdebug_group("rule","expression","[$expr]","eval",$eval);
87
# -------------------------------------------------------------------
88
sub parse_expression {
90
# var(VAR) refers to variable VAR in the point's data structure
94
# var(CHR) var(START) var(END)
96
# When the variable name is suffixed with a number, this number
97
# indexes the points coordinate. For links, a point has two
100
# var(CHR1) var(CHR2)
102
# If a point has two coordinates and the non-suffixed version is
103
# used, then an error is returned unless the value is the same
106
# Dynamically generated variables are
113
my ( $datum, $expr, $param_path ) = @_;
115
printdebug_group("rule","eval expression",$expr);
117
return 1 if true_or_yes($expr);
118
return 0 if false_or_no($expr);
120
my $expr_orig = $expr;
121
my $num_coord = @{$datum->{data}};
123
# (.+?) replaced by (\w+)
124
# parse _field_ and var(field)
125
my $delim_rx = qr/(_(\w+)_)/;
126
my $var_rx = qr/(var\((\w+)\))/;
127
while ( $expr =~ /$var_rx/i || $expr =~ /$delim_rx/i ) {
128
my ($template,$var) = ($1,lc $2);
129
my ($varroot,$varnum);
130
if ($var =~ /^(.+?)(\d+)$/ ) {
131
($varroot,$varnum) = ($1,$2);
133
($varroot,$varnum) = ($var,undef);
135
my $value = fetch_variable($datum,$expr,$varroot,$varnum,$param_path);
136
replace_string( \$expr, $template, $value );
139
# parse functions f(var)
140
for my $f (qw(on between fromto tofrom from to )) {
141
# for perl 5.10 using recursive rx
142
# my $parens = qr/(\((?:[^()]++|(?-1))*+\))/;
143
# no longer using this, to make the code compatible with 5.8
144
# while( $expr =~ /($f$parens)/ ) {
146
while(my ($template,$arg) = extract_balanced($expr,$f,"(",")")) {
147
$template = $f . $template;
149
my ($arg1) = split(",",$arg);
150
fatal_error("rule","fn_wrong_arg",$f,$expr_orig,1) if ! defined $arg1;
151
#printinfo($template,$arg_nested,$arg,$arg1);
152
my $result = grep($_ =~ /^$arg1$/, map {$_->{chr}} @{$datum->{data}});
153
replace_string( \$expr, $template, $result);
154
} elsif ($f eq "between") {
155
my ($arg1,$arg2) = split(",",$arg);
156
fatal_error("rule","fn_wrong_arg",$f,$expr_orig,2) if ! defined $arg1 || ! defined $arg2;
157
fatal_error("rule","fn_need_2_coord",$f,$expr_orig,$arg1,$arg2) if $num_coord != 2;
159
($datum->{data}[0]{chr} =~ /^$arg1$/i && $datum->{data}[1]{chr} =~ /^$arg2$/i)
161
($datum->{data}[0]{chr} =~ /^$arg2$/i && $datum->{data}[1]{chr} =~ /^$arg1$/i);
162
replace_string( \$expr, $template, $result || 0);
163
} elsif ($f eq "fromto") {
164
my ($arg1,$arg2) = split(",",$arg);
165
fatal_error("rule","fn_wrong_arg",$f,$expr_orig,2) if ! defined $arg1 || ! defined $arg2;
166
fatal_error("rule","fn_need_2_coord",$f,$expr_orig,$arg1,$arg2) if $num_coord != 2;
167
my $result = $datum->{data}[0]{chr} =~ /^$arg1$/i && $datum->{data}[1]{chr} =~ /^$arg2$/i;
168
replace_string( \$expr, $template, $result || 0);
169
} elsif ($f eq "tofrom") {
170
my ($arg1,$arg2) = split(",",$arg);
171
fatal_error("rule","fn_wrong_arg",$f,$expr_orig,2) if ! defined $arg1 || ! defined $arg2;
172
fatal_error("rule","fn_need_2_coord",$f,$expr_orig,$arg1,$arg2) if $num_coord != 2;
173
my $result = $datum->{data}[0]{chr} =~ /^$arg2$/i && $datum->{data}[1]{chr} =~ /^$arg1$/i;
174
replace_string( \$expr, $template, $result || 0);
175
} elsif ($f eq "to") {
176
my ($arg1) = split(",",$arg);
177
fatal_error("rule","fn_wrong_arg",$f,$expr_orig,1) if ! defined $arg1;
178
fatal_error("rule","fn_need_2_coord",$f,$expr_orig,"-",$arg1) if $num_coord != 2;
179
my $result = $datum->{data}[1]{chr} =~ /^$arg1$/i;
180
replace_string( \$expr, $template, $result || 0);
181
} elsif ($f eq "from") {
182
my ($arg1) = split(",",$arg);
183
fatal_error("rule","fn_wrong_arg",$f,$expr_orig,1) if ! defined $arg1;
184
fatal_error("rule","fn_need_2_coord",$f,$expr_orig,$arg1,"-") if $num_coord != 2;
185
my $result = $datum->{data}[0]{chr} =~ /^$arg1$/i;
186
replace_string( \$expr, $template, $result || 0);
194
my ($datum,$expr,$var,$varnum,$param_path) = @_;
196
my $num_coord = @{$datum->{data}};
198
# If this data collection has only one data value (e.g. scatter plot)
199
# then assume that any expression without an explicit number is refering
200
# to the data point (e.g. _SIZE_ acts like _SIZE1_)
201
if($num_coord == 1) {
202
if(! defined $varnum) {
203
# var(START) treated like var(START1)
205
} elsif ($varnum != 1) {
206
# var(STARTN) must have N=1
207
fatal_error("rule","bad_coord",$var,$varnum,$num_coord);
209
} elsif ($num_coord == 2) {
210
if(! defined $varnum) {
211
# var(START) treated like var(START1) but only if var(START1) == var(START2)
212
my $v1 = fetch_variable($datum,$expr,$var,1,$param_path);
213
my $v2 = fetch_variable($datum,$expr,$var,2,$param_path);
217
fatal_error("rule","conflicting_coord",
222
} elsif ($varnum != 1 && $varnum != 2) {
223
# var(STARTN) must have N=1 or N=2
224
fatal_error("rule","bad_coord",$var,$varnum,$num_coord);
227
fatal_error("rule","wrong_coord_num",$num_coord);
230
my $varidx = $varnum - 1;
232
my $data = $datum->{data};
237
if( exists $datum->{param}{$var} ) {
238
$value = $datum->{param}{$var};
239
} elsif ( exists $data->[$varidx]{$var} ) {
240
$value = $data->[$varidx]{$var};
241
} elsif ( $param_path && defined seek_parameter( $var, @$param_path ) ) {
242
$value = seek_parameter( $var, @$param_path );
243
} elsif ( $var eq "size" ) {
244
$value = $data->[$varidx]{end} - $data->[$varidx]{start} + 1;
245
} elsif ( $var eq "pos" ) {
246
$value = round ($data->[$varidx]{start}+$data->[$varidx]{end})/2;
247
} elsif ( $var eq "intrachr" ) {
248
fatal_error("rule","need_2_coord","intrachr",$num_coord) if $num_coord != 2;
249
$value = $data->[0]{chr} eq $data->[1]{chr} ? 1 : 0;
250
} elsif ( $var eq "interchr" ) {
251
fatal_error("rule","need_2_coord","intrachr",$num_coord) if $num_coord != 2;
252
$value = $data->[0]{chr} ne $data->[1]{chr} ? 1 : 0;
254
if(fetch_conf("skip_missing_expression_vars")) {
257
fatal_error("rules","no_such_field",$expr,$var,Dumper($datum));
260
$value = Circos::unit_strip($value);
261
printdebug_group("rule","found variable",$var."[$varnum]","value",$value);
265
# -------------------------------------------------------------------
267
my ( $target, $source, $value ) = @_;
268
if ( $value =~ /[^0-9-.]/ && $value ne "undef" ) {
269
$$target =~ s/\Q$source\E/'$value'/g;
271
$$target =~ s/\Q$source\E/$value/g;
276
################################################################
277
# Given an expression (e.g. var(abc) == 1) and a prefix (e.g. var)
278
# extract arguments that follow the prefix which are encapsulated
279
# in balanced delimiters (delim_start, delim_end)
281
# Returns the raw arguments and a version stripped of delimiters
283
# var (abc ( def ) )def(a)
290
# If no balanced argument is found, returns undef
292
sub extract_balanced {
293
my ($expr,$prefix,$delim_start,$delim_end) = @_;
294
if($expr =~ /($prefix\s*)(\Q$delim_start\E.*)/) {
296
my @result = extract_bracketed($arg,$delim_start);
297
if(defined $result[0]) {
298
my $balanced = $result[0];
299
$balanced =~ s/^\s*\Q$delim_start\E\s*//;
300
$balanced =~ s/\s*\Q$delim_end\E\s*$//;
301
return ($result[0],$balanced);