1
package Circos::Ideogram;
7
Circos::Ideogram - ideogram routines for Circos
11
This module is not meant to be used directly.
15
Circos is an application for the generation of publication-quality,
16
circularly composited renditions of genomic data and related
19
Circos is particularly suited for visualizing alignments, conservation
20
and intra and inter-chromosomal relationships. However, Circos can be
21
used to plot any kind of 2D data in a circular layout - its use is not
22
limited to genomics. Circos' use of lines to relate position pairs
23
(ribbons add a thickness parameter to each end) is effective to
24
display relationships between objects or positions on one or more
27
All documentation is in the form of tutorials at L<http://www.circos.ca>.
31
# -------------------------------------------------------------------
41
use Carp qw( carp confess croak );
44
use File::Spec::Functions;
46
use Math::VecStat qw(max);
48
use Params::Validate qw(:all);
49
#use Regexp::Common qw(number);
51
use POSIX qw(floor ceil);
53
use lib "$FindBin::RealBin";
54
use lib "$FindBin::RealBin/../lib";
55
use lib "$FindBin::RealBin/lib";
57
use Circos::Configuration;
58
use Circos::Constants;
63
################################################################
65
# Determine which chromosomes are going to be displayed. Several parameters
66
# are used together to determine the list and order of displayed chromosomes.
69
# - chromosomes_breaks
70
# - chromosomes_display_default
71
# - chromosomes_order_by_karyotype
73
# If chromosomes_display_default is set to 'yes', then any chromosomes that
74
# appear in the karyotype are appended to the 'chromosomes' parameter.
75
# The order in which they are appended depends on the value of 'chromosomes_order_by_karyotype'.
77
# If you want to display only those chromosomes that are mentioned in the
78
# 'chromosomes' parameter, then set chromosomes_display_default=no.
80
# Both 'chromosomes' and 'chromosomes_breaks' define a list of chromosome regions
81
# to show, delimited by ;
83
# name{[tag]}{:runlist}
89
# hs1[a]:0-100,150-200
90
# hs1;hs2[a];hs3:0-100
92
# You can also use "=" as the field delimiter,
97
# The functional role of 'chromosomes' and 'chromosomes_breaks' is the same. The latter
98
# gives an opportunity to separate definitions of regions which are not shown.
101
sub parse_chromosomes {
102
my $karyotype = shift;
106
# get the chromosomes in the karyotype
107
my @chrs_in_k = keys %$karyotype;
108
# sort them by their appearance in the file
109
@chrs_in_k = sort { $karyotype->{$a}{chr}{display_order} <=> $karyotype->{$b}{chr}{display_order} } @chrs_in_k;
110
printdebug_group("chrfilter","karyotypeorder",@chrs_in_k);
111
# sort them by digit in chromosome name (e.g. chr1 before chr11 before chrx) (done in Karyotype::sort_karyotype)
112
my @chrs_in_k_native_sort = sort { $karyotype->{$a}{chr}{sort_idx} <=> $karyotype->{$b}{chr}{sort_idx} } @chrs_in_k;
113
printdebug_group("chrfilter","nativesort",@chrs_in_k_native_sort);
115
if ( $CONF{chromosomes_display_default} ) {
117
# The default order for chromosomes is string-then-number if
118
# chromosomes contain a number, and if not then asciibetic
120
# I used to have this based on the order in the KARYOTYPE (use
121
# {CHR}{chr}{display_order} field) but decided to change it.
123
if ( $CONF{chromosomes_order_by_karyotype} ) {
124
# @chrs_in_k already ordered by appearance
125
printdebug_group("chrfilter","using karyotypeorder");
127
# sort the chromosomes with digits in them
128
@chrs_in_k = @chrs_in_k_native_sort;
129
printdebug_group("chrfilter","using nativesort");
132
################################################################
133
# Reconstruct the $CONF{chromosomes} argument using
134
# chromosomes from karyotype and those in $CONF{chromosomes}
135
my ($chrs_in_c,@chrs_ordered);
137
# First, parse chromosomes and regular expressions in the chromosomes string
138
if ( $CONF{chromosomes} ) {
139
$chrs_in_c = parse_chromosomes_string($CONF{chromosomes});
140
#printdumper($chrs_in_c);exit;
143
for my $chr (@chrs_in_k) {
149
# for each chromosome in $CONF{chromosomes}
151
for my $chr_in_c (@$chrs_in_c) {
152
next if $isrx && ! $chr_in_c->{rx};
153
next if ! $isrx && $chr_in_c->{rx};
154
my $reject = $chr_in_c->{reject};
156
if ($isrx && $chr =~ $chr_in_c->{rx}) {
159
if (! $isrx && $chr eq $chr_in_c->{chr}) {
162
printdebug_group("chrfilter",$chr,"inchromosomes",$chr_in_c->{chr},"isrx",defined $chr_in_c->{rx},"match",$match,"reject",$reject);
164
push @chr_in_c_found, $chr_in_c;
173
printdebug_group("chrfilter",$chr,"rx",$isrx,"accept",$accept);
176
if (@chr_in_c_found) {
177
#printdumper(@chr_in_c_found);
178
for my $c (@chr_in_c_found) {
179
next if $c->{reject};
186
$str .= sprintf("[%s]",$c->{tag});
189
$str .= sprintf(":%s",$c->{runlist});
191
push @chrs_ordered, $str;
193
push @chrs_ordered, $chr;
197
push @chrs_ordered, $chr;
200
push @chrs_ordered, "-$chr";
203
push @chrs_ordered, $chr;
206
$CONF{chromosomes} = join( $SEMICOLON, @chrs_ordered );
208
printdebug_group("chrfilter","effective 'chromosomes' parameter",$CONF{chromosomes});
210
my %karyotype_chrs_seen;
213
for my $pair ([$CONF{chromosomes},1],[$CONF{chromosomes_breaks},0]) {
214
my ($string,$accept_default) = @$pair;
215
my $chrstring_list = Circos::Configuration::make_parameter_list_array($string,qr/\s*;\s*/);
216
for my $chrstring (@$chrstring_list) {
217
my $chr_record = parse_chromosomes_record($chrstring);
218
my ($reject,$chr,$runlist,$tag,$chrrx) = @{$chr_record}{qw(reject chr runlist tag rx)};
219
$tag = $EMPTY_STR if !defined $tag;
220
$chr = $EMPTY_STR if !defined $chr;
221
$runlist = $EMPTY_STR if !defined $runlist;
222
if ($chr eq $EMPTY_STR) {
223
fatal_error("ideogram","unparsable_def",$chrstring);
225
next if $isrx && ! defined $chrrx;
226
next if ! $isrx && defined $chrrx;
227
# $accept identifies whether the regions indicate inclusions or exclusions
228
# $accept=1 this region is to be included
229
# $accept=0 this region is to be included (region prefixed by -)
230
my $accept = $accept_default;
231
$accept = 0 if $reject;
232
if ( $isrx && $tag) {
233
fatal_error("ideogram","regex_tag",$chrstring,$tag);
235
my $chrkey = make_key($chr,$tag);
236
#printinfo($isrx, $chrrx, $isrx ? "RX" : "NOTRX", !$isrx && defined $chrrx ? "next" : "accept");
237
if ( ! $isrx && ! defined $karyotype->{$chr}{chr} ) {
238
fatal_error("ideogram","use_undefined",$chrstring,$chr);
243
for my $c (@chrs_in_k_native_sort) {
244
next if $accept && $karyotype_chrs_seen{ make_key($c,$tag) };
245
if ($c =~ /$chrrx/i) {
246
push @chrs_to_store, $c;
247
$karyotype_chrs_seen{ make_key($c,$tag) }++;
248
$karyotype_chrs_seen{ make_key($c,"") }++;
252
for my $c (@chrs_in_k_native_sort) {
253
next if $accept && $karyotype_chrs_seen{ make_key($c,$tag) };
255
push @chrs_to_store, $c;
256
$karyotype_chrs_seen{ make_key($c,$tag) }++;
257
$karyotype_chrs_seen{ make_key($c,"") }++;
261
#printdumper(\%karyotype_chrs_seen);
262
printdebug_group("chrfilter","chrrx",$chrstring,"rx?",$isrx,"accept",$accept,"tag",$tag || "-","chrs",@chrs_to_store);
263
next unless @chrs_to_store;
268
return sprintf("%s_%s",$chr,$tag);
271
# all numbers in runlist are automatically multiplied by
272
# chromosomes_units value - this saves you from having to type
273
# a lot of zeroes in the runlists
275
if ( $CONF{chromosomes_units} ) {
276
$runlist =~ s/([\.\d]+)/$1*$CONF{chromosomes_units}/eg;
279
for my $c (@chrs_to_store) {
280
# are we trying to remove this chromosome?
281
printdebug_group("chrfilter","parsed chromosome range", $c, $runlist || $DASH );
282
my $set = $runlist ? Set::IntSpan->new($runlist) : $karyotype->{$c}{chr}{set};
284
################################################################
285
# uncertain - what was I trying to do here?
286
$set->remove($set->max) if $runlist;
288
$set->remove( $set->min ) if $set->min;
289
$set->remove( $set->max );
291
################################################################
302
$karyotype->{$c}{chr}{display_region}{accept} ||= Set::IntSpan->new();
303
$karyotype->{$c}{chr}{display_region}{accept} = $karyotype->{$c}{chr}{display_region}{accept}->union($set);
305
if ($accept_default) {
306
@chrs = grep($_->{chr} ne $c && $_->{tag} ne $tag, @chrs);
308
$karyotype->{$c}{chr}{display_region}{reject} ||= Set::IntSpan->new();
309
$karyotype->{$c}{chr}{display_region}{reject} = $karyotype->{$c}{chr}{display_region}{reject}->union($set);
316
if ( ! grep( $_->{accept}, @chrs ) ) {
317
fatal_error("ideogram","no_ideograms_to_draw");
321
printdebug_group("chrfilter","chrlist",sprintf("%2d %4s %4s %d %10s %10s",
324
$c->{tag}||$EMPTY_STR,
326
defined $c->{set}->min ? $c->{set}->min : "(-",
327
defined $c->{set}->max ? $c->{set}->max : "-)"));
332
sub parse_chromosomes_string {
336
for my $record ( @{Circos::Configuration::make_parameter_list_array($str,qr/\s*$delim\s*/)} ) {
337
push @$data, parse_chromosomes_record($record);
350
sub parse_chromosomes_record {
352
my $default_delim = "[:=]";
353
my ($chr,$runlist) = split(Circos::Configuration::fetch_configuration("list_field_delim") || $default_delim ,$str);
354
my ($tag,$reject,$rx);
355
( $reject, $chr, $tag ) = $chr =~ /([!-])?(.+?)(?:\[([^\[\]]+)\])?$/;
356
$reject = $reject ? 1 : 0;
357
$rx = parse_as_rx($chr) || undef;
358
my $isrx = $rx ? 1 : 0;
359
printdebug_group("chrfilter","parsed chr record",$str,"chr",$chr,"tag",$tag,"reject",$reject,"runlist",$runlist,"rx",$rx,"rx?",$isrx);
363
reject=> $reject ? 1 : 0,
364
accept=> $reject ? 0 : 1,
369
# -------------------------------------------------------------------
370
sub report_chromosomes {
371
my $karyotype = shift;
374
$karyotype->{$a}{chr}{display_order} <=> $karyotype->{$b}{chr}{display_order}
377
next unless $karyotype->{$chr}{chr}{display};
381
$karyotype->{$chr}{chr}{display_order},
382
$karyotype->{$chr}{chr}{scale},
383
$karyotype->{$chr}{chr}{display_region}
384
? $karyotype->{$chr}{chr}{display_region}->run_list
386
$karyotype->{$chr}{chr}{length_cumul}