7
tv_grab_es_miguiatv - Alternative TV grabber for Spain.
11
tv_grab_es_miguiatv --help
13
tv_grab_es_miguiatv [--config-file FILE] --configure [--gui OPTION]
15
tv_grab_es_miguiatv [--config-file FILE] [--output FILE] [--days N]
16
[--offset N] [--quiet]
18
tv_grab_es_miguiatv --list-channels
20
tv_grab_es_miguiatv --capabilities
22
tv_grab_es_miguiatv --version
26
Output TV listings for Spanish channels from www.miguiatv.com.
27
Supports analogue and digital (D+) channels.
29
First run B<tv_grab_es_miguiatv --configure> to choose which channels you want
30
to download. Then running B<tv_grab_es_miguiatv> with no arguments will output
31
listings in XML format to standard output.
33
B<--configure> Prompt for which channels to download,
34
and write the configuration file.
36
B<--config-file FILE> Set the name of the configuration file, the
37
default is B<~/.xmltv/tv_grab_es_miguiatv.conf>. This is the file written by
38
B<--configure> and read when grabbing.
40
B<--gui OPTION> Use this option to enable a graphical interface.
41
OPTION may be 'Tk', or left blank for the best available choice.
42
Additional allowed values of OPTION are 'Term' for normal terminal output
43
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.
45
B<--output FILE> Write to FILE rather than standard output.
47
B<--days N> Grab N days. The default is 3.
49
B<--offset N> Start N days in the future. The default is to start
52
B<--quiet> Suppress the progress messages normally written to standard
55
B<--capabilities> Show which capabilities the grabber supports. For more
56
information, see L<http://membled.com/twiki/bin/view/Main/XmltvCapabilities>
58
B<--version> Show the version of the grabber.
60
B<--help> Print a help message and exit.
68
Alberto González (alberto@pesadilla.org), based on tv_grab_es_laguiatv from CandU and tv_grab_es from Ramon Roca.
77
######################################################################
81
use XMLTV::Version '$Id: tv_grab_es_miguiatv,v 1.1 2008/01/13 21:22:51 atirc Exp $ ';
82
use XMLTV::Capabilities qw/baseline manualconfig cache/;
83
use XMLTV::Description 'Spain (miguiatv.com)';
86
use HTML::TreeBuilder;
87
use HTML::Entities; # parse entities
94
use XMLTV::ProgressBar;
96
use XMLTV::Config_file;
101
# Todo: perhaps we should internationalize messages and docs?
102
use XMLTV::Usage <<END
103
$0: get Spanish television listings in XMLTV format
104
To configure: $0 --configure [--config-file FILE]
105
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
106
[--offset N] [--quiet]
107
To list channels: $0 --list-channels
108
To show capabilities: $0 --capabilities
109
To show version: $0 --version
113
# Attributes of the root element in output.
114
my $HEAD = { 'source-info-url' => 'http://www.miguiatv.com/todos-los-canales.html',
115
'source-data-url' => "http://www.miguiatv.com/todos-los-canales.html",
116
'generator-info-name' => 'XMLTV',
117
'generator-info-url' => 'http://membled.com/work/apps/xmltv/',
120
# Whether zero-length programmes should be included in the output.
121
my $WRITE_ZERO_LENGTH = 0;
122
my $DO_SLOWER_DESC_GET = 0;
127
# Global channel_data
130
# debug print function
139
######################################################################
142
# Get options, including undocumented --cache option.
143
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
144
my ($opt_days, $opt_offset, $opt_help, $opt_output,
145
$opt_configure, $opt_config_file, $opt_gui,
146
$opt_quiet, $opt_list_channels);
147
$opt_days = 3; # default
148
$opt_offset = 0; # default
149
$opt_quiet = 0; # default
150
GetOptions('days=i' => \$opt_days,
151
'offset=i' => \$opt_offset,
152
'help' => \$opt_help,
153
'configure' => \$opt_configure,
154
'config-file=s' => \$opt_config_file,
155
'gui:s' => \$opt_gui,
156
'output=s' => \$opt_output,
157
'quiet' => \$opt_quiet,
158
'list-channels' => \$opt_list_channels
161
die 'number of days must not be negative'
162
if (defined $opt_days && $opt_days < 0);
163
usage(1) if $opt_help;
165
XMLTV::Ask::init($opt_gui);
167
my $mode = XMLTV::Mode::mode('grab', # default
168
$opt_configure => 'configure',
169
$opt_list_channels => 'list-channels',
172
# File that stores which channels to download.
174
= XMLTV::Config_file::filename($opt_config_file, 'tv_grab_es_miguiatv', $opt_quiet);
176
my @config_lines; # used only in grab mode
177
if ($mode eq 'configure') {
178
XMLTV::Config_file::check_no_overwrite($config_file);
180
elsif ($mode eq 'grab') {
181
@config_lines = XMLTV::Config_file::read_lines($config_file);
183
elsif ($mode eq 'list-channels') {
184
# Config file not used.
188
# Whatever we are doing, we need the channels data.
189
my %channels; # sets @ch_all
192
######################################################################
193
# write configuration
195
if ($mode eq 'configure') {
196
%channels = get_channels();
198
open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
200
# Ask about getting descs
201
my $getdescs = ask_boolean("Do you want to get descriptions (very slow)");
202
warn("cannot read input, using default")
203
if not defined $getdescs;
205
print CONF "getdescriptions ";
206
print CONF "yes\n" if $getdescs;
207
print CONF "no\n" if not $getdescs;
209
# Ask about each channel.
210
my @chs = sort keys %channels;
211
my @names = map { $channels{$_} } @chs;
212
my @qs = map { "Add channel $_?" } @names;
213
my @want = ask_many_boolean(1, @qs);
216
warn("cannot read input, stopping channel questions"), last
218
# No need to print to user - XMLTV::Ask is verbose enough.
220
# Print a config line, but comment it out if channel not wanted.
221
print CONF '#' if not $w;
222
my $name = shift @names;
223
print CONF "channel $_ $name\n";
224
# TODO don't store display-name in config file.
227
close CONF or warn "cannot close $config_file: $!";
228
say("Finished configuration.");
234
# Not configuration, we must be writing something, either full
235
# listings or just channels.
237
die if $mode ne 'grab' and $mode ne 'list-channels';
239
# Options to be used for XMLTV::Writer.
241
if (defined $opt_output) {
242
my $fh = new IO::File(">$opt_output");
243
die "cannot write to $opt_output: $!" if not defined $fh;
244
$w_args{OUTPUT} = $fh;
246
$w_args{encoding} = 'ISO-8859-15';
247
my $writer = new XMLTV::Writer(%w_args);
248
$writer->start($HEAD);
250
if ($mode eq 'list-channels') {
251
$writer->write_channel($_) foreach @ch_all;
256
######################################################################
257
# We are producing full listings.
258
die if $mode ne 'grab';
262
foreach (@config_lines) {
265
if (/getdescriptions:?\s+(\S+)/)
269
$DO_SLOWER_DESC_GET = 1;
272
elsif (/^channel:?\s+(\S+)\s+([^\#]+)/)
276
$ch_name =~ s/\s*$//;
277
push @channels, $ch_did;
278
$channels{$ch_did} = $ch_name;
281
warn "$config_file:$line_num: bad line\n";
285
######################################################################
288
# Assume the listings source uses CET (see BUGS above).
289
my $now = DateCalc(parse_date('now'), "$opt_offset days");
290
die "No channels specified, run me with --configure\n"
291
if not keys %channels;
295
# the order in which we fetch the channels matters
296
foreach my $ch_did (@channels) {
297
my $ch_name=$channels{$ch_did};
298
my $ch_xid="$ch_did.miguiatv.com";
299
$writer->write_channel({ id => $ch_xid,
300
'display-name' => [ [ $ch_name ] ] });
301
my $day=UnixDate($now,'%Q');
302
for (my $i=0;$i<$opt_days;$i++) {
303
push @to_get, [ $day, $ch_xid, $ch_did ];
305
$day=nextday($day); die if not defined $day;
309
# This progress bar is for both downloading and parsing. Maybe
310
# they could be separate.
313
my $bar = new XMLTV::ProgressBar({name => 'getting listings', count => scalar @to_get})
316
foreach (process_table($_->[0], $_->[1], $_->[2])) {
317
$writer->write_programme($_);
319
update $bar if not $opt_quiet;
321
$bar->finish() if not $opt_quiet;
324
######################################################################
325
# subroutine definitions
327
# Use Log::TraceMessages if installed.
329
eval { require Log::TraceMessages };
335
*t = \&Log::TraceMessages::t;
336
*d = \&Log::TraceMessages::d;
337
Log::TraceMessages::check_argv();
342
# process_table: fetch a URL and process it
345
# Date::Manip object giving the day to grab
346
# xmltv id of channel
347
# miguiatv.com id of channel (key into %urls)
349
# returns: list of the programme hashes to write
353
my ($date, $ch_xmltv_id, $ch_es_id) = @_;
354
my $today = UnixDate($date, '%Y%m%d');
356
my $url = $urls{$ch_es_id};
357
$url =~ s/programacion/$today/;
358
debug_print "Getting $url\n";
360
local $SIG{__WARN__} = sub
365
# parse the page to a document object
366
my $tree = get_nice_tree($url);
367
my @program_data = get_program_data($tree);
368
my $bump_start_day=0;
371
while (@program_data) {
372
my $cur = shift @program_data;
373
my $next = shift @program_data;
374
unshift @program_data,$next if $next;
376
my $p = make_programme_hash($date, $ch_xmltv_id, $ch_es_id, $cur, $next);
378
require Data::Dumper;
379
my $d = Data::Dumper::Dumper($cur);
380
warn "cannot write programme on $ch_xmltv_id on $date:\n$d\n";
386
if (!$bump_start_day && bump_start_day($cur,$next)) {
388
$date = UnixDate(DateCalc($date,"+ 1 day"),'%Q');
395
sub make_programme_hash {
396
my ($date, $ch_xmltv_id, $ch_es_id, $cur, $next) = @_;
400
$prog{channel}=$ch_xmltv_id;
401
$prog{title}=[ [ $cur->{title}, $LANG ] ];
402
$prog{"sub-title"}=[ [ $cur->{subtitle}, $LANG ] ] if defined $cur->{subtitle};
403
$prog{category}=[ [ $cur->{category}, $LANG ] ];
405
t "turning local time $cur->{time}, on date $date, into UTC";
406
eval { $prog{start}=utc_offset("$date $cur->{time}", '+0100') };
408
warn "bad time string: $cur->{time}";
411
t "...got $prog{start}";
412
# FIXME: parse description field further
414
$prog{desc}=[ [ $cur->{desc}, $LANG ] ] if defined $cur->{desc};
419
my ($cur,$next) = @_;
420
if (!defined($next)) {
423
my $start = UnixDate($cur->{time},'%H:%M');
424
my $stop = UnixDate($next->{time},'%H:%M');
425
if (Date_Cmp($start,$stop)>0) {
433
# get time, title, description
438
#my @inputs = $tree->find("class","show_even","class","show_odd");
439
my @inputs = $tree->find("tr");
440
for my $elem (@inputs) {
441
if($elem->attr('class') && ($elem->attr('class') eq "show_odd" || $elem->attr('class') eq "show_even")) {
442
my $time = $elem->attr('_content')->[0]->attr('_content')->[0];
443
my $td = pop @{$elem->attr('_content')};
444
for my $table (@{$td->attr('_content')}) {
445
if($table->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[1]) {
446
my $title = $table->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[1]->attr('_content')->[0];
447
my $category = $table->attr('_content')->[0]->attr('_content')->[1]->attr('_content')->[0]->attr('_content')->[0];
448
if($table->attr('_content')->[1]->attr('_content')->[0]) {
449
my $description = $table->attr('_content')->[1]->attr('_content')->[0]->attr('_content')->[0];
452
category => $category,
466
my $xml = XMLin($tree);
467
if(ref($xml->{channel}->{item}) eq "ARRAY") {
468
my $elementos = $#{$xml->{channel}->{item}};
469
for (my $i=0;$i<$elementos;$i++) {
470
my ($title,$time) = split(/\s*-\s*/,$xml->{channel}->{item}->[$i]->{title},2);
471
my $description = $xml->{channel}->{item}->[$i]->{description};
472
($time) = $time =~ /(\d+:\d+)/;
473
my $year = (((localtime(time))[5])+1900);
474
#$time = $year . $mes . sprintf("%02d",$dia) . $hora . $minuto . "00 +0100";
475
$description =~ s/[^\n]*\n//;
476
if(length($description) > 5) {
491
# get channel listing
494
my $bar = new XMLTV::ProgressBar({name => 'finding channels', count => 1})
497
my $url="http://www.miguiatv.com/todos-los-canales.html";
501
my $tree = get_nice_tree $url;
502
my @inputs = $tree->find("div");
503
foreach my $elem (@inputs) {
504
if($elem->attr('class') && $elem->attr('class') eq "footer_channels") {
506
for my $div ( @{$elem->attr('_content')}) {
507
for my $li ( @{$div->attr('_content')}) {
508
pop @{$li->attr('_content')};
509
for my $ul ( @{$li->attr('_content')}) {
510
if(ref($ul) eq "HTML::Element") {
511
if($ul->attr('href')) {
512
$channel_name = pop @{$ul->attr('_content')};
513
$channel_name =~ s/^\s+//;
514
$channel_name =~ s/\s+$//;
515
$channel_id = convert_name_to_id($channel_name);
516
$channels{$channel_id}=$channel_name;
525
die "no channels could be found" if not keys %channels;
526
update $bar if not $opt_quiet;
527
$bar->finish() if not $opt_quiet;
531
# get xml list for channels
534
my $bar = new XMLTV::ProgressBar({name => 'getting urls', count => 1})
537
my $url="http://www.miguiatv.com/todos-los-canales.html";
541
my $tree = get_nice_tree $url;
542
my @inputs = $tree->find("div");
543
foreach my $elem (@inputs) {
544
if($elem->attr('class') && $elem->attr('class') eq "footer_channels") {
546
for my $div ( @{$elem->attr('_content')}) {
547
for my $li ( @{$div->attr('_content')}) {
548
pop @{$li->attr('_content')};
549
for my $ul ( @{$li->attr('_content')}) {
550
if(ref($ul) eq "HTML::Element") {
551
if($ul->attr('href')) {
552
$channel_name = pop @{$ul->attr('_content')};
553
$channel_name =~ s/^\s+//;
554
$channel_name =~ s/\s+$//;
555
$channel_id = convert_name_to_id($channel_name);
556
$urls{$channel_id}=$ul->attr('href');
565
die "no channels could be found" if not keys %urls;
567
update $bar if not $opt_quiet;
568
$bar->finish() if not $opt_quiet;
570
sub convert_name_to_id
575
$str =~ s/([^A-Za-z0-9])/sprintf("-%02X", ord($1))/seg;
581
# Bump a YYYYMMDD date (Date::Manip '%Q' format) by one day.
584
my $p = parse_date($d);
585
my $n = DateCalc($p, '+ 1 day');
586
return UnixDate($n, '%Q');