~ubuntu-branches/ubuntu/raring/xmltv/raring

« back to all changes in this revision

Viewing changes to grab/re/tv_grab_re

  • Committer: Bazaar Package Importer
  • Author(s): Chris Butler
  • Date: 2009-03-17 11:59:59 UTC
  • mfrom: (1.2.12 upstream) (4.1.3 squeeze)
  • Revision ID: james.westby@ubuntu.com-20090317115959-9wut9kj1mxg8biiu
Tags: 0.5.55-1
* New upstream release
  - debian/xmltv-util.install: removed broken grabbers tv_grab_br_net,
    tv_grab_es, tv_grab_za, and tv_grab_jp
* Removed tv_grab_it patch

Show diffs side-by-side

added

removed

Lines of Context:
64
64
 
65
65
use warnings;
66
66
use strict;
67
 
use XMLTV::Version '$Id: tv_grab_re,v 1.17 2008/07/08 14:50:22 ecastelnau Exp $ ';
 
67
use XMLTV::Version '$Id: tv_grab_re,v 1.19 2008/12/14 21:29:41 ecastelnau Exp $ ';
68
68
use XMLTV::Capabilities qw/baseline manualconfig/;
69
69
use XMLTV::Description 'Reunion Island';
70
70
use Getopt::Long;
88
88
### Main declarations
89
89
###
90
90
my %BROADCASTERS = (
91
 
#       'CANALSAT' => "Canal Satellite Reunion",
 
91
        'CANALSAT' => "Canal Satellite Reunion",
92
92
        'PARABOLE' => "Parabole Reunion"
93
93
);
94
94
my $CANALSAT_BASE_URL = "http://fw-web.canalreunion.net/";
195
195
 
196
196
sub init_user_agent() {
197
197
        # Change HTTP Headers to make canalsat-reunion.com happy
198
 
        $XMLTV::Get_nice::ua->default_headers->push_header('Accept'=>'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
199
 
        $XMLTV::Get_nice::ua->default_headers->push_header('Accept-Language'=>'fr-fr,fr;q=0.8,en-us;q=0.5,en;q=0.3');
200
 
        $XMLTV::Get_nice::ua->default_headers->push_header('Accept-Encoding'=>'gzip,deflate');
201
 
        $XMLTV::Get_nice::ua->default_headers->push_header('Accept-Charset'=>'ISO-8859-1,utf-8;q=0.7,*;q=0.7');
202
 
        $XMLTV::Get_nice::ua->default_headers->push_header('Keep-Alive'=>'300');
203
 
        $XMLTV::Get_nice::ua->default_headers->header('Connection'=>'keep-alive');
 
198
        #$XMLTV::Get_nice::ua->default_headers->push_header('Accept'=>'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
 
199
        #$XMLTV::Get_nice::ua->default_headers->push_header('Accept-Language'=>'fr-fr,fr;q=0.8,en-us;q=0.5,en;q=0.3');
 
200
        #$XMLTV::Get_nice::ua->default_headers->push_header('Accept-Encoding'=>'gzip,deflate');
 
201
        #$XMLTV::Get_nice::ua->default_headers->push_header('Accept-Charset'=>'ISO-8859-1,utf-8;q=0.7,*;q=0.7');
 
202
        #$XMLTV::Get_nice::ua->default_headers->push_header('Keep-Alive'=>'300');
 
203
        #$XMLTV::Get_nice::ua->default_headers->header('Connection'=>'keep-alive');
204
204
        #$XMLTV::Get_nice::ua->agent("Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9b5) Gecko/2008041514 Firefox/3.0b5");
205
205
 
206
206
        # init cookies
207
 
        my $cookies = HTTP::Cookies->new(
208
 
                file => "$ENV{HOME}/.xmltv/tv_grab_re.cookies",
209
 
                autosave => 1,
210
 
                ignore_discard => 1,
211
 
                hide_cookie2 => 1,
212
 
        );
213
 
        $cookies->clear();
214
 
        $XMLTV::Get_nice::ua->cookie_jar($cookies);
 
207
        #my $cookies = HTTP::Cookies->new(
 
208
        #       file => "$ENV{HOME}/.xmltv/tv_grab_re.cookies",
 
209
        #       autosave => 1,
 
210
        #       ignore_discard => 1,
 
211
        #       hide_cookie2 => 1,
 
212
        #);
 
213
        #$cookies->clear();
 
214
        #$XMLTV::Get_nice::ua->cookie_jar($cookies);
215
215
#       $cookies->load();
216
216
 
217
217
        #$XMLTV::Get_nice::ua->default_headers->push_header('Referer'=>);
218
 
        my $html = $XMLTV::Get_nice::ua->get("http://www.canalsat-reunion.com");
219
 
$cookies->extract_cookies($html);
 
218
        #my $html = $XMLTV::Get_nice::ua->get("http://www.canalsat-reunion.com");
 
219
#$cookies->extract_cookies($html);
220
220
#print '1: '; $cookies->as_string();
221
221
 
222
 
        $XMLTV::Get_nice::ua->default_headers->header('Referer'=>'http://www.canalsat-reunion.com');
223
 
        $html = $XMLTV::Get_nice::ua->get("http://fw-web.canalreunion.net/347.0.html");
 
222
        #$XMLTV::Get_nice::ua->default_headers->header('Referer'=>'http://www.canalsat-reunion.com');
 
223
        #$html = $XMLTV::Get_nice::ua->get("http://fw-web.canalreunion.net/347.0.html");
224
224
#$cookies->extract_cookies($html);
225
225
#print '1a: '; $cookies->as_string();
226
226
 
227
 
        $XMLTV::Get_nice::ua->default_headers->header('Referer'=>'http://fw-web.canalreunion.net/347.0.html');
228
 
$XMLTV::Get_nice::ua->cookie_jar($cookies);
229
 
        $html = $XMLTV::Get_nice::ua->get("http://fw-web.canalreunion.net/408.0.html");
 
227
        #$XMLTV::Get_nice::ua->default_headers->header('Referer'=>'http://fw-web.canalreunion.net/347.0.html');
 
228
#$XMLTV::Get_nice::ua->cookie_jar($cookies);
 
229
        #$html = $XMLTV::Get_nice::ua->get("http://fw-web.canalreunion.net/408.0.html");
230
230
#$cookies->extract_cookies($html);
231
231
#print '2: '; $cookies->as_string();
232
232
 
233
 
        $XMLTV::Get_nice::ua->default_headers->header('Referer' => 'http://fw-web.canalreunion.net/408.0.html');
 
233
        #$XMLTV::Get_nice::ua->default_headers->header('Referer' => 'http://fw-web.canalreunion.net/408.0.html');
234
234
        # post normalement      
235
 
$XMLTV::Get_nice::ua->cookie_jar($cookies);
236
 
        $html = $XMLTV::Get_nice::ua->post("http://fw-web.canalreunion.net/552.0.html?&no_cache=1&programme[fuseauChoix]=2&programme[validation_choifuseaux]=Envoyer");
 
235
#$XMLTV::Get_nice::ua->cookie_jar($cookies);
 
236
#       my $html = $XMLTV::Get_nice::ua->post("http://fw-web.canalreunion.net/552.0.html",
 
237
#                                                                                                       no_cache=>1,
 
238
#                                                                                                       'programme[fuseauChoix]'=>2,
 
239
#                                                                                                       'programme[jour]'=>"12/12/2008",
 
240
#                                                                                                       'programme[horaires]'=>"6.",
 
241
#                                                                                                       'programme[chaine]'=>42,
 
242
#                                                                                                       'programme[genre]'=>"",
 
243
#                                                                                                       'Submit'=>"OK",
 
244
#                                                                                                       'programme[pdf]'=>"",
 
245
#                                                                                                       'programme[confirmation_choixfuseau]'=>"Envoyer");
237
246
#$cookies->extract_cookies($html);
238
247
#print '3: '; $cookies->as_string();
239
248
 
240
 
        $XMLTV::Get_nice::ua->default_headers->header('Referer' => 'http://fw-web.canalreunion.net/552.0.html&no_cache=1');     
241
 
$XMLTV::Get_nice::ua->cookie_jar($cookies);
242
 
        $html = $XMLTV::Get_nice::ua->get("http://fw-web.canalreunion.net/552.0.html?&no_cache=1&programme[fuseauChoix]=2&programme[validation_choifuseaux]=Envoyer");
 
249
        #$XMLTV::Get_nice::ua->default_headers->header('Referer' => 'http://fw-web.canalreunion.net/552.0.html&no_cache=1');    
 
250
#$XMLTV::Get_nice::ua->cookie_jar($cookies);
 
251
        #$html = $XMLTV::Get_nice::ua->get("http://fw-web.canalreunion.net/552.0.html?&no_cache=1&programme[fuseauChoix]=2&programme[validation_choifuseaux]=Envoyer");
243
252
#$cookies->extract_cookies($html);
244
253
#print '4: '; $cookies->as_string();
245
 
 
246
254
#print $html->content;
247
255
 
248
 
$cookies->save();
 
256
#$cookies->save();
 
257
}
 
258
 
 
259
sub post_nice_tree ($;%) {
 
260
        my $url = shift;
 
261
        my $form = shift;
 
262
 
 
263
 
 
264
        require HTML::TreeBuilder;
 
265
   my $html =  $XMLTV::Get_nice::ua->post($url, $form);
 
266
   
 
267
   my $t = new HTML::TreeBuilder;
 
268
   $t->parse($html->content) or die "cannot parse content of $url\n";
 
269
   $t->eof;
 
270
   return $t;
249
271
}
250
272
 
251
273
sub get_channels_list($) {
340
362
                }
341
363
                
342
364
                # acteurs
343
 
                if ($tt =~ / de (.*)  Avec (.*) /) {
 
365
                if ($tt =~ / de (.*)  avec (.*) /) {
344
366
                        push @directors, $1;
345
367
 
346
368
                        my @a = split(',', $2);
352
374
                }       
353
375
 
354
376
                # présentateur
355
 
                if ($tt =~ / de Pr.sent. par (.*)/) {
 
377
                if ($tt =~ / pr.sent. par (.*)/) {
356
378
                        my $str = $1;
357
379
 
358
380
                        if ($str =~ /,/) {
390
412
                # the start tag of programs for this day
391
413
                my $start = DateCalc($today, "+ $n days");
392
414
                my $stop;
393
 
                my $url_day = UnixDate($start, "%d%%2F%m%%2F%Y");
 
415
                my $url_day = UnixDate($start, "%d/%m/%Y");
394
416
 
395
417
                # build the url
396
 
                my $url = "http://fw-web.canalreunion.net/552.0.html?&no_cache=1&";
397
 
                $url .= "programme[jour]=".$url_day."&";
398
 
                $url .= "programme[horaires]=6.&"; # whole day
399
 
                $url .= "programme[chaine]=".$idchaine."&";
400
 
                $url .= "programme[genre]=&";
401
 
                $url .= "Submit=OK&";
402
 
                $url .= "programme[pdf]=";
403
 
                
 
418
                my $url = "http://fw-web.canalreunion.net/552.0.html";
 
419
                my %form;
 
420
                                
 
421
                $form{'programme[confirmation_choixfuseau]'} = "Envoyer";
 
422
                $form{'programme[pdf]'} = "";
 
423
                $form{'Submit'} = "OK";
 
424
                $form{'programme[genre]'} = "";
 
425
                $form{'programme[chaine]'} = $idchaine;
 
426
                $form{'programme[horaires]'} = 6.; # whole day
 
427
                $form{'programme[jour]'} = $url_day;
 
428
                $form{'programme[fuseauChoix]'} = 2;
 
429
                $form{'no_cache'} = 1;
 
430
 
 
431
 
404
432
                # get request and parse
405
 
                my $html = get_nice_tree $url;
 
433
                my $html = post_nice_tree($url, \%form);
406
434
                $html->objectify_text();
407
435
                #$html->dump();
408
436
 
445
473
                                $prog{'length'} = $duration * 60;
446
474
 
447
475
                                $start = DateCalc($start, "+$duration min");
 
476
                                $stop = $start;
448
477
                        }
449
478
 
450
479
                        # here is the title
452
481
                        $tt = $tag->attr_get_i('onmouseover');
453
482
                        if ($tt =~ /<span class=blanc bold majuscule>(.*)<\/span>/ ) {
454
483
                                # "Fin des programmes" is not a real tv show
455
 
                                #next if ($1 eq "Fin des programmes");
 
484
                                next if ($1 eq "Fin des programmes");
456
485
 
457
486
                                my $title = $1;
458
487
                                $title =~ s/\\'/'/g;