328
# Returns a list of the image enclosures in the RSS or Atom feed.
329
# Elements of the list are references, [ "url", "guid" ].
334
$ua->agent ("$progname/$version");
335
$ua->timeout (10); # bail sooner than the default of 3 minutes
337
my $body = (LWP::Simple::get($url) || '');
339
error ("not an RSS or Atom feed: $url")
340
unless ($body =~ m@^<\?xml\s@si);
342
$body =~ s@(<ENTRY|<ITEM)@\001$1@gsi;
343
my @items = split(/\001/, $body);
349
foreach my $item (@items) {
353
# First look for <link rel="enclosure" href="...">
356
$item =~ s!(<LINK[^<>]*>)!{
358
my ($rel) = ($link =~ m/\bREL\s*=\s*[\"\']?([^<>\'\"]+)/si);
359
my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si);
360
my ($href) = ($link =~ m/\bHREF\s*=\s*[\"\']([^<>\'\"]+)/si);
362
if ($rel && lc($rel) eq 'enclosure') {
364
$href = undef unless ($type =~ m@^image/@si); # omit videos
366
$iurl = $href if ($href);
372
# Then look for <media:content url="...">
375
$item =~ s!(<MEDIA:CONTENT[^<>]*>)!{
377
my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si);
378
$iurl = $href if $href;
383
# Then look for <description>... with an <img href="..."> inside.
386
$item =~ s!(<description[^<>]*>.*?</description>)!{
388
$desc =~ s/</</gs;
389
$desc =~ s/>/>/gs;
390
$desc =~ s/"/\"/gs;
391
$desc =~ s/'/\'/gs;
392
$desc =~ s/&/&/gs;
393
my ($href) = ($desc =~ m@<IMG[^<>]*\bSRC=[\"\']?([^\"\'<>]+)@si);
394
$iurl = $href if ($href);
399
# Could also do <content:encoded>, but the above probably covers all
400
# of the real-world possibilities.
403
# Find a unique ID for this image, to defeat image farms.
404
# First look for <id>...</id>
405
($id) = ($item =~ m!<ID\b[^<>]*>\s*([^<>]+?)\s*</ID>!si) unless $id;
407
# Then look for <guid isPermaLink=...> ... </guid>
408
($id) = ($item =~ m!<GUID\b[^<>]*>\s*([^<>]+?)\s*</GUID>!si) unless $id;
410
# Then look for <link> ... </link>
411
($id) = ($item =~ m!<LINK\b[^<>]*>\s*([^<>]+?)\s*</LINK>!si) unless $id;
415
$id = $iurl unless $id;
419
my @P = ($iurl, $id);
421
} elsif ($iurl ne $o) {
422
print STDERR "$progname: WARNING: dup ID \"$id\"" .
423
" for \"$o\" and \"$iurl\"\n";
432
# Like md5_base64 but uses filename-safe characters.
443
# Given the URL of an image, download it into the given directory
444
# and return the file name.
446
sub download_image($$$) {
447
my ($url, $uid, $dir) = @_;
449
my ($ext) = ($url =~ m@\.([a-z\d]+)$@si);
450
my $file = md5_file ($uid);
451
$file .= '.' . lc($ext) if $ext;
453
# Don't bother doing If-Modified-Since to see if the URL has changed.
454
# If we have already downloaded it, assume it's good.
455
if (-f "$dir/$file") {
456
print STDERR "$progname: exists: $dir/$file for $uid / $url\n"
461
# Special-case kludge for Flickr:
462
# Their RSS feeds sometimes include only the small versions of the images.
463
# So if the URL ends in "s" (75x75), "t" (100x100) or "m" (240x240),then
464
# munge it to be "b" (1024x1024).
466
$url =~ s@_[stm](\.[a-z]+)$@_b$1@si
467
if ($url =~ m@^https?://[^/?#&]*?flickr\.com/@si);
469
print STDERR "$progname: downloading: $dir/$file for $uid / $url\n"
471
$ua->agent ("$progname/$version");
472
my $status = LWP::Simple::mirror ($url, "$dir/$file");
473
if (!LWP::Simple::is_success ($status)) {
474
print STDERR "$progname: error $status: $url\n"; # keep going
484
if ($url !~ m/^https?:/si) { # not a URL: local directory.
485
return (undef, $url);
488
my $dir = "$ENV{HOME}/Library/Caches"; # MacOS location
490
$dir = "$dir/org.jwz.xscreensaver.feeds";
491
} elsif (-d "$ENV{HOME}/tmp") {
492
$dir = "$ENV{HOME}/tmp/.xscreensaver-feeds";
494
$dir = "$ENV{HOME}/.xscreensaver-feeds";
498
mkdir ($dir) || error ("mkdir $dir: $!");
499
print STDERR "$progname: mkdir $dir/\n" if ($verbose);
502
# MD5 for directory name to use for cache of a feed URL.
503
$dir .= '/' . md5_file ($url);
506
mkdir ($dir) || error ("mkdir $dir: $!");
507
print STDERR "$progname: mkdir $dir/ for $url\n" if ($verbose);
510
# At this point, we have the directory corresponding to this URL.
511
# Now check to see if the files in it are up to date, and download
514
my $stamp = '.timestamp';
515
my $lock = "$dir/$stamp";
517
print STDERR "$progname: awaiting lock: $lock\n"
520
my $mtime = ((stat($lock))[9]) || 0;
523
open ($lock_fd, '+>>', $lock) || error ("unable to write $lock: $!");
524
flock ($lock_fd, LOCK_EX) || error ("unable to lock $lock: $!");
525
seek ($lock_fd, 0, 0) || error ("unable to rewind $lock: $!");
527
my $poll_p = ($mtime + $feed_max_age < time);
529
$poll_p = 1 unless ($cache_p); # poll again now with --no-cache cmd line arg.
531
# Even if the cache is young, let's make sure there are at least
532
# a few files in it, and re-check if not.
536
opendir (my $dirh, $dir) || error ("$dir: $!");
537
foreach my $f (readdir ($dirh)) {
538
next if ($f =~ m/^\./s);
545
print STDERR "$progname: no files in cache of $url\n" if ($verbose);
552
print STDERR "$progname: loading $url\n" if ($verbose);
555
opendir (my $dirh, $dir) || error ("$dir: $!");
556
foreach my $f (readdir ($dirh)) {
557
next if ($f eq '.' || $f eq '..');
558
$files{$f} = 0; # 0 means "file exists, should be deleted"
564
# Download each image currently in the feed.
567
my @urls = parse_feed ($url);
568
foreach my $p (@urls) {
569
my ($furl, $id) = @$p;
570
my $f = download_image ($furl, $id, $dir);
572
$files{$f} = 1; # Got it, don't delete
576
print STDERR "$progname: empty feed: $url\n" if ($count <= 0);
578
# Now delete any files that are no longer in the feed.
579
# But if there was nothing in the feed (network failure?)
580
# then don't blow away the old files.
583
foreach my $f (keys(%files)) {
586
} elsif ($files{$f}) {
589
if (unlink ("$dir/$f")) {
590
print STDERR "$progname: rm $dir/$f\n" if ($verbose > 1);
592
print STDERR "$progname: rm $dir/$f: $!\n"; # don't bail
597
# Both feed and cache are empty. No files at all.
598
error ("empty feed: $url") if ($kept <= 1);
600
$mtime = time(); # update the timestamp
604
# Not yet time to re-check the URL.
605
print STDERR "$progname: using cache: $url\n" if ($verbose);
609
# Unlock and update the write date on the .timestamp file.
611
truncate ($lock_fd, 0) || error ("unable to truncate $lock: $!");
612
seek ($lock_fd, 0, 0) || error ("unable to rewind $lock: $!");
613
utime ($mtime, $mtime, $lock_fd) || error ("unable to touch $lock: $!");
614
flock ($lock_fd, LOCK_UN) || error ("unable to unlock $lock: $!");
617
print STDERR "$progname: unlocked $lock\n" if ($verbose > 1);
619
# Don't bother using the imageDirectory cache. We know that this directory
620
# is flat, and we can assume that an RSS feed doesn't contain 100,000 images
621
# like ~/Pictures/ might.
625
# Return the URL and directory name of the files of that URL's local cache.
316
631
sub find_random_file($) {