~ubuntu-branches/ubuntu/trusty/freeguide/trusty

Viewing changes to xmltv/share/perl/5.8.8/XMLTV.pm

Committer: Bazaar Package Importer
Author(s): Daniel Watkins
Date: 2008-09-07 15:49:32 UTC
mfrom: (1.2.6 upstream) (4.1.2 squeeze)
Revision ID: james.westby@ubuntu.com-20080907154932-2jvgv76btq068fe0

Tags: 0.10.9-1

http://bugs.debian.org/492789

http://bugs.debian.org/492544

* New upstream release. (Closes: #492789)
* Moved package from contrib to main. (Closes: #492544)
* Added lintian override for 'build-depends-without-arch-dep ant', as ant is
used in the clean target.

files added:
.classpath

.project

debian/patches

debian/patches/00list

debian/patches/01-fix-desktop-file.dpatch

debian/source.lintian-overrides

doc/sourceforge-redirects

doc/sourceforge-redirects/COPYING

doc/sourceforge-redirects/VERSION.php

doc/sourceforge-redirects/contact.html

doc/sourceforge-redirects/contributors.html

doc/sourceforge-redirects/design.html

doc/sourceforge-redirects/dev

doc/sourceforge-redirects/dev/index.php

doc/sourceforge-redirects/developers.html

doc/sourceforge-redirects/downinst.html

doc/sourceforge-redirects/downinst.linux.html

doc/sourceforge-redirects/downinst.macos.html

doc/sourceforge-redirects/help.html

doc/sourceforge-redirects/index.html

doc/sourceforge-redirects/license.html

doc/sourceforge-redirects/redirect.js

doc/sourceforge-redirects/screenshots.html

doc/sourceforge-redirects/toc.html

doc/sourceforge-redirects/upload-web.sh

doc/sourceforge-redirects/userguide.choosing_channels.html

doc/sourceforge-redirects/userguide.features.html

doc/sourceforge-redirects/userguide.html

doc/sourceforge-redirects/userguide.look_and_feel.html

doc/sourceforge-redirects/userguide.mainscreen.html

src/%home%

src/%home%/.freeguide-eclipse

src/freeguide/common/lib/fgspecific/data/TVProgrammeOverlapIsEqualComparator.java

src/freeguide/common/lib/fgspecific/data/TVProgrammeStartTimeComparator.java

src/freeguide/common/lib/fgspecific/data/test

src/freeguide/common/lib/fgspecific/data/test/TestTVChannel.java

src/freeguide/common/lib/fgspecific/data/test/TestTVProgrammeStartTimeComparator.java

src/freeguide/common/plugininterfaces/FGLogger.java

src/freeguide/common/plugininterfaces/IExecutionController.java

src/freeguide/plugins/grabber/xmltv/SingleConfigCommandRunner.java

src/freeguide/plugins/program/freeguide/lib/fgspecific/ConfigCommandRunner.java

src/freeguide/plugins/program/freeguide/lib/fgspecific/ExecutionController.java

src/freeguide/plugins/program/freeguide/lib/fgspecific/GrabberCommandRunner.java

src/freeguide/plugins/program/freeguide/lib/general/ICommandRunner.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_6To0_10_7.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_7To0_10_8.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_8To0_10_9.java

src/freeguide/test

src/freeguide/test/slow

src/freeguide/test/slow/FreeGuideSlowTest.java

src/freeguide/test/slow/ImportSlowTest.java

src/resources-utf8/i18n/MessagesBundle_ru.properties

src/resources-utf8/i18n/Startup_ru.properties

src/resources-utf8/i18n/grabber_cosmostv_ru.properties

src/resources-utf8/i18n/grabber_kulichki_ru.properties

src/resources-utf8/i18n/grabber_newsvm_ru.properties

src/resources-utf8/i18n/grabber_ntvplus_ru.properties

src/resources-utf8/i18n/grabber_rbc_ru.properties

src/resources-utf8/i18n/grabber_vsetv_ru.properties

src/resources-utf8/i18n/reminder_alarm_ru.properties

src/resources-utf8/i18n/ui_horizontal_manylabels_ru.properties

src/resources-utf8/i18n/ui_vertical_simple_ru.properties

src/resources/i18n/MessagesBundle_ru.properties

src/resources/i18n/Startup_ru.properties

src/resources/i18n/grabber_cosmostv_ru.properties

src/resources/i18n/grabber_kulichki_ru.properties

src/resources/i18n/grabber_newsvm_ru.properties

src/resources/i18n/grabber_ntvplus_ru.properties

src/resources/i18n/grabber_rbc_ru.properties

src/resources/i18n/grabber_vsetv_ru.properties

src/resources/i18n/reminder_alarm_ru.properties

src/resources/i18n/ui_horizontal_manylabels_ru.properties

src/resources/i18n/ui_vertical_simple_ru.properties

src/run-freeguide-dev.sh

xmltv/doc/man/tv_grab_es_miguiatv.txt

xmltv/share/xmltv/tv_grab_uk_rt/channels_platforms

xmltv/share/xmltv/tv_grab_uk_rt/prog_titles_to_process

xmltv/share/xmltv/tv_grab_uk_rt/regional_channels_by_postcode

files removed:
install/webstart/myKeystore

src/freeguide-run-clean.sh

src/freeguide-run.sh

src/freeguide/common/gui/ExecutorDialog.java.rej

src/freeguide/plugins/grabber/xmltv/GrabberXMLTV.java.rej

src/freeguide/plugins/other

src/freeguide/plugins/other/xmltvexe

src/freeguide/plugins/program/freeguide/lib/fgspecific/GrabberController.java

src/freeguide/plugins/program/freeguide/lib/fgspecific/GrabberController.java.rej

src/freeguide/plugins/reminder/advanced

xmltv/doc/man/tv_grab_nc.txt

xmltv/doc/man/tv_grab_nl.txt

xmltv/doc/man/tv_grab_nl_wolf.txt

xmltv/doc/man/tv_grab_no.txt

xmltv/share/xmltv/tv_grab_eu_epgdata

xmltv/share/xmltv/tv_grab_eu_epgdata/channel_ids

files modified:
ant/dist-install-tgz.xml

ant/dist-src.xml

ant/plugins-info.xml

build.xml

debian/changelog

debian/compat

debian/control

debian/copyright

debian/rules

doc/VERSION.php

doc/compile_manual.sh

doc/contributors.xml

doc/docbook-xsl-1.69.1/common/ChangeLog

doc/docbook-xsl-1.69.1/common/autoidx-ng.xsl

doc/docbook-xsl-1.69.1/common/common.xsl

doc/docbook-xsl-1.69.1/common/gentext.xsl

doc/docbook-xsl-1.69.1/common/l10n.xsl

doc/docbook-xsl-1.69.1/common/labels.xsl

doc/docbook-xsl-1.69.1/common/pi.xsl

doc/docbook-xsl-1.69.1/common/refentry.xsl

doc/docbook-xsl-1.69.1/common/subtitles.xsl

doc/docbook-xsl-1.69.1/common/table.xsl

doc/docbook-xsl-1.69.1/common/targets.xsl

doc/docbook-xsl-1.69.1/common/titles.xsl

doc/docbook-xsl-1.69.1/lib/lib.xml

doc/docbook-xsl-1.69.1/lib/lib.xsl

doc/docbook-xsl-1.69.1/lib/lib.xweb

doc/docbook-xsl-1.69.1/xhtml/admon.xsl

doc/docbook-xsl-1.69.1/xhtml/autoidx-ng.xsl

doc/docbook-xsl-1.69.1/xhtml/autoidx.xsl

doc/docbook-xsl-1.69.1/xhtml/autotoc.xsl

doc/docbook-xsl-1.69.1/xhtml/biblio.xsl

doc/docbook-xsl-1.69.1/xhtml/block.xsl

doc/docbook-xsl-1.69.1/xhtml/callout.xsl

doc/docbook-xsl-1.69.1/xhtml/changebars.xsl

doc/docbook-xsl-1.69.1/xhtml/chunk-code.xsl

doc/docbook-xsl-1.69.1/xhtml/chunk-common.xsl

doc/docbook-xsl-1.69.1/xhtml/chunk.xsl

doc/docbook-xsl-1.69.1/xhtml/chunker.xsl

doc/docbook-xsl-1.69.1/xhtml/chunkfast.xsl

doc/docbook-xsl-1.69.1/xhtml/chunktoc.xsl

doc/docbook-xsl-1.69.1/xhtml/component.xsl

doc/docbook-xsl-1.69.1/xhtml/division.xsl

doc/docbook-xsl-1.69.1/xhtml/docbook.xsl

doc/docbook-xsl-1.69.1/xhtml/ebnf.xsl

doc/docbook-xsl-1.69.1/xhtml/footnote.xsl

doc/docbook-xsl-1.69.1/xhtml/formal.xsl

doc/docbook-xsl-1.69.1/xhtml/glossary.xsl

doc/docbook-xsl-1.69.1/xhtml/graphics.xsl

doc/docbook-xsl-1.69.1/xhtml/html-rtf.xsl

doc/docbook-xsl-1.69.1/xhtml/html.xsl

doc/docbook-xsl-1.69.1/xhtml/htmltbl.xsl

doc/docbook-xsl-1.69.1/xhtml/index.xsl

doc/docbook-xsl-1.69.1/xhtml/info.xsl

doc/docbook-xsl-1.69.1/xhtml/inline.xsl

doc/docbook-xsl-1.69.1/xhtml/keywords.xsl

doc/docbook-xsl-1.69.1/xhtml/lists.xsl

doc/docbook-xsl-1.69.1/xhtml/maketoc.xsl

doc/docbook-xsl-1.69.1/xhtml/manifest.xsl

doc/docbook-xsl-1.69.1/xhtml/math.xsl

doc/docbook-xsl-1.69.1/xhtml/oldchunker.xsl

doc/docbook-xsl-1.69.1/xhtml/onechunk.xsl

doc/docbook-xsl-1.69.1/xhtml/param.xsl

doc/docbook-xsl-1.69.1/xhtml/pi.xsl

doc/docbook-xsl-1.69.1/xhtml/profile-chunk-code.xsl

doc/docbook-xsl-1.69.1/xhtml/profile-chunk.xsl

doc/docbook-xsl-1.69.1/xhtml/profile-docbook.xsl

doc/docbook-xsl-1.69.1/xhtml/profile-onechunk.xsl

doc/docbook-xsl-1.69.1/xhtml/qandaset.xsl

doc/docbook-xsl-1.69.1/xhtml/refentry.xsl

doc/docbook-xsl-1.69.1/xhtml/sections.xsl

doc/docbook-xsl-1.69.1/xhtml/synop.xsl

doc/docbook-xsl-1.69.1/xhtml/table.xsl

doc/docbook-xsl-1.69.1/xhtml/task.xsl

doc/docbook-xsl-1.69.1/xhtml/titlepage.xsl

doc/docbook-xsl-1.69.1/xhtml/toc.xsl

doc/docbook-xsl-1.69.1/xhtml/verbatim.xsl

doc/docbook-xsl-1.69.1/xhtml/xref.xsl

doc/html-local/contributors.html

doc/html-local/downinst.html

doc/html-local/help.html

doc/html-local/index.html

install/linux/freeguide.desktop

repository/repositoryInfo.xml

src/freeguide/build/docxslt/DocXSLT.java

src/freeguide/build/patchallfiles/PatchAllFiles.java

src/freeguide/build/patchallfiles/PatchApplication_java.java

src/freeguide/build/patchallfiles/PatchBuild.java

src/freeguide/build/patchallfiles/PatchFile.java

src/freeguide/build/patchallfiles/PatchPlugins.java

src/freeguide/build/patchallfiles/PatchRepository.java

src/freeguide/common/gui/FavouriteEditorDialog.java

src/freeguide/common/gui/FavouritesController.java

src/freeguide/common/gui/JWaitFrame.java

src/freeguide/common/lib/fgspecific/Application.java

src/freeguide/common/lib/fgspecific/StorageHelper.java

src/freeguide/common/lib/fgspecific/data/TVChannel.java

src/freeguide/common/lib/fgspecific/data/TVChannelsSet.java

src/freeguide/common/lib/fgspecific/data/TVProgramme.java

src/freeguide/common/lib/fgspecific/selection/Favourite.java

src/freeguide/common/lib/fgspecific/selection/SelectionManager.java

src/freeguide/common/lib/general/GridBagEasy.java

src/freeguide/common/lib/general/Utils.java

src/freeguide/common/lib/grabber/HtmlHelper.java

src/freeguide/common/lib/grabber/HttpBrowser.java

src/freeguide/common/lib/importexport/XMLTVImport.java

src/freeguide/common/lib/importexport/XMLTVImportHandler.java

src/freeguide/common/plugininterfaces/IApplication.java

src/freeguide/common/plugininterfaces/IModuleConfigureFromWizard.java

src/freeguide/common/plugininterfaces/IModuleGrabber.java

src/freeguide/common/plugininterfaces/IModuleStorage.java

src/freeguide/plugins/grabber/cosmostv/GrabberCosmostv.java

src/freeguide/plugins/grabber/hallmark/GrabberHallmark.java

src/freeguide/plugins/grabber/hallmark/HallmarkConfigurationUIController.java

src/freeguide/plugins/grabber/hallmark/HallmarkConfigurationUIPanel.java

src/freeguide/plugins/grabber/hallmark/HallmarkInfo.java

src/freeguide/plugins/grabber/hallmark/HallmarkParserDescription.java

src/freeguide/plugins/grabber/kulichki/GrabberKulichki.java

src/freeguide/plugins/grabber/kulichki/HandlerChannels.java

src/freeguide/plugins/grabber/kulichki/HandlerProg.java

src/freeguide/plugins/grabber/newsvm/GrabberNewsvm.java

src/freeguide/plugins/grabber/ntvplus/GrabberNtvplus.java

src/freeguide/plugins/grabber/rbc/GrabberRbc.java

src/freeguide/plugins/grabber/vsetv/GrabberVsetv.java

src/freeguide/plugins/grabber/vsetv/HandlerChannelsList.java

src/freeguide/plugins/grabber/vsetv/HandlerDates.java

src/freeguide/plugins/grabber/vsetv/HandlerParseProg.java

src/freeguide/plugins/grabber/vsetv/HandlerSettings.java

src/freeguide/plugins/grabber/xmltv/GrabberXMLTV.java

src/freeguide/plugins/grabber/xmltv/XMLTVConfig.java

src/freeguide/plugins/grabber/xmltv/XMLTVConfigureUIController.java

src/freeguide/plugins/importexport/jtv/JTV.java

src/freeguide/plugins/importexport/palmatv/ExportPalmAtv.java

src/freeguide/plugins/program/freeguide/FreeGuide.java

src/freeguide/plugins/program/freeguide/Progressor.java

src/freeguide/plugins/program/freeguide/dialogs/ChannelSetEditorDialog.java

src/freeguide/plugins/program/freeguide/dialogs/ChannelSetListDialog.java

src/freeguide/plugins/program/freeguide/lib/fgspecific/PluginsManager.java

src/freeguide/plugins/program/freeguide/lib/general/LookAndFeelManager.java

src/freeguide/plugins/program/freeguide/migration/Migrate.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_1To0_10_2.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_2To0_10_3.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_3To0_10_4.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_4To0_10_5.java

src/freeguide/plugins/program/freeguide/migration/Migrate0_10_5To0_10_6.java

src/freeguide/plugins/program/freeguide/migration/MigrateOldTo0_10_1.java

src/freeguide/plugins/program/freeguide/migration/MigrationProcessBase.java

src/freeguide/plugins/program/freeguide/options/BrowserOptionPanel.java

src/freeguide/plugins/program/freeguide/options/LayoutOptionPanel.java

src/freeguide/plugins/program/freeguide/options/OptionPanel.java

src/freeguide/plugins/program/freeguide/options/PrivacyOptionPanel.java

src/freeguide/plugins/program/freeguide/plugin.xml

src/freeguide/plugins/program/freeguide/updater/ModifiedBooleanRenderer.java

src/freeguide/plugins/program/freeguide/updater/ModifiedDefaultTableCellRenderer.java

src/freeguide/plugins/program/freeguide/updater/TablePluginsModel.java

src/freeguide/plugins/program/freeguide/viewer/MainController.java

src/freeguide/plugins/program/freeguide/viewer/MainFrame.java

src/freeguide/plugins/program/freeguide/wizard/FirstTimeWizard.java

src/freeguide/plugins/program/freeguide/wizard/WizardFrame.java

src/freeguide/plugins/program/freeguide/wizard/WizardPanel.java

src/freeguide/plugins/reminder/alarm/AlarmReminder.java

src/freeguide/plugins/reminder/alarm/AlarmUIController.java

src/freeguide/plugins/reminder/alarm/AlarmUIPanel.java

src/freeguide/plugins/ui/horizontal/manylabels/HorizontalViewer.java

src/freeguide/plugins/ui/horizontal/manylabels/HorizontalViewerHandlers.java

src/freeguide/plugins/ui/horizontal/manylabels/templates/HandlerPersonalGuide.java

src/freeguide/plugins/ui/horizontal/manylabels/templates/HandlerProgrammeInfo.java

src/freeguide/startup/Startup.java

src/resources-utf8/i18n/MessagesBundle.properties

src/resources-utf8/i18n/MessagesBundle_en_RC.properties

src/resources-utf8/plugins/grabber/xmltv/commands.properties

src/resources/i18n/MessagesBundle.properties

src/resources/i18n/MessagesBundle_en_RC.properties

src/resources/plugins/grabber/xmltv/commands.properties

xmltv/ChangeLog.txt

xmltv/README

xmltv/README.txt

xmltv/doc/man/tv_find_grabbers.txt

xmltv/doc/man/tv_grab_br_net.txt

xmltv/doc/man/tv_grab_ch_search.txt

xmltv/doc/man/tv_grab_es_laguiatv.txt

xmltv/doc/man/tv_grab_eu_epgdata.txt

xmltv/doc/man/tv_grab_fi.txt

xmltv/doc/man/tv_grab_fr.txt

xmltv/doc/man/tv_grab_it.txt

xmltv/doc/man/tv_grab_na_dd.txt

xmltv/doc/man/tv_grab_no_gfeed.txt

xmltv/doc/man/tv_grab_uk_rt.txt

xmltv/doc/man/tv_validate_file.txt

xmltv/ls-xmltv

xmltv/share/perl/5.8.8/XMLTV.pm

xmltv/share/perl/5.8.8/XMLTV/Ask/Term.pm

xmltv/share/perl/5.8.8/XMLTV/IMDB.pm

xmltv/share/perl/5.8.8/XMLTV/Options.pm

xmltv/share/perl/5.8.8/XMLTV/Supplement.pm

xmltv/share/perl/5.8.8/XMLTV/ValidateFile.pm

xmltv/share/xmltv/tv_grab_be/channel_ids_fr

xmltv/share/xmltv/tv_grab_be/channel_ids_nl

xmltv/share/xmltv/tv_grab_ch_search/channel_ids

xmltv/share/xmltv/tv_grab_it/channel_ids

xmltv/share/xmltv/tv_grab_uk_rt/channel_ids

xmltv/share/xmltv/xmltv.dtd

xmltv/xmltv.dtd

xmltv/xmltv.exe

Show diffs side-by-side

added added

removed removed

# -*- perl -*-

# $Id: XMLTV.pm.in,v 1.132 2007/11/05 08:14:27 rmeden Exp $

package XMLTV;

use strict;

use base 'Exporter';

our @EXPORT = ();

our @EXPORT_OK = qw(read_data parse parsefile write_data

best_name list_channel_keys list_programme_keys);

# For the time being the version of this library is tied to that of

# the xmltv package as a whole. This number should be checked by the

# mkdist tool.

our $VERSION = '0.5.50';

# Work around changing behaviour of XML::Twig. On some systems (like

# mine) it always returns UTF-8 data unless KeepEncoding is specified.

# However the encoding() method tells you the encoding of the original

# document, not of the data you receive. To be sure of what you're

# getting, it is easiest on such a system to not give KeepEncoding and

# just use UTF-8.

# But on other systems (seemingly perl 5.8 and above), XML::Twig tries

# to keep the original document's encoding in the strings returned.

# You then have to call encoding() to find out what you're getting.

# To make sure of this behaviour we set KeepEncoding to true on such a

# system.

# Setting KeepEncoding true everywhere seems to do no harm, it's a

# pity that we lose conversion to UTF-8 but at least it's the same

# everywhere. So the library is distributed with this flag on.

my $KEEP_ENCODING = 1;

my %warned_unknown_key;

sub warn_unknown_keys( $$ );

=pod

=head1 NAME

XMLTV - Perl extension to read and write TV listings in XMLTV format

=head1 SYNOPSIS

use XMLTV;

my $data = XMLTV::parsefile('tv.xml');

my ($encoding, $credits, $ch, $progs) = @$data;

my $langs = [ 'en', 'fr' ];

print 'source of listings is: ', $credits->{'source-info-name'}, "\n"

if defined $credits->{'source-info-name'};

foreach (values %$ch) {

my ($text, $lang) = @{XMLTV::best_name($langs, $_->{'display-name'})};

print "channel $_->{id} has name $text\n";

print "...in language $lang\n" if defined $lang;

}

foreach (@$progs) {

print "programme on channel $_->{channel} at time $_->{start}\n";

next if not defined $_->{desc};

foreach (@{$_->{desc}}) {

my ($text, $lang) = @$_;

print "has description $text\n";

print "...in language $lang\n" if defined $lang;

}

The value of $data will be something a bit like:

[ 'UTF-8',

{ 'source-info-name' => 'Ananova', 'generator-info-name' => 'XMLTV' },

{ 'radio-4.bbc.co.uk' => { 'display-name' => [ [ 'en', 'BBC Radio 4' ],

[ 'en', 'Radio 4' ],

[ undef, '4' ] ],

'id' => 'radio-4.bbc.co.uk' },

... },

[ { start => '200111121800', title => [ [ 'Simpsons', 'en' ] ],

channel => 'radio-4.bbc.co.uk' },

... ] ]

=head1 DESCRIPTION

This module provides an interface to read and write files in XMLTV

format (a TV listings format defined by xmltv.dtd). In general element

names in the XML correspond to hash keys in the Perl data structure.

You can think of this module as a bit like B<XML::Simple>, but

specialized to the XMLTV file format.

The Perl data structure corresponding to an XMLTV file has four

elements. The first gives the character encoding used for text data,

typically UTF-8 or ISO-8859-1. (The encoding value could also be

undef meaning 'unknown', when the library canE<39>t work out what it

is.) The second element gives the attributes of the root <tv>

element, which give information about the source of the TV listings.

The third element is a list of channels, each list element being a

hash corresponding to one <channel> element. The fourth element is

similarly a list of programmes. More details about the data structure

are given later. The easiest way to find out what it looks like is to

load some small XMLTV files and use B<Data::Dumper> to print out the

100

resulting structure.

101

102

=head1 USAGE

103

104

=over

105

106

=cut

107

108

use XML::Twig;

109

use XML::Writer 0.600;

110

use Date::Manip;

111

use Carp;

112

use Data::Dumper;

113

114

# Use Lingua::Preferred if available, else kludge a replacement.

115

sub my_which_lang { return $_[1]->[0] }

116

BEGIN {

117

eval { require Lingua::Preferred };

118

*which_lang = $@ ? \&my_which_lang : \&Lingua::Preferred::which_lang;

119

}

120

121

# Use Log::TraceMessages if installed.

122

BEGIN {

123

eval { require Log::TraceMessages };

124

if ($@) {

125

*t = sub {};

126

*d = sub { '' };

127

}

128

else {

129

*t = \&Log::TraceMessages::t;

130

*d = \&Log::TraceMessages::d;

131

}

132

}

133

134

# Attributes and subelements of channel. Each subelement additionally

135

# needs a handler defined. Multiplicity is given for both, but for

136

# attributes the only allowable values are '1' and '?'.

137

138

# Ordering of attributes is not really important, but we keep the same

139

# order as they are given in the DTD so that output looks nice.

140

141

# The ordering of the subelements list gives the order in which these

142

# elements must appear in the DTD. In fact, these lists just

143

# duplicate information in the DTD and add details of what handlers

144

# to call.

145

146

our @Channel_Attributes = ([ 'id', '1' ]);

147

our @Channel_Handlers =

148

(

149

[ 'display-name', 'with-lang', '+' ],

150

[ 'icon', 'icon', '*' ],

151

[ 'url', 'scalar', '*' ],

152

);

153

154

# Same for <programme> elements.

155

our @Programme_Attributes =

156

(

157

[ 'start', '1' ],

158

[ 'stop', '?' ],

159

[ 'pdc-start', '?' ],

160

[ 'vps-start', '?' ],

161

[ 'showview', '?' ],

162

[ 'videoplus', '?' ],

163

[ 'channel', '1' ],

164

[ 'clumpidx', '?' ],

165

);

166

our @Programme_Handlers =

167

(

168

[ 'title', 'with-lang', '+' ],

169

[ 'sub-title', 'with-lang', '*' ],

170

[ 'desc', 'with-lang/m', '*' ],

171

[ 'credits', 'credits', '?' ],

172

[ 'date', 'scalar', '?' ],

173

[ 'category', 'with-lang', '*' ],

174

[ 'language', 'with-lang', '?' ],

175

[ 'orig-language', 'with-lang', '?' ],

176

[ 'length', 'length', '?' ],

177

[ 'icon', 'icon', '*' ],

178

[ 'url', 'scalar', '*' ],

179

[ 'country', 'with-lang', '*' ],

180

[ 'episode-num', 'episode-num', '*' ],

181

[ 'video', 'video', '?' ],

182

[ 'audio', 'audio', '?' ],

183

[ 'previously-shown', 'previously-shown', '?' ],

184

[ 'premiere', 'with-lang/em', '?' ],

185

[ 'last-chance', 'with-lang/em', '?' ],

186

[ 'new', 'presence', '?' ],

187

[ 'subtitles', 'subtitles', '*' ],

188

[ 'rating', 'rating', '*' ],

189

[ 'star-rating', 'star-rating', '*' ],

190

);

191

192

# And a hash mapping names like 'with-lang' to pairs of subs. The

193

# first for reading, the second for writing. Note that the writers

194

# alter the passed-in data as a side effect! (If the writing sub is

195

# called with an undef XML::Writer then it writes nothing but still

196

# warns for (most) bad data checks - and still alters the data.)

197

198

our %Handlers = ();

199

200

# Undocumented interface for adding extensions to the XMLTV format:

201

# first add an entry to @XMLTV::Channel_Handlers or

202

# @XMLTV::Programme_Handlers with your new element's name, 'type' and

203

# multiplicity. The 'type' should be a string you invent yourself.

204

# Then $XMLTV::Handlers{'type'} should be a pair of subroutines, a

205

# reader and a writer. (Unless you want to use one of the existing

206

# types such as 'with-lang' or 'scalar'.)

207

208

# Note that elements and attributes beginning 'x-' are skipped over

209

# _automatically_, so you can't parse them with this method. A better

210

# way to add extensions is needed - doing this not encouraged but is

211

# sometimes necessary.

212

213

214

# read_data() is a deprecated name for parsefile().

215

sub read_data( $ ) { # FIXME remove altogether

216

warn "XMLTV::read_data() deprecated, use XMLTV::parsefile() instead\n";

217

&parsefile;

218

}

219

220

# Private.

221

sub sanity( $ ) {

222

for (shift) {

223

croak 'no <tv> element found' if not /<tv/;

224

}

225

}

226

227

=pod

228

229

=item parse(document)

230

231

Takes an XMLTV document (a string) and returns the Perl data

232

structure. It is assumed that the document is valid XMLTV; if not

233

the routine may die() with an error (although the current implementation

234

just warns and continues for most small errors).

235

236

The first element of the listref returned, the encoding, may vary

237

according to the encoding of the input document, the versions of perl

238

and C<XML::Parser> installed, the configuration of the XMLTV library

239

and other factors including, but not limited to, the phase of the

240

moon. With luck it should always be either the encoding of the input

241

file or UTF-8.

242

243

Attributes and elements in the XML file whose names begin with 'x-'

244

are skipped silently. You can use these to include information which

245

is not currently handled by the XMLTV format, or by this module.

246

247

=cut

248

sub parse( $ ) {

249

my $str = shift;

250

sanity($str);

251

# FIXME commonize with parsefiles()

252

my ($encoding, $credits);

253

my %channels;

254

my @programmes;

255

parse_callback($str,

256

sub { $encoding = shift },

257

sub { $credits = shift },

258

sub { for (shift) { $channels{$_->{id}} = $_ } },

259

sub { push @programmes, shift });

260

return [ $encoding, $credits, \%channels, \@programmes ];

261

}

262

263

=pod

264

265

=item parsefiles(filename...)

266

267

Like C<parse()> but takes one or more filenames instead of a string

268

document. The data returned is the merging of those file contents:

269

the programmes will be concatenated in their original order, the

270

channels just put together in arbitrary order (ordering of channels

271

should not matter).

272

273

It is necessary that each file have the same character encoding, if

274

not, an exception is thrown. Ideally the credits information would

275

also be the same between all the files, since there is no obvious way to

276

merge it - but if the credits information differs from one file to the

277

next, one file is picked arbitrarily to provide credits and a warning

278

is printed. If two files give differing channel definitions for the

279

same XMLTV channel id, then one is picked arbitrarily and a warning

280

is printed.

281

282

In the simple case, with just one file, you neednE<39>t worry

283

about mismatching of encodings, credits or channels.

284

285

The deprecated function C<parsefile()> is a wrapper allowing just one

286

filename.

287

288

=cut

289

sub parsefiles( @ ) {

290

die 'one or more filenames required' if not @_;

291

my ($encoding, $credits);

292

my %channels;

293

my @programmes;

294

parsefiles_callback(sub { $encoding = shift },

295

sub { $credits = shift },

296

sub { for (shift) { $channels{$_->{id}} = $_ } },

297

sub { push @programmes, shift },

298

@_);

299

return [ $encoding, $credits, \%channels, \@programmes ];

300

}

301

302

sub parsefile( $ ) { parsefiles(@_) }

303

304

=pod

305

306

=item parse_callback(document, encoding_callback, credits_callback,

307

channel_callback, programme_callback)

308

309

An alternative interface. Whereas C<parse()> reads the whole document

310

and then returns a finished data structure, with this routine you

311

specify a subroutine to be called as each <channel> element is read

312

and another for each <programme> element.

313

314

The first argument is the document to parse. The remaining arguments

315

are code references, one for each part of the document.

316

317

The callback for encoding will be called once with a string giving the

318

encoding. In present releases of this module, it is also possible for

319

the value to be undefined meaning 'unknown', but itE<39>s hoped that

320

future releases will always be able to figure out the encoding used.

321

322

The callback for credits will be called once with a hash reference.

323

For channels and programmes, the appropriate function will be called

324

zero or more times depending on how many channels / programmes are

325

found in the file.

326

327

The four subroutines will be called in order, that is, the encoding

328

and credits will be done before the channel handler is called and all

329

the channels will be dealt with before the first programme handler is

330

called.

331

332

If any of the code references is undef, nothing is called for that part

333

of the file.

334

335

For backwards compatibility, if the value for 'encoding callback' is

336

not a code reference but a scalar reference, then the encoding found

337

will be stored in that scalar. Similarly if the 'credits callback'

338

is a scalar reference, the scalar it points to will be set to point

339

to the hash of credits. This style of interface is deprecated: new

340

code should just use four callbacks.

341

342

For example:

343

344

my $document = '<tv>...</tv>';

345

346

my $encoding;

347

sub encoding_cb( $ ) { $encoding = shift }

348

349

my $credits;

350

sub credits_cb( $ ) { $credits = shift }

351

352

# The callback for each channel populates this hash.

353

my %channels;

354

sub channel_cb( $ ) {

355

my $c = shift;

356

$channels{$c->{id}} = $c;

357

}

358

359

# The callback for each programme. We know that channels are

360

# always read before programmes, so the %channels hash will be

361

# fully populated.

362

363

sub programme_cb( $ ) {

364

my $p = shift;

365

print "got programme: $p->{title}->[0]->[0]\n";

366

my $c = $channels{$p->{channel}};

367

print 'channel name is: ', $c->{'display-name'}->[0]->[0], "\n";

368

}

369

370

# Let's go.

371

XMLTV::parse_callback($document, \&encoding_cb, \&credits_cb,

372

\&channel_cb, \&programme_cb);

373

374

=cut

375

# Private.

376

sub new_doc_callback( $$$$ ) {

377

my ($enc_cb, $cred_cb, $ch_cb, $p_cb) = @_;

378

t 'creating new XML::Twig';

379

t '\@Channel_Handlers=' . d \@Channel_Handlers;

380

t '\@Programme_Handlers=' . d \@Programme_Handlers;

381

new XML::Twig(StartTagHandlers =>

382

{ '/tv' => sub {

383

my ($t, $node) = @_;

384

my $enc;

385

if ($KEEP_ENCODING) {

386

t 'KeepEncoding on, get original encoding';

387

$enc = $t->encoding();

388

}

389

else {

390

t 'assuming UTF-8 encoding';

391

$enc = 'UTF-8';

392

}

393

394

if (defined $enc_cb) {

395

for (ref $enc_cb) {

396

if ($_ eq 'CODE') {

397

$enc_cb->($enc);

398

}

399

elsif ($_ eq 'SCALAR') {

400

$$enc_cb = $enc;

401

}

402

else {

403

die "callback should be code ref or scalar ref, or undef";

404

}

405

}

406

}

407

408

if (defined $cred_cb) {

409

my $cred = get_attrs($node);

410

for (ref $cred_cb) {

411

if ($_ eq 'CODE') {

412

$cred_cb->($cred);

413

}

414

elsif ($_ eq 'SCALAR') {

415

$$cred_cb = $cred;

416

}

417

else {

418

die "callback should be code ref or scalar ref, or undef";

419

}

420

}

421

}

422

# Most of the above code can be removed in the

423

# next release.

424

425

426

427

428

TwigHandlers =>

429

{ '/tv/channel' => sub {

430

my ($t, $node) = @_;

431

die if not defined $node;

432

my $c = node_to_channel($node);

433

$t->purge();

434

if (not $c) {

435

warn "skipping bad channel element\n";

436

}

437

else {

438

$ch_cb->($c);

439

}

440

441

442

'/tv/programme' => sub {

443

my ($t, $node) = @_;

444

die if not defined $node;

445

my $p = node_to_programme($node);

446

$t->purge();

447

if (not $p) {

448

warn "skipping bad programme element\n";

449

}

450

else {

451

$p_cb->($p);

452

}

453

454

455

456

KeepEncoding => $KEEP_ENCODING,

457

);

458

}

459

460

sub parse_callback( $$$$$ ) {

461

my ($str, $enc_cb, $cred_cb, $ch_cb, $p_cb) = @_;

462

sanity($str);

463

new_doc_callback($enc_cb, $cred_cb, $ch_cb, $p_cb)->parse($str);

464

}

465

466

=pod

467

468

=item parsefiles_callback(encoding_callback, credits_callback,

469

channel_callback, programme_callback,

470

filenames...)

471

472

As C<parse_callback()> but takes one or more filenames to open,

473

merging their contents in the same manner as C<parsefiles()>. Note

474

that the reading is still gradual - you get the channels and

475

programmes one at a time, as they are read.

476

477

Note that the same <channel> may be present in more than one file, so

478

the channel callback will get called more than once. ItE<39>s your

479

responsibility to weed out duplicate channel elements (since writing

480

them out again requires that each have a unique id).

481

482

For compatibility, there is an alias C<parsefile_callback()> which is

483

the same but takes only a single filename, B<before> the callback

484

arguments. This is deprecated.

485

486

=cut

487

sub parsefile_callback( $$$$$ ) {

488

my ($f, $enc_cb, $cred_cb, $ch_cb, $p_cb) = @_;

489

parsefiles_callback($enc_cb, $cred_cb, $ch_cb, $p_cb, $f);

490

}

491

492

sub parsefiles_callback( $$$$@ ) {

493

my ($enc_cb, $cred_cb, $ch_cb, $p_cb, @files) = @_;

494

die "one or more files required" if not @files;

495

my $all_encoding; my $have_encoding = 0;

496

my $all_credits;

497

my %all_channels;

498

499

my $do_next_file; # to be defined below

500

my $my_enc_cb = sub( $ ) {

501

my $e = shift;

502

t 'encoding callback';

503

if ($have_encoding) {

504

t 'seen encoding before, just check';

505

my ($da, $de) = (defined $all_encoding, defined $e);

506

if (not $da and not $de) {

507

warn "two files both have unspecified character encodings, hope they're the same\n";

508

}

509

elsif (not $da and $de) {

510

warn "encoding $e not being returned to caller\n";

511

$all_encoding = $e;

512

}

513

elsif ($da and not $de) {

514

warn "input file with unspecified encoding, assuming same as others ($all_encoding)\n";

515

}

516

elsif ($da and $de) {

517

if ($all_encoding ne $e) {

518

die "this file's encoding $e differs from others' $all_encoding - aborting\n";

519

}

520

}

521

else { die }

522

}

523

else {

524

t 'not seen encoding before, call user';

525

$enc_cb->($e) if $enc_cb;

526

$all_encoding = $e;

527

$have_encoding = 1;

528

}

529

};

530

531

my $my_cred_cb = sub( $ ) {

532

my $c = shift;

533

if (defined $all_credits) {

534

if (Dumper($all_credits) ne Dumper($c)) {

535

warn "different files have different credits, picking one arbitrarily\n";

536

# In fact, we pick the last file in the list since this is the

537

# first to be opened.

538

539

}

540

}

541

else {

542

$cred_cb->($c) if $cred_cb;

543

$all_credits = $c;

544

}

545

};

546

547

my $my_ch_cb = sub( $ ) {

548

my $c = shift;

549

my $id = $c->{id};

550

if (defined $all_channels{$id} and Dumper($all_channels{$id}) ne Dumper($c)) {

551

warn "differing channels with id $id, picking one arbitrarily\n";

552

}

553

else {

554

$all_channels{$id} = $c;

555

$ch_cb->($c) if $ch_cb;

556

}

557

};

558

559

my $my_p_cb = sub( $ ) {

560

$do_next_file->(); # if any

561

$p_cb->(@_) if $p_cb;

562

};

563

564

$do_next_file = sub() {

565

while (@files) {

566

# Last first.

567

my $f = pop @files;

568

569

# FIXME commonize these augmented warning messages. Weird

570

# stuff (up to and including segfaults) happens if you

571

# call warn() or die() from these handlers.

572

573

local $SIG{__WARN__} = sub {

574

my $msg = shift;

575

$msg = "warning: something's wrong" if not defined $msg;

576

chomp $msg;

577

print STDERR "$f: $msg\n";

578

};

579

local $SIG{__DIE__} = sub {

580

my $msg = shift;

581

$msg = "warning: something's wrong" if not defined $msg;

582

chomp $msg;

583

print STDERR "$f: $msg\n";

584

exit(1);

585

};

586

587

my $t = new_doc_callback($my_enc_cb, $my_cred_cb, $my_ch_cb, $my_p_cb);

588

$t->parsefile($f);

589

}

590

};

591

592

# Let's go.

593

$do_next_file->();

594

}

595

596

=pod

597

598

=item write_data(data, options...)

599

600

Takes a data structure and writes it as XML to standard output. Any

601

extra arguments are passed on to XML::WriterE<39>s constructor, for example

602

603

my $f = new IO::File '>out.xml'; die if not $f;

604

write_data($data, OUTPUT => $f);

605

606

The encoding used for the output is given by the first element of the

607

data.

608

609

Normally, there will be a warning for any Perl data which is not

610

understood and cannot be written as XMLTV, such as strange keys in

611

hashes. But as an exception, any hash key beginning with an

612

underscore will be skipped over silently. You can store 'internal use

613

only' data this way.

614

615

If a programme or channel hash contains a key beginning with 'debug',

616

this key and its value will be written out as a comment inside the

617

<programme> or <channel> element. This lets you include small

618

debugging messages in the XML output.

619

620

=cut

621

sub write_data( $;@ ) {

622

my $data = shift;

623

my $writer = new XMLTV::Writer(encoding => $data->[0], @_);

624

$writer->start($data->[1]);

625

$writer->write_channels($data->[2]);

626

$writer->write_programme($_) foreach @{$data->[3]};

627

$writer->end();

628

}

629

630

631

# Private.

632

633

# get_attrs()

634

635

# Given a node, return a hashref of its attributes. Skips over

636

# the 'x-whatever' attributes.

637

638

sub get_attrs( $ ) {

639

my $node = shift; die if not defined $node;

640

my %r = %{$node->atts()};

641

foreach (keys %r) {

642

if (/^x-/) {

643

delete $r{$_};

644

}

645

else {

646

tidy(\$r{$_});

647

}

648

}

649

return \%r;

650

}

651

652

653

# Private.

654

655

# get_text()

656

657

# Given a node containing only text, return that text (with whitespace

658

# either side stripped). If the node has no children (as in

659

# <foo></foo> or <foo />), this is considered to be the empty string.

660

661

# Parameter: whether newlines are allowed (defaults to false)

662

663

sub get_text( $;$ ) {

664

my $node = shift;

665

my $allow_nl = shift; $allow_nl = 0 if not defined $allow_nl;

666

my @children = get_subelements($node);

667

if (@children == 0) {

668

return '';

669

}

670

elsif (@children == 1) {

671

my $v = $children[0]->pcdata();

672

t 'got pcdata: ' . d $v;

673

if (not defined $v) {

674

my $name = get_name($node);

675

warn "node $name expected to contain text has other stuff\n";

676

}

677

else {

678

# Just hope that the encoding we got uses \n...

679

if (not $allow_nl and $v =~ tr/\n//d) {

680

my $name = get_name($node);

681

warn "removing newlines from content of node $name\n";

682

}

683

tidy(\$v);

684

}

685

t 'returning: ' . d $v;

686

return $v;

687

}

688

elsif (@children > 1) {

689

my $name = get_name($node);

690

warn "node $name expected to contain text has more than one child\n";

691

return undef;

692

}

693

else { die }

694

}

695

696

# Private. Clean up parsed text. Takes ref to scalar.

697

sub tidy( $ ) {

698

our $v; local *v = shift; die if not defined $v;

699

if ($XML::Twig::VERSION < 3.01 || $KEEP_ENCODING) {

700

# Old versions of XML::Twig had stupid behaviour with

701

# entities - and so do the new ones if KeepEncoding is on.

702

703

for ($v) {

704

s/>/>/g;

705

s/</</g;

706

s/'/\'/g;

707

s/"/\"/g;

708

s/&/&/g; # needs to be last

709

}

710

}

711

else {

712

t 'new XML::Twig, not KeepEncoding, entities already dealt with';

713

}

714

715

for ($v) {

716

s/^\s+//;

717

s/\s+$//;

718

719

# On Windows there seems to be an inconsistency between

720

# XML::Twig and XML::Writer. The former returns text with

721

# \r\n line endings to the application, but the latter adds \r

722

# characters to text outputted. So reading some text and

723

# writing it again accumulates an extra \r character. We fix

724

# this by removing \r from the input here.

725

726

tr/\r//d;

727

}

728

}

729

730

# Private.

731

732

# get_subelements()

733

734

# Return a list of all subelements of a node. Whitespace is

735

# ignored; anything else that isn't a subelement is warned about.

736

# Skips over elements with name 'x-whatever'.

737

738

sub get_subelements( $ ) {

739

grep { (my $tmp = get_name($_)) !~ /^x-/ } $_[0]->children();

740

}

741

742

# Private.

743

744

# get_name()

745

746

# Return the element name of a node.

747

748

sub get_name( $ ) { $_[0]->gi() }

749

750

# Private.

751

752

# dump_node()

753

754

# Return some information about a node for debugging.

755

756

sub dump_node( $ ) {

757

my $n = shift;

758

# Doesn't seem to be easy way to get 'type' of node.

759

my $r = 'name: ' . get_name($n) . "\n";

760

for (trunc($n->text())) {

761

$r .= "value: $_\n" if defined and length;

762

}

763

return $r;

764

}

765

# Private. Truncate a string to a reasonable length and add '...' if

766

# necessary.

767

768

sub trunc {

769

local $_ = shift;

770

return undef if not defined;

771

if (length > 1000) {

772

return substr($_, 0, 1000) . '...';

773

}

774

return $_;

775

}

776

777

=pod

778

779

=item best_name(languages, pairs [, comparator])

780

781

The XMLTV format contains many places where human-readable text is

782

given an optional 'lang' attribute, to allow mixed languages. This is

783

represented in Perl as a pair [ text, lang ], although the second

784

element may be missing or undef if the language is unknown. When

785

several alernatives for an element (such as <title>) can be given, the

786

representation is a list of [ text, lang ] pairs. Given such a list,

787

what is the best text to use? It depends on the userE<39>s preferred

788

language.

789

790

This function takes a list of acceptable languages and a list of [string,

791

language] pairs, and finds the best one to use. This means first finding

792

the appropriate language and then picking the 'best' string in that

793

language.

794

795

The best is normally defined as the first one found in a usable

796

language, since the XMLTV format puts the most canonical versions

797

first. But you can pass in your own comparison function, for example

798

if you want to choose the shortest piece of text that is in an

799

acceptable language.

800

801

The acceptable languages should be a reference to a list of language

802

codes looking like 'ru', or like 'de_DE'. The text pairs should be a

803

reference to a list of pairs [ string, language ]. (As a special case

804

if this list is empty or undef, that means no text is present, and the

805

result is undef.) The third argument if present should be a cmp-style

806

function that compares two strings of text and returns 1 if the first

807

argument is better, -1 if the second better, 0 if theyE<39>re equally

808

good.

809

810

Returns: [s, l] pair, where s is the best of the strings to use and l

811

is its language. This pair is 'live' - it is one of those from the

812

list passed in. So you can use C<best_name()> to find the best pair

813

from a list and then modify the content of that pair.

814

815

(This routine depends on the C<Lingua::Preferred> module being

816

installed; if that module is missing then the first available

817

language is always chosen.)

818

819

Example:

820

821

my $langs = [ 'de', 'fr' ]; # German or French, please

822

823

# Say we found the following under $p->{title} for a programme $p.

824

my $pairs = [ [ 'La CitE des enfants perdus', 'fr' ],

825

[ 'The City of Lost Children', 'en_US' ] ];

826

827

my $best = best_name($langs, $pairs);

828

print "chose title $best->[0]\n";

829

830

=cut

831

sub best_name( $$;$ ) {

832

my ($wanted_langs, $pairs, $compare) = @_;

833

t 'best_name() ENTRY';

834

t 'wanted langs: ' . d $wanted_langs;

835

t '[text,lang] pairs: ' . d $pairs;

836

t 'comparison fn: ' . d $compare;

837

return undef if not defined $pairs;

838

my @pairs = @$pairs;

839

840

my @avail_langs;

841

my (%seen_lang, $seen_undef);

842

# Collect the list of available languages.

843

foreach (map { $_->[1] } @pairs) {

844

if (defined) {

845

next if $seen_lang{$_}++;

846

}

847

else {

848

next if $seen_undef++;

849

}

850

push @avail_langs, $_;

851

}

852

853

my $pref_lang = which_lang($wanted_langs, \@avail_langs);

854

855

# Gather up [text, lang] pairs which have the desired language.

856

my @candidates;

857

foreach (@pairs) {

858

my ($text, $lang) = @$_;

859

next unless ((not defined $lang)

860

or (defined $pref_lang and $lang eq $pref_lang));

861

push @candidates, $_;

862

}

863

864

return undef if not @candidates;

865

866

# If a comparison function was passed in, use it to compare the

867

# text strings from the candidate pairs.

868

869

@candidates = sort { $compare->($a->[0], $b->[0]) } @candidates

870

if defined $compare;

871

872

# Pick the first candidate. This will be the one ordered first by

873

# the comparison function if given, otherwise the earliest in the

874

# original list.

875

876

return $candidates[0];

877

}

878

879

880

=item list_channel_keys(), list_programme_keys()

881

882

Some users of this module may wish to enquire at runtime about which

883

keys a programme or channel hash can contain. The data in the hash

884

comes from the attributes and subelements of the corresponding element

885

in the XML. The values of attributes are simply stored as strings,

886

while subelements are processed with a handler which may return a

887

complex data structure. These subroutines returns a hash mapping key

888

to handler name and multiplicity. This lets you know what data types

889

can be expected under each key. For keys which come from attributes

890

rather than subelements, the handler is set to 'scalar', just as for

891

subelements which give a simple string. See L<"DATA STRUCTURE"> for

892

details on what the different handler names mean.

893

894

It is not possible to find out which keys are mandatory and which

895

optional, only a list of all those which might possibly be present.

896

An example use of these routines is the L<tv_grep(1)> program, which

897

creates its allowed command line arguments from the names of programme

898

subelements.

899

900

=cut

901

# Private.

902

sub list_keys( $$ ) {

903

my %r;

904

905

# Attributes.

906

foreach (@{shift()}) {

907

my ($k, $mult) = @$_;

908

$r{$k} = [ 'scalar', $mult ];

909

}

910

911

# Subelements.

912

foreach (@{shift()}) {

913

my ($k, $h_name, $mult) = @$_;

914

$r{$k} = [ $h_name, $mult ];

915

}

916

917

return \%r;

918

}

919

# Public.

920

sub list_channel_keys() {

921

list_keys(\@Channel_Attributes, \@Channel_Handlers);

922

}

923

sub list_programme_keys() {

924

list_keys(\@Programme_Attributes, \@Programme_Handlers);

925

}

926

927

=pod

928

929

=item catfiles(w_args, filename...)

930

931

Concatenate several listings files, writing the output to somewhere

932

specified by C<w_args>. Programmes are catenated together, channels

933

are merged, for credits we just take the first and warn if the others

934

differ.

935

936

The first argument is a hash reference giving information to pass to

937

C<XMLTV::Writer>E<39>s constructor. But do not specify encoding, this

938

will be taken from the input files. Currently C<catfiles()> will fail

939

work if the input files have different encodings.

940

941

=cut

942

sub catfiles( $@ ) {

943

my $w_args = shift;

944

my $w;

945

my %seen_ch;

946

XMLTV::parsefiles_callback

947

(sub {

948

die if defined $w;

949

$w = new XMLTV::Writer(%$w_args, encoding => shift);

950

951

sub { $w->start(shift) },

952

sub {

953

my $c = shift;

954

my $id = $c->{id};

955

if (not defined $seen_ch{$id}) {

956

$w->write_channel($c);

957

$seen_ch{$id} = $c;

958

}

959

elsif (Dumper($seen_ch{$id}) eq Dumper($c)) {

960

# They're identical, okay.

961

}

962

else {

963

warn "channel $id may differ between two files, "

964

. "picking one arbitrarily\n";

965

}

966

967

sub { $w->write_programme(shift) },

968

@_);

969

$w->end();

970

}

971

972

=pod

973

974

=item cat(data, ...)

975

976

Concatenate (and merge) listings data. Programmes are catenated

977

together, channels are merged, for credits we just take the first and

978

warn if the others differ (except that the 'date' of the result is the

979

latest date of all the inputs).

980

981

Whereas C<catfiles()> reads and writes files, this function takes

982

already-parsed listings data and returns some more listings data. It

983

is much more memory-hungry.

984

985

=cut

986

sub cat( @ ) { cat_aux(1, @_) }

987

988

=pod

989

990

=item cat_noprogrammes

991

992

Like C<cat()> but ignores the programme data and just returns

993

encoding, credits and channels. This is in case for scalability

994

reasons you want to handle programmes individually, but still

995

merge the smaller data.

996

997

=cut

998

sub cat_noprogrammes( @ ) { cat_aux(0, @_) }

999

1000

sub cat_aux( @ ) {

1001

my $all_encoding;

1002

my ($all_credits_nodate, $all_credits_date);

1003

my %all_channels;

1004

my @all_progs;

1005

my $do_progs = shift;

1006

1007

foreach (@_) {

1008

t 'doing arg: ' . d $_;

1009

my ($encoding, $credits, $channels, $progs) = @$_;

1010

1011

if (not defined $all_encoding) {

1012

$all_encoding = $encoding;

1013

}

1014

elsif ($encoding ne $all_encoding) {

1015

die "different files have different encodings, cannot continue\n";

1016

}

1017

1018

# If the credits are different between files there's not a lot

1019

# we can do to merge them. Apart from 'date', that is. There

1020

# we can say that the date of the concatenated listings is the

1021

# newest date from all the sources.

1022

1023

my %credits_nodate = %$credits; # copy

1024

my $d = delete $credits_nodate{date};

1025

if (defined $d) {

1026

# Need to 'require' rather than 'use' this because

1027

# XMLTV.pm is loaded during the build process and

1028

# XMLTV::Date isn't available then. Urgh.

1029

1030

require XMLTV::Date;

1031

my $dp = XMLTV::Date::parse_date($d);

1032

for ($all_credits_date) {

1033

if (not defined

1034

or Date_Cmp(XMLTV::Date::parse_date($_), $dp) < 0) {

1035

$_ = $d;

1036

}

1037

}

1038

}

1039

1040

# Now in uniqueness checks ignore the date.

1041

if (not defined $all_credits_nodate) {

1042

$all_credits_nodate = \%credits_nodate;

1043

}

1044

elsif (Dumper(\%credits_nodate) ne Dumper($all_credits_nodate)) {

1045

warn "different files have different credits, taking from first file\n";

1046

}

1047

1048

foreach (keys %$channels) {

1049

if (not defined $all_channels{$_}) {

1050

$all_channels{$_} = $channels->{$_};

1051

}

1052

elsif (Dumper($all_channels{$_}) ne Dumper($channels->{$_})) {

1053

warn "channel $_ differs between two files, taking first appearance\n";

1054

}

1055

}

1056

1057

push @all_progs, @$progs if $do_progs;

1058

}

1059

1060

$all_encoding = 'UTF-8' if not defined $all_encoding;

1061

1062

my %all_credits;

1063

%all_credits = %$all_credits_nodate

1064

if defined $all_credits_nodate;

1065

$all_credits{date} = $all_credits_date

1066

if defined $all_credits_date;

1067

1068

if ($do_progs) {

1069

return [ $all_encoding, \%all_credits, \%all_channels, \@all_progs ];

1070

}

1071

else {

1072

return [ $all_encoding, \%all_credits, \%all_channels ];

1073

}

1074

}

1075

1076

1077

# For each subelement of programme, we define a subroutine to read it

1078

# and one to write it. The reader takes an node for a single

1079

# subelement and returns its value as a Perl scalar (warning and

1080

# returning undef if error). The writer takes an XML::Writer, an

1081

# element name and a scalar value and writes a subelement for that

1082

# value. Note that the element name is passed in to the writer just

1083

# for symmetry, so that neither the writer or the reader have to know

1084

# what their element is called.

1085

1086

=pod

1087

1088

=back

1089

1090

=head1 DATA STRUCTURE

1091

1092

For completeness, we describe more precisely how channels and

1093

programmes are represented in Perl. Each element of the channels list

1094

is a hashref corresponding to one <channel> element, and likewise for

1095

programmes. The possible keys of a channel (programme) hash are the

1096

names of attributes or subelements of <channel> (<programme>).

1097

1098

The values for attributes are not processed in any way; an attribute

1099

C<fred="jim"> in the XML will become a hash element with key C<'fred'>,

1100

value C<'jim'>.

1101

1102

But for subelements, there is further processing needed to turn the

1103

XML content of a subelement into Perl data. What is done depends on

1104

what type of data is stored under that subelement. Also, if a certain

1105

element can appear several times then the hash key for that element

1106

points to a list of values rather than just one.

1107

1108

The conversion of a subelementE<39>s content to and from Perl data is

1109

done by a handler. The most common handler is I<with-lang>, used for

1110

human-readable text content plus an optional 'lang' attribute. There

1111

are other handlers for other data structures in the file format.

1112

Often two subelements will share the same handler, since they hold the

1113

same type of data. The handlers defined are as follows; note that

1114

many of them will silently strip leading and trailing whitespace in

1115

element content. Look at the DTD itself for an explanation of the

1116

whole file format.

1117

1118

Unless specified otherwise, it is not allowed for an element expected

1119

to contain text to have empty content, nor for the text to contain

1120

newline characters.

1121

1122

=over

1123

1124

=item I<credits>

1125

1126

Turns a list of credits (for director, actor, writer, etc.) into a

1127

hash mapping 'role' to a list of names. The names in each role are

1128

kept in the same order.

1129

1130

=cut

1131

$Handlers{credits}->[0] = sub( $ ) {

1132

my $node = shift;

1133

my @roles = qw(director actor writer adapter producer presenter

1134

commentator guest);

1135

my %known_role; ++$known_role{$_} foreach @roles;

1136

my %r;

1137

foreach (get_subelements($node)) {

1138

my $role = get_name($_);

1139

unless ($known_role{$role}++) {

1140

warn "unknown thing in credits: $role";

1141

next;

1142

}

1143

push @{$r{$role}}, get_text($_);

1144

}

1145

return \%r;

1146

};

1147

$Handlers{credits}->[1] = sub( $$$ ) {

1148

my ($w, $e, $v) = @_; die if not defined $v;

1149

my %h = %$v;

1150

return if not %h; # don't write empty element

1151

t 'writing credits: ' . d \%h;

1152

# TODO some 'do nothing' setting in XML::Writer to replace this

1153

# convention of passing undef.

1154

1155

$w->startTag($e) if $w;

1156

foreach (qw[director actor writer adapter producer presenter

1157

commentator guest] ) {

1158

next unless defined $h{$_};

1159

my @people = @{delete $h{$_}};

1160

foreach my $person (@people) {

1161

die if not defined $person;

1162

$w->dataElement($_, $person) if $w;

1163

}

1164

}

1165

warn_unknown_keys($e, \%h);

1166

$w->endTag($e) if $w;

1167

};

1168

1169

=pod

1170

1171

=item I<scalar>

1172

1173

Reads and writes a simple string as the content of the XML element.

1174

1175

=cut

1176

$Handlers{scalar}->[0] = sub( $ ) {

1177

my $node = shift;

1178

return get_text($node);

1179

};

1180

$Handlers{scalar}->[1] = sub( $$$ ) {

1181

my ($w, $e, $v) = @_;

1182

t 'scalar';

1183

$w->dataElement($e, $v) if $w;

1184

};

1185

1186

=pod

1187

1188

=item I<length>

1189

1190

Converts the content of a <length> element into a number of seconds

1191

(so <length units="minutes">5</minutes> would be returned as 300). On

1192

writing out again tries to convert a number of seconds to a time in

1193

minutes or hours if that would look better.

1194

1195

=cut

1196

$Handlers{length}->[0] = sub( $ ) {

1197

my $node = shift; die if not defined $node;

1198

my %attrs = %{get_attrs($node)};

1199

my $d = get_text($node);

1200

if ($d =~ /^\s*$/) {

1201

warn "empty 'length' element";

1202

return undef;

1203

}

1204

if ($d !~ tr/0-9// or $d =~ tr/0-9//c) {

1205

warn "bad content of 'length' element: $d";

1206

return undef;

1207

}

1208

my $units = $attrs{units};

1209

if (not defined $units) {

1210

warn "missing 'units' attr in 'length' element";

1211

return undef;

1212

}

1213

# We want to return a length in seconds.

1214

if ($units eq 'seconds') {

1215

# Okay.

1216

}

1217

elsif ($units eq 'minutes') {

1218

$d *= 60;

1219

}

1220

elsif ($units eq 'hours') {

1221

$d *= 60 * 60;

1222

}

1223

else {

1224

warn "bad value of 'units': $units";

1225

return undef;

1226

}

1227

return $d;

1228

};

1229

$Handlers{length}->[1] = sub( $$$ ) {

1230

my ($w, $e, $v) = @_;

1231

t 'length';

1232

my $units;

1233

if ($v % 3600 == 0) {

1234

$units = 'hours';

1235

$v /= 3600;

1236

}

1237

elsif ($v % 60 == 0) {

1238

$units = 'minutes';

1239

$v /= 60;

1240

}

1241

else {

1242

$units = 'seconds';

1243

}

1244

$w->dataElement($e, $v, units => $units) if $w;

1245

};

1246

1247

=pod

1248

1249

=item I<episode-num>

1250

1251

The representation in Perl of XMLTVE<39>s odd episode numbers is as a

1252

pair of [ content, system ]. As specified by the DTD, if the system is

1253

not given in the file then 'onscreen' is assumed. Whitespace in the

1254

'xmltv_ns' system is unimportant, so on reading it is normalized to

1255

a single space on either side of each dot.

1256

1257

=cut

1258

$Handlers{'episode-num'}->[0] = sub( $ ) {

1259

my $node = shift; die if not defined $node;

1260

my %attrs = %{get_attrs($node)};

1261

my $system = $attrs{system};

1262

$system = 'onscreen' if not defined $system;

1263

my $content = get_text($node);

1264

if ($system eq 'xmltv_ns') {

1265

# Make it look nice.

1266

$content =~ s/\s+//g;

1267

$content =~ s/\./ . /g;

1268

}

1269

return [ $content, $system ];

1270

};

1271

$Handlers{'episode-num'}->[1] = sub( $$$ ) {

1272

my ($w, $e, $v) = @_;

1273

t 'episode number';

1274

if (not ref $v or ref $v ne 'ARRAY') {

1275

warn "not writing episode-num whose content is not an array";

1276

return;

1277

}

1278

my ($content, $system) = @$v;

1279

$system = 'onscreen' if not defined $system;

1280

$w->dataElement($e, $content, system => $system) if $w;

1281

};

1282

1283

=pod

1284

1285

=item I<video>

1286

1287

The <video> section is converted to a hash. The <present> subelement

1288

corresponds to the key 'present' of this hash, 'yes' and 'no' are

1289

converted to Booleans. The same applies to <colour>. The content of

1290

the <aspect> subelement is stored under the key 'aspect'. These keys

1291

can be missing in the hash just as the subelements can be missing in

1292

the XML.

1293

1294

=cut

1295

$Handlers{video}->[0] = sub ( $ ) {

1296

my $node = shift;

1297

my %r;

1298

foreach (get_subelements($node)) {

1299

my $name = get_name($_);

1300

my $value = get_text($_);

1301

if ($name eq 'present') {

1302

warn "'present' seen twice" if defined $r{present};

1303

$r{present} = decode_boolean($value);

1304

}

1305

elsif ($name eq 'colour') {

1306

warn "'colour' seen twice" if defined $r{colour};

1307

$r{colour} = decode_boolean($value);

1308

}

1309

elsif ($name eq 'aspect') {

1310

warn "'aspect' seen twice" if defined $r{aspect};

1311

$value =~ /^\d+:\d+$/ or warn "bad aspect ratio: $value";

1312

$r{aspect} = $value;

1313

}

1314

}

1315

return \%r;

1316

};

1317

$Handlers{video}->[1] = sub( $$$ ) {

1318

my ($w, $e, $v) = @_;

1319

t "'video' element";

1320

my %h = %$v;

1321

return if not %h; # don't write empty element

1322

$w->startTag($e) if $w;

1323

if (defined (my $val = delete $h{present})) {

1324

$w->dataElement('present', encode_boolean($val)) if $w;

1325

}

1326

if (defined (my $val = delete $h{colour})) {

1327

$w->dataElement('colour', encode_boolean($val)) if $w;

1328

}

1329

if (defined (my $val = delete $h{aspect})) {

1330

$w->dataElement('aspect', $val) if $w;

1331

}

1332

if (defined (my $val = delete $h{quality})) {

1333

$w->dataElement('quality', $val) if $w;

1334

}

1335

warn_unknown_keys("zz $e", \%h);

1336

$w->endTag($e) if $w;

1337

};

1338

1339

=pod

1340

1341

=item I<audio>

1342

1343

This is similar to I<video>. <present> is a Boolean value, while

1344

the content of <stereo> is stored unchanged.

1345

1346

=cut

1347

$Handlers{audio}->[0] = sub( $ ) {

1348

my $node = shift;

1349

my %r;

1350

foreach (get_subelements($node)) {

1351

my $name = get_name($_);

1352

my $value = get_text($_);

1353

if ($name eq 'present') {

1354

warn "'present' seen twice" if defined $r{present};

1355

$r{present} = decode_boolean($value);

1356

}

1357

elsif ($name eq 'stereo') {

1358

warn "'stereo' seen twice" if defined $r{stereo};

1359

if ($value eq '') {

1360

warn "empty 'stereo' element not permitted, should be <stereo>stereo</stereo>";

1361

$value = 'stereo';

1362

}

1363

warn "bad value for 'stereo': '$value'"

1364

if ($value ne 'mono'

1365

and $value ne 'stereo'

1366

and $value ne 'surround'

1367

and $value ne 'dolby digital'

1368

and $value ne 'dolby');

1369

$r{stereo} = $value;

1370

}

1371

}

1372

return \%r;

1373

};

1374

$Handlers{audio}->[1] = sub( $$$ ) {

1375

my ($w, $e, $v) = @_;

1376

my %h = %$v;

1377

return if not %h; # don't write empty element

1378

$w->startTag($e) if $w;

1379

if (defined (my $val = delete $h{present})) {

1380

$w->dataElement('present', encode_boolean($val)) if $w;

1381

}

1382

if (defined (my $val = delete $h{stereo})) {

1383

$w->dataElement('stereo', $val) if $w;

1384

}

1385

warn_unknown_keys($e, \%h);

1386

$w->endTag($e) if $w;

1387

};

1388

1389

=pod

1390

1391

=item I<previously-shown>

1392

1393

The 'start' and 'channel' attributes are converted to keys in a hash.

1394

1395

=cut

1396

$Handlers{'previously-shown'}->[0] = sub( $ ) {

1397

my $node = shift; die if not defined $node;

1398

my %attrs = %{get_attrs($node)};

1399

my $r = {};

1400

foreach (qw(start channel)) {

1401

my $v = delete $attrs{$_};

1402

$r->{$_} = $v if defined $v;

1403

}

1404

foreach (keys %attrs) {

1405

warn "unknown attribute $_ in previously-shown";

1406

}

1407

return $r;

1408

};

1409

$Handlers{'previously-shown'}->[1] = sub( $$$ ) {

1410

my ($w, $e, $v) = @_;

1411

$w->emptyTag($e, %$v) if $w;

1412

};

1413

1414

=pod

1415

1416

=item I<presence>

1417

1418

The content of the element is ignored: it signfies something by its

1419

very presence. So the conversion from XML to Perl is a constant true

1420

value whenever the element is found; the conversion from Perl to XML

1421

is to write out the element if true, donE<39>t write anything if false.

1422

1423

=cut

1424

$Handlers{presence}->[0] = sub( $ ) {

1425

my $node = shift;

1426

# The 'new' element is empty, it signifies newness by its very

1427

# presence.

1428

1429

return 1;

1430

};

1431

$Handlers{presence}->[1] = sub( $$$ ) {

1432

my ($w, $e, $v) = @_;

1433

if (not $v) {

1434

# Not new, so don't create an element.

1435

}

1436

else {

1437

$w->emptyTag($e) if $w;

1438

}

1439

};

1440

1441

=pod

1442

1443

=item I<subtitles>

1444

1445

The 'type' attribute and the 'language' subelement (both optional)

1446

become keys in a hash. But see I<language> for what to pass as the

1447

value of that element.

1448

1449

=cut

1450

$Handlers{subtitles}->[0] = sub( $ ) {

1451

my $node = shift; die if not defined $node;

1452

my %attrs = %{get_attrs($node)};

1453

my %r;

1454

$r{type} = $attrs{type} if defined $attrs{type};

1455

foreach (get_subelements($node)) {

1456

my $name = get_name($_);

1457

if ($name eq 'language') {

1458

warn "'language' seen twice" if defined $r{language};

1459

$r{language} = read_with_lang($_, 0, 0);

1460

}

1461

else {

1462

warn "bad content of 'subtitles' element: $name";

1463

}

1464

}

1465

return \%r;

1466

};

1467

$Handlers{subtitles}->[1] = sub( $$$ ) {

1468

my ($w, $e, $v) = @_;

1469

t 'subtitles';

1470

my ($type, $language) = ($v->{type}, $v->{language});

1471

my %attrs; $attrs{type} = $type if defined $type;

1472

if (defined $language) {

1473

$w->startTag($e, %attrs) if $w;

1474

write_with_lang($w, 'language', $language, 0, 0);

1475

$w->endTag($e) if $w;

1476

}

1477

else {

1478

$w->emptyTag($e, %attrs) if $w;

1479

}

1480

};

1481

1482

=pod

1483

1484

=item I<rating>

1485

1486

The rating is represented as a tuple of [ rating, system, icons ].

1487

The last element is itself a listref of structures returned by the

1488

I<icon> handler.

1489

1490

=cut

1491

$Handlers{rating}->[0] = sub( $ ) {

1492

my $node = shift; die if not defined $node;

1493

my %attrs = %{get_attrs($node)};

1494

my $system = delete $attrs{system} if exists $attrs{system};

1495

foreach (keys %attrs) {

1496

warn "unknown attribute in rating: $_";

1497

}

1498

my @children = get_subelements($node);

1499

1500

# First child node is value.

1501

my $value_node = shift @children;

1502

if (not defined $value_node) {

1503

warn "missing 'value' element inside rating";

1504

return undef;

1505

}

1506

if ((my $name = get_name($value_node)) ne 'value') {

1507

warn "expected 'value' node inside rating, got '$name'";

1508

return undef;

1509

}

1510

1511

my $rating = read_value($value_node);

1512

1513

# Remaining children are icons.

1514

my @icons = map { read_icon($_) } @children;

1515

1516

return [ $rating, $system, \@icons ];

1517

};

1518

$Handlers{rating}->[1] = sub( $$$ ) {

1519

my ($w, $e, $v) = @_;

1520

if (not ref $v or ref $v ne 'ARRAY') {

1521

warn "not writing rating whose content is not an array";

1522

return;

1523

}

1524

my ($rating, $system, $icons) = @$v;

1525

if (defined $system) {

1526

$w->startTag($e, system => $system) if $w;

1527

}

1528

else {

1529

$w->startTag($e) if $w;

1530

}

1531

1532

write_value($w, 'value', $rating) if $w;

1533

if ($w) { write_icon($w, 'icon', $_) foreach @$icons };

1534

$w->endTag($e) if $w;

1535

};

1536

1537

=pod

1538

1539

=item I<star-rating>

1540

1541

In XML this is a string 'X/Y' plus a list of icons. In Perl represented

1542

as a pair [ rating, icons ] similar to I<rating>.

1543

1544

Multiple star ratings are now supported. For backward compatability,

1545

you may specify a single [rating,icon] or the preferred double array

1546

[[rating,system,icon],[rating2,system2,icon2]] (like 'ratings')

1547

1548

1549

=cut

1550

$Handlers{'star-rating'}->[0] = sub( $ ) {

1551

my $node = shift;

1552

my %attrs = %{get_attrs($node)};

1553

my $system = delete $attrs{system} if exists $attrs{system};

1554

my @children = get_subelements($node);

1555

1556

# First child node is value.

1557

my $value_node = shift @children;

1558

if (not defined $value_node) {

1559

warn "missing 'value' element inside star-rating";

1560

return undef;

1561

}

1562

if ((my $name = get_name($value_node)) ne 'value') {

1563

warn "expected 'value' node inside star-rating, got '$name'";

1564

return undef;

1565

}

1566

my $rating = read_value($value_node);

1567

1568

# Remaining children are icons.

1569

my @icons = map { read_icon($_) } @children;

1570

1571

return [ $rating, $system, \@icons ];

1572

};

1573

$Handlers{'star-rating'}->[1] = sub ( $$$ ) {

1574

my ($w, $e, $v) = @_;

1575

1576

# 10/31/2007 star-rating can now have multiple values (and system=)

1577

# let's make it so old code still works!

1578

1579

if (not ref $v or ref $v ne 'ARRAY') {

1580

$v=[$v];

1581

# warn "not writing star-rating whose content is not an array";

1582

# return;

1583

}

1584

my ($rating, $system, $icons) = @$v;

1585

if (defined $system) {

1586

$w->startTag($e, system => $system) if $w;

1587

}

1588

else {

1589

$w->startTag($e) if $w;

1590

}

1591

write_value($w, 'value', $rating) if $w;

1592

if ($w) { write_icon($w, 'icon', $_) foreach @$icons };

1593

$w->endTag($e) if $w;

1594

};

1595

1596

=pod

1597

1598

=item I<icon>

1599

1600

An icon in XMLTV files is like the <img> element in HTML. It is

1601

represented in Perl as a hashref with 'src' and optionally 'width'

1602

and 'height' keys.

1603

1604

=cut

1605

sub write_icon( $$$ ) {

1606

my ($w, $e, $v) = @_;

1607

croak "no 'src' attribute for icon\n" if not defined $v->{src};

1608

croak "bad width $v->{width} for icon\n"

1609

if defined $v->{width} and $v->{width} !~ /^\d+$/;

1610

croak "bad height $v->{height} for icon\n"

1611

if defined $v->{height} and $v->{height} !~ /^\d+$/;

1612

1613

foreach (keys %$v) {

1614

warn "unrecognized key in icon: $_\n"

1615

if $_ ne 'src' and $_ ne 'width' and $_ ne 'height';

1616

}

1617

1618

$w->emptyTag($e, %$v);

1619

}

1620

sub read_icon( $ ) {

1621

my $node = shift; die if not defined $node;

1622

my %attrs = %{get_attrs($node)};

1623

warn "missing 'src' attribute in icon" if not defined $attrs{src};

1624

return \%attrs;

1625

}

1626

$Handlers{icon}->[0] = \&read_icon;

1627

$Handlers{icon}->[1] = sub( $$$ ) {

1628

my ($w, $e, $v) = @_;

1629

write_icon($w, $e, $v) if $w;

1630

};

1631

1632

# To keep things tidy some elements that can have icons store their

1633

# textual content inside a subelement called 'value'. These two

1634

# routines are a bit trivial but they're here for consistency.

1635

1636

sub read_value( $ ) {

1637

my $value_node = shift;

1638

my $v = get_text($value_node);

1639

if (not defined $v or $v eq '') {

1640

warn "no content of 'value' element";

1641

return undef;

1642

}

1643

return $v;

1644

}

1645

sub write_value( $$$ ) {

1646

my ($w, $e, $v) = @_;

1647

$w->dataElement($e, $v) if $w;

1648

};

1649

1650

1651

# Booleans in XMLTV files are 'yes' or 'no'.

1652

sub decode_boolean( $ ) {

1653

my $value = shift;

1654

if ($value eq 'no') {

1655

return 0;

1656

}

1657

elsif ($value eq 'yes') {

1658

return 1;

1659

}

1660

else {

1661

warn "bad boolean: $value";

1662

return undef;

1663

}

1664

}

1665

sub encode_boolean( $ ) {

1666

my $v = shift;

1667

warn "expected a Perl boolean like 0 or 1, not '$v'\n"

1668

if $v and $v != 1;

1669

return $v ? 'yes' : 'no';

1670

}

1671

1672

1673

=pod

1674

1675

=item I<with-lang>

1676

1677

In XML something like title can be either <title>Foo</title>

1678

or <title lang="en">Foo</title>. In Perl these are stored as

1679

[ 'Foo' ] and [ 'Foo', 'en' ]. For the former [ 'Foo', undef ]

1680

would also be okay.

1681

1682

This handler also has two modifiers which may be added to the name

1683

after '/'. I</e> means that empty text is allowed, and will be

1684

returned as the empty tuple [], to mean that the element is present

1685

but has no text. When writing with I</e>, undef will also be

1686

understood as present-but-empty. You cannot however specify a

1687

language if the text is empty.

1688

1689

The modifier I</m> means that the text is allowed to span multiple

1690

lines.

1691

1692

So for example I<with-lang/em> is a handler for text with language,

1693

where the text may be empty and may contain newlines. Note that the

1694

I<with-lang-or-empty> of earlier releases has been replaced by

1695

I<with-lang/e>.

1696

1697

=cut

1698

sub read_with_lang( $$$ ) {

1699

my ($node, $allow_empty, $allow_nl) = @_;

1700

die if not defined $node;

1701

my %attrs = %{get_attrs($node)};

1702

my $lang = $attrs{lang} if exists $attrs{lang};

1703

my $value = get_text($node, $allow_nl);

1704

if (not length $value) {

1705

if (not $allow_empty) {

1706

warn 'empty string for with-lang value';

1707

return undef;

1708

}

1709

warn 'empty string may not have language' if defined $lang;

1710

return [];

1711

}

1712

if (defined $lang) {

1713

return [ $value, $lang ];

1714

}

1715

else {

1716

return [ $value ];

1717

}

1718

}

1719

$Handlers{'with-lang'}->[0] = sub( $ ) { read_with_lang($_[0], 0, 0) };

1720

$Handlers{'with-lang/'}->[0] = sub( $ ) { read_with_lang($_[0], 0, 0) };

1721

$Handlers{'with-lang/e'}->[0] = sub( $ ) { read_with_lang($_[0], 1, 0) };

1722

$Handlers{'with-lang/m'}->[0] = sub( $ ) { read_with_lang($_[0], 0, 1) };

1723

$Handlers{'with-lang/em'}->[0] = sub( $ ) { read_with_lang($_[0], 1, 1) };

1724

$Handlers{'with-lang/me'}->[0] = sub( $ ) { read_with_lang($_[0], 1, 1) };

1725

1726

sub write_with_lang( $$$$$ ) {

1727

my ($w, $e, $v, $allow_empty, $allow_nl) = @_;

1728

if (not ref $v or ref $v ne 'ARRAY') {

1729

warn "not writing with-lang whose content is not an array";

1730

return;

1731

}

1732

1733

if (not @$v) {

1734

if (not $allow_empty) {

1735

warn "not writing no content for $e";

1736

return;

1737

}

1738

$v = [ '' ];

1739

}

1740

1741

my ($text, $lang) = @$v;

1742

t 'writing character data: ' . d $text;

1743

if (not defined $text) {

1744

warn "not writing undefined value for $e";

1745

return;

1746

}

1747

1748

1749

# strip whitespace silently.

1750

# we used to use a warn, but later on the code catches this and drops the record

1751

1752

my $old_text = $text;

1753

$text =~ s/^\s+//;

1754

$text =~ s/\s+$//;

1755

1756

if (not length $text) {

1757

if (not $allow_empty) {

1758

warn "not writing empty content for $e";

1759

return;

1760

}

1761

if (defined $lang) {

1762

warn "not writing empty content with language for $e";

1763

return;

1764

}

1765

$w->emptyTag($e) if $w;

1766

return;

1767

}

1768

1769

if (not $allow_nl and $text =~ tr/\n//) {

1770

warn "not writing text containing newlines for $e";

1771

return;

1772

}

1773

1774

if (defined $lang) {

1775

$w->dataElement($e, $text, lang => $lang) if $w;

1776

}

1777

else {

1778

$w->dataElement($e, $text) if $w;

1779

}

1780

}

1781

$Handlers{'with-lang'}->[1] = sub( $$$ ) { write_with_lang($_[0], $_[1], $_[2], 0, 0) };

1782

$Handlers{'with-lang/'}->[1] = sub( $$$ ) { write_with_lang($_[0], $_[1], $_[2], 0, 0) };

1783

$Handlers{'with-lang/e'}->[1] = sub( $$$ ) { write_with_lang($_[0], $_[1], $_[2], 1, 0) };

1784

$Handlers{'with-lang/m'}->[1] = sub( $$$ ) { write_with_lang($_[0], $_[1], $_[2], 0, 1) };

1785

$Handlers{'with-lang/em'}->[1] = sub( $$$ ) { write_with_lang($_[0], $_[1], $_[2], 1, 1) };

1786

$Handlers{'with-lang/me'}->[1] = sub( $$$ ) { write_with_lang($_[0], $_[1], $_[2], 1, 1) };

1787

1788

# Sanity check.

1789

foreach (keys %Handlers) {

1790

my $v = $Handlers{$_};

1791

if (@$v != 2

1792

or ref($v->[0]) ne 'CODE'

1793

or ref($v->[1]) ne 'CODE') {

1794

die "bad handler pair for $_\n";

1795

}

1796

}

1797

1798

=pod

1799

1800

=back

1801

1802

Now, which handlers are used for which subelements (keys) of channels

1803

and programmes? And what is the multiplicity (should you expect a

1804

single value or a list of values)?

1805

1806

The following tables map subelements of <channel> and of <programme>

1807

to the handlers used to read and write them. Many elements have their

1808

own handler with the same name, and most of the others use

1809

I<with-lang>. The third column specifies the multiplicity of the

1810

element: B<*> (any number) will give a list of values in Perl, B<+>

1811

(one or more) will give a nonempty list, B<?> (maybe one) will give a

1812

scalar, and B<1> (exactly one) will give a scalar which is not undef.

1813

1814

=head2 Handlers for <channel>

1815

1816

1817

=over

1818

1819

=item display-name, I<with-lang>, B<+>

1820

1821

=item icon, I<icon>, B<*>

1822

1823

=item url, I<scalar>, B<*>

1824

1825

1826

=back

1827

1828

=head2 Handlers for <programme>

1829

1830

1831

=over

1832

1833

=item title, I<with-lang>, B<+>

1834

1835

=item sub-title, I<with-lang>, B<*>

1836

1837

=item desc, I<with-lang/m>, B<*>

1838

1839

=item credits, I<credits>, B<?>

1840

1841

=item date, I<scalar>, B<?>

1842

1843

=item category, I<with-lang>, B<*>

1844

1845

=item language, I<with-lang>, B<?>

1846

1847

=item orig-language, I<with-lang>, B<?>

1848

1849

=item length, I<length>, B<?>

1850

1851

=item icon, I<icon>, B<*>

1852

1853

=item url, I<scalar>, B<*>

1854

1855

=item country, I<with-lang>, B<*>

1856

1857

=item episode-num, I<episode-num>, B<*>

1858

1859

=item video, I<video>, B<?>

1860

1861

=item audio, I<audio>, B<?>

1862

1863

=item previously-shown, I<previously-shown>, B<?>

1864

1865

=item premiere, I<with-lang/em>, B<?>

1866

1867

=item last-chance, I<with-lang/em>, B<?>

1868

1869

=item new, I<presence>, B<?>

1870

1871

=item subtitles, I<subtitles>, B<*>

1872

1873

=item rating, I<rating>, B<*>

1874

1875

=item star-rating, I<star-rating>, B<*>

1876

1877

1878

=back

1879

1880

At present, no parsing or validation on dates is done because dates

1881

may be partially specified in XMLTV. For example '2001' means that

1882

the year is known but not the month, day or time of day. Maybe in the

1883

future dates will be automatically converted to and from

1884

B<Date::Manip> objects. For now they just use the I<scalar> handler.

1885

Similar remarks apply to URLs.

1886

1887

=cut

1888

# Private.

1889

sub node_to_programme( $ ) {

1890

my $node = shift; die if not defined $node;

1891

my %programme;

1892

1893

# Attributes of programme element.

1894

%programme = %{get_attrs($node)};

1895

t 'attributes: ' . d \%programme;

1896

1897

# Check the required attributes are there. As with most checking,

1898

# this isn't an alternative to using a validator but it does save

1899

# some headscratching during debugging.

1900

1901

foreach (qw(start channel)) {

1902

if (not defined $programme{$_}) {

1903

warn "programme missing '$_' attribute\n";

1904

return undef;

1905

}

1906

}

1907

my @known_attrs = map { $_->[0] } @Programme_Attributes;

1908

my %ka; ++$ka{$_} foreach @known_attrs;

1909

foreach (keys %programme) {

1910

unless ($ka{$_}) {

1911

warn "deleting unknown attribute '$_'";

1912

delete $programme{$_};

1913

}

1914

}

1915

1916

call_handlers_read($node, \@Programme_Handlers, \%programme);

1917

return \%programme;

1918

}

1919

1920

1921

# Private.

1922

sub node_to_channel( $ ) {

1923

my $node = shift; die if not defined $node;

1924

my %channel;

1925

t 'node_to_channel() ENTRY';

1926

1927

%channel = %{get_attrs($node)};

1928

t 'attributes: ' . d \%channel;

1929

if (not defined $channel{id}) {

1930

warn "channel missing 'id' attribute\n";

1931

}

1932

foreach (keys %channel) {

1933

unless (/^_/ or $_ eq 'id') {

1934

warn "deleting unknown attribute '$_'";

1935

delete $channel{$_};

1936

}

1937

}

1938

1939

t '\@Channel_Handlers=' . d \@Channel_Handlers;

1940

call_handlers_read($node, \@Channel_Handlers, \%channel);

1941

return \%channel;

1942

}

1943

1944

1945

1946

# Private.

1947

1948

# call_handlers_read()

1949

1950

# Read the subelements of a node according to a list giving a

1951

# handler subroutine for each subelement.

1952

1953

# Parameters:

1954

# node

1955

# Reference to list of handlers: tuples of

1956

# [element-name, handler-name, multiplicity]

1957

# Reference to hash for storing results

1958

1959

# Warns if errors, but attempts to contine.

1960

1961

sub call_handlers_read( $$$ ) {

1962

my ($node, $handlers, $r) = @_;

1963

t 'call_handlers_read() using handlers: ' . d $handlers;

1964

1965

die unless ref($r) eq 'HASH';

1966

our %r; local *r = $r;

1967

t 'going through each child of node';

1968

1969

# Current position in handlers. We expect to read the subelements

1970

# in the correct order as specified by the DTD.

1971

1972

my $handler_pos = 0;

1973

1974

SUBELEMENT: foreach (get_subelements($node)) {

1975

t 'doing subelement';

1976

my $name = get_name($_);

1977

t "tag name: $name";

1978

1979

# Search for a handler - from $handler_pos onwards. But

1980

# first, just warn if somebody is trying to use an element in

1981

# the wrong place (trying to go backwards in the list).

1982

1983

my $found_pos;

1984

foreach my $i (0 .. $handler_pos - 1) {

1985

if ($name eq $handlers->[$i]->[0]) {

1986

warn "element $name not expected here";

1987

next SUBELEMENT;

1988

}

1989

}

1990

for (my $i = $handler_pos; $i < @$handlers; $i++) {

1991

if ($handlers->[$i]->[0] eq $name) {

1992

t 'found handler';

1993

$found_pos = $i;

1994

last;

1995

}

1996

else {

1997

t "doesn't match name $handlers->[$i]->[0]";

1998

my ($handler_name, $h, $multiplicity)

1999

= @{$handlers->[$i]};

2000

die if not defined $handler_name;

2001

die if $handler_name eq '';

2002

2003

# Before we skip over this element, check that we got

2004

# the necessary values for it.

2005

2006

if ($multiplicity eq '?') {

2007

# Don't need to check whether this set.

2008

}

2009

elsif ($multiplicity eq '1') {

2010

if (not defined $r{$handler_name}) {

2011

warn "no element $handler_name found";

2012

}

2013

}

2014

elsif ($multiplicity eq '*') {

2015

# It's okay if nothing was ever set. We don't

2016

# insist on putting in an empty list.

2017

2018

}

2019

elsif ($multiplicity eq '+') {

2020

if (not defined $r{$handler_name}) {

2021

warn "no element $handler_name found";

2022

}

2023

elsif (not @{$r{$handler_name}}) {

2024

warn "strangely, empty list for $handler_name";

2025

}

2026

}

2027

else {

2028

warn "bad value of $multiplicity: $!";

2029

}

2030

}

2031

}

2032

if (not defined $found_pos) {

2033

warn "unknown element $name";

2034

next;

2035

}

2036

# Next time we begin searching from this position.

2037

$handler_pos = $found_pos;

2038

2039

# Call the handler.

2040

t 'calling handler';

2041

my ($handler_name, $h_name, $multiplicity)

2042

= @{$handlers->[$found_pos]};

2043

die if $handler_name ne $name;

2044

my $h = $Handlers{$h_name}; die "no handler $h_name" if not $h;

2045

my $result = $h->[0]->($_); # call reader sub

2046

t 'result: ' . d $result;

2047

warn("skipping bad $name\n"), next if not defined $result;

2048

2049

# Now set the value. We can't do multiplicity checking yet

2050

# because there might be more elements of this type still to

2051

# come.

2052

2053

if ($multiplicity eq '?' or $multiplicity eq '1') {

2054

warn "seen $name twice"

2055

if defined $r{$name};

2056

$r{$name} = $result;

2057

}

2058

elsif ($multiplicity eq '*' or $multiplicity eq '+') {

2059

push @{$r{$name}}, $result;

2060

}

2061

else {

2062

warn "bad multiplicity: $multiplicity";

2063

}

2064

}

2065

}

2066

2067

sub warn_unknown_keys( $$ ) {

2068

my $elem_name = shift;

2069

our %k; local *k = shift;

2070

foreach (keys %k) {

2071

/^_/

2072

or $warned_unknown_key{$elem_name}->{$_}++

2073

or warn "unknown key $_ in $elem_name hash\n";

2074

}

2075

}

2076

2077

package XMLTV::Writer;

2078

use base 'XML::Writer';

2079

use Carp;

2080

2081

use Date::Manip qw/UnixDate DateCalc/;

2082

2083

# Use Log::TraceMessages if installed.

2084

BEGIN {

2085

eval { require Log::TraceMessages };

2086

if ($@) {

2087

*t = sub {};

2088

*d = sub { '' };

2089

}

2090

else {

2091

*t = \&Log::TraceMessages::t;

2092

*d = \&Log::TraceMessages::d;

2093

}

2094

}

2095

2096

BEGIN {

2097

Date::Manip::Date_Init("TZ=UTC");

2098

}

2099

2100

# Override dataElement() to refuse writing empty or whitespace

2101

# elements.

2102

2103

sub dataElement( $$$@ ) {

2104

my ($self, $elem, $content, @rest) = @_;

2105

if ($content !~ /\S/) {

2106

warn "not writing empty content for $elem";

2107

return;

2108

}

2109

return $self->SUPER::dataElement($elem, $content, @rest);

2110

}

2111

2112

=pod

2113

2114

=head1 WRITING

2115

2116

When reading a file you have the choice of using C<parse()> to gulp

2117

the whole file and return a data structure, or using

2118

C<parse_callback()> to get the programmes one at a time, although

2119

channels and other data are still read all at once.

2120

2121

There is a similar choice when writing data: the C<write_data()>

2122

routine prints a whole XMLTV document at once, but if you want to

2123

write an XMLTV document incrementally you can manually create an

2124

C<XMLTV::Writer> object and call methods on it. Synopsis:

2125

2126

use XMLTV;

2127

my $w = new XMLTV::Writer();

2128

$w->comment("Hello from XML::Writer's comment() method");

2129

$w->start({ 'generator-info-name' => 'Example code in pod' });

2130

my %ch = (id => 'test-channel', 'display-name' => [ [ 'Test', 'en' ] ]);

2131

$w->write_channel(\%ch);

2132

my %prog = (channel => 'test-channel', start => '200203161500',

2133

title => [ [ 'News', 'en' ] ]);

2134

$w->write_programme(\%prog);

2135

$w->end();

2136

2137

XMLTV::Writer inherits from XML::Writer, and provides the following extra

2138

or overridden methods:

2139

2140

=over

2141

2142

=item new(), the constructor

2143

2144

Creates an XMLTV::Writer object and starts writing an XMLTV file, printing

2145

the DOCTYPE line. Arguments are passed on to XML::WriterE<39>s constructor,

2146

except for the following:

2147

2148

the 'encoding' key if present gives the XML character encoding.

2149

For example:

2150

2151

my $w = new XMLTV::Writer(encoding => 'ISO-8859-1');

2152

2153

If encoding is not specified, XML::WriterE<39>s default is used

2154

(currently UTF-8).

2155

2156

XMLTW::Writer can also filter out specific days from the data. This is

2157

useful if the datasource provides data for periods of time that does not

2158

match the days that the user has asked for. The filtering is controlled

2159

with the days, offset and cutoff arguments:

2160

2161

my $w = new XMLTV::Writer(

2162

offset => 1,

2163

days => 2,

2164

cutoff => "050000" );

2165

2166

In this example, XMLTV::Writer will discard all entries that do not have

2167

starttimes larger than or equal to 05:00 tomorrow and less than 05:00

2168

two days after tomorrow. The time offset is stripped off the starttime before

2169

the comparison is made.

2170

2171

=cut

2172

sub new {

2173

my $proto = shift;

2174

my $class = ref($proto) || $proto;

2175

my %args = @_;

2176

croak 'OUTPUT requires a filehandle, not a filename or anything else'

2177

if exists $args{OUTPUT} and not ref $args{OUTPUT};

2178

my $encoding = delete $args{encoding};

2179

my $days = delete $args{days};

2180

my $offset = delete $args{offset};

2181

my $cutoff = delete $args{cutoff};

2182

2183

my $self = $class->SUPER::new(DATA_MODE => 1, DATA_INDENT => 2, %args);

2184

bless($self, $class);

2185

2186

if (defined $encoding) {

2187

$self->xmlDecl($encoding);

2188

}

2189

else {

2190

# XML::Writer puts in 'encoding="UTF-8"' even if you don't ask

2191

# for it.

2192

2193

warn "assuming default UTF-8 encoding for output\n";

2194

$self->xmlDecl();

2195

}

2196

2197

# $Log::TraceMessages::On = 1;

2198

$self->{mintime} = "19700101000000";

2199

$self->{maxtime} = "29991231235959";

2200

2201

2202

if (defined( $days ) and defined( $offset ) and defined( $cutoff )) {

2203

$self->{mintime} = UnixDate(

2204

DateCalc( "today", "+" . $offset . " days" ),

2205

"%Y%m%d") . $cutoff;

2206

t "using mintime $self->{mintime}";

2207

2208

$self->{maxtime} = UnixDate(

2209

DateCalc("today", "+" . $offset+$days . " days"),

2210

"%Y%m%d" ) . $cutoff;

2211

t "using maxtime $self->{maxtime}";

2212

}

2213

elsif (defined( $days ) or defined( $offset ) or defined($cutoff)) {

2214

croak 'You must specify days, offset and cutoff or none of them';

2215

}

2216

2217

{

2218

local $^W = 0; $self->doctype('tv', undef, 'xmltv.dtd');

2219

}

2220

$self->{xmltv_writer_state} = 'new';

2221

return $self;

2222

}

2223

2224

=pod

2225

2226

=item start()

2227

2228

Write the start of the <tv> element. Parameter is a hashref which gives

2229

the attributes of this element.

2230

2231

=cut

2232

sub start {

2233

my $self = shift;

2234

die 'usage: XMLTV::Writer->start(hashref of attrs)' if @_ != 1;

2235

my $attrs = shift;

2236

2237

for ($self->{xmltv_writer_state}) {

2238

if ($_ eq 'new') {

2239

# Okay.

2240

}

2241

elsif ($_ eq 'channels' or $_ eq 'programmes') {

2242

croak 'cannot call start() more than once on XMLTV::Writer';

2243

}

2244

elsif ($_ eq 'end') {

2245

croak 'cannot do anything with end()ed XMLTV::Writer';

2246

}

2247

else { die }

2248

2249

$_ = 'channels';

2250

}

2251

$self->startTag('tv', order_attrs(%{$attrs}));

2252

}

2253

2254

=pod

2255

2256

=item write_channels()

2257

2258

Write several channels at once. Parameter is a reference to a hash

2259

mapping channel id to channel details. They will be written sorted

2260

by id, which is reasonable since the order of channels in an XMLTV

2261

file isnE<39>t significant.

2262

2263

=cut

2264

sub write_channels {

2265

my ($self, $channels) = @_;

2266

t('write_channels(' . d($self) . ', ' . d($channels) . ') ENTRY');

2267

croak 'expected hashref of channels' if ref $channels ne 'HASH';

2268

2269

for ($self->{xmltv_writer_state}) {

2270

if ($_ eq 'new') {

2271

croak 'must call start() on XMLTV::Writer first';

2272

}

2273

elsif ($_ eq 'channels') {

2274

# Okay.

2275

}

2276

elsif ($_ eq 'programmes') {

2277

croak 'cannot write channels after writing programmes';

2278

}

2279

elsif ($_ eq 'end') {

2280

croak 'cannot do anything with end()ed XMLTV::Writer';

2281

}

2282

else { die }

2283

}

2284

2285

my @ids = sort keys %$channels;

2286

t 'sorted list of channel ids: ' . d \@ids;

2287

foreach (@ids) {

2288

t "writing channel with id $_";

2289

my $ch = $channels->{$_};

2290

$self->write_channel($ch);

2291

}

2292

t('write_channels() EXIT');

2293

}

2294

2295

=pod

2296

2297

=item write_channel()

2298

2299

Write a single channel. You can call this routine if you want, but

2300

most of the time C<write_channels()> is a better interface.

2301

2302

=cut

2303

sub write_channel {

2304

my ($self, $ch) = @_;

2305

croak 'undef channel hash passed' if not defined $ch;

2306

croak "expected a hashref, got: $ch" if ref $ch ne 'HASH';

2307

2308

for ($self->{xmltv_writer_state}) {

2309

if ($_ eq 'new') {

2310

croak 'must call start() on XMLTV::Writer first';

2311

}

2312

elsif ($_ eq 'channels') {

2313

# Okay.

2314

}

2315

elsif ($_ eq 'programmes') {

2316

croak 'cannot write channels after writing programmes';

2317

}

2318

elsif ($_ eq 'end') {

2319

croak 'cannot do anything with end()ed XMLTV::Writer';

2320

}

2321

else { die }

2322

}

2323

2324

my %ch = %$ch; # make a copy

2325

my $id = delete $ch{id};

2326

die "no 'id' attribute in channel" if not defined $id;

2327

write_element_with_handlers($self, 'channel', { id => $id },

2328

\@XMLTV::Channel_Handlers, \%ch);

2329

}

2330

2331

=pod

2332

2333

=item write_programme()

2334

2335

Write details for a single programme as XML.

2336

2337

=cut

2338

sub write_programme {

2339

my $self = shift;

2340

die 'usage: XMLTV::Writer->write_programme(programme hash)' if @_ != 1;

2341

my $ref = shift;

2342

croak 'write_programme() expects programme hashref'

2343

if ref $ref ne 'HASH';

2344

t('write_programme(' . d($self) . ', ' . d($ref) . ') ENTRY');

2345

2346

for ($self->{xmltv_writer_state}) {

2347

if ($_ eq 'new') {

2348

croak 'must call start() on XMLTV::Writer first';

2349

}

2350

elsif ($_ eq 'channels') {

2351

$_ = 'programmes';

2352

}

2353

elsif ($_ eq 'programmes') {

2354

# Okay.

2355

}

2356

elsif ($_ eq 'end') {

2357

croak 'cannot do anything with end()ed XMLTV::Writer';

2358

}

2359

else { die }

2360

}

2361

2362

# We make a copy of the programme hash and delete elements from it

2363

# as they are dealt with; then we can easily spot any unhandled

2364

# elements at the end.

2365

2366

my %p = %$ref;

2367

2368

# First deal with those hash keys that refer to metadata on when

2369

# the programme is broadcast. After taking those out of the hash,

2370

# we can use the handlers to output individual details.

2371

2372

my %attrs;

2373

die if not @XMLTV::Programme_Attributes;

2374

foreach (@XMLTV::Programme_Attributes) {

2375

my ($name, $mult) = @$_;

2376

t "looking for key $name";

2377

my $val = delete $p{$name};

2378

if ($mult eq '?') {

2379

# No need to check anything.

2380

}

2381

elsif ($mult eq '1') {

2382

if (not defined $val) {

2383

warn "programme hash missing $name key, skipping";

2384

return;

2385

}

2386

}

2387

else { die "bad multiplicity for attribute: $mult" }

2388

$attrs{$name} = $val if defined $val;

2389

}

2390

2391

# We use string comparisons without timeoffsets for comparing times.

2392

my( $start ) = split( /\s+/, $attrs{start} );

2393

if( $start lt $self->{mintime} or

2394

$start ge $self->{maxtime} ) {

2395

t "skipping programme with start $attrs{start}";

2396

return;

2397

}

2398

2399

t "beginning 'programme' element";

2400

write_element_with_handlers($self, 'programme', \%attrs,

2401

\@XMLTV::Programme_Handlers, \%p);

2402

}

2403

2404

=pod

2405

2406

=item end()

2407

2408

Say youE<39>ve finished writing programmes. This ends the <tv> element

2409

and the file.

2410

2411

=cut

2412

sub end {

2413

my $self = shift;

2414

2415

for ($self->{xmltv_writer_state}) {

2416

if ($_ eq 'new') {

2417

croak 'must call start() on XMLTV::Writer first';

2418

}

2419

elsif ($_ eq 'channels' or $_ eq 'programmes') {

2420

$_ = 'end';

2421

}

2422

elsif ($_ eq 'end') {

2423

croak 'cannot do anything with end()ed XMLTV::Writer';

2424

}

2425

else { die }

2426

}

2427

2428

$self->endTag('tv');

2429

$self->SUPER::end(@_);

2430

}

2431

2432

2433

# Private.

2434

# order_attrs()

2435

2436

# In XML the order of attributes is not significant. But to make

2437

# things look nice we try to output them in the same order as given in

2438

# the DTD.

2439

2440

# Takes a list of (key, value, key, value, ...) and returns one with

2441

# keys in a nice-looking order.

2442

2443

sub order_attrs {

2444

die "expected even number of elements, from a hash"

2445

if @_ % 2;

2446

my @a = ((map { $_->[0] } (@XMLTV::Channel_Attributes,

2447

@XMLTV::Programme_Attributes)),

2448

qw(date source-info-url source-info-name source-data-url

2449

generator-info-name generator-info-url));

2450

2451

my @r;

2452

my %in = @_;

2453

foreach (@a) {

2454

if (exists $in{$_}) {

2455

my $v = delete $in{$_};

2456

push @r, $_, $v;

2457

}

2458

}

2459

2460

foreach (sort keys %in) {

2461

warn "unknown attribute $_" unless /^_/;

2462

push @r, $_, $in{$_};

2463

}

2464

2465

return @r;

2466

}

2467

2468

2469

# Private.

2470

2471

# Writes the elements of a hash to an XMLTV::Writer using a list of

2472

# handlers. Deletes keys (modifying the hash passed in) as they are

2473

# written.

2474

2475

# Requires all mandatory keys be present in the hash - if you're not

2476

# sure then use check_multiplicity() first.

2477

2478

# Returns true if the element was successfully written, or if any

2479

# errors found don't look serious enough to cause bad XML. If the

2480

# XML::Writer object passed in is undef then nothing is written (since

2481

# the write handlers are coded like that.)

2482

2483

sub call_handlers_write( $$$ ) {

2484

my ($self, $handlers, $input) = @_;

2485

t 'writing input hash: ' . d $input;

2486

die if not defined $input;

2487

2488

my $bad = 0;

2489

foreach (@$handlers) {

2490

my ($name, $h_name, $multiplicity) = @$_;

2491

my $h = $XMLTV::Handlers{$h_name}; die "no handler $h_name" if not $h;

2492

my $writer = $h->[1]; die if not defined $writer;

2493

t "doing handler for $name$multiplicity";

2494

local $SIG{__WARN__} = sub {

2495

warn "$name element: $_[0]";

2496

$bad = 1;

2497

};

2498

my $val = delete $input->{$name};

2499

t 'got value(s): ' . d $val;

2500

if ($multiplicity eq '1') {

2501

$writer->($self, $name, $val);

2502

}

2503

elsif ($multiplicity eq '?') {

2504

$writer->($self, $name, $val) if defined $val;

2505

}

2506

elsif ($multiplicity eq '*' or $multiplicity eq '+') {

2507

croak "value for key $name should be an array ref"

2508

if defined $val and ref $val ne 'ARRAY';

2509

foreach (@{$val}) {

2510

t 'writing value: ' . d $_;

2511

$writer->($self, $name, $_);

2512

t 'finished writing multiple values';

2513

}

2514

}

2515

else {

2516

warn "bad multiplicity specifier: $multiplicity";

2517

}

2518

}

2519

t 'leftover keys: ' . d([ sort keys %$input ]);

2520

return not $bad;

2521

}

2522

2523

2524

# Private.

2525

2526

# Warns about missing keys that are supposed to be mandatory. Returns

2527

# true iff everything is okay.

2528

2529

sub check_multiplicity( $$ ) {

2530

my ($handlers, $input) = @_;

2531

foreach (@$handlers) {

2532

my ($name, $h_name, $multiplicity) = @$_;

2533

t "checking handler for $name: $h_name with multiplicity $multiplicity";

2534

if ($multiplicity eq '1') {

2535

if (not defined $input->{$name}) {

2536

warn "hash missing value for $name";

2537

return 0;

2538

}

2539

}

2540

elsif ($multiplicity eq '?') {

2541

# Okay if not present.

2542

}

2543

elsif ($multiplicity eq '*') {

2544

# Not present, or undef, is treated as empty list.

2545

}

2546

elsif ($multiplicity eq '+') {

2547

t 'one or more, checking for a listref with no undef values';

2548

my $val = $input->{$name};

2549

if (not defined $val) {

2550

warn "hash missing value for $name (expected list)";

2551

return 0;

2552

}

2553

if (ref($val) ne 'ARRAY') {

2554

die "hash has bad contents for $name (expected list)";

2555

return 0;

2556

}

2557

2558

t 'all values: ' . d $val;

2559

my @new_val = grep { defined } @$val;

2560

t 'values that are defined: ' . d \@new_val;

2561

if (@new_val != @$val) {

2562

warn "hash had some undef elements in list for $name, removed";

2563

@$val = @new_val;

2564

}

2565

2566

if (not @$val) {

2567

warn "hash has empty list of $name properties (expected at least one)";

2568

return 0;

2569

}

2570

}

2571

else {

2572

warn "bad multiplicity specifier: $multiplicity";

2573

}

2574

}

2575

return 1;

2576

}

2577

2578

2579

# Private.

2580

2581

# Write a complete element with attributes, and subelements written

2582

# using call_handlers_write(). The advantage over doing it by hand is

2583

# that if some required keys are missing, nothing is written (rather

2584

# than an incomplete and invalid element).

2585

2586

sub write_element_with_handlers( $$$$$ ) {

2587

my ($w, $name, $attrs, $handlers, $hash) = @_;

2588

if (not check_multiplicity($handlers, $hash)) {

2589

warn "keys missing in $name hash, not writing";

2590

return;

2591

}

2592

2593

# Special 'debug' keys written as comments inside the element.

2594

my %debug_keys;

2595

foreach (grep /^debug/, keys %$hash) {

2596

$debug_keys{$_} = delete $hash->{$_};

2597

}

2598

2599

# Call all the handlers with no writer object and make sure

2600

# they're happy.

2601

2602

if (not call_handlers_write(undef, $handlers, { %$hash })) {

2603

warn "bad data inside $name element, not writing\n";

2604

return;

2605

}

2606

2607

$w->startTag($name, order_attrs(%$attrs));

2608

foreach (sort keys %debug_keys) {

2609

my $val = $debug_keys{$_};

2610

$w->comment((defined $val) ? "$_: $val" : $_);

2611

}

2612

call_handlers_write($w, $handlers, $hash);

2613

XMLTV::warn_unknown_keys($name, $hash);

2614

$w->endTag($name);

2615

}

2616

2617

=pod

2618

2619

=back

2620

2621

=head1 AUTHOR

2622

2623

Ed Avis, ed@membled.com

2624

2625

=head1 SEE ALSO

2626

2627

The file format is defined by the DTD xmltv.dtd, which is included in

2628

the xmltv package along with this module. It should be installed in

2629

your systemE<39>s standard place for SGML and XML DTDs.

2630

2631

The xmltv package has a web page at

2632

<http://membled.com/work/apps/xmltv/> which carries

2633

information about the file format and the various tools and apps which

2634

are distributed with this module.

2635

2636

=cut

2637

# -*- perl -*-

# $Id: XMLTV.pm.in,v 1.137 2008/07/14 04:05:56 rmeden Exp $

package XMLTV;

use strict;

use base 'Exporter';

our @EXPORT = ();

our @EXPORT_OK = qw(read_data parse parsefile parsefiles write_data

best_name list_channel_keys list_programme_keys);

# For the time being the version of this library is tied to that of

# the xmltv package as a whole. This number should be checked by the

# mkdist tool.

our $VERSION = '0.5.52';