~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to src/calibre/ebooks/metadata/xmp.py

Committer: Package Import Robot
Author(s): Martin Pitt
Date: 2014-02-27 07:48:06 UTC
mto: This revision was merged to the branch mainline in revision 74.
Revision ID: package-import@ubuntu.com-20140227074806-64wdebb3ptosxhhx

Tags: upstream-1.25.0+dfsg

Import upstream version 1.25.0+dfsg

files added:
manual/templates/epub_cover_template.html

recipes/disinformatico.recipe

recipes/gosc_full.recipe

recipes/icons/gosc_full.png

recipes/les_echos.recipe

resources/images/beautify.png

src/calibre/ebooks/metadata/xmp.py

src/calibre/gui2/tweak_book/diff/highlight.py

src/calibre/gui2/tweak_book/editor/smart

src/calibre/gui2/tweak_book/editor/smart/__init__.py

src/calibre/gui2/tweak_book/editor/smart/html.py

src/calibre/gui2/viewer/gestures.py

files removed:
recipes/icons/kdefamily_pl.png

recipes/kdefamily_pl.recipe

src/calibre/gui2/store/stores/foyles_uk_plugin.py

files modified:
Changelog.yaml

manual/conf.py

manual/conversion.rst

manual/custom.py

manual/diff.rst

manual/edit.rst

manual/epub.py

manual/faq.rst

manual/templates/layout.html

recipes/abc_es.recipe

recipes/ap.recipe

recipes/cosmopolitan_uk.recipe

recipes/cumhuriyet.recipe

recipes/gosc_niedzielny.recipe

recipes/kathemerini.recipe

recipes/mac_world_uk.recipe

recipes/nrc-nl-epub.recipe

recipes/nytimesbook.recipe

recipes/wired_it.recipe

resources/builtin_recipes.xml

resources/builtin_recipes.zip

resources/compiled_coffeescript.zip

resources/ebook-convert-complete.pickle

resources/images.qrc

resources/localization/locales.zip

resources/localization/stats.pickle

resources/quick_start.epub

setup/installer/__init__.py

setup/iso_639/pl.po

setup/iso_639/ru.po

setup/plugins_mirror.py

setup/resources.py

src/calibre/__init__.py

src/calibre/constants.py

src/calibre/customize/builtins.py

src/calibre/db/cache.py

src/calibre/debug.py

src/calibre/devices/kobo/driver.py

src/calibre/ebooks/chardet.py

src/calibre/ebooks/conversion/plugins/epub_output.py

src/calibre/ebooks/conversion/plugins/pdf_output.py

src/calibre/ebooks/html/input.py

src/calibre/ebooks/metadata/__init__.py

src/calibre/ebooks/metadata/meta.py

src/calibre/ebooks/metadata/opf2.py

src/calibre/ebooks/metadata/pdf.py

src/calibre/ebooks/metadata/sources/edelweiss.py

src/calibre/ebooks/mobi/reader/mobi8.py

src/calibre/ebooks/oeb/base.py

src/calibre/ebooks/oeb/display/paged.coffee

src/calibre/ebooks/oeb/parse_utils.py

src/calibre/ebooks/oeb/polish/check/links.py

src/calibre/ebooks/oeb/polish/check/main.py

src/calibre/ebooks/oeb/polish/check/parsing.py

src/calibre/ebooks/oeb/polish/choose.coffee

src/calibre/ebooks/oeb/polish/container.py

src/calibre/ebooks/oeb/polish/css.py

src/calibre/ebooks/oeb/polish/errors.py

src/calibre/ebooks/oeb/polish/font_stats.coffee

src/calibre/ebooks/oeb/polish/parsing.py

src/calibre/ebooks/oeb/polish/preview.coffee

src/calibre/ebooks/oeb/polish/split.py

src/calibre/ebooks/oeb/polish/tests/container.py

src/calibre/ebooks/oeb/polish/tests/parsing.py

src/calibre/ebooks/oeb/polish/toc.py

src/calibre/ebooks/oeb/polish/utils.py

src/calibre/ebooks/oeb/transforms/rasterize.py

src/calibre/ebooks/oeb/transforms/split.py

src/calibre/ebooks/pdf/render/from_html.py

src/calibre/ebooks/pdf/render/graphics.py

src/calibre/ebooks/pdf/render/serialize.py

src/calibre/gui2/__init__.py

src/calibre/gui2/actions/add.py

src/calibre/gui2/actions/copy_to_library.py

src/calibre/gui2/actions/next_match.py

src/calibre/gui2/actions/preferences.py

src/calibre/gui2/actions/restart.py

src/calibre/gui2/actions/similar_books.py

src/calibre/gui2/auto_add.py

src/calibre/gui2/comments_editor.py

src/calibre/gui2/complete2.py

src/calibre/gui2/dialogs/duplicates.py

src/calibre/gui2/dialogs/message_box.py

src/calibre/gui2/dialogs/metadata_bulk.py

src/calibre/gui2/email.py

src/calibre/gui2/init.py

src/calibre/gui2/jobs.py

src/calibre/gui2/layout.py

src/calibre/gui2/preferences/email.ui

src/calibre/gui2/preferences/email_ui.py

src/calibre/gui2/preferences/emailp.py

src/calibre/gui2/preferences/tweaks.py

src/calibre/gui2/preferences/tweaks.ui

src/calibre/gui2/preferences/tweaks_ui.py

src/calibre/gui2/progress_indicator/QProgressIndicator.cpp

src/calibre/gui2/progress_indicator/QProgressIndicator.h

src/calibre/gui2/progress_indicator/QProgressIndicator.sip

src/calibre/gui2/store/stores/amazon_de_plugin.py

src/calibre/gui2/store/stores/amazon_es_plugin.py

src/calibre/gui2/store/stores/amazon_fr_plugin.py

src/calibre/gui2/store/stores/amazon_it_plugin.py

src/calibre/gui2/store/stores/amazon_uk_plugin.py

src/calibre/gui2/store/stores/cdp_plugin.py

src/calibre/gui2/store/stores/mills_boon_uk_plugin.py

src/calibre/gui2/store/stores/publio_plugin.py

src/calibre/gui2/store/stores/woblink_plugin.py

src/calibre/gui2/store/stores/wolnelektury_plugin.py

src/calibre/gui2/toc/location.py

src/calibre/gui2/toc/main.py

src/calibre/gui2/tweak_book/boss.py

src/calibre/gui2/tweak_book/diff/main.py

src/calibre/gui2/tweak_book/diff/view.py

src/calibre/gui2/tweak_book/editor/syntax/base.py

src/calibre/gui2/tweak_book/editor/syntax/html.py

src/calibre/gui2/tweak_book/editor/text.py

src/calibre/gui2/tweak_book/editor/themes.py

src/calibre/gui2/tweak_book/editor/widget.py

src/calibre/gui2/tweak_book/file_list.py

src/calibre/gui2/tweak_book/main.py

src/calibre/gui2/tweak_book/preview.py

src/calibre/gui2/tweak_book/save.py

src/calibre/gui2/tweak_book/ui.py

src/calibre/gui2/tweak_book/undo.py

src/calibre/gui2/viewer/documentview.py

src/calibre/gui2/viewer/main_ui.py

src/calibre/gui2/viewer/toc.py

src/calibre/library/catalogs/epub_mobi.py

src/calibre/library/catalogs/epub_mobi_builder.py

src/calibre/library/cli.py

src/calibre/linux.py

src/calibre/translations/af.po

src/calibre/translations/ar.po

src/calibre/translations/ast.po

src/calibre/translations/az.po

src/calibre/translations/ber.po

src/calibre/translations/bg.po

src/calibre/translations/bn.po

src/calibre/translations/br.po

src/calibre/translations/bs.po

src/calibre/translations/ca.po

src/calibre/translations/calibre.pot

src/calibre/translations/cs.po

src/calibre/translations/cy.po

src/calibre/translations/da.po

src/calibre/translations/de.po

src/calibre/translations/el.po

src/calibre/translations/en_AU.po

src/calibre/translations/en_CA.po

src/calibre/translations/en_GB.po

src/calibre/translations/eo.po

src/calibre/translations/es.po

src/calibre/translations/et.po

src/calibre/translations/eu.po

src/calibre/translations/fa.po

src/calibre/translations/fi.po

src/calibre/translations/fil.po

src/calibre/translations/fo.po

src/calibre/translations/fr.po

src/calibre/translations/fr_CA.po

src/calibre/translations/fur.po

src/calibre/translations/gl.po

src/calibre/translations/gu.po

src/calibre/translations/he.po

src/calibre/translations/hi.po

src/calibre/translations/him.po

src/calibre/translations/hr.po

src/calibre/translations/hu.po

src/calibre/translations/id.po

src/calibre/translations/is.po

src/calibre/translations/it.po

src/calibre/translations/ja.po

src/calibre/translations/jv.po

src/calibre/translations/ka.po

src/calibre/translations/kn.po

src/calibre/translations/ko.po

src/calibre/translations/ku.po

src/calibre/translations/lt.po

src/calibre/translations/ltg.po

src/calibre/translations/lv.po

src/calibre/translations/mk.po

src/calibre/translations/ml.po

src/calibre/translations/mn.po

src/calibre/translations/mr.po

src/calibre/translations/ms.po

src/calibre/translations/my.po

src/calibre/translations/nb.po

src/calibre/translations/nds.po

src/calibre/translations/nl.po

src/calibre/translations/nn.po

src/calibre/translations/oc.po

src/calibre/translations/pa.po

src/calibre/translations/pl.po

src/calibre/translations/pt.po

src/calibre/translations/pt_BR.po

src/calibre/translations/ro.po

src/calibre/translations/ru.po

src/calibre/translations/sc.po

src/calibre/translations/si.po

src/calibre/translations/sk.po

src/calibre/translations/sl.po

src/calibre/translations/sq.po

src/calibre/translations/sr.po

src/calibre/translations/sr@latin.po

src/calibre/translations/sv.po

src/calibre/translations/ta.po

src/calibre/translations/te.po

src/calibre/translations/th.po

src/calibre/translations/tr.po

src/calibre/translations/ug.po

src/calibre/translations/uk.po

src/calibre/translations/ur.po

src/calibre/translations/vi.po

src/calibre/translations/wa.po

src/calibre/translations/yi.po

src/calibre/translations/zh_CN.po

src/calibre/translations/zh_HK.po

src/calibre/translations/zh_TW.po

src/calibre/utils/config.py

src/calibre/utils/ipc/simple_worker.py

src/calibre/utils/ipython.py

src/calibre/utils/localization.py

src/calibre/utils/logging.py

src/calibre/utils/podofo/__init__.py

src/calibre/utils/podofo/doc.cpp

src/calibre/utils/terminal.py

src/calibre/web/jsbrowser/browser.py

Show diffs side-by-side

added added

removed removed

src/calibre/ebooks/metadata/xmp.py

#!/usr/bin/env python

# vim:fileencoding=utf-8

from __future__ import (unicode_literals, division, absolute_import,

print_function)

__license__ = 'GPL v3'

__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

import re, sys, copy, json

from itertools import repeat

from collections import defaultdict

from lxml import etree

from lxml.builder import ElementMaker

from calibre import prints

from calibre.ebooks.metadata import check_isbn, check_doi

from calibre.ebooks.metadata.book.base import Metadata

from calibre.ebooks.metadata.opf2 import dump_dict

from calibre.utils.date import parse_date, isoformat, now

from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1

# Matches an XML declaration that carries an encoding attribute.
# parse_xmp_packet() strips it before handing a unicode string to lxml.
_xml_declaration = re.compile(r'<\?xml[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE)

# Prefix -> namespace URI. expand() uses it to build lxml tag names and
# XPath() passes it as the namespace map for every compiled expression.
NS_MAP = {

'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',

'dc': 'http://purl.org/dc/elements/1.1/',

'pdf': 'http://ns.adobe.com/pdf/1.3/',

'pdfx': 'http://ns.adobe.com/pdfx/1.3/',

'xmp': 'http://ns.adobe.com/xap/1.0/',

'xmpidq': 'http://ns.adobe.com/xmp/Identifier/qual/1.0/',

'xmpMM': 'http://ns.adobe.com/xap/1.0/mm/',

'xmpRights': 'http://ns.adobe.com/xap/1.0/rights/',

'xmpBJ': 'http://ns.adobe.com/xap/1.0/bj/',

'xmpTPg': 'http://ns.adobe.com/xap/1.0/t/pg/',

'xmpDM': 'http://ns.adobe.com/xmp/1.0/DynamicMedia/',

'prism': 'http://prismstandard.org/namespaces/basic/2.0/',

'crossmark': 'http://crossref.org/crossmark/1.0/',

'xml': 'http://www.w3.org/XML/1998/namespace',

'x': 'adobe:ns:meta/',

'calibre': 'http://calibre-ebook.com/xmp-namespace',

'calibreSI': 'http://calibre-ebook.com/xmp-namespace-series-index',

'calibreCC': 'http://calibre-ebook.com/xmp-namespace-custom-columns',

}

# Identifier schemes that are looked up as prism:<scheme> / pdfx:<scheme>
# elements when reading a packet and dropped from the old packet when merging.
KNOWN_ID_SCHEMES = {'isbn', 'url', 'doi'}

def expand(name):
    ''' Convert a prefixed name such as dc:title into the {namespace-uri}title
    form used for lxml tag and attribute names. The prefix is resolved via
    NS_MAP, so an unknown prefix raises KeyError. '''
    prefix, _sep, local_name = name.partition(':')
    return '{%s}%s' % (NS_MAP[prefix], local_name)

# Compiled XPath objects, keyed by their expression string
xpath_cache = {}


def XPath(expr):
    ''' Return a compiled XPath object for expr, with the prefixes from NS_MAP
    available inside the expression. Compiled objects are kept in xpath_cache
    so that every expression is compiled only once. '''
    compiled = xpath_cache.get(expr)
    if compiled is not None:
        return compiled
    compiled = etree.XPath(expr, namespaces=NS_MAP)
    xpath_cache[expr] = compiled
    return compiled

def parse_xmp_packet(raw_bytes):
    ''' Parse the raw bytes of an XMP packet and return the root element.

    The first 1024 bytes are searched for the xpacket header, trying the
    pattern encoded in each of five UTF encodings. The value of its begin
    attribute (a byte order mark, or empty for utf-8) selects the codec used
    to decode the packet. If no codec can be determined the bytes are handed
    to lxml unchanged. '''
    raw_bytes = raw_bytes.strip()
    suffixes = ('8', '16-le', '16-be', '32-le', '32-be')
    begin_pat = r'''<?xpacket\s+[^>]*?begin\s*=\s*['"]([^'"]*)['"]'''

    # Map every encoded byte order mark to the codec that produces it
    bom_to_codec = {}
    for suffix in suffixes:
        bom_to_codec['\ufeff'.encode('utf-' + suffix)] = 'utf-' + suffix
    bom_to_codec[b''] = 'utf-8'

    head = raw_bytes[:1024]
    codec = None
    for suffix in suffixes:
        match = re.search(begin_pat.encode('utf-' + suffix), head)
        if match is None:
            continue
        # Only the first encoding whose pattern matches is considered
        codec = bom_to_codec.get(match.group(1), codec)
        break

    if codec is None:
        return etree.fromstring(raw_bytes)
    # lxml barfs if encoding declaration present in unicode string
    text = _xml_declaration.sub('', raw_bytes.decode(codec))
    return etree.fromstring(text)

def serialize_xmp_packet(root, encoding='utf-8'):
    ''' Serialize root as a complete XMP packet: the xpacket header carrying
    the byte order mark for encoding, the pretty printed XML, and the
    writable xpacket trailer. Note that root.tail is overwritten. '''
    # Adobe spec recommends inserting padding at the end of the packet
    padding_lines = repeat(' ' * 100, 30)
    root.tail = '\n' + '\n'.join(padding_lines)
    body = etree.tostring(root, encoding=encoding, pretty_print=True, with_tail=True, method='xml')
    bom = '\ufeff'.encode(encoding)
    template = b'<?xpacket begin="%s" id="W5M0MpCehiHzreSzNTczkc9d"?>\n%s\n<?xpacket end="w"?>'
    return template % (bom, body)

def read_simple_property(elem):
    ''' Read the value of a simple property.

    Returns the text of elem if it has any, otherwise the value of its
    rdf:resource attribute, or the empty string when that is absent too.
    Returns None when elem is None. read_lang_alt() returns None for a
    property without any rdf:li children, so the guard is needed to stop
    callers such as first_alt() failing with AttributeError. '''
    if elem is None:
        return None
    if elem.text:
        return elem.text
    return elem.get(expand('rdf:resource'), '')

def read_lang_alt(parent):
    ''' Pick one rdf:li element from a language alternative: the descendant
    marked xml:lang="x-default" if there is one, otherwise the first rdf:li
    descendant. Returns None when parent has no rdf:li descendants. '''
    for expr in ('descendant::rdf:li[@xml:lang="x-default"]', 'descendant::rdf:li'):
        matches = XPath(expr)(parent)
        if matches:
            return matches[0]

def read_sequence(parent):
    ''' Generate the values of a sequence or set: every rdf:li descendant of
    parent is read as a simple property. '''
    find_items = XPath('descendant::rdf:li')
    for li in find_items(parent):
        yield read_simple_property(li)

101

102

def uniq(vals, kmap=lambda x:x):
    ''' Remove all duplicates from vals, while preserving order. kmap must be a
    callable that returns a hashable value for every item in vals.

    vals may be None (treated as empty) or any iterable, including a one-shot
    iterator: every item is visited exactly once and compared using its own
    key. Returns a tuple containing the first item seen for each key. '''
    seen = set()
    ans = []
    for item in vals or ():
        key = kmap(item)
        if key not in seen:
            seen.add(key)
            ans.append(item)
    return tuple(ans)

110

111

def multiple_sequences(expr, root):
    ''' Collect the values of all sequence elements matching expr. Duplicates
    are dropped (first occurrence wins, order preserved) and so are empty
    values. '''
    values = []
    for container in XPath(expr)(root):
        values.extend(read_sequence(container))
    distinct = uniq(values)
    return filter(None, distinct)

118

119

def first_alt(expr, root):
    ''' Return the first non-empty value among the elements matching expr,
    where every element is assumed to hold a language alternate array.
    Returns None if there is no such value. '''
    for node in XPath(expr)(root):
        text = read_simple_property(read_lang_alt(node))
        if not text:
            continue
        return text

126

127

def first_simple(expr, root):
    ''' Return the first non-empty value among the elements matching expr,
    reading every element as a simple property. Returns None if there is no
    such value. '''
    values = (read_simple_property(node) for node in XPath(expr)(root))
    return next((value for value in values if value), None)

134

135

def first_sequence(expr, root):
    ''' Return the first item of the first non-empty sequence among the
    elements matching expr, or None if all of them are empty. '''
    for container in XPath(expr)(root):
        items = read_sequence(container)
        try:
            return next(items)
        except StopIteration:
            # This sequence has no items, try the next matching element
            continue

140

141

def read_series(root):
    ''' Return (series, series_index) from the first calibre:series element
    that has a non-blank rdf:value, or (None, None) if there is none. The
    index is the first calibreSI:series_index that parses as a float and
    defaults to 1.0. '''
    find_value = XPath('descendant::rdf:value')
    find_index = XPath('descendant::calibreSI:series_index')
    for node in XPath('//calibre:series')(root):
        values = find_value(node)
        if not values:
            continue
        name = values[0].text
        if not name or not name.strip():
            continue
        index = 1.0
        for candidate in find_index(node):
            try:
                index = float(candidate.text)
            except (TypeError, ValueError):
                continue
            break
        return name, index
    return None, None

157

158

def read_user_metadata(mi, root):
    ''' Read the custom column metadata stored in calibre:custom_metadata and
    set it on mi.

    Every rdf:li in the bag carries a calibreCC:name (which must start with #)
    and an rdf:value holding the JSON encoded field metadata. Only the first
    successfully decoded entry for a given name is used. An entry that cannot
    be decoded is reported and skipped, it never aborts reading. '''
    from calibre.utils.config import from_json
    from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
    fields = set()
    for item in XPath('//calibre:custom_metadata')(root):
        for li in XPath('./rdf:Bag/rdf:li')(item):
            name = XPath('descendant::calibreCC:name')(li)
            if not name:
                continue
            # An empty name element has text None, treat it as an invalid name
            # rather than failing on None.startswith
            name = name[0].text or ''
            if not name.startswith('#') or name in fields:
                continue
            val = XPath('descendant::rdf:value')(li)
            if not val:
                continue
            try:
                fm = json.loads(val[0].text, object_hook=from_json)
                decode_is_multiple(fm)
                mi.set_user_metadata(name, fm)
                fields.add(name)
            except Exception:
                prints('Failed to read user metadata:', name)
                import traceback
                traceback.print_exc()

180

181

def read_xmp_identifers(parent):
    ''' Generate (scheme, value) pairs for the items of an identifier bag.
    Items look like:
    <rdf:li rdf:parseType="Resource"><xmpidq:Scheme>URL</xmpidq:Scheme><rdf:value>http://foo.com</rdf:value></rdf:li>
    or the longer form:
    <rdf:li><rdf:Description><xmpidq:Scheme>URL</xmpidq:Scheme><rdf:value>http://foo.com</rdf:value></rdf:Description></rdf:li>

    An item that is in neither form yields (None, text of the item). Any item
    with an rdf:value descendant yields that value, together with the scheme
    text, or None when there is no xmpidq:Scheme.
    '''
    for li in XPath('./rdf:Bag/rdf:li')(parent):
        parse_type = li.attrib.get(expand('rdf:parseType'), None)
        is_resource = parse_type == 'Resource' or (len(li) == 1 and li[0].tag == expand('rdf:Description'))
        if not is_resource:
            yield None, li.text or ''
        values = XPath('descendant::rdf:value')(li)
        if values:
            value = values[0].text or ''
            schemes = XPath('descendant::xmpidq:Scheme')(li)
            if schemes:
                yield schemes[0].text or '', value
            else:
                yield None, value

201

202

def metadata_from_xmp_packet(raw_bytes):
    ''' Parse the XMP packet in raw_bytes and return a Metadata object built
    from it.

    Reads the Dublin Core fields (title, creator, subject, description,
    publisher, date, language), the xmp CreatorTool/MetadataDate/Identifier
    fields, the prism/pdfx identifier elements and the calibre specific
    fields. Values that cannot be parsed are skipped. Errors from parsing the
    packet itself are not caught here. '''
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = authors
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences('//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root), assume_utc=False)
    except Exception:
        # Missing or unparseable date, leave pubdate unset
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = first_simple('//xmp:MetadataDate', root)
    if md:
        try:
            mi.metadata_date = parse_date(md)
        except Exception:
            pass
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:'+x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = filter(None, map(canonicalize_lang, languages))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    # Fill in known schemes that xmp:Identifier did not provide
    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in (('doi', check_doi), ('isbn', check_isbn)):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                # Store under the scheme that recognized the value, an ISBN
                # must not be recorded as a DOI
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi

297

298

def consolidate_metadata(info_mi, info):
    ''' When both the PDF Info dict and XMP metadata are present, prefer the xmp
    metadata unless the Info ModDate is newer than the XMP MetadataDate. This
    is the algorithm recommended by the PDF spec.

    info_mi is updated in place and returned. If the XMP packet cannot be
    read, the traceback is printed and info_mi is returned unchanged. '''
    try:
        xmp_mi = metadata_from_xmp_packet(info['xmp_metadata'])
    except:
        import traceback
        traceback.print_exc()
        return info_mi

    # Remember the Info dict values before smart_update() replaces them
    saved_title = info_mi.title or _('Unknown')
    saved_authors = list(info_mi.authors or ())
    saved_tags = list(info_mi.tags or ())
    info_mi.smart_update(xmp_mi, replace_metadata=True)

    prefer_info = False
    if 'ModDate' in info and hasattr(xmp_mi, 'metadata_date'):
        try:
            info_date = parse_date(info['ModDate'])
        except:
            pass
        else:
            prefer_info = info_date > xmp_mi.metadata_date

    if prefer_info:
        info_mi.title = saved_title
        info_mi.authors = saved_authors
        info_mi.tags = saved_tags
        return info_mi

    # We'll use the xmp tags/authors but fallback to the info ones if the
    # xmp does not have tags/authors. smart_update() should have taken care of
    # the rest
    merged_authors = xmp_mi.authors or info_mi.authors
    merged_tags = xmp_mi.tags or info_mi.tags
    info_mi.authors = merged_authors
    info_mi.tags = merged_tags
    return info_mi

326

327

def nsmap(*args):
    ''' Return the subset of NS_MAP for the given prefixes, as a new dict.
    Raises KeyError for a prefix that is not in NS_MAP. '''
    return dict((prefix, NS_MAP[prefix]) for prefix in args)

329

330

def create_simple_property(parent, tag, value):
    ''' Append a new child element named tag (in prefix:name form) to parent
    and set its text to value. '''
    prop = parent.makeelement(expand(tag))
    parent.append(prop)
    prop.text = value

334

335

def create_alt_property(parent, tag, value):
    ''' Append a language alternative property to parent: an element named tag
    that contains rdf:Alt, which contains one rdf:li marked as x-default and
    holding value. '''
    node = parent
    # Build the tag / rdf:Alt / rdf:li chain from the outside in
    for name in (tag, 'rdf:Alt', 'rdf:li'):
        child = node.makeelement(expand(name))
        node.append(child)
        node = child
    node.set(expand('xml:lang'), 'x-default')
    node.text = value

344

345

def create_sequence_property(parent, tag, val, ordered=True):
    ''' Append an array property named tag to parent, with one rdf:li per item
    in val. The items are wrapped in rdf:Seq when ordered is true and in
    rdf:Bag otherwise. '''
    prop = parent.makeelement(expand(tag))
    parent.append(prop)
    if ordered:
        container_name = 'rdf:' + 'Seq'
    else:
        container_name = 'rdf:' + 'Bag'
    container = prop.makeelement(expand(container_name))
    prop.append(container)
    for item in val:
        li = container.makeelement(expand('rdf:li'))
        li.text = item
        container.append(li)

354

355

def create_identifiers(xmp, identifiers):
    ''' Append an xmp:Identifier bag to xmp. Every scheme/value pair in
    identifiers becomes an rdf:li resource holding an xmpidq:Scheme and an
    rdf:value. '''
    holder = xmp.makeelement(expand('xmp:Identifier'))
    xmp.append(holder)
    bag = holder.makeelement(expand('rdf:Bag'))
    holder.append(bag)
    for scheme, value in identifiers.iteritems():
        li = bag.makeelement(expand('rdf:li'))
        li.set(expand('rdf:parseType'), 'Resource')
        bag.append(li)

        scheme_elem = li.makeelement(expand('xmpidq:Scheme'))
        scheme_elem.text = scheme
        li.append(scheme_elem)

        value_elem = li.makeelement(expand('rdf:value'))
        li.append(value_elem)
        value_elem.text = value

370

371

def create_series(calibre, series, series_index):
    ''' Append a calibre:series resource to calibre. The series name goes into
    rdf:value and the index into calibreSI:series_index, written with two
    decimal places. An index that is not a number is written as 1.00. '''
    node = calibre.makeelement(expand('calibre:series'))
    node.set(expand('rdf:parseType'), 'Resource')
    calibre.append(node)

    name_elem = node.makeelement(expand('rdf:value'))
    node.append(name_elem)
    name_elem.text = series

    try:
        index = float(series_index)
    except (TypeError, ValueError):
        index = 1.0
    index_elem = node.makeelement(expand('calibreSI:series_index'))
    index_elem.text = '%.2f' % index
    node.append(index_elem)

385

386

def create_user_metadata(calibre, all_user_metadata):
    ''' Append a calibre:custom_metadata bag to calibre. Every entry of
    all_user_metadata becomes an rdf:li resource with the field name in
    calibreCC:name and the JSON encoded field metadata in rdf:value. A field
    that cannot be encoded is reported and left out. '''
    from calibre.utils.config import to_json
    from calibre.ebooks.metadata.book.json_codec import object_to_unicode, encode_is_multiple

    holder = calibre.makeelement(expand('calibre:custom_metadata'))
    calibre.append(holder)
    bag = holder.makeelement(expand('rdf:Bag'))
    holder.append(bag)
    for name, fm in all_user_metadata.iteritems():
        try:
            # Work on a copy, encode_is_multiple() is applied to it
            field = copy.copy(fm)
            encode_is_multiple(field)
            field = object_to_unicode(field)
            encoded = json.dumps(field, default=to_json, ensure_ascii=False)
        except:
            prints('Failed to write user metadata:', name)
            import traceback
            traceback.print_exc()
            continue

        li = bag.makeelement(expand('rdf:li'))
        li.set(expand('rdf:parseType'), 'Resource')
        bag.append(li)

        name_elem = li.makeelement(expand('calibreCC:name'))
        li.append(name_elem)
        name_elem.text = name

        value_elem = li.makeelement(expand('rdf:value'))
        value_elem.text = encoded
        li.append(value_elem)

414

415

def metadata_to_xmp_packet(mi):
    ''' Build a complete, serialized XMP packet from the metadata in mi.

    The packet holds one rdf:Description each for the Dublin Core fields, the
    xmp fields (identifiers and the current time as MetadataDate), the prism
    and pdfx copies of the isbn/doi identifiers, and the calibre specific
    fields. '''
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]

    def add_description(*prefixes):
        # Append an empty rdf:Description that declares the given prefixes
        desc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap(*prefixes))
        desc.set(expand('rdf:about'), '')
        rdf.append(desc)
        return desc

    # Dublin Core
    dc = add_description('dc')
    for prop, tag in {'title':'dc:title', 'comments':'dc:description'}.iteritems():
        create_alt_property(dc, tag, mi.get(prop) or '')
    for prop, (tag, ordered) in {
        'authors':('dc:creator', True), 'tags':('dc:subject', False), 'publisher':('dc:publisher', False),
    }.iteritems():
        items = mi.get(prop) or ()
        if isinstance(items, basestring):
            items = [items]
        create_sequence_property(dc, tag, items, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        create_sequence_property(dc, 'dc:date', [isoformat(mi.pubdate, as_utc=False)])
    if not mi.is_null('languages'):
        langs = filter(None, [lang_as_iso639_1(x) or canonicalize_lang(x) for x in mi.languages])
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    # xmp fields, plus the containers for the prism/pdfx identifier copies
    xmp = add_description('xmp', 'xmpidq')
    extra_ids = {}
    for x in ('prism', 'pdfx'):
        extra_ids[x] = add_description(x)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, value in identifiers.iteritems():
            if scheme not in ('isbn', 'doi'):
                continue
            for prefix, container in extra_ids.iteritems():
                id_elem = container.makeelement(expand('%s:%s'%(prefix, scheme)))
                id_elem.text = value
                container.append(id_elem)

    stamp = xmp.makeelement(expand('xmp:MetadataDate'))
    stamp.text = isoformat(now(), as_utc=False)
    xmp.append(stamp)

    # calibre specific fields
    calibre = add_description('calibre', 'calibreSI', 'calibreCC')
    if not mi.is_null('rating'):
        try:
            rating = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % rating)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
    for x in ('author_link_map', 'user_categories'):
        mapping = getattr(mi, x, None)
        if mapping:
            create_simple_property(calibre, 'calibre:'+x, dump_dict(mapping))

    for x in ('title_sort', 'author_sort'):
        if not mi.is_null(x):
            create_simple_property(calibre, 'calibre:'+x, getattr(mi, x))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)

490

491

def find_used_namespaces(elem):
    ''' Return the set of namespace URIs used by the tag and attribute names
    of elem and all its descendant elements. The set contains None if any of
    those names has no namespace. '''
    def namespace_of(qname):
        if '}' not in qname:
            return None
        return qname.partition('}')[0][1:]

    used = set()
    for qname in list(elem.attrib) + [elem.tag]:
        used.add(namespace_of(qname))
    for child in elem.iterchildren(etree.Element):
        used.update(find_used_namespaces(child))
    return used

497

498

def find_preferred_prefix(namespace, elems):
    ''' Return a prefix that is bound to namespace in the nsmap of one of
    elems or of any of their descendant elements, or None if there is none.

    Every element is tried in turn, depth first. Previously the search
    stopped after the first element and its subtree, so a prefix declared
    only on a later element was never found. '''
    for elem in elems:
        ans = {v:k for k, v in elem.nsmap.iteritems()}.get(namespace, None)
        if ans is not None:
            return ans
        ans = find_preferred_prefix(namespace, elem.iterchildren(etree.Element))
        if ans is not None:
            return ans
    return None

504

505

def find_nsmap(elems):
    ''' Build a prefix -> namespace URI map that covers every namespace used
    in elems, apart from the xml, x and rdf namespaces. Namespaces in NS_MAP
    get their NS_MAP prefix. Other namespaces get the prefix found in the
    elements if it is still free, otherwise a generated ns1, ns2, ... '''
    used_namespaces = set()
    for elem in elems:
        used_namespaces.update(find_used_namespaces(elem))
    used_namespaces.difference_update({NS_MAP['xml'], NS_MAP['x'], None, NS_MAP['rdf']})

    known_prefixes = dict((uri, prefix) for prefix, uri in NS_MAP.iteritems())
    result = {}
    generated = 0
    for ns in used_namespaces:
        if ns in known_prefixes:
            result[known_prefixes[ns]] = ns
            continue
        preferred = find_preferred_prefix(ns, elems)
        if preferred and preferred not in result:
            result[preferred] = ns
            continue
        generated += 1
        result['ns%d' % generated] = ns
    return result

524

525

def clone_into(parent, elem):
    ''' Append a deep copy of elem to parent, assuming that all namespace
    declarations are present in parent. Text and tail that consist only of
    whitespace are not copied. '''
    copied = parent.makeelement(elem.tag)
    parent.append(copied)
    for attr in ('text', 'tail'):
        content = getattr(elem, attr)
        if content and not content.isspace():
            setattr(copied, attr, content)
    copied.attrib.update(elem.attrib)
    for child in elem.iterchildren(etree.Element):
        clone_into(copied, child)

536

537

def merge_xmp_packet(old, new):
    ''' Merge metadata present in the old packet that is not present in the new
    one into the new one. Assumes the new packet was generated by
    metadata_to_xmp_packet(). Both arguments are raw packets, the return value
    is the serialized merged packet. '''
    old_root, new_root = parse_xmp_packet(old), parse_xmp_packet(new)
    # As per the adobe spec all metadata items have to be present inside top-level rdf:Description containers
    item_xpath = XPath('//rdf:RDF/rdf:Description/*')

    # First remove all data fields that metadata_to_xmp_packet() knows about,
    # since either they will have been set or if not present, imply they have
    # been cleared
    defined_tags = set()
    for prefix in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            defined_tags.add(expand(prefix + ':' + scheme))
    for x in ('identifier', 'title', 'creator', 'date', 'description', 'language', 'publisher', 'subject'):
        defined_tags.add(expand('dc:' + x))
    for x in ('MetadataDate', 'Identifier'):
        defined_tags.add(expand('xmp:' + x))
    # For redundancy also remove all fields explicitly set in the new packet
    defined_tags.update(x.tag for x in item_xpath(new_root))
    calibrens = '{%s}' % NS_MAP['calibre']
    for elem in item_xpath(old_root):
        tag = elem.tag
        if tag in defined_tags or (tag and tag.startswith(calibrens)):
            elem.getparent().remove(elem)

    # Group all items into groups based on their namespaces, the items of the
    # new packet come first in every group
    groups = defaultdict(list)
    for packet in (new_root, old_root):
        for item in item_xpath(packet):
            groups[item.nsmap[item.prefix]].append(item)

    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]

    # Dublin Core first, then xmp, then calibre, then the rest sorted by URI
    priority = {NS_MAP['dc']:'a', NS_MAP['xmp']:'b', NS_MAP['calibre']:'c'}

    def sort_key(ns):
        return priority.get(ns, 'z'+ns)

    for namespace in sorted(groups, key=sort_key):
        items = groups[namespace]
        desc = rdf.makeelement(expand('rdf:Description'), nsmap=find_nsmap(items))
        desc.set(expand('rdf:about'), '')
        rdf.append(desc)
        for item in items:
            clone_into(desc, item)

    return serialize_xmp_packet(root)

582

583

if __name__ == '__main__':
    # Round trip the XMP packet of the PDF file named by the last command
    # line argument: read it, regenerate a packet from the metadata, merge
    # the two and print the result.
    from calibre.utils.podofo import get_xmp_metadata
    xmp_packet = get_xmp_metadata(sys.argv[-1])
    mi = metadata_from_xmp_packet(xmp_packet)
    np = metadata_to_xmp_packet(mi)
    merged = merge_xmp_packet(xmp_packet, np)
    print (merged)

589

Older »