~ubuntu-branches/debian/sid/calibre/sid

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from functools import partial
from datetime import datetime
from future_builtins import zip

from calibre.constants import preferred_encoding, ispy3
from calibre.ebooks.metadata import author_to_author_sort, title_sort
from calibre.utils.date import (
    parse_only_date, parse_date, UNDEFINED_DATE, isoformat, is_date_undefined)
from calibre.utils.localization import canonicalize_lang
from calibre.utils.icu import strcmp

if ispy3:
    unicode = str

# Convert data into values suitable for the db {{{

def sqlite_datetime(x):
    ''' Return the value to store in the db for x: datetime instances are
    formatted with isoformat() using a space as the separator, every other
    value is returned unchanged. '''
    if isinstance(x, datetime):
        return isoformat(x, sep=' ')
    return x

def single_text(x):
    ''' Adapt a single text value. None is passed through, values that are
    not unicode are decoded with preferred_encoding (undecodable bytes are
    replaced), surrounding whitespace is stripped and an empty result is
    turned into None. '''
    if x is None:
        return None
    text = x if isinstance(x, unicode) else x.decode(preferred_encoding, 'replace')
    return text.strip() or None

# Matches "<series name> [<index>]" where the index consists only of digits
# and dots
series_index_pat = re.compile(r'(.*)\s+\[([.0-9]+)\]$')

def get_series_values(val):
    ''' Split a series string of the form "Name [1.5]" into (name, index).

    :param val: The series string, may be None or empty
    :return: A tuple ``(name, index)``. If val is falsy, has no trailing
        ``[index]`` or the index is not a valid float (for example
        ``[1.2.3]``), val is returned unchanged with an index of None.
    '''
    if not val:
        return (val, None)
    match = series_index_pat.match(val.strip())
    if match is not None:
        try:
            idx = float(match.group(2))
        except ValueError:
            # The pattern also accepts things like "1.2.3" or "." which are
            # not numbers, treat the whole string as the series name
            pass
        else:
            return (match.group(1).strip(), idx)
    return (val, None)

def multiple_text(sep, ui_sep, x):
    ''' Adapt a multi-valued text field into a tuple of cleaned up strings.

    :param sep: Separator used to split x when x is a single string
    :param ui_sep: Separator used to join the values for display. Any
        occurrence of it (stripped) inside a value is replaced so that the
        value cannot be confused with two values later on.
    :param x: A string (bytes or unicode) or an iterable of strings
    :return: A tuple of non-empty strings with surrounding whitespace removed
        and internal runs of whitespace collapsed to a single space. A falsy
        x gives an empty tuple.
    '''
    if not x:
        return ()
    if isinstance(x, bytes):
        # The error handler must be spelled 'replace', an unknown handler
        # name raises LookupError as soon as an undecodable byte is seen
        x = x.decode(preferred_encoding, 'replace')
    if isinstance(x, unicode):
        x = x.split(sep)
    else:
        x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
             else y for y in x)
    ui_sep = ui_sep.strip()
    # Use ; as the replacement unless ; is itself the display separator
    repsep = ',' if ui_sep == ';' else ';'
    x = (y.strip().replace(ui_sep, repsep) for y in x if y.strip())
    return tuple(' '.join(y.split()) for y in x if y)

def adapt_datetime(x):
    ''' Adapt a date+time value. Strings are parsed with parse_date()
    (assume_utc=False, as_utc=False). Falsy values are returned as is, values
    for which is_date_undefined() is true become UNDEFINED_DATE. '''
    value = x
    if isinstance(value, (unicode, bytes)):
        value = parse_date(value, assume_utc=False, as_utc=False)
    if not value:
        return value
    return UNDEFINED_DATE if is_date_undefined(value) else value

def adapt_date(x):
    ''' Adapt a date-only value. Strings are parsed with parse_only_date().
    None and values for which is_date_undefined() is true become
    UNDEFINED_DATE. '''
    parsed = parse_only_date(x) if isinstance(x, (unicode, bytes)) else x
    if parsed is not None and not is_date_undefined(parsed):
        return parsed
    return UNDEFINED_DATE

def adapt_number(typ, x):
    ''' Convert x to a number by calling typ(x). None, the empty string and
    the string "none" (in any case) all give None. '''
    if x is None:
        return None
    is_text = isinstance(x, (unicode, bytes))
    if is_text and (not x or x.lower() == 'none'):
        return None
    return typ(x)

def adapt_bool(x):
    ''' Convert x to True, False or None.

    Strings are compared case insensitively: "true" and "false" give the
    matching boolean, "none" and the empty string give None, anything else is
    interpreted as an integer (int() raises for non-numeric text). Values that
    are not strings are passed through bool(), except for None which stays
    None. '''
    if isinstance(x, (unicode, bytes)):
        word = x.lower()
        if word in ('none', ''):
            return None
        if word == 'true':
            return True
        if word == 'false':
            return False
        return bool(int(word))
    if x is None:
        return None
    return bool(x)

def adapt_languages(to_tuple, x):
    ''' Convert x into a tuple of canonical language codes.

    to_tuple(x) supplies the individual language strings, each of which is run
    through canonicalize_lang(). Codes that come back falsy, duplicates and
    the codes und, zxx, mis and mul are dropped. Order of first occurrence is
    preserved. '''
    ignored = ('und', 'zxx', 'mis', 'mul')
    codes = []
    for lang in to_tuple(x):
        code = canonicalize_lang(lang)
        if code and code not in codes and code not in ignored:
            codes.append(code)
    return tuple(codes)

def clean_identifier(typ, val):
    ''' Normalize an identifier (type, value) pair.

    The type is lower cased with icu_lower(), stripped and has all colons and
    commas removed. The value is stripped and has its commas replaced by |.
    None is treated as the empty string for both.
    NOTE(review): icu_lower is not defined or imported in this file,
    presumably it is installed as a builtin elsewhere -- confirm. '''
    key = icu_lower(typ or '').strip()
    for ch in (':', ','):
        key = key.replace(ch, '')
    value = (val or '').strip().replace(',', '|')
    return key, value

def adapt_identifiers(to_tuple, x):
    ''' Convert x into a dict mapping identifier type to identifier value.

    If x is not already a dict, to_tuple(x) must give strings of the form
    "type:value" which are split at the first colon. Every pair is passed
    through clean_identifier() and pairs with an empty type or value are
    dropped. '''
    if isinstance(x, dict):
        raw = x
    else:
        raw = {}
        for item in to_tuple(x):
            typ, _sep, val = item.partition(':')
            raw[typ] = val
    cleaned = {}
    for typ, val in raw.iteritems():
        typ, val = clean_identifier(typ, val)
        if typ and val:
            cleaned[typ] = val
    return cleaned

def get_adapter(name, metadata):
    ''' Return a callable that converts a user supplied value for the field
    name into the form the writer functions in this module operate on.

    The base adapter is chosen from metadata['datatype'] (and
    metadata['is_multiple'] for text fields). A few fields, identified by
    name, get an extra wrapper that supplies a default value or performs
    further conversion. '''
    dt = metadata['datatype']
    if dt == 'text':
        multi = metadata['is_multiple']
        if multi:
            base = partial(multiple_text, multi['ui_to_list'], multi['list_to_ui'])
        else:
            base = single_text
    elif dt in ('series', 'comments', 'enumeration'):
        base = single_text
    elif dt == 'datetime':
        base = adapt_date if name == 'pubdate' else adapt_datetime
    elif dt in ('int', 'float'):
        base = partial(adapt_number, int if dt == 'int' else float)
    elif dt == 'bool':
        base = adapt_bool
    elif dt == 'rating':
        def base(x):
            # None and 0 mean no rating, everything else is clamped to 0-10
            if x in {None, 0}:
                return None
            return min(10, max(0, adapt_number(int, x)))
    elif dt == 'composite':
        def base(x):
            return x

    # Field specific wrappers around the datatype adapter
    if name == 'title':
        def adapt_title(x):
            return base(x) or _('Unknown')
        return adapt_title
    if name == 'author_sort':
        def adapt_author_sort(x):
            return base(x) or ''
        return adapt_author_sort
    if name == 'authors':
        def adapt_authors(x):
            # | inside a supplied author name is turned back into a comma
            return tuple(y.replace('|', ',') for y in base(x)) or (_('Unknown'),)
        return adapt_authors
    if name in {'timestamp', 'last_modified'}:
        def adapt_stamp(x):
            return base(x) or UNDEFINED_DATE
        return adapt_stamp
    if name == 'series_index':
        def adapt_series_index(x):
            if base(x) is None:
                return 1.0
            return base(x)
        return adapt_series_index
    if name == 'languages':
        return partial(adapt_languages, base)
    if name == 'identifiers':
        return partial(adapt_identifiers, base)

    return base
# }}}

# One-One fields {{{
def one_one_in_books(book_id_val_map, db, field, *args):
    ''' Set a one-one field that is stored as a column of the books table.
    Updates both the db and field.table.book_col_map and returns the set of
    book ids in book_id_val_map. '''
    if not book_id_val_map:
        return set(book_id_val_map)
    rows = ((sqlite_datetime(val), book_id)
            for book_id, val in book_id_val_map.iteritems())
    sql = 'UPDATE books SET %s=? WHERE id=?' % field.metadata['column']
    db.executemany(sql, rows)
    field.table.book_col_map.update(book_id_val_map)
    return set(book_id_val_map)

def set_uuid(book_id_val_map, db, field, *args):
    ''' Set the uuid field: the uuid cache of the table is updated first,
    then the values are written like any other column of the books table. '''
    table = field.table
    table.update_uuid_cache(book_id_val_map)
    dirtied = one_one_in_books(book_id_val_map, db, field, *args)
    return dirtied

def set_title(book_id_val_map, db, field, *args):
    ''' Set the title field and then set the title sort field to the value
    title_sort() computes from each new title. Returns the set of book ids
    reported by the title update. '''
    dirtied = one_one_in_books(book_id_val_map, db, field, *args)
    set_sort = field.title_sort_field.writer.set_books
    sort_values = {}
    for book_id, title in book_id_val_map.iteritems():
        sort_values[book_id] = title_sort(title)
    set_sort(sort_values, db)
    return dirtied

def one_one_in_other(book_id_val_map, db, field, *args):
    ''' Set a one-one field that lives in its own table rather than in the
    books table, like comments. A value of None deletes the row for that
    book, any other value is inserted or replaces the existing row. Returns
    the set of book ids in book_id_val_map. '''
    removed_ids, updated = [], {}
    for book_id, val in book_id_val_map.iteritems():
        if val is None:
            removed_ids.append(book_id)
        else:
            updated[book_id] = val

    if removed_ids:
        db.executemany(
            'DELETE FROM %s WHERE book=?'%field.metadata['table'],
            tuple((book_id,) for book_id in removed_ids))
        for book_id in removed_ids:
            field.table.book_col_map.pop(book_id, None)

    if updated:
        sql = 'INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%(
            field.metadata['table'], field.metadata['column'])
        db.executemany(sql, (
            (book_id, sqlite_datetime(val))
            for book_id, val in updated.iteritems()))
        field.table.book_col_map.update(updated)

    return set(book_id_val_map)

def custom_series_index(book_id_val_map, db, field, *args):
    ''' Set the index of books in a custom series column.

    A value of None is stored as 1.0. The in-memory map is updated for every
    book, but the db is only updated for books that currently have a series
    (field.series_field.ids_for_book() is non-empty). Returns the set of book
    ids that were updated in the db. '''
    series = field.series_field
    rows = []
    for book_id, index in book_id_val_map.iteritems():
        index = 1.0 if index is None else index
        series_ids = series.ids_for_book(book_id)
        if series_ids:
            rows.append((index, book_id, series_ids[0]))
        field.table.book_col_map[book_id] = index
    if rows:
        sql = 'UPDATE %s SET %s=? WHERE book=? AND value=?'%(
            field.metadata['table'], field.metadata['column'])
        db.executemany(sql, rows)
    return set(row[1] for row in rows)
# }}}

# Many-One fields {{{

def safe_lower(x):
    ''' Lower case x with icu_lower(), returning x unchanged if that fails
    with a TypeError, ValueError, KeyError or AttributeError. '''
    try:
        lowered = icu_lower(x)
    except (TypeError, ValueError, KeyError, AttributeError):
        return x
    return lowered

def get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
              case_changes, val_map, is_authors=False):
    ''' Find the db id of the item val, inserting val into the db (and into
    the in-memory maps of table) if it is not present yet. The id is stored
    in val_map[val], nothing is returned.

    Items are looked up in rid_map using kmap(val) as the key. When an
    existing item is found whose stored value differs from val and
    allow_case_change is true, the new value is recorded in case_changes,
    keyed by item id. '''
    key = kmap(val)
    item_id = rid_map.get(key)
    if item_id is not None:
        if allow_case_change and val != table.id_map[item_id]:
            case_changes[item_id] = val
    else:
        if is_authors:
            aus = author_to_author_sort(val)
            # Author names are stored with | in place of commas
            db.execute('INSERT INTO authors(name,sort) VALUES (?,?)',
                       (val.replace(',', '|'), aus))
        else:
            db.execute('INSERT INTO %s(%s) VALUES (?)'%(
                m['table'], m['column']), (val,))
        item_id = db.last_insert_rowid()
        rid_map[key] = item_id
        table.id_map[item_id] = val
        table.col_book_map[item_id] = set()
        if is_authors:
            table.asort_map[item_id] = aus
            table.alink_map[item_id] = ''
    val_map[val] = item_id

def change_case(case_changes, dirtied, db, table, m, is_authors=False):
    ''' Apply the changed item values in case_changes (item id -> new value)
    to the db and to table.id_map. All books that use a changed item are
    added to dirtied. For authors, commas are stored as | in the db and the
    cached author sort of the item is recomputed from the new value. '''
    def db_value(val):
        return val.replace(',', '|') if is_authors else val

    rows = ((db_value(val), item_id)
            for item_id, val in case_changes.iteritems())
    sql = 'UPDATE %s SET %s=? WHERE id=?'%(m['table'], m['column'])
    db.executemany(sql, rows)

    for item_id, val in case_changes.iteritems():
        table.id_map[item_id] = val
        dirtied.update(table.col_book_map[item_id])
        if is_authors:
            table.asort_map[item_id] = author_to_author_sort(val)

def many_one(book_id_val_map, db, field, allow_case_change, *args):
    ''' Set a many-one field, i.e. a field where every book has at most one
    item and an item can be shared by many books.

    :param book_id_val_map: Maps book ids to the new value, None removes the
        value from the book
    :param db: Object used to run SQL (executemany here, execute and
        last_insert_rowid via get_db_id())
    :param field: The field being set, its metadata and table are used
    :param allow_case_change: Passed to get_db_id(), if true an existing item
        whose stored value differs from the supplied value is updated
    :return: The set of book ids whose value changed, plus all books that use
        an item whose stored value was changed
    '''
    dirtied = set()
    m = field.metadata
    table = field.table
    dt = m['datatype']
    is_custom_series = dt == 'series' and table.name.startswith('#')

    # Map values to db ids, including any new values
    # Text and series values are matched via safe_lower(), all other
    # datatypes are matched exactly
    kmap = safe_lower if dt in {'text', 'series'} else lambda x:x
    rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
    if len(rid_map) != len(table.id_map):
        # table has some entries that differ only in case, fix it
        table.fix_case_duplicates(db)
        rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
    # None maps to None so that removals survive the value -> id translation
    # below
    val_map = {None:None}
    case_changes = {}
    for val in book_id_val_map.itervalues():
        if val is not None:
            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
                    case_changes, val_map)

    if case_changes:
        change_case(case_changes, dirtied, db, table, m)

    book_id_item_id_map = {k:val_map[v] for k, v in book_id_val_map.iteritems()}

    # Ignore those items whose value is the same as the current value
    book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
        if v != table.book_col_map.get(k, None)}
    dirtied |= set(book_id_item_id_map)

    # Update the book->col and col->book maps
    deleted = set()
    updated = {}
    for book_id, item_id in book_id_item_id_map.iteritems():
        old_item_id = table.book_col_map.get(book_id, None)
        if old_item_id is not None:
            table.col_book_map[old_item_id].discard(book_id)
        if item_id is None:
            table.book_col_map.pop(book_id, None)
            deleted.add(book_id)
        else:
            table.book_col_map[book_id] = item_id
            table.col_book_map[item_id].add(book_id)
            updated[book_id] = item_id

    # Update the db link table
    if deleted:
        db.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
                            ((k,) for k in deleted))
    if updated:
        # Each parameter tuple carries the book id twice, once for the DELETE
        # and once for the INSERT. For custom series the extra column is set
        # to 1.0.
        # NOTE(review): relies on db.executemany() accepting two statements
        # in a single SQL string -- confirm against the db wrapper
        sql = (
            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
            if is_custom_series else
            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
        )
        db.executemany(sql.format(table.link_table, m['link_column']),
            ((book_id, book_id, item_id) for book_id, item_id in
                    updated.iteritems()))

    # Remove no longer used items
    # An item is unused when no book is left in its col_book_map entry
    remove = {item_id for item_id in table.id_map if not
              table.col_book_map.get(item_id, False)}
    if remove:
        db.executemany('DELETE FROM %s WHERE id=?'%m['table'],
            ((item_id,) for item_id in remove))
        for item_id in remove:
            del table.id_map[item_id]
            table.col_book_map.pop(item_id, None)

    return dirtied
# }}}

# Many-Many fields {{{

def uniq(vals, kmap=lambda x:x):
    ''' Remove all duplicates from vals, while preserving order. kmap must be a
    callable that returns a hashable value for every item in vals. Two items
    are duplicates if kmap returns equal values for them, the first one wins.

    vals may be any iterable, including one-shot iterators and generators, or
    None. Returns a tuple. '''
    seen = set()
    ans = []
    # Iterate vals exactly once, so that iterators are not consumed by two
    # readers at the same time
    for x in vals or ():
        k = kmap(x)
        if k not in seen:
            seen.add(k)
            ans.append(x)
    return tuple(ans)

def many_many(book_id_val_map, db, field, allow_case_change, *args):
    ''' Set a many-many field, i.e. a field where every book has an ordered
    collection of items and an item can be shared by many books.

    :param book_id_val_map: Maps book ids to an iterable of values, an empty
        (or None) collection removes all items from the book
    :param db: Object used to run SQL (executemany here, execute and
        last_insert_rowid via get_db_id())
    :param field: The field being set. For the authors field the author sort
        field is updated as well.
    :param allow_case_change: Passed to get_db_id(), if true an existing item
        whose stored value differs from the supplied value is updated
    :return: The set of book ids whose items changed, plus all books that use
        an item whose stored value was changed
    '''
    dirtied = set()
    m = field.metadata
    table = field.table
    dt = m['datatype']
    is_authors = field.name == 'authors'

    # Map values to db ids, including any new values
    # Text values are matched via safe_lower(), all other datatypes are
    # matched exactly
    kmap = safe_lower if dt == 'text' else lambda x:x
    rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
    if len(rid_map) != len(table.id_map):
        # table has some entries that differ only in case, fix it
        table.fix_case_duplicates(db)
        rid_map = {kmap(item):item_id for item_id, item in table.id_map.iteritems()}
    val_map = {}
    case_changes = {}
    # Drop values that are repeated within a single book, the first
    # occurrence is kept
    book_id_val_map = {k:uniq(vals, kmap) for k, vals in book_id_val_map.iteritems()}
    for vals in book_id_val_map.itervalues():
        for val in vals:
            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
                      case_changes, val_map, is_authors=is_authors)

    if case_changes:
        change_case(case_changes, dirtied, db, table, m, is_authors=is_authors)
        if is_authors:
            for item_id, val in case_changes.iteritems():
                for book_id in table.col_book_map[item_id]:
                    current_sort = field.db_author_sort_for_book(book_id)
                    new_sort = field.author_sort_for_book(book_id)
                    # NOTE(review): strcmp comes from calibre.utils.icu and is
                    # presumably a case insensitive comparison -- confirm
                    if strcmp(current_sort, new_sort) == 0:
                        # The sort strings differ only by case, update the db
                        # sort
                        field.author_sort_field.writer.set_books({book_id:new_sort}, db)

    book_id_item_id_map = {k:tuple(val_map[v] for v in vals)
                           for k, vals in book_id_val_map.iteritems()}

    # Ignore those items whose value is the same as the current value
    book_id_item_id_map = {k:v for k, v in book_id_item_id_map.iteritems()
        if v != table.book_col_map.get(k, None)}
    dirtied |= set(book_id_item_id_map)

    # Update the book->col and col->book maps
    deleted = set()
    updated = {}
    for book_id, item_ids in book_id_item_id_map.iteritems():
        old_item_ids = table.book_col_map.get(book_id, None)
        if old_item_ids:
            for old_item_id in old_item_ids:
                table.col_book_map[old_item_id].discard(book_id)
        if item_ids:
            table.book_col_map[book_id] = item_ids
            for item_id in item_ids:
                table.col_book_map[item_id].add(book_id)
            updated[book_id] = item_ids
        else:
            table.book_col_map.pop(book_id, None)
            deleted.add(book_id)

    # Update the db link table
    if deleted:
        db.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
                            ((k,) for k in deleted))
    if updated:
        # One (book, item) row per item of every updated book. The generator
        # is only consumed by the INSERT below, after the old links of these
        # books have been deleted.
        vals = (
            (book_id, val) for book_id, vals in updated.iteritems()
            for val in vals
        )
        db.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
                            ((k,) for k in updated))
        db.executemany('INSERT INTO {0}(book,{1}) VALUES(?, ?)'.format(
            table.link_table, m['link_column']), vals)
        if is_authors:
            # The authors of these books changed, so set their author sort
            # from the new authors
            aus_map = {book_id:field.author_sort_for_book(book_id) for book_id
                       in updated}
            field.author_sort_field.writer.set_books(aus_map, db)

    # Remove no longer used items
    # An item is unused when no book is left in its col_book_map entry
    remove = {item_id for item_id in table.id_map if not
              table.col_book_map.get(item_id, False)}
    if remove:
        db.executemany('DELETE FROM %s WHERE id=?'%m['table'],
            ((item_id,) for item_id in remove))
        for item_id in remove:
            del table.id_map[item_id]
            table.col_book_map.pop(item_id, None)
            if is_authors:
                table.asort_map.pop(item_id, None)
                table.alink_map.pop(item_id, None)

    return dirtied

# }}}

def identifiers(book_id_val_map, db, field, *args):  # {{{
    ''' Set the identifiers of books. book_id_val_map maps book ids to a dict
    of identifier type -> value which replaces all existing identifiers of
    that book, both in the in-memory maps of field.table and in the
    identifiers table of the db. Returns the set of book ids in
    book_id_val_map. '''
    table = field.table
    rows = set()
    for book_id, new_ids in book_id_val_map.iteritems():
        current = table.book_col_map.setdefault(book_id, {})
        # Forget the identifier types this book no longer has
        for stale in set(current) - set(new_ids):
            table.col_book_map.get(stale, set()).discard(book_id)
            current.pop(stale, None)
        current.update(new_ids)
        for typ, val in new_ids.iteritems():
            table.col_book_map.setdefault(typ, set()).add(book_id)
            rows.add((book_id, typ, val))
    # All old rows of the affected books are removed before the new ones are
    # inserted
    db.executemany('DELETE FROM identifiers WHERE book=?',
                   ((book_id,) for book_id in book_id_val_map))
    if rows:
        db.executemany('INSERT OR REPLACE INTO identifiers (book, type, val) VALUES (?, ?, ?)',
                       tuple(rows))
    return set(book_id_val_map)
# }}}

def dummy(book_id_val_map, *args):
    ''' Setter that does nothing, it ignores its arguments and reports no
    changed books. '''
    dirtied = set()
    return dirtied

class Writer(object):

    ''' Writes values for a single field. The constructor picks the adapter
    that cleans up incoming values and the function that stores them, based
    on the name, datatype and kind of the field. '''

    def __init__(self, field):
        name, md = field.name, field.metadata
        self.adapter = get_adapter(name, md)
        self.name = name
        self.field = field
        datatype = md['datatype']
        # By default every value is accepted
        self.accept_vals = lambda x: True

        not_settable = {'id', 'size', 'path', 'formats', 'news'}
        if datatype == 'composite' or name in not_settable:
            setter = dummy
        elif name[0] == '#' and name.endswith('_index'):
            setter = custom_series_index
        elif name == 'identifiers':
            setter = identifiers
        elif name == 'uuid':
            setter = set_uuid
        elif name == 'title':
            setter = set_title
        elif field.is_many_many:
            setter = many_many
        elif field.is_many:
            if datatype == 'enumeration':
                setter = self.set_books_for_enum
            else:
                setter = many_one
        else:
            if md['table'] == 'books':
                setter = one_one_in_books
            else:
                setter = one_one_in_other
            # Falsy values are silently skipped for these fields
            if name in {'timestamp', 'uuid', 'sort'}:
                self.accept_vals = bool
        self.set_books_func = setter

    def set_books(self, book_id_val_map, db, allow_case_change=True):
        ''' Set the values in book_id_val_map (book id -> value). Values
        rejected by accept_vals are skipped, the rest are run through the
        adapter before being stored. Returns whatever the setter function
        reports as changed, or an empty set if nothing was left to set. '''
        adapted = {}
        for book_id, val in book_id_val_map.iteritems():
            if self.accept_vals(val):
                adapted[book_id] = self.adapter(val)
        if not adapted:
            return set()
        return self.set_books_func(adapted, db, self.field, allow_case_change)

    def set_books_for_enum(self, book_id_val_map, db, field,
                           allow_case_change):
        ''' Setter for enumeration fields: values that are neither None nor
        one of the enum_values of the field are dropped, the rest is stored
        with many_one() with case changes disabled. '''
        allowed = set(field.metadata['display']['enum_values'])
        permitted = {}
        for book_id, val in book_id_val_map.iteritems():
            if val is None or val in allowed:
                permitted[book_id] = val
        if not permitted:
            return set()
        return many_one(permitted, db, field, False)