# Written by Owen Williams
# see LICENSE for license information

# stdlib modules used throughout this module
import logging
import socket
import gettext
import time
import string
import random
import shutil
import pickle
import glob
import traceback

try:
	import sqlite3 as sqlite
	logging.info("Using built-in sqlite3")
except ImportError:
	logging.info("Using external pysqlite2")
	from pysqlite2 import dbapi2 as sqlite

from math import floor,ceil
import urllib, urlparse
from urllib2 import URLError
import sys, os, os.path, re
from sets import Set as set

import feedparser

socket.setdefaulttimeout(30.0)
#locale.setlocale(locale.LC_ALL, '')
gettext.install('penguintv', '/usr/share/locale')
gettext.bindtextdomain('penguintv', '/usr/share/locale')
gettext.textdomain('penguintv')

# sibling PenguinTV modules referenced below
import utils
import IconManager
import ThreadPool
import OfflineImageCache

from gnome import gconf

if utils.RUNNING_SUGAR: # or utils.RUNNING_HILDON:
	USING_FLAG_CACHE = False
else:
	USING_FLAG_CACHE = True
	#USING_FLAG_CACHE = False

if utils.RUNNING_SUGAR or utils.RUNNING_HILDON:

_common_unicode = { u'\u0093':u'"', u'\u0091': u"'", u'\u0092': u"'", u'\u0094':u'"', u'\u0085':u'...', u'\u2026':u'...'}
A_POOLED_POLL = 64 # if this is set, don't do housework after each poll

#obsolete tag-based flags (needed for schema upgrades)
T_NOAUTODOWNLOAD="noautodownload"
T_NOSEARCH="nosearch"
T_NOAUTOEXPIRE="noautoexpire"
T_NOTIFYUPDATES="notify"

FF_NOAUTODOWNLOAD = 1
FF_NOKEEPDELETED = 64

DB_FILE="penguintv4.db"

STRIPPER_REGEX = re.compile('<.*?>')

entry_flag_cache = {}
def __init__(self, polling_callback=None, change_setting_cb=None):
self._exiting = False
self.home = utils.get_home()
raise DBError, "error creating directories: "+self.home
if not os.access(self.home, os.R_OK | os.W_OK | os.X_OK):
raise DBError, "Insufficient access to "+self.home
self._initializing_db = False
#also check db connection in _process_feed
if not os.path.isfile(os.path.join(self.home,"penguintv4.db")):
self._initializing_db = True
if os.path.isfile(os.path.join(self.home,"penguintv3.db")):
shutil.copyfile(os.path.join(self.home,"penguintv3.db"), os.path.join(self.home,"penguintv4.db"))
raise DBError,"couldn't create new database file"
elif os.path.isfile(os.path.join(self.home,"penguintv2.db")):
shutil.copyfile(os.path.join(self.home,"penguintv2.db"), os.path.join(self.home,"penguintv4.db"))
raise DBError,"couldn't create new database file"
elif os.path.isfile(os.path.join(self.home,"penguintv.db")):
shutil.copyfile(os.path.join(self.home,"penguintv.db"), os.path.join(self.home,"penguintv4.db"))
raise DBError,"couldn't create new database file"
self._db = sqlite.connect(os.path.join(self.home,"penguintv4.db"), timeout=30.0, isolation_level="IMMEDIATE")
raise DBError,"error connecting to database"
self._c = self._db.cursor()
db_ver = self.get_version_info()[0]
logging.info("database will need init")
self._initializing_db = True
self._cancel_poll_multiple = False
self._c.execute('PRAGMA synchronous="NORMAL"')
if not utils.RUNNING_SUGAR and not utils.RUNNING_HILDON:
self._c.execute('PRAGMA cache_size=6000')
self.cache_dirty = True
if not self._initializing_db:
self.cache_dirty = self.get_setting(BOOL, "feed_cache_dirty", True)
if polling_callback is None:
	self.polling_callback = self._polling_callback
else:
	self.polling_callback = polling_callback
self._change_setting_cb = change_setting_cb
self.searcher = Lucene.Lucene()
self.searcher = PTVXapian.PTVXapian()
logging.error("Have search, but no search engine? Programming error!")
if not self._initializing_db:
self._blacklist = self.get_feeds_for_flag(FF_NOSEARCH)
logging.error("possible old database version")
self._conf = gconf.client_get_default()
self._icon_manager = IconManager.IconManager(self.home)
self._image_cache = None
cache_images = self.get_setting(BOOL, "/apps/penguintv/cache_images_locally", False)
store_location = self.get_setting(STRING, '/apps/penguintv/media_storage_location', os.path.join(utils.get_home(), "media"))
if store_location != "":
self._image_cache = OfflineImageCache.OfflineImageCache(os.path.join(store_location, "images"))
self._reindex_entry_list = []
self._reindex_feed_list = []
self._image_cache_list = []
self._image_uncache_list = []
self._filtered_entries = {}
self._parse_list = []

def _db_execute(self, c, command, args=()):
#if "FROM FEEDS" in command.upper():
#traceback.print_stack()
#if "UPDATE" in command.upper():
#	print command, args
#	traceback.print_stack()
if command.upper().startswith("SELECT"):
unicode_check = False
for i, val in enumerate(args):
#logging.debug("DB Warning: String argument, making unicode: %s %i %s" % (command,i,val))
u_args = tuple(u_args)
return c.execute(command, u_args)
return c.execute(command, args)
#traceback.print_stack()
logging.error("Database error: " + str(command) + " " + str(args))
def finish(self, vacuumok=True, majorsearchwait=False, correctthread=True):
#allow multiple finishes
self._cancel_poll_multiple = True
if utils.HAS_SEARCH and self.searcher is not None:
if not majorsearchwait and self.searcher.is_indexing(only_this_thread=True):
logging.debug("not waiting for reindex")
self.searcher.finish(False)
if len(self._reindex_entry_list) > 0 or len(self._reindex_feed_list) > 0:
logging.info("have leftover things to reindex, reindexing")
#don't do it threaded or else we will interrupt it on the next line
self.reindex(threaded=False) #it's usually not much...
self.searcher.finish(True)
if self._image_cache is not None:
self._image_cache.finish()
#FIXME: lame, but I'm being lazy
#if randint(1,100) == 1:
#	print "cleaning up unreferenced media"
#	self.clean_file_media()
if random.randint(1,80) == 1 and vacuumok:
logging.info("compacting database")
self._c.execute('VACUUM')

def get_version_info(self):
self._db_execute(self._c, u'SELECT rowid FROM feeds LIMIT 1')
logging.debug("db except: %s" % str(e))
return (-1, LATEST_DB_VER)
self._db_execute(self._c, u'SELECT value FROM settings WHERE data="db_ver"')
db_ver = self._c.fetchone()
db_ver = int(db_ver[0])
return (db_ver, LATEST_DB_VER)

def maybe_initialize_db(self):
"""Returns True if this is a new database."""
db_ver = self.get_version_info()[0]
logging.info("initializing database")
self._initializing_db = True
self._init_database()
#logging.debug("current database version is " + str(db_ver))
self._migrate_database_one_two()
self._migrate_database_one_two()
self._migrate_database_two_three()
self._migrate_database_three_four()
self._migrate_database_four_five()
self._migrate_database_five_six()
self._migrate_database_six_seven()
self.clean_database_media()
if db_ver > LATEST_DB_VER:
logging.warning("This database comes from a later version of PenguinTV and may not work with this version")
raise DBError, "db_ver is "+str(db_ver)+" instead of "+str(LATEST_DB_VER)
logging.error("exception:" + str(e))
#if self.searcher.needs_index:
#	print "indexing for the first time"
#	self.searcher.Do_Index_Threaded()
if not utils.RUNNING_HILDON:
self._check_settings_location()
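# The chain of _migrate_database_* calls above walks an old database forward
# one schema version at a time, keyed on the stored db_ver. The pattern in
# isolation (a sketch, not PenguinTV code):
def _sketch_migrate(db_ver, migrations):
	"""migrations maps a from-version to the function that upgrades one step."""
	while db_ver in migrations:
		migrations[db_ver]()
		db_ver += 1
	return db_ver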
def done_initializing(self):
self._initializing_db = False

def _migrate_database_one_two(self):
logging.info("upgrading to database schema 2")
try:
self._db_execute(self._c, u'SELECT * FROM settings') #if it doesn't exist,
except: #we create it
self._db_execute(self._c, u"""CREATE TABLE settings
id INTEGER PRIMARY KEY,
self._db_execute(self._c, u"""CREATE TABLE tags
id INTEGER PRIMARY KEY,
feed_id INT UNSIGNED NOT NULL);""")
#add fake_date column
self._db_execute(self._c, u'ALTER TABLE entries ADD COLUMN fakedate DATE')
self._db_execute(self._c, u'UPDATE entries SET fakedate = date')
except sqlite.OperationalError, e:
if str(e) != "duplicate column name: fakedate":
logging.warning(str(e)) #else pass
#change db_ver (last thing)
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN pollfreq INT')
self._db_execute(self._c, u'UPDATE feeds SET pollfreq=1800')
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN lastpoll DATE')
self._db_execute(self._c, u'UPDATE feeds SET lastpoll=?',(int(time.time())-(30*60),))
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN newatlast INT')
self._db_execute(self._c, u'UPDATE feeds SET newatlast=0')
self._db_execute(self._c, u'INSERT INTO settings (data, value) VALUES ("db_ver",2)')
self._db_execute(self._c, u'UPDATE settings SET value=2 WHERE data="db_ver"')
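# ALTER TABLE ... ADD COLUMN raises OperationalError when the column already
# exists, which is what keeps re-running a migration harmless. The guard in
# isolation (sketch only):
def _sketch_add_column(cursor, table, column, decl):
	try:
		cursor.execute('ALTER TABLE %s ADD COLUMN %s %s' % (table, column, decl))
	except sqlite.OperationalError, e:
		if 'duplicate column name' not in str(e):
			raise #only swallow the already-added case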
def _migrate_database_two_three(self):
"""version 3 added flag cache, entry_count_cache, and unread_count_cache"""
logging.info("upgrading to database schema 3")
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN flag_cache INT')
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN entry_count_cache INT')
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN unread_count_cache INT')
self._db_execute(self._c, u'UPDATE settings SET value=3 WHERE data="db_ver"')
self._db_execute(self._c, u'INSERT INTO settings (data, value) VALUES ("feed_cache_dirty",1)')

def _migrate_database_three_four(self):
"""version 4 adds fulltext table"""
logging.info("upgrading to database schema 4")
self._db_execute(self._c, u'ALTER TABLE tags ADD COLUMN type INT')
self._db_execute(self._c, u'ALTER TABLE tags ADD COLUMN query')
self._db_execute(self._c, u'ALTER TABLE tags ADD COLUMN favorite INT')
self._db_execute(self._c, u'UPDATE tags SET type=?',(T_TAG,)) #they must all be regular tags right now
self._db_execute(self._c, u'UPDATE settings SET value=4 WHERE data="db_ver"')
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN feed_pointer INT')
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN link')
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN image')
self._db_execute(self._c, u'ALTER TABLE media ADD COLUMN download_date DATE')
self._db_execute(self._c, u'ALTER TABLE media ADD COLUMN thumbnail')
self._db_execute(self._c, u'ALTER TABLE media ADD COLUMN feed_id INTEGER')
self._db_execute(self._c, u'UPDATE feeds SET feed_pointer=-1') #no filters yet!
self._db_execute(self._c, u'UPDATE feeds SET link=""')
self._db_execute(self._c, u"""CREATE TABLE terms
id INTEGER PRIMARY KEY,
self._db_execute(self._c, u'INSERT INTO settings (data, value) VALUES ("frequency_table_update",0)')
logging.info("building new column, please wait...")
self._db_execute(self._c, u'SELECT id FROM feeds')
for feed_id, in self._c.fetchall():
self._db_execute(self._c, u'SELECT media.id FROM entries INNER JOIN media ON media.entry_id = entries.id WHERE entries.feed_id=?', (feed_id,))
media = self._c.fetchall()
media = [m[0] for m in media]
qmarks = "?,"*(len(media)-1)+"?"
self._db_execute(self._c, u'UPDATE media SET feed_id=? WHERE id IN ('+qmarks+')', tuple([feed_id] + media))
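# The "qmarks" idiom above builds one '?' placeholder per value so the values
# stay bound parameters instead of being interpolated into the SQL. Sketch:
def _sketch_qmarks(values):
	return ",".join("?" * len(values)) #3 values -> "?,?,?"
#usage: c.execute('UPDATE media SET feed_id=? WHERE id IN (%s)' % _sketch_qmarks(ids), (feed_id,) + tuple(ids))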
def _migrate_database_four_five(self):
"""version 5 drops the 'id' and 'new' columns and adds the flags column"""
logging.info("upgrading to database schema 5, please wait...")
self.__remove_columns("settings","""data TEXT NOT NULL,
self.__remove_columns("feeds", """id INTEGER PRIMARY KEY,
pollfail BOOL NOT NULL,
modified INT UNSIGNED NOT NULL,
pollfreq INT NOT NULL,
entry_count_cache INT,
unread_count_cache INT,
"""id, url, polled, pollfail, title, description, link,
modified, etag, pollfreq, lastpoll, newatlast,
flag_cache, entry_count_cache, unread_count_cache,
feed_pointer, image""")
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN flags INTEGER NOT NULL DEFAULT 0')
self.__update_flags(T_NOAUTODOWNLOAD, FF_NOAUTODOWNLOAD)
self.__update_flags(T_NOSEARCH, FF_NOSEARCH)
self.__update_flags(T_NOAUTOEXPIRE, FF_NOAUTOEXPIRE)
self.__update_flags(T_NOTIFYUPDATES, FF_NOTIFYUPDATES)
self.__remove_columns("entries", """id INTEGER PRIMARY KEY,
feed_id INTEGER UNSIGNED NOT NULL,
read INTEGER NOT NULL,
old INTEGER NOT NULL""",
"id, feed_id, title, creator, description, fakedate, date, guid, link, read, old")
self.__remove_columns("media", """id INTEGER PRIMARY KEY,
entry_id INTEGER UNSIGNED NOT NULL,
feed_id INTEGER UNSIGNED NOT NULL,
download_status INTEGER NOT NULL,
viewed BOOL NOT NULL,
"id, entry_id, feed_id, url, file, mimetype, download_status, viewed, keep, length, download_date, thumbnail")
self.__remove_columns("tags", """tag TEXT,
feed_id INT UNSIGNED NOT NULL,
"tag, feed_id, query, favorite, type")
self._db_execute(self._c, u'UPDATE settings SET value=5 WHERE data="db_ver"')
def _migrate_database_five_six(self):
logging.info("upgrading to database schema 6, please wait...")
self._db_execute(self._c, u'ALTER TABLE entries ADD COLUMN keep BOOL')
self._db_execute(self._c, u'UPDATE entries SET keep=0')
self.__remove_columns("feeds", """id INTEGER PRIMARY KEY,
pollfail BOOL NOT NULL,
modified INT UNSIGNED NOT NULL,
pollfreq INT NOT NULL,
flags INTEGER NOT NULL DEFAULT 0,
entry_count_cache INT,
unread_count_cache INT,
"""id, url, pollfail, title, description, link,
modified, etag, pollfreq, lastpoll, newatlast, flags,
flag_cache, entry_count_cache, unread_count_cache,
feed_pointer, image""")
self._db_execute(self._c, u'ALTER TABLE feeds ADD COLUMN first_entry_cache TEXT')
self._db_execute(self._c, u'UPDATE feeds SET first_entry_cache=""')
self._db_execute(self._c, u'UPDATE settings SET value=6 WHERE data="db_ver"')
self._db_execute(self._c, u"""CREATE INDEX pollindex ON entries (fakedate DESC);""")
self._db_execute(self._c, u"""CREATE INDEX feedindex ON feeds (title DESC);""")
self._db_execute(self._c, u"""CREATE INDEX e_feedindex ON entries (feed_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX m_feedindex ON media (feed_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX m_entryindex ON media (entry_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX t_feedindex ON tags (feed_id DESC);""")
def _migrate_database_six_seven(self):
logging.info("upgrading to database schema 7, please wait...")
self.__remove_columns("feeds", """id INTEGER PRIMARY KEY,
pollfail BOOL NOT NULL,
pollfreq INT NOT NULL,
flags INTEGER NOT NULL DEFAULT 0,
"""id, url, pollfail, title, description, link,
etag, pollfreq, lastpoll, newatlast,
flags, feed_pointer, image""")
self.__remove_columns("entries", """id INTEGER PRIMARY KEY,
feed_id INTEGER UNSIGNED NOT NULL,
read INTEGER NOT NULL""",
"""id, feed_id, title, creator, description,
fakedate, date, guid, link, keep,
self._db_execute(self._c, u'ALTER TABLE entries ADD COLUMN hash TEXT')
logging.info("Creating entry hashes")
self._db_execute(self._c, u'SELECT rowid, description, title, guid FROM entries')
entries = self._c.fetchall()
for entry_id, description, title, guid in entries:
entry_hash = self._get_hash(guid, title, description)
self._db_execute(self._c, u'UPDATE entries SET hash=? WHERE rowid=?', \
(entry_hash, entry_id))
self._db_execute(self._c, u'UPDATE settings SET value=7 WHERE data="db_ver"')
def __remove_columns(self, table, new_schema, new_columns):
"""dangerous internal function without injection checking
(only called by migration functions, never with user-programmable input)."""
logging.info("updating %s ..." % table)
self._c.execute(u"CREATE TEMPORARY TABLE t_backup(" + new_schema + ")")
self._c.execute(u"INSERT INTO t_backup SELECT "+new_columns+" FROM " + table)
self._c.execute(u"DROP TABLE "+ table)
self._c.execute(u"CREATE TABLE " + table + " ("+ new_schema +")")
self._c.execute(u"INSERT INTO " + table + " SELECT " + new_columns + " FROM t_backup")
self._c.execute(u"DROP TABLE t_backup")
def __update_flags(self, tag_flag, int_flag):
"""for migration: take all feeds with the tag tag_flag, add int_flag
to their flags value, then delete the tag_flag"""
flagged_feeds = self.get_feeds_for_tag(tag_flag)
if len(flagged_feeds) > 0:
qmarks = "?,"*(len(flagged_feeds)-1)+"?"
#print u'UPDATE feeds SET flags = flags + ? WHERE feeds.rowid in ('+qmarks+')'
#print (int_flag,) + tuple(flagged_feeds)
self._db_execute(self._c, u'UPDATE feeds SET flags = flags + ? WHERE feeds.rowid in ('+qmarks+')',
(int_flag,) + tuple(flagged_feeds))
self.remove_tag(tag_flag)
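# Each obsolete tag becomes one bit in feeds.flags; plain addition is safe
# here only because remove_tag() runs right afterward, so the same bit can
# never be added twice. A check-then-set version of the same arithmetic:
def _sketch_set_flag(flags, int_flag):
	if not flags & int_flag:
		flags += int_flag #equivalent to flags | int_flag while the bit is unset
	return flags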
def _init_database(self):
self._db_execute(self._c, u"""CREATE TABLE settings
#for pointer / pointed filter feeds, feed_pointer is feed_id, and description is query
self._db_execute(self._c, u"""CREATE TABLE feeds
id INTEGER PRIMARY KEY,
pollfail BOOL NOT NULL,
pollfreq INT NOT NULL,
flags INTEGER NOT NULL DEFAULT 0,
self._db_execute(self._c, u"""CREATE TABLE entries
id INTEGER PRIMARY KEY,
feed_id INTEGER UNSIGNED NOT NULL,
read INTEGER NOT NULL,
self._db_execute(self._c, u"""CREATE TABLE media
id INTEGER PRIMARY KEY,
entry_id INTEGER UNSIGNED NOT NULL,
feed_id INTEGER UNSIGNED NOT NULL,
download_status INTEGER NOT NULL,
viewed BOOL NOT NULL,
self._db_execute(self._c, u"""CREATE TABLE tags
feed_id INT UNSIGNED NOT NULL,
self._db_execute(self._c, u"""CREATE INDEX pollindex ON entries (fakedate DESC);""")
self._db_execute(self._c, u"""CREATE INDEX feedindex ON feeds (title DESC);""")
self._db_execute(self._c, u"""CREATE INDEX e_feedindex ON entries (feed_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX m_feedindex ON media (feed_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX m_entryindex ON media (entry_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX t_feedindex ON tags (feed_id DESC);""")
self._db_execute(self._c, u'UPDATE entries SET keep=0')
self._db_execute(self._c, u"""INSERT INTO settings (data, value) VALUES ("db_ver", 7)""")
self._db_execute(self._c, u'INSERT INTO settings (data, value) VALUES ("frequency_table_update",0)')

def _get_hash(self, guid, title, description):
text = STRIPPER_REGEX.sub('', ' '.join((guid, title, description)))
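# The hash fingerprints an entry by its tag-stripped guid+title+description,
# so a later poll can detect a changed body as MODIFIED (see _get_status).
# The elided remainder of this method presumably digests the text, e.g.
# (assumption, not confirmed by this excerpt):
#	import md5 #hashlib.md5 on newer Pythons
#	return md5.new(text.encode('utf-8')).hexdigest()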
def _fix_indexes(self):
self._db_execute(self._c, 'SELECT sql FROM sqlite_master WHERE name="pollindex"')
result = self._c.fetchone()[0]
if "fakedate" not in result:
logging.info("Rebuilding indexes")
#this means the user was using svn before I fixed the indexes
self._db_execute(self._c, 'SELECT name FROM sqlite_master WHERE type="index"')
result = self._c.fetchall()
for index in result:
	if 'autoindex' not in index[0]:
		self._db_execute(self._c, 'DROP INDEX %s' % index[0])
self._db_execute(self._c, u"""CREATE INDEX pollindex ON entries (fakedate DESC);""")
self._db_execute(self._c, u"""CREATE INDEX feedindex ON feeds (title DESC);""")
self._db_execute(self._c, u"""CREATE INDEX e_feedindex ON entries (feed_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX m_feedindex ON media (feed_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX m_entryindex ON media (entry_id DESC);""")
self._db_execute(self._c, u"""CREATE INDEX t_feedindex ON tags (feed_id DESC);""")
logging.info("Indexes rebuilt")
def clean_database_media(self):
self._db_execute(self._c, "SELECT rowid,file,entry_id FROM media")
result = self._c.fetchall()
self._db_execute(self._c, "SELECT title FROM entries WHERE rowid=?",(item[2],))
title = self._c.fetchone()
if title is None: #this entry doesn't exist anymore
self._db_execute(self._c, "DELETE FROM media WHERE rowid=?",(item[0],))

#right now this code never gets called. Maybe it should be?
def clean_file_media(self):
"""walks the media dir and deletes anything that doesn't have an entry in the database.
Also deletes dirs that hold only a playlist, or nothing at all"""
media_dir = self.get_setting(STRING, '/apps/penguintv/media_storage_location', os.path.join(utils.get_home(), "media"))
d = os.walk(media_dir)
for root,dirs,files in d:
image_index = dirs.index("images")
del dirs[image_index]
if file != "playlist.m3u":
self._db_execute(self._c, u"SELECT rowid, download_status FROM media WHERE file=?",(os.path.join(root, file),))
result = self._c.fetchone()
logging.info("deleting "+os.path.join(root,file))
os.remove(os.path.join(root,file))
elif result[1] == D_NOT_DOWNLOADED: #db says it's not downloaded, so remove it
logging.info("deleting "+os.path.join(root,file))
os.remove(os.path.join(root,file))
d = os.walk(media_dir)
for root,dirs,files in d:
image_index = dirs.index("images")
del dirs[image_index]
if files[0] == "playlist.m3u":
logging.info("deleting "+root)
elif len(files) == 0:
logging.info("deleting "+root)
#now clean up images?
self._image_cache.cleanup()
def relocate_media(self, old_dir, new_dir):
"""rewrite the db so that media files point to a new place. Lots of
asserts here, since this touches every row."""
if old_dir[-1] == '/' or old_dir[-1] == '\\':
old_dir = old_dir[:-1]
assert os.access(new_dir, os.F_OK | os.R_OK | os.W_OK | os.X_OK)
assert os.access(old_dir, os.F_OK | os.R_OK | os.W_OK | os.X_OK)
self._db_execute(self._c, u'SELECT rowid, file FROM media WHERE file IS NOT NULL')
rows = self._c.fetchall()
for rowid, filename in rows:
assert filename.startswith(old_dir)
for rowid, filename in rows:
new_filename = os.path.join(new_dir, filename[len(old_dir) + 1:])
self._db_execute(self._c, u'UPDATE media SET file=? WHERE rowid=?', (new_filename, rowid))
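# The relocation above is a plain prefix swap; the +1 skips the path
# separator so os.path.join gets a relative remainder. In isolation:
def _sketch_relocate(filename, old_dir, new_dir):
	assert filename.startswith(old_dir)
	return os.path.join(new_dir, filename[len(old_dir) + 1:])
#_sketch_relocate('/a/b/c.mp3', '/a', '/mnt/media') -> '/mnt/media/b/c.mp3'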
def _check_settings_location(self):
"""Do we suddenly have gconf, where before we were using the db?
If so, migrate the settings from the db to gconf"""
settings_in_db = self.get_setting(BOOL, "settings_in_db", utils.HAS_GCONF, force_db=True)
settings_now_in_db = settings_in_db
self._db_execute(self._c, u'SELECT data, value FROM settings')
settings = self._c.fetchall()
for data, value in settings:
if data.startswith('/'):
val = self._conf.get_default_from_schema(data)
#not in schema, let it be replaced with a default
if val.type == gconf.VALUE_BOOL:
self._conf.set_bool(data, bool(int(value)))
elif val.type == gconf.VALUE_INT:
self._conf.set_int(data, int(value))
elif val.type == gconf.VALUE_STRING:
self._conf.set_string(data, value)
settings_now_in_db = False
if not utils.HAS_GCONF:
logging.error("Settings used to be in gconf, but gconf is now missing. Loading defaults")
settings_now_in_db = True
self.set_setting(BOOL, 'settings_in_db', settings_now_in_db, force_db=True)
def get_setting(self, type, datum, default=None, force_db=False):
if utils.HAS_GCONF and self._initializing_db:
logging.debug("we are initing db, returning and setting default: %s %s" % (datum, str(default)))
return default #always return default, gconf LIES
if utils.HAS_GCONF and datum[0] == '/' and not force_db:
retval = self._conf.get_bool(datum)
retval = self._conf.get_int(datum)
retval = self._conf.get_string(datum)
if retval is not None:
self._db_execute(self._c, u'SELECT value FROM settings WHERE data=?',(datum,))
retval = self._c.fetchone()
if retval is not None:
return bool(int(retval[0]))
return int(retval[0])
return str(retval[0])

def set_setting(self, type, datum, value, force_db=False):
if utils.HAS_GCONF and datum[0] == '/' and not force_db:
self._conf.set_bool(datum, value)
self._conf.set_int(datum, value)
self._conf.set_string(datum, value)
current_val = self.get_setting(type, datum)
if current_val is None:
self._db_execute(self._c, u'INSERT INTO settings (data, value) VALUES (?,?)', (datum, value))
self._db_execute(self._c, u'UPDATE settings SET value=? WHERE data=?', (value,datum))
if self._change_setting_cb is not None:
self._change_setting_cb(type, datum, value)
def set_feed_cache(self, cachelist):
"""cachelist items are rows of:
rowid, flag, unread, total"""
fd = open(os.path.join(self.home, 'feed_cache.pickle'), 'wb')
pickle.dump(cachelist, fd)
logging.warning("Couldn't create feed_cache.pickle.")
#self._db_execute(self._c, u'UPDATE feeds SET flag_cache=?, unread_count_cache=?, entry_count_cache=?, first_entry_cache=? WHERE rowid=?',\
#(cache[1], cache[2], cache[3], cache[4], cache[0]))
#self._db_execute(self._c, u'UPDATE feeds SET unread_count_cache=? WHERE rowid=?',(cache[2],cache[0]))
#self._db_execute(self._c, u'UPDATE feeds SET entry_count_cache=? WHERE rowid=?',(cache[3],cache[0]))
self.set_setting(BOOL, "feed_cache_dirty", False)
self.cache_dirty = False

def get_feed_cache(self):
logging.debug("Feed cache is dirty, returning empty set")
fd = open(os.path.join(self.home, 'feed_cache.pickle'), 'rb')
cache = pickle.load(fd)
logging.warning("error loading feed_cache.pickle")
#self._db_execute(self._c, u'SELECT rowid, flag_cache, unread_count_cache, entry_count_cache, pollfail, first_entry_cache FROM feeds ORDER BY UPPER(TITLE)')
#cache = self._c.fetchall()
self.set_setting(BOOL, "feed_cache_dirty", True)
self.cache_dirty = True
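# The feed cache is a pickled list guarded by the "feed_cache_dirty" setting:
# the dirty bit is cleared only after a successful dump, so a crash before
# that point makes the next start ignore the stale pickle. The round trip in
# isolation (sketch only):
def _sketch_cache_roundtrip(path, rows):
	fd = open(path, 'wb')
	pickle.dump(rows, fd)
	fd.close()
	fd = open(path, 'rb')
	back = pickle.load(fd)
	fd.close()
	return back == rows #True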
def insertURL(self, url, title=None):
#if a feed with that url doesn't already exist, add it
self._db_execute(self._c, """SELECT url FROM feeds WHERE url=?""",(url,))
#on success, the fetch will return the url itself
if self._c.fetchone() != (url,):
if title is not None:
self._db_execute(self._c, u"""INSERT INTO feeds (title,url,pollfail,pollfreq,lastpoll,newatlast,flags,feed_pointer,image) VALUES (?, ?,0, 1800,0,0,0,-1,"")""", (title,url)) #default 30 minute polling
self._db_execute(self._c, u"""INSERT INTO feeds (title,url,pollfail,pollfreq,lastpoll,newatlast,flags,feed_pointer,image) VALUES (?, ?,0, 1800,0,0,0,-1,"")""", (url,url)) #default 30 minute polling
#self._db_execute(self._c, u"""SELECT rowid,url FROM feeds WHERE url=?""",(url,))
self._db_execute(self._c, "SELECT last_insert_rowid()")
feed_id = self._c.fetchone()[0]
d={ 'title':_("Waiting for first poll"),
'description':_("This feed has not yet been polled successfully. There might be an error with this feed.<br>")+str(title),
self._db_execute(self._c, u'INSERT INTO entries (feed_id, title, creator, description, read, fakedate, date, guid, link, keep) VALUES (?, ?, NULL, ?, ?, 0, ?, ?, "http://", 0)',(feed_id,d['title'],d['description'],'0', int(time.time()), int(time.time())))
self._db_execute(self._c, """SELECT rowid FROM feeds WHERE url=?""",(url,))
feed_id = self._c.fetchone()
logging.info("db: feed already exists")
raise FeedAlreadyExists(feed_id)

def add_feed_filter(self, pointed_feed_id, filter_name, query):
#self._db_execute(self._c, u'SELECT rowid,feed_pointer,description FROM feeds WHERE feed_pointer=? AND description=?',(pointed_feed_id,query))
#result = self._c.fetchone()
#this is lame I know. We shouldn't ever get a collision here though!
s.update(filter_name+query+str(random.getrandbits(32)))
self._db_execute(self._c, u'INSERT INTO feeds (title,url,feed_pointer,description,pollfail,pollfreq,lastpoll,newatlast,flags) VALUES (?, ?,?,?, 0,21600,0,0,0)', (filter_name,s.hexdigest(),pointed_feed_id,query))
self._db_execute(self._c, "SELECT last_insert_rowid()")
return self._c.fetchone()[0]
#	raise FeedAlreadyExists, result[0]
def set_feed_filter(self, pointer_feed_id, filter_name, query):
self._db_execute(self._c, u'SELECT feed_pointer FROM feeds WHERE rowid=?',(pointer_feed_id,))
pointed_id = self._c.fetchone()
if pointed_id is None:
raise NoFeed, pointer_feed_id
pointed_id = pointed_id[0]
self._db_execute(self._c, u'SELECT rowid FROM feeds WHERE feed_pointer=? AND description=?',(pointed_id,query))
#result = self._c.fetchone()
self._db_execute(self._c, u'UPDATE feeds SET title=?, description=? WHERE rowid=?',(filter_name, query, pointer_feed_id))
#	raise FeedAlreadyExists, result[0]

def delete_feed(self, feed_id):
#check for a valid entry
self._db_execute(self._c, """SELECT rowid FROM feeds WHERE rowid=?""",(feed_id,))
result = self._c.fetchone()[0]
if result != feed_id:
#delete the feed, its entries, and its media (this does not delete files)
self._db_execute(self._c, """DELETE FROM feeds WHERE rowid=?""",(feed_id,))
self._reindex_feed_list.append(feed_id)
self._db_execute(self._c, u'DELETE FROM tags WHERE feed_id=?',(feed_id,))
#result = self._c.fetchone()
self._icon_manager.remove_icon(feed_id)
self._db_execute(self._c, 'SELECT rowid FROM entries WHERE feed_id=?',(feed_id,))
data=self._c.fetchall()
dataList = [list(row) for row in data]
for datum in dataList:
if self._image_cache is not None:
self._image_cache.remove_cache(datum[0])
self._db_execute(self._c, 'SELECT rowid FROM media WHERE entry_id=?',(datum[0],))
media=self._c.fetchall()
mediaList = [list(row) for row in media]
for medium in mediaList:
self.delete_media(int(medium[0]))
self._db_execute(self._c, 'DELETE FROM media WHERE entry_id=?',(datum[0],))
self._reindex_entry_list.append(datum[0])
self._db_execute(self._c, """DELETE FROM entries WHERE feed_id=?""",(feed_id,))

def delete_media(self, media_id):
media = self.get_media(media_id)
try: #if it doesn't even have a 'file' key then return
if media['file'] is None:
if os.path.isfile(media['file']):
os.remove(media['file'])
elif os.path.isdir(media['file']): #could be a dir if it was a bittorrent download
utils.deltree(media['file'])
except os.error, detail:
logging.error("Error deleting: "+str(detail))
#but keep going in case the dirs are empty now
#now check to see if we should get rid of the dated dir
globlist = glob.glob(os.path.split(media['file'])[0]+"/*")
if len(globlist)==1 and os.path.split(globlist[0])[1]=="playlist.m3u": #if only the playlist is left, we're done
utils.deltree(os.path.split(media['file'])[0])
if len(globlist)==0: #similarly, if the dir is empty, we're done.
utils.deltree(os.path.split(media['file'])[0])
except os.error, detail:
logging.error("Error deleting dirs: "+str(detail))
#if everything worked, set the status
self.set_media_download_status(media_id,D_NOT_DOWNLOADED)

def delete_bad(self):
self._db_execute(self._c, """DELETE FROM feeds WHERE title IS NULL""")
def poll_multiple(self, arguments=0, feeds=None):
"""Polls multiple feeds using a thread pool"""
cur_time = int(time.time())
self._cancel_poll_multiple = False
if arguments & A_AUTOTUNE and (arguments & A_ALL_FEEDS) == 0:
self._db_execute(self._c, 'SELECT rowid FROM feeds WHERE (? - lastpoll) >= pollfreq ORDER BY pollfreq', (cur_time,))
elif arguments & A_ERROR_FEEDS:
self._db_execute(self._c, 'SELECT rowid FROM feeds WHERE pollfail=1 ORDER BY pollfreq')
self._db_execute(self._c, 'SELECT rowid FROM feeds ORDER BY pollfreq')
data=self._c.fetchall()
feeds = [row[0] for row in data]
self.polling_callback((-1, [], 0), False)
#don't renice on hildon because we can't renice
#back down to zero again
#if not utils.RUNNING_HILDON:
if utils.RUNNING_HILDON or utils.RUNNING_SUGAR:
pool = ThreadPool.ThreadPool(threadcount, "ptvDB", lucene_compat=utils.HAS_LUCENE)
self._parse_list = []
if self._cancel_poll_multiple or self._exiting:
self._db_execute(self._c, u'SELECT feed_pointer FROM feeds WHERE rowid=?',(feed,))
result = self._c.fetchone()[0]
self._parse_list.append((feed, arguments, len(feeds), -2))
self._db_execute(self._c, """SELECT url,etag FROM feeds WHERE rowid=?""",(feed,))
data = self._c.fetchone()
pool.queueTask(self._pool_poll_feed,(feed,arguments,len(feeds), data),self._poll_mult_cb)
#grow the cache while we do this operation
#self._db_execute(self._c, 'PRAGMA cache_size=6000')
while polled < len(feeds):
if self._cancel_poll_multiple or self._exiting:
if len(self._parse_list) > 0:
feed_id, args, total, parsed = self._parse_list.pop(0)
self.polling_callback(self._process_feed(feed_id, args, total, parsed))
#self._db_execute(self._c, 'PRAGMA cache_size=2000')
if self._cancel_poll_multiple:
self._parse_list = []
#pass a dummy poll result, send the cancel signal
self.polling_callback((-1, [], total), True)
else: # no need for manual join
while pool.getTaskCount()>0: #manual joinAll so we can check for exit
pool.joinAll(False, True)
#if not utils.RUNNING_HILDON:
pool.joinAll(False,True) #just to make sure I guess
if not self._exiting:
self._cancel_poll_multiple = False
#if not utils.RUNNING_HILDON:

def interrupt_poll_multiple(self):
self._cancel_poll_multiple = True

def _poll_mult_cb(self, args):
feed_id, args, total, parsed = args
self._parse_list.append((feed_id, args, total, parsed))
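# Hand-off between threads: the pool workers only download and parse, then
# _poll_mult_cb queues each result, and poll_multiple's loop pops items off
# and performs every database write on the calling thread. The consumer side,
# reduced to its core:
#	while polled < len(feeds):
#		if len(self._parse_list) > 0:
#			feed_id, args, total, parsed = self._parse_list.pop(0)
#			self.polling_callback(self._process_feed(feed_id, args, total, parsed))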
def _pool_poll_feed(self, args):
feed_id, arguments, total, data = args
#save ram by not piling up polled data
if utils.RUNNING_SUGAR or utils.RUNNING_HILDON:
parse_list_limit = 10
parse_list_limit = 50
while len(self._parse_list) > parse_list_limit and not self._exiting:
return (feed_id, arguments, total, -1)
#feedparser.disableWellFormedCheck=1 #do we still need this? it used to cause crashes
#speed up feedparser
#must sanitize because some feeds have POPUPS!
if utils.RUNNING_SUGAR:
#feedparser._sanitizeHTML = lambda a, b: a
feedparser._resolveRelativeURIs = lambda a, b, c: a
if arguments & A_IGNORE_ETAG == A_IGNORE_ETAG:
data = feedparser.parse(url)
data = feedparser.parse(url,etag)
return (feed_id, arguments, total, data)
except Exception, e:
logging.error(str(e))
return (feed_id, arguments, total, -1)

def _process_feed(self,feed_id, args, total, data, recurse=0):
"""a wrapper function that returns the index along with the result
so we can sort. Each poller needs its own db connection for locking reasons"""
self._db_execute(self._c, u'SELECT lastpoll FROM feeds WHERE rowid=?', (feed_id,))
last_poll_time = self._c.fetchone()[0]
#poll_arguments = args[1]
return (feed_id,{'ioerror':None, 'pollfail':False}, total)
result, new_entryids, mod_entryids = self.poll_feed(feed_id, args | A_POOLED_POLL, preparsed=data)
return (feed_id,{'ioerror':None, 'pollfail':False}, total)
except sqlite.OperationalError, e:
logging.warning("Database warning..." + str(e))
logging.warning("trying again...")
self._db = sqlite.connect(os.path.join(self.home,"penguintv4.db"), timeout=30, isolation_level="IMMEDIATE")
self._c = self._db.cursor()
return self._process_feed(feed_id, args, total, data, recurse+1) #and reconnect
logging.warning("can't get lock, giving up")
return (feed_id,{'pollfail':True}, total)
except FeedPollError,e:
#print "feed poll error",
logging.warning(str(e))
return (feed_id,{'pollfail':True}, total)
logging.warning(str(e))
#we got an ioerror, but we won't take it out on the feed
return (feed_id,{'ioerror':e, 'pollfail':False}, total)
logging.warning("other error polling feed:" + str(feed_id))
exc_type, exc_value, exc_traceback = sys.exc_info()
for s in traceback.format_exception(exc_type, exc_value, exc_traceback):
logging.error(error_msg)
return (feed_id,{'pollfail':True}, total)
#assemble our handy dictionary while we're in a thread
update_data['first_poll'] = last_poll_time == 0
update_data['new_entries'] = result
update_data['new_entryids'] = new_entryids
update_data['mod_entryids'] = mod_entryids
if self.is_feed_filter(feed_id):
entries = self.get_entrylist(feed_id) #reinitialize filtered_entries dict
update_data['unread_count'] = self.get_unread_count(feed_id)
flag_list = self.get_entry_flags(feed_id)
update_data['pollfail']=self.get_feed_poll_fail(self._resolve_pointed_feed(feed_id))
self._db_execute(self._c, u'SELECT read FROM entries WHERE feed_id=?',(feed_id,))
rows = self._c.fetchall()
update_data['unread_count'] = len([item for item in rows if item[0]==0])
update_data['entry_count'] = len(rows)
flag_list = self.get_entry_flags(feed_id)
if len(self.get_pointer_feeds(feed_id)) > 0:
logging.info("have pointers, reindexing now")
update_data['flag_list']=flag_list
update_data['pollfail']=False
update_data['no_changes'] = False
flag_list = self.get_entry_flags(feed_id)
update_data['flag_list']=flag_list
update_data['pollfail'] = False
update_data['no_changes'] = True
update_data['first_poll'] = False
return (feed_id, update_data, total)
def poll_feed_trap_errors(self, feed_id, callback):
self._db_execute(self._c, "SELECT title,url FROM feeds WHERE rowid=?",(feed_id,))
result = self._c.fetchone()
feed['feed_id']=feed_id
feed['url']=result[1]
feed['new_entries'], feed['new_entryids'], feed['mod_entryids'] = \
self.poll_feed(feed_id, A_IGNORE_ETAG+A_DO_REINDEX)
callback(feed, True)
except Exception, e: #FeedPollError,e:
logging.warning(str(e))
logging.warning("error polling feed:")
exc_type, exc_value, exc_traceback = sys.exc_info()
for s in traceback.format_exception(exc_type, exc_value, exc_traceback):
logging.warning(error_msg)
callback(feed, False)

def _polling_callback(self, data):
print "look a callback"
def poll_feed(self, feed_id, arguments=0, preparsed=None):
"""polls a feed and returns the number of new articles and a flag list. Optionally, one can pass
a feedparser dictionary in the preparsed argument and avoid network operations"""

def perform_feed_updates(updates, f_id):
if not updates.has_key('pollfail'):
updates['pollfail'] = 0
#logging.debug("setting pollfail to %i for %i" % (updates['pollfail'], f_id))
updated_fields = ", ".join(["%s=?" % k for k in updates.keys()])
updated_values = tuple([updates[k] for k in updates.keys()])
self._db_execute(self._c, u"""UPDATE feeds SET %s WHERE rowid=?""" % updated_fields, updated_values + (f_id,))

self._db_execute(self._c, u'SELECT feed_pointer, url, etag, image, title, link, flags, lastpoll, newatlast, pollfreq FROM feeds WHERE rowid=?', (feed_id,))
result = self._c.fetchone()
feed['feed_id'] = feed_id
feed['feed_pointer'] = result[0]
feed['url'] = result[1]
feed['etag'] = result[2]
feed['image'] = result[3]
feed['title'] = result[4]
feed['link'] = result[5]
feed['flags'] = result[6]
feed['last_time'] = result[7]
feed['newatlast'] = result[8]
feed['old_poll_freq'] = result[9]
if preparsed is None:
#feed_id = self._resolve_pointed_feed(feed_id)
#self._db_execute(self._c, u'SELECT feed_pointer FROM feeds WHERE rowid=?',(feed_id,))
#result =self._c.fetchone()
if feed['feed_pointer'] >= 0:
#self._db_execute(self._c, """SELECT url,etag FROM feeds WHERE rowid=?""",(feed_id,))
#data = self._c.fetchone()
#feedparser.disableWellFormedCheck=1 #do we still need this? it used to cause crashes
#speed up feedparser
if utils.RUNNING_SUGAR or utils.RUNNING_HILDON:
#feedparser._sanitizeHTML = lambda a, b: a
feedparser._resolveRelativeURIs = lambda a, b, c: a
if arguments & A_IGNORE_ETAG == A_IGNORE_ETAG:
data = feedparser.parse(feed['url'])
data = feedparser.parse(feed['url'], feed['etag'])
except Exception, e:
if arguments & A_AUTOTUNE == A_AUTOTUNE:
feed_updates = self._set_new_update_freq(feed, 0)
logging.warning("feedparser exception: %s" % str(e))
feed_updates['pollfail'] = 1
#self._db_execute(self._c, """UPDATE feeds SET pollfail=1 WHERE rowid=?""",(feed_id,))
perform_feed_updates(feed_updates, feed_id)
logging.warning(str(e))
raise FeedPollError,(feed_id,"feedparser blew a gasket")
if arguments & A_AUTOTUNE == A_AUTOTUNE:
feed_updates = self._set_new_update_freq(feed, 0)
logging.warning("bad preparsed")
feed_updates['pollfail'] = 1
#self._db_execute(self._c, """UPDATE feeds SET pollfail=1 WHERE rowid=?""",(feed_id,))
perform_feed_updates(feed_updates, feed_id)
raise FeedPollError,(feed_id,"feedparser blew a gasket")
elif preparsed == -2:
#print "pointer feed, returning 0"
#print "data is good"
#need to get a url from somewhere
url = data['feed']['title_detail']['base']
if data.has_key('status'):
if data['status'] == 304: #this means "nothing has changed"
if arguments & A_AUTOTUNE == A_AUTOTUNE:
feed_updates = self._set_new_update_freq(feed, 0)
#feed_updates['pollfail'] = 1
#self._db_execute(self._c, """UPDATE feeds SET pollfail=1 WHERE rowid=?""",(feed_id,))
perform_feed_updates(feed_updates, feed_id)
if data['status'] == 404: #whoops
if arguments & A_AUTOTUNE == A_AUTOTUNE:
feed_updates = self._set_new_update_freq(feed, 0)
feed_updates['pollfail'] = 1
#self._db_execute(self._c, """UPDATE feeds SET pollfail=1 WHERE rowid=?""",(feed_id,))
perform_feed_updates(feed_updates, feed_id)
raise FeedPollError,(feed_id,"404 not found: "+str(url))
if len(data['feed']) == 0 or len(data['items']) == 0:
if data.has_key('bozo_exception'):
if isinstance(data['bozo_exception'], URLError):
e = data['bozo_exception'][0]
#logging.debug(str(e))
if errno in (#-2, # Name or service not known
-3, #failure in name resolution
101, #Network is unreachable
114, #Operation already in progress
11): #Resource temporarily unavailable
elif errno == -2: #could be no site, could be no internet
#this really should work, right?
#fixme: let's find a real way to test internet, hm?
u = urllib.urlretrieve("http://www.google.com")
if arguments & A_AUTOTUNE == A_AUTOTUNE:
feed_updates = self._set_new_update_freq(feed, 0)
feed_updates['pollfail'] = 1
#self._db_execute(self._c, """UPDATE feeds SET pollfail=1 WHERE rowid=?""",(feed_id,))
perform_feed_updates(feed_updates, feed_id)
#logging.debug("empty: %s" % str(data))
raise FeedPollError,(feed_id,"empty feed")
#see if we need to get an image
if not self._icon_manager.icon_exists(feed_id):
href = self._icon_manager.download_icon(feed_id, data)
if href is not None:
#self._db_execute(self._c, u"""UPDATE feeds SET image=? WHERE rowid=?""",(href,feed_id))
feed_updates['image'] = href
#self._db_execute(self._c, u"""SELECT image FROM feeds WHERE rowid=?""",(feed_id,))
#try: old_href = self._c.fetchone()[0]
#except: old_href = ""
if not self._icon_manager.is_icon_up_to_date(feed_id, feed['image'], data):
self._icon_manager.remove_icon(feed_id)
href = self._icon_manager.download_icon(feed_id, data)
if href is not None:
#self._db_execute(self._c, u"""UPDATE feeds SET image=? WHERE rowid=?""",(href,feed_id))
feed_updates['image'] = href
if arguments & A_DELETE_ENTRIES == A_DELETE_ENTRIES:
logging.info("deleting existing entries " + str(feed_id) + " " + str(arguments))
self._db_execute(self._c, """DELETE FROM entries WHERE feed_id=?""",(feed_id,))
#to discover the old entries, first we mark everything as old
#later, we will unset this flag for everything that is NEW,
#MODIFIED, and EXISTS. anything still flagged should be deleted
#self._db_execute(self._c, """UPDATE entries SET old=1 WHERE feed_id=?""",(feed_id,))
feed_updates['pollfail'] = 0
#self._db_execute(self._c, """UPDATE feeds SET pollfail=0 WHERE rowid=?""",(feed_id,))
channel = data['feed']
if not channel.has_key('description'):
channel['description']=""
if len(channel['description']) > 128:
channel['description'] = channel['description'][0:127]
channel['description']=self._encode_text(channel['description'])
if not channel.has_key('title'):
if channel['description'] != "":
channel['title']=channel['description']
channel['title']=url
channel['title'] = self._encode_text(channel['title'])
#print channel['title']
if not data.has_key('etag'):
#if not data.has_key('modified'):
#	modified = int(time.mktime(data['modified']))
#self._db_execute(self._c, u'SELECT title FROM feeds WHERE rowid=?',(feed_id,))
#exists=self._c.fetchone()
if len(feed['title'])>4:
#self._db_execute(self._c, """UPDATE feeds SET description=?, modified=?, etag=? WHERE rowid=?""", (channel['description'], modified,data['etag'],feed_id))
if feed['title'][0:4] == "http": #hack to detect when the title hasn't been set yet because of first poll
feed_updates['title'] = channel['title']
#self._db_execute(self._c, """UPDATE feeds SET title=?, description=?, modified=?, etag=? WHERE rowid=?""", (channel['title'],channel['description'], modified,data['etag'],feed_id))
elif len(feed['title'])>0: #don't change the title
#self._db_execute(self._c, """UPDATE feeds SET description=?, modified=?, etag=? WHERE rowid=?""", (channel['description'], modified,data['etag'],feed_id))
if feed['title'] is None:
feed_updates['title'] = channel['title']
#self._db_execute(self._c, """UPDATE feeds SET title=?, description=?, modified=?, etag=? WHERE rowid=?""", (channel['title'],channel['description'], modified,data['etag'],feed_id))
feed_updates['title'] = channel['title']
feed_updates['description'] = channel['description']
feed_updates['etag'] = data['etag']
#self._db_execute(self._c, """UPDATE feeds SET title=?, description=?, etag=? WHERE rowid=?""", (channel['title'],channel['description'], data['etag'],feed_id))
self._reindex_feed_list.append(feed_id)
feed_updates['description'] = channel['description']
feed_updates['etag'] = data['etag']
except Exception, e:
logging.warning(str(e))
feed_updates['pollfail'] = 1
#self._db_execute(self._c, """UPDATE feeds SET pollfail=1 WHERE rowid=?""",(feed_id,))
perform_feed_updates(feed_updates, feed_id)
raise FeedPollError,(feed_id,"error updating title and description of feed")
#self._db_execute(self._c, u'SELECT link FROM feeds WHERE rowid=?',(feed_id,))
#link = self._c.fetchone()
#if link is not None:
#if there was no result, or the result is None, it's blank
if feed['link'] is None:
if feed['link'] == "" and data['feed'].has_key('link'):
feed_updates['link'] = data['feed']['link']
#self._db_execute(self._c, u'UPDATE feeds SET link=? WHERE rowid=?',(data['feed']['link'],feed_id))
#populate the entries
#only look as far back as 1000% for existing entries
#existing_limit = int(len(data['items']) * 10)
#print "only checking", existing_limit
self._db_execute(self._c,
#"""SELECT rowid,guid,link,title,description FROM entries WHERE feed_id=? ORDER BY fakedate DESC LIMIT %i""" % existing_limit,
"""SELECT rowid,guid,link,title,description,hash FROM entries WHERE feed_id=? ORDER BY fakedate DESC""",
existing_entries = self._c.fetchall()
#logging.debug("existing entries: %i" % len(existing_entries))
#print "got", len(existing_entries)
#only use the GUID if there are no dupes -- thanks peter's feed >-(
if len(existing_entries) > 0:
guids = [e[1] for e in existing_entries]
if len(guids[0]) > 2: #anything shorter is too short to be valuable
for g in guids[1:50]: #up to the first 50 is fine
guid_quality = 1.0 - (float(dupe_count) / len(existing_entries))
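# Worked example: with 50 existing entries and five guid collisions in the
# sample, guid_quality = 1.0 - 5/50.0 = 0.9; a low score tells _get_status to
# trust guid matches less and lean on the link/title/hash comparisons instead.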
#we can't trust the dates inside the items for timing data.
#Bad formats, no dates at all, and timezones screw things up
#so I introduce a fake date which works for determining read and
#unread article counts, and keeps the articles in order
fake_time = int(time.time())
default_read = int(feed['flags'] & FF_MARKASREAD == FF_MARKASREAD)
self._db_execute(self._c, u"""SELECT entry_id FROM media WHERE feed_id=?""", (feed_id,))
media_entries = self._c.fetchall()
if media_entries is None:
media_entries = [r[0] for r in media_entries]
#logging.debug("feed has %i items" % len(data['items']))
for item in data['items']:
#do a lot of normalizing
possible_bodies = []
#right now we look in the following places for the body, and take the longest one:
#content, description, summary, summary_detail
if item.has_key('content'): #ok so peter was right,
possible_bodies.append(item['content'][0]['value'])
if item.has_key('description'): #content_encoded is where we should be
possible_bodies.append(item['description'])
if item.has_key('summary'): #or the summary
possible_bodies.append(item['summary'])
if item.has_key('summary_detail'):
possible_bodies.append(item['summary_detail']['value'])
if len(possible_bodies):
possible_bodies.sort(lambda x,y: len(y)-len(x))
item['body'] = possible_bodies[0]
item['body']=self._encode_text(item['body'])
if item['body'].count('<') > 5: #probably encoded body
item['body'] = utils.html_entity_unfixer(item['body'])
if not item.has_key('title'):
if item['title']=="":
item['title']=item['description'][0:35]
html_begin = string.find(item['title'],'<')
if html_begin >= 0 and html_begin < 5: #in case it _begins_ with html, and the html is really early
#p = utils.StrippingParser()
#p.feed(item['description'])
#item['title']=p.result[0:35]
desc = item['description']
#hack for hullabaloo
desc = desc[:desc.find("<br")]
item['title'] = STRIPPER_REGEX.sub('', desc)[:35]
elif html_begin > 5: #in case there's html within 35 chars...
item['title']=item['title'][0:html_begin-1] #strip
#things mess up if a title ends in a space, so strip trailing spaces
if len(item['title'])==0:
item['title']='untitled'
item['title'] = item['title'].strip()
#p = utils.StrippingParser()
#p.feed(item['title'])
#item['title'] = p.result
item['title'] = STRIPPER_REGEX.sub('', item['title'])
#let actual entities through, but correct unadorned &s.
#thanks to http://www.regular-expressions.info/repeat.html#greedy
#I wrote: &.+?; which didn't work (matched widest results-- see reference)
m = re.compile('&[^;]+;').search(item['title'])
if m is not None: #entity found
if span[1]-span[0] > 10: #unlikely to be an entity
item['title'] = re.sub('&','&amp;',item['title'])
item['title'] = re.sub('&','&amp;',item['title'])
if type(item['body']) is str:
item['body'] = unicode(item['body'],'utf-8')
for uni in _common_unicode.keys():
item['body'] = item['body'].replace(uni, _common_unicode[uni])
item['title'] = self._encode_text(item['title'])
for uni in _common_unicode.keys():
item['title'] = item['title'].replace(uni, _common_unicode[uni])
if not item.has_key('creator'):
if item.has_key('author'):
item['creator']=item['author']
if not item.has_key('guid'):
if not item.has_key('link'):
item['creator']=self._encode_text(item['creator'])
#blow away date_parsed with more recent times
if item.has_key('updated_parsed'):
item['date_parsed'] = item['updated_parsed']
elif item.has_key('modified_parsed'):
item['date_parsed'] = item['modified_parsed']
elif item.has_key('created_parsed'):
item['date_parsed'] = item['created_parsed']
elif item.has_key('update_parsed'):
item['date_parsed'] = item['update_parsed']
if not item.has_key('date_parsed') or item['date_parsed'] is None:
item['date_parsed']=time.localtime()
entry_hash = self._get_hash(item['guid'], item['title'], item['body'])
status = self._get_status(item, entry_hash, existing_entries, guid_quality, media_entries)
self._db_execute(self._c, u'INSERT INTO entries (feed_id, title, creator, description, read, fakedate, date, guid, link, keep, hash) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?)',
(feed_id,item['title'],item['creator'],item['body'],
default_read,fake_time-i,
int(time.mktime(item['date_parsed'])),
item['guid'],item['link'], entry_hash))
self._db_execute(self._c, "SELECT last_insert_rowid()")
entry_id = self._c.fetchone()[0]
if item.has_key('enclosures'):
for media in item['enclosures']:
media.setdefault('length', 0)
media.setdefault('type', 'application/octet-stream')
self._db_execute(self._c, u"""INSERT INTO media (entry_id, url, mimetype, download_status, viewed, keep, length, feed_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?)""", (entry_id, media['url'], media['type'], D_NOT_DOWNLOADED, default_read, 0, media['length'], feed_id))
self._reindex_entry_list.append(entry_id)
self._image_cache_list.append(entry_id)
no_delete.append(entry_id)
new_entryids.append(entry_id)
elif status[0]==EXISTS:
entry_id = status[1]
no_delete.append(entry_id)
elif status[0]==MODIFIED:
entry_id = status[1]
self._db_execute(self._c, u'UPDATE entries SET title=?, creator=?, description=?, date=?, guid=?, link=?, hash=? WHERE rowid=?',
(item['title'],item['creator'],item['body'],
int(time.mktime(item['date_parsed'])),item['guid'],
item['link'], entry_hash, entry_id))
if self.entry_flag_cache.has_key(entry_id): del self.entry_flag_cache[entry_id]
if item.has_key('enclosures'):
#self._db_execute(self._c, u'SELECT url FROM media WHERE entry_id=? AND (download_status=? OR download_status=?)',
#	(entry_id,D_NOT_DOWNLOADED,D_ERROR))
self._db_execute(self._c, u'SELECT url FROM media WHERE entry_id=?', (entry_id,))
db_enc = self._c.fetchall()
db_enc = [c_i[0] for c_i in db_enc]
f_enc = [f_i['url'] for f_i in item['enclosures']]
db_set = set(db_enc)
removed = list(db_set.difference(f_set))
added = list(f_set.difference(db_set))
qmarks = "?,"*(len(removed)-1)+"?"
self._db_execute(self._c, u'DELETE FROM media WHERE url IN ('+qmarks+') AND (download_status=? OR download_status=?)', tuple(removed)+(D_NOT_DOWNLOADED,D_ERROR))
#need to delete media that isn't in the enclosures and is not downloaded
#need to add media that's in the enclosures but not in the db after that process
for media in item['enclosures']: #add the rest
if media['url'] in added:
#if dburl[0] != media['url']: #only add if that url doesn't exist
media.setdefault('length', 0)
media.setdefault('type', 'application/octet-stream')
self._db_execute(self._c, u"""INSERT INTO media (entry_id, url, mimetype, download_status, viewed, keep, length, download_date, feed_id) VALUES (?, ?, ?, ?, ?, ?, ?, 0, ?)""", (entry_id, media['url'], media['type'], D_NOT_DOWNLOADED, default_read, 0, media['length'], feed_id))
self._db_execute(self._c, u'UPDATE entries SET read=0 WHERE rowid=?', (entry_id,))
self._reindex_entry_list.append(entry_id)
self._image_cache_list.append(entry_id)
no_delete.append(entry_id)
mod_entryids.append(entry_id)
#don't call anything old that has media...
self._db_execute(self._c, """SELECT entry_id FROM media WHERE download_status>0 AND feed_id=?""",(feed_id,))
result = self._c.fetchall()
#combine with EXISTing entries
no_delete += [r[0] for r in result]
# anything not set above as new, mod, or exists is no longer in
# the xml and therefore could be deleted if we have more articles than
self._db_execute(self._c, """SELECT count(*) FROM entries WHERE feed_id=?""",(feed_id,))
all_entries = self._c.fetchone()[0]
nokeepdeleted = int(feed['flags'] & FF_NOKEEPDELETED == FF_NOKEEPDELETED)
if len(no_delete) > 0:
qmarks = "?,"*(len(no_delete)-1)+"?"
#collect the deletable rowids; the actual DELETE happens below
self._db_execute(self._c,
"""SELECT rowid FROM entries WHERE rowid NOT IN (%s) AND keep=0 AND feed_id=?""" % qmarks,
tuple(no_delete) + (feed_id,))
ditchables = self._c.fetchall()
self._db_execute(self._c,
"""SELECT rowid FROM entries WHERE keep=0 AND feed_id=?""",
ditchables = self._c.fetchall()
elif MAX_ARTICLES > 0:
if all_entries > MAX_ARTICLES:
if len(no_delete) > 0:
qmarks = "?,"*(len(no_delete)-1)+"?"
self._db_execute(self._c, """SELECT rowid FROM entries WHERE rowid NOT IN (%s) AND keep=0 AND feed_id=? ORDER BY fakedate LIMIT ?""" % qmarks,
tuple(no_delete) + (feed_id, all_entries - MAX_ARTICLES))
ditchables = self._c.fetchall()
self._db_execute(self._c, """SELECT rowid FROM entries WHERE keep=0 AND feed_id=? ORDER BY fakedate LIMIT ?""",
(feed_id, all_entries - MAX_ARTICLES))
ditchables = self._c.fetchall()
if ditchables is not None:
if len(ditchables) > 0:
ditchables = tuple([r[0] for r in ditchables])
qmarks = "?,"*(len(ditchables)-1)+"?"
self._db_execute(self._c, """DELETE FROM entries WHERE rowid IN (%s)""" % qmarks, ditchables)
for e_id in ditchables:
self._image_uncache_list.append(e_id)
1800
#delete pre-poll entry
1801
if feed['last_time'] == 0:
1802
self._db_execute(self._c, "DELETE FROM entries WHERE fakedate=0 AND feed_id=?",(feed_id,))
1804
if arguments & A_AUTOTUNE == A_AUTOTUNE:
1805
result = self._set_new_update_freq(feed, new_items)
1806
feed_updates.update(result)
1808
cur_time = int(time.time())
1809
feed_updates['lastpoll'] = cur_time
1810
#self._db_execute(self._c, u'UPDATE feeds SET lastpoll=? WHERE rowid=?',(cur_time,feed_id))
1812
perform_feed_updates(feed_updates, feed_id)
1814
if arguments & A_POOLED_POLL == 0:
1815
if arguments & A_DO_REINDEX:
1819
return (new_items, new_entryids, mod_entryids)
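
# The "?,"*(len(seq)-1)+"?" pattern above recurs throughout this class; a
# hypothetical helper (not part of the original code) shows the intent:
#
#   def _qmarks(seq):
#       #build "?,?,...,?" sized to seq for a parameterized IN clause
#       return ",".join("?" * len(seq))
#
# Keeping the values in a tuple and only the placeholders in the SQL string
# lets sqlite handle the quoting instead of string interpolation.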
def _set_new_update_freq(self, feed, new_items):
    """Based on previous feed history and the number of items found, adjust
    the polling frequency.  The goal is one item per poll.
    Right now the algorithm is:

    find new items per poll period.
    if it's zero (didn't find anything):
        increase the poll frequency by the ratio of the average polltime to
        our previous frequency
    else:
        set poll freq to (now - last_poll) / new_items_per_poll_period
        (ie if we got 1.5 items this past period, set poll freq to old_freq/1.5)

    updates are never more often than 30 mins and never rarer than 4 hours"""
    #should never be called on a filtered feed
    feed_updates = {}
    cur_time = int(time.time())
    #this could suck if the program was just started, so only do it if the poll_freq seems correct
    #however still update the db with the poll time
    feed_updates['lastpoll'] = cur_time
    feed_updates['newatlast'] = new_items
    if cur_time - feed['last_time'] < feed['old_poll_freq']/2: #too soon to get a good reading
        return feed_updates

    #normalize the item count to one full poll period
    new_items = round(new_items * feed['old_poll_freq'] / (cur_time - feed['last_time']))
    if new_items == 0:
        #figure out the average time between article postings
        #this algorithm seems to be the most accurate based on my own personal judgment
        self._db_execute(self._c, 'SELECT date FROM entries WHERE feed_id=?', (feed['feed_id'],))
        datelist = self._c.fetchall()
        datelist.append((int(time.time()),)) #helps in some cases to pretend we found one now
        diffs = []
        for i in range(len(datelist)-1):
            diffs.append(abs(datelist[i+1][0]-datelist[i][0]))
        avg = sum(diffs)/len(diffs)
        #increase the poll frequency by ratio of average polltime to our previous frequency
        modifier = avg / feed['old_poll_freq']
        poll_freq = round(feed['old_poll_freq'] + modifier*60)
    else:
        poll_freq = floor((cur_time - feed['last_time']) / new_items)

    if poll_freq > 21600: #four hours
        poll_freq = 21600
    if poll_freq < 1800: #30 mins
        poll_freq = 1800
    feed_updates['pollfreq'] = poll_freq
    return feed_updates
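
# Illustrative arithmetic for the autotune above (a sketch, not part of the
# original code): a feed previously polled every 3600s that yields 3 items
# in one 3600s period gets poll_freq = floor(3600/3) = 1200, which the
# 30-minute floor then raises to 1800.  A feed that yields nothing drifts
# toward slower polls until it hits the 21600s (four hour) ceiling.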
def _get_status(self, item, new_hash, existing_entries, guid_quality, media_entries):
    """returns status, the entry_id of the matching entry (if any), and the media list if unmodified"""
    entry_id = -1
    old_hash = None
    t_item = {'guid': item['guid'],
              'body': item['body'],
              'link': item['link'],
              'title': item['title']}

    for entry_item in existing_entries:
        if guid_quality > 0.7:
            if str(entry_item[GUID]) == str(t_item['guid']):
                entry_id = entry_item[ID]
                old_hash = entry_item[HASH]
                #logging.debug("found match at %i (%f)" % (debug_i, debug_i / float(len(existing_entries))))
                break
        elif guid_quality > 0.1:
            if str(entry_item[GUID]) == str(t_item['guid']):
                if entry_item[TITLE] == t_item['title']:
                    entry_id = entry_item[ID]
                    old_hash = entry_item[HASH]
                    #logging.debug("found match at %i (%f)" % (debug_i, debug_i / float(len(existing_entries))))
                    break
        elif t_item['link'] != '':
            if entry_item[LINK] == t_item['link']:
                if entry_item[TITLE] == t_item['title']:
                    entry_id = entry_item[ID]
                    old_hash = entry_item[HASH]
                    #logging.debug("found match at %i (%f)" % (debug_i, debug_i / float(len(existing_entries))))
                    break
                elif entry_item[BODY] == t_item['body']:
                    entry_id = entry_item[ID]
                    old_hash = entry_item[HASH]
                    #logging.debug("found match at %i (%f)" % (debug_i, debug_i / float(len(existing_entries))))
                    break
        elif entry_item[TITLE] == t_item['title']:
            entry_id = entry_item[ID]
            old_hash = entry_item[HASH]
            #logging.debug("found match at %i (%f)" % (debug_i, debug_i / float(len(existing_entries))))
            break
        elif entry_item[BODY] == t_item['body']:
            entry_id = entry_item[ID]
            old_hash = entry_item[HASH]
            #logging.debug("found match at %i (%f)" % (debug_i, debug_i / float(len(existing_entries))))
            break

    if entry_id == -1:
        return (NEW, -1, [])

    if new_hash == old_hash:
        #now check the enclosures
        if entry_id not in media_entries:
            old_media = []
        else:
            old_media = self.get_entry_media(entry_id)
        #if they are both zero, return
        if len(old_media) == 0 and item.has_key('enclosures') == False:
            return (EXISTS, entry_id, [])
        if item.has_key('enclosures'):
            #if the lengths are different, return
            if len(old_media) != len(item['enclosures']):
                return (MODIFIED, entry_id, [])
        else:
            #if we had some, and now don't, return
            if len(old_media) > 0:
                return (MODIFIED, entry_id, [])

        #we have two lists of the same, non-zero length.
        #only now do we do the loops and sorts -- we need to test individual items
        existing_media = old_media
        old_media = [urlparse.urlparse(medium['url'])[:3] for medium in old_media]
        new_media = [urlparse.urlparse(m['url'])[:3] for m in item['enclosures']]
        old_media = utils.uniquer(old_media)
        new_media = utils.uniquer(new_media)
        old_media.sort()
        new_media.sort()
        if old_media != new_media:
            return (MODIFIED, entry_id, [])
        return (EXISTS, entry_id, existing_media)
    #logging.debug("entry is modified")
    return (MODIFIED, entry_id, [])
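
# A sketch of why the comparison above keeps only urlparse(...)[:3]: just
# the scheme, host, and path survive, so query-string churn (session ids,
# tracking parameters) doesn't flag an entry as modified.  For example:
#   urlparse.urlparse('http://example.com/pod.mp3?sid=42')[:3]
#   -> ('http', 'example.com', '/pod.mp3')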
def get_entry_media(self, entry_id):
    self._db_execute(self._c, """SELECT rowid,entry_id,url,file,download_status,viewed,length,mimetype FROM media WHERE entry_id = ? ORDER BY entry_id DESC""", (entry_id,))
    dataList = self._c.fetchall()
    if dataList is None:
        return []
    media_list = []
    for datum in dataList:
        medium = {}
        medium['url'] = datum[2] #MAGIC
        medium['download_status'] = int(datum[4]) #MAGIC
        try:
            medium['size'] = int(datum[6]) #MAGIC
        except:
            medium['size'] = 0
        medium['media_id'] = int(datum[0]) #MAGIC
        medium['file'] = datum[3] #MAGIC
        medium['entry_id'] = datum[1] #MAGIC
        medium['viewed'] = int(datum[5]) #MAGIC
        medium['mimetype'] = datum[7] #MAGIC
        media_list.append(medium)
    return media_list
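
# Usage sketch (the entry id is hypothetical): each medium comes back as a
# plain dict, e.g.
#   for medium in db.get_entry_media(42):
#       print medium['url'], medium['download_status'], medium['size']
# The #MAGIC comments flag positional indexes into the SELECT column list.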
def get_entry_media_block(self, entry_list):
    if len(entry_list) == 0:
        return {}
    qmarks = "?,"*(len(entry_list)-1)+"?"
    self._db_execute(self._c, """SELECT rowid,entry_id,url,file,download_status,viewed,length,mimetype FROM media WHERE entry_id in ("""+qmarks+')', tuple(entry_list))
    result = self._c.fetchall()
    media_dict = {}
    for datum in result:
        medium = {}
        medium['url'] = datum[2] #MAGIC
        medium['download_status'] = int(datum[4]) #MAGIC
        try:
            medium['size'] = int(datum[6]) #MAGIC
        except:
            medium['size'] = 0
        medium['media_id'] = int(datum[0]) #MAGIC
        medium['file'] = datum[3] #MAGIC
        medium['entry_id'] = datum[1] #MAGIC
        medium['viewed'] = int(datum[5]) #MAGIC
        medium['mimetype'] = datum[7] #MAGIC
        if not media_dict.has_key(medium['entry_id']):
            media_dict[medium['entry_id']] = [medium]
        else:
            media_dict[medium['entry_id']].append(medium)
    return media_dict
def get_media(self, media_id):
    self._db_execute(self._c, u'SELECT url, download_status, length, file, entry_id, viewed, mimetype, feed_id FROM media WHERE rowid=?', (media_id,))
    datum = self._c.fetchone()
    if datum is None:
        return None
    medium = {}
    medium['url'] = datum[0] #MAGIC
    medium['download_status'] = int(datum[1]) #MAGIC
    try:
        medium['size'] = int(datum[2]) #MAGIC
    except:
        medium['size'] = 0
    medium['media_id'] = media_id
    medium['file'] = datum[3] #MAGIC
    medium['entry_id'] = datum[4] #MAGIC
    medium['viewed'] = int(datum[5]) #MAGIC
    medium['mimetype'] = datum[6] #MAGIC
    medium['feed_id'] = datum[7] #MAGIC
    return medium

def get_feed_media_count(self, feed_id):
    self._db_execute(self._c, u'SELECT count(*) FROM media WHERE feed_id=?', (feed_id,))
    return self._c.fetchone()[0]
def get_entry(self, entry_id, ajax_url=None):
    self._db_execute(self._c, """SELECT title, creator, link, description, feed_id, date, read, keep, guid, hash FROM entries WHERE rowid=? LIMIT 1""", (entry_id,))
    result = self._c.fetchone()
    entry_dic = {}
    try:
        entry_dic['title'] = result[0]
        entry_dic['creator'] = result[1]
        entry_dic['link'] = result[2]
        entry_dic['description'] = result[3]
        entry_dic['feed_id'] = result[4]
        entry_dic['date'] = result[5]
        entry_dic['read'] = result[6]
        entry_dic['keep'] = result[7]
        entry_dic['guid'] = result[8]
        entry_dic['hash'] = result[9]
        entry_dic['entry_id'] = entry_id
    except TypeError: #this error occurs when the feed or item is wrong
        raise NoEntry, entry_id

    if self._image_cache is not None:
        entry_dic['description'] = self._image_cache.rewrite_html(str(entry_id), entry_dic['description'], ajax_url)
    return entry_dic
def get_entry_block(self, entry_list, ajax_url=None):
    if len(entry_list) == 0:
        return []
    qmarks = "?,"*(len(entry_list)-1)+"?"
    self._db_execute(self._c, u'SELECT title, creator, link, description, feed_id, date, read, rowid, keep, guid, hash FROM entries WHERE rowid in ('+qmarks+')', (tuple(entry_list)))
    result = self._c.fetchall()
    retval = []
    for entry in result:
        entry_dic = {}
        entry_dic['title'] = entry[0]
        entry_dic['creator'] = entry[1]
        entry_dic['link'] = entry[2]
        entry_dic['description'] = entry[3]
        entry_dic['feed_id'] = entry[4]
        entry_dic['date'] = entry[5]
        entry_dic['read'] = entry[6]
        entry_dic['entry_id'] = entry[7]
        entry_dic['keep'] = entry[8]
        entry_dic['guid'] = entry[9]
        entry_dic['hash'] = entry[10]
        if self._image_cache is not None:
            entry_dic['description'] = self._image_cache.rewrite_html(str(entry_dic['entry_id']), entry_dic['description'], ajax_url)
        retval.append(entry_dic)
    return retval
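
# Sketch of the intended pattern (ids are hypothetical): the *_block
# variants exist so callers can hydrate many rows with one query instead
# of one query per entry:
#   entries = db.get_entry_block([10, 11, 12])
#   media   = db.get_entry_media_block([e['entry_id'] for e in entries])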
def get_entries_since(self, timestamp):
    self._db_execute(self._c, u'SELECT feed_id, rowid, hash, read FROM entries WHERE fakedate > ?', (timestamp,))
    result = self._c.fetchall()
    if result is None:
        return []
    return result

def get_kept_entries(self, feed_id):
    self._db_execute(self._c, u'SELECT rowid FROM entries WHERE keep=1 AND feed_id=?', (feed_id,))
    result = self._c.fetchall()
    if result is None:
        return []
    return [r[0] for r in result]
def get_filtered_entries(self, feed_index):
    """Assumes this is a feed pointer"""
    self._db_execute(self._c, u'SELECT feed_pointer,description FROM feeds WHERE rowid=?', (feed_index,))
    result = self._c.fetchone()
    if result is None:
        self._filtered_entries[feed_index] = []
        return []
    if result[0] >= 0:
        pointed_feed = result[0]
        #this is where we perform a search
        s_entries = self.search(result[1], pointed_feed)[1]
        if len(s_entries) == 0:
            self._filtered_entries[feed_index] = []
            return []
        s_entries.sort(lambda x, y: int(y[2] - x[2]))
        entries = []
        for entry_id, title, fakedate, feed_id in s_entries:
            self._db_execute(self._c, """SELECT read FROM entries WHERE rowid=? LIMIT 1""", (entry_id,))
            try:
                readinfo = self._c.fetchone()[0]
            except TypeError:
                #the index returned an entry that's gone from the db
                logging.info("error in search results, reindexing")
                self._reindex_entry_list.append(entry_id)
                continue
            entries.append([entry_id, title, fakedate, readinfo, feed_id])
        self._filtered_entries[feed_index] = entries
        return entries
    else:
        logging.error("programming error: tried to get filter information from non-filter feed")
        return []
def get_entrylist(self, feed_index):
    if self.is_feed_filter(feed_index):
        return self.get_filtered_entries(feed_index)
    self._db_execute(self._c, """SELECT rowid,title,fakedate,read,feed_id FROM entries WHERE feed_id=? ORDER BY fakedate DESC""", (feed_index,))
    result = self._c.fetchall()
    if result is None:
        raise NoFeed, feed_index
    return result

def get_first_entry_title(self, feed_id, strip_newlines=False):
    """returns the title of the first entry"""
    if self.is_feed_filter(feed_id):
        if not self._filtered_entries.has_key(feed_id):
            self.get_filtered_entries(feed_id)
        if strip_newlines:
            for entry in self._filtered_entries[feed_id]:
                entry_id, title, fakedate, read, f_id = entry
                return title.replace("\n", " ")
        if len(self._filtered_entries[feed_id]) == 0:
            raise NoEntry, feed_id
        if strip_newlines:
            return self._filtered_entries[feed_id][0][1].replace("\n", " ")
        return self._filtered_entries[feed_id][0][1]
    self._db_execute(self._c, """SELECT title FROM entries WHERE feed_id=? ORDER BY fakedate DESC LIMIT 1""", (feed_id,))
    result = self._c.fetchone()
    if result is None:
        raise NoFeed, feed_id
    if strip_newlines:
        return result[0].replace("\n", " ")
    return result[0]

def get_entry_count(self, feed_id):
    self._db_execute(self._c, u'SELECT count(*) FROM entries WHERE feed_id=?', (feed_id,))
    return self._c.fetchone()[0]
def get_feedlist(self):
    self._db_execute(self._c, """SELECT rowid,title,url FROM feeds ORDER BY UPPER(title)""")
    result = self._c.fetchall()
    if result is None:
        return []
    dataList = [list(row) for row in result]
    return dataList

def get_feed_id_by_url(self, url):
    self._db_execute(self._c, """SELECT rowid FROM feeds WHERE url=?""", (url,))
    try:
        result = self._c.fetchone()[0]
    except TypeError:
        return -1
    return result

def get_feed_title(self, feed_index):
    self._db_execute(self._c, """SELECT title FROM feeds WHERE rowid=?""", (feed_index,))
    try:
        result = self._c.fetchone()[0]
    except TypeError:
        raise NoFeed, feed_index
    #don't return a tuple
    return result #self.decode_text(result)
def get_feed_image(self, feed_id):
    self._db_execute(self._c, u'SELECT image FROM feeds WHERE rowid=?', (feed_id,))
    try:
        return self._c.fetchone()[0]
    except TypeError:
        return None

def get_feed_info(self, feed_id):
    self._db_execute(self._c, """SELECT title, description, url, link, feed_pointer, lastpoll, pollfreq FROM feeds WHERE rowid=?""", (feed_id,))
    result = self._c.fetchone()
    if result is not None:
        d = {'title': result[0],
             'description': result[1],
             'url': result[2],
             'link': result[3],
             'feed_pointer': result[4],
             'lastpoll': result[5],
             'pollfreq': result[6]}
        parts = urlparse.urlsplit(result[2])
        usernameandpassword, domain = urllib.splituser(parts[1])
        #username, password=urllib.splitpasswd(usernameandpassword)
        if usernameandpassword is None:
            d['auth_feed'] = False
        else:
            d['auth_feed'] = True
            d['auth_userpass'] = usernameandpassword
            d['auth_domain'] = domain
        return d
    raise NoFeed, feed_id
def set_feed_name(self, feed_id, name):
    name = self._encode_text(name)
    if name is not None:
        self._db_execute(self._c, u'UPDATE feeds SET title=? WHERE rowid=?', (name, feed_id))
    else:
        self._db_execute(self._c, """SELECT url FROM feeds WHERE rowid=?""", (feed_id,))
        url = self._c.fetchone()[0]
        feedparser.disableWellFormedCheck = 1
        data = feedparser.parse(url)
        channel = data['feed']
        if channel.has_key('title') == 0:
            if channel['description'] != "":
                channel['title'] = channel['description']
            else:
                channel['title'] = url
        channel['title'] = self._encode_text(channel['title'])
        self._db_execute(self._c, u'UPDATE feeds SET title=? WHERE rowid=?', (channel['title'], feed_id))
    self._reindex_feed_list.append(feed_id)
def set_feed_url(self, feed_id, url):
    try:
        self._db_execute(self._c, u'UPDATE feeds SET url=? WHERE rowid=?', (url, feed_id))
    except sqlite.IntegrityError:
        raise FeedAlreadyExists, feed_id

def set_feed_link(self, feed_id, link):
    self._db_execute(self._c, u'UPDATE feeds SET link=? WHERE rowid=?', (link, feed_id))
def set_media(self, media_id, status=None, filename=None, size=None):
    assert media_id is not None
    update_str = u'UPDATE media SET '
    update_data = ()
    if status is not None:
        update_str += u'download_status=?, download_date=?, '
        update_data += (status, int(time.time()))
    if filename is not None:
        update_str += u'file=?, '
        update_data += (filename,)
    if size is not None:
        update_str += u'length=?, '
        update_data += (int(size),)
    assert len(update_data) > 0
    update_str = update_str[:-2] + u' WHERE rowid=?'
    update_data += (media_id,)
    self._db_execute(self._c, update_str, update_data)
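
# Sketch of what the builder above produces for a status+size update (the
# media id and size are hypothetical):
#   set_media(7, status=D_DOWNLOADED, size=1234)
#   -> UPDATE media SET download_status=?, download_date=?, length=? WHERE rowid=?
#      with data (D_DOWNLOADED, <now>, 1234, 7)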
def set_media_download_status(self, media_id, status):
    if status == D_DOWNLOADED:
        self._db_execute(self._c, u'UPDATE media SET download_status=?, download_date=? WHERE rowid=?', (status, int(time.time()), media_id,))
    else:
        self._db_execute(self._c, u'UPDATE media SET download_status=? WHERE rowid=?', (status, media_id,))
    self._db_execute(self._c, u'SELECT entry_id FROM media WHERE rowid=?', (media_id,))
    entry_id = self._c.fetchone()[0]
    if self.entry_flag_cache.has_key(entry_id):
        del self.entry_flag_cache[entry_id]

def set_media_filename(self, media_id, filename):
    self._db_execute(self._c, u'UPDATE media SET file=? WHERE rowid=?', (filename, media_id))

def set_media_viewed(self, media_id, viewed, entry_id=None):
    self._db_execute(self._c, u'UPDATE media SET viewed=? WHERE rowid=?', (int(viewed), media_id))
    if entry_id is None:
        self._db_execute(self._c, u'SELECT entry_id FROM media WHERE rowid=?', (media_id,))
        entry_id = self._c.fetchone()[0]
    if self.entry_flag_cache.has_key(entry_id):
        del self.entry_flag_cache[entry_id]

    if viewed == 1: #check to see if this makes the whole entry viewed
        if self.get_entry_keep(entry_id):
            return
        self._db_execute(self._c, u'SELECT viewed FROM media WHERE entry_id=?', (entry_id,))
        viewedlist = self._c.fetchall()
        for v in viewedlist:
            if v[0] == 0: #still some unviewed
                return
        #everything viewed, so mark the entry read
        self.set_entry_read(entry_id, 1)
    else:
        #mark as unviewed by default
        self.set_entry_read(entry_id, 0)
def get_media_size(self, media_id):
    self._db_execute(self._c, u'SELECT length FROM media WHERE rowid=?', (media_id,))
    return self._c.fetchone()[0]

def set_media_size(self, media_id, size):
    self._db_execute(self._c, u'UPDATE media SET length=? WHERE rowid=?', (int(size), media_id))

def set_entry_read(self, entry_id, read):
    self._db_execute(self._c, u'UPDATE entries SET read=? WHERE rowid=?', (int(read), entry_id))
    self._db_execute(self._c, u'UPDATE media SET viewed=? WHERE entry_id=?', (int(read), entry_id))
    if self.entry_flag_cache.has_key(entry_id):
        del self.entry_flag_cache[entry_id]

def set_entry_keep(self, entry_id, keep):
    self._db_execute(self._c, u'UPDATE entries SET keep=? WHERE rowid=?', (int(keep), entry_id))
    self._db_execute(self._c, u'UPDATE entries SET read=0 WHERE rowid=?', (entry_id,))
    self._db_execute(self._c, u'UPDATE media SET viewed=0 WHERE entry_id=?', (entry_id,))
    if self.entry_flag_cache.has_key(entry_id):
        del self.entry_flag_cache[entry_id]

def get_entry_keep(self, entry_id):
    self._db_execute(self._c, u'SELECT keep FROM entries WHERE rowid=? LIMIT 1', (entry_id,))
    retval = self._c.fetchone()[0]
    return retval
def set_entrylist_read(self, entrylist, read):
    if len(entrylist) == 0:
        return
    l = [str(e) for e in entrylist]
    while len(l) > 0:
        subset = l[:900]
        qmarks = "?,"*(len(subset)-1)+"?"
        self._db_execute(self._c, u'UPDATE entries SET read=? WHERE rowid IN ('+qmarks+')', (int(read),)+tuple(subset))
        self._db_execute(self._c, u'UPDATE media SET viewed=? WHERE entry_id IN ('+qmarks+')', (int(read),)+tuple(subset))
        l = l[900:]
    for e in entrylist:
        if self.entry_flag_cache.has_key(e):
            del self.entry_flag_cache[e]

def get_entry_read(self, entry_id):
    self._db_execute(self._c, u'SELECT read FROM entries WHERE rowid=? LIMIT 1', (entry_id,))
    retval = self._c.fetchone()[0]
    return retval
def clean_media_status(self):
    self._db_execute(self._c, u'UPDATE media SET download_status=? WHERE download_status<1', (D_NOT_DOWNLOADED,))
    self._db_execute(self._c, u'UPDATE media SET download_status=? WHERE download_status=1', (D_RESUMABLE,))
    self._db_execute(self._c, u'UPDATE media SET download_status=? WHERE download_status=? AND file is NULL', (D_NOT_DOWNLOADED, D_DOWNLOADED))

def get_entryid_for_media(self, media_id):
    self._db_execute(self._c, u'SELECT entry_id FROM media WHERE rowid=? LIMIT 1', (media_id,))
    ret = self._c.fetchone()
    return ret[0]
def get_media_for_download(self, resume_paused=True):
    if resume_paused:
        self._db_execute(self._c, u'SELECT rowid, length, entry_id, feed_id FROM media WHERE (download_status=? OR download_status==?) AND viewed=0', (D_NOT_DOWNLOADED, D_RESUMABLE))
    else:
        self._db_execute(self._c, u'SELECT rowid, length, entry_id, feed_id FROM media WHERE download_status=? AND viewed=0', (D_NOT_DOWNLOADED,))
    medialist = self._c.fetchall()
    self._db_execute(self._c, u'SELECT rowid, length, entry_id, feed_id FROM media WHERE download_status=?', (D_ERROR,))
    medialist = medialist + self._c.fetchall()
    newlist = []
    for item in medialist:
        try:
            size = int(item[1])
        except (ValueError, TypeError):
            #the length column sometimes holds junk; keep the digits only
            size = int(''.join([b for b in item[1] if b.isdigit()]))
        new_item = (item[0], size, item[2], item[3])
        newlist.append(new_item)
        if self.entry_flag_cache.has_key(item[2]):
            del self.entry_flag_cache[item[2]]

    #build a list of feeds that do not have the noautodownload flag set
    feeds = [l[3] for l in newlist]
    feeds = utils.uniquer(feeds)
    good_feeds = [f for f in feeds if self.get_flags_for_feed(f) & FF_NOAUTODOWNLOAD == 0]
    newlist = [l for l in newlist if l[3] in good_feeds]
    return newlist
def get_deletable_media(self):
    no_expire = self.get_feeds_for_flag(FF_NOAUTOEXPIRE)
    if len(no_expire) > 0:
        qmarks = "?,"*(len(no_expire)-1)+"?"
        self._db_execute(self._c, u'SELECT media.rowid, media.entry_id, media.feed_id, media.file, media.download_date FROM media INNER JOIN entries ON media.entry_id = entries.rowid WHERE entries.keep=0 AND media.download_status=2 AND media.feed_id not in ('+qmarks+') ORDER BY media.viewed DESC, media.download_date', tuple(no_expire))
    else:
        self._db_execute(self._c, u'SELECT media.rowid, media.entry_id, media.feed_id, media.file, media.download_date FROM media INNER JOIN entries ON media.entry_id = entries.rowid WHERE entries.keep=0 AND media.download_status=2 ORDER BY media.viewed DESC, media.download_date')
    result = self._c.fetchall()
    if result is None:
        return []
    return [[r[0], r[1], r[2], r[3], long(r[4])] for r in result]

def get_resumable_media(self):
    self._db_execute(self._c, u'SELECT rowid, file, entry_id, feed_id FROM media WHERE download_status=?', (D_RESUMABLE,))
    resumable = self._c.fetchall()
    dict_list = []
    if resumable is not None:
        for item in resumable:
            d = {}
            d['media_id'] = item[0]
            d['file'] = item[1]
            d['entry_id'] = item[2]
            d['feed_id'] = item[3]
            dict_list.append(d)
    return dict_list
def mark_feed_as_viewed(self, feed_id):
    """marks a feed's entries and media as viewed.  If there's a way to do this all
    in sql, I'd like to know"""
    if self.is_feed_filter(feed_id):
        if not self._filtered_entries.has_key(feed_id):
            self.get_filtered_entries(feed_id)
        changed_list = self._filtered_entries[feed_id]
        media_rows = []
        for entry in self._filtered_entries[feed_id]:
            self._db_execute(self._c, u'UPDATE entries SET read=1 WHERE rowid=? AND read=0 AND keep=0', (entry[0],))
            self._db_execute(self._c, u'SELECT rowid, download_status FROM media WHERE entry_id=?', (entry[0],))
            media_rows = media_rows+self._c.fetchall()
        feed_id = self._resolve_pointed_feed(feed_id)
    else:
        #feed_id = self._resolve_pointed_feed(feed_id)
        self._db_execute(self._c, u'SELECT rowid FROM entries WHERE feed_id=? AND read=0 AND keep=0', (feed_id,))
        changed_list = self._c.fetchall()
        self._db_execute(self._c, u'UPDATE entries SET read=1 WHERE feed_id=? AND read=0 AND keep=0', (feed_id,))
        self._db_execute(self._c, u'SELECT media.rowid, media.download_status FROM media INNER JOIN entries ON media.entry_id = entries.rowid WHERE entries.keep=0 AND media.feed_id = ?', (feed_id,))
        media_rows = self._c.fetchall()

    if len(media_rows) > 0:
        qmarks = "?,"*(len(media_rows)-1)+"?"
        idlist = [l[0] for l in media_rows]
        self._db_execute(self._c, u'UPDATE media SET viewed=1 WHERE rowid IN ('+qmarks+')', tuple(idlist))
        #for item in media_rows:
        #    self._db_execute(self._c, u'UPDATE media SET viewed=? WHERE rowid=? AND viewed=0',(1,item[0]))
        #    if item[1] == D_ERROR:
        #        self._db_execute(self._c, u'UPDATE media SET download_status=? WHERE rowid=?', (D_NOT_DOWNLOADED,item[0]))

    changed_list = [r[0] for r in changed_list]
    for item in changed_list:
        if self.entry_flag_cache.has_key(item):
            del self.entry_flag_cache[item]
    return changed_list
def media_exists(self, filename):
    self._db_execute(self._c, u'SELECT count(*) FROM media WHERE media.file=?', (filename,))
    count = self._c.fetchone()[0]
    if count > 1:
        logging.warning("multiple entries in db for one filename")
    return count > 0
def get_unplayed_media(self, set_viewed=False):
    """media_id, entry_id, feed_id, file, entry_title, feed_title"""
    self._db_execute(self._c, u'SELECT media.rowid, media.entry_id, media.feed_id, media.file, entries.title FROM media INNER JOIN entries ON media.entry_id = entries.rowid WHERE media.download_status=? AND media.viewed=0', (D_DOWNLOADED,))
    unplayed = self._c.fetchall()
    playlist = []
    if set_viewed:
        for item in unplayed:
            self._db_execute(self._c, u'UPDATE media SET viewed=1 WHERE rowid=?', (item[0],))
            self._db_execute(self._c, u'UPDATE entries SET read=1 WHERE rowid=?', (item[1],))
            if self.entry_flag_cache.has_key(item[1]):
                del self.entry_flag_cache[item[1]]
            playlist.append(item)
    else:
        playlist = unplayed

    retval = []
    for row in playlist:
        feed_title = self.get_feed_title(row[2])
        retval.append(row+(feed_title,))
    return retval
def pause_all_downloads(self):
    self._db_execute(self._c, u'SELECT entry_id FROM media WHERE download_status=?', (D_DOWNLOADING,))
    downloading = self._c.fetchall()
    downloading = utils.uniquer(downloading)
    for e in downloading:
        if self.entry_flag_cache.has_key(e[0]):
            del self.entry_flag_cache[e[0]]
    self._db_execute(self._c, u'UPDATE media SET viewed = 0 WHERE download_status=?', (D_DOWNLOADING,))
    self._db_execute(self._c, u'UPDATE media SET download_status=? WHERE download_status=?', (D_RESUMABLE, D_DOWNLOADING))
def get_entry_download_status(self, entry_id):
    self._db_execute(self._c, u'SELECT download_status, viewed FROM media WHERE download_status!=0 AND entry_id=?', (entry_id,))
    result = self._c.fetchall()
    if result is None or len(result) == 0:
        return 0
    dataList = [list(row) for row in result]
    for datum in dataList:
        val = int(datum[0])
        if val == D_DOWNLOADING:
            return D_DOWNLOADING
        if val == D_RESUMABLE:
            return D_RESUMABLE
    return D_DOWNLOADED
def get_feed_poll_fail(self, feed_id):
    feed_id = self._resolve_pointed_feed(feed_id)
    self._db_execute(self._c, u'SELECT pollfail FROM feeds WHERE rowid=?', (feed_id,))
    result = self._c.fetchone()[0]
    if result == 0:
        return False
    return True

def get_feed_download_status(self, feed_id):
    #feed_id = self._resolve_pointed_feed(feed_id)
    entrylist = self.get_entrylist(feed_id)
    for entry in entrylist:
        status = self.get_entry_download_status(entry[0])
        if status != D_NOT_DOWNLOADED:
            return status
    return D_NOT_DOWNLOADED
def get_feed_verbose(self, feed_id):
    """This function is slow, but all of the time is in the execute and fetchall calls.  I can't even speed
    it up if I do my own sort.  profilers don't lie!"""
    feed_info = {}
    #if utils.HAS_SEARCH:
    #    is_filter = self.is_feed_filter(feed_id)
    #if is_filter or self.cache_dirty:
    flaglist = self.get_entry_flags(feed_id)
    feed_info['important_flag'] = self.get_feed_flag(feed_id, flaglist) #not much speeding up this
    feed_info['entry_count'] = len(flaglist)
    feed_info['unread_count'] = len([f for f in flaglist if f & F_UNVIEWED])
    #else:
    #    self._db_execute(self._c, u'SELECT flag_cache, unread_count_cache, entry_count_cache FROM feeds WHERE rowid=?',(feed_id,))
    #    cached_info = self._c.fetchone()
    #    feed_info['important_flag'] = cached_info[0]
    #    feed_info['unread_count'] = cached_info[1]
    #    feed_info['entry_count'] = cached_info[2]

    self._db_execute(self._c, u'SELECT pollfail FROM feeds WHERE rowid=?', (feed_id,))
    result = self._c.fetchone()[0]
    if result == 0:
        feed_info['poll_fail'] = False
    else:
        feed_info['poll_fail'] = True
    return feed_info
def get_entry_flag(self, entry_id, medialist=None, read=None, media_entries=None):
    if self.entry_flag_cache.has_key(entry_id):
        return self.entry_flag_cache[entry_id]

    importance = 0
    if read is None:
        self._db_execute(self._c, u'SELECT read FROM entries WHERE rowid=?', (entry_id,))
        read = self._c.fetchone()[0]
    if medialist is None:
        if media_entries is not None:
            if entry_id not in media_entries:
                medialist = []
            else:
                medialist = self.get_entry_media(entry_id)
        else:
            medialist = self.get_entry_media(entry_id)

    status = D_NOT_DOWNLOADED
    for medium in medialist:
        if medium['download_status'] == D_DOWNLOADING:
            status = D_DOWNLOADING
            break
        if medium['download_status'] == D_ERROR:
            status = D_ERROR
            break
        if medium['download_status'] == D_RESUMABLE:
            status = D_RESUMABLE
            break
        if medium['download_status'] == D_DOWNLOADED:
            status = D_DOWNLOADED
            break

    if status == D_ERROR:
        importance = importance + F_ERROR
    if status == D_DOWNLOADING:
        importance = importance + F_DOWNLOADING
    if len(medialist) > 0:
        importance = importance + F_MEDIA
        if status == D_DOWNLOADED:
            importance = importance + F_DOWNLOADED
        elif status == D_RESUMABLE:
            importance = importance + F_PAUSED
        for medium in medialist:
            if medium['viewed'] == 0:
                importance = importance + F_UNVIEWED
                break
    else:
        if read == 0:
            importance = importance + F_UNVIEWED

    if USING_FLAG_CACHE:
        self.entry_flag_cache[entry_id] = importance
    return importance
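
# Illustrative flag arithmetic (a sketch using the F_* constants defined
# earlier in this file): an entry whose media finished downloading but
# hasn't been played accumulates F_MEDIA + F_DOWNLOADED + F_UNVIEWED.
# Because each F_ value is a distinct bit, get_feed_flag() below can sort
# the per-entry sums and let the largest one stand for the whole feed.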
def get_entry_for_hash(self, e_hash):
    self._db_execute(self._c, u'SELECT feed_id, rowid FROM entries WHERE hash=?', (e_hash,))
    retval = self._c.fetchone()
    if retval is None:
        return (None, None)
    return retval
def get_entries_for_hashes(self, hashlist, read=None):
    if len(hashlist) == 0:
        return []
    retval = []
    while len(hashlist) > 0:
        subset = hashlist[:900]
        qmarks = "?,"*(len(subset)-1)+"?"
        condition = ''
        if read is not None:
            if read:
                condition = ' AND read=1'
            else:
                condition = ' AND read=0'
        self._db_execute(self._c, u'SELECT feed_id, rowid, read FROM entries WHERE hash IN ('+qmarks+')'+condition, tuple(subset))
        r = self._c.fetchall()
        if r is not None:
            retval += r
        hashlist = hashlist[900:]
    return retval

def get_hashes_for_entries(self, entrylist):
    if len(entrylist) == 0:
        return []
    retval = []
    while len(entrylist) > 0:
        subset = entrylist[:900]
        qmarks = "?,"*(len(subset)-1)+"?"
        self._db_execute(self._c, u'SELECT hash FROM entries WHERE rowid IN ('+qmarks+')', tuple(subset))
        r = self._c.fetchall()
        if r is not None:
            retval += r
        entrylist = entrylist[900:]
    return [r[0] for r in retval]
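
# The 900-row chunking above guards against sqlite's bound-variable ceiling
# (SQLITE_MAX_VARIABLE_NUMBER, 999 by default in builds of this era): each
# qmarks string never asks the library to bind more parameters than it
# will accept.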
def get_unread_hashes(self):
    self._db_execute(self._c, u'SELECT hash FROM entries WHERE read=0')
    retval = self._c.fetchall()
    if retval is None:
        return []
    return [r[0] for r in retval]
def get_unread_entries(self, feed_id):
    if self.is_feed_filter(feed_id):
        if not self._filtered_entries.has_key(feed_id):
            self.get_filtered_entries(feed_id)
        return [r[0] for r in self.get_entrylist(feed_id) if r[3] == 0]
    self._db_execute(self._c, u'SELECT rowid FROM entries WHERE feed_id=? AND read=0', (feed_id,))
    retval = self._c.fetchall()
    if retval is None:
        return []
    return [r[0] for r in retval]

def get_unread_count(self, feed_id):
    if self.is_feed_filter(feed_id):
        if not self._filtered_entries.has_key(feed_id):
            self.get_filtered_entries(feed_id)
        entries = self._filtered_entries[feed_id]
        readlist = []
        for entry in entries:
            self._db_execute(self._c, u'SELECT read FROM entries WHERE rowid=?', (entry[0],))
            readlist.append(self._c.fetchone())
        return len([r for r in readlist if r[0] == 0])
    feed_id = self._resolve_pointed_feed(feed_id)
    self._db_execute(self._c, u'SELECT count(*) FROM entries WHERE feed_id=? and read=0', (feed_id,))
    unread = self._c.fetchone()[0]
    return unread
def correct_unread_count(self, feed_id): #FIXME: we shouldn't need this one day
    """ Set the entry read flag to the correct value based on all of its enclosures.
    This is necessary because there are some bugs with regard to when this
    value gets set."""
    if self.is_feed_filter(feed_id):
        return #just don't do anything
    #feed_id = self._resolve_pointed_feed(feed_id)
    entrylist = self.get_entrylist(feed_id)
    for entry in entrylist:
        flag = self.get_entry_flag(entry[0])
        if flag & F_UNVIEWED:
            self.set_entry_read(entry[0], False)
        else:
            self.set_entry_read(entry[0], True)
def get_entry_flags(self, feed_id):
    flaglist = []
    if self.is_feed_filter(feed_id):
        if not self._filtered_entries.has_key(feed_id):
            self.get_filtered_entries(feed_id)
        entrylist = [e[0] for e in self._filtered_entries[feed_id]]
        for entry in entrylist:
            flaglist.append(self.get_entry_flag(entry))
    else:
        self._db_execute(self._c, u'SELECT rowid, read FROM entries WHERE feed_id=?', (feed_id,))
        entrylist = self._c.fetchall()
        medialist = None
        if self.get_feed_media_count(feed_id) == 0:
            medialist = []
        self._db_execute(self._c, u"""SELECT entry_id FROM media WHERE feed_id=?""", (feed_id,))
        media_entries = self._c.fetchall()
        if media_entries is None:
            media_entries = []
        else:
            media_entries = [r[0] for r in media_entries]
        for entry, read in entrylist:
            flaglist.append(self.get_entry_flag(entry, read=read, medialist=medialist, media_entries=media_entries))
    return flaglist
def get_feed_flag(self, feed_id, flaglist=None):
    """ Based on a feed, what flag best represents the overall status of the feed at top-level?
    This is based on the numeric value of the flag, which is why flags are enumed the way they are."""
    #reconstructed: 1 if the feed has any downloaded media, else 0
    self._db_execute(self._c, u'SELECT count(*) FROM media WHERE feed_id=? AND download_status=?', (feed_id, D_DOWNLOADED))
    feed_has_media = min(1, self._c.fetchone()[0])
    if flaglist is None:
        flaglist = self.get_entry_flags(feed_id)
    if len(flaglist) == 0:
        return 0
    flaglist.sort() #lambda x,y:x[1]-y[1])
    best_flag = flaglist[-1]

    if best_flag & F_DOWNLOADED == 0 and feed_has_media == 1:
        return best_flag + F_DOWNLOADED
    else:
        return best_flag
def get_feeds_for_tag(self, tag):
    self._db_execute(self._c, u'SELECT DISTINCT feeds.rowid FROM feeds INNER JOIN tags ON tags.feed_id=feeds.rowid WHERE tag=?', (tag,))
    result = self._c.fetchall()
    if result is None:
        return []
    return [r[0] for r in result]

def get_feeds_for_flag(self, tag):
    self._db_execute(self._c, u'SELECT DISTINCT feeds.rowid FROM feeds WHERE flags & ? == ?', (tag, tag))
    result = self._c.fetchall()
    if result is None:
        return []
    return [r[0] for r in result]
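
# Usage sketch: the FF_* values are single bits, so "flags & ? == ?" is a
# bitmask test.  e.g. get_feeds_for_flag(FF_NOAUTODOWNLOAD) returns every
# feed whose flags include the no-autodownload bit, regardless of which
# other FF_ bits are also set.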
def get_tags_for_feed(self, feed_id):
    self._db_execute(self._c, u'SELECT tag FROM tags WHERE feed_id=? ORDER BY tag', (feed_id,))
    result = self._c.fetchall()
    if result is None:
        return []
    dataList = [row[0] for row in result]
    return dataList

def get_flags_for_feed(self, feed_id):
    self._db_execute(self._c, u'SELECT flags FROM feeds WHERE rowid=?', (feed_id,))
    result = self._c.fetchone()
    if result is None:
        return 0
    return result[0]

def set_flags_for_feed(self, feed_id, flags):
    self._db_execute(self._c, u'UPDATE feeds SET flags=? WHERE rowid=?', (flags, feed_id))
def get_search_tag(self, tag):
    self._db_execute(self._c, u'SELECT query FROM tags WHERE tag=?', (tag,))
    result = self._c.fetchone()
    if result is None:
        return None
    return result[0]

def get_search_tags(self):
    self._db_execute(self._c, u'SELECT tag,query FROM tags WHERE type=? ORDER BY tag', (T_SEARCH,))
    result = self._c.fetchall()
    if result is None:
        return []
    return result
def add_tag_for_feed(self, feed_id, tag):
    current_tags = self.get_tags_for_feed(feed_id)
    self._db_execute(self._c, u'SELECT favorite FROM tags WHERE tag=? LIMIT 1', (tag,))
    favorite = self._c.fetchone()
    try:
        favorite = favorite[0]
    except:
        favorite = 0
    if current_tags:
        if tag not in current_tags and len(tag) > 0:
            self._db_execute(self._c, u'INSERT INTO tags (tag, feed_id, type, favorite) VALUES (?,?,?,?)', (tag, feed_id, T_TAG, favorite))
    elif len(tag) > 0:
        self._db_execute(self._c, u'INSERT INTO tags (tag, feed_id, type, favorite) VALUES (?,?,?,?)', (tag, feed_id, T_TAG, favorite))
    #clear out any empty tags that slipped in
    self._db_execute(self._c, u'DELETE FROM tags WHERE tag=""')
def add_search_tag(self, query, tag, favorite=False):
    current_tags = [t[0] for t in self.get_all_tags(T_ALL)] #exclude favorite stuff
    if current_tags:
        if tag not in current_tags:
            self._db_execute(self._c, u'INSERT INTO tags (tag, feed_id, query, type, favorite) VALUES (?,?,?,?,?)', (tag, 0, query, T_SEARCH, favorite))
        else:
            raise TagAlreadyExists, "The tag name "+str(tag)+" is already being used"
    else:
        self._db_execute(self._c, u'INSERT INTO tags (tag, feed_id, query, type, favorite) VALUES (?,?,?,?,?)', (tag, 0, query, T_SEARCH, favorite))
def change_query_for_tag(self, tag, query):
    try:
        self._db_execute(self._c, u'UPDATE tags SET query=? WHERE tag=?', (query, tag))
    except:
        logging.error("error updating tag")

def set_tag_favorite(self, tag, favorite=False):
    try:
        self._db_execute(self._c, u'UPDATE tags SET favorite=? WHERE tag=?', (favorite, tag))
    except:
        logging.error("error updating tag favorite")

def rename_tag(self, old_tag, new_tag):
    self._db_execute(self._c, u'UPDATE tags SET tag=? WHERE tag=?', (new_tag, old_tag))

def remove_tag_from_feed(self, feed_id, tag):
    self._db_execute(self._c, u'DELETE FROM tags WHERE tag=? AND feed_id=?', (tag, feed_id))

def remove_tag(self, tag):
    self._db_execute(self._c, u'DELETE FROM tags WHERE tag=?', (tag,))
def get_all_tags(self, type=T_TAG):
    if type == T_ALL:
        self._db_execute(self._c, u'SELECT DISTINCT tag,favorite FROM tags')
    elif type == T_TAG:
        self._db_execute(self._c, u'SELECT DISTINCT tag,favorite FROM tags WHERE type=?', (T_TAG,))
    elif type == T_SEARCH:
        self._db_execute(self._c, u'SELECT DISTINCT tag,favorite FROM tags WHERE type=?', (T_SEARCH,))
    result = self._c.fetchall()
    def alpha_sorter(x, y):
        if x[0].upper() > y[0].upper():
            return 1
        if x[0].upper() == y[0].upper():
            return 0
        return -1
    result.sort(alpha_sorter)
    #sometimes a tag has two different favorite settings due to a bug.
    #just work around it and get rid of the extras
    result = utils.uniquer(result, lambda x: x[0])
    return result

def get_count_for_tag(self, tag):
    self._db_execute(self._c, u'SELECT count(*) FROM tags WHERE tag=?', (tag,))
    result = self._c.fetchone()[0]
    return result
def export_OPML(self, stream):
    if not utils.HAS_PYXML:
        return
    self._db_execute(self._c, u'SELECT title, description, url FROM feeds ORDER BY UPPER(title)')
    result = self._c.fetchall()
    if result is None:
        return
    dataList = [list(row) for row in result]

    #NOTE: the document construction below is reconstructed around this
    #package's OPML helper; the original lines were lost
    o = OPML.OPML()
    for feed in dataList:
        item = OPML.Outline()
        item['title'] = self._ascii(feed[0])
        item['text'] = self._ascii(feed[0])
        if feed[1] is None:
            item['description'] = ""
        else:
            item['description'] = self._ascii(feed[1])
        item['xmlUrl'] = feed[2]
        o.outlines.append(item)
    o.output(stream)
def import_subscriptions(self, stream, opml=True):
    """A generator which first yields the number of feeds, and then the feedids as they
    are inserted, and finally -1 on completion"""
    if not utils.HAS_PYXML and opml == True:
        logging.warning("Trying to import an OPML, but we don't have pyxml.  Aborting import")
        yield (-1, 0)
        return
    if opml:
        try:
            p = OPML.parse(stream)
        except:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            error_msg = ""
            for s in traceback.format_exception(exc_type, exc_value, exc_traceback):
                error_msg += s
            logging.warning(error_msg)
            yield (-1, 0)
            return
        yield (1, len(p.outlines))
        for o in OPML.outline_generator(p.outlines):
            try:
                feed_id = self.insertURL(o['xmlUrl'], o['text'])
                if o.has_key('categories'):
                    for tag in o['categories'].split(','):
                        self.add_tag_for_feed(feed_id, tag.strip())
                #added_feeds.append(feed_id)
                yield (1, feed_id)
            except FeedAlreadyExists, f:
                yield (1, f.feed) #already subscribed; report the existing id
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                error_msg = ""
                for s in traceback.format_exception(exc_type, exc_value, exc_traceback):
                    error_msg += s
                logging.warning(error_msg)
        yield (-1, 0)
    else: #just a list of urls in a file
        url_list = []
        for line in stream.readlines():
            line = line.strip()
            if len(line) == 0:
                continue
            space_at = line.find(' ')
            if space_at > 0:
                url = line[:space_at]
                title = line[space_at+1:].strip()
            else:
                url = line
                title = None
            url_list.append((url, title))
        yield (1, len(url_list))
        for url, title in url_list:
            try:
                feed_id = self.insertURL(url, title)
                yield (1, feed_id)
            except FeedAlreadyExists, f:
                yield (1, f.feed) #already subscribed; report the existing id
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                error_msg = ""
                for s in traceback.format_exception(exc_type, exc_value, exc_traceback):
                    error_msg += s
                logging.warning(error_msg)
        yield (-1, 0)
def search(self, query, filter_feed=None, blacklist=None, since=0):
    if not utils.HAS_SEARCH:
        return ([], [])
    if blacklist is None:
        blacklist = self._blacklist
    if filter_feed: #no blacklist on filter feeds (doesn't make sense)
        result = [l for l in self.searcher.Search(query, since=since)[1] if l[3] == filter_feed]
        return ([filter_feed], result)
    return self.searcher.Search(query, blacklist, since=since)
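
# Usage sketch: search() returns a (feed_ids, entries) pair, which is why
# get_filtered_entries() above indexes the result with [1] and why an empty
# ([], []) is the safe no-search fallback.  Each entry row carries
# (entry_id, title, fakedate, feed_id), matching the l[3] feed test.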
def doindex(self, callback=None):
    if utils.HAS_SEARCH:
        self.searcher.Do_Index_Threaded(callback)

def reindex(self, feed_list=[], entry_list=[], threaded=True):
    """reindex self._reindex_feed_list and self._reindex_entry_list as well as anything specified"""
    if not utils.HAS_SEARCH:
        return
    self._reindex_feed_list += feed_list
    self._reindex_entry_list += entry_list
    try:
        if threaded:
            self.searcher.Re_Index_Threaded(self._reindex_feed_list, self._reindex_entry_list)
        else:
            self.searcher.Re_Index(self._reindex_feed_list, self._reindex_entry_list)
    except Exception, e:
        logging.warning("reindex failure. wait til next time I guess: %s" % str(e))
    self._reindex_feed_list = []
    self._reindex_entry_list = []

def cache_images(self):
    """goes through _image_cache_list and caches everything"""
    if self._image_cache is not None:
        while len(self._image_cache_list) > 0:
            entry_id = self._image_cache_list.pop(0)
            body = self.get_entry(entry_id)['description']
            self._image_cache.cache_html(str(entry_id), body)
        while len(self._image_uncache_list) > 0:
            entry_id = self._image_uncache_list.pop(0)
            self._image_cache.remove_cache(entry_id)
def _resolve_pointed_feed(self, feed_id):
    if not utils.HAS_SEARCH:
        return feed_id
    self._db_execute(self._c, u'SELECT feed_pointer FROM feeds WHERE rowid=?', (feed_id,))
    result = self._c.fetchone()
    if result is None:
        return feed_id
    if result[0] >= 0:
        return result[0]
    return feed_id

def is_feed_filter(self, feed_id):
    if not utils.HAS_SEARCH:
        return False
    self._db_execute(self._c, u'SELECT feed_pointer FROM feeds WHERE rowid=?', (feed_id,))
    result = self._c.fetchone()
    if result is None:
        return False
    if result[0] >= 0:
        return True
    return False

def get_pointer_feeds(self, feed_id):
    if not utils.HAS_SEARCH:
        return []
    self._db_execute(self._c, u'SELECT rowid FROM feeds WHERE feed_pointer=?', (feed_id,))
    results = self._c.fetchall()
    if results is None:
        return []
    return [f[0] for f in results]

def get_associated_feeds(self, feed_id):
    if not utils.HAS_SEARCH:
        return [feed_id]
    feed_list = [feed_id]
    pointer = self._resolve_pointed_feed(feed_id)
    if pointer != feed_id:
        feed_list.append(pointer)
    feed_list += self.get_pointer_feeds(feed_id)
    return feed_list
def set_cache_images(self, cache):
    if not cache:
        if self._image_cache is not None:
            self._image_cache.finish()
            self._image_cache = None
    elif self._image_cache is None:
        store_location = self.get_setting(STRING, '/apps/penguintv/media_storage_location', os.path.join(utils.get_home(), "media"))
        if store_location != "":
            self._image_cache = OfflineImageCache.OfflineImageCache(os.path.join(store_location, "images"))
        else:
            logging.error("could not start image cache, no storage location")

#############convenience functions####################

def _encode_text(self, text):
    try:
        return text.encode('utf8')
    except:
        return u''

def _ascii(self, text):
    try:
        return text.encode('ascii', 'replace')
    except UnicodeDecodeError:
        #a str with non-ascii bytes: decode first, then replace
        return text.decode('utf8').encode('ascii', 'replace')
def DEBUG_get_full_feedlist(self):
    self._db_execute(self._c, """SELECT rowid,title,url FROM feeds ORDER BY rowid""")
    result = self._c.fetchall()
    return result

def DEBUG_reset_freqs(self):
    self._db_execute(self._c, 'UPDATE feeds SET pollfreq=1800')

def DEBUG_get_freqs(self):
    self._db_execute(self._c, 'SELECT title, pollfreq, lastpoll, rowid FROM feeds ORDER BY title')
    a = self._c.fetchall()
    max_len = 0
    for item in a:
        if len(item[0]) > max_len:
            max_len = len(item[0])
    for item in a:
        try:
            #item2=(str(item[0]),item[1]/(60),time.asctime(time.localtime(item[2])))
            print self._ascii(item[0])+" "*(max_len-len(str(item[0])))+" "+str(item[1]/60)+" "+time.asctime(time.localtime(item[2]))+" "+str(item[3])
        except:
            print "whoops: "+ self._ascii(item[0])

    print ""
    self._db_execute(self._c, 'SELECT title, pollfreq, lastpoll, rowid FROM feeds ORDER BY lastpoll')
    a = self._c.fetchall()
    max_len = 0
    for item in a:
        if len(item[0]) > max_len:
            max_len = len(item[0])
    for item in a:
        try:
            #item2=(str(item[0]),item[1]/(60),time.asctime(time.localtime(item[2])))
            print self._ascii(item[0])+" "*(max_len-len(str(item[0])))+" "+str(item[1]/60)+" "+time.asctime(time.localtime(item[2]))+" "+str(item[3])
        except:
            print "whoops: "+ self._ascii(item[0])

    print ""
    self._db_execute(self._c, 'SELECT title, pollfreq, lastpoll, rowid FROM feeds ORDER BY pollfreq')
    a = self._c.fetchall()
    max_len = 0
    for item in a:
        if len(item[0]) > max_len:
            max_len = len(item[0])
    for item in a:
        try:
            #item2=(str(item[0]),item[1]/(60),time.asctime(time.localtime(item[2])))
            print self._ascii(item[0])+" "*(max_len-len(self._ascii(item[0])))+" "+str(item[1]/60)+" "+time.asctime(time.localtime(item[2]))+" "+str(item[3])
        except:
            print "whoops: "+ self._ascii(item[0])

def DEBUG_delete_all_media(self):
    self._db_execute(self._c, u'UPDATE media SET download_status=?', (D_NOT_DOWNLOADED,))

def DEBUG_correct_feed(self, feed_id):
    self._db_execute(self._c, u'SELECT media.download_status, media.viewed, media.entry_id, media.rowid FROM media,entries WHERE media.entry_id=entries.rowid AND media.download_status!=? AND entries.feed_id=?', (D_NOT_DOWNLOADED, feed_id))
    media = self._c.fetchall()
    for item in media:
        self.set_entry_read(item[2], item[1])
class NoFeed(Exception):
    def __init__(self, feed):
        self.feed = feed
    def __str__(self):
        return str(self.feed)

class FeedPollError(Exception):
    def __init__(self, feed, msg="unspecified error"):
        self.feed = feed
        self.msg = msg
    def __str__(self):
        return str(self.feed)+": "+self.msg

class NoEntry(Exception):
    def __init__(self, entry):
        self.entry = entry
    def __str__(self):
        return str(self.entry)

class NoSetting(Exception):
    def __init__(self, setting):
        self.setting = setting
    def __str__(self):
        return str(self.setting)

class DBError(Exception):
    def __init__(self, error):
        self.error = error
    def __str__(self):
        return str(self.error)

class FeedAlreadyExists(Exception):
    def __init__(self, feed):
        self.feed = feed
    def __str__(self):
        return str(self.feed)

class TagAlreadyExists(Exception):
    def __init__(self, tag):
        self.tag = tag
    def __str__(self):
        return str(self.tag)

class BadSearchResults(Exception):
    def __init__(self, m):
        self.m = m
    def __str__(self):
        return str(self.m)