~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to src/calibre/web/jsbrowser/browser.py

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2014-02-27 07:48:06 UTC
  • mto: This revision was merged to the branch mainline in revision 74.
  • Revision ID: package-import@ubuntu.com-20140227074806-64wdebb3ptosxhhx
Tags: upstream-1.25.0+dfsg
Import upstream version 1.25.0+dfsg

Show diffs side-by-side

added added

removed removed

Lines of Context:
7
7
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
8
8
__docformat__ = 'restructuredtext en'
9
9
 
10
 
import os, pprint, time, uuid
 
10
import os, pprint, time, uuid, re
11
11
from cookielib import Cookie
12
12
from threading import current_thread
13
13
 
29
29
class LoadError(Exception):
    # NOTE(review): presumably raised when a page fails to load; the raise
    # sites are outside this excerpt -- confirm against the full file.
    pass
31
31
 
 
32
class ElementNotFound(ValueError):
    '''
    Raised when a selector (optionally combined with a text match) does not
    resolve to any element on the current page.

    Derives from ValueError, so code that catches ValueError also catches
    this exception.
    '''
    pass
 
34
 
 
35
class NotAFile(ValueError):
    '''
    Raised by download_file() when the given URL or element does not trigger
    an unsupported-content download, i.e. it does not point to a
    downloadable file.
    '''
    pass
32
37
 
33
38
class WebPage(QWebPage):  # {{{
34
39
 
114
119
        return True
115
120
 
116
121
    def on_unsupported_content(self, reply):
 
122
        reply.abort()
117
123
        self.log.warn('Unsupported content, ignoring: %s'%reply.url())
118
124
 
119
125
    @property
500
506
        if not isinstance(qwe, QWebElement):
501
507
            qwe = self.css_select(qwe)
502
508
            if qwe is None:
503
 
                raise ValueError('Failed to find element with selector: %r'
 
509
                raise ElementNotFound('Failed to find element with selector: %r'
504
510
                        % qwe_or_selector)
505
511
        js = '''
506
512
            var e = document.createEvent('MouseEvents');
526
532
                target = qwe
527
533
                break
528
534
        if target is None:
529
 
            raise ValueError('No element matching %r with text %s found'%(
 
535
            raise ElementNotFound('No element matching %r with text %s found'%(
530
536
                selector, text_or_regex))
531
537
        return self.click(target, wait_for_load=wait_for_load,
532
538
                ajax_replies=ajax_replies, timeout=timeout)
614
620
        if ans is not None:
615
621
            return ans
616
622
 
 
623
    def download_file(self, url_or_selector_or_qwe, timeout=60):
 
624
        '''
 
625
        Download unsupported content: i.e. files the browser cannot handle
 
626
        itself or files marked for saving as files by the website. Useful if
 
627
        you want to download something like an epub file after authentication.
 
628
 
 
629
        You can pass in either the url to the file to be downloaded, or a
 
630
        selector that points to an element to be clicked on the current page
 
631
        which will cause the file to be downloaded.
 
632
        '''
 
633
        ans = [False, None, []]
 
634
        loop = QEventLoop(self)
 
635
        start_time = time.time()
 
636
        end_time = start_time + timeout
 
637
        self.page.unsupportedContent.disconnect(self.page.on_unsupported_content)
 
638
        try:
 
639
            def download(reply):
 
640
                if ans[0]:
 
641
                    reply.abort()  # We only handle the first unsupported download
 
642
                    return
 
643
                ans[0] = True
 
644
                while not reply.isFinished() and end_time > time.time():
 
645
                    if not loop.processEvents():
 
646
                        time.sleep(0.01)
 
647
                    raw = bytes(bytearray(reply.readAll()))
 
648
                    if raw:
 
649
                        ans[-1].append(raw)
 
650
                if not reply.isFinished():
 
651
                    ans[1] = Timeout('Loading of %r took longer than %d seconds'%(url_or_selector_or_qwe, timeout))
 
652
                ans[-1].append(bytes(bytearray(reply.readAll())))
 
653
            self.page.unsupportedContent.connect(download)
 
654
            if hasattr(url_or_selector_or_qwe, 'rstrip') and re.match('[a-z]+://', url_or_selector_or_qwe) is not None:
 
655
                # We have a URL
 
656
                self.page.mainFrame().load(QUrl(url_or_selector_or_qwe))
 
657
            else:
 
658
                self.click(url_or_selector_or_qwe, wait_for_load=False)
 
659
            lw = LoadWatcher(self.page)
 
660
            while not ans[0] and lw.is_loading and end_time > time.time():
 
661
                if not loop.processEvents():
 
662
                    time.sleep(0.01)
 
663
            if not ans[0]:
 
664
                raise NotAFile('%r does not point to a downloadable file. You can only'
 
665
                                 ' use this method to download files that the browser cannot handle'
 
666
                                 ' natively. Or files that are marked with the '
 
667
                                 ' content-disposition: attachment header' % url_or_selector_or_qwe)
 
668
            if ans[1] is not None:
 
669
                raise ans[1]
 
670
            return b''.join(ans[-1])
 
671
        finally:
 
672
            self.page.unsupportedContent.disconnect()
 
673
            self.page.unsupportedContent.connect(self.page.on_unsupported_content)
 
674
 
617
675
    def show_browser(self):
618
676
        '''
619
677
        Show the currently loaded web page in a window. Useful for debugging.