~ubuntu-branches/debian/sid/calibre/sid

« back to all changes in this revision

Viewing changes to recipes/nikkei_sub_shakai.recipe

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2014-05-14 18:17:50 UTC
  • mfrom: (1.5.10)
  • Revision ID: package-import@ubuntu.com-20140514181750-xyrxqa47dbw0qfhu
Tags: 1.36.0+dfsg-1
* New upstream release:
  - Fixes editing of metadata (Closes: #741638)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
__license__   = 'GPL v3'
2
 
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
3
 
'''
4
 
www.nikkei.com
5
 
'''
6
 
 
7
 
import re
8
 
from calibre.web.feeds.recipes import BasicNewsRecipe
9
 
import mechanize
10
 
from calibre.ptempfile import PersistentTemporaryFile
11
 
 
12
 
 
13
 
class NikkeiNet_sub_shakai(BasicNewsRecipe):
    """Recipe for the "Social" feed of the Nikkei electronic edition.

    The single feed is read from an RSS bridge hosted on zou3.net.  The
    recipe requires a subscription: when a username and password are
    configured, ``get_browser`` logs in through id.nikkei.com before any
    article is downloaded.
    """

    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
    __author__      = 'Hiroshi Miura'
    description     = 'News and current market affairs from Japan'
    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article  = 2
    max_articles_per_feed = 20
    language        = 'ja'
    remove_javascript = False
    # Class-level list, so it is shared by every instance of this recipe.
    # It holds the temporary cookie files created by get_browser().
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
                       {'class':"cmn-article_keyword cmn-clearfix"},
                       {'class':"cmn-print_headline cmn-clearfix"},
                         ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
                 (u'\u793e\u4f1a',              u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
        ]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        The browser always gets a fresh ``mechanize.LWPCookieJar``.  If either
        ``self.username`` or ``self.password`` is ``None`` the browser is
        returned without any login attempt.  Otherwise the login form is
        submitted, the automatic redirect forms on www.nikkei.com are
        followed, and the ``redirectFlag`` cookie that the site would normally
        set from JavaScript is injected into the cookie jar by hand.

        Raises:
            ValueError: if the page returned after the first redirect form
                does not contain the ``checkValue`` JavaScript variable, so
                the ``redirectFlag`` cookie cannot be built.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()

        # Turn the disabled input into an HTML comment; mechanize raises an
        # error on it otherwise.
        page = response.get_data()
        page = page.replace("<input id=\"j_id48\"", "<!-- ")
        page = page.replace("gm_home_on.gif\" />", " -->")
        response.set_data(page)
        br.set_response(response)

        # Fill in and send the login form, with auto-login ticked.
        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email']   = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site; by default it answers with a forced redirect
        # form, which has to be submitted manually.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The response carries a value that JavaScript would store in a
        # cookie; grab it from the script source so it can be set by hand.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError(
                "nikkei.com login: 'checkValue' not found in the redirect "
                "page; the login flow may have changed or the login failed")
        redirectflag = match.group(1)

        br.select_form(nr=0)
        self._load_redirect_cookie(cj, redirectflag)
        br.submit()

        return br

    def _load_redirect_cookie(self, cj, redirectflag):
        """Write an LWP cookie file containing ``redirectFlag`` and load it into *cj*.

        The temporary file is appended to ``self.temp_files`` and is always
        closed before it is loaded, even if writing fails.
        """
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write(
                "#LWP-Cookies-2.0\n"
                + "Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs
                + "Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            cookie_file.close()
        cj.load(cookie_file.name)
99
 
 
100
 
 
101
 
 
102