1
4
from urllib import quote
2
5
from urllib2 import urlopen, Request
3
7
from BeautifulSoup import BeautifulSoup
5
9
from ibid.plugins import Processor, match
6
10
from ibid.config import Option
11
from ibid.utils import ibid_version
8
13
help = {'google': u'Retrieves results from Google and Google Calculator.'}
10
user_agent = 'Mozilla/5.0'
12
class Search(Processor):
13
u"""google [for] <term>"""
16
user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
18
@match(r'^google\s+(?:(za)\s+)?(?:for\s+)?(.+?)$')
19
def search(self, event, country, query):
20
url = 'http://www.google.com/search?num=3&q=%s' % quote(query)
15
default_user_agent = 'Mozilla/5.0'
16
default_referrer = "http://ibid.omnia.za.net/"
19
"Remove HTML entities, and replace with their characters"
20
replace = lambda match: unichr(int(match.group(1)))
21
text = re.sub("&#(\d+);", replace, text)
23
replace = lambda match: unichr(htmlentitydefs.name2codepoint[match.group(1)])
24
text = re.sub("&(\w+);", replace, text)
27
class GoogleAPISearch(Processor):
28
u"""google [for] <term>
29
googlefight [for] <term> and <term>"""
33
api_key = Option('api_key', 'Your Google API Key (optional)', None)
34
referrer = Option('referrer', 'The referrer string to use (API searches)', default_referrer)
36
google_api_url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s"
38
def _google_api_search(self, query, resultsize="large"):
39
url = self.google_api_url % quote(query)
40
url += "&rsz=%s" % resultsize
42
url += '&key=%s' % quote(key)
43
req = Request(url, headers={
44
'user-agent': "Ibid/%s" % ibid_version() or "dev",
45
'referrer': self.referrer,
50
result = simplejson.loads(result)
53
@match(r'^google\s+(?:for\s+)?(.+?)$')
54
def search(self, event, query):
55
items = self._google_api_search(query)
57
for item in items["responseData"]["results"]:
59
title = item["titleNoFormatting"]
61
results.append(u'"%s" %s' % (de_entity(title), item["unescapedUrl"]))
64
event.addresponse(u', '.join(results))
66
event.addresponse(u"Wow! Google couldn't find anything.")
68
@match(r'^(?:rank|(?:google(?:fight|compare|cmp)))\s+(?:for\s+)?(.+?)\s+and\s+(.+?)$')
69
def googlefight(self, event, term1, term2):
70
count1 = int(self._google_api_search(term1, "small")["responseData"]["cursor"].get("estimatedResultCount", 0))
71
count2 = int(self._google_api_search(term2, "small")["responseData"]["cursor"].get("estimatedResultCount", 0))
72
event.addresponse(u'%s wins with %i hits, %s had %i hits' %
73
(count1 > count2 and (term1, count1, term2, count2) or (term2, count2, term1, count1))
76
# Unfortunately google API search doesn't support all of google search's
78
# Dear Google: We promise we don't bite.
79
class GoogleScrapeSearch(Processor):
80
u"""gcalc <expression>
82
google.<TLD> [for] <terms>"""
86
user_agent = Option('user_agent', 'HTTP user agent to present to Google (for non-API searches)', default_user_agent)
87
google_scrape_url = "http://www.google.com/search?q=%s"
89
def _google_scrape_search(self, query, country=None):
90
url = self.google_scrape_url
22
url = url + '&meta=cr%%3Dcountry%s' % country.upper()
24
f = urlopen(Request(url, headers={'user-agent': self.user_agent}))
92
url += "&cr=country%s" % country.upper()
93
f = urlopen(Request(url % quote(query), headers={'user-agent': self.user_agent}))
25
94
soup = BeautifulSoup(f.read())
29
items = soup.findAll('li')[:10]
33
title = u''.join([e.string for e in item.a.contents])
34
results.append(u'"%s" %s' % (title, url))
38
event.addresponse(u', '.join(results))
40
class Calc(Processor):
41
u"""gcalc <expression>"""
44
user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
46
98
@match(r'^gcalc\s+(.+)$')
47
99
def calc(self, event, expression):
48
f = urlopen(Request('http://www.google.com/search?num=1&q=%s' % quote(expression), headers={'user-agent': self.user_agent}))
49
soup = BeautifulSoup(f.read())
100
soup = self._google_scrape_search(expression)
52
102
font = soup.find('font', size='+1')
56
106
event.addresponse(font.b.string)
58
class Define(Processor):
62
user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
64
108
@match(r'^gdefine\s+(.+)$')
65
109
def define(self, event, term):
66
f = urlopen(Request('http://www.google.com/search?num=1&q=define:%s' % quote(term), headers={'user-agent': self.user_agent}))
67
soup = BeautifulSoup(f.read())
110
soup = self._google_scrape_search("define:%s" % term)
71
113
for li in soup.findAll('li'):
72
definitions.append('"%s"' % li.contents[0].strip())
114
definitions.append(de_entity(li.contents[0].strip()))
75
event.addresponse(', '.join(definitions))
117
event.addresponse(u' :: '.join(definitions))
77
119
event.addresponse(u"Are you making up words again?")
79
class Compare(Processor):
80
u"""google cmp [for] <term> and <term>"""
83
user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
86
def results(self, term):
87
f = urlopen(Request('http://www.google.com/search?num=1&q=%s' % quote(term), headers={'user-agent': self.user_agent}))
88
soup = BeautifulSoup(f.read())
91
noresults = soup.findAll('div', attrs={'class': 'med'})
92
if noresults and len(noresults) > 1 and noresults[1].find('did not match any documents') != -1:
121
# Not supported by Google API: http://code.google.com/p/google-ajax-apis/issues/detail?id=24
122
@match(r'^google(?:\.com?)?\.([a-z]{2})(?:\s+for)?\s+(.*)$')
123
def country_search(self, event, country, terms):
124
soup = self._google_scrape_search(terms, country)
127
items = soup.findAll('li')
131
title = u''.join([e.string for e in item.a.contents])
132
if title.startswith("Image results for"):
134
results.append(u'"%s" %s' % (de_entity(title), url))
137
if len(results) >= 8:
141
event.addresponse(u", ".join(results))
95
results = soup.find('div', id='prs').nextSibling.contents[5].string.replace(',', '')
99
@match(r'^google\s+cmp\s+(?:for\s+)?(.+?)\s+and\s+(.+?)$')
100
def compare(self, event, term1, term2):
101
count1 = self.results(term1)
102
count2 = self.results(term2)
103
event.addresponse(u'%s wins with %s hits, %s had %s hits' % (count1 > count2 and term1 or term2, count1 > count2 and count1 or count2, count1 > count2 and term2 or term1, count1 > count2 and count2 or count1))
143
event.addresponse(u"Wow! Google couldn't find anything.")
105
145
# vi: set et sta sw=4 ts=4: