18
17
verbs = ListOption('verbs', u'Verbs to ignore', ('is', 'has', 'was', 'might', 'may', 'would', 'will', "isn't", "hasn't", "wasn't", "wouldn't", "won't", 'can', "can't", 'did', "didn't", 'said', 'says', 'should', "shouldn't", 'does', "doesn't"))
21
self.patterns = [ re.compile(r'^(%s)([:;.?>!,-]+)*\s+' % '|'.join(self.names), re.I | re.DOTALL),
22
re.compile(r',\s*(%s)\s*$' % '|'.join(self.names), re.I | re.DOTALL)
20
names = '|'.join(re.escape(x) for x in self.names)
21
verbs = '|'.join(re.escape(x) for x in self.verbs)
23
re.compile(r'^(%s)(?:[:;.?>!,-]|\s)+' % names, re.I | re.DOTALL),
24
# "hello there, bot"-style addressing. But we want to be sure that
25
# there wasn't normal addressing too:
26
re.compile(r'^(?:\S+:.*|.*,\s*(%s))\s*$' % names, re.I | re.DOTALL)
28
self.verb_pattern = re.compile(r'^(?:%s)\s+(?:%s)\s+' % (names, verbs),
26
32
# Decide whether the incoming message is addressed to the bot and, when it
# is, store the message text with the addressing removed.
#
# Reads event.message['stripped'] and event.message['raw']; may write
# event.addressed, event.message['clean'] and event.message['deaddressed'].
#
# NOTE(review): the lines holding only an integer look like gutter line
# numbers from a diff rendering, and the verb check appears twice (once via
# self.verb_pattern, once via the startswith() scan over self.verbs) as if
# two revisions were interleaved -- confirm against the real file.
def handle_addressed(self, event):
27
33
# Give the flag a default so later code can always read event.addressed.
if 'addressed' not in event:
28
34
event.addressed = False
36
# Check for the "<name> <verb> " form before trying the addressing patterns.
# NOTE(review): the body of this test is not visible here -- presumably the
# message is then treated as a statement about the bot; TODO confirm.
if self.verb_pattern.match(event.message['stripped']):
30
39
# Try each compiled addressing pattern in turn.
for pattern in self.patterns:
31
40
matches = pattern.search(event.message['stripped'])
41
# Group 1 holds the name that matched; an empty group means no addressing.
if matches and matches.group(1):
33
42
# The stripped text with every match of this pattern removed.
new_message = pattern.sub('', event.message['stripped'])
34
# Name with no punctuation after it (group 2 empty) and a remainder that
# starts with one of the ignored verbs.
if (len(matches.groups()) > 1 and not matches.group(2) and
35
any(new_message.lower().startswith(verb)
36
for verb in self.verbs)):
39
43
# Record which name was used to address the bot.
event.addressed = matches.group(1)
40
44
event.message['clean'] = new_message
41
45
# Same removal applied to the raw text.
event.message['deaddressed'] = pattern.sub('', event.message['raw'])
172
176
class Format(Processor):
179
def _truncate(self, line, length):
180
if length is not None:
181
eline = line.encode('utf-8')
182
if len(eline) > length:
183
# horizontal ellipsis = 3 utf-8 bytes
184
return eline[:length-3].decode('utf-8', 'ignore') \
185
+ u'\N{horizontal ellipsis}'
175
188
def process(self, event):
    """Adapt every queued response to what its source can deliver.

    For each entry of ``event.responses`` the matching source is looked
    up in ``ibid.sources`` and the reply is rewritten in place:

    * an action sent to a source without 'action' support is wrapped in
      asterisks;
    * an unconflated reply for a single-line source becomes one response
      per line, each truncated to the source's limit;
    * an unconflated reply for a multi-line source with a size limit is
      cut into several responses, preferring to break at a newline,
      then at ``. ; : ,`` and finally at a space;
    * anything else is flattened to one line (when the source is not
      multi-line) and truncated.

    ``event.responses`` is replaced by the resulting list.

    NOTE(review): the text this was restored from was a diff rendering
    with elided context lines.  The statements marked "restored" below
    were inferred from the surrounding code -- confirm against the
    upstream revision.
    """
    filtered = []  # restored: the visible code only ever appends to it

    for response in event.responses:
        source = ibid.sources[response['source'].lower()]
        supports = source.supports
        # Used below as a byte budget for the reply; None means no limit.
        maxlen = source.truncation_point(response, event)

        if response.get('action', False) and 'action' not in supports:
            response['reply'] = u'*%s*' % response['reply']

        conflate = response.get('conflate', True)

        # Expand response into multiple single-line responses:
        if not conflate and 'multiline' not in supports:
            for line in response['reply'].split('\n'):
                # dict() copies every key; the earlier
                # "k not in ('reply')" was a substring test on a str.
                piece = dict(response)
                piece['reply'] = self._truncate(line, maxlen)
                filtered.append(piece)

        # Expand response into multiple multi-line responses:
        elif (not conflate and 'multiline' in supports
                and maxlen is not None):
            message = response['reply']
            while len(message.encode('utf-8')) > maxlen:
                # Number of whole characters that fit into maxlen bytes.
                splitpoint = len(message.encode('utf-8')[:maxlen]
                                 .decode('utf-8', 'ignore'))
                # Fallback when no separator is found: a hard cut.
                parts = [message[:splitpoint].rstrip(),
                         message[splitpoint:].lstrip()]
                for sep in u'\n.;:, ':
                    # restored: whitespace separators are stripped from
                    # the chunk, so one sitting exactly at the limit is
                    # still usable; punctuation stays in the chunk and
                    # must lie inside the limit.
                    if sep in u'\n ':
                        search = message[:splitpoint + 1]
                    else:
                        search = message[:splitpoint]
                    if sep in search:  # restored: rindex() would raise
                        splitpoint = search.rindex(sep)
                        parts = [message[:splitpoint + 1].rstrip(),
                                 message[splitpoint + 1:]]
                        break
                if len(parts[1]) >= len(message):
                    # Nothing was consumed (limit smaller than a single
                    # character); stop instead of looping forever.
                    break
                piece = dict(response)
                piece['reply'] = parts[0]
                filtered.append(piece)
                message = parts[1]  # restored: carry the remainder on

            response['reply'] = message
            filtered.append(response)

        else:
            line = response['reply']
            # Remove any characters that make no sense on IRC-like sources:
            if 'multiline' not in supports:
                # conflate is either True (join lines with a space) or
                # the joining string itself.  '==' is deliberate: it
                # keeps the original comparison semantics.
                if conflate == True:  # noqa: E712
                    joiner = u' '
                else:
                    joiner = conflate or u''
                line = line.expandtabs(1).replace('\n', joiner)

            response['reply'] = self._truncate(line, maxlen)
            filtered.append(response)

    event.responses = filtered