~max-rabkin/ibid/unihan-rescraped

« back to all changes in this revision

Viewing changes to factpacks/knab-verbs.py

  • Committer: Michael Gorven
  • Date: 2009-12-11 16:22:11 UTC
  • mto: This revision was merged to the branch mainline in revision 807.
  • Revision ID: michael@gorven.za.net-20091211162211-3pyc15fqulzl1prb
Import Knab's verbs.knb.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env python
"""Convert a Knab ``verbs.knb`` file into a JSON factoid list.

Usage: knab-verbs.py VERBS_FILE

Two kinds of input line are recognised (anything else is ignored):

* ``verb <name> <id>``            -- registers <name> as a trigger for <id>
* ``<id> action|reply <text>``    -- registers a response for <id>

The result, printed to stdout, is a JSON list of ``[names, values]`` pairs,
one per factoid id.
"""

import re
from collections import defaultdict
from sys import argv

try:
    import json
except ImportError:
    import simplejson as json

# Compiled once; both patterns are applied to every (stripped) input line.
VERB_RE = re.compile(r'^verb (.+) (\d+)$')
RESPONSE_RE = re.compile(r'^(\d+) (action|reply) (.+)$')


def default():
    """Return a fresh, empty ``(names, values)`` pair for a new factoid."""
    return ([], [])


def parse_factoids(lines):
    """Group verb names and responses by numeric factoid id.

    ``lines`` is any iterable of strings (e.g. an open file).  Returns a
    ``defaultdict`` mapping ``int`` id -> ``(names, values)`` where each
    value is formatted as ``'<action> text'`` or ``'<reply> text'`` and
    every ``##`` in the text has been replaced by ``$1``.
    """
    factoids = defaultdict(default)
    for line in lines:
        line = line.strip()

        match = VERB_RE.match(line)
        if match:
            name, factoid_id = match.groups()
            factoids[int(factoid_id)][0].append(name)
            continue

        match = RESPONSE_RE.match(line)
        if match:
            factoid_id, kind, value = match.groups()
            factoids[int(factoid_id)][1].append(
                '<%s> %s' % (kind, value.replace('##', '$1')))
    return factoids


def add_argument_placeholders(factoids):
    """Append ``' $arg'`` to every name of factoids that reference ``$1``.

    The name lists are modified in place.  The suffix is added once per
    factoid, no matter how many of its values contain ``$1``.
    """
    for names, values in factoids.values():
        if any('$1' in value for value in values):
            names[:] = [name + ' $arg' for name in names]


def main(args):
    """Read the file named by ``args[1]`` and print the factoids as JSON.

    Raises ``SystemExit`` with a usage message when no file is given.
    """
    if len(args) < 2:
        raise SystemExit('Usage: knab-verbs.py VERBS_FILE')

    source = open(args[1])
    try:
        factoids = parse_factoids(source)
    finally:
        # Close the input explicitly rather than relying on garbage
        # collection.
        source.close()

    add_argument_placeholders(factoids)
    # list() keeps this serialisable where dict.values() returns a view;
    # the parenthesised print works as a statement or a function.
    print(json.dumps(list(factoids.values()), indent=1))


if __name__ == '__main__':
    main(argv)