2
# -*- coding: utf-8 -*-
#
# Reads the per-part-of-speech data files found under BASEDIR and writes one
# tab-separated record per extracted word form to OUTFILE.
#
# NOTE(review): the bare integer lines scattered through this script (2, 4,
# 7, ...) look like line-number residue from an extraction step, and several
# statements are not visible here (the `DATAFILES = [` opener and its closing
# bracket, every `end`, and the assignments of `path`, `part`, `key`, `seq`,
# `faces` and `face`). Confirm against the original script before relying on
# any comment below that is marked as an assumption.
4
# Directory holding the data files: first command-line argument, or the
# current directory when no argument is given.
BASEDIR = ARGV.length > 0 ? ARGV[0] : "."
7
# One entry per input file: :part is the label written to the output's fourth
# column (see the printf below), :path is the file location under BASEDIR.
# NOTE(review): presumably these are the elements of the DATAFILES array that
# is iterated further down -- the array literal's opener is not visible here.
{ :part => "noun", :path => "#{BASEDIR}/data.noun" },
8
{ :part => "verb", :path => "#{BASEDIR}/data.verb" },
9
{ :part => "adj", :path => "#{BASEDIR}/data.adj" },
10
{ :part => "adv", :path => "#{BASEDIR}/data.adv" },
12
# Name of the TSV file to create (relative to the current working directory).
OUTFILE = 'wordnet.tsv'
14
# Report a BASEDIR that is not a directory.
# NOTE(review): whatever follows the message (presumably an exit) is not
# visible here -- confirm.
if !File::directory?(BASEDIR)
15
printf("%s is not a directory\n", BASEDIR)
20
# Open the output file for writing; the block form closes it automatically.
File::open(OUTFILE, "w") do |outfile|
21
# Process each configured data file in turn.
DATAFILES.each do |info|
24
# NOTE(review): `path` is presumably taken from info[:path] on a line that is
# not visible here.
File::open(path) do |infile|
26
# Tag the line as UTF-8 (changes the encoding label only; bytes are untouched).
line.force_encoding('UTF-8')
27
# Skip lines that begin with a space.
# NOTE(review): presumably these are the header/license lines of the data
# file -- confirm.
next if line.start_with?(" ")
29
# Keep only the part before the first "|" ...
head = line.sub(/ *\|.*/, "")
30
# ... and before the first "@".
head = head.sub(/ *\@.*/, "")
31
# Split the remaining head into whitespace-separated fields.
fields = head.split(" ")
32
# Need at least four fields so that fields[3] exists.
next if fields.length < 4
33
# fields[3] is parsed as hexadecimal and doubled.
# NOTE(review): presumably a hex word count with two fields per word (the
# word itself plus one companion field) -- confirm against the data format.
pivot = fields[3].hex * 2
34
# Skip records that do not contain all `pivot` fields after the first four.
next if pivot + 4 > fields.length
35
# Keep just the `pivot` fields that start at index 4.
fields = fields[4..3+pivot]
37
# Collect every even-indexed field (the first of each pair) into `faces`.
# NOTE(review): the initialisation of `faces` is not visible here.
for i in (0...(fields.length))
38
faces.push(fields[i]) if i % 2 == 0
40
# Text that follows the last "|" on the line (the match is greedy), with any
# spaces directly after that "|" removed.
text = line.sub(/.*\| */, "")
42
# Normalise a word form: underscores become spaces, then runs of whitespace
# collapse to a single space.
# NOTE(review): `face` presumably comes from iterating `faces` on a line that
# is not visible here.
face = face.gsub(/_/, " ")
43
face = face.gsub(/\s+/, " ")
46
# Emit one TSV row: key, sequence number, word form, part label, text.
# NOTE(review): the assignments of `key`, `seq` and `part` are not visible here.
printf(outfile, "%s\t%d\t%s\t%s\t%s\n", key, seq, face, part, text)
47
# Progress report on standard output each time seq is a multiple of 1000.
printf("%s: %d records done\n", $0, seq) if seq % 1000 == 0