1
#!/usr/local/bin/ruby -Ke
2
## Copyright (C) 2005 MITA Yuusuke <clefs@mail.goo.ne.jp>
4
## Author: MITA Yuusuke <clefs@mail.goo.ne.jp>
5
## Maintainer: SKK Development Team <skk@ring.gr.jp>
6
## Version: $Id: skkdictools.rb,v 1.7 2006/01/04 10:35:06 skk-cvs Exp $
7
## Keywords: japanese, dictionary
8
## Last Modified: $Date: 2006/01/04 10:35:06 $
10
## This program is free software; you can redistribute it and/or modify
11
## it under the terms of the GNU General Public License as published by
12
## the Free Software Foundation; either version 2, or (at your option)
15
## This program is distributed in the hope that it will be useful,
16
## but WITHOUT ANY WARRANTY; without even the implied warranty of
17
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
## General Public License for more details.
20
## You should have received a copy of the GNU General Public License
21
## along with this program, see the file COPYING. If not, write to the
22
## Free Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston,
23
## MA 02110-1301, USA.
26
## Based on registdic.cgi and skkform.rb by Mikio NAKAJIMA.
27
## (Probably we should integrate skkdictools.rb and skkform.rb into one?)
31
## A library packed with small gadgets useful to handle skk dictionary.
33
## Most of scripts under tools/filters and some under tools/convert2skk
34
## require this file to be installed in one of the ruby loadpaths.
46
# ["��" , "c"] (eg. �֤ˤ������ /�������/�ע��֤ˤ�c /��/��)
47
# ["��" , "p"] (eg. �֤ĤäѤ� /�ͤäѤ�/�ע��֤�p /��/��)
48
# ["��" , "c"] (eg. �֤����ä��� /��ä���/�ע��֤���c /��/��)
49
# Reverse-lookup table of [character, letter] pairs.  okuri_nasi_to_ari
# searches it with Array#assoc, using the first character of a candidate's
# trailing part, and appends the paired letter to the key it returns.
# NOTE(review): the first element of every pair is mis-encoded in this copy
# (presumably EUC-JP kana, given the -Ke switch on the shebang line) --
# confirm against a correctly decoded file before editing any entry.
GyakuhikiOkurigana = [
51
["��" , "b"], ["��" , "b"], ["��" , "b"], ["��" , "b"], ["��" , "b"],
52
["��" , "d"], ["��" , "d"], ["��" , "d"], ["��" , "d"], ["��" , "d"],
54
["��" , "g"], ["��" , "g"], ["��" , "g"], ["��" , "g"], ["��" , "g"],
55
["��" , "h"], ["��" , "h"], ["��" , "h"], ["��" , "h"], ["��" , "h"],
58
["��" , "k"], ["��" , "k"], ["��" , "k"], ["��" , "k"], ["��" , "k"],
59
["��" , "m"], ["��" , "m"], ["��" , "m"], ["��" , "m"], ["��" , "m"],
60
["��" , "n"], ["��" , "n"], ["��" , "n"], ["��" , "n"], ["��" , "n"], ["��" , "n"],
62
["��" , "p"], ["��" , "p"], ["��" , "p"], ["��" , "p"], ["��" , "p"],
63
["��" , "r"], ["��" , "r"], ["��" , "r"], ["��" , "r"], ["��" , "r"],
64
["��" , "s"], ["��" , "s"], ["��" , "s"], ["��" , "s"], ["��" , "s"],
65
["��" , "t"], ["��" , "t"], ["��" , "t"], ["��" , "t"], ["��" , "t"], ["��" , "t"],
67
["��" , "w"], ["��" , "w"],
68
["��" , "x"], ["��" , "x"], ["��" , "x"], ["��" , "x"], ["��" , "x"],
69
["��" , "x"], ["��" , "x"], ["��" , "x"], ["��" , "x"], ["��" , "x"], ["��" , "x"],
70
["��" , "y"], ["��" , "y"], ["��" , "y"], ["��" , "y"], ["��" , "y"], ["��" , "y"],
71
["��" , "z"], ["��" , "z"], ["��" , "z"], ["��" , "z"], ["��" , "z"]
74
# ("あさやけ", "朝焼け") => ("あさやk", "朝焼", "け")
76
def okuri_nasi_to_ari(midasi, candidate)
77
return nil if (/(.*[^��-��])([��-��]+)$/ !~ candidate)
80
return nil if !(can_prefix && can_postfix && (/(.+)#{can_postfix}$/ =~ midasi))
82
key_kana_postfix = GyakuhikiOkurigana.assoc(can_postfix.split('')[0])
83
return nil if key_kana_postfix.empty?
85
okuri = key_kana_postfix[1]
86
# handle some exceptions
87
# XXX inplace of changing "t" into "c", this function should return
89
okuri = "c" if can_postfix =~ /^�ä�/ || can_postfix =~ /^��[����]/
90
okuri = "p" if can_postfix =~ /^��[��-��]/
91
okuri = "k" if can_postfix =~ /^��[��-��]/
93
return key_prefix + okuri, can_prefix, can_postfix
96
# Print one dictionary line of the form "key /candidate/" with Kernel#print.
#
# key        - entry key, written before " /".
# candidate  - candidate string.
# annotation - optional; when non-nil and non-empty it is written after ";".
# comment    - optional; when non-nil and non-empty it is written after a
#              separator string that follows the annotation (or follows ";"
#              when there is no annotation).
#
# NOTE(review): the separator literal is mis-encoded in this copy, and the
# else/end lines separating the four print statements are not visible --
# confirm against a correctly decoded file.
def print_pair(key, candidate, annotation = nil, comment = nil)
97
if !annotation.nil? && !annotation.empty?
98
if comment.nil? || comment.empty?
99
# annotation only
print "#{key} /#{candidate};#{annotation}/\n"
101
# annotation followed by separator and comment
print "#{key} /#{candidate};#{annotation}��#{comment}/\n"
104
if comment.nil? || comment.empty?
105
# bare candidate
print "#{key} /#{candidate}/\n"
107
# no annotation: separator and comment directly after ";"
print "#{key} /#{candidate};��#{comment}/\n"
112
# borrowed from skkform.rb
115
self.gsub(/����/, '\\1��').tr('��-��', '��-��')
119
self.gsub(/��/, '����').tr('��-��', '��-��')
122
# Return a copy of the receiver with a single leading or trailing marker
# character ("<", ">" or "?") removed.  A marker is dropped only when the
# rest of the line consists solely of characters from the class used in the
# patterns; otherwise the receiver's text is returned unchanged.
# NOTE(review): that character class is mis-encoded in this copy -- confirm
# against a correctly decoded file.
def cut_off_prefix_postfix
123
# first sub strips a leading marker, second sub strips a trailing one
self.sub(/^[<>\?]([����-��]+)$/, '\1').sub(/^([����-��]+)[<>\?]$/, '\1')
126
# from the book "Object-Oriented Scripting Language Ruby" (in Japanese), p. 121
127
# Split the receiver as one CSV record and collect the unquoted fields.
#
# delimiter - field separator (default ",").
#
# Raises RuntimeError ("cannot decode CSV") when text is still pending in
# `data` after all pieces have been consumed.
#
# NOTE(review): several lines of this method (initialisation of `csv` and
# `data`, the branches around the statements below, and the return) are not
# visible in this copy; presumably it returns `csv` -- confirm.
def csv_split(delimiter = ',')
130
# Naive split first; pieces of a quoted field containing the delimiter
# are glued back together below.
self.split(delimiter).each do |d|
134
# re-attach this piece to the pending field, restoring the delimiter
data += delimiter + d
137
# The pending field is taken when it ends with a quote that is not preceded
# by another quote, or when it is exactly "" (two quote characters).
if /[^"]"$/ =~ data or '""' == data
138
# strip the surrounding quotes and collapse doubled quotes
csv << data.sub(/^"(.*)"$/, '\1').gsub(/""/, '"')
146
raise "cannot decode CSV\n" unless data.empty?
151
self.gsub(/"/, '\\"').sub(/.*,.*/, '"\&"')
155
self.sub(/^\"(.+)\"$/, '\1')
159
#if !(/^([-\w]+.*)$/ =~ self) && (/[;\r\n\/\"]/ =~ self)
160
if /[;\r\n\/\"]/ =~ self
161
tmp = self.gsub(/;/, '\\\073').gsub(/\//, '\\\057').gsub(/\r/, '\\r').gsub(/\n/, '\\n').gsub(/"/, '\\"')
162
return '(concat "' + tmp + '")'
168
# 09/30/04 => 04/09/30
170
self.sub(/^([0-9]*)\/([0-9]*)\/([0-9]*)/, '\3/\1/\2')
174
midasi, rest = self.chop.split(" /", 2)
175
tokens = rest.sub(/\/\[.*/, "").split("/") if !rest.nil?
176
return midasi, tokens
179
# Split the receiver into word, annotation and comment parts.
#
# delimiter - separator between annotation and comment inside the text that
#             follows the first ";"; pass nil to skip that second split.
#             NOTE(review): the default literal is mis-encoded in this copy
#             -- confirm against a correctly decoded file.
#
# Returns [word, annotation, comment].  annotation and comment are nil when
# the receiver contains no ";"; comment is nil when delimiter is nil or does
# not occur in the annotation text.
def skk_split_tokens(delimiter = '��')
180
# everything after the first ";" is annotation text
word, annotation = self.split(";", 2)
181
return word, nil, nil if annotation.nil?
182
return word, annotation, nil if delimiter.nil?
183
# split the annotation text once more, at the first delimiter
annotation, comment = annotation.split(delimiter, 2)
184
return word, annotation, comment
191
key = Kconv.toeuc(key)
192
key = CGI::escape(key)
196
sock = TCPSocket.open("search.goo.ne.jp", 80)
197
sock.printf("GET /web.jsp?DC=1&MT=\"%s\" HTTP/1.0\r\n\r\n", key)
198
sock.readlines.each do |line|
199
line = Kconv.toeuc(line)
200
if (/<b>��*([,0-9]+)<\/b>����/ =~ line)
201
hits = $1.gsub(/,/, '').to_i
202
return hits if hits > 0
203
# 0hits, or system error?
204
elsif (/<br>�������븡����̤���������ޤ���/ =~ line)
205
#elsif (/<br>���������ƥफ�鸡����̤�����˼����Ǥ��ʤ��ä���ǽ��������ޤ���/ =~ line) next
211
end while tries < 3 && hits < 1