4
+-----------------------------------------------------------------------+
5
| program/include/rcube_spellchecker.php |
7
| This file is part of the Roundcube Webmail client |
8
| Copyright (C) 2011, Kolab Systems AG |
9
| Copyright (C) 2008-2011, The Roundcube Dev Team |
10
| Licensed under the GNU GPL |
13
| Spellchecking using different backends |
15
+-----------------------------------------------------------------------+
16
| Author: Aleksander Machniak <machniak@kolabsys.com> |
17
| Author: Thomas Bruederli <roundcube@gmail.com> |
18
+-----------------------------------------------------------------------+
20
$Id: rcube_spellchecker.php 5181 2011-09-06 13:39:45Z alec $
26
* Helper class for spellchecking with Googiespell and PSpell support.
30
class rcube_spellchecker
32
private $matches = array();
37
private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/';
38
private $options = array();
44
const GOOGLE_HOST = 'ssl://www.google.com';
45
const GOOGLE_PORT = 443;
46
const MAX_SUGGESTIONS = 10;
52
* @param string $lang Language code
54
function __construct($lang = 'en')
56
$this->rc = rcmail::get_instance();
57
$this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
58
$this->lang = $lang ? $lang : 'en';
60
if ($this->engine == 'pspell' && !extension_loaded('pspell')) {
62
'code' => 500, 'type' => 'php',
63
'file' => __FILE__, 'line' => __LINE__,
64
'message' => "Pspell extension not available"), true, true);
67
$this->options = array(
68
'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'),
69
'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'),
70
'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
71
'dictionary' => $this->rc->config->get('spellcheck_dictionary'),
77
* Set content and check spelling
79
* @param string $text Text content for spellchecking
80
* @param bool $is_html Enables HTML-to-Text conversion
82
* @return bool True when no misspelling is found, otherwise false
84
function check($text, $is_html = false)
86
// convert to plain text
88
$this->content = $this->html2text($text);
91
$this->content = $text;
94
if ($this->engine == 'pspell') {
95
$this->matches = $this->_pspell_check($this->content);
98
$this->matches = $this->_googie_check($this->content);
101
return $this->found() == 0;
106
* Number of misspellings found (after check)
108
* @return int Number of misspellings
112
return count($this->matches);
117
* Returns suggestions for the specified word
119
* @param string $word The word
121
* @return array Suggestions list
123
function get_suggestions($word)
125
if ($this->engine == 'pspell') {
126
return $this->_pspell_suggestions($word);
129
return $this->_googie_suggestions($word);
134
* Returns misspelled words
136
* @param string $text The content for spellchecking. If empty, the content
137
* used for check() method will be used.
139
* @return array List of misspelled words
141
function get_words($text = null, $is_html=false)
143
if ($this->engine == 'pspell') {
144
return $this->_pspell_words($text, $is_html);
147
return $this->_googie_words($text, $is_html);
152
* Returns checking result in XML (Googiespell) format
154
* @return string XML content
159
$out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">';
161
foreach ($this->matches as $item) {
162
$out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
163
$out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
167
$out .= '</spellresult>';
174
* Returns checking result (misspelled words with suggestions)
176
* @return array Spellchecking result. An array indexed by word.
182
foreach ($this->matches as $item) {
183
if ($this->engine == 'pspell') {
187
$word = mb_substr($this->content, $item[1], $item[2], RCMAIL_CHARSET);
189
$result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
197
* Returns error message
199
* @return string Error message
208
* Checks the text using pspell
210
* @param string $text Text content for spellchecking
212
private function _pspell_check($text)
215
$this->_pspell_init();
222
$text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
227
foreach ($text as $w) {
229
$pos = $w[1] - $diff;
230
$len = mb_strlen($word);
233
if ($this->is_exception($word)) {
235
else if (!pspell_check($this->plink, $word)) {
236
$suggestions = pspell_suggest($this->plink, $word);
238
if (sizeof($suggestions) > self::MAX_SUGGESTIONS)
239
$suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
241
$matches[] = array($word, $pos, $len, null, $suggestions);
244
$diff += (strlen($word) - $len);
252
* Returns the misspelled words
254
private function _pspell_words($text = null, $is_html=false)
260
$this->_pspell_init();
266
// With PSpell we don't need to get suggestions to return misspelled words
268
$text = $this->html2text($text);
271
$text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
273
foreach ($text as $w) {
277
if ($this->is_exception($word)) {
281
if (!pspell_check($this->plink, $word)) {
289
foreach ($this->matches as $m) {
298
* Returns suggestions for a misspelled word
300
private function _pspell_suggestions($word)
303
$this->_pspell_init();
309
$suggestions = pspell_suggest($this->plink, $word);
311
if (sizeof($suggestions) > self::MAX_SUGGESTIONS)
312
$suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
314
return is_array($suggestions) ? $suggestions : array();
319
* Initializes PSpell dictionary
321
private function _pspell_init()
324
$this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST);
328
$this->error = "Unable to load Pspell engine for selected language";
333
private function _googie_check($text)
335
// spell check uri is configured
336
$url = $this->rc->config->get('spellcheck_uri');
339
$a_uri = parse_url($url);
340
$ssl = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl');
341
$port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80);
342
$host = ($ssl ? 'ssl://' : '') . $a_uri['host'];
343
$path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang;
346
$host = self::GOOGLE_HOST;
347
$port = self::GOOGLE_PORT;
348
$path = '/tbproxy/spell?lang=' . $this->lang;
351
// Google has some problem with spaces, use \n instead
352
$gtext = str_replace(' ', "\n", $text);
354
$gtext = '<?xml version="1.0" encoding="utf-8" ?>'
355
.'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
356
.'<text>' . $gtext . '</text>'
360
if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
361
$out = "POST $path HTTP/1.0\r\n";
362
$out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
363
$out .= "Content-Length: " . strlen($gtext) . "\r\n";
364
$out .= "Content-Type: application/x-www-form-urlencoded\r\n";
365
$out .= "Connection: Close\r\n\r\n";
370
$store .= fgets($fp, 128);
375
$this->error = "Empty result from spelling engine";
378
preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);
380
// skip exceptions (if appropriate options are enabled)
381
if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums'])
382
|| !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
384
foreach ($matches as $idx => $m) {
385
$word = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
387
if ($this->is_exception($word)) {
388
unset($matches[$idx]);
397
private function _googie_words($text = null, $is_html=false)
401
$text = $this->html2text($text);
404
$matches = $this->_googie_check($text);
407
$matches = $this->matches;
408
$text = $this->content;
413
foreach ($matches as $m) {
414
$result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
421
/**
 * Builds the suggestion list from the first Googie match.
 *
 * Field 4 of a match holds the suggestions as a tab-separated string; the
 * resulting list is capped at self::MAX_SUGGESTIONS entries.
 *
 * @param string $word The word
 */
private function _googie_suggestions($word)
424
$matches = $this->_googie_check($word);
427
$matches = $this->matches;
430
if ($matches[0][4]) {
431
$suggestions = explode("\t", $matches[0][4]);
432
if (sizeof($suggestions) > self::MAX_SUGGESTIONS) {
433
// MAX_SUGGESTIONS is a class constant: it must be referenced through
// self::, otherwise PHP looks for an (undefined) global constant and
// the list is not truncated to the intended length.
$suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
443
private function html2text($text)
445
$h2t = new html2text($text, false, true, 0);
446
return $h2t->get_text();
451
* Check if the specified word is an exception according to
452
* spellcheck options.
454
* @param string $word The word
456
* @return bool True if the word is an exception, False otherwise
458
public function is_exception($word)
460
// Contain only symbols (e.g. "+9,0", "2:2")
461
if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word))
464
// Contain symbols (e.g. "g@@gle"), all symbols excluding separators
465
if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word))
468
// Contain numbers (e.g. "g00g13")
469
if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word))
472
// Blocked caps (e.g. "GOOGLE")
473
if (!empty($this->options['ignore_caps']) && $word == mb_strtoupper($word))
476
// Use exceptions from dictionary
477
if (!empty($this->options['dictionary'])) {
480
// @TODO: should dictionary be case-insensitive?
481
if (!empty($this->dict) && in_array($word, $this->dict))
490
* Add a word to dictionary
492
* @param string $word The word to add
494
public function add_word($word)
498
foreach (explode(' ', $word) as $word) {
500
if (strlen($word) < 512) {
501
$this->dict[] = $word;
507
$this->dict = array_unique($this->dict);
508
$this->update_dict();
514
* Remove a word from dictionary
516
* @param string $word The word to remove
518
public function remove_word($word)
522
if (($key = array_search($word, $this->dict)) !== false) {
523
unset($this->dict[$key]);
524
$this->update_dict();
530
* Update dictionary row in DB
532
private function update_dict()
534
if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
535
$userid = (int) $this->rc->user->ID;
538
$plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
539
'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));
541
if (!empty($plugin['abort'])) {
545
if ($this->have_dict) {
546
if (!empty($this->dict)) {
547
$this->rc->db->query(
548
"UPDATE ".get_table_name('dictionary')
550
." WHERE user_id " . ($plugin['userid'] ? "= ".$plugin['userid'] : "IS NULL")
551
." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
552
implode(' ', $plugin['dictionary']), $plugin['language']);
554
// don't store empty dict
556
$this->rc->db->query(
557
"DELETE FROM " . get_table_name('dictionary')
558
." WHERE user_id " . ($plugin['userid'] ? "= ".$plugin['userid'] : "IS NULL")
559
." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
560
$plugin['language']);
563
else if (!empty($this->dict)) {
564
$this->rc->db->query(
565
"INSERT INTO " .get_table_name('dictionary')
566
." (user_id, " . $this->rc->db->quoteIdentifier('language') . ", data) VALUES (?, ?, ?)",
567
$plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));
573
* Get dictionary from DB
575
private function load_dict()
577
if (is_array($this->dict)) {
581
if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
582
$userid = (int) $this->rc->user->ID;
585
$plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
586
'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));
588
if (empty($plugin['abort'])) {
590
$this->rc->db->query(
591
"SELECT data FROM ".get_table_name('dictionary')
592
." WHERE user_id ". ($plugin['userid'] ? "= ".$plugin['userid'] : "IS NULL")
593
." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
594
$plugin['language']);
596
if ($sql_arr = $this->rc->db->fetch_assoc($sql_result)) {
597
$this->have_dict = true;
598
if (!empty($sql_arr['data'])) {
599
$dict = explode(' ', $sql_arr['data']);
603
$plugin['dictionary'] = array_merge((array)$plugin['dictionary'], $dict);
606
if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
607
$this->dict = $plugin['dictionary'];
610
$this->dict = array();