1
/* vim:set et ts=4 sts=4:
3
* ibus-pinyin - The Chinese PinYin engine for IBus
5
* Copyright (c) 2008-2010 Peng Huang <shawn.p.huang@gmail.com>
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
* the Free Software Foundation; either version 2, or (at your option)
12
* This program is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
* GNU General Public License for more details.
17
* You should have received a copy of the GNU General Public License
18
* along with this program; if not, write to the Free Software
19
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24
#include "PinyinArray.h"
28
#define DB_CACHE_SIZE "5000"
29
#define DB_INDEX_SIZE (3)
31
#define DB_COLUMN_USER_FREQ (0)
32
#define DB_COLUMN_PHRASE (1)
33
#define DB_COLUMN_FREQ (2)
34
#define DB_COLUMN_S0 (3)
36
#define DB_PREFETCH_LEN (6)
38
boost::scoped_ptr<Database> Database::m_instance;
40
class Conditions : public std::vector<std::string> {
42
Conditions (void) : std::vector<std::string> (1) {}
54
const std::string & value = std::vector<std::string>::at (--i);
60
void appendVPrintf (gint begin, gint end, const gchar *fmt, va_list args) {
62
g_vsnprintf (str, sizeof(str), fmt, args);
63
for (gint i = begin; i < end; i++) {
68
void appendPrintf (gint begin, gint end, const gchar *fmt, ...) {
71
appendVPrintf (begin, end, fmt, args);
79
: m_db (db), m_stmt (NULL) {
80
g_assert (m_db != NULL);
85
if (sqlite3_finalize (m_stmt) != SQLITE_OK) {
86
g_warning ("destroy sqlite stmt failed!");
91
gboolean prepare (const String &sql) {
92
if (sqlite3_prepare (m_db,
97
g_warning ("parse sql failed!\n %s", sql.c_str ());
104
gboolean step (void) {
105
switch (sqlite3_step (m_stmt)) {
111
g_warning ("sqlites step error!");
116
const gchar *columnText (guint col) {
117
return (const gchar *) sqlite3_column_text (m_stmt, col);
120
gint columnInt (guint col) {
121
return sqlite3_column_int (m_stmt, col);
126
sqlite3_stmt *m_stmt;
129
Query::Query (const PinyinArray & pinyin,
134
m_pinyin_begin (pinyin_begin),
135
m_pinyin_len (pinyin_len),
138
g_assert (m_pinyin.size () >= pinyin_begin + pinyin_len);
146
Query::fill (PhraseArray &phrases, gint count)
150
while (m_pinyin_len > 0) {
151
if (G_LIKELY (m_stmt.get () == NULL)) {
152
m_stmt = Database::instance ().query (m_pinyin, m_pinyin_begin, m_pinyin_len, -1, m_option);
153
g_assert (m_stmt.get () != NULL);
156
while (m_stmt->step ()) {
159
g_strlcpy (phrase.phrase,
160
m_stmt->columnText (DB_COLUMN_PHRASE),
161
sizeof (phrase.phrase));
162
phrase.freq = m_stmt->columnInt (DB_COLUMN_FREQ);
163
phrase.user_freq = m_stmt->columnInt (DB_COLUMN_USER_FREQ);
164
phrase.len = m_pinyin_len;
166
for (guint i = 0, column = DB_COLUMN_S0; i < m_pinyin_len; i++) {
167
phrase.pinyin_id[i].sheng = m_stmt->columnInt (column++);
168
phrase.pinyin_id[i].yun = m_stmt->columnInt (column++);
171
phrases.push_back (phrase);
173
if (G_UNLIKELY (row == count)) {
185
Database::Database (void)
191
Database::~Database (void)
194
if (sqlite3_close (m_db) != SQLITE_OK) {
195
g_warning ("close sqlite database failed!");
201
Database::executeSQL (const gchar *sql)
204
if (sqlite3_exec (m_db, sql, NULL, NULL, &errmsg) != SQLITE_OK) {
205
g_warning ("%s: %s", errmsg, sql);
206
sqlite3_free (errmsg);
213
Database::open (void)
217
#if (SQLITE_VERSION_NUMBER >= 3006000)
218
sqlite3_initialize ();
221
static const gchar * maindb [] = {
222
PKGDATADIR"/db/local.db",
223
PKGDATADIR"/db/open-phrase.db",
224
PKGDATADIR"/db/android.db",
229
for (i = 0; i < G_N_ELEMENTS (maindb); i++) {
230
if (!g_file_test(maindb[i], G_FILE_TEST_IS_REGULAR))
232
if (sqlite3_open_v2 (maindb[i], &m_db,
233
SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) == SQLITE_OK) {
234
g_message ("Use database %s", maindb[i]);
239
if (i == G_N_ELEMENTS (maindb)) {
240
g_warning ("can not open main database");
247
/* Set synchronous=OFF, write user database will become much faster.
248
* It will cause user database corrupted, if the operatering system
249
* crashes or computer loses power.
251
m_sql << "PRAGMA synchronous=NORMAL;\n";
254
/* Set the cache size for better performance */
255
m_sql << "PRAGMA cache_size=" DB_CACHE_SIZE ";\n";
257
/* Using memory for temp store */
258
m_sql << "PRAGMA temp_store=MEMORY;\n";
260
/* Set journal mode */
261
m_sql << "PRAGMA journal_mode=PERSIST;\n";
263
/* Using EXCLUSIVE locking mode on databases
264
* for better performance */
265
m_sql << "PRAGMA locking_mode=EXCLUSIVE;\n";
266
if (!executeSQL (m_sql))
269
/* Attach user database */
270
m_buffer = g_get_user_cache_dir ();
271
m_buffer << G_DIR_SEPARATOR_S << "ibus"
272
<< G_DIR_SEPARATOR_S << "pinyin";
273
g_mkdir_with_parents (m_buffer, 0750);
274
m_buffer << G_DIR_SEPARATOR_S << "user-1.3.db";
275
retval = openUserDB (m_buffer);
277
g_warning ("Can not open user database %s", m_buffer.c_str ());
278
if (!openUserDB (":memory:"))
282
/* prefetch some tables */
289
sqlite3_close (m_db);
296
Database::openUserDB (const gchar *userdb)
298
m_sql.printf ("ATTACH DATABASE \"%s\" AS userdb;", userdb);
299
if (!executeSQL (m_sql))
302
m_sql = "BEGIN TRANSACTION;\n";
303
/* create desc table*/
304
m_sql << "CREATE TABLE IF NOT EXISTS userdb.desc (name PRIMARY KEY, value TEXT);\n";
305
m_sql << "INSERT OR IGNORE INTO userdb.desc VALUES " << "('version', '1.2.0');\n"
306
<< "INSERT OR IGNORE INTO userdb.desc VALUES " << "('uuid', '" << UUID () << "');\n"
307
<< "INSERT OR IGNORE INTO userdb.desc VALUES " << "('hostname', '" << Hostname () << "');\n"
308
<< "INSERT OR IGNORE INTO userdb.desc VALUES " << "('username', '" << Env ("USERNAME") << "');\n"
309
<< "INSERT OR IGNORE INTO userdb.desc VALUES " << "('create-time', datetime());\n"
310
<< "INSERT OR IGNORE INTO userdb.desc VALUES " << "('attach-time', datetime());\n";
312
/* create phrase tables */
313
for (guint i = 0; i < MAX_PHRASE_LEN; i++) {
314
m_sql.appendPrintf ("CREATE TABLE IF NOT EXISTS userdb.py_phrase_%d (user_freq, phrase TEXT, freq INTEGER ", i);
315
for (guint j = 0; j <= i; j++)
316
m_sql.appendPrintf (",s%d INTEGER, y%d INTEGER", j, j);
321
m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "userdb.index_0_0 ON py_phrase_0(s0,y0,phrase);\n";
322
m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "userdb.index_1_0 ON py_phrase_1(s0,y0,s1,y1,phrase);\n";
323
m_sql << "CREATE INDEX IF NOT EXISTS " << "userdb.index_1_1 ON py_phrase_1(s0,s1,y1);\n";
324
for (guint i = 2; i < MAX_PHRASE_LEN; i++) {
325
m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "userdb.index_" << i << "_0 ON py_phrase_" << i
327
for (guint j = 1; j <= i; j++)
328
m_sql << ",s" << j << ",y" << j;
329
m_sql << ",phrase);\n";
330
m_sql << "CREATE INDEX IF NOT EXISTS " << "userdb.index_" << i << "_1 ON py_phrase_" << i << "(s0,s1,s2,y2);\n";
334
if (!executeSQL (m_sql))
337
m_sql = "UPDATE userdb.desc SET value=datetime() WHERE name='attach-time';";
339
if (!executeSQL (m_sql))
345
m_sql = "DETACH DATABASE userdb;";
351
Database::prefetch (void)
354
for (guint i = 0; i < DB_PREFETCH_LEN; i++)
355
m_sql << "SELECT * FROM py_phrase_" << i << ";\n";
357
// g_debug ("prefetching ...");
362
inline static gboolean
363
pinyin_option_check_sheng (guint option, gint id, gint fid)
365
switch ((id << 16) | fid) {
366
case (PINYIN_ID_C << 16) | PINYIN_ID_CH:
367
return (option & PINYIN_FUZZY_C_CH);
368
case (PINYIN_ID_CH << 16) | PINYIN_ID_C:
369
return (option & PINYIN_FUZZY_CH_C);
370
case (PINYIN_ID_Z << 16) | PINYIN_ID_ZH:
371
return (option & PINYIN_FUZZY_Z_ZH);
372
case (PINYIN_ID_ZH << 16) | PINYIN_ID_Z:
373
return (option & PINYIN_FUZZY_ZH_Z);
374
case (PINYIN_ID_S << 16) | PINYIN_ID_SH:
375
return (option & PINYIN_FUZZY_S_SH);
376
case (PINYIN_ID_SH << 16) | PINYIN_ID_S:
377
return (option & PINYIN_FUZZY_SH_S);
378
case (PINYIN_ID_L << 16) | PINYIN_ID_N:
379
return (option & PINYIN_FUZZY_L_N);
380
case (PINYIN_ID_N << 16) | PINYIN_ID_L:
381
return (option & PINYIN_FUZZY_N_L);
382
case (PINYIN_ID_F << 16) | PINYIN_ID_H:
383
return (option & PINYIN_FUZZY_F_H);
384
case (PINYIN_ID_H << 16) | PINYIN_ID_F:
385
return (option & PINYIN_FUZZY_H_F);
386
case (PINYIN_ID_L << 16) | PINYIN_ID_R:
387
return (option & PINYIN_FUZZY_L_R);
388
case (PINYIN_ID_R << 16) | PINYIN_ID_L:
389
return (option & PINYIN_FUZZY_R_L);
390
case (PINYIN_ID_K << 16) | PINYIN_ID_G:
391
return (option & PINYIN_FUZZY_K_G);
392
case (PINYIN_ID_G << 16) | PINYIN_ID_K:
393
return (option & PINYIN_FUZZY_G_K);
394
default: return FALSE;
398
inline static gboolean
399
pinyin_option_check_yun (guint option, gint id, gint fid)
401
switch ((id << 16) | fid) {
402
case (PINYIN_ID_AN << 16) | PINYIN_ID_ANG:
403
return (option & PINYIN_FUZZY_AN_ANG);
404
case (PINYIN_ID_ANG << 16) | PINYIN_ID_AN:
405
return (option & PINYIN_FUZZY_ANG_AN);
406
case (PINYIN_ID_EN << 16) | PINYIN_ID_ENG:
407
return (option & PINYIN_FUZZY_EN_ENG);
408
case (PINYIN_ID_ENG << 16) | PINYIN_ID_EN:
409
return (option & PINYIN_FUZZY_ENG_EN);
410
case (PINYIN_ID_IN << 16) | PINYIN_ID_ING:
411
return (option & PINYIN_FUZZY_IN_ING);
412
case (PINYIN_ID_ING << 16) | PINYIN_ID_IN:
413
return (option & PINYIN_FUZZY_ING_IN);
414
case (PINYIN_ID_IAN << 16) | PINYIN_ID_IANG:
415
return (option & PINYIN_FUZZY_IAN_IANG);
416
case (PINYIN_ID_IANG << 16) | PINYIN_ID_IAN:
417
return (option & PINYIN_FUZZY_IANG_IAN);
418
case (PINYIN_ID_UAN << 16) | PINYIN_ID_UANG:
419
return (option & PINYIN_FUZZY_UAN_UANG);
420
case (PINYIN_ID_UANG << 16) | PINYIN_ID_UAN:
421
return (option & PINYIN_FUZZY_UANG_UAN);
422
default: return FALSE;
427
Database::query (const PinyinArray &pinyin,
433
g_assert (pinyin_begin < pinyin.size ());
434
g_assert (pinyin_len <= pinyin.size () - pinyin_begin);
435
g_assert (pinyin_len <= MAX_PHRASE_LEN);
438
Conditions conditions;
440
for (guint i = 0; i < pinyin_len; i++) {
443
p = pinyin[i + pinyin_begin];
445
fs1 = pinyin_option_check_sheng (option, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng);
446
fs2 = pinyin_option_check_sheng (option, p->pinyin_id[0].sheng, p->pinyin_id[2].sheng);
448
if (G_LIKELY (i > 0))
449
conditions.appendPrintf (0, conditions.size (),
452
if (G_UNLIKELY (fs1 || fs2)) {
453
if (G_LIKELY (i < DB_INDEX_SIZE)) {
454
if (fs1 && fs2 == 0) {
455
conditions.double_ ();
456
conditions.appendPrintf (0, conditions.size () >> 1,
457
"s%d=%d", i, p->pinyin_id[0].sheng);
458
conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
459
"s%d=%d", i, p->pinyin_id[1].sheng);
461
else if (fs1 == 0 && fs2) {
462
conditions.double_ ();
463
conditions.appendPrintf (0, conditions.size () >> 1,
464
"s%d=%d", i, p->pinyin_id[0].sheng);
465
conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
466
"s%d=%d", i, p->pinyin_id[2].sheng);
469
gint len = conditions.size ();
470
conditions.triple ();
471
conditions.appendPrintf (0, len,
472
"s%d=%d", i, p->pinyin_id[0].sheng);
473
conditions.appendPrintf (len, len << 1,
474
"s%d=%d", i, p->pinyin_id[1].sheng);
475
conditions.appendPrintf (len << 1, conditions.size (),
476
"s%d=%d", i, p->pinyin_id[2].sheng);
480
if (fs1 && fs2 == 0) {
481
conditions.appendPrintf (0, conditions.size (),
482
"s%d IN (%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng);
484
else if (fs1 == 0 && fs2) {
485
conditions.appendPrintf (0, conditions.size (),
486
"s%d IN (%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[2].sheng);
489
conditions.appendPrintf (0, conditions.size (),
490
"s%d IN (%d,%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng, p->pinyin_id[2].sheng);
495
conditions.appendPrintf (0, conditions.size (),
496
"s%d=%d", i, p->pinyin_id[0].sheng);
499
if (p->pinyin_id[0].yun != PINYIN_ID_ZERO) {
500
if (pinyin_option_check_yun (option, p->pinyin_id[0].yun, p->pinyin_id[1].yun)) {
501
if (G_LIKELY (i < DB_INDEX_SIZE)) {
502
conditions.double_ ();
503
conditions.appendPrintf (0, conditions.size () >> 1,
504
" AND y%d=%d", i, p->pinyin_id[0].yun);
505
conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
506
" and y%d=%d", i, p->pinyin_id[1].yun);
509
conditions.appendPrintf (0, conditions.size (),
510
" AND y%d IN (%d,%d)", i, p->pinyin_id[0].yun, p->pinyin_id[1].yun);
514
conditions.appendPrintf (0, conditions.size (),
515
" AND y%d=%d", i, p->pinyin_id[0].yun);
522
for (guint i = 0; i < conditions.size (); i++) {
523
if (G_UNLIKELY (i == 0))
524
m_buffer << " (" << conditions[i] << ")\n";
526
m_buffer << " OR (" << conditions[i] << ")\n";
530
gint id = pinyin_len - 1;
531
m_sql << "SELECT * FROM ("
532
"SELECT 0 AS user_freq, * FROM main.py_phrase_" << id << " WHERE " << m_buffer << " UNION ALL "
533
"SELECT * FROM userdb.py_phrase_" << id << " WHERE " << m_buffer << ") "
534
"GROUP BY phrase ORDER BY user_freq DESC, freq DESC";
536
m_sql << " LIMIT " << m;
538
g_debug ("sql =\n%s", m_sql.c_str ());
542
SQLStmtPtr stmt (new SQLStmt (m_db));
544
if (!stmt->prepare (m_sql)) {
552
Database::phraseWhereSql (const Phrase & p, String & sql)
555
sql << " s0=" << p.pinyin_id[0].sheng
556
<< " AND y0=" << p.pinyin_id[0].yun;
557
for (guint i = 1; i < p.len; i++) {
558
sql << " AND s" << i << '=' << p.pinyin_id[i].sheng
559
<< " AND y" << i << '=' << p.pinyin_id[i].yun;
561
sql << " AND phrase=\"" << p.phrase << "\"";
566
Database::phraseSql (const Phrase & p, String & sql)
568
sql << "INSERT OR IGNORE INTO userdb.py_phrase_" << p.len - 1
569
<< " VALUES(" << 0 /* user_freq */
570
<< ",\"" << p.phrase << '"' /* phrase */
571
<< ',' << p.freq; /* freq */
573
for (guint i = 0; i < p.len; i++) {
574
sql << ',' << p.pinyin_id[i].sheng << ',' << p.pinyin_id[i].yun;
579
sql << "UPDATE userdb.py_phrase_" << p.len - 1
580
<< " SET user_freq=user_freq+1";
582
phraseWhereSql (p, sql);
587
Database::commit (const PhraseArray &phrases)
589
Phrase phrase = {""};
591
m_sql = "BEGIN TRANSACTION;\n";
592
for (guint i = 0; i < phrases.size (); i++) {
593
phrase += phrases[i];
594
phraseSql (phrases[i], m_sql);
596
if (phrases.size () > 1)
597
phraseSql (phrase, m_sql);
598
m_sql << "COMMIT;\n";
604
Database::remove (const Phrase & phrase)
606
m_sql = "BEGIN TRANSACTION;\n";
607
m_sql << "DELETE FROM userdb.py_phrase_" << phrase.len - 1;
608
phraseWhereSql (phrase, m_sql);
610
m_sql << "COMMIT;\n";
616
Database::init (void)
618
if (m_instance == NULL) {
619
m_instance.reset (new Database ());