/* * Copyright 2012 Canonical Ltd. * * This file is part of u1db. * * u1db is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License version 3 * as published by the Free Software Foundation. * * u1db is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with u1db. If not, see . */ #include "u1db/u1db_internal.h" #include #include #include #include #include #define NO_GLOB 0 #define IS_GLOB 1 #define ENDS_IN_GLOB 2 #define EXPRESSION 1 #define INTEGER 2 #define OPS (sizeof(OPERATIONS) / sizeof (struct operation)) #define MAX_INT_STR_LEN 21 #ifndef max #define max(a, b) (((a) > (b)) ? (a) : (b)) #endif typedef struct string_list_item_ { char *data; struct string_list_item_ *next; } string_list_item; typedef struct string_list_ { string_list_item *head; string_list_item *tail; string_list_item *pos; } string_list; /* static void print_list(string_list *list) { string_list_item *item = NULL; printf("["); for (item = list->head; item != NULL; item = item->next) printf("'%s',", item->data); printf("]\n"); } */ static int init_list(string_list **list) { *list = (string_list *)calloc(1, sizeof(string_list)); if (*list == NULL) return U1DB_NOMEM; return U1DB_OK; } static void destroy_list(string_list *list) { string_list_item *item = NULL; string_list_item *previous = NULL; if (list == NULL) return; item = list->head; while (item != NULL) { previous = item; item = item->next; if (previous->data != NULL) free(previous->data); free(previous); } list->head = NULL; list->tail = NULL; free(list); list = NULL; } static int append(string_list *list, const char *data) { string_list_item *new_item = NULL; new_item = (string_list_item *)calloc(1, sizeof(string_list_item)); if (new_item == NULL) return U1DB_NOMEM; new_item->data = strdup(data); if (new_item->data == NULL) return U1DB_NOMEM; if (list->head == NULL) { list->head = new_item; list->pos = new_item; } if (list->tail != NULL) { list->tail->next = new_item; } list->tail = new_item; return U1DB_OK; } static int appendn(string_list *list, const char *data, int size) { string_list_item *new_item = NULL; new_item = (string_list_item *)calloc(1, sizeof(string_list_item)); if (new_item == NULL) return U1DB_NOMEM; new_item->data = strndup(data, size); if (new_item->data == NULL) return U1DB_NOMEM; if (list->head == NULL) { list->head = new_item; list->pos = new_item; } if (list->tail != NULL) { list->tail->next = new_item; } list->tail = new_item; return U1DB_OK; } typedef struct parse_tree_ { char *data; string_list *field_path; int arg_type; void *op; int arity; int number_of_children; struct parse_tree_ *first_child; struct parse_tree_ *last_child; struct parse_tree_ *next_sibling; const int *value_types; } parse_tree; /* static void print_tree(parse_tree *tree) { parse_tree *sub; if (tree->data) printf("data: '%s'\n", tree->data); if (tree->field_path->head) { printf("field_path: "); print_list(tree->field_path); } if (tree->arg_type) printf("arg_type: %d\n", tree->arg_type); if (tree->arity) printf("arity: %d\n", tree->arity); if (tree->number_of_children) printf("number_of_children: %d\n", tree->number_of_children); printf("("); for (sub = tree->first_child; sub != NULL; sub = sub->next_sibling) print_tree(sub); printf(")\n"); } */ static int init_parse_tree(parse_tree **result) { int status = U1DB_OK; *result = (parse_tree *)calloc(1, sizeof(parse_tree)); if (*result == NULL) return U1DB_NOMEM; (*result)->number_of_children = 0; status = init_list(&(*result)->field_path); return status; } static void destroy_parse_tree(parse_tree *t) { parse_tree *subtree = NULL; parse_tree *previous = NULL; if (t == NULL) return; if (t->field_path != NULL) destroy_list(t->field_path); if (t->data != NULL) free(t->data); subtree = t->first_child; while (subtree != NULL) { previous = subtree; subtree = subtree->next_sibling; destroy_parse_tree(previous); } free(t); } static int append_node(parse_tree *t, parse_tree *node) { if (t->first_child == NULL) t->first_child = node; if (t->last_child != NULL) t->last_child->next_sibling = node; t->last_child = node; (t->number_of_children)++; return U1DB_OK; } static int parse_op(string_list *tokens, char *term, parse_tree *result); static int parse_term(string_list *tokens, parse_tree *result); typedef int(*op_function)(parse_tree *, json_object *, string_list *); static int op_lower(parse_tree *tree, json_object *obj, string_list *result); static int op_number(parse_tree *tree, json_object *obj, string_list *result); static int op_split_words( parse_tree *tree, json_object *obj, string_list *result); static int op_bool(parse_tree *tree, json_object *obj, string_list *result); static int op_combine(parse_tree *tree, json_object *obj, string_list *result); static const int JUST_EXPRESSION[1] = {EXPRESSION}; static const int EXPRESSION_INTEGER[2] = {EXPRESSION, INTEGER}; struct operation { void *function; char *name; int value_type; int arity; const int *value_types; } OPERATIONS[] = { { op_lower, "lower", json_type_string, 1, JUST_EXPRESSION }, { op_number, "number", json_type_int, 2, EXPRESSION_INTEGER }, { op_split_words, "split_words", json_type_string, 1, JUST_EXPRESSION }, { op_bool, "bool", json_type_boolean, 1, JUST_EXPRESSION }, { op_combine, "combine", json_type_string, -1, JUST_EXPRESSION }, }; static int extract_value(json_object *val, int value_type, string_list *values) { int status = U1DB_OK; int i, integer_value, boolean_value, length; char string_value[MAX_INT_STR_LEN]; if (json_object_is_type(val, json_type_string) && value_type == json_type_string) { status = append(values, json_object_get_string(val)); goto finish; } if (json_object_is_type(val, json_type_int) && value_type == json_type_int) { integer_value = json_object_get_int(val); snprintf(string_value, MAX_INT_STR_LEN, "%d", integer_value); status = append(values, string_value); goto finish; } if (json_object_is_type(val, json_type_boolean) && value_type == json_type_boolean) { boolean_value = json_object_get_boolean(val); if (boolean_value) { status = append(values, "1"); } else { status = append(values, "0"); } goto finish; } if (json_object_is_type(val, json_type_array)) { length = json_object_array_length(val); for (i = 0; i < length; i++) { status = extract_value( json_object_array_get_idx(val, i), value_type, values); if (status != U1DB_OK) goto finish; } } finish: return status; } static int extract_field_values(json_object *obj, const string_list_item *field, int value_type, string_list *values) { json_object *val = NULL; json_object *array_item = NULL; int i, length; int status = U1DB_OK; if (!json_object_is_type(obj, json_type_object)) { goto finish; } val = json_object_object_get(obj, field->data); if (val == NULL) goto finish; if (field->next != NULL) { if (json_object_is_type(val, json_type_array)) { length = json_object_array_length(val); for (i = 0; i < length; i++) { array_item = json_object_array_get_idx(val, i); status = extract_field_values( array_item, field->next, value_type, values); if (status != U1DB_OK) goto finish; } goto finish; } if (json_object_is_type(val, json_type_object)) { status = extract_field_values(val, field->next, value_type, values); goto finish; } goto finish; } status = extract_value(val, value_type, values); finish: return status; } static int split(string_list *result, char *string, char splitter) { int status = U1DB_OK; char *result_ptr = NULL, *split_point = NULL; result_ptr = string; while (result_ptr != NULL) { split_point = strchr(result_ptr, splitter); if (split_point != NULL) { *split_point = '\0'; split_point++; } status = append(result, result_ptr); if (status != U1DB_OK) return status; result_ptr = split_point; } return status; } static int list_index(string_list *list, char *data) { int i = 0; string_list_item *item = NULL; for (item = list->head; item != NULL; item = item->next) { if (strcmp(item->data, data) == 0) { return i; } i++; } return -1; } static int get_values(parse_tree *tree, json_object *obj, string_list *values) { int status = U1DB_OK; if (tree->op) { status = ((op_function)tree->op)(tree, obj, values); } else { status = extract_field_values( obj, tree->field_path->head, json_type_string, values); } return status; } static int op_lower(parse_tree *tree, json_object *obj, string_list *result) { string_list *values = NULL; string_list_item *item = NULL; char *new_value = NULL, *value = NULL; int i; int status = U1DB_OK; status = init_list(&values); if (status != U1DB_OK) return status; status = get_values(tree->first_child, obj, values); if (status != U1DB_OK) goto finish; for (item = values->head; item != NULL; item = item->next) { value = item->data; i = 0; new_value = (char *)calloc(strlen(value) + 1, 1); if (new_value != NULL) { while (value[i] != '\0') { // TODO: unicode hahaha new_value[i] = tolower(value[i]); i++; } new_value[i] = '\0'; } status = append(result, new_value); } finish: if (new_value != NULL) free(new_value); if (values != NULL) destroy_list(values); return status; } static int op_number(parse_tree *tree, json_object *obj, string_list *result) { string_list *values = NULL; string_list_item *item = NULL; char *p = NULL, *new_value = NULL, *value = NULL, *number = NULL; parse_tree *node = NULL; int count, zeroes, value_size, isnumber; int status = U1DB_OK; node = tree->first_child; status = init_list(&values); if (status != U1DB_OK) return status; status = extract_field_values( obj, node->field_path->head, json_type_int, values); if (status != U1DB_OK) goto finish; node = node->next_sibling; number = node->data; for (p = number; *p; p++) { if (isdigit(*p) == 0) { status = U1DB_INVALID_VALUE_FOR_INDEX; goto finish; } } zeroes = atoi(number); for (item = values->head; item != NULL; item = item->next) { value = item->data; isnumber = 1; for (p = value; *p; p++) { if (isdigit(*p) == 0) { isnumber = 0; break; } } if (isnumber == 0) { continue; } value_size = max(strlen(value), zeroes) + 1; new_value = (char *)calloc(value_size, 1); if (new_value == NULL) { status = U1DB_NOMEM; goto finish; } count = snprintf(new_value, value_size, "%0*d", zeroes, atoi(value)); if (count != (value_size - 1)) { // Most likely encoding issues. status = U1DB_INVALID_PARAMETER; free(new_value); goto finish; } if ((status = append(result, new_value)) != U1DB_OK) { free(new_value); goto finish; } free(new_value); } finish: if (values != NULL) destroy_list(values); return status; } static int op_combine(parse_tree *tree, json_object *obj, string_list *result) { parse_tree *node = NULL; int status = U1DB_OK; node = tree->first_child; for (node = tree->first_child; node != NULL; node = node->next_sibling) { status = get_values(node, obj, result); if (status != U1DB_OK) return status; } return status; } static int op_split_words(parse_tree *tree, json_object *obj, string_list *result) { string_list_item *item = NULL; string_list *values = NULL; char *intermediate = NULL, *intermediate_ptr = NULL; char *space_chr = NULL; int status = U1DB_OK; status = init_list(&values); if (status != U1DB_OK) return status; status = get_values(tree->first_child, obj, values); if (status != U1DB_OK) goto finish; for (item = values->head; item != NULL; item = item->next) { intermediate = strdup(item->data); if (intermediate == NULL) return U1DB_NOMEM; intermediate_ptr = intermediate; while (intermediate_ptr != NULL) { space_chr = strchr(intermediate_ptr, ' '); if (space_chr != NULL) { *space_chr = '\0'; space_chr++; } if (list_index(result, intermediate_ptr) == -1) { if ((status = append(result, intermediate_ptr)) != U1DB_OK) { return status; } } intermediate_ptr = space_chr; } free(intermediate); } finish: if (values != NULL) destroy_list(values); return status; } static int op_bool(parse_tree *tree, json_object *obj, string_list *result) { string_list_item *item = NULL; string_list *values = NULL; int status = U1DB_OK; status = init_list(&values); if (status != U1DB_OK) return status; status = get_values(tree->first_child, obj, values); if (status != U1DB_OK) goto finish; //just return all the strings which have been filtered and converted from //booleans by extract_field_values. status = extract_field_values( obj, tree->first_child->field_path->head, json_type_boolean, values); if (status != U1DB_OK) goto finish; for (item = values->head; item != NULL; item = item->next) { if ((status = append(result, item->data)) != U1DB_OK) { return status; } } finish: if (values != NULL) destroy_list(values); return status; } static int lookup_index_fields(u1database *db, u1query *query) { int status, offset; char *field = NULL; sqlite3_stmt *statement = NULL; status = sqlite3_prepare_v2(db->sql_handle, "SELECT offset, field FROM index_definitions" " WHERE name = ?" " ORDER BY offset DESC", -1, &statement, NULL); if (status != SQLITE_OK) { goto finish; } status = sqlite3_bind_text(statement, 1, query->index_name, -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) { goto finish; } status = sqlite3_step(statement); if (status == SQLITE_DONE) { status = U1DB_INDEX_DOES_NOT_EXIST; } while (status == SQLITE_ROW) { offset = sqlite3_column_int(statement, 0); field = (char*)sqlite3_column_text(statement, 1); if (query->fields == NULL) { query->num_fields = offset + 1; query->fields = (char**)calloc(query->num_fields, sizeof(char*)); if (query->fields == NULL) { status = U1DB_NOMEM; goto finish; } } if (offset >= query->num_fields) { status = U1DB_INVALID_PARAMETER; // TODO: better error code goto finish; } query->fields[offset] = strdup(field); if (query->fields[offset] == NULL) { status = U1DB_NOMEM; goto finish; } status = sqlite3_step(statement); } if (status == SQLITE_DONE) { status = U1DB_OK; } finish: sqlite3_finalize(statement); return status; } int u1db_query_init(u1database *db, const char *index_name, u1query **query) { int status; if (db == NULL || index_name == NULL || query == NULL) { return U1DB_INVALID_PARAMETER; } *query = (u1query*)calloc(1, sizeof(u1query)); if (*query == NULL) { return U1DB_NOMEM; } // Should we be copying this instead? (*query)->index_name = index_name; status = lookup_index_fields(db, *query); if (status != U1DB_OK) { u1db_free_query(query); } return status; } void u1db_free_query(u1query **query) { int i; u1query *q = NULL; if (query == NULL || *query == NULL) { return; } q = *query; if (q->fields != NULL) { for (i = 0; i < q->num_fields; ++i) { if (q->fields[i] != NULL) { free(q->fields[i]); q->fields[i] = NULL; } } free(q->fields); q->fields = NULL; } free(*query); *query = NULL; } int u1db_simple_lookup1(u1database *db, const char *index_name, const char *val0, void *context, u1db_doc_callback cb) { int status = U1DB_OK; u1query *query = NULL; if (db == NULL || index_name == NULL || val0 == NULL || cb == NULL) { return U1DB_INVALID_PARAMETER; } status = u1db_query_init(db, index_name, &query); if (status != U1DB_OK) { goto finish; } status = u1db_get_from_index(db, query, context, cb, 1, val0); finish: u1db_free_query(&query); return status; } int u1db_get_from_index_list(u1database *db, u1query *query, void *context, u1db_doc_callback cb, int n_values, const char **values) { int status = U1DB_OK; sqlite3_stmt *statement = NULL; char *doc_id = NULL; char *query_str = NULL; int i, bind_arg; int wildcard[20] = {0}; if (db == NULL || query == NULL || cb == NULL || n_values < 0) { return U1DB_INVALID_PARAMETER; } if (query->num_fields != n_values) { return U1DB_INVALID_VALUE_FOR_INDEX; } if (n_values > 20) { return U1DB_NOT_IMPLEMENTED; } status = u1db__format_query( query->num_fields, values, &query_str, wildcard); if (status != U1DB_OK) { goto finish; } status = sqlite3_prepare_v2(db->sql_handle, query_str, -1, &statement, NULL); if (status != SQLITE_OK) { goto finish; } // Bind all of the 'field_name' parameters. sqlite_bind starts at 1 bind_arg = 1; for (i = 0; i < query->num_fields; ++i) { status = sqlite3_bind_text(statement, bind_arg, query->fields[i], -1, SQLITE_TRANSIENT); bind_arg++; if (status != SQLITE_OK) { goto finish; } if (wildcard[i] == NO_GLOB) { // Not a wildcard, so add the argument status = sqlite3_bind_text(statement, bind_arg, values[i], -1, SQLITE_TRANSIENT); bind_arg++; } else if (wildcard[i] == ENDS_IN_GLOB) { status = sqlite3_bind_text(statement, bind_arg, values[i], -1, SQLITE_TRANSIENT); bind_arg++; } if (status != SQLITE_OK) { goto finish; } } status = sqlite3_step(statement); while (status == SQLITE_ROW) { doc_id = (char*)sqlite3_column_text(statement, 0); // We use u1db_get_docs so we can pass check_for_conflicts=0, which is // currently expected by the test suite. status = u1db_get_docs( db, 1, (const char**)&doc_id, 1, 0, context, cb); if (status != U1DB_OK) { goto finish; } status = sqlite3_step(statement); } if (status == SQLITE_DONE) { status = U1DB_OK; } finish: sqlite3_finalize(statement); if (query_str != NULL) { free(query_str); } return status; } int u1db_get_range_from_index(u1database *db, u1query *query, void *context, u1db_doc_callback cb, int n_values, const char **start_values, const char **end_values) { int i, bind_arg, status = U1DB_OK; char *query_str = NULL; sqlite3_stmt *statement = NULL; char *doc_id = NULL; char *stripped = NULL; int start_wildcard[20] = {0}; int end_wildcard[20] = {0}; if (db == NULL || query == NULL || cb == NULL || n_values < 0) { return U1DB_INVALID_PARAMETER; } if (n_values != query->num_fields) { return U1DB_INVALID_VALUE_FOR_INDEX; } status = u1db__format_range_query( query->num_fields, start_values, end_values, &query_str, start_wildcard, end_wildcard); if (status != U1DB_OK) { goto finish; } status = sqlite3_prepare_v2(db->sql_handle, query_str, -1, &statement, NULL); if (status != SQLITE_OK) { goto finish; } // Bind all of the 'field_name' parameters. sqlite_bind starts at 1 bind_arg = 1; for (i = 0; i < query->num_fields; ++i) { status = sqlite3_bind_text( statement, bind_arg, query->fields[i], -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) { goto finish; } bind_arg++; if (start_values != NULL) { if (start_wildcard[i] == NO_GLOB) { status = sqlite3_bind_text( statement, bind_arg, start_values[i], -1, SQLITE_TRANSIENT); bind_arg++; } else if (start_wildcard[i] == ENDS_IN_GLOB) { if (stripped != NULL) free(stripped); stripped = strdup(start_values[i]); if (stripped == NULL) { status = U1DB_NOMEM; goto finish; } stripped[strlen(stripped) - 1] = '\0'; status = sqlite3_bind_text( statement, bind_arg, stripped, -1, SQLITE_TRANSIENT); bind_arg++; } if (status != SQLITE_OK) { goto finish; } } if (end_values != NULL) { if (end_wildcard[i] == NO_GLOB) { status = sqlite3_bind_text( statement, bind_arg, end_values[i], -1, SQLITE_TRANSIENT); bind_arg++; } else if (end_wildcard[i] == ENDS_IN_GLOB) { if (stripped != NULL) free(stripped); stripped = strdup(end_values[i]); if (stripped == NULL) { status = U1DB_NOMEM; goto finish; } stripped[strlen(stripped) - 1] = '\0'; status = sqlite3_bind_text( statement, bind_arg, stripped, -1, SQLITE_TRANSIENT); bind_arg++; status = sqlite3_bind_text( statement, bind_arg, end_values[i], -1, SQLITE_TRANSIENT); bind_arg++; } if (status != SQLITE_OK) { goto finish; } } } status = sqlite3_step(statement); while (status == SQLITE_ROW) { doc_id = (char*)sqlite3_column_text(statement, 0); // We use u1db_get_docs so we can pass check_for_conflicts=0, which is // currently expected by the test suite. status = u1db_get_docs( db, 1, (const char**)&doc_id, 1, 0, context, cb); if (status != U1DB_OK) { goto finish; } status = sqlite3_step(statement); } if (status == SQLITE_DONE) { status = U1DB_OK; } finish: sqlite3_finalize(statement); if (query_str != NULL) { free(query_str); } if (stripped != NULL) free(stripped); return status; } int u1db_get_from_index(u1database *db, u1query *query, void *context, u1db_doc_callback cb, int n_values, ...) { int i, status = U1DB_OK; va_list argp; const char **values = NULL; values = (const char **)calloc(n_values, sizeof(char*)); if (values == NULL) { status = U1DB_NOMEM; goto finish; } va_start(argp, n_values); for (i = 0; i < n_values; ++i) { values[i] = va_arg(argp, char *); } status = u1db_get_from_index_list( db, query, context, cb, n_values, values); finish: if (values != NULL) free(values); va_end(argp); return status; } int u1db_get_index_keys(u1database *db, char *index_name, void *context, u1db_key_callback cb) { int status = U1DB_OK; int num_fields = 0; int bind_arg, i; const char **key = NULL; string_list *field_names = NULL; string_list_item *field_name = NULL; char *query_str = NULL; sqlite3_stmt *statement; status = init_list(&field_names); if (status != U1DB_OK) goto finish; status = sqlite3_prepare_v2( db->sql_handle, "SELECT field FROM index_definitions WHERE name = ? ORDER BY " "offset;", -1, &statement, NULL); if (status != SQLITE_OK) { goto finish; } status = sqlite3_bind_text( statement, 1, index_name, -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) { goto finish; } status = sqlite3_step(statement); if (status == SQLITE_DONE) { status = U1DB_INDEX_DOES_NOT_EXIST; goto finish; } while (status == SQLITE_ROW) { num_fields++; status = append(field_names, (char*)sqlite3_column_text(statement, 0)); if (status != U1DB_OK) goto finish; status = sqlite3_step(statement); } if (status != SQLITE_DONE) { goto finish; } sqlite3_finalize(statement); status = u1db__format_index_keys_query(num_fields, &query_str); if (status != U1DB_OK) { goto finish; } status = sqlite3_prepare_v2( db->sql_handle, query_str, -1, &statement, NULL); if (status != SQLITE_OK) { goto finish; } bind_arg = 1; for (field_name = field_names->head; field_name != NULL; field_name = field_name->next) { status = sqlite3_bind_text( statement, bind_arg++, field_name->data, -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) goto finish; } status = sqlite3_step(statement); key = (const char**)calloc(num_fields, sizeof(char*)); if (key == NULL) { status = U1DB_NOMEM; goto finish; } while (status == SQLITE_ROW) { for (i = 0; i < num_fields; ++i) { key[i] = (const char*)sqlite3_column_text(statement, i); } if ((status = cb(context, num_fields, key)) != U1DB_OK) { goto finish; } status = sqlite3_step(statement); } if (status == SQLITE_DONE) { status = U1DB_OK; } finish: if (key != NULL) { free(key); } if (query_str != NULL) { free(query_str); } destroy_list(field_names); sqlite3_finalize(statement); return status; } static void add_to_buf(char **buf, int *buf_size, const char *fmt, ...) { int count; va_list argp; va_start(argp, fmt); count = vsnprintf(*buf, *buf_size, fmt, argp); va_end(argp); *buf += count; *buf_size -= count; } int u1db__format_query(int n_fields, const char **values, char **buf, int *wildcard) { int status = U1DB_OK; int buf_size, i; char *cur = NULL; const char *val = NULL; int have_wildcard = 0; if (n_fields < 1) { return U1DB_INVALID_PARAMETER; } // 81 for 1 doc, 166 for 2, 251 for 3 buf_size = (1 + n_fields) * 100; // The first field is treated specially cur = (char*)calloc(buf_size, 1); if (cur == NULL) { return U1DB_NOMEM; } *buf = cur; add_to_buf(&cur, &buf_size, "SELECT d0.doc_id FROM document_fields d0"); for (i = 1; i < n_fields; ++i) { add_to_buf(&cur, &buf_size, ", document_fields d%d", i); } add_to_buf(&cur, &buf_size, " WHERE d0.field_name = ?"); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, " AND d0.doc_id = d%d.doc_id" " AND d%d.field_name = ?", i, i); } val = values[i]; if (val == NULL) { status = U1DB_INVALID_VALUE_FOR_INDEX; goto finish; } if (val[0] == '*') { wildcard[i] = IS_GLOB; have_wildcard = 1; add_to_buf(&cur, &buf_size, " AND d%d.value NOT NULL", i); } else if (val[0] != '\0' && val[strlen(val)-1] == '*') { // glob wildcard[i] = ENDS_IN_GLOB; if (have_wildcard) { //globs not allowed after another wildcard status = U1DB_INVALID_GLOBBING; goto finish; } have_wildcard = 1; add_to_buf(&cur, &buf_size, " AND d%d.value GLOB ?", i); } else { wildcard[i] = NO_GLOB; if (have_wildcard) { // Can't have a non-wildcard after a wildcard status = U1DB_INVALID_GLOBBING; goto finish; } add_to_buf(&cur, &buf_size, " AND d%d.value = ?", i); } } add_to_buf(&cur, &buf_size, " ORDER BY "); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, ", "); } add_to_buf(&cur, &buf_size, "d%d.value", i); } finish: if (status != U1DB_OK && *buf != NULL) { free(*buf); *buf = NULL; } return status; } int u1db__format_range_query(int n_fields, const char **start_values, const char **end_values, char **buf, int *start_wildcard, int *end_wildcard) { int status = U1DB_OK; int buf_size, i; char *cur = NULL; const char *val = NULL; int have_start_wildcard = 0; int have_end_wildcard = 0; if (n_fields < 1) { return U1DB_INVALID_PARAMETER; } // 81 for 1 doc, 166 for 2, 251 for 3 buf_size = (1 + n_fields) * 100; // The first field is treated specially cur = (char*)calloc(buf_size, 1); if (cur == NULL) { return U1DB_NOMEM; } *buf = cur; add_to_buf(&cur, &buf_size, "SELECT d0.doc_id FROM document_fields d0"); for (i = 1; i < n_fields; ++i) { add_to_buf(&cur, &buf_size, ", document_fields d%d", i); } add_to_buf(&cur, &buf_size, " WHERE d0.field_name = ?"); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, " AND d0.doc_id = d%d.doc_id" " AND d%d.field_name = ?", i, i); } if (start_values != NULL) { val = start_values[i]; if (val == NULL) { status = U1DB_INVALID_VALUE_FOR_INDEX; goto finish; } if (val[0] == '*') { start_wildcard[i] = IS_GLOB; have_start_wildcard= 1; add_to_buf(&cur, &buf_size, " and d%d.value not null", i); } else if (val[0] != '\0' && val[strlen(val)-1] == '*') { // glob start_wildcard[i] = ENDS_IN_GLOB; if (have_start_wildcard) { //globs not allowed after another wildcard status = U1DB_INVALID_GLOBBING; goto finish; } have_start_wildcard = 1; add_to_buf(&cur, &buf_size, " and d%d.value >= ?", i); } else { start_wildcard[i] = NO_GLOB; if (have_start_wildcard) { // can't have a non-wildcard after a wildcard status = U1DB_INVALID_GLOBBING; goto finish; } add_to_buf(&cur, &buf_size, " and d%d.value >= ?", i); } } if (end_values != NULL) { val = end_values[i]; if (val == NULL) { status = U1DB_INVALID_VALUE_FOR_INDEX; goto finish; } if (val[0] == '*') { end_wildcard[i] = IS_GLOB; have_end_wildcard = 1; add_to_buf(&cur, &buf_size, " AND d%d.value NOT NULL", i); } else if (val[0] != '\0' && val[strlen(val)-1] == '*') { // glob end_wildcard[i] = ENDS_IN_GLOB; if (have_end_wildcard) { //globs not allowed after another wildcard status = U1DB_INVALID_GLOBBING; goto finish; } have_end_wildcard = 1; add_to_buf( &cur, &buf_size, " AND (d%d.value < ? OR d%d.value GLOB ?)", i, i); } else { end_wildcard[i] = NO_GLOB; if (have_end_wildcard) { // Can't have a non-wildcard after a wildcard status = U1DB_INVALID_GLOBBING; goto finish; } add_to_buf(&cur, &buf_size, " AND d%d.value <= ?", i); } } } add_to_buf(&cur, &buf_size, " ORDER BY "); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, ", "); } add_to_buf(&cur, &buf_size, "d%d.value", i); } finish: if (status != U1DB_OK && *buf != NULL) { free(*buf); *buf = NULL; } return status; } int u1db__format_index_keys_query(int n_fields, char **buf) { int status = U1DB_OK; int buf_size, i; char *cur = NULL; if (n_fields < 1) { return U1DB_INVALID_PARAMETER; } // 81 for 1 doc, 166 for 2, 251 for 3 buf_size = (1 + n_fields) * 100; // The first field is treated specially cur = (char*)calloc(buf_size, 1); if (cur == NULL) { return U1DB_NOMEM; } *buf = cur; add_to_buf(&cur, &buf_size, "SELECT "); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, ", "); } add_to_buf(&cur, &buf_size, " d%d.value", i); } add_to_buf(&cur, &buf_size, " FROM "); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, ", "); } add_to_buf(&cur, &buf_size, "document_fields d%d", i); } add_to_buf(&cur, &buf_size, " WHERE d0.field_name = ?"); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, " AND d0.doc_id = d%d.doc_id" " AND d%d.field_name = ?", i, i); } add_to_buf(&cur, &buf_size, " AND d%d.value NOT NULL", i); } add_to_buf(&cur, &buf_size, " GROUP BY "); for (i = 0; i < n_fields; ++i) { if (i != 0) { add_to_buf(&cur, &buf_size, ", "); } add_to_buf(&cur, &buf_size, "d%d.value", i); } if (status != U1DB_OK && *buf != NULL) { free(*buf); *buf = NULL; } return status; } struct sqlcb_to_field_cb { void *user_context; int (*user_cb)(void *, const char*, parse_tree *tree); }; static int check_fieldname(const char *fieldname) { if (fieldname[strlen(fieldname) - 1] == '.') return U1DB_INVALID_FIELD_SPECIFIER; return U1DB_OK; } static char * get_token(string_list *tokens) { char *token = NULL; if (tokens->pos != NULL) { token = tokens->pos->data; tokens->pos = tokens->pos->next; } return token; } static char * peek_token(string_list *tokens) { if (tokens->pos != NULL) { return tokens->pos->data; } return NULL; } static int to_getter(parse_tree *node) { int status = U1DB_OK; if (node->op == NULL) { status = check_fieldname(node->data); if (status != U1DB_OK) return status; status = split(node->field_path, node->data, '.'); } return status; } static int parse_op(string_list *tokens, char *term, parse_tree *result) { char *sep = NULL; parse_tree *node = NULL; int status = U1DB_OK; int i, array_size; sep = get_token(tokens); if (sep == NULL || strcmp(sep, "(") != 0) { return U1DB_INVALID_TRANSFORMATION_FUNCTION; } for (i = 0; i < OPS; i++) { if (strcmp(OPERATIONS[i].name, term) == 0) { result->data = strdup(term); result->op = OPERATIONS[i].function; result->arity = OPERATIONS[i].arity; result->value_types = OPERATIONS[i].value_types; break; } } if (result->op == NULL) { status = U1DB_UNKNOWN_OPERATION; goto finish; } while (1) { init_parse_tree(&node); status = parse_term(tokens, node); if (status != U1DB_OK) if (node != NULL) { destroy_parse_tree(node); goto finish; } status = append_node(result, node); if (status != U1DB_OK) { destroy_parse_tree(node); goto finish; } sep = get_token(tokens); if (sep == NULL) { status = U1DB_INVALID_TRANSFORMATION_FUNCTION; goto finish; } if (strcmp(sep, ")") == 0) { break; } if (strcmp(sep, ",") != 0) { status = U1DB_INVALID_TRANSFORMATION_FUNCTION; goto finish; } } i = 0; array_size = sizeof(result->value_types) / sizeof(int); for (node = result->first_child; node != NULL; node = node->next_sibling) { node->arg_type = result->value_types[i % array_size]; if (node->arg_type == EXPRESSION) { status = to_getter(node); if (status != U1DB_OK) goto finish; } i++; } finish: return status; } static int parse_term(string_list *tokens, parse_tree *result) { char *term = NULL; char *next = NULL; int status = U1DB_OK; term = get_token(tokens); if (term == NULL) { status = U1DB_INVALID_TRANSFORMATION_FUNCTION; goto finish; } if (strcmp(term, ",") == 0 || strcmp(term, "(") == 0 || strcmp(term, ")") == 0) { status = U1DB_INVALID_TRANSFORMATION_FUNCTION; goto finish; } next = peek_token(tokens); if (next != NULL && strcmp(next, "(") == 0) { status = parse_op(tokens, term, result); goto finish; } else { result->data = strdup(term); } finish: return status; } static int make_tokens(const char *expression, string_list *tokens) { int status = U1DB_OK; int size; int idx, start, end; char c; idx = 0; while (idx < strlen(expression) && expression[idx] == ' ') idx++; start = idx; end = strlen(expression) - 1; while (end && expression[end] == ' ') end--; end++; while (idx < end) { c = expression[idx]; if (c == '(' || c == ',' || c == ')') { if (idx == start) { status = appendn(tokens, expression + idx, 1); if (status != U1DB_OK) return status; idx++; start = idx; } else { while (start < idx && expression[start] == ' ') start++; size = idx - start; while (size && expression[start + size - 1] == ' ') size--; if (size > 0) { status = appendn(tokens, expression + start, size); if (status != U1DB_OK) return status; } start = idx; } } else { idx++; } } size = end - start; if (size > 0) { status = appendn(tokens, expression + start, size); } return status; } static int parse(const char *expression, parse_tree *result) { int status = U1DB_OK; int open_parens = 0; string_list *tokens = NULL; status = init_list(&tokens); if (status != U1DB_OK) goto finish; status = make_tokens(expression, tokens); if (status != U1DB_OK) { goto finish; } status = parse_term(tokens, result); if (status != U1DB_OK) { goto finish; } if (peek_token(tokens) != NULL) { status = U1DB_INVALID_TRANSFORMATION_FUNCTION; goto finish; } status = to_getter(result); if (status != U1DB_OK) { goto finish; } if (status != U1DB_OK) goto finish; if (open_parens != 0) status = U1DB_INVALID_TRANSFORMATION_FUNCTION; finish: if (tokens != NULL) destroy_list(tokens); return status; } // Thunk from the SQL interface, to a nicer single value interface static int sqlite_cb_to_field_cb(void *context, int n_cols, char **cols, char **rows) { struct sqlcb_to_field_cb *ctx = NULL; parse_tree *tree = NULL; char *expression = NULL; int status = U1DB_OK; ctx = (struct sqlcb_to_field_cb*)context; if (n_cols != 1) { return 1; // Error } expression = cols[0]; status = init_parse_tree(&tree); if (status != U1DB_OK) goto finish; status = parse(expression, tree); if (status != U1DB_OK) goto finish; status = ctx->user_cb(ctx->user_context, expression, tree); finish: destroy_parse_tree(tree); return status; } // Iterate over the fields that are indexed, and invoke cb for each one static int iter_field_definitions(u1database *db, void *context, int (*cb)(void *context, const char *expression, parse_tree *tree)) { int status; struct sqlcb_to_field_cb ctx; ctx.user_context = context; ctx.user_cb = cb; status = sqlite3_exec(db->sql_handle, "SELECT field FROM index_definitions", sqlite_cb_to_field_cb, &ctx, NULL); return status; } struct evaluate_index_context { u1database *db; const char *doc_id; json_object *obj; const char *content; }; static int add_to_document_fields(u1database *db, const char *doc_id, const char *expression, const char *val) { int status; sqlite3_stmt *statement = NULL; status = sqlite3_prepare_v2(db->sql_handle, "INSERT INTO document_fields (doc_id, field_name, value)" " VALUES (?, ?, ?)", -1, &statement, NULL); if (status != SQLITE_OK) { return status; } status = sqlite3_bind_text(statement, 1, doc_id, -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) { goto finish; } status = sqlite3_bind_text(statement, 2, expression, -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) { goto finish; } status = sqlite3_bind_text(statement, 3, val, -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) { goto finish; } status = sqlite3_step(statement); if (status == SQLITE_DONE) { status = SQLITE_OK; } finish: sqlite3_finalize(statement); return status; } static int evaluate_index_and_insert_into_db(void *context, const char *expression, parse_tree *tree) { struct evaluate_index_context *ctx; string_list *values = NULL; string_list_item *item = NULL; int status = U1DB_OK; ctx = (struct evaluate_index_context *)context; if (ctx->obj == NULL || !json_object_is_type(ctx->obj, json_type_object)) { return U1DB_INVALID_JSON; } status = init_list(&values); if (status != U1DB_OK) goto finish; status = get_values(tree, ctx->obj, values); if (status != U1DB_OK) goto finish; for (item = values->head; item != NULL; item = item->next) { if ((status = add_to_document_fields(ctx->db, ctx->doc_id, expression, item->data)) != U1DB_OK) goto finish; } finish: if (values != NULL) { destroy_list(values); values = NULL; } return status; } // Is this expression field already in the indexed list? // We make an assumption that the number of new expressions is always small // relative to what is already indexed (which should be reasonably accurate). static int is_present(u1database *db, const char *expression, int *present) { sqlite3_stmt *statement = NULL; int status; status = sqlite3_prepare_v2(db->sql_handle, "SELECT 1 FROM index_definitions WHERE field = ? LIMIT 1", -1, &statement, NULL); if (status != SQLITE_OK) { return status; } status = sqlite3_bind_text(statement, 1, expression, -1, SQLITE_TRANSIENT); if (status != SQLITE_OK) { goto finish; } status = sqlite3_step(statement); if (status == SQLITE_DONE) { status = SQLITE_OK; *present = 0; } else if (status == SQLITE_ROW) { status = SQLITE_OK; *present = 1; } finish: sqlite3_finalize(statement); return status; } int u1db__find_unique_expressions(u1database *db, int n_expressions, const char **expressions, int *n_unique, const char ***unique_expressions) { int i, status, present = 0; const char **tmp = NULL; tmp = (const char **)calloc(n_expressions, sizeof(char*)); if (tmp == NULL) { return U1DB_NOMEM; } status = U1DB_OK; *n_unique = 0; for (i = 0; i < n_expressions; ++i) { if (expressions[i] == NULL) { status = U1DB_INVALID_PARAMETER; goto finish; } status = is_present(db, expressions[i], &present); if (status != SQLITE_OK) { goto finish; } if (!present) { tmp[*n_unique] = expressions[i]; (*n_unique)++; } } finish: if (status == U1DB_OK) { *unique_expressions = tmp; } else { free((void*)tmp); } return status; } int u1db__update_indexes(u1database *db, const char *doc_id, const char *content) { struct evaluate_index_context context; int status; if (content == NULL) { // No new fields to add to the database. return U1DB_OK; } context.db = db; context.doc_id = doc_id; context.content = content; context.obj = json_tokener_parse(content); if (context.obj == NULL || !json_object_is_type(context.obj, json_type_object)) { return U1DB_INVALID_JSON; } status = iter_field_definitions( db, &context, evaluate_index_and_insert_into_db); json_object_put(context.obj); return status; } int u1db__index_all_docs(u1database *db, int n_expressions, const char **expressions) { int status, i; sqlite3_stmt *statement = NULL; struct evaluate_index_context context = {0}; parse_tree **trees; trees = (parse_tree**)calloc(n_expressions, sizeof(parse_tree*)); for (i = 0; i < n_expressions; ++i) { init_parse_tree(&trees[i]); status = parse(expressions[i], trees[i]); if (status != U1DB_OK) goto finish; } status = sqlite3_prepare_v2(db->sql_handle, "SELECT doc_id, content FROM document", -1, &statement, NULL); if (status != SQLITE_OK) { return status; } context.db = db; status = sqlite3_step(statement); while (status == SQLITE_ROW) { if (context.obj != NULL) { json_object_put(context.obj); context.obj = NULL; } context.doc_id = (const char*)sqlite3_column_text(statement, 0); context.content = (const char*)sqlite3_column_text(statement, 1); if (context.content == NULL) { // This document is deleted so does not need to be indexed. status = sqlite3_step(statement); continue; } context.obj = json_tokener_parse(context.content); if (context.obj == NULL || !json_object_is_type(context.obj, json_type_object)) { // Invalid JSON in the database, for now we just continue? // TODO: Raise an error here. status = sqlite3_step(statement); continue; } for (i = 0; i < n_expressions; ++i) { status = evaluate_index_and_insert_into_db( &context, expressions[i], trees[i]); if (status != U1DB_OK) goto finish; } status = sqlite3_step(statement); } if (status == SQLITE_DONE) { status = U1DB_OK; } finish: for (i = 0; i < n_expressions; ++i) destroy_parse_tree(trees[i]); free(trees); if (context.obj != NULL) { json_object_put(context.obj); context.obj = NULL; } sqlite3_finalize(statement); return status; }