1
from persistent import Persistent
2
from persistent.dict import PersistentDict
3
from persistent.list import PersistentList
4
from zope.component import queryUtility, getMultiAdapter
5
from zope.interface import implements
6
from zope import schema
8
from repoze.catalog.catalog import Catalog
9
from repoze.catalog.document import DocumentMap
11
from uu.record.query.interfaces import IRecordCatalog, ISchemaInfo
12
from uu.record.interfaces import IRecordResolver
15
class _GetterDiscriminator(object):
    """
    Picklable repoze.catalog discriminator that delegates to a getter.

    repoze.catalog index objects keep a reference to their discriminator,
    and the indexes are stored persistently; a lambda or nested function
    cannot be pickled, so the (getter, name) pair is carried by an instance
    of this module-level class instead.  The getter itself must still be
    picklable (e.g. a module-level function) for the index to be stored.
    """

    def __init__(self, getter, name):
        self.getter = getter
        self.name = name

    def __call__(self, obj, default):
        # repoze.catalog calls discriminators as (object, default).
        return self.getter(obj, self.name, default)


class RecordCatalog(Persistent):
    """
    Record catalog is a facade for a repoze.catalog backend, providing
    the IRecordCatalog front-end.

    Instance state (all created in __init__):

      * indexer: repoze.catalog Catalog holding the index objects.

      * mapper: repoze.catalog DocumentMap (integer docid <-> UID).

      * supported: schema id/name -> ISchemaInfo for each bound schema.

      * _index_owners: index name -> list of names of schemas using it.
    """

    implements(IRecordCatalog)

    def __init__(self, name=None):
        """
        Create an empty catalog.

        The optional name is stored as self.name; nothing else in this
        class reads it.
        """
        self.name = name
        self.indexer = Catalog()
        self.mapper = DocumentMap()
        self.supported = PersistentDict()  # schema id/name -> ISchemaInfo
        self._index_owners = PersistentDict()  # idx name -> schema names

    def getObject(self, identifier):
        """
        Given an identifier as either a UID mapped in self.mapper or an
        integer id key from self.indexer, resolve object or return None.

        None is returned when an integer id has no mapped UID, or when no
        IRecordResolver utility is registered.
        """
        if isinstance(identifier, int):
            identifier = self.mapper.address_for_docid(identifier)  # int->uuid
        if identifier is None:
            return None  # unknown docid: nothing to resolve
        resolver = queryUtility(IRecordResolver)
        if resolver is None:
            return None
        # NOTE(review): the lines following the utility lookup were missing
        # from the source under review; this assumes an IRecordResolver
        # utility is callable with a UID -- confirm against the interface.
        return resolver(identifier)

    def _add_index(self, idxname, idxtype, getter=None):
        """
        Create an index of type idxtype and store it in self.indexer
        under the key idxname.

        idxtype must be one of 'field', 'text' or 'keyword'; any other
        value raises KeyError.

        getter, if provided, is called as getter(obj, idxname, default) to
        obtain the value to index.  When omitted, the value is read from
        the attribute of the indexed object named idxname.
        """
        # Imported locally: the module only imports Catalog and DocumentMap
        # from repoze.catalog, so a dotted reference through the bare name
        # ``repoze`` would raise NameError.
        from repoze.catalog.indexes.field import CatalogFieldIndex
        from repoze.catalog.indexes.keyword import CatalogKeywordIndex
        from repoze.catalog.indexes.text import CatalogTextIndex

        index_classes = {
            'field': CatalogFieldIndex,
            'text': CatalogTextIndex,
            'keyword': CatalogKeywordIndex,
        }
        idxcls = index_classes[idxtype]  # KeyError for an unknown type
        if getter is None:
            # A string discriminator makes repoze.catalog perform
            # getattr(obj, idxname, default) itself, and (unlike a lambda)
            # it can be pickled along with the persistent index.
            discriminator = idxname
        else:
            discriminator = _GetterDiscriminator(getter, idxname)
        # NOTE(review): bind() passes index names of the form
        # '<type>.<rest>'; whether indexed objects expose an attribute
        # under that full dotted name is not visible here -- confirm.
        self.indexer[idxname] = idxcls(discriminator)

    def bind(self, schema, omit=(), index_types=None):
        """
        Bind a schema providing IInterface.

        The catalog and schema are multi-adapted to ISchemaInfo; the
        resulting object is stored in self.supported under its name.  For
        every index name listed by each field info in info.fields, an
        index is created in self.indexer if one does not exist yet (the
        index type is the part of the index name before the first '.'),
        and the schema name is recorded once in self._index_owners for
        that index name.  Binding the same schema again is safe.

        NOTE(review): this method does not read the omit or index_types
        arguments; whatever field omission or index-type selection occurs
        must come from the ISchemaInfo adapter, which is not visible here.
        The policy below is the documented intent -- confirm where, and
        whether, it is implemented.

        Intended policy: all fields in the schema have corresponding
        indexes created for them, unless a fieldname is provided in the
        omit argument sequence.

        The index type for any field will, by default, be determined by the
        type of the field.  If the field is a sequence, by default, the
        index type will be a 'keyword' index.  If the value type is a
        TextLine or BytesLine, then *both* 'text' and 'field' indexes will
        be created for that field.  If the value type is Text (multi-line
        text), then only a (full) 'text' index will be created.

        If the field is a Bytes field, we cannot tell whether this field is
        tokenizable text, so we omit Bytes fields by default.  However,
        since BytesLine fields are constrained by textual cues (absence of
        line feeds), we index BytesLine as field and text values.  This is
        a sensible default behavior, considering some Bytes fields may
        contain large size binary content we do not want to index.

        Index types can be specified by providing a dict/mapping of
        field name keys to index type values.  Such values can be provided
        to override index creation defaults as any of:

          * A string name in ('field', 'text', 'keyword')

          * A sequence (set/tuple/list) of one or more of the above names,
            except None is mutually exclusive with other choices.

          * None: equivalent to omitting field using the omit argument.

        Overrides of index_types should not declare incorrect index types
        for a field; the following should raise a ValueError:

          * Specifying a text index on a non-text field: even if the
            value can be cast to a string, it has no reasonable hope
            of being either meaningful or tokenized.

            * Optimistic exception: allow any sequence field for which
              a value type contains text or bytes based fields.

          * Specifying a keyword index on a non-sequence field.
        """
        # adapt context, schema: produces schema+field info with index
        # names populated for use here.
        info = getMultiAdapter((self, schema), ISchemaInfo)
        for fieldinfo in info.fields:
            for idxname in fieldinfo.indexes:
                if idxname not in self.indexer:
                    idxtype = idxname.split('.')[0]
                    self._add_index(idxname, idxtype)
                owners = self._index_owners.get(idxname)
                if owners is None:
                    owners = self._index_owners[idxname] = PersistentList()
                if info.name not in owners:
                    # Record each schema once; a duplicate entry would
                    # keep unbind() from ever seeing a sole owner.
                    owners.append(info.name)
        self.supported[info.name] = info

    def unbind(self, schema, remove_indexes=False):
        """
        Unbind a schema providing IInterface or object providing ISchemaInfo,
        removing it from self.supported.

        If remove_indexes is True (by default, it is False), the schema is
        released as an owner of each of its index names, and any index
        left without an owner is removed from self.indexer.  Indexes still
        owned by another schema are kept.  When remove_indexes is False,
        neither the indexes nor the ownership records are touched.
        """
        info = getMultiAdapter((self, schema), ISchemaInfo)
        if info.name in self.supported:
            del self.supported[info.name]
        if not remove_indexes:
            return
        for idxname in info.indexes:
            owners = self._index_owners.get(idxname)
            if not owners or info.name not in owners:
                continue  # not managed by this schema; leave it alone
            # Drop every occurrence: previously stored owner lists may
            # contain the same schema name more than once.
            while info.name in owners:
                owners.remove(info.name)
            if owners:
                continue  # another schema still uses this index
            del self._index_owners[idxname]
            if idxname in self.indexer:
                del self.indexer[idxname]

    def searchable(self, schema):
        """
        Return a tuple of IFieldInfo objects, which provide the names of fields
        with respective indexes.  Fields without indexes will not have an
        IFieldInfo object returned.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def uniqueValuesFor(self, index):
        """
        Given index name, get unique values for content stored in the
        'forward index' inside the catalog for that index object.

        If index name is not in the catalog, raise KeyError.

        If index in catalog for the index name is a text index (incapable
        of providing unique values), raise a ValueError.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def comparatorsFor(self, index):
        """
        Return tuple of available comparator functions and their labels.
        Output is a tuple of two-item tuples, where the first item is
        a comparator function or class providing IComparator, and the
        second item is a human-readable label for that comparator.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def index(self, record, uid=None, getuid=None):
        """
        Given a record object providing any number of interfaces, index that
        record's field values for all interfaces it provides that are also
        supported by this catalog.  The record must have a UUID, which can
        be provided by value using the uid parameter, or looked up from the
        record object using a getuid function/callable.  The default
        should be to obtain a 'uid' attribute/property value from the
        record object itself; getuid=None stands for that default, because
        no DEFAULT_GETUID name is defined or imported in this module.

        uid provided (or obtained) should be either:

          * A 36-byte string representation of the UUID

          * A uuid.UUID object.

        If no UUID is provided nor resolved by function, raise a ValueError.
        Note: the default getuid function will get a random UID for the
        record if None can be found.

        Note: self.mapper will store string representations of the UUID.

        Returns the string representation of the UUID of the document, useful
        if the UUID was generated by the getuid function passed to index.

        Implicit path/location indexing: if the record object provides
        zope.location.interfaces.ILocation, then index the object's
        identifier (__name__) and container (__parent__) in field and path
        indexes respectively.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def unindex(self, uid):
        """
        Given the UUID (uid) of a record as either a uuid.UUID object or as
        a string representation of UUID, remove the record from self.indexer
        and self.mapper.

        NOTE(review): the def line of this method was missing from the
        source under review; the name and signature are reconstructed from
        the surviving docstring -- confirm against IRecordCatalog.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def reindex(self, record, uid=None, getuid=None):
        """
        Alternate spelling for index(), may be optimized in implementation
        or may simply just provide a synonymous call.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def __getitem__(self, name):
        """
        Return index for name from self.indexer, or raise KeyError.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def get(self, name, default=None):
        """
        Return index for name from self.indexer, default, or None.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def __setitem__(self, name, index):
        """
        Set an index object into self.indexer explicitly; prefer
        self.bind(schema) for adding indexes based on zope.schema fields.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def search(self, **query):
        """
        Given a search as index-name/value mapping, return results in the
        form (count, iterable of result uids).

        NOTE(review): the def line of this method was missing from the
        source under review; the name and keyword-mapping signature are
        reconstructed from the surviving docstring -- confirm against
        IRecordCatalog.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO

    def query(self, query, *args, **kwargs):
        """
        Given a search as a query providing IQuery, return results in the
        form (count, iterable of result uids).

        Additional arguments are implementation-specific, and may be used
        for sorting results.  Each implementation should gracefully ignore
        arguments it does not know about.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError('TODO')  # TODO