~upiq-dev/upiq/uu.record

Viewing changes to uu/record/query/catalog.py

Committer: sean.upton at utah
Date: 2013-12-03 23:42:25 UTC
Revision ID: sean.upton@hsc.utah.edu-20131203234225-z3i9werdirwvyi11

removed cruft of old query package from uu.record; uu.retrieval supplanted this, and this old experimental package was never previously removed

files removed:
uu/record/query

uu/record/query/__init__.py

uu/record/query/catalog.py

uu/record/query/configure.zcml

uu/record/query/interfaces.py

uu/record/query/schemainfo.py

uu/record/tests/test_query_schemainfo.py

Show diffs side-by-side

added added

removed removed

uu/record/query/catalog.py

from persistent import Persistent

from persistent.dict import PersistentDict

from persistent.list import PersistentList

from zope.component import queryUtility, getMultiAdapter

from zope.interface import implements

from zope import schema

import repoze.catalog

from repoze.catalog.catalog import Catalog

from repoze.catalog.document import DocumentMap

from uu.record.query.interfaces import IRecordCatalog, ISchemaInfo

from uu.record.interfaces import IRecordResolver

def _default_getuid(record):
    """
    Default UID getter used by RecordCatalog.index() and reindex().

    Returns the string form of the record's 'uid' attribute; if the record
    has no 'uid' (or it is None), a random UUID is generated instead, as
    described in the index() docstring.
    """
    import uuid  # local import: file-level imports are outside this block
    uid = getattr(record, 'uid', None)
    if uid is None:
        uid = uuid.uuid4()
    return str(uid)


# The method signatures below use DEFAULT_GETUID as a default argument, but
# this module neither defined nor imported that name, which raises a
# NameError as soon as the class body executes.
# NOTE(review): if uu.record.query.interfaces exports a DEFAULT_GETUID,
# import that one instead of defining it here -- confirm.
DEFAULT_GETUID = _default_getuid


class _NamedGetter(object):
    """
    Discriminator callable binding a three-argument getter to an index name.

    Defined at module scope (rather than as a lambda) so that instances can
    be pickled along with the persistent index that stores them; the wrapped
    getter must itself be picklable for that to work.
    """

    def __init__(self, getter, name):
        self.getter = getter
        self.name = name

    def __call__(self, obj, default):
        return self.getter(obj, self.name, default)


class RecordCatalog(Persistent):
    """
    Record catalog is facade for repoze.catalog backend providing
    IRecordCatalog front-end.
    """

    implements(IRecordCatalog)

    def __init__(self, name=None):
        self.name = name
        self.indexer = Catalog()
        self.mapper = DocumentMap()
        self.supported = PersistentDict()  # schema id/name->ISchemaInfo
        self._index_owners = PersistentDict()  # idx name->list of schema names

    def getObject(self, identifier):
        """
        Given an identifier as either a UID mapped in self.mapper or an
        integer id key from self.indexer, resolve object or return None.
        """
        if isinstance(identifier, int):
            identifier = self.mapper.address_for_docid(identifier)  # int->uuid
        if identifier is None:
            # docid unknown to the document map: nothing to resolve.
            return None
        resolver = queryUtility(IRecordResolver)
        if resolver is None:
            return None
        # The identifier must be handed to the resolver; previously the
        # resolver was called without arguments, discarding the identifier.
        # NOTE(review): assumes IRecordResolver utilities are callable with
        # a UID -- confirm against uu.record.interfaces.
        return resolver(identifier)

    def _add_index(self, idxname, idxtype, getter=None):
        """
        Create an index of type idxtype ('field', 'text' or 'keyword') and
        store it in self.indexer under idxname.

        getter, if provided, is a callable taking (obj, name, default); when
        omitted, values are read from the attribute named idxname.

        Raises KeyError if idxtype is not one of the supported type names.
        """
        # Import the index classes explicitly: 'import repoze.catalog' alone
        # does not guarantee that the indexes.* submodules are loaded.
        from repoze.catalog.indexes.field import CatalogFieldIndex
        from repoze.catalog.indexes.keyword import CatalogKeywordIndex
        from repoze.catalog.indexes.text import CatalogTextIndex

        idxcls = {
            'field': CatalogFieldIndex,
            'text': CatalogTextIndex,
            'keyword': CatalogKeywordIndex,
        }[idxtype]
        if getter is None:
            # A string discriminator makes the index read the attribute of
            # that name itself, which matches the former default getter and
            # (unlike a lambda) can be pickled with the index.
            discriminator = idxname
        else:
            discriminator = _NamedGetter(getter, idxname)
        self.indexer[idxname] = idxcls(discriminator)

    def bind(self, schema, omit=(), index_types=None):
        """
        Bind a schema providing IInterface.

        An object providing ISchemaInfo will be created and stored in
        self.supported.

        All fields in the schema will have corresponding indexes created for
        them (unless a fieldname is provided in the omit argument sequence)
        in self.indexer.  All index names created will be saved to the
        ISchemaInfo object being saved in self.supported.

        The index type for any field will, by default, be determined by the
        type of the field.  If the field is a sequence, by default, the
        index type will be a 'keyword' index.  If the value type is a
        TextLine or BytesLine, then *both* 'text' and 'field' indexes will
        be created for that field.  If the value type is Text (multi-line
        text), then only a (full) 'text' index will be created.

        If the field is a Bytes field, we cannot tell whether this field is
        tokenizable text, so we omit Bytes fields by default.  However,
        since BytesLine fields are constrained by textual cues (absence of
        line feeds), we index BytesLine as field and text values.  This is
        a sensible default behavior, considering some Bytes fields may
        contain large size binary content we do not want to index.

        Index types can be specified by providing a dict/mapping of
        field name keys to index type values.  Such values can be provided
        to override index creation defaults as any of:

            * A string name in ('field', 'text', 'keyword')

            * sequence (set/tuple/list) of one or more of the above names,
              except None is mutually exclusive with other choices.

            * None: equivalent to omitting field using the omit argument.

        Overrides of index_types should not declare incorrect index types
        for a field; the following should raise a ValueError:

            * Specifying a text index on a non-text field, even if the
              value can be cast to a string, it has no reasonable hope
              of being either meaningful or tokenized in the vast
              majority of cases.

                * Optimistic exception: allow any sequence field for which
                  a value type contains text or bytes based fields.

            * Specifying a keyword index on a non-sequence field.

        NOTE(review): the omit and index_types arguments are accepted but
        are not consulted by this implementation; index names come solely
        from the ISchemaInfo adapter.
        """
        # adapt context, schema: produces schema+field info with index
        # names populated for use here.
        info = getMultiAdapter((self, schema), ISchemaInfo)
        for fieldinfo in info.fields:
            for idxname in fieldinfo.indexes:
                # index names are prefixed with their type, e.g. 'field.x'
                idxtype = idxname.split('.')[0]
                if idxname not in self.indexer:
                    self._add_index(idxname, idxtype)
                if idxname not in self._index_owners:
                    self._index_owners[idxname] = PersistentList()
                owners = self._index_owners[idxname]
                # Re-binding the same schema must not register it twice,
                # otherwise unbind() could never see it as the sole owner.
                if info.name not in owners:
                    owners.append(info.name)
        self.supported[info.name] = info

    def unbind(self, schema, remove_indexes=False):
        """
        Unbind a schema providing IInterface or object providing ISchemaInfo,
        removing it from self.supported.

        If remove_indexes is True (by default, it is False), then remove any
        indexes for which the schema in question is the only schema in the
        catalog managing any respective index name.
        """
        if ISchemaInfo.providedBy(schema):
            info = schema  # already schema info, no adaptation needed
        else:
            info = getMultiAdapter((self, schema), ISchemaInfo)
        if info.name in self.supported:
            # was del(self.supported[name]) with 'name' undefined (NameError)
            del self.supported[info.name]
        if not remove_indexes:
            return
        for idxname in info.indexes:
            if idxname not in self.indexer:
                continue
            owners = self._index_owners.get(idxname)
            if owners is None or info.name not in owners:
                continue
            # Drop this schema from the owner list so that a later unbind
            # of the remaining owner(s) is able to remove the index.
            while info.name in owners:
                owners.remove(info.name)
            if not owners:
                del self.indexer[idxname]
                del self._index_owners[idxname]

    def searchable(self, schema):
        """
        Return a tuple of IFieldInfo objects, which provide the names of fields
        with respective indexes.  Fields without indexes will not have an
        IFieldInfo object returned.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def uniqueValuesFor(self, index):
        """
        Given index name, get unique values for content stored in the
        'forward index' inside the catalog for that index object.

        If index name is not in the catalog, raise KeyError.

        If index in catalog for the index name is a text index (incapable
        of providing unique values), raise a ValueError.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def comparatorsFor(self, index):
        """
        Return tuple of available comparator functions and their labels.
        Output is a tuple of two-item tuples, where the first item is
        a comparator function or class providing IComparator, and the
        second item is a human-readable label for that comparator.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def index(self, record, uid=None, getuid=DEFAULT_GETUID):
        """
        Given a record object providing any number of interfaces, index that
        record's field values for all interfaces it provides that are also
        supported by this catalog.  The record must have a UUID, which can
        be provided by value using the uid parameter, or looked up from the
        record object using a getuid function/callable.  The default
        should be to obtain a 'uid' attribute/property value from the
        record object itself.

        uid provided (or obtained) should be either:

            * A 36-byte string representation of the UUID

            * A uuid.UUID object.

        If no UUID is provided nor resolved by function, raise a ValueError.
        Note: the default getuid function will get a random UID for the
        record if None can be found.

        Note: self.mapper will store string representations of the UUID.

        Returns the string representation of the UUID of the document, useful
        if the UUID was generated by the getuid function passed to index.

        Implicit path/location indexing: if the record object provides
        zope.location.interfaces.ILocation, then index the object's
        identifier (__name__) and container (__parent__) in field and path
        indexes respectively.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def unindex(self, uid):
        """
        Given the UUID (uid) of a record as either a uuid.UUID object or as
        a string representation of UUID, remove the record from self.indexer
        and self.mapper.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def reindex(self, record, uid=None, getuid=DEFAULT_GETUID):
        """
        Alternate spelling for index(), may be optimized in implementation
        or may simply just provide a synonymous call.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def __getitem__(self, name):
        """return index for name from self.indexer, or raise KeyError"""
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def get(self, name, default=None):
        """return index for name from self.indexer, default, or None"""
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def __setitem__(self, name, index):
        """
        Set an index object into self.indexer explicitly; prefer
        self.bind(schema) for adding indexes based on zope.schema fields
        over this.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def search(self, **query):
        """
        Given a search as index-name/value mapping, return a results in the
        form (count, iterable of result uids).
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

    def query(self, query, *args, **kwargs):
        """
        Given a search as a query providing IQuery, return a results in the
        form (count, iterable of result uids).

        Additional arguments are implementation-specific, and may be used
        for sorting results.  Each implementation should gracefully ignore
        arguments it does not know about.
        """
        raise NotImplementedError('TODO')  # TODO TODO TODO

248

Older »