# Imported from upstream version 0.11.6 (import by Antonio Terceiro).
module Ferret
  # Instead of using documents to add data to an index you can use Hashes and
  # Arrays. The only real benefits of using a Document over a Hash are pretty
  # printing and the boost attribute. You can add the boost attribute to
  # Hashes and Arrays using the BoostMixin. For example:
  #
  #   class Hash
  #     include BoostMixin
  #   end
  #
  #   class Array
  #     include BoostMixin
  #   end
  #
  #   class String
  #     include BoostMixin
  #   end
  module BoostMixin
    # Readable and writable boost value of the including object.
    attr_accessor :boost
  end

  # Documents are the unit of indexing and search.
  #
  # A Document is a set of fields. Each field has a name and an array of
  # textual values. If you are coming from a Lucene background you should note
  # that Fields don't have any properties except for the boost property. You
  # should use the Ferret::Index::FieldInfos class to set field properties
  # across the whole index instead.
  #
  # === Boost
  #
  # The boost attribute makes a Document more important in the index. That is,
  # you can increase the score of a match for queries that match a particular
  # document, making it more likely to appear at the top of search results.
  # You may, for example, want to boost products that have a higher user
  # rating so that they are more likely to appear in search results.
  #
  # Note: fields which are _not_ stored (see Ferret::Index::FieldInfos)
  # are _not_ available in documents retrieved from the index, e.g.
  # Ferret::Search::Searcher#doc or Ferret::Index::IndexReader#doc.
  #
  # Note: modifying a Document retrieved from the index will not modify
  # the document contained within the index. You need to delete the old
  # version of the document and add the new version of the document.
  class Document < Hash
    include BoostMixin

    # Create a new Document object with a boost. The boost defaults to 1.0.
    def initialize(boost = 1.0)
      # Explicit empty argument list: a bare +super+ would forward +boost+
      # to Hash#initialize, where it would become the hash's default value.
      super()
      @boost = boost
    end

    # Return true if +o+ is a Document with the same boost and the same
    # fields. Field values are compared with ==. The order in which the
    # fields were added is irrelevant, as it is for plain Hash equality.
    def eql?(o)
      o.is_a?(Document) && o.boost == @boost && size == o.size &&
        all? { |key, value| o.key?(key) && value == o[key] }
    end
    alias :== :eql?

    # Create a string representation of the document. Fields are listed one
    # per line, sorted by the string form of their name. The boost is
    # appended after the closing brace as "^boost" unless it equals 1.0.
    def to_s
      buf = ["Document {"]
      keys.sort_by { |key| key.to_s }.each do |key|
        val = self[key]
        # instance_of? (not is_a?) so that a Field, which is an Array
        # subclass, is rendered by its own #to_s including its boost.
        val_str = if val.instance_of?(Array)
                    %{["#{val.join('", "')}"]}
                  elsif val.is_a?(Field)
                    val.to_s
                  else
                    %{"#{val}"}
                  end
        buf << " :#{key} => #{val_str}"
      end
      buf << "}#{@boost == 1.0 ? "" : "^" + @boost.to_s}"
      buf.join("\n")
    end
  end

  # A Field is a section of a Document. A Field is basically an array with a
  # boost attribute. It also provides pretty printing of the field with the
  # #to_s method.
  #
  # === Boost
  #
  # The boost attribute makes a field more important in the index. That is,
  # you can increase the score of a match for queries that match terms in a
  # boosted field. You may, for example, want to boost a title field so that
  # matches that match in the :title field score more highly than matches that
  # match in the :contents field.
  #
  # Note: If you'd like to use boosted fields without having to use
  # the Field class you can just include the BoostMixin in the Array class.
  # See BoostMixin.
  class Field < Array
    include BoostMixin

    # Create a new Field object. You can pass data to the field as either a
    # string;
    #
    #   f = Field.new("This is the fields data")
    #
    # or as an array of strings;
    #
    #   f = Field.new(["this", "is", "an", "array", "of", "field", "data"])
    #
    # Of course Fields can also be boosted;
    #
    #   f = Field.new("field data", 1000.0)
    #
    # Any non-Array +data+ is converted with #to_s and stored as the single
    # element of the field.
    def initialize(data = [], boost = 1.0)
      # Explicit empty argument list so +data+ and +boost+ are not forwarded
      # to Array#initialize as a size and a fill value.
      super()
      @boost = boost
      if data.is_a?(Array)
        data.each { |v| self << v }
      else
        self << data.to_s
      end
    end

    # Return true if +o+ is a Field with the same boost and the same
    # elements, as decided by the superclass comparison.
    def eql?(o)
      o.is_a?(Field) && o.boost == @boost && super(o)
    end
    alias :== :eql?

    # Concatenate +o+ onto this field, returning a new Field that keeps this
    # field's boost.
    def +(o)
      Field.new(super(o), boost)
    end

    # Create a string representation of the field: the quoted elements in
    # square brackets, followed by "^boost" unless the boost equals 1.0.
    def to_s
      str = %{["#{join('", "')}"]}
      @boost == 1.0 ? str : "#{str}^#{@boost}"
    end
  end
end