# Source branch: ~ubuntu-branches/ubuntu/wily/ruby-ferret/wily
# Revision 1 by Antonio Terceiro: Import upstream version 0.11.6
module Ferret
  # Instead of using documents to add data to an index you can use Hashes and
  # Arrays. The only real benefits of using a Document over a Hash are pretty
  # printing and the boost attribute. You can add the boost attribute to
  # Hashes and arrays using the BoostMixin. For example;
  #
  #    class Hash
  #      include BoostMixin
  #    end
  #
  #    class Array
  #      include BoostMixin
  #    end
  #
  #    class String
  #      include BoostMixin
  #    end
  module BoostMixin
    # Reader and writer for the +boost+ value of the including object.
    attr_accessor :boost
  end

  # Documents are the unit of indexing and search.
  #
  # A Document is a set of fields.  Each field has a name and an array of
  # textual values. If you are coming from a Lucene background you should note
  # that Fields don't have any properties except for the boost property. You
  # should use the Ferret::Index::FieldInfos class to set field properties
  # across the whole index instead.
  #
  # === Boost
  #
  # The boost attribute makes a Document more important in the index. That is,
  # you can increase the score of a match for queries that match a particular
  # document, making it more likely to appear at the top of search results.
  # You may, for example, want to boost products that have a higher user
  # rating so that they are more likely to appear in search results.
  #
  # Note: fields which are _not_ stored (see Ferret::Index::FieldInfos)
  # are _not_ available in documents retrieved from the index, e.g.
  # Ferret::Search::Searcher#doc or Ferret::Index::IndexReader#doc.
  #
  # Note: modifying a Document retrieved from the index will not modify
  # the document contained within the index. You need to delete the old
  # version of the document and add the new version of the document.
  class Document < Hash
    include BoostMixin

    # Create a new, empty Document object with a boost. The boost defaults
    # to 1.0.
    def initialize(boost = 1.0)
      super()
      @boost = boost
    end

    # Return true if the documents are equal, ie +o+ is a Document with the
    # same boost and the same fields. Field values are compared with #==.
    #
    # The comparison does not depend on the order in which the fields were
    # added: two documents holding the same name/value pairs are equal.
    def eql?(o)
      return false unless o.is_a?(Document) && o.boost == @boost
      # Equal sizes plus "every field of self is in o" implies the field
      # sets are identical, whatever their insertion order.
      return false unless size == o.size

      all? { |key, val| o.key?(key) && val == o[key] }
    end
    alias :== :eql?

    # Create a string representation of the document. Fields are listed one
    # per line, sorted by the string form of their name. Plain Array values
    # are printed as a quoted list, Field values use Field#to_s and anything
    # else is printed as a quoted string. A boost other than 1.0 is appended
    # after the closing brace as "^boost".
    def to_s
      lines = ["Document {"]
      keys.sort_by { |key| key.to_s }.each do |key|
        val = self[key]
        # instance_of? (not is_a?) so that Field, an Array subclass, falls
        # through to its own #to_s and keeps its boost suffix.
        val_str = if val.instance_of?(Array)
                    %{["#{val.join('", "')}"]}
                  elsif val.is_a?(Field)
                    val.to_s
                  else
                    %{"#{val}"}
                  end
        lines << "  :#{key} => #{val_str}"
      end
      lines << "}#{@boost == 1.0 ? '' : "^#{@boost}"}"
      lines.join("\n")
    end
  end

  # A Field is a section of a Document. A Field is basically an array with a
  # boost attribute. It also provides pretty printing of the field with the
  # #to_s method.
  #
  # === Boost
  #
  # The boost attribute makes a field more important in the index. That is,
  # you can increase the score of a match for queries that match terms in a
  # boosted field. You may, for example, want to boost a title field so that
  # matches that match in the :title field score more highly than matches that
  # match in the :contents field.
  #
  # Note: If you'd like to use boosted fields without having to use
  # the Field class you can just include the BoostMixin in the Array class.
  # See BoostMixin.
  class Field < Array
    include BoostMixin

    # Create a new Field object. You can pass data to the field as either a
    # string;
    #
    #    f = Field.new("This is the fields data")
    #
    # or as an array of strings;
    #
    #    f = Field.new(["this", "is", "an", "array", "of", "field", "data"])
    #
    # Of course Fields can also be boosted;
    #
    #    f = Field.new("field data", 1000.0)
    #
    # Any +data+ that is not an Array is stored as a single element after
    # being converted with #to_s.
    def initialize(data = [], boost = 1.0)
      super()
      @boost = boost
      if data.is_a?(Array)
        concat(data)
      else
        self << data.to_s
      end
    end

    # Return true if +o+ is a Field with the same boost and the same
    # elements. The element comparison is delegated to the Array
    # implementation via +super+.
    def eql?(o)
      o.is_a?(Field) && o.boost == @boost && super(o)
    end
    alias :== :eql?

    # Concatenate +o+ onto this field, returning a new Field that keeps this
    # field's boost.
    def +(o)
      Field.new(super(o), boost)
    end

    # Create a string representation of the field: the elements as a quoted,
    # comma separated list, followed by "^boost" when the boost is not 1.0.
    def to_s
      str = %{["#{join('", "')}"]}
      str << "^#{@boost}" if @boost != 1.0
      str
    end
  end
end