~ubuntu-branches/ubuntu/precise/gnome-do/precise-proposed

« back to all changes in this revision

Viewing changes to Do/src/Do.Core/RelevanceProvider.cs

  • Committer: Bazaar Package Importer
  • Author(s): Christopher James Halse Rogers
  • Date: 2008-09-14 10:09:40 UTC
  • mto: (0.1.8 sid)
  • mto: This revision was merged to the branch mainline in revision 7.
  • Revision ID: james.westby@ubuntu.com-20080914100940-kyghudg7py14bu2z
Tags: upstream-0.6.0.0
ImportĀ upstreamĀ versionĀ 0.6.0.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
32
32
            return new HistogramRelevanceProvider ();
33
33
                }
34
34
 
35
 
                // Quicksilver algorithm.
36
 
                // http://docs.blacktree.com/quicksilver/development/string_ranking?DokuWiki=10df5a965790f5b8cc9ef63be6614516
37
 
                public static float StringScoreForAbbreviation (string s, string ab)
 
35
                /// <summary>
 
36
                /// Scores a string based on similarity to a query.  Bonuses are given for things like
 
37
                /// perfect matches and letters from the query starting words.
 
38
                /// </summary>
 
39
                /// <param name="s">
 
40
                /// A <see cref="System.String"/>
 
41
                /// String to be scored
 
42
                /// </param>
 
43
                /// <param name="query">
 
44
                /// A <see cref="System.String"/>
 
45
                /// Query to score against
 
46
                /// </param>
 
47
                /// <returns>
 
48
                /// A <see cref="System.Single"/>
 
49
                /// A relevancy score for the string ranging from 0 to 1
 
50
                /// </returns>
 
51
                public static float StringScoreForAbbreviation (string s, string query)
38
52
                {
39
 
                        return StringScoreForAbbreviationInRanges (s, ab,
40
 
                                                                   new int[] {0, s.Length},
41
 
                                                                   new int[] {0, ab.Length});
 
53
                        if(query.Length == 0)
 
54
                                return 1;
 
55
                        
 
56
                        float score;
 
57
                        string ls = s.ToLower();
 
58
 
 
59
                        //Find the shortest possible substring that matches the query
 
60
                        //and get the ration of their lengths for a base score
 
61
                        int[] match = findBestSubstringMatchIndices(ls, query);
 
62
                        if ((match[1] - match[0]) == 0) return 0;
 
63
                        score = query.Length / (float)(match[1] - match[0]);
 
64
                        if (score == 0) return 0;
 
65
                        
 
66
                        //Now we weight by string length so shorter strings are better
 
67
                        score = score * .7F + query.Length / s.Length * .3F;
 
68
                        
 
69
                        //Bonus points if the characters start words
 
70
                        float good = 0, bad = 1;
 
71
                        int firstCount = 0;
 
72
                        for(int i=match[0]; i<match[1]-1; i++)
 
73
                        {
 
74
                                if(s[i] == ' ')
 
75
                                {
 
76
                                        if(query.Contains(ls[i+1].ToString()))
 
77
                                                firstCount++;
 
78
                                        else
 
79
                                                bad++;
 
80
                                }
 
81
                        }
 
82
                                                
 
83
                        //A first character match counts extra
 
84
                        if(query[0] == ls[0])
 
85
                                firstCount += 2;
 
86
                        
 
87
                        //The longer the acronym, the better it scores
 
88
                        good += firstCount*firstCount*4;
 
89
                        
 
90
                        //Better yet if the match itself started there
 
91
                        if(match[0] == 0)
 
92
                                good += 2;
 
93
                        
 
94
                        //Super bonus if the whole match is at the beginning
 
95
                        if(match[1] == (query.Length - 1))
 
96
                                good += match[1] + 4;
 
97
                        
 
98
                        //Super-duper bonus if it is a perfect match
 
99
                        if(query == ls)
 
100
                                good += match[1] * 2 + 4;                       
 
101
                        
 
102
                        if(good+bad > 0)
 
103
                                score = (score + 3*good/(good+bad)) / 4;
 
104
                        
 
105
                        //This fix makes sure that perfect matches always rank higher
 
106
                        //than split matches.  Perfect matches get the .9 - 1.0 range
 
107
                        //everything else goes lower
 
108
                        
 
109
                        if(match[1] - match[0] == query.Length)
 
110
                                score = .9f + .1f * score;
 
111
                        else
 
112
                                score = .9f * score;
 
113
                        
 
114
                        return score;
42
115
                }
43
116
 
44
 
                protected static float
45
 
                StringScoreForAbbreviationInRanges (string s, string ab, int[] s_range, int[] ab_range)
 
117
                /// <summary>
 
118
                /// Finds the shortest substring of s that contains all the characters of query in order
 
119
                /// </summary>
 
120
                /// <param name="s">
 
121
                /// A <see cref="System.String"/>
 
122
                /// String to search
 
123
                /// </param>
 
124
                /// <param name="query">
 
125
                /// A <see cref="System.String"/>
 
126
                /// Query to search for
 
127
                /// </param>
 
128
                /// <returns>
 
129
                /// A <see cref="System.Int32"/>
 
130
                /// A two item array containing the start and end indices of the match.
 
131
                /// No match returns {-1.-1}
 
132
                /// </returns>
 
133
                protected static int[] findBestSubstringMatchIndices(string s, string query)
46
134
                {
47
 
                        float score, remainingScore;
48
 
                        int i, j;
49
 
                        int[] remainingSearchRange = {0, 0};
50
 
 
51
 
                        if (ab_range[1] == 0) return 0.9F;
52
 
                        if (ab_range[1] > s_range[1]) return 0.0F;
53
 
                        for (i = ab_range[1]; i > 0; i--) {
54
 
                                // Search for steadily smaller portions of the abbreviation.
55
 
                                // TODO Turn this into a dynamic algorithm.
56
 
                                string ab_substring = ab.Substring (ab_range[0], i);
57
 
                                string s_substring = s.Substring (s_range[0], s_range[1]);
58
 
                                int loc = s_substring.IndexOf (ab_substring, StringComparison.CurrentCultureIgnoreCase);
59
 
                                if (loc < 0) continue;
60
 
                                remainingSearchRange[0] = loc + i;
61
 
                                remainingSearchRange[1] = s_range[1] - remainingSearchRange[0];
62
 
                                remainingScore = StringScoreForAbbreviationInRanges (s, ab,
63
 
                                                                                     remainingSearchRange,
64
 
                                                                                     new int[] {ab_range[0]+i, ab_range[1]-i});
65
 
                                if (remainingScore != 0) {
66
 
                                        score = remainingSearchRange[0] - s_range[0];
67
 
                                        if (loc > s_range[0]) {
68
 
                                                // If some letters were skipped.
69
 
                                                if (s[loc-1] == ' ') {
70
 
                                                        for (j = loc-2; j >= s_range[0]; j--) {
71
 
                                                                if (s[j] == ' ')
72
 
                                                                        score--;
73
 
                                                                else
74
 
                                                                        score -= 0.15F;
75
 
                                                        }
76
 
                                                }
77
 
                                                // Else if word is uppercase (?)
78
 
                                                else if (s[loc] >= 'A') {
79
 
                                                        for (j = loc-1; j >= s_range[0]; j--) {
80
 
                                                                if (s[j] >= 'A')
81
 
                                                                        score--;
82
 
                                                                else
83
 
                                                                        score -= 0.15F;
84
 
                                                        }
85
 
                                                }
86
 
                                                else {
87
 
                                                        score -= loc - s_range[0];
88
 
                                                }
89
 
                                        }
90
 
                                        score += remainingScore * remainingSearchRange[1];
91
 
                                        score /= s_range[1];
92
 
                                        return score;
 
135
                        if(query.Length == 0)
 
136
                                return new int[] {0,0};
 
137
                        
 
138
                        int index=-1;
 
139
                        int[] bestMatch = {-1,-1};
 
140
                        
 
141
                        //Find the last instance of the last character of the query
 
142
                        //since we never need to search beyond that
 
143
                        int lastChar = s.Length - 1;
 
144
                        while((lastChar >= 0) && (s[lastChar] != query[query.Length - 1]))
 
145
                                lastChar--;
 
146
                        
 
147
                        //No instance of the character?
 
148
                        if(lastChar == -1)
 
149
                                return bestMatch;
 
150
                        
 
151
                        //Loop through each instance of the first character in query
 
152
                        while ((index = s.IndexOf(query[0], index+1, lastChar-index)) >= 0) {
 
153
                                //Is there even room for a match?
 
154
                                if(index > lastChar + 1 - query.Length) break;
 
155
                                
 
156
                                //Look for the best match in the tail
 
157
                                //We know the first char matches, so we dont check it.
 
158
                                int cur  = index + 1;
 
159
                                int qcur = 1;
 
160
                                while(qcur < query.Length && cur < s.Length)
 
161
                                        if(query[qcur] == s[cur++])
 
162
                                                qcur++;
 
163
                                
 
164
                                if((qcur == query.Length) && (((cur - index) < (bestMatch[1] - bestMatch[0])) || (bestMatch[0] == -1))) {
 
165
                                        bestMatch[0] = index;
 
166
                                        bestMatch[1] = cur;
93
167
                                }
 
168
                                
 
169
                                if(index == s.Length - 1)
 
170
                                        break;
94
171
                        }
95
 
                        return 0;
 
172
                        
 
173
                        return bestMatch;
96
174
                }
97
175
 
98
176
                public virtual void IncreaseRelevance (DoObject r, string match, DoObject other)