32
32
return new HistogramRelevanceProvider ();
35
// Quicksilver algorithm.
36
// http://docs.blacktree.com/quicksilver/development/string_ranking?DokuWiki=10df5a965790f5b8cc9ef63be6614516
37
public static float StringScoreForAbbreviation (string s, string ab)
36
/// Scores a string based on similarity to a query. Bonuses are given for things like
37
/// perfect matches and letters from the query starting words.
40
/// A <see cref="System.String"/>
41
/// String to be scored
43
/// <param name="query">
44
/// A <see cref="System.String"/>
45
/// Query to score against
48
/// A <see cref="System.Single"/>
49
/// A relevancy score for the string ranging from 0 to 1
51
public static float StringScoreForAbbreviation (string s, string query)
39
return StringScoreForAbbreviationInRanges (s, ab,
40
new int[] {0, s.Length},
41
new int[] {0, ab.Length});
57
string ls = s.ToLower();
59
//Find the shortest possible substring that matches the query
60
//and get the ration of their lengths for a base score
61
int[] match = findBestSubstringMatchIndices(ls, query);
62
if ((match[1] - match[0]) == 0) return 0;
63
score = query.Length / (float)(match[1] - match[0]);
64
if (score == 0) return 0;
66
//Now we weight by string length so shorter strings are better
67
score = score * .7F + query.Length / s.Length * .3F;
69
//Bonus points if the characters start words
70
float good = 0, bad = 1;
72
for(int i=match[0]; i<match[1]-1; i++)
76
if(query.Contains(ls[i+1].ToString()))
83
//A first character match counts extra
87
//The longer the acronym, the better it scores
88
good += firstCount*firstCount*4;
90
//Better yet if the match itself started there
94
//Super bonus if the whole match is at the beginning
95
if(match[1] == (query.Length - 1))
98
//Super-duper bonus if it is a perfect match
100
good += match[1] * 2 + 4;
103
score = (score + 3*good/(good+bad)) / 4;
105
//This fix makes sure that perfect matches always rank higher
106
//than split matches. Perfect matches get the .9 - 1.0 range
107
//everything else goes lower
109
if(match[1] - match[0] == query.Length)
110
score = .9f + .1f * score;
44
protected static float
45
StringScoreForAbbreviationInRanges (string s, string ab, int[] s_range, int[] ab_range)
118
/// Finds the shortest substring of s that contains all the characters of query in order
121
/// A <see cref="System.String"/>
124
/// <param name="query">
125
/// A <see cref="System.String"/>
126
/// Query to search for
129
/// A <see cref="System.Int32"/>
130
/// A two item array containing the start and end indices of the match.
131
/// No match returns {-1.-1}
133
protected static int[] findBestSubstringMatchIndices(string s, string query)
47
float score, remainingScore;
49
int[] remainingSearchRange = {0, 0};
51
if (ab_range[1] == 0) return 0.9F;
52
if (ab_range[1] > s_range[1]) return 0.0F;
53
for (i = ab_range[1]; i > 0; i--) {
54
// Search for steadily smaller portions of the abbreviation.
55
// TODO Turn this into a dynamic algorithm.
56
string ab_substring = ab.Substring (ab_range[0], i);
57
string s_substring = s.Substring (s_range[0], s_range[1]);
58
int loc = s_substring.IndexOf (ab_substring, StringComparison.CurrentCultureIgnoreCase);
59
if (loc < 0) continue;
60
remainingSearchRange[0] = loc + i;
61
remainingSearchRange[1] = s_range[1] - remainingSearchRange[0];
62
remainingScore = StringScoreForAbbreviationInRanges (s, ab,
64
new int[] {ab_range[0]+i, ab_range[1]-i});
65
if (remainingScore != 0) {
66
score = remainingSearchRange[0] - s_range[0];
67
if (loc > s_range[0]) {
68
// If some letters were skipped.
69
if (s[loc-1] == ' ') {
70
for (j = loc-2; j >= s_range[0]; j--) {
77
// Else if word is uppercase (?)
78
else if (s[loc] >= 'A') {
79
for (j = loc-1; j >= s_range[0]; j--) {
87
score -= loc - s_range[0];
90
score += remainingScore * remainingSearchRange[1];
135
if(query.Length == 0)
136
return new int[] {0,0};
139
int[] bestMatch = {-1,-1};
141
//Find the last instance of the last character of the query
142
//since we never need to search beyond that
143
int lastChar = s.Length - 1;
144
while((lastChar >= 0) && (s[lastChar] != query[query.Length - 1]))
147
//No instance of the character?
151
//Loop through each instance of the first character in query
152
while ((index = s.IndexOf(query[0], index+1, lastChar-index)) >= 0) {
153
//Is there even room for a match?
154
if(index > lastChar + 1 - query.Length) break;
156
//Look for the best match in the tail
157
//We know the first char matches, so we dont check it.
160
while(qcur < query.Length && cur < s.Length)
161
if(query[qcur] == s[cur++])
164
if((qcur == query.Length) && (((cur - index) < (bestMatch[1] - bestMatch[0])) || (bestMatch[0] == -1))) {
165
bestMatch[0] = index;
169
if(index == s.Length - 1)
98
176
public virtual void IncreaseRelevance (DoObject r, string match, DoObject other)