15
// Regex character class of filename delimiters: '-', whitespace, '_', '.',
// apostrophe, and round/square brackets.
#define DELIM "[-\\s_\\.'\\(\\)\\[\\]]"
16
// A run of digits bounded on each side by either a string edge or one or more
// DELIM characters. Three capture groups: leading delimiters, the digits,
// trailing delimiters.
#define NUMRE "(^|" DELIM "+)(\\d+)($|" DELIM "+)"
18
// Splits s into tokens and appends them to the end of store.
//   store  - receives the tokens; existing contents are kept.
//   s      - text to split.
//   delims - inserted verbatim into a regex character class, so any
//            regex-special characters must already be escaped by the caller.
// `rex` is declared outside this listing (presumably a shared Regexx object).
void string_split(list<string> &store, const string &s, const string &delims)
20
// Atomic group matching each maximal run of characters not in delims.
string expr("(?>[^" + delims + "]+)");
21
rex.exec(s, expr, Regexx::global);
22
// Every match found by the exec above becomes one token.
store.insert(store.end(), rex.match.begin(), rex.match.end());
25
// Returns a string form of c. The body is not shown in this listing.
// The result is appended to `extradelims`, which is later placed inside a
// regex character class, so this presumably escapes regex metacharacters
// -- TODO confirm against the full definition.
string escape_char(char c)
42
// Rewrites filename in place so that likely field separators become "/".
// Several lines of this function (including its return statement) are not
// shown in this listing; the comments below cover only the visible steps.
// Callers use the bool result as a success/confidence flag.
bool imms_magic_preprocess_filename(string &filename)
44
// Runs of two or more of '-', whitespace, '_' or '.' become a separator.
// NOTE(review): no Regexx::global flag is passed here, unlike the same
// pattern in imms_magic_preprocess_path -- confirm whether only the first
// run is meant to be replaced.
filename = rex.replace(filename, "[-\\s_\\.]{2,}", "/");
46
// Presumably true when the replace above matched at least once.
bool confident = rex.matches();
51
// extradelims is defined outside this function; H::numerals appends to it.
if (extradelims != "")
53
// Replace any single character from extradelims (remaining arguments of
// this call are on a line not shown).
filename = rex.replace(filename, "[" + extradelims + "]",
55
confident = rex.matches();
60
// Occurrence counts; the value returned by exec is used as a match count.
int spaces = rex.exec(filename, " ", Regexx::global);
61
int dashes = rex.exec(filename, "-", Regexx::global);
62
int scores = rex.exec(filename, "_", Regexx::global);
64
// Treat '-' as the separator when there are only one or two of them, at
// least one of spaces/underscores is absent, and the other kind is at least
// as frequent as the dashes.
if ((!spaces || !scores) && dashes && dashes < 3
65
&& (spaces >= dashes || scores >= dashes))
66
filename = rex.replace(filename, "-", "/", Regexx::global);
72
// Normalizes path in place into lower-case a-z words separated by '/'.
void imms_magic_preprocess_path(string &path)
74
path = string_tolower(path);
75
// Runs of two or more delimiter characters become a component separator.
path = rex.replace(path, "[-\\s_\\.]{2,}", "/", Regexx::global);
76
// Drop an opening '(' or '[' at the start of the string or of a component.
path = rex.replace(path, "(/|^)[\\(\\[]", "/", Regexx::global);
77
// Drop a bracketed section that is immediately followed by '/'.
path = rex.replace(path, "[\\(\\[][^/]+[\\)\\]]/", "/", Regexx::global);
78
// Drop two or three letters drawn from i/v (looks like roman numerals such
// as ii, iv, vi, iii) that follow a delimiter.
// NOTE(review): in PCRE-style syntax '$' inside a character class is a
// literal dollar sign, not an end-of-string anchor, so such a token at the
// very end of the path would not match -- confirm whether "(/|$)" was meant.
path = rex.replace(path, "[-\\s_\\./][iv]{2}i?[/$]", "/", Regexx::global);
79
// Finally remove everything that is not a lower-case letter or '/'.
path = rex.replace(path, "[^a-z/]", "", Regexx::global);
82
// Tokenizes a filename: applies the filename preprocessing, then the path
// normalization, and appends the resulting '/'-separated tokens to store.
// filename is taken by value, so the caller's string is not modified.
// The return statement is not shown; `result` holds the flag reported by
// imms_magic_preprocess_filename.
bool imms_magic_parse_filename(list<string> &store, string filename)
84
bool result = imms_magic_preprocess_filename(filename);
85
imms_magic_preprocess_path(filename);
86
string_split(store, filename, "/");
90
// Tokenizes a directory path into store. The leading directories are only
// path-normalized; the last component additionally goes through the filename
// preprocessing. path is taken by value, so the caller's string is untouched.
void imms_magic_parse_path(list<string> &store, string path)
92
// Strip trailing slashes so the last component is not empty.
path = rex.replace(path, "/+$", "", Regexx::global);
94
// Last component; note that path_get_filename also cuts a short ".xxx" suffix.
string lastdir = path_get_filename(path);
95
path = path_get_dirname(path);
97
imms_magic_preprocess_path(path);
98
string_split(store, path, "/");
100
// The flag returned by the filename preprocessing is ignored here.
imms_magic_preprocess_filename(lastdir);
101
imms_magic_preprocess_path(lastdir);
102
string_split(store, lastdir, "/");
105
// Reduces s to lower-case letters only: lower-cases it, passes it through
// string_brfilter (defined elsewhere), then removes every character outside
// a-z. The return statement is not shown in this listing.
string string_normalize(string s)
107
s = string_brfilter(string_tolower(s));
108
s = rex.replace(s, "[^a-z]", "", Regexx::global);
112
// Fuzzy comparison: returns true when the edit distance between s1 and s2
// does not exceed (len1 + len2) / (13 - slack), using integer division.
// A larger slack therefore tolerates a larger distance.
// NOTE(review): slack == 13 divides by zero and slack > 13 makes the divisor
// negative -- confirm that callers only pass smaller values.
bool string_like(const string &s1, const string &s2, int slack)
114
int len1 = s1.length();
115
int len2 = s2.length();
117
int distance = lev_edit_distance(len1, s1.c_str(), len2, s2.c_str(), 0);
118
return (len1 + len2) / (13 - slack) >= distance;
121
// Computes the matching blocks shared by s1 and s2 from their edit operations.
// The rest of the parameter list, the declarations of num_editops and n, and
// the return statement are on lines not shown in this listing.
// NOTE(review): editops and blocks are presumably allocated by the library;
// no release is visible here -- confirm who frees them.
LevMatchingBlock *get_matching_blocks(const string &s1, const string &s2,
124
int len1 = s1.length(), len2 = s2.length();
126
// Edit operations between s1 and s2 (the call continues on a line not shown).
LevEditOp *editops = lev_editops_find(len1, s1.c_str(), len2, s2.c_str(),
129
LevMatchingBlock *blocks =
130
lev_editops_matching_blocks(len1, len2, num_editops, editops, &n);
137
// Returns s with every digit replaced by '#' and the result lower-cased, so
// names differing only in numbers or letter case compare equal.
string filename_cleanup(const string &s)
139
return string_tolower(rex.replace(s, "(\\d)", "#", Regexx::global));
142
// Builds a mask for the file at path by comparing its cleaned-up name with
// the names of sibling files that share its extension: characters that most
// siblings also contain are kept, the rest become '*'.
// The declarations of de, files, num_blocks, count and strmask, and the
// return statement, are on lines not shown in this listing.
string get_filename_mask(const string& path)
144
string dirname = path_get_dirname(path);
145
string filename = filename_cleanup(path_get_filename(path));
146
string extension = path_get_extension(path);
150
// NOTE(review): no NULL check on the opendir result and no closedir are
// visible in this listing -- confirm both exist in the full source.
DIR *dir = opendir(dirname.c_str());
152
while ((de = readdir(dir)))
154
// Skip dot-files; keep only entries with the same extension.
if (de->d_name[0] != '.' && path_get_extension(de->d_name) == extension)
155
files.push_back(filename_cleanup(path_get_filename(de->d_name)));
159
// One zero-initialized counter per character of filename.
// NOTE(review): the counters are plain char, so they can overflow when more
// siblings match than a char can count; the matching delete[] is not
// visible here -- confirm.
char *mask = new char[filename.length() + 1];
160
memset(mask, 0, filename.length() + 1);
163
for (list<string>::iterator i = files.begin(); i != files.end(); i++)
167
LevMatchingBlock *blocks = get_matching_blocks(filename, *i, num_blocks);
169
// Count, per position of filename, how many siblings match that character.
for (size_t j = 0; j < num_blocks; j++)
171
for (size_t k = 0; k < blocks[j].len; k++)
172
mask[blocks[j].spos + k]++;
181
for (size_t i = 0; i < filename.length(); i++)
183
// Keep a character only when more than 70% of `count` matched it.
strmask += mask[i] > count * 0.7 ? filename[i] : '*';
193
// Working copy of the filename being simplified. The callbacks below edit it
// together with `mask`; get_simplified_filename_mask sets both beforehand.
static string filename;
195
// Replace callback: removes the matched span from both mask and filename
// using the same offset and length, so the two strings stay aligned.
// The return statement is not shown in this listing.
static string double_erase(const regexx::RegexxMatch& _match)
197
mask.erase(_match.start(), _match.length());
198
filename.erase(_match.start(), _match.length());
202
// Replace callback for NUMRE matches: swaps a delimited number for a single
// separator in both mask and filename. atom[0] and atom[2] are presumably
// the leading and trailing delimiter groups of NUMRE -- TODO confirm the
// atom indexing. Brace/else lines and the return statement are not shown.
static string numerals(const regexx::RegexxMatch& _match)
205
// Default separator when neither delimiter is worth keeping.
string replacement = "/";
206
int l1 = _match.atom[0].length(), l2 = _match.atom[2].length();
208
// Both delimiters are at most one character long.
if (l1 < 2 && l2 < 2)
210
// A leading delimiter other than space or underscore is kept as the
// replacement and, when one character long, recorded in extradelims.
if (_match.atom[0].str() != " " && _match.atom[0].str() != "_")
212
replacement = _match.atom[0].str();
213
if (_match.atom[0].length() == 1)
214
extradelims += escape_char(_match.atom[0].str()[0]);
216
// Same for the trailing delimiter; it overrides the leading one.
if (_match.atom[2].str() != " " && _match.atom[2].str() != "_")
218
replacement = _match.atom[2].str();
219
if (_match.atom[2].length() == 1)
220
extradelims += escape_char(_match.atom[2].str()[0]);
225
// Index is 2 when l1 < l2 and 0 otherwise, i.e. the longer delimiter wins.
// Presumably the else branch of the length check above -- the else line is
// not shown.
replacement = _match.atom[(!!(l1 < l2)) * 2].str();
228
// Apply the same substitution to both strings to keep them aligned.
mask.replace(_match.start(), _match.length(), replacement);
229
filename.replace(_match.start(), _match.length(), replacement);
234
// Out-of-class definitions of H's static string members.
string H::filename, H::mask;
236
// Returns (simplified filename, simplified mask) for path. Both strings are
// kept in H's static members and edited in lock-step by the H callbacks; the
// return values of replacef are discarded, so the callbacks do the work.
// Brace lines and the `do` of the loop are not shown in this listing.
pair<string, string> get_simplified_filename_mask(const string &path)
238
H::filename = string_tolower(path_get_filename(path));
239
H::mask = get_filename_mask(path);
241
// If the mask ends with "-xxx"/"-xxxx" right after a bracket or a wildcard
// (optionally followed by up to three letters), erase that suffix from both.
// NOTE(review): the test allows digits ([a-z0-9]) but the erase pattern only
// letters ([a-z]), so a suffix containing digits passes the test yet is not
// erased -- confirm which was intended.
if (rex.exec(H::mask, "(\\)|\\]|\\*[a-z]{0,3})-[a-z0-9]{3,4}$"))
242
rex.replacef(H::mask, "-[a-z]{3,4}$", H::double_erase, Regexx::global);
244
// Erase a trailing bracketed section (up to 60 characters, closing bracket
// optional) together with any delimiters in front of it.
rex.replacef(H::filename,
245
"[-\\s_\\.]*[\\(\\[][^\\]\\)]{0,60}[\\]\\)]?$",
246
H::double_erase, Regexx::global);
250
// Collapse delimited numbers; repeated until a pass finds no match.
rex.replacef(H::filename, NUMRE, H::numerals, Regexx::global);
251
} while (rex.matches());
253
// Trim delimiters from both ends.
rex.replacef(H::filename, "^[-\\s_\\.']+|[-\\s_\\.']+$",
254
H::double_erase, Regexx::global);
256
return pair<string, string>(H::filename, H::mask);
259
// Canonical form of an album name: lower-cased, with a trailing
// lp/ep/cmd/promo/demo/maxi removed, then passed through string_normalize.
string album_filter(const string &album)
261
return string_normalize(rex.replace(string_tolower(album),
262
"(lp|ep|cmd|promo|demo|maxi)$", "", Regexx::global));
265
// Canonical form of a track title. When the title contains "- ", only the
// part from the last "- " onwards is normalized and returned.
string title_filter(const string &title)
267
string normtitle = string_normalize(title);
268
size_t p = title.rfind("- ");
269
// No "- " found. The statement guarded by this test is not shown in this
// listing; presumably it returns normtitle -- TODO confirm.
if (p == string::npos)
271
// The substring starts at the "- " itself.
return string_normalize(title.substr(p));
274
// Returns the part of path up to and including its last '/'.
// With no '/' present, find_last_of yields npos and the +1 wraps to 0, so
// the result is an empty string.
string path_get_dirname(const string &path)
276
size_t last_slash = path.find_last_of("/") + 1;
277
return path.substr(0, last_slash);
280
// Returns the last component of path with a short extension removed.
string path_get_filename(const string &path)
282
// Index just past the last '/', or 0 when there is none (npos + 1 wraps).
size_t last_slash = path.find_last_of("/") + 1;
283
size_t last_dot = path.find_last_of(".");
285
// A dot only counts as the start of an extension when it lies within the
// last four characters; otherwise nothing is cut.
// NOTE(review): path.length() - 4 is unsigned, so for paths shorter than
// four characters it wraps and this test is always true -- a name such as
// "a.b" keeps its suffix. Confirm whether that is intended.
if (last_dot == string::npos || last_dot < path.length() - 4)
286
last_dot = path.length();
288
return path.substr(last_slash, last_dot - last_slash);
291
// Returns the tail of path starting at its last '.', or an empty string
// when path has no '.'.
// NOTE(review): lines between the test and the return are not shown in this
// listing; they may move last_dot past the dot, so whether the result
// includes the '.' cannot be determined from here.
string path_get_extension(const string &path)
293
size_t last_dot = path.find_last_of(".");
295
if (last_dot == string::npos)
296
last_dot = path.length();
300
return path.substr(last_dot);
303
// Returns haystack with every match of needle removed. needle is passed to
// the regex engine as a pattern, not treated as a literal string.
string string_delete(const string &haystack, const string &needle)
305
return rex.replace(haystack, needle, "", Regexx::global);