ridiculousfish
2013-05-25 15:41:18 -07:00
parent ee7339b661
commit 908b07527e
9 changed files with 542 additions and 333 deletions

View File

@@ -1764,6 +1764,120 @@ bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &valu
return suffix_size <= value.size() && value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0;
}
/* Reports whether every character of seq can be found in str, in the same
   order, though not necessarily adjacent to one another. An empty seq is
   treated as a subsequence of any string. */
static bool subsequence_in_string(const wcstring &seq, const wcstring &str)
{
    /* A sequence longer than the string can never fit inside it */
    if (seq.size() > str.size())
    {
        return false;
    }

    /* Position in str from which the next character is searched. With an
       empty seq the loop body never runs and we report success. */
    size_t search_from = 0;
    for (size_t i = 0; i < seq.size(); i++)
    {
        /* find() yields npos both when the character is absent and when
           search_from has already run off the end of str */
        const size_t hit = str.find(seq.at(i), search_from);
        if (hit == wcstring::npos)
        {
            return false;
        }

        /* Matched: the next character has to appear strictly after this one */
        search_from = hit + 1;
    }

    /* Every character of seq was located in order */
    return true;
}
/* Constructs a match result holding the given match type together with its
   primary and secondary distance values. */
string_fuzzy_match_t::string_fuzzy_match_t(enum fuzzy_match_type_t t, size_t distance_first, size_t distance_second)
    : type(t)
    , match_distance_first(distance_first)
    , match_distance_second(distance_second)
{
    /* All three fields are set by the initializer list above */
}
/* Classifies how 'string' matches 'match_against'. The match kinds are tried
   in a fixed order (exact, prefix, case-insensitive, case-insensitive prefix,
   substring, subsequence) and the first one that applies is returned. A kind
   is only attempted when limit_type compares >= that kind's enumerator.

   For every kind except the two whole-string matches, the first distance is
   the number of characters of match_against left unmatched. For substring
   matches the second distance is the offset at which 'string' was found.

   Returns a fuzzy_match_none result with zero distances when nothing applies. */
string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string, const wcstring &match_against, fuzzy_match_type_t limit_type)
{
    /* Identical strings */
    if (limit_type >= fuzzy_match_exact && string == match_against)
    {
        return string_fuzzy_match_t(fuzzy_match_exact, 0, 0);
    }

    /* 'string' begins match_against */
    if (limit_type >= fuzzy_match_prefix && string_prefixes_string(string, match_against))
    {
        assert(match_against.size() >= string.size());
        return string_fuzzy_match_t(fuzzy_match_prefix, match_against.size() - string.size(), 0);
    }

    /* Identical when case is ignored */
    if (limit_type >= fuzzy_match_case_insensitive && wcscasecmp(string.c_str(), match_against.c_str()) == 0)
    {
        return string_fuzzy_match_t(fuzzy_match_case_insensitive, 0, 0);
    }

    /* 'string' begins match_against when case is ignored */
    if (limit_type >= fuzzy_match_prefix_case_insensitive && string_prefixes_string_case_insensitive(string, match_against))
    {
        assert(match_against.size() >= string.size());
        return string_fuzzy_match_t(fuzzy_match_prefix_case_insensitive, match_against.size() - string.size(), 0);
    }

    /* 'string' appears somewhere inside match_against */
    if (limit_type >= fuzzy_match_substring)
    {
        const size_t location = match_against.find(string);
        if (location != wcstring::npos)
        {
            assert(match_against.size() >= string.size());
            /* The offset goes in the second distance so that earlier matches are preferred */
            return string_fuzzy_match_t(fuzzy_match_substring, match_against.size() - string.size(), location);
        }
    }

    /* The characters of 'string' appear in order, possibly with gaps */
    if (limit_type >= fuzzy_match_subsequence_insertions_only && subsequence_in_string(string, match_against))
    {
        assert(match_against.size() >= string.size());
        /* It would be nice to prefer matches with greater matching runs here */
        return string_fuzzy_match_t(fuzzy_match_subsequence_insertions_only, match_against.size() - string.size(), 0);
    }

    /* Nothing matched within the permitted limit */
    return string_fuzzy_match_t(fuzzy_match_none, 0, 0);
}
/* Three-way comparison of two values of the same type: yields -1 when a
   orders before b, 0 when they compare equal, and 1 otherwise. */
template<typename T>
static inline int compare_ints(T a, T b)
{
    return (a < b) ? -1 : ((a == b) ? 0 : 1);
}
// Orders two match results: the match type decides first, then the first
// distance, then the second distance. Returns -1, 0 or 1.
int string_fuzzy_match_t::compare(const string_fuzzy_match_t &rhs) const
{
    // Each later field is consulted only while everything before it is tied
    int order = compare_ints(this->type, rhs.type);
    if (order == 0)
    {
        order = compare_ints(this->match_distance_first, rhs.match_distance_first);
    }
    if (order == 0)
    {
        order = compare_ints(this->match_distance_second, rhs.match_distance_second);
    }
    return order; // still 0 when all three fields are equal
}
bool list_contains_string(const wcstring_list_t &list, const wcstring &str)
{
return std::find(list.begin(), list.end(), str) != list.end();