modify indexing and search logic to account for phrases
This commit is contained in:
@ -31,25 +31,48 @@ class SearchIndex {
|
||||
}
|
||||
}
|
||||
|
||||
private static String[] split(String source) {
|
||||
source = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase()
|
||||
String [] split = source.split(" ")
|
||||
/**
 * Breaks a string into the tokens derived from it.
 *
 * Three groups of tokens are produced, in this order:
 *   1. the lower-cased pieces left after every match of
 *      SplitPattern.SPLIT_PATTERN is replaced by a space;
 *   2. the lower-cased pieces obtained by splitting on ' ' only, so that
 *      words containing characters removed in step 1 stay intact;
 *   3. the whole original string, followed by its lower-cased form when
 *      that differs, so that a complete phrase can be matched.
 *
 * Empty pieces are dropped. Duplicates are not removed.
 *
 * NOTE(review): presumably the returned tokens become keys of the keyword
 * map that search() consults - confirm against the caller.
 *
 * @param source the text to tokenize; must not be null
 * @return the tokens described above
 */
private static String[] split(final String source) {
    def tokens = []

    // first split by split pattern
    String patternSplit = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase()
    patternSplit.split(" ").each { if (it.length() > 0) tokens << it }

    // then just by ' '; lower-cased because search() lower-cases every term
    // before looking it up, so a mixed-case token could never be matched
    String lowerCased = source.toLowerCase()
    lowerCased.split(' ').each { if (it.length() > 0) tokens << it }

    // and add original string, plus its lower-cased form for the same reason
    tokens << source
    if (lowerCased != source)
        tokens << lowerCased

    tokens.toArray(new String[0])
}
|
||||
|
||||
String[] search(List<String> terms) {
|
||||
Set<String> rv = null;
|
||||
|
||||
Set<String> powerSet = new HashSet<>()
|
||||
terms.each {
|
||||
powerSet.addAll(it.toLowerCase().split(' '))
|
||||
}
|
||||
|
||||
powerSet.each {
|
||||
Set<String> forWord = keywords.getOrDefault(it,[])
|
||||
if (rv == null) {
|
||||
rv = new HashSet<>(forWord)
|
||||
} else {
|
||||
rv.retainAll(forWord)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// now, filter by terms
|
||||
for (Iterator<String> iter = rv.iterator(); iter.hasNext();) {
|
||||
String candidate = iter.next()
|
||||
candidate = candidate.toLowerCase()
|
||||
boolean keep = true
|
||||
terms.each {
|
||||
keep &= candidate.contains(it)
|
||||
}
|
||||
if (!keep)
|
||||
iter.remove()
|
||||
}
|
||||
|
||||
if (rv != null)
|
||||
|
@ -90,4 +90,22 @@ class SearchIndexTest {
|
||||
def found = index.search(["muwire", "0", "3", "jar"])
|
||||
assert found.size() == 1
|
||||
}
|
||||
|
||||
/**
 * Indexes a single string made of two hyphenated words and checks that a
 * search for either hyphenated word on its own returns exactly one result.
 */
@Test
void testOriginalText() {
    initIndex(["a-b c-d"])

    // first hyphenated word
    assert index.search(['a-b']).size() == 1

    // second hyphenated word
    assert index.search(['c-d']).size() == 1
}
|
||||
|
||||
/**
 * Indexes a single string of three hyphenated words and checks phrase
 * searches: a phrase of two neighbouring words returns one result, while a
 * phrase made of the first and last word returns none.
 */
@Test
void testPhrase() {
    initIndex(["a-b c-d e-f"])

    // neighbouring words at the start of the indexed string
    def leadingPair = index.search(['a-b c-d'])
    assert leadingPair.size() == 1

    // neighbouring words at the end of the indexed string
    def trailingPair = index.search(['c-d e-f'])
    assert trailingPair.size() == 1

    // words that are not next to each other in the indexed string
    def separatedPair = index.search(['a-b e-f'])
    assert separatedPair.size() == 0
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user