我正在寻找一种方法来清理传递给 Elasticsearch 的搜索词,即转义其中所有的特殊(控制)字符,类似于 this answer 中针对 Ruby 所描述的做法。Scala 中有类似的现成实现吗?
答案 0 :(得分:2)
我已将this answer中找到的ruby解决方案翻译成Scala:
以下是实现:
package util
import java.util.regex.Pattern
/**
 * Mix-in that escapes a free-text search term so it can be embedded in an
 * Elasticsearch / Lucene query string without being interpreted as query syntax.
 *
 * NOTE(review): the Elasticsearch `query_string` documentation also lists `=`, `<`
 * and `>` as reserved characters; they are left untouched here. Confirm whether
 * callers need them handled as well.
 */
trait ElasticSearchSanitizer {

  /**
   * Sanitizes special characters and set operators in elastic search search-terms.
   *
   * The steps are applied in this order:
   *  1. every character of `\/+-&|!(){}[]^~*?:` is prefixed with a backslash;
   *  2. the stand-alone upper-case words `AND`, `OR` and `NOT` get a backslash in
   *     front of each letter;
   *  3. every run of whitespace is collapsed into a single space;
   *  4. if the term holds an odd number of double quotes, the last one is escaped.
   *
   * @param term the raw search term
   * @return the escaped search term
   */
  def sanitize(term: String): String = (
    escapeSpecialCharacters _ andThen
      escapeSetOperators andThen
      collapseWhiteSpaces andThen
      escapeOddQuote
  )(term)

  /** Prefixes each query-syntax character with a backslash. */
  private def escapeSpecialCharacters(term: String): String = {
    // Pattern.quote wraps the list in \Q...\E, so every character is taken literally
    // inside the character class below (no accidental ranges or negation).
    val escapedCharacters = Pattern.quote("""\/+-&|!(){}[]^~*?:""")
    // In a replacement string "\\\\" is one literal backslash; $1 is the matched character.
    term.replaceAll(s"([$escapedCharacters])", "\\\\$1")
  }

  /** Escapes the boolean operators AND, OR and NOT when they appear as whole words. */
  private def escapeSetOperators(term: String): String = {
    val operators = Set("AND", "OR", "NOT")
    operators.foldLeft(term) { case (accTerm, op) =>
      val escapedOp = escapeEachCharacter(op)
      // \b anchors keep words that merely contain an operator (e.g. "ANDROID") intact.
      accTerm.replaceAll(s"""\\b($op)\\b""", escapedOp)
    }
  }

  /** Builds a regex replacement string that emits a backslash before every character of `op`. */
  private def escapeEachCharacter(op: String): String =
    op.toCharArray.map(ch => s"""\\\\$ch""").mkString

  /** Replaces every run of whitespace (as matched by the regex `\s+`) with a single space. */
  private def collapseWhiteSpaces(term: String): String = term.replaceAll("""\s+""", " ")

  /**
   * Escapes the last double quote when the term contains an odd number of them,
   * so that the remaining quotes are balanced.
   */
  private def escapeOddQuote(term: String): String =
    if (term.count(_ == '"') % 2 == 1) {
      // Locate the quote by index instead of with a `(.*)"(.*)` regex: `.` does not
      // match line terminators such as U+2028, which survive the whitespace collapse,
      // so a regex would escape one quote per line and could leave the term unbalanced.
      val lastQuote = term.lastIndexOf('"')
      term.substring(0, lastQuote) + "\\" + term.substring(lastQuote)
    } else term
}
答案 1 :(得分:1)
使用专门为此提供的库可能会更好:
import org.apache.lucene.queryparser.classic.QueryParserBase
....
// Delegates the escaping of the raw query string to Lucene's QueryParserBase.escape
// instead of a hand-written sanitizer. `rawQuery` is defined elsewhere (not shown here).
val escapedQ = QueryParserBase.escape(rawQuery)