我正在使用一些Java代码来执行快速前缀查找,使用java.util.TreeSet,我可以使用scala的TreeSet吗?还是一个不同的解决方案?
/** A class that uses a TreeSet to do fast prefix matching
*/
class PrefixMatcher {
private val _set = new java.util.TreeSet[String]
def add(s: String) = _set.add(s)
def findMatches(prefix: String): List[String] = {
val matches = new ListBuffer[String]
val tailSet = _set.tailSet(prefix)
for ( tail <- tailSet.toArray ) {
val tailString = tail.asInstanceOf[String]
if ( tailString.startsWith(prefix) )
matches += tailString
else
return matches.toList
}
matches.toList
}
}
答案 0 :(得分:12)
使用Trie。尽管事实上有些人发布了他们错误命名为试图的有序TreeMap数据结构,但实际上还没有人在这里发布过Trie。 Here是Java中Trie实现的一个相当有代表性的示例。我不知道任何Scala。另请参阅Wikipedia上对试验的解释。
答案 1 :(得分:6)
来自&amp; takeWhile 方法:
class PrefixMatcher {
private val _set = new TreeSet[String]
def add(s: String) = _set.add(s)
def findMatches(prefix: String): Iterable[String] =
_set from prefix takeWhile(_ startsWith prefix)
}
另一种方法是从前缀选择子集到前缀++(前缀后面的最小字符串)。这仅选择实际以给定前缀开头的树的范围。不必过滤条目。 subSet方法将创建基础集的视图。
在增量方法中仍然有一些工作(溢出和空字符串不起作用)但意图应该是明确的。
class PrefixMatcher {
private val _set = new java.util.TreeSet[String]
def add(s: String) = _set.add(s)
def findMatches(prefix: String) : Set[String] = {
def inc(x : String) = { //ignores overflow
assert(x.length > 0)
val last = x.length - 1
(x take last) + (x(last) + 1).asInstanceOf[Char]
}
_set.subSet(prefix, inc(prefix))
}
}
同样适用于scala jcl.TreeSet#range实现。
答案 2 :(得分:3)
据我了解,Scala TreeSet由Java TreeSet支持,但使用Scala变体可以使用序列理解(http://www.scala-lang.org/node/111)缩短代码,为您提供类似于(对于Scala 2.7):
import scala.collection.jcl.TreeSet;
class PrefixMatcher
{
private val _set = new TreeSet[String]
def add(s: String) = _set.add(s)
def findMatches(prefix: String): Iterable[String] =
for (s <- _set.from(prefix) if s.startsWith(prefix)) yield s
}
object Main
{
def main(args: Array[String]): Unit =
{
val pm = new PrefixMatcher()
pm.add("fooBar")
pm.add("fooCow")
pm.add("barFoo")
pm.findMatches("foo").foreach(println)
}
}
对于我的任何糟糕的Scala风格表示道歉,我只是习惯了这种语言。
答案 3 :(得分:2)
我blogged关于前一段时间为前缀组合找到匹配项。这是一个更难的问题,因为你不知道一个前缀何时结束而另一个前缀何时开始。你可能会感兴趣。我甚至会在我没有博客的代码下面发帖(但希望:),虽然它被删除了所有评论,但都没有用英语制作:
package com.blogspot.dcsobral.matcher.DFA
object DFA {
type Matched = List[(String, String)]
def words(s : String) = s.split("\\W").filter(! _.isEmpty).toList
}
import DFA._
import scala.runtime.RichString
class DFA {
private val initialState : State = new State(None, "")
private var currState : State = initialState
private var _input : RichString = ""
private var _badInput : RichString = ""
private var _accepted : Boolean = true
def accepted : Boolean = _accepted
def input : String = _input.reverse + _badInput.reverse
def transition(c : Char) : List[(String, Matched)] = {
if (c == '\b') backtrack
else {
if (accepted) {
val newState = currState(c)
newState match {
case Some(s) => _input = c + _input; currState = s
case None => _badInput = c + _badInput; _accepted = false
}
} else {
_badInput = c + _badInput
}
optionList
}
}
def transition(s : String) : List[(String, Matched)] = {
s foreach (c => transition(c))
optionList
}
def apply(c : Char) : List[(String, Matched)] = transition(c)
def apply(s : String) : List[(String,Matched)] = transition(s)
def backtrack : List[(String, Matched)] = {
if(_badInput isEmpty) {
_input = _input drop 1
currState.backtrack match {
case Some(s) => currState = s
case None =>
}
} else {
_badInput = _badInput drop 1
if (_badInput isEmpty) _accepted = true
}
optionList
}
def optionList : List[(String, Matched)] = if (accepted) currState.optionList else Nil
def possibleTransitions : Set[Char] = if (accepted) (currState possibleTransitions) else Set.empty
def reset : Unit = {
currState = initialState
_input = ""
_badInput = ""
_accepted = true
}
def addOption(s : String) : Unit = {
initialState addOption s
val saveInput = input
reset
transition(saveInput)
}
def removeOption(s : String) : Unit = {
initialState removeOption s
val saveInput = input
reset
transition(saveInput)
}
}
class State (val backtrack : Option[State],
val input : String) {
private var _options : List[PossibleMatch] = Nil
private val transitions : scala.collection.mutable.Map[Char, State] = scala.collection.mutable.Map.empty
private var _possibleTransitions : Set[Char] = Set.empty
private def computePossibleTransitions = {
if (! options.isEmpty)
_possibleTransitions = options map (_.possibleTransitions) reduceLeft (_++_)
else
_possibleTransitions = Set.empty
}
private def computeTransition(c : Char) : State = {
val newState = new State(Some(this), input + c)
options foreach (o => if (o.possibleTransitions contains c) (o computeTransition (newState, c)))
newState
}
def options : List[PossibleMatch] = _options
def optionList : List[(String, Matched)] = options map (pm => (pm.option, pm.bestMatch))
def possibleTransitions : Set[Char] = _possibleTransitions
def transition(c : Char) : Option[State] = {
val t = c.toLowerCase
if (possibleTransitions contains t) Some(transitions getOrElseUpdate (t, computeTransition(t))) else None
}
def apply(c : Char) : Option[State] = transition(c)
def addOption(option : String) : Unit = {
val w = words(option)
addOption(option, w.size, List(("", w.head)), w)
}
def addOption(option : String, priority : Int, matched : Matched, remaining : List[String]) : Unit = {
options find (_.option == option) match {
case Some(pM) =>
if (!pM.hasMatchOption(matched)) {
pM.addMatchOption(priority, matched, remaining)
if (priority < pM.priority) {
val (before, _ :: after) = options span (_ != pM)
val (highPriority, lowPriority) = before span (p => p.priority < priority ||
(p.priority == priority && p.option < option))
_options = highPriority ::: (pM :: lowPriority) ::: after
}
transitions foreach (t => pM computeTransition (t._2, t._1))
}
case None =>
val (highPriority, lowPriority) = options span (p => p.priority < priority ||
(p.priority == priority && p.option < option))
val newPM = new PossibleMatch(option, priority, matched, remaining)
_options = highPriority ::: (newPM :: lowPriority)
transitions foreach (t => newPM computeTransition (t._2, t._1))
}
computePossibleTransitions
}
def removeOption(option : String) : Unit = {
options find (_.option == option) match {
case Some(possibleMatch) =>
val (before, _ :: after) = options span (_ != possibleMatch)
(possibleMatch.possibleTransitions ** Set(transitions.keys.toList : _*)).toList foreach (t =>
transition(t).get.removeOption(option))
_options = before ::: after
computePossibleTransitions
case None =>
}
}
}
class PossibleMatch (val option : String,
thisPriority : Int,
matched : Matched,
remaining : List[String]) {
private var _priority = thisPriority
private var matchOptions = List(new MatchOption(priority, matched, remaining))
private var _possibleTransitions = matchOptions map (_.possibleTransitions) reduceLeft (_++_)
private def computePossibleTransitions = {
_possibleTransitions = matchOptions map (_.possibleTransitions) reduceLeft (_++_)
}
def priority : Int = _priority
def hasMatchOption(matched : Matched) : Boolean = matchOptions exists (_.matched == matched)
def addMatchOption(priority : Int, matched : Matched, remaining : List[String]) : Unit = {
if (priority < _priority) _priority = priority
val (highPriority, lowPriority) = matchOptions span (_.priority < priority)
val newMO = new MatchOption(priority, matched, remaining)
matchOptions = highPriority ::: (newMO :: lowPriority)
computePossibleTransitions
}
def bestMatch : Matched = matchOptions.head.matched.reverse.map(p => (p._1.reverse.toString, p._2)) :::
remaining.tail.map(w => ("", w))
def possibleTransitions : Set[Char] = _possibleTransitions
def computeTransition(s: State, c : Char) : Unit = {
def computeOptions(state : State,
c : Char,
priority : Int,
matched : Matched,
remaining : List[String]) : Unit = {
remaining match {
case w :: ws =>
if (!w.isEmpty && w(0).toLowerCase == c.toLowerCase) {
val newMatched = (w(0) + matched.head._1, matched.head._2.substring(1)) :: matched.tail
val newPriority = if (matched.head._1 isEmpty) (priority - 1) else priority
if (w.drop(1) isEmpty)
s.addOption(option, newPriority - 1, ("", ws.head) :: newMatched , ws)
else
s.addOption(option, newPriority, newMatched, w.substring(1) :: ws)
}
if (ws != Nil) computeOptions(s, c, priority, ("", ws.head) :: matched, ws)
case Nil =>
}
}
if(possibleTransitions contains c)
matchOptions foreach (mO => computeOptions(s, c, mO.priority, mO.matched, mO.remaining))
}
}
class MatchOption (val priority : Int,
val matched : Matched,
val remaining : List[String]) {
lazy val possibleTransitions : Set[Char] = Set( remaining map (_(0) toLowerCase) : _* )
}
但它真的需要一些重构。当我开始为博客解释时,我总是这样做。
答案 4 :(得分:1)
好吧,我刚刚意识到你想要的是我的一个朋友为另一个问题提出的建议。所以,这是他的答案,简化了你的需求。
class PrefixMatcher {
// import scala.collection.Set // Scala 2.7 needs this -- and returns a gimped Set
private var set = new scala.collection.immutable.TreeSet[String]()
private def succ(s : String) = s.take(s.length - 1) + ((s.charAt(s.length - 1) + 1)).toChar
def add(s: String) = set += s
def findMatches(prefix: String): Set[String] =
if (prefix.isEmpty) set else set.range(prefix, succ(prefix))
}