假设我有一个类似的列表:
;; Sample input: a flat vector of keywords in which :b and :d appear twice each.
(def data [:a :b :c :d :e :f :g :h :b :d :x])
和谓词,例如:
(defn start?
  "Returns true when x equals the keyword :b, false otherwise."
  [x]
  (= :b x))

(defn stop?
  "Returns true when x equals the keyword :d, false otherwise."
  [x]
  (= :d x))
标记子序列的第一个和最后一个元素。我想返回带有子组的列表,如下所示:
(parse data) => [:a [:b :c :d] :e :f :g :h [:b :d] :x]
如何使用Clojure完成此任务?
答案 0 :(得分:5)
您可以使用自定义的状态转换器:
(defn subgroups
  "Returns a stateful transducer that collects runs of items into vectors.

  A run opens on the first item for which `start?` is truthy and closes on the
  next item for which `stop?` is truthy; both boundary items are included in
  the emitted vector. Items outside a run are passed through unchanged.

  A run that is still open when the input ends is NOT emitted: the completion
  arity only delegates to the downstream reducing function."
  [start? stop?]
  (fn [rf]
    ;; The accumulator is created here, once per application of the
    ;; transducer to a reducing function, so that reusing the same transducer
    ;; value for several reductions never leaks an open run between them.
    (let [subgroup (volatile! nil)]
      (fn
        ([] (rf))
        ([result] (rf result))
        ([result item]
         (let [sg @subgroup]
           (cond
             ;; Inside a run and this item closes it: emit the whole run.
             (and (seq sg) (stop? item))
             (do (vreset! subgroup nil)
                 (rf result (conj sg item)))

             ;; Inside a run: keep buffering, emit nothing yet.
             (seq sg)
             (do (vswap! subgroup conj item)
                 result)

             ;; Not in a run and this item opens one.
             (start? item)
             (do (vreset! subgroup [item])
                 result)

             ;; Ordinary item outside any run.
             :else (rf result item))))))))
(into []
(subgroups #{:b} #{:d})
[:a :b :c :d :e :f :g :h :b :d :x])
; => [:a [:b :c :d] :e :f :g :h [:b :d] :x]
答案 1 :(得分:3)
我喜欢上面那个有状态转换器(stateful transducer)的答案,但注意到该问题并未说明:如果找到了 start 元素却始终没有找到 stop 元素,应该如何处理。如果子组一直保持打开(open)状态,转换器将截断输入序列,这可能是意外的/不希望的。考虑删除 stop 元素后的示例:
(into [] (subgroups #{:b} #{:d}) [:a :b :c :e :f :g :h :b :x])
=> [:a] ;; drops every input from the point where the never-closed subgroup opens
转换器(transducer)有一个完成(completing)arity,在这种情况下可用于刷新任何仍处于打开状态的子组:
完成(arity 1)——有些过程不会结束,但对于会结束的过程(如 transduce),完成 arity 用于产生最终值和/或刷新状态。该 arity 必须恰好调用一次 xf 的完成 arity。
此示例与原始转换器示例之间的唯一区别是完成(completing)arity:
(defn subgroups-all
  "Returns a stateful transducer that collects runs of items into vectors.

  A run opens on the first item for which `start?` is truthy and closes on the
  next item for which `stop?` is truthy; both boundary items are included in
  the emitted vector. Items outside a run are passed through unchanged.

  Unlike a version that only delegates on completion, a run that is still
  open when the input ends is flushed downstream as a final vector."
  [start? stop?]
  (fn [rf]
    ;; State is created per application of the transducer to a reducing
    ;; function so separate reductions never share an open run.
    (let [subgroup (volatile! nil)]
      (fn
        ([] (rf))
        ([result]
         ;; Completion: flush a dangling run with the step arity, then call
         ;; the downstream completion arity exactly once. `unreduced` strips
         ;; a `reduced` wrapper the flush step may have produced, because the
         ;; completion arity must receive a plain accumulated value.
         (let [sg      @subgroup
               flushed (if (seq sg)
                         (do (vreset! subgroup nil)
                             (unreduced (rf result sg)))
                         result)]
           (rf flushed)))
        ([result item]
         (let [sg @subgroup]
           (cond
             ;; Inside a run and this item closes it: emit the whole run.
             (and (seq sg) (stop? item))
             (do (vreset! subgroup nil)
                 (rf result (conj sg item)))

             ;; Inside a run: keep buffering, emit nothing yet.
             (seq sg)
             (do (vswap! subgroup conj item)
                 result)

             ;; Not in a run and this item opens one.
             (start? item)
             (do (vreset! subgroup [item])
                 result)

             ;; Ordinary item outside any run.
             :else (rf result item))))))))
然后悬空的开放组将被刷新:
(into [] (subgroups-all #{:b} #{:d}) [:a :b :c :d :e :f :g :h :b :x])
=> [:a [:b :c :d] :e :f :g :h [:b :x]]
(into [] (subgroups-all #{:b} #{:d}) [:a :b :c :e :f :g :h :b :x])
=> [:a [:b :c :e :f :g :h :b :x]]
请注意,在上一个示例中,嵌套的 start(开始)元素不会产生嵌套分组,这使我开始考虑其他解决方案...
当我更一般地把这个问题看作对序列进行“反扁平化”(unflatten)时,便想到了拉链(zipper):
(defn unflatten
  "Rebuilds nesting from the flat collection `coll` using a clojure.zip
  seq-zipper (aliased as `z`) that starts on an empty list.

  For each element, in order:
    * `open?` truthy  - a new child list holding the element is appended at
      the current location and the location moves into that child;
    * `close?` truthy while the current location has a parent - the element
      is appended and the location moves back up one level;
    * otherwise       - the element is appended at the current location.

  Returns the root of the zipper, or nil when `coll` is empty."
  [open? close? coll]
  (letfn [(open-group [loc elem]
            (z/rightmost (z/down (z/append-child loc (list elem)))))
          (close-group [loc elem]
            (z/up (z/append-child loc elem)))
          (step [loc elem]
            (if (open? elem)
              (open-group loc elem)
              ;; A closer seen at the root has nothing to close, so it is
              ;; treated like any ordinary element.
              (if (and (close? elem) (z/up loc))
                (close-group loc elem)
                (z/append-child loc elem))))]
    (when (seq coll)
      (z/root (reduce step (z/seq-zip ()) coll)))))
这将在一个空列表上创建一个拉链,并使用reduce
在输入序列上进行构建。它使用一对谓词来打开/关闭组,并允许任意嵌套组:
(unflatten #{:b} #{:d} [:a :b :c :b :d :d :e :f])
=> (:a (:b :c (:b :d) :d) :e :f)
(unflatten #{:b} #{:d} [:a :b :c :b :d :b :b :d :e :f])
=> (:a (:b :c (:b :d) (:b (:b :d) :e :f)))
(unflatten #{:b} #{:d} [:b :c :e :f])
=> ((:b :c :e :f))
(unflatten #{:b} #{:d} [:d :c :e :f])
=> (:d :c :e :f)
(unflatten #{:b} #{:d} [:c :d])
=> (:c :d)
(unflatten #{:b} #{:d} [:c :d :b])
=> (:c :d (:b))
答案 2 :(得分:0)
Clojure函数split-with
可用于完成大多数工作。唯一棘手的问题是使子组也包含stop?
值。这是一种解决方案:
(ns tst.demo.core
(:use tupelo.core demo.core tupelo.test))
;; Sample input used by the parse example.
(def data [:a :b :c :d :e :f :g :h :b :d :x])

(defn start?
  "Returns true when x equals the keyword :b."
  [x]
  (= :b x))

(defn stop?
  "Returns true when x equals the keyword :d."
  [x]
  (= :d x))
(defn parse
  "Walks `vals`, alternating between a stretch of ungrouped items (everything
  before the next `start?` item) and one group (from the `start?` item up to
  and including the next `stop?` item), accumulating into a result that
  starts as an empty vector.

  `glue`, `append` and `not-empty?` are the tupelo.core helpers brought in by
  the ns form; presumably `glue` concatenates and `append` adds a single item
  at the end - confirm against the tupelo docs."
  [vals]
  (loop [acc     []
         pending vals]
    (if-not (empty? pending)
      (let [[singles from-start] (split-with (complement start?) pending)
            [body tail]          (split-with (complement stop?) from-start)
            ;; split-with leaves the stop item at the head of `tail`;
            ;; pull it into the group.
            group                (glue body (take 1 tail))
            base                 (glue acc singles)
            ;; Only add the group when there is one, so no empty group
            ;; is appended to the result.
            next-acc             (if (not-empty? group)
                                   (append base group)
                                   base)]
        (recur next-acc (drop 1 tail)))
      acc)))
结果:
(parse data) => [:a [:b :c :d] :e :f :g :h [:b :d] :x]
我们使用t/glue
和t/append
,这样就可以始终处理向量,并且只在末尾追加(而不是像对列表使用conj
那样在开头追加)。
最后使用cond-it->
以避免粘在空的[]
向量上,这有点难看。后来我想到,这是一种相互递归的形式,非常适合trampoline
函数:
(ns tst.demo.core
(:use tupelo.core demo.core tupelo.test))
;; Sample input used by the trampoline-based parse example.
(def data [:a :b :c :d :e :f :g :h :b :d :x])

(defn start?
  "Returns true when x equals the keyword :b."
  [x]
  (= :b x))

(defn stop?
  "Returns true when x equals the keyword :d."
  [x]
  (= :d x))
;; Forward declarations: the two parser states below refer to each other.
(declare parse-singles parse-group)

(defn parse-singles
  "Parser state for items outside a group.

  Returns `result` when `vals` is empty. Otherwise glues every item before
  the next `start?` item onto `result` and returns a zero-argument function
  that continues in `parse-group` with the remaining items, so the caller can
  drive the alternation with `trampoline`."
  [result vals]
  (if-not (empty? vals)
    (let [[loose from-start] (split-with (complement start?) vals)
          acc                (glue result loose)]
      (fn [] (parse-group acc from-start)))
    result))
(defn parse-group
  "Parser state for items inside a group.

  Returns `result` when `vals` is empty. Otherwise takes every item before
  the next `stop?` item plus that stop item (when present) as one group,
  appends the group to `result`, and returns a zero-argument function that
  continues in `parse-singles` with whatever follows the stop item."
  [result vals]
  (if-not (empty? vals)
    (let [[body tail] (split-with (complement stop?) vals)
          ;; split-with leaves the stop item at the head of `tail`.
          group       (glue body (take 1 tail))
          acc         (append result group)]
      (fn [] (parse-singles acc (drop 1 tail))))
    result))
(defn parse
  "Entry point: runs the parser starting in the `parse-singles` state with an
  empty vector as the initial result, using `trampoline` to keep invoking
  the returned continuation functions until a non-function value comes back."
  [vals]
  (let [initial-result []]
    (trampoline parse-singles initial-result vals)))
;; Test form exercising parse on the sample data. `dotest` and `spyx` are not
;; defined in this snippet; presumably they come from the tupelo namespaces
;; pulled in by the ns form (spyx likely prints the expression and its value -
;; confirm against the tupelo docs).
(dotest
(spyx (parse data)))
(parse data) => [:a [:b :c :d] :e :f :g :h [:b :d] :x]
请注意,对于任何规模合理的解析任务(例如,对parse-singles
和parse-group
的调用少于数千次),您实际上都不需要使用trampoline
。在这种情况下,只需从对parse-singles
和parse-group
的两次调用中删除#
,并从parse
的定义中删除trampoline
。
一如既往,不要忘记bookmark the Clojure CheatSheet!
答案 3 :(得分:0)
这是一个使用lazy-seq和split-with的版本。 关键是考虑序列中每个元素需要产生什么,在这种情况下,伪代码如下:
;; for each element (e) in the input sequence
if (start? e)
(produce values up to an including (stop? e))
else
e
实现它的Clojure代码与上面的描述相差无几。
;; Sample input and the predicates marking the first / last item of a group.
(def data [:a :b :c :d :e :f :g :h :b :d :x])
(def start? #(= :b %))
(def stop? #(= :d %))

(defn parse
  "Groups `vals` into a sequence in which every run from a `start?` item up to
  and including the next `stop?` item becomes a nested sequence; all other
  items are emitted as-is.

  Returns nil for an empty input. Otherwise returns a seq whose first element
  is computed immediately and whose tail is wrapped in `lazy-seq`.

  End of input is detected with `seq`, so nil or false elements in `vals` are
  emitted rather than ending the parse early. A run with no `stop?` item is
  emitted with the items it has, without a trailing nil."
  [vals]
  (when-let [s (seq vals)]
    (let [e         (first s)
          [val rst] (if (start? e)
                      (let [[run remainder] (split-with (complement stop?) s)]
                        ;; `take 1` yields the stop item when present and
                        ;; nothing when the input ran out first.
                        [(concat run (take 1 remainder)) (rest remainder)])
                      [e (rest s)])]
      (cons val (lazy-seq (parse rst))))))
;; this produces the following output
(parse data) ;; => (:a (:b :c :d) :e :f :g :h (:b :d) :x)
答案 4 :(得分:0)
(defn start?
  "Returns true when x equals the keyword :b."
  [x]
  (= :b x))

(defn stop?
  "Returns true when x equals the keyword :d."
  [x]
  (= :d x))

;; Sample input for the split-with loop.
(def data [:a :b :c :d :e :f :g :h :b :d :c])
看来split-with
应该是一个不错的选择,但是嗯
;; Repeatedly peel off (1) the items before the next start? item and (2) the
;; group running up to and including the next stop? item, accumulating both
;; into a vector until no input is left after a stop item.
(loop [remaining data
       acc       []]
  (let [[singles from-start]   (split-with (complement start?) remaining)
        ;; `closer` is the stop item itself (nil when the input ran out),
        ;; `more` is everything after it.
        [body [closer & more]] (split-with (complement stop?) from-start)
        grp                    (if closer
                                 (conj (vec body) closer)
                                 (vec body))
        merged                 (into acc singles)
        ;; Skip the group when it is empty.
        acc-next               (if (seq grp)
                                 (conj merged grp)
                                 merged)]
    (if (seq more)
      (recur more acc-next)
      acc-next)))
答案 5 :(得分:0)
只是因为我喜欢FSM和快速测试台。
;; A small finite-state machine driven by `trampoline`. Each state either
;; returns the finished result vector or a zero-argument function that moves
;; to the next state.
;;
;;   initial     - decides whether the next item opens a group or stands alone
;;   start       - opens a new group containing the start item
;;   collect-vec - adds items to the open group (always the last element of
;;                 the result) until a stop item has been added
;;   collect     - adds one ungrouped item to the result
;;
;; End of input is detected by checking whether any items remain rather than
;; by testing the head for nil, so nil elements do not end the parse early.
;; The open group is reached with peek/pop, which address the end of a vector
;; directly, instead of `last` + `subvec`.
(let [start? #(= % :b)
      stop?  #(= % :d)
      data   [:a :b :c :d :e :f :g :h :b :d :x]]
  (letfn [(start [result [x & xs]]
            #(collect-vec (conj result [x]) xs))
          (collect-vec [result items]
            #(if (empty? items)
               result
               (let [[x & xs]   items
                     ;; After a stop item the group is closed, so go back to
                     ;; the initial state; otherwise keep collecting.
                     next-state (if (stop? x) initial collect-vec)]
                 (next-state (conj (pop result) (conj (peek result) x)) xs))))
          (collect [result [x & xs]]
            #(initial (conj result x) xs))
          (initial [result items]
            (cond (empty? items)         result
                  (start? (first items)) #(start result items)
                  :else                  (fn [] (collect result items))))]
    (trampoline initial [] data)))
答案 6 :(得分:0)
如果性能不是问题,我可以使用clojure.spec。首先,为要解析的数据定义语法:
(ns playground.startstop
(:require [clojure.spec.alpha :as spec]))
(defn start?
  "Returns true when x equals the keyword :b."
  [x]
  (= :b x))

(defn stop?
  "Returns true when x equals the keyword :d."
  [x]
  (= :d x))
;; Grammar for the input, registered as specs under this namespace.

;; An ordinary value: anything that is neither a start nor a stop marker.
(spec/def ::not-start-stop #(and (not (start? %))
(not (stop? %))))
;; A group: one start marker, zero or more ordinary values, one stop marker.
;; The :start / :contents / :stop tags name the parts in the conformed value.
(spec/def ::group (spec/cat :start start?
:contents (spec/* ::not-start-stop)
:stop stop?))
;; One element is either a whole group or a single ordinary value; the
;; :group / :primitive tag identifies which alternative matched.
(spec/def ::element (spec/alt :group ::group
:primitive ::not-start-stop))
;; The whole input: zero or more elements.
(spec/def ::elements (spec/* ::element))
现在,您可以使用conform
函数来解析数据了:
;; Sample input, then a conform call that parses it against the ::elements spec.
(def data [:a :b :c :d :e :f :g :h :b :d :x])
(spec/conform ::elements data)
;; => [[:primitive :a] [:group {:start :b, :contents [:c], :stop :d}] [:primitive :e] [:primitive :f] [:primitive :g] [:primitive :h] [:group {:start :b, :stop :d}] [:primitive :x]]
上面的输出不是我们想要的,因此我们定义函数来呈现结果:
(defn render
  "Turns one tagged pair `[tag payload]` into its plain representation.

  * `:primitive` - returns the payload unchanged.
  * `:group`     - returns a vector made of the payload's :start value, every
                   item of its :contents (possibly none), and its :stop value.

  Any other tag makes `case` throw, since no default clause is given."
  [[tag payload]]
  (case tag
    :primitive payload
    :group (-> [(:start payload)]
               (into (:contents payload))
               (conj (:stop payload)))))
并将其映射到已解析的数据上:
(mapv render (spec/conform ::elements data))
;; => [:a [:b :c :d] :e :f :g :h [:b :d] :x]
这种基于规范的解决方案可能不是最快的代码,但易于理解,维护,扩展和调试。