嵌套组和拉链

Question

假设我有一个类似的列表：

(def data [:a :b :c :d :e :f :g :h :b :d :x])

和谓词，例如：

(defn start? [x] (= x :b))
(defn stop?  [x] (= x :d))

标记子序列的第一个和最后一个元素。我想返回带有子组的列表，如下所示：

(parse data) => [:a [:b :c :d] :e :f :g :h [:b :d] :x]

如何使用Clojure完成此任务？

Answer 1

您可以使用自定义的有状态转换器（stateful transducer）：

(defn subgroups
  "Returns a stateful transducer that collects runs of input into vectors.

  A run opens on an item for which `start?` is truthy and closes on the next
  item for which `stop?` is truthy; both delimiters are included in the
  emitted vector. Items outside a run are passed through unchanged. While a
  run is open, further `start?` items are simply accumulated (no nesting).

  A run that is still open when the input ends is dropped, because the
  completing arity does not flush it."
  [start? stop?]
  (fn [rf]
    ;; The volatile is created here, once per application of the transducer to
    ;; a reducing function, so that reusing the same transducer value in
    ;; several transductions never leaks an unfinished run from one into the
    ;; next.
    (let [subgroup (volatile! nil)]
      (fn
        ([] (rf))
        ([result] (rf result))
        ([result item]
         (let [sg @subgroup]
           (cond
             ;; Open run and a stop item: emit the finished run.
             (and (seq sg) (stop? item))
             (do (vreset! subgroup nil)
                 (rf result (conj sg item)))

             ;; Open run: keep accumulating, emit nothing yet.
             (seq sg)
             (do (vswap! subgroup conj item)
                 result)

             ;; No open run and a start item: open a new run.
             (start? item)
             (do (vreset! subgroup [item])
                 result)

             ;; Ordinary item outside any run.
             :else (rf result item))))))))

(into []
      (subgroups #{:b} #{:d})
      [:a :b :c :d :e :f :g :h :b :d :x])
; => [:a [:b :c :d] :e :f :g :h [:b :d] :x]

Answer 2

我喜欢上面那个有状态转换器（stateful transducer）的答案，但注意到该问题并未说明：如果找到了 start 元素却始终没有找到 stop 元素，应该如何处理。如果子组一直保持打开（open）状态，转换器就会截断输入序列，这可能出乎意料，也未必是想要的结果。请看下面这个去掉了 stop 元素的示例：

(into [] (subgroups #{:b} #{:d}) [:a :b :c :e :f :g :h :b :x])
=> [:a] ;; drops inputs from before (last) subgroup opens

转换器（transducer）有一个完成（completing）元数（arity），在这种情况下可以用它来刷新（flush）任何仍处于打开状态的子组：

完成（arity 1）——有些过程不会结束，但对于那些会结束的过程（如 transduce），完成元数用于产生最终值和/或刷新状态。该元数必须恰好调用一次 xf 的完成元数。

此示例与原始转换器示例之间的唯一区别在于完成元数（completing arity）：

(defn subgroups-all
  "Returns a stateful transducer that collects runs of input into vectors.

  A run opens on an item for which `start?` is truthy and closes on the next
  item for which `stop?` is truthy; both delimiters are included in the
  emitted vector. Items outside a run are passed through unchanged. While a
  run is open, further `start?` items are simply accumulated (no nesting).

  Unlike a version that ignores completion, a run that is still open when the
  input ends is flushed as a final (unterminated) vector."
  [start? stop?]
  (fn [rf]
    ;; Per-transduction state: created inside (fn [rf] ...) so that a reused
    ;; transducer value starts every transduction with no open run.
    (let [subgroup (volatile! nil)]
      (fn
        ([] (rf))
        ([result]
         ;; Completing arity: flush an open run through the step arity, then
         ;; call rf's completing arity exactly once, as the transducer
         ;; contract requires. `unreduced` unwraps the result in case the
         ;; downstream step signalled early termination with `reduced`.
         (let [sg @subgroup]
           (if (seq sg)
             (do (vreset! subgroup nil)
                 (rf (unreduced (rf result sg))))
             (rf result))))
        ([result item]
         (let [sg @subgroup]
           (cond
             ;; Open run and a stop item: emit the finished run.
             (and (seq sg) (stop? item))
             (do (vreset! subgroup nil)
                 (rf result (conj sg item)))

             ;; Open run: keep accumulating, emit nothing yet.
             (seq sg)
             (do (vswap! subgroup conj item)
                 result)

             ;; No open run and a start item: open a new run.
             (start? item)
             (do (vreset! subgroup [item])
                 result)

             ;; Ordinary item outside any run.
             :else (rf result item))))))))

然后悬空的开放组将被刷新：

(into [] (subgroups-all #{:b} #{:d}) [:a :b :c :d :e :f :g :h :b :x])
=> [:a [:b :c :d] :e :f :g :h [:b :x]]
(into [] (subgroups-all #{:b} #{:d}) [:a :b :c :e :f :g :h :b :x])
=> [:a [:b :c :e :f :g :h :b :x]]

请注意，在上一个示例中，嵌套的 start（开启）元素并不会产生嵌套分组，这使我开始考虑其他解决方案……

嵌套组和拉链

当我把这个问题更一般地看作对序列进行“反扁平化”（unflatten）时，便想到了拉链（zipper）：

(defn unflatten
  "Rebuilds nesting from the flat sequence `coll` using a seq zipper.

  An element for which `open?` is truthy starts a new nested list (containing
  that element) and moves into it. An element for which `close?` is truthy is
  appended and, when the current location has a parent, moves back up one
  level; at the top level it is appended like any other element. Groups left
  open at the end of input are kept as they are. Returns nil for empty input."
  [open? close? coll]
  (when (seq coll)
    (letfn [(step [loc elem]
              (if (open? elem)
                ;; Add a fresh child list and descend to it (it is the
                ;; rightmost child of the current node).
                (z/rightmost (z/down (z/append-child loc (list elem))))
                (let [closing? (and (close? elem) (z/up loc))
                      appended (z/append-child loc elem)]
                  (if closing?
                    (z/up appended)
                    appended))))]
      (z/root (reduce step (z/seq-zip ()) coll)))))

这将在一个空列表上创建一个拉链，并使用reduce在输入序列上进行构建。它使用一对谓词来打开/关闭组，并允许任意嵌套组：

(unflatten #{:b} #{:d} [:a :b :c :b :d :d :e :f])
=> (:a (:b :c (:b :d) :d) :e :f)
(unflatten #{:b} #{:d} [:a :b :c :b :d :b :b :d :e :f])
=> (:a (:b :c (:b :d) (:b (:b :d) :e :f)))
(unflatten #{:b} #{:d} [:b :c :e :f])
=> ((:b :c :e :f))
(unflatten #{:b} #{:d} [:d :c :e :f])
=> (:d :c :e :f)
(unflatten #{:b} #{:d} [:c :d])
=> (:c :d)
(unflatten #{:b} #{:d} [:c :d :b])
=> (:c :d (:b))

Answer 3

Clojure函数split-with可用于完成大多数工作。唯一棘手的问题是使子组也包含stop?值。这是一种解决方案：

(ns tst.demo.core
  (:use tupelo.core demo.core tupelo.test))

(def data [:a :b :c :d :e :f :g :h :b :d :x])

(defn start? [x] (= x :b))
(defn stop?  [x] (= x :d))

(defn parse
  "Walks `vals`, copying elements into a result vector and wrapping each run
  that begins at a `start?` element and ends at the next `stop?` element
  (inclusive) in its own vector."
  [vals]
  (loop [acc     []
         pending vals]
    (if (empty? pending)
      acc
      (let [;; Everything before the next start element is copied as-is.
            [plain from-start] (split-with (complement start?) pending)
            ;; The group body runs up to, but not including, the stop element.
            [body from-stop]   (split-with (complement stop?) from-start)
            ;; Pull the stop element (if any) into the group.
            group              (glue body (take 1 from-stop))
            base               (glue acc plain)
            ;; Only append the group when there actually is one.
            next-acc           (if (not-empty? group)
                                 (append base group)
                                 base)]
        (recur next-acc (drop 1 from-stop))))))

结果：

(parse data) => [:a [:b :c :d] :e :f :g :h [:b :d] :x]

我们使用 t/glue 和 t/append，这样就可以始终只处理向量，并且只在末尾追加（而不是像对列表使用 conj 那样加在开头）。

更新

末尾那个为了避免追加空 [] 向量而使用的 cond-it-> 有点难看。后来我想到，这其实是一种相互递归，非常适合使用 trampoline 函数：

(ns tst.demo.core
  (:use tupelo.core demo.core tupelo.test))

(def data [:a :b :c :d :e :f :g :h :b :d :x])

(defn start? [x] (= x :b))
(defn stop?  [x] (= x :d))

(declare parse-singles parse-group)

(defn parse-singles
  "Trampoline state for input outside a group: copies elements up to the next
  `start?` element into `result`, then returns a thunk that continues in
  `parse-group`. Returns `result` itself once `vals` is exhausted."
  [result vals]
  (if (seq vals)
    (let [[plain from-start] (split-with (complement start?) vals)
          acc                (glue result plain)]
      (fn [] (parse-group acc from-start)))
    result))

(defn parse-group
  "Trampoline state for input at the start of a group: gathers elements up to
  and including the next `stop?` element into one vector, appends it to
  `result`, then returns a thunk that continues in `parse-singles`. Returns
  `result` itself once `vals` is exhausted."
  [result vals]
  (if (seq vals)
    (let [[body from-stop] (split-with (complement stop?) vals)
          ;; Pull the stop element (if any) into the group.
          group            (glue body (take 1 from-stop))
          acc              (append result group)]
      (fn [] (parse-singles acc (drop 1 from-stop))))
    result))

(defn parse
  "Groups `vals` by bouncing between `parse-singles` and `parse-group` on a
  trampoline, starting outside any group with an empty result vector."
  [vals]
  (trampoline parse-singles [] vals))

(dotest
  (spyx (parse data)))

(parse data) => [:a [:b :c :d] :e :f :g :h [:b :d] :x]

请注意，对于任何规模合理的解析任务（例如，对 parse-singles 和 parse-group 的调用少于数千次），您实际上并不需要使用 trampoline。在这种情况下，只需去掉对 parse-singles 和 parse-group 的两处调用前面的 #，并从 parse 的定义中去掉 trampoline 即可。

Clojure CheatSheet

一如既往，别忘了把 Clojure CheatSheet 加入书签！

Answer 4

这是一个使用lazy-seq和split-with的版本。关键是考虑序列中每个元素需要产生什么，在这种情况下，伪代码如下：

;; for each element (e) in the input sequence

if (start? e) 
  (produce values up to and including (stop? e))
else 
  e

实现它的 Clojure 代码与上面的描述相差无几。

(def data [:a :b :c :d :e :f :g :h :b :d :x])

(def start? #(= :b %))
(def stop?  #(= :d %))

(defn parse
  "Returns a sequence in which every run of `vals` that begins at a `start?`
  element and ends at the next `stop?` element (inclusive) is replaced by a
  single nested sequence; all other elements are passed through unchanged.
  The tail of the result is produced lazily. Returns nil for empty input.

  A run with no closing `stop?` element is emitted as-is, containing whatever
  input remained."
  [vals]
  ;; Test for emptiness with `seq` rather than `(first vals)`, so that nil or
  ;; false elements in the input do not end the output early.
  (when-let [s (seq vals)]
    (let [e         (first s)
          [val rst] (if (start? e)
                      (let [[run remainder] (split-with (complement stop?) s)]
                        ;; `(take 1 remainder)` adds the stop element only if
                        ;; there is one; `(first remainder)` would add a
                        ;; spurious nil to an unterminated run.
                        [(concat run (take 1 remainder)) (rest remainder)])
                      [e (rest s)])]
      (cons val (lazy-seq (parse rst))))))

;; this produces the following output
(parse data) ;; => (:a (:b :c :d) :e :f :g :h (:b :d) :x)

Answer 5

(defn start? [x] (= x :b))
(defn stop?  [x] (= x :d))
(def data [:a :b :c :d :e :f :g :h :b :d :c])

看来split-with应该是一个不错的选择，但是嗯

;; Repeatedly peel off "plain elements, then one group" from the front of the
;; input until nothing is left after a stop element.
(loop [pending data
       acc     []]
  (let [;; Elements before the next start element are copied as-is.
        [plain from-start]        (split-with (complement start?) pending)
        ;; Group body, then the stop element and whatever follows it.
        [body [stop & remaining]] (split-with (complement stop?) from-start)
        ;; Include the stop element in the group when one was found.
        group                     (if stop
                                    (conj (vec body) stop)
                                    (vec body))
        acc                       (into acc plain)
        ;; Skip the group entirely when it is empty.
        acc                       (if (seq group)
                                    (conj acc group)
                                    acc)]
    (if (seq remaining)
      (recur remaining acc)
      acc)))

Answer 6

只是因为我喜欢FSM和快速测试台。

;; Finite-state machine driven by `trampoline`. Each state function takes the
;; result vector built so far and the remaining input, and returns either the
;; final result or a zero-argument function that performs the next transition.
(let [start? #(= % :b)
          stop?  #(= % :d)
          data   [:a :b :c :d :e :f :g :h :b :d :x]]
        ;; `start`: x is a start element; open a new group by appending [x] to
        ;; result, then continue in `collect-vec`.
        (letfn [(start [result [x & xs]]
                    #(collect-vec (conj result [x]) xs))

                ;; `collect-vec`: a group is open and is the last element of
                ;; result. Replace that last element with itself plus x, then
                ;; go back to `initial` if x is a stop element, otherwise stay
                ;; in `collect-vec`. A nil x (destructuring of exhausted input,
                ;; or a literal nil element) ends the run and returns result.
                (collect-vec [result [x & xs]]
                    #(if (nil? x)
                         result
                         ((if (stop? x) initial collect-vec)
                             (conj (subvec result 0 (dec (count result))) (conj (last result) x)) xs)))

                ;; `collect`: x is an ordinary element outside any group;
                ;; append it to result and return to `initial`.
                (collect [result [x & xs]]
                    #(initial (conj result x) xs))

                ;; `initial`: dispatch on the next element without consuming
                ;; it (the whole remaining input `v` is handed on). A nil x
                ;; ends the run and returns result.
                (initial [result [x & xs :as v]]
                    (cond (nil? x) result
                          (start? x) #(start result v)
                          :else (fn [] (collect result v))))]
            (trampoline initial [] data)))

Answer 7

如果性能不是问题，我可以使用clojure.spec。首先，为要解析的数据定义语法：

(ns playground.startstop
  (:require [clojure.spec.alpha :as spec]))

(defn start? [x] (= x :b))
(defn stop?  [x] (= x :d))

;; An element that is neither a start nor a stop delimiter.
(spec/def ::not-start-stop #(and (not (start? %))
                                 (not (stop? %))))

;; A group: one start element, zero or more non-delimiter elements, then one
;; stop element. Conforms to a map with :start, :contents and :stop keys
;; (:contents is absent when the group body is empty).
(spec/def ::group (spec/cat :start start?
                            :contents (spec/* ::not-start-stop)
                            :stop stop?))

;; One top-level item: either a whole group or a single non-delimiter element.
;; Conforms to a tagged pair, [:group {...}] or [:primitive x].
(spec/def ::element (spec/alt :group ::group
                              :primitive ::not-start-stop))

;; The whole input: any number of elements. Because ::group only admits
;; non-delimiter contents, input with a start inside an open group, an
;; unterminated group, or a stray stop element matches no alternative.
(spec/def ::elements (spec/* ::element))

现在，您可以使用conform函数来解析数据了：

(def data [:a :b :c :d :e :f :g :h :b :d :x])

(spec/conform ::elements data)
;; => [[:primitive :a] [:group {:start :b, :contents [:c], :stop :d}] [:primitive :e] [:primitive :f] [:primitive :g] [:primitive :h] [:group {:start :b, :stop :d}] [:primitive :x]]

上面的输出不是我们想要的，因此我们定义函数来呈现结果：

(defn render
  "Turns one conformed element, a `[type data]` pair, back into plain data:
  a `:primitive` yields its value unchanged; a `:group` yields a vector of its
  `:start` value, each of its `:contents` values (if any), and its `:stop`
  value."
  [[type data]]
  (case type
    :group     (let [{:keys [start contents stop]} data]
                 (conj (into [start] contents) stop))
    :primitive data))

并将其映射到已解析的数据上：

(mapv render (spec/conform ::elements data))
;; => [:a [:b :c :d] :e :f :g :h [:b :d] :x]

这种基于规范的解决方案可能不是最快的代码，但易于理解，维护，扩展和调试。

如何使用开始/停止谓词对列表的连续元素进行分组？

7 个答案:

嵌套组和拉链

更新

Clojure CheatSheet