我有一个中等大小的 map(约 100 万个键值对,每个条目都比较小,例如 (first mymap) => ["7:21658846-21658846" {["C" "T"] {"central_nervous_system" 1}}]
),我用下面的函数把它归约(reduce)成另一个 map(省略了一些辅助函数,因为它们与问题无关,也不影响性能)。
有没有什么明显且容易解决的问题?有没有不符合惯用法的写法,改掉之后可以大幅提升性能?这段代码看起来不太容易并行化——但其中是否有真正低效的地方?我怀疑问题出在那个不断累积的 map 上。
*编辑:补充了 assoc-in-sum 的定义
(defn add-mut-freq-firstTS
  "Reduces `snp-freq` (a map of genomic position -> mutation frequencies) into
  one cumulative map, starting from an empty map.

  For every entry, only the first transcript returned by
  `cosu/map-ts-aa-pos` is used, and that transcript is handed to
  `add-mut-freq**` as both the transcript and the first transcript.
  ***ONLY USED FOR STATS CALCS***"
  [CDS-ref snp-freq]
  (letfn [(add-entry [acc posn mut-freq]
            (let [entry    [posn mut-freq]
                  first-ts (first (cosu/map-ts-aa-pos CDS-ref entry))]
              ;; Same transcript in both slots: one transcript per SNP.
              (add-mut-freq** acc first-ts first-ts entry)))]
    (reduce-kv add-entry {} snp-freq)))
(defn add-mut-freq**
  "Folds the mutation frequencies of a single SNP into the cumulative map `m`
  for the transcript `ts`.

  `ts` is destructured as [ts_ID SNP_aa_posn SNP_aa_frame _ gene strand] and
  `snp-freq` as [posn nt-mut-freq], where nt-mut-freq maps a mutation to a
  map of per-tissue counts. Running totals are kept at transcript level
  (:ts-cnt), amino-acid position level (:aa-cnt), genomic position level
  (:pos-cnt) and per mutation (:posnt-cnt). When `ts` equals `first-ts` the
  genomic position is additionally flagged with :first true."
  [m ts first-ts snp-freq]
  (let [[ts_ID SNP_aa_posn SNP_aa_frame _ gene strand] ts
        [posn nt-mut-freq] snp-freq
        ;; Key paths shared by the updates below.
        ts-path [:ts ts_ID]
        aa-path (conj ts-path :snp-aa-pos SNP_aa_posn)
        nt-path (conj aa-path :snp-nt-posn posn)
        flagged (if (= ts first-ts)
                  (assoc-in m (conj nt-path :first) true)
                  m)
        seeded  (assoc-in flagged (conj ts-path :gene) gene)
        add-mutation
        (fn [acc mut tiss-freq]
          (let [snp-count (apply + (vals tiss-freq))
                mut-path  (conj nt-path :mut-nt mut)]
            (-> acc
                (u/assoc-in-sum (conj ts-path :ts-cnt) snp-count)
                (assoc-in (conj ts-path :ts-strand) strand)
                (u/assoc-in-sum (conj aa-path :aa-cnt) snp-count)
                (u/assoc-in-sum (conj nt-path :pos-cnt) snp-count)
                (assoc-in (conj nt-path :ts-frame) SNP_aa_frame)
                (u/assoc-in-sum (conj mut-path :posnt-cnt) snp-count)
                (add-tissue-counts ts_ID SNP_aa_posn tiss-freq)
                (assoc-in (conj mut-path :mut-tiss-freq) tiss-freq))))]
    (reduce-kv add-mutation seeded nt-mut-freq)))
(defn add-tissue-counts
  "Adds every tissue -> count pair of `tiss-map` to three running totals in
  `m`: per amino-acid position of the transcript (:aa-tiss-cnt), per
  transcript (:ts-tiss-cnt) and overall (:tiss-cnt). Returns the updated map."
  [m ts_ID SNP_aa_posn tiss-map]
  (letfn [(add-tissue [acc tissue n]
            (let [acc (u/assoc-in-sum acc [:ts ts_ID :snp-aa-pos SNP_aa_posn :aa-tiss-cnt tissue] n)
                  acc (u/assoc-in-sum acc [:ts ts_ID :ts-tiss-cnt tissue] n)]
              (u/assoc-in-sum acc [:tiss-cnt tissue] n)))]
    (reduce-kv add-tissue m tiss-map)))
(defn assoc-in-sum
  "Same as assoc-in except that if the key already exists, the value is added
  to instead of replaced.

  m       - the (possibly nested) map to update
  key-vec - path of keys, as for assoc-in; missing levels are created
  v       - the amount to add, or the value to store when nothing truthy is
            present at the path

  Returns the updated map."
  [m key-vec v]
  ;; update-in walks the key path once, instead of once for get-in and once
  ;; more for assoc-in. The truthiness test is kept as-is, so an existing nil
  ;; or false is replaced by `v` rather than added to.
  (update-in m key-vec (fn [ex-val] (if ex-val (+ ex-val v) v))))
(defn test-xf
  "Stateful transducer that copies every [k m] entry into the result while
  keeping a running sum of (get-in m [[\"C\" \"T\"] \"x\"]).

  rf - the downstream reducing function.

  The step arity writes to `result` with assoc! directly instead of calling
  `rf`, so `result` must be a transient map, as it is when this is used via
  (into {} test-xf coll). On completion the accumulated sum is stored under
  :total-sum before `rf` finishes the reduction."
  [rf]
  (let [sum (volatile! 0)]
    (fn
      ([] (rf))
      ([result] (rf (assoc! result :total-sum @sum)))
      ([result [k m]]
       ;; calculate sums etc.
       (vswap! sum + (get-in m [["C" "T"] "x"]))
       ;; Result is transient map while in reduction!
       (-> result
           (assoc! :mydata "hello")
           (assoc! k m))))))
(defn data
  "Returns a lazy sequence of `n` sample entries of the form
  [\"key-<i>\" {[\"C\" \"T\"] {\"x\" 1}}] for i from 0 below n."
  [n]
  (map (fn [i] [(str "key-" i) {["C" "T"] {"x" 1}}])
       (range n)))
;; Time the whole reduction and return only the size of the resulting map.
(time (count (into {} test-xf (data 1000000))))
"Elapsed time: 1750.867127 msecs"
=> 1000000