我有一段像这样可以正常运行的代码,但速度很慢。
def halfconvolution(g,w,dz):
    """Return the negated, one-sided discrete convolution of ``g`` with ``w``.

    For every index ``i`` the result is

        convo[i] = -dz * sum(g[j] * w[i - j] for j in range(i))

    so ``convo[0]`` is always 0 and ``w[0]`` is never used.

    Parameters
    ----------
    g : 1-D sequence or array of length n.
    w : 1-D sequence or array with at least n elements (when n > 1).
    dz : scalar step size multiplied into every sum.

    Returns
    -------
    Array created with ``np.zeros_like(g)`` (same shape and dtype as ``g``).

    Raises
    ------
    IndexError
        If ``w`` is too short to supply ``w[1] .. w[n - 1]``.
    """
    n = len(g)
    # The element-wise loop failed with IndexError on a short w; keep that
    # contract explicit, because slicing would otherwise clip silently.
    if n > 1 and len(w) < n:
        raise IndexError("w must have at least len(g) elements")
    convo = np.zeros_like(g)
    # convo[0] stays 0 (empty sum), so start at 1.
    for i in range(1, n):
        # w[i:0:-1] is w[i], w[i-1], ..., w[1], which lines up with
        # g[0], g[1], ..., g[i-1]; the inner Python loop becomes one dot product.
        convo[i] = -np.dot(g[:i], w[i:0:-1]) * dz
    return convo
我试图把它改写成列表推导式,但一直没有成功。我试过:
# NOTE: this attempt is not equivalent to the nested loops. `for i in g` and
# `for j in w` iterate over the array *values*, not over index positions, so
# `i-j` is not an index offset. Each element is also built from the whole
# array `g` rather than `g[j]`, and there is neither an inner sum nor the
# `dz` factor.
convo=[-g*w[i-j] for i in g for j in w]
答案 0 :(得分:3)
我不确定这是否会提高性能,但既然你问到了,下面是一个列表推导式的写法:
# make the matrices square
# Vectorized form of halfconvolution(g, w, dz) for 1-D arrays:
#     convo[i] = -dz * sum(g[j] * w[i - j] for j < i)
# It builds n-by-n temporaries, so memory grows quadratically with len(g).
# w must provide at least len(g) elements. g and w themselves are left untouched.
n = g.shape[0]
# Every row of g_sq is a copy of g, i.e. g_sq[i, j] == g[j].
g_sq = np.repeat(g, n).reshape(n, n, order='F')
# take the strict lower half of g: only the terms with j < i survive
g_lower = np.tril(g_sq, k=-1)
# w_shifted[i, j] == w[i - j]. For j > i the index is negative and wraps
# around, but those entries are multiplied by the zeros of g_lower.
# see: https://stackoverflow.com/questions/20360675/roll-rows-of-a-matrix-independently
offsets = np.arange(n)
w_shifted = w[offsets[:, np.newaxis] - offsets]
# The loop version stores the *negated* sum, so negate here as well.
convo = -np.sum(g_lower * w_shifted, axis=1) * dz
使用 NumPy 加速的实现:
/// Saves the currently selected image and the comment text as a new "Post" object.
///
/// Returns without doing anything visible when no image is selected or the
/// image cannot be encoded as PNG. While the save is running, an activity
/// indicator is shown and interaction events are ignored. When the save
/// finishes, `displayAlert` is called with a success or failure message, and
/// on success the comment field and image view are cleared.
@IBAction func postImage(_ sender: Any) {
    guard let image = imageTopost.image else { return }

    let post = PFObject(className: "Post")
    post["message"] = comment.text
    post["userId"] = PFUser.current()?.objectId

    guard let pngData = UIImagePNGRepresentation(image) else { return }

    // Centered spinner that blocks the UI for the duration of the upload.
    let spinner = UIActivityIndicatorView(frame: CGRect(x:0, y: 0, width: 50, height: 50))
    spinner.center = view.center
    spinner.hidesWhenStopped = true
    spinner.activityIndicatorViewStyle = .gray
    view.addSubview(spinner)
    spinner.startAnimating()
    UIApplication.shared.beginIgnoringInteractionEvents()

    post["imageFile"] = PFFile(name: "image.png", data: pngData)

    post.saveInBackground(block: { (succeeded, _) in
        // Re-enable the UI before reporting the outcome.
        spinner.stopAnimating()
        UIApplication.shared.endIgnoringInteractionEvents()

        guard succeeded else {
            self.displayAlert(title: "Etwas ist schiefgelaufen", message: "Dein Bild konnte leider nicht gespeichert werden. Versuche es nochmal. Dein Internet muss eingeschalten sein.")
            return
        }

        self.displayAlert(title: "Hat super geklappt", message: "Dein Ziel wurde erfolgreich hinzugefügt")
        self.comment.text = ""
        self.imageTopost.image = nil
    })
}
要让它正常工作,w 和 g 的大小必须相同;如果不同,我相信也能找到变通办法。
希望这样的加速对你来说更可以接受。请始终尝试把逻辑/问题改写成向量/矩阵乘法。
答案 1 :(得分:1)
内部循环可以用 sum 函数替换(不要用同名变量覆盖它),
然后把外层循环追加到它的末尾:
Duplicate instance declarations:
instance Show a => Show (Stack a) -- Defined at stackOp.hs:2:29
instance Show a => Show (Stack a) -- Defined at stackOp.hs:11:10
答案 2 :(得分:1)
出于性能原因,请不要使用列表推导式,
而应使用
**Numba**
import numba as nb
import numpy as np
import time
@nb.njit(fastmath=True)
def halfconvolution(g,w,dz):
    """Numba-compiled negated one-sided convolution of ``g`` with ``w``.

    For each index ``i`` the output holds
    ``-sum(g[j] * w[i - j] * dz for j in range(i))``.
    The output has ``g.shape[0]`` elements and the dtype of ``g``.
    """
    n = g.shape[0]
    out = np.empty(n, dtype=g.dtype)
    for row in range(n):
        # Float accumulator; named `acc` so the builtin `sum` is not shadowed.
        acc = 0.
        for col in range(row):
            acc += g[col] * w[row - col] * dz
        out[row] = -acc
    return out
# Benchmark driver: runs the selected implementation n_runs times on random
# input and prints the total elapsed time in seconds.
g=np.random.rand(1000)
w=np.random.rand(1000)
dz=0.15
n_runs=1000
# perf_counter() is the monotonic, high-resolution clock intended for timing;
# time.time() follows the wall clock and may jump or have coarse resolution.
t1=time.perf_counter()
for _ in range(n_runs):
    # Swap the two lines below to time the compiled function instead.
    # NOTE(review): the first call of a jitted function presumably includes
    # compilation time - call it once before starting the timer.
    #res=halfconvolution(g,w,dz)
    res=[-sum(g[j]*w[i-j]*dz for j in range(i)) for i in range(len(g))]
print(time.perf_counter()-t1)
print("Done")
**性能**
List Comprehension: 0.27s per iteration
Numba Version: 0.6ms per iteration
因此这两个版本之间相差约 500 倍。如果你想一次在多个数组上调用这个函数,还可以轻松地将该问题并行化,这样至少还能再获得约“核心数量”倍的加速。