优化复杂算法

时间:2018-01-03 21:10:17

标签: python arrays numpy

我知道这不是解决这个问题的理想场所,但我不知道在哪里可以提出这个或如何分解它。我过去几周一直在研究一项功能,但是为了达到我的目的,我需要加速200-300x。

我有一个图像数组,其中所有相似颜色的像素已经被平均并设置为该平均值。然后我有一个相同高度和宽度的2D数组,它标记了图像的每个独特和非连续的特征。

使用这些我需要评估每个特征的大小及其与每个邻居的对比度。这些值用在等式中,如果该等式的输出低于某个阈值,则该特征与其最相似的邻居合并。

我已将图片和特征标签数组(使用 numpy.savetxt() 导出)上传到 OneDrive,并在下面附上了链接。

代码:

def textureRemover(pix, labeledPix, ratio = 1.0):
    """Merge low-contrast labeled regions into their most similar neighbor.

    Region IDs ``1 .. max(labeledPix)`` are visited in ascending order. For
    every region that still exists, its pixel count and mean color are
    compared with those of its neighbors (label 0 is never a merge
    candidate). When the smallest color distance is below
    ``(ratio - size / maxSize) * MAXIMUMCONTRAST``, the smaller of the two
    regions is relabeled to the larger one and painted with the surviving
    region's mean color.

    Args:
        pix: Color array indexed by the same leading axes as ``labeledPix``,
            with the color channels on the last axis.
        labeledPix: Integer label array; 0 is treated as "no region".
        ratio: Scales the contrast threshold; larger values merge more.

    Returns:
        ``pix`` (the same object that was passed in).

    Side effects:
        ``pix`` and ``labeledPix`` are modified in place, and the processing
        time of each visited region is printed in seconds.
    """
    numElements = numpy.amax(labeledPix)
    maxSize = numpy.count_nonzero(labeledPix)
    MAXIMUMCONTRAST = 443.405

    for regionID in range(1, numElements + 1):
        # time.clock() was removed in Python 3.8; perf_counter() replaces it.
        start = time.perf_counter()
        if regionID not in labeledPix:
            # The region was absorbed by an earlier merge (or never existed).
            continue

        neighborIDs = getNeighbors(labeledPix, regionID)
        if 0 in neighborIDs:
            neighborIDs.remove(0)  # label 0 is background, never merged
        regionMask = labeledPix == regionID

        size = numpy.count_nonzero(regionMask)
        contrastMin = (ratio - (size / maxSize)) * MAXIMUMCONTRAST
        regionMean = pix[regionMask].mean(axis = 0)

        significantNeighbors = {}
        if len(neighborIDs) > 200:
            # With many neighbors, compute one per-pixel distance map instead
            # of one mask per neighbor. NOTE(review): this equals the
            # per-region contrast only if every pixel already holds its
            # region's mean color -- confirm against the caller.
            labeled = labeledPix != 0
            contrast = numpy.zeros(labeledPix.shape)
            contrast[labeled] = numpy.sqrt(
                numpy.sum((regionMean - pix[labeled])**2, axis = -1))

            significantMask = contrast < contrastMin
            # numpy.unique returns ascending values, so the first candidate
            # owned by a neighbor is the minimum-contrast neighbor. The
            # original popped from the list it was iterating, which skipped
            # candidates and could miss a valid neighbor.
            for candidate in numpy.unique(contrast[significantMask]):
                owner = labeledPix[contrast == candidate][0]
                if owner in neighborIDs:
                    significantNeighbors[candidate] = owner
                    break
        else:
            for neighborID in neighborIDs:
                neighborMean = pix[labeledPix == neighborID].mean(axis = 0)
                contrast = numpy.sqrt(
                    numpy.sum((regionMean - neighborMean)**2, axis = -1))
                if contrast < contrastMin:
                    # Keyed by contrast: on an exact tie the later neighbor wins.
                    significantNeighbors[contrast] = neighborID

        if significantNeighbors:
            minNeighbor = significantNeighbors[min(significantNeighbors)]
            neighborMask = labeledPix == minNeighbor
            neighborSize = numpy.count_nonzero(neighborMask)

            if neighborSize <= size:
                # Current region is at least as large: absorb the neighbor.
                labeledPix[neighborMask] = regionID
                pix[neighborMask] = regionMean
            else:
                # Neighbor is larger: the current region dissolves into it.
                labeledPix[regionMask] = minNeighbor
                pix[regionMask] = pix[neighborMask].mean(axis = 0)

        print(time.perf_counter() - start)
    return pix

pix

labeledPix

我知道我要求很多帮助,但我已经坚持了几个星期而且我不确定我还能做些什么。任何帮助将不胜感激!

1 个答案:

答案 0 :(得分:1)

这是大部分逻辑的优化版本(我低估了所需的工作量……)。由于无法访问你的链接,我跳过了 >200 分支,并使用了模拟数据。在关闭你的 >200 分支之后,你的代码和我的代码看起来能得到相同的结果,但在我的模拟示例上,我的版本要快得多。

示例输出:

original
26.056154000000003
optimized
0.763613000000003
equal
True

代码:

import numpy as np
from numpy.lib.stride_tricks import as_strided

def mockdata(m, n, k):
    """Build a random test image and a synthetic label map.

    Args:
        m: Number of rows.
        n: Number of columns.
        k: Modulus applied to the labels, so labels fall in ``0 .. k-1``.

    Returns:
        A ``(colors, labels)`` tuple: ``colors`` is an ``(m, n, 3)`` array of
        uniform random floats in ``[0, 1)``; ``labels`` is an ``(m, n)``
        integer array derived from a sine pattern over the row/column index.
    """
    colors = np.random.random((m, n, 3))
    # Column vector of row indices and row vector of column indices, so the
    # sum below broadcasts to the full (m, n) grid.
    rows = np.arange(m)[:, None]
    cols = np.arange(n)[None, :]
    wave = np.sin(0.05 * rows) + np.sin(0.05 * cols) ** 2
    labels = np.round(k * k * wave).astype(int) % k
    return colors, labels

# When True, diagonally touching pixels also count as neighbors; when False
# only horizontal and vertical adjacency is considered.
DIAG_NEIGHBORS = True
# Scale factor of the merge threshold. Numerically 256 * sqrt(3); presumably
# the largest Euclidean distance between two RGB colors -- TODO confirm.
MAXIMUMCONTRAST = 443.405

def textureRemover2(pix, labeledPix, ratio=1.0):
    """Merge low-contrast labeled regions, using precomputed lookup tables.

    Sizes, mean colors, pixel indices and the neighbor graph of all regions
    are computed once up front and then updated incrementally on each merge,
    instead of rebuilding boolean masks for every region.

    Args:
        pix: Color array of shape ``labeledPix.shape + (3,)``.
        labeledPix: 2-D array of non-negative integer labels; 0 is treated
            as "no region" and never takes part in a merge.
        ratio: Scales the contrast threshold; larger values merge more.

    Returns:
        A modified copy of ``pix``; the inputs are left untouched.

    Side effects:
        Prints the total elapsed time in seconds.
    """
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start = time.perf_counter()
    pix, labeledPix = pix.copy(), labeledPix.copy()
    # Flat views onto the copies: writes through them update pix/labeledPix.
    pixf, labeledPixf = pix.reshape(-1, 3), labeledPix.ravel()
    m, n = labeledPix.shape
    s, t = labeledPix.strides

    # Pixel count of every label, indexed by label.
    sizes = np.bincount(labeledPixf)
    n_ids = len(sizes)
    # lblidx[i] holds the flat pixel indices carrying label i.
    lblidx = np.split(np.argsort(labeledPixf), np.cumsum(sizes[:-1]))
    lblidx[0] = None
    # Mean color of every label; maximum(sizes, 1) avoids dividing by zero
    # for label values that do not occur.
    regionMeans = np.transpose([np.bincount(labeledPixf, px)
                                / np.maximum(sizes, 1)
                                for px in pixf.T])

    def pair_set(arr, shape, strides):
        # Each entry of the strided view is a pair of adjacent labels;
        # frozenset makes (a, b) and (b, a) identical.
        return {frozenset(p) for bl in as_strided(arr, shape, strides)
                for p in bl}

    # Label pairs of horizontally and vertically adjacent pixels.
    pairs = (pair_set(labeledPix, (m, n-1, 2), (s, t, t))
             | pair_set(labeledPix, (m-1, n, 2), (s, t, s)))
    if DIAG_NEIGHBORS:
        # Down-right and down-left diagonals.
        pairs |= pair_set(labeledPix, (m-1, n-1, 2), (s, t, s+t))
        pairs |= pair_set(labeledPix[::-1], (m-1, n-1, 2), (-s, t, t-s))
    # Drop same-label pairs (they collapse to one element) and background.
    pairs = {p for p in pairs if len(p) == 2 and 0 not in p}
    nb_dict = {}
    for a, b in pairs:
        nb_dict.setdefault(a, set()).add(b)
        nb_dict.setdefault(b, set()).add(a)

    maxSize = labeledPix.size - sizes[0]

    for id_ in range(1, n_ids):
        nbs = list(nb_dict.get(id_, set()))
        if not nbs:
            # Absent, already merged away, or isolated region.
            continue
        d = regionMeans[id_] - regionMeans[nbs]
        d = np.einsum('ij,ij->i', d, d)  # squared distance to each neighbor
        mnd = np.argmin(d)
        # Compare squared distances so no square root is needed.
        if d[mnd] < ((ratio - sizes[id_]/maxSize) * MAXIMUMCONTRAST)**2:
            mn = nbs[mnd]
            lrg, sml = (id_, mn) if sizes[id_] >= sizes[mn] else (mn, id_)
            sizes[lrg], sizes[sml] = sizes[lrg] + sizes[sml], 0
            # Rewire the neighbor graph: everything that touched the small
            # region now touches the large one.
            for other in nb_dict[sml]:
                nb_dict[other].remove(sml)
                nb_dict[other].add(lrg)
            nb_dict[lrg].update(nb_dict[sml])
            nb_dict[lrg].remove(lrg)  # a region is not its own neighbor
            nb_dict[sml] = set()
            # Recolor and relabel the absorbed pixels.
            pixf[lblidx[sml]] = regionMeans[lrg]
            labeledPixf[lblidx[sml]] = lrg
            lblidx[lrg], lblidx[sml] = np.r_[lblidx[lrg], lblidx[sml]], None
    print(time.perf_counter() - start)
    return pix

from scipy.ndimage.morphology import binary_dilation
import time

# Structuring element used to grow a region by one pixel when looking for
# its neighbors: a full 3x3 block when diagonals count, otherwise a plus shape.
if DIAG_NEIGHBORS:
    STRUCTEL = np.ones((3, 3), int)
else:
    STRUCTEL = np.array([[0, 1, 0],
                         [1, 1, 1],
                         [0, 1, 0]], int)

def getNeighbors(labeledPix, regionID):
    """Return the sorted labels that touch region ``regionID``.

    The region's mask is dilated with ``STRUCTEL`` and every label found
    under the dilated mask, other than ``regionID`` itself, is reported.
    Label 0 is included when present. Raises ``KeyError`` if ``regionID``
    does not occur in ``labeledPix``.
    """
    grown = binary_dilation(labeledPix == regionID, structure=STRUCTEL)
    touching = set(labeledPix[grown])
    touching.remove(regionID)
    return sorted(touching)

# Alias so the code below, which refers to the module as `numpy`, works
# with the `import numpy as np` used in this file.
numpy = np

def textureRemover(pix, labeledPix, ratio = 1.0):
    """Reference implementation: merge low-contrast regions into a neighbor.

    Region IDs ``1 .. max(labeledPix)`` are visited in ascending order. For
    every region that still exists, its pixel count and mean color are
    compared with those of its neighbors (label 0 is never a merge
    candidate). When the smallest color distance is below
    ``(ratio - size / maxSize) * MAXIMUMCONTRAST``, the smaller of the two
    regions is relabeled to the larger one and painted with the surviving
    region's mean color.

    Args:
        pix: Color array indexed by the same leading axes as ``labeledPix``,
            with the color channels on the last axis.
        labeledPix: Integer label array; 0 is treated as "no region".
        ratio: Scales the contrast threshold; larger values merge more.

    Returns:
        A modified copy of ``pix``; the inputs are left untouched.

    Side effects:
        Prints the total elapsed time of the merge loop in seconds.
    """
    pix, labeledPix = pix.copy(), labeledPix.copy()
    numElements = numpy.amax(labeledPix)
    maxSize = numpy.count_nonzero(labeledPix)
    MAXIMUMCONTRAST = 443.405

    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start = time.perf_counter()
    for regionID in range(1, numElements + 1):
        if regionID not in labeledPix:
            # The region was absorbed by an earlier merge (or never existed).
            continue

        neighborIDs = getNeighbors(labeledPix, regionID)
        if 0 in neighborIDs:
            neighborIDs.remove(0)  # label 0 is background, never merged
        regionMask = labeledPix == regionID

        size = numpy.count_nonzero(regionMask)
        contrastMin = (ratio - (size / maxSize)) * MAXIMUMCONTRAST
        regionMean = pix[regionMask].mean(axis = 0)

        significantNeighbors = {}
        # The limit is set very high here, which effectively disables the
        # per-pixel branch for the benchmark data.
        if len(neighborIDs) > 20000:
            # NOTE(review): the per-pixel distance map equals the per-region
            # contrast only if every pixel already holds its region's mean
            # color -- confirm against the caller.
            labeled = labeledPix != 0
            contrast = numpy.zeros(labeledPix.shape)
            contrast[labeled] = numpy.sqrt(
                numpy.sum((regionMean - pix[labeled])**2, axis = -1))

            significantMask = contrast < contrastMin
            # numpy.unique returns ascending values, so the first candidate
            # owned by a neighbor is the minimum-contrast neighbor. The
            # original popped from the list it was iterating, which skipped
            # candidates and could miss a valid neighbor.
            for candidate in numpy.unique(contrast[significantMask]):
                owner = labeledPix[contrast == candidate][0]
                if owner in neighborIDs:
                    significantNeighbors[candidate] = owner
                    break
        else:
            for neighborID in neighborIDs:
                neighborMean = pix[labeledPix == neighborID].mean(axis = 0)
                contrast = numpy.sqrt(
                    numpy.sum((regionMean - neighborMean)**2, axis = -1))
                if contrast < contrastMin:
                    # Keyed by contrast: on an exact tie the later neighbor wins.
                    significantNeighbors[contrast] = neighborID

        if significantNeighbors:
            minNeighbor = significantNeighbors[min(significantNeighbors)]
            neighborMask = labeledPix == minNeighbor
            neighborSize = numpy.count_nonzero(neighborMask)

            if neighborSize <= size:
                # Current region is at least as large: absorb the neighbor.
                labeledPix[neighborMask] = regionID
                pix[neighborMask] = regionMean
            else:
                # Neighbor is larger: the current region dissolves into it.
                labeledPix[regionMask] = minNeighbor
                pix[regionMask] = pix[neighborMask].mean(axis = 0)

    print(time.perf_counter() - start)
    return pix

# Benchmark driver: run both implementations on the same 200x200 mock image
# (labels taken modulo 1000) and check that they produce the same picture.
data = mockdata(200, 200, 1000)
print('original')
# Each implementation prints its own elapsed time under the heading above it.
res0 = textureRemover(*data)
print('optimized')
res2 = textureRemover2(*data)
print('equal')
# allclose rather than exact equality: the two versions compute the region
# means in different ways, so results may differ by floating-point rounding.
print(np.allclose(res0, res2))