如何在python中快速求和大型的numpy数组?

时间:2019-07-17 18:09:55

标签: python performance addition scientific-computing

我正在尝试编写一些python代码来解决特定的能量最小化问题,并且遇到了将很多非常大的4维数组加在一起的瓶颈。

我有一个拉格朗日量(Lagrangian),可以根据它对待更新量的导数来定义更新函数。这些导数我用有限差分来计算,这一步相对较快。然后把它们输入到一个用于更新 n(向量场)的函数中。除了 dx、dy、dz、k11、k22、k33 和 q0(它们是浮点数)之外,下面的每个变量都是一个尺寸为 200x200x200x3 的 numpy 数组。

我尝试使用 numba 和 cython 来加速它,但只得到了很小的加速(也许是 10%)。不过我并不熟悉它们,所以可能没有正确地使用。此外,我已经在 MATLAB 中实现了同样的问题,执行相同操作的速度似乎快两倍。理想情况下,我想坚持使用 python。有什么办法可以加快这些数组运算的速度吗?谢谢您的宝贵时间。

import numpy as np

def _central_diff(f, axis, h):
    """Return (roll(f, 1) - roll(f, -1)) / (2*h) along ``axis`` with periodic wrap.

    NOTE(review): ``np.roll(f, 1, axis)[i]`` is ``f[i-1]``, so this evaluates
    ``(f[i-1] - f[i+1]) / (2*h)``, i.e. the negative of the usual central
    difference. The orientation is kept exactly as in the original code;
    confirm that this sign is what the update equations expect.
    """
    return (np.roll(f, 1, axis) - np.roll(f, -1, axis)) / (2*h)


def _second_diff(f, axis, h):
    """Return the periodic second difference of ``f`` along ``axis`` for spacing ``h``."""
    return (np.roll(f, 1, axis) + np.roll(f, -1, axis) - 2*f) / h**2


def _mixed_diff(f, axis_a, h_a, axis_b, h_b):
    """Return the periodic mixed second difference of ``f`` along two axes.

    The two shifts along ``axis_a`` are computed once and reused, which saves
    two ``np.roll`` calls per mixed derivative compared with re-rolling.
    """
    shifted_plus = np.roll(f, 1, axis_a)
    shifted_minus = np.roll(f, -1, axis_a)
    return (np.roll(shifted_plus, 1, axis_b) - np.roll(shifted_plus, -1, axis_b)
            - np.roll(shifted_minus, 1, axis_b) + np.roll(shifted_minus, -1, axis_b)) / (4*h_a*h_b)


def F(n,xE,yE,zE,xB,yB,zB,dx,dy,dz,k11,k22,k33,q0):
    """Compute the three components of the update for the field ``n``.

    Parameters
    ----------
    n : ndarray
        4-D array; ``n[:, :, :, 0]``, ``n[:, :, :, 1]`` and ``n[:, :, :, 2]``
        are read as the x, y and z components of the field.
    xE, yE, zE, xB, yB, zB : ndarray or float
        Terms added unchanged to the x, y and z updates respectively.
    dx, dy, dz : float
        Grid spacing along axes 0, 1 and 2.
    k11, k22, k33, q0 : float
        Scalar coefficients of the update expressions.

    Returns
    -------
    tuple
        ``(x_update, y_update, z_update)``.

    Notes
    -----
    All differences wrap around periodically (``np.roll``). Each second
    difference is divided by the squared spacing of the axis along which it is
    taken; the previous revision divided by the spacing belonging to the
    *component* (e.g. ``xnyy`` by ``dx**2``), which is only correct when
    ``dx == dy == dz``.
    """
    # field components
    xn=n[:,:,:,0]
    yn=n[:,:,:,1]
    zn=n[:,:,:,2]

    # first differences: <component>n<axis>
    xnx=_central_diff(xn,0,dx)
    xny=_central_diff(xn,1,dy)
    xnz=_central_diff(xn,2,dz)
    ynx=_central_diff(yn,0,dx)
    yny=_central_diff(yn,1,dy)
    ynz=_central_diff(yn,2,dz)
    znx=_central_diff(zn,0,dx)
    zny=_central_diff(zn,1,dy)
    znz=_central_diff(zn,2,dz)

    # second differences: the spacing follows the differentiated axis
    xnxx=_second_diff(xn,0,dx)
    xnyy=_second_diff(xn,1,dy)
    xnzz=_second_diff(xn,2,dz)
    ynxx=_second_diff(yn,0,dx)
    ynyy=_second_diff(yn,1,dy)
    ynzz=_second_diff(yn,2,dz)
    znxx=_second_diff(zn,0,dx)
    znyy=_second_diff(zn,1,dy)
    znzz=_second_diff(zn,2,dz)

    # mixed second differences
    xnxy=_mixed_diff(xn,0,dx,1,dy)
    xnxz=_mixed_diff(xn,0,dx,2,dz)
    xnyz=_mixed_diff(xn,1,dy,2,dz)
    ynxy=_mixed_diff(yn,0,dx,1,dy)
    ynxz=_mixed_diff(yn,0,dx,2,dz)
    ynyz=_mixed_diff(yn,1,dy,2,dz)
    znxy=_mixed_diff(zn,0,dx,1,dy)
    znxz=_mixed_diff(zn,0,dx,2,dz)
    znyz=_mixed_diff(zn,1,dy,2,dz)

    #code to find the minimization steps (expressions kept verbatim)
    x_update = -1.0*k11*(xnxx + ynxy + znxz) - 1.0*k22*(ynz - zny)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*ynz + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*zny - 1.0*k22*((xny - ynx)*zny - (xnz - znx)*yny + (ynz - zny)*xny + (xnyy - ynxy)*zn - (xnyz - znxy)*yn + (ynyz - znyy)*xn)*zn + 1.0*k22*((xny - ynx)*znz - (xnz - znx)*ynz + (ynz - zny)*xnz + (xnyz - ynxz)*zn - (xnzz - znxz)*yn + (ynzz - znyz)*xn)*yn + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*(xny - ynx) + ((xnz - znx)*xn + (ynz - zny)*yn)*(xnz - znx)) - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*xny + ((xny - ynx)*yn + (xnz - znx)*zn)*yny + ((xny - ynx)*xny - (ynz - zny)*zny + (xnyy - ynxy)*xn - (ynyz - znyy)*zn)*xn + ((xny - ynx)*yny + (xnz - znx)*zny + (xnyy - ynxy)*yn + (xnyz - znxy)*zn)*yn) - 1.0*k33*(((xny - ynx)*yn + (xnz - znx)*zn)*znz + ((xnz - znx)*xn + (ynz - zny)*yn)*xnz + ((xny - ynx)*ynz + (xnz - znx)*znz + (xnyz - ynxz)*yn + (xnzz - znxz)*zn)*zn + ((xnz - znx)*xnz + (ynz - zny)*ynz + (xnzz - znxz)*xn + (ynzz - znyz)*yn)*xn) + xB + xE

    y_update = -1.0*k11*(xnxy + ynyy + znyz) + k22*(xnz - znx)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*xnz - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*znx + 1.0*k22*((xny - ynx)*znx - (xnz - znx)*ynx + (ynz - zny)*xnx + (xnxy - ynxx)*zn - (xnxz - znxx)*yn + (ynxz - znxy)*xn)*zn - 1.0*k22*((xny - ynx)*znz - (xnz - znx)*ynz + (ynz - zny)*xnz + (xnyz - ynxz)*zn - (xnzz - znxz)*yn + (ynzz - znyz)*xn)*xn + k33*(((xny - ynx)*yn + (xnz - znx)*zn)*(xny - ynx) + ((xnz - znx)*xn + (ynz - zny)*yn)*(ynz - zny)) + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*xnx + ((xny - ynx)*yn + (xnz - znx)*zn)*ynx + ((xny - ynx)*xnx - (ynz - zny)*znx + (xnxy - ynxx)*xn - (ynxz - znxy)*zn)*xn + ((xny - ynx)*ynx + (xnz - znx)*znx + (xnxy - ynxx)*yn + (xnxz - znxx)*zn)*yn) + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*znz - ((xnz - znx)*xn + (ynz - zny)*yn)*ynz + ((xny - ynx)*xnz - (ynz - zny)*znz + (xnyz - ynxz)*xn - (ynzz - znyz)*zn)*zn - ((xnz - znx)*xnz + (ynz - zny)*ynz + (xnzz - znxz)*xn + (ynzz - znyz)*yn)*yn) + yB + yE

    z_update = -1.0*k11*(xnxz + ynyz + znzz) - 1.0*k22*(xny - ynx)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*xny + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*ynx - 1.0*k22*((xny - ynx)*znx - (xnz - znx)*ynx + (ynz - zny)*xnx + (xnxy - ynxx)*zn - (xnxz - znxx)*yn + (ynxz - znxy)*xn)*yn + 1.0*k22*((xny - ynx)*zny - (xnz - znx)*yny + (ynz - zny)*xny + (xnyy - ynxy)*zn - (xnyz - znxy)*yn + (ynyz - znyy)*xn)*xn - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*(ynz - zny) - ((xny - ynx)*yn + (xnz - znx)*zn)*(xnz - znx)) - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*zny - ((xnz - znx)*xn + (ynz - zny)*yn)*yny + ((xny - ynx)*xny - (ynz - zny)*zny + (xnyy - ynxy)*xn - (ynyz - znyy)*zn)*zn - ((xnz - znx)*xny + (ynz - zny)*yny + (xnyz - znxy)*xn + (ynyz - znyy)*yn)*yn) + k33*(((xny - ynx)*yn + (xnz - znx)*zn)*znx + ((xnz - znx)*xn + (ynz - zny)*yn)*xnx + ((xny - ynx)*ynx + (xnz - znx)*znx + (xnxy - ynxx)*yn + (xnxz - znxx)*zn)*zn + ((xnz - znx)*xnx + (ynz - zny)*ynx + (xnxz - znxx)*xn + (ynxz - znxy)*yn)*xn) + zB + zE

    return x_update,y_update,z_update


#COMPUTATIONAL PARAMETERS
#define computational size of cell
blocks_x=50
blocks_y=50
blocks_z=50
#define experimental size of cell in micro-meters
dimx=float(1e-6)
dimy=float(1e-6)
dimz=float(1e-6)
#define step size
dx=dimx/blocks_x
dy=dimy/blocks_y
dz=dimz/blocks_z




#EXPERIMENTAL PARAMETERS
#director profile
n = np.zeros((blocks_x,blocks_y,blocks_z,3))
n[:,:,:,0]=1

#elastic constants
k11=float(1e-12)        #splay
k22=float(1e-12)        #twist 
k33=float(1e-12)        #bend
k24=float(1e-12)        #saddle splay

#twistedness
pitch = float(1e-6)/1.5
q0 =2*np.pi/pitch  #chiral wavenumber

#applied magnetic field
E=np.zeros((blocks_x,blocks_y,blocks_z,3))
E[:,:,:,1]=float(1e2)

#applied magnetic field
B=np.zeros((blocks_x,blocks_y,blocks_z,3))
B[:,:,:,2]=float(1e2)
#viscocity


xE=E[:,:,:,0]
yE=E[:,:,:,1]
zE=E[:,:,:,2]
xB=B[:,:,:,0]
yB=B[:,:,:,1]
zB=B[:,:,:,2]

%timeit -n 100 F(n,xE,yE,zE,xB,yB,zB,dx,dy,dz,k11,k22,k33,q0)


编辑:

已更新代码,因此现在可以运行。对于形状为 (50,50,50,3) 的输入 n,魔术命令 %timeit 在 100 次循环下给出的结果是每次循环约 107 ms。可以通过重新定义 blocks_x、blocks_y 和 blocks_z 来修改输入的大小。

2 个答案:

答案 0 :(得分:0)

您是否尝试过使用数组的 `.sum()` 功能而不是冗长的逐项表达式?您还可以指定沿单个轴求和(例如 axis=2)

答案 1 :(得分:0)

这些等式中有许多重复出现的项。把它们提取为单独的变量,每项只计算一次。

def F(n,xE,yE,zE,xB,yB,zB,dx,dy,dz,k11,k22,k33,q0):
    """Factored form of the update: repeated sub-expressions are computed once.

    This is a sketch. The field components (``xn``, ``yn``, ``zn``) and all
    first, second and mixed difference arrays (``xnx`` ... ``znyz``) must be
    computed where the placeholder comment below stands; until that code is
    pasted in, calling this function raises ``NameError``.

    Returns ``[x_update, y_update, z_update]``.
    """

    #some code to calculate derivatives

    # common terms for minimization steps
    alpha = (xny - ynx)    # 32 of these
    alpha1 = alpha*zn

    beta = (xnz - znx)    # 32 of these
    beta1 = beta*yn

    gamma = (ynz - zny)    # 32 of these
    gamma1 = gamma*xn

    kappa = (q0 - alpha1 + beta1 - gamma1)    # 10 of these

    theta = (xnyz - znxy)    # 4 each of this group
    zeta = (xnyz - ynxz)
    rho = (xnyy - ynxy)
    tau = (xnxy - ynxx)
    upsilon = (xnxz - znxx)
    phi = (ynyz - znyy)
    chi = (ynxz - znxy)
    psi = (xnzz - znxz)
    omega = (ynzz - znyz)

    #code to find the minimization steps
    x_update = -1.0*k11*(xnxx + ynxy + znxz) - 1.0*k22*gamma*kappa - 1.0*k22*kappa*ynz + 1.0*k22*kappa*zny - 1.0*k22*(alpha*zny - beta*yny + gamma*xny + rho*zn - theta*yn + phi*xn)*zn + 1.0*k22*(alpha*znz - beta*ynz + gamma*xnz + zeta*zn - psi*yn + omega*xn)*yn + k33*((alpha*xn - gamma*zn)*alpha + (beta*xn + gamma*yn)*beta) - 1.0*k33*((alpha*xn - gamma*zn)*xny + (alpha*yn + beta*zn)*yny + (alpha*xny - gamma*zny + rho*xn - phi*zn)*xn + (alpha*yny + beta*zny + rho*yn + theta*zn)*yn) - 1.0*k33*((alpha*yn + beta*zn)*znz + (beta*xn + gamma*yn)*xnz + (alpha*ynz + beta*znz + zeta*yn + psi*zn)*zn + (beta*xnz + gamma*ynz + psi*xn + omega*yn)*xn) + xB + xE

    y_update = -1.0*k11*(xnxy + ynyy + znyz) + k22*beta*kappa + 1.0*k22*kappa*xnz - 1.0*k22*kappa*znx + 1.0*k22*(alpha*znx - beta*ynx + gamma*xnx + tau*zn - upsilon*yn + chi*xn)*zn - 1.0*k22*(alpha*znz - beta*ynz + gamma*xnz + zeta*zn - psi*yn + omega*xn)*xn + k33*((alpha*yn + beta*zn)*alpha + (beta*xn + gamma*yn)*gamma) + k33*((alpha*xn - gamma*zn)*xnx + (alpha*yn + beta*zn)*ynx + (alpha*xnx - gamma*znx + tau*xn - chi*zn)*xn + (alpha*ynx + beta*znx + tau*yn + upsilon*zn)*yn) + k33*((alpha*xn - gamma*zn)*znz - (beta*xn + gamma*yn)*ynz + (alpha*xnz - gamma*znz + zeta*xn - omega*zn)*zn - (beta*xnz + gamma*ynz + psi*xn + omega*yn)*yn) + yB + yE

    # the stray trailing "]" that made this statement a syntax error is removed
    z_update = -1.0*k11*(xnxz + ynyz + znzz) - 1.0*k22*alpha*kappa - 1.0*k22*kappa*xny + 1.0*k22*kappa*ynx - 1.0*k22*(alpha*znx - beta*ynx + gamma*xnx + tau*zn - upsilon*yn + chi*xn)*yn + 1.0*k22*(alpha*zny - beta*yny + gamma*xny + rho*zn - theta*yn + phi*xn)*xn - 1.0*k33*((alpha*xn - gamma*zn)*gamma - (alpha*yn + beta*zn)*beta) - 1.0*k33*((alpha*xn - gamma*zn)*zny - (beta*xn + gamma*yn)*yny + (alpha*xny - gamma*zny + rho*xn - phi*zn)*zn - (beta*xny + gamma*yny + theta*xn + phi*yn)*yn) + k33*((alpha*yn + beta*zn)*znx + (beta*xn + gamma*yn)*xnx + (alpha*ynx + beta*znx + tau*yn + upsilon*zn)*zn + (beta*xnx + gamma*ynx + upsilon*xn + chi*yn)*xn) + zB + zE

    return [x_update,y_update,z_update]

最前面的几个重复次数最多。这是通过大量搜索和替换完成的,尚未经过测试。

我找到一个用括号括起来的项,然后对它进行搜索和替换。可以继续寻找更多可以搜索和替换的模式(kappa 实际上是这里唯一的一个)。alpha1、beta1、gamma1 不是必需的,它们可以直接写进 kappa 的赋值中。

也许有人拥有可以以编程方式执行此操作的解析器(sp ??)-我做到了蛮力。


结果我还是没忍住,尝试把搜索和替换自动化。下面是我写出的代码——它应该能把重复计算的次数降到最少。同样,尚未测试其结果是否与原始语句等价。运行之后,新语句位于 new_statements 中:只需执行 print(new_statements.read()) 即可得到一个字符串,您可以将其复制/粘贴到函数中。

import collections
import io
import itertools
import re
import string

# Source statements to be factored. The stray "]" that trailed the z_update
# statement has been removed so the generated code is valid Python.
s = '''
    x_update = -1.0*k11*(xnxx + ynxy + znxz) - 1.0*k22*(ynz - zny)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*ynz + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*zny - 1.0*k22*((xny - ynx)*zny - (xnz - znx)*yny + (ynz - zny)*xny + (xnyy - ynxy)*zn - (xnyz - znxy)*yn + (ynyz - znyy)*xn)*zn + 1.0*k22*((xny - ynx)*znz - (xnz - znx)*ynz + (ynz - zny)*xnz + (xnyz - ynxz)*zn - (xnzz - znxz)*yn + (ynzz - znyz)*xn)*yn + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*(xny - ynx) + ((xnz - znx)*xn + (ynz - zny)*yn)*(xnz - znx)) - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*xny + ((xny - ynx)*yn + (xnz - znx)*zn)*yny + ((xny - ynx)*xny - (ynz - zny)*zny + (xnyy - ynxy)*xn - (ynyz - znyy)*zn)*xn + ((xny - ynx)*yny + (xnz - znx)*zny + (xnyy - ynxy)*yn + (xnyz - znxy)*zn)*yn) - 1.0*k33*(((xny - ynx)*yn + (xnz - znx)*zn)*znz + ((xnz - znx)*xn + (ynz - zny)*yn)*xnz + ((xny - ynx)*ynz + (xnz - znx)*znz + (xnyz - ynxz)*yn + (xnzz - znxz)*zn)*zn + ((xnz - znx)*xnz + (ynz - zny)*ynz + (xnzz - znxz)*xn + (ynzz - znyz)*yn)*xn) + xB + xE

    y_update = -1.0*k11*(xnxy + ynyy + znyz) + k22*(xnz - znx)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*xnz - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*znx + 1.0*k22*((xny - ynx)*znx - (xnz - znx)*ynx + (ynz - zny)*xnx + (xnxy - ynxx)*zn - (xnxz - znxx)*yn + (ynxz - znxy)*xn)*zn - 1.0*k22*((xny - ynx)*znz - (xnz - znx)*ynz + (ynz - zny)*xnz + (xnyz - ynxz)*zn - (xnzz - znxz)*yn + (ynzz - znyz)*xn)*xn + k33*(((xny - ynx)*yn + (xnz - znx)*zn)*(xny - ynx) + ((xnz - znx)*xn + (ynz - zny)*yn)*(ynz - zny)) + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*xnx + ((xny - ynx)*yn + (xnz - znx)*zn)*ynx + ((xny - ynx)*xnx - (ynz - zny)*znx + (xnxy - ynxx)*xn - (ynxz - znxy)*zn)*xn + ((xny - ynx)*ynx + (xnz - znx)*znx + (xnxy - ynxx)*yn + (xnxz - znxx)*zn)*yn) + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*znz - ((xnz - znx)*xn + (ynz - zny)*yn)*ynz + ((xny - ynx)*xnz - (ynz - zny)*znz + (xnyz - ynxz)*xn - (ynzz - znyz)*zn)*zn - ((xnz - znx)*xnz + (ynz - zny)*ynz + (xnzz - znxz)*xn + (ynzz - znyz)*yn)*yn) + yB + yE

    z_update = -1.0*k11*(xnxz + ynyz + znzz) - 1.0*k22*(xny - ynx)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*xny + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*ynx - 1.0*k22*((xny - ynx)*znx - (xnz - znx)*ynx + (ynz - zny)*xnx + (xnxy - ynxx)*zn - (xnxz - znxx)*yn + (ynxz - znxy)*xn)*yn + 1.0*k22*((xny - ynx)*zny - (xnz - znx)*yny + (ynz - zny)*xny + (xnyy - ynxy)*zn - (xnyz - znxy)*yn + (ynyz - znyy)*xn)*xn - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*(ynz - zny) - ((xny - ynx)*yn + (xnz - znx)*zn)*(xnz - znx)) - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*zny - ((xnz - znx)*xn + (ynz - zny)*yn)*yny + ((xny - ynx)*xny - (ynz - zny)*zny + (xnyy - ynxy)*xn - (ynyz - znyy)*zn)*zn - ((xnz - znx)*xny + (ynz - zny)*yny + (xnyz - znxy)*xn + (ynyz - znyy)*yn)*yn) + k33*(((xny - ynx)*yn + (xnz - znx)*zn)*znx + ((xnz - znx)*xn + (ynz - zny)*yn)*xnx + ((xny - ynx)*ynx + (xnz - znx)*znx + (xnxy - ynxx)*yn + (xnxz - znxx)*zn)*zn + ((xnz - znx)*xnx + (ynz - zny)*ynx + (xnxz - znxx)*xn + (ynxz - znxy)*yn)*xn) + zB + zE

'''

# Supply of placeholder names: each item is a tuple of 5 distinct uppercase
# letters, joined into a single identifier when it is consumed.
term_names = itertools.combinations(string.ascii_uppercase,r=5)
# Receives the generated assignment statements, then the rewritten statements.
new_statements = io.StringIO()

# Maps each replaced term to the placeholder name that stands for it.
substitutions = {}
def f(s,pattern):
    '''Replace repeating terms with names and construct assignment statements.

    Every distinct match of ``pattern`` that occurs more than once in ``s`` is
    given the next name from the module-level ``term_names`` iterator, an
    assignment ``<name> = <term>`` is appended to ``new_statements``, the
    pairing is recorded in ``substitutions``, and all occurrences of the term
    are replaced by the name.

    Only two term shapes are substituted: terms that start with ``(`` and
    terms that contain ``*`` (matched on word boundaries). Any other repeated
    term is skipped without consuming a name or emitting an assignment;
    previously such a term aborted the whole pass after an orphan assignment
    had already been written.

    Returns ``(new_s, changed)`` where ``changed`` is True when at least one
    replacement altered the text.
    '''
    new_s = s
    counts = collections.Counter(re.findall(pattern, s))
    for term, count in counts.items():
        if count <= 1:
            continue
        # Decide how to match the term before committing to a name for it.
        if term.startswith('('):
            sub = re.escape(term)
        elif '*' in term:
            sub = f'\\b{re.escape(term)}\\b'
        else:
            continue
        term_name = ''.join(next(term_names))
        substitutions[term] = term_name
        new_statements.write(f'    {term_name} = {term}\n')
        new_s = re.sub(sub, term_name, new_s)
    return new_s, new_s != s

# Preserve the untouched input text; ``s`` is rewritten in place below.
original = s

# innermost parenthesised groups (no nested parentheses inside)
pattern1 = r'\([^()]+\)'
# products of two purely alphabetic names
pattern2 = r'\b[a-zA-Z]+\*[a-zA-Z]+\b'

# Alternate the two passes until neither of them changes the text any more.
changed = True
while changed:
    s, chng1 = f(s,pattern1)
    s, chng2 = f(s,pattern2)
    changed = chng1 or chng2

# Append the fully rewritten statements after the generated assignments and
# rewind, so that new_statements.read() returns everything from the start.
new_statements.write(s)
new_statements.seek(0)
#print(new_statements.read())