如何按矩阵乘积(点积)的规则,用“逻辑或”代替乘法来对两个矩阵求和

时间:2017-08-14 20:36:28

标签: python numpy matrix

我有两个矩阵,一个是2 x 3维,另一个是3 x 2.

a = [[1, 0, 1],   # a: 2 x 3 matrix of 0/1 values
     [1, 0, 1]]

b = [[1, 0],   # b: 3 x 2 matrix of 0/1 values
     [1, 0],
     [1, 0]]

我想得到一个 2x2 矩阵 c:c[i][j] 等于 a 的第 i 行与 b 的第 j 列逐元素做逻辑或(or)运算之后的结果之和。

所以结果将是

# Expected result: c[i][j] is the sum over k of (a[i][k] or b[k][j]).
c = [[3, 2],
     [3, 2]]

是否有现成的包(库)可以高效地完成这类运算?对于维度达到数十万的超大矩阵,逐个元素/向量地循环遍历非常慢。

相比之下,要得到 2x2 矩阵 d(即对 a 和 b 逐元素做逻辑 and 运算后再求和的结果)就比较容易。

d = np.dot(a,b) 就可以做到这一点。我想知道有没有哪个包提供与 np.dot 相对应的、使用逻辑 or 的版本。

2 个答案:

答案 0 :(得分:0)

面向对象的方法是否可以接受?

#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import print_function
import numpy


class PseudoBinary(object):
    """Wrap a value so that ``*`` means logical OR and ``+`` means addition.

    With these operator meanings, a matrix product over PseudoBinary
    elements sums the element-wise ORs instead of the element-wise products.
    """

    def __init__(self,i):
        # The wrapped value (0 or 1 in the inputs, a running count in sums).
        self.i = i

    def __add__(self,rhs):
        """Return a new PseudoBinary holding the sum of both wrapped values."""
        total = self.i + rhs.i
        return PseudoBinary(total)

    def __mul__(self,rhs):
        """Return a new PseudoBinary holding ``self.i or rhs.i``."""
        either = self.i or rhs.i
        return PseudoBinary(either)

    # The reflected and in-place operators reuse the plain implementations;
    # every operation returns a fresh object and never mutates an operand.
    __radd__ = __iadd__ = __add__
    __rmul__ = __imul__ = __mul__

    def __repr__(self):
        """Show only the wrapped value."""
        return str(self.i)

    __str__ = __repr__



# Wrap every 0/1 entry of the question's 2 x 3 matrix in a PseudoBinary.
a = [[PseudoBinary(bit) for bit in row]
     for row in ([1, 0, 1],
                 [1, 0, 1])]

# Same for the 3 x 2 matrix.
b = [[PseudoBinary(bit) for bit in row]
     for row in ([1, 0],
                 [1, 0],
                 [1, 0])]

# The elements' own * (logical OR) and + (sum) are what numpy.dot combines
# here, so c holds the sums of ORs rather than the sums of products.
c = numpy.dot(a,b)
print(c)

打印

[[3 2]
 [3 2]]

我花了一些时间来衡量,了解这种方法的表现。 长话短说:这个带有自定义对象的numpy.dot比整数的常规矩阵乘法慢几个数量级。

我不能 100% 确定造成这种差异的根本原因。我已经就速度慢的原因另外提了一个具体的问题(question)。

性能图如下所示: enter image description here

在此图中,红色曲线(base)是用整数矩阵调用 numpy.dot(..,..) 的基准测量值。蓝色曲线(setOr)是 @vortex 的回答中建议的方法。绿色曲线(pseudo)是用自定义对象矩阵调用 numpy.dot() 的性能。如您所见,使用自定义对象的 numpy.dot 非常慢。这些数据是在一台 MacBook Air(13 英寸,2014 年初,1.7 GHz Intel Core i7,8 GB 1600 MHz DDR3)上测得的。

执行性能测量并打印图的代码是:(在python 2.7.10中测试)

#!/usr/bin/env python

# A possible answer and performance analysis for a stackoverflow
# question. https://stackoverflow.com/q/45682641/5771861

from __future__ import absolute_import
from __future__ import print_function
import numpy
import time
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as tick
import random
import datetime
import timeit

class PseudoBinary(object):
    """Wrap a value so that ``*`` means logical OR and ``+`` means addition.

    A matrix product over PseudoBinary elements therefore sums the
    element-wise ORs instead of the element-wise products.
    """

    def __init__(self,i):
        # The wrapped value (0 or 1 in the inputs, a running count in sums).
        self.i = i

    def __mul__(self,rhs):
        """Return a new PseudoBinary holding ``self.i or rhs.i``."""
        return PseudoBinary(self.i or rhs.i)

    __rmul__ = __mul__
    __imul__ = __mul__

    def __add__(self,rhs):
        """Return a new PseudoBinary holding the sum of both wrapped values."""
        return PseudoBinary(self.i + rhs.i)

    __radd__ = __add__
    __iadd__ = __add__

    def __str__(self):
        """Prefix the value with "P" to tell it apart from a plain integer."""
        return "P"+str(self.i)

    __repr__ = __str__

class TestCase(object):
    """One square random 0/1 matrix, held in two representations.

    Attributes:
        n: dimension of the square matrix.
        base: numpy array of plain 0/1 integers.
        pseudo: numpy array with the same entries wrapped in PseudoBinary.
    """

    def __init__(self,n):
        """Generate an n x n random bit matrix and its PseudoBinary twin."""
        self.n = n

        # Only use square matrices
        rows = self.n
        cols = self.n

        # Built inside __init__ because both arrays depend on self/rows/cols.
        # The stdlib random module is used so random.seed() controls the data.
        self.base = numpy.array(
            [[random.getrandbits(1) for x in range(cols)]
             for y in range(rows)])
        self.pseudo = numpy.array(
            [[PseudoBinary(v) for v in row] for row in self.base])

    @staticmethod
    def printMatrix(m):
        """Print m row by row, each value followed by a single space."""
        for row in m:
            for v in row:
                print(v,end=" ")
            print("")

    def print(self):
        """Print both representations, each under its own heading."""
        print("base")
        TestCase.printMatrix(self.base)
        print("pseudo")
        TestCase.printMatrix(self.pseudo)

class TestRes(object):
    """Collect measurements and report their arithmetic mean."""

    def __init__(self):
        # Measurements in the order they were appended.
        self.res = list()

    def append(self,v):
        """Record one more measurement."""
        self.res += [v]

    def mean(self):
        """Return the float average of all recorded measurements."""
        count = float(len(self.res))
        return sum(self.res) / count

def runWithTime(f,count,msg):
    """Call ``f()`` ``count`` times and report the total elapsed time.

    Args:
        f: zero-argument callable to measure; its return value is ignored.
        count: number of consecutive calls to time as one measurement.
        msg: label printed in front of the measured duration.

    Returns:
        Elapsed time of all ``count`` calls together, in seconds (float).
    """
    # timeit.default_timer picks the clock intended for benchmarking on the
    # running interpreter, and exists on both Python 2 and Python 3.
    timer = timeit.default_timer
    start = timer()
    # range (not xrange) so the loop also runs on Python 3.
    for _ in range(count):
        f()
    elapsed = timer() - start
    print(msg,"took",str(datetime.timedelta(seconds=elapsed)),"seconds")
    return elapsed

def _measureRepeated(f,execCount,measCount,label):
    """Take measCount timings of execCount calls of f; return them as a TestRes."""
    res = TestRes()
    for _ in range(measCount):
        res.append(runWithTime(f,execCount,label))
    return res


def measureAndPrint(execCount,sizes=(1, 4, 8, 16, 32),measCount=4):
    """Benchmark three ways of multiplying each test matrix by itself.

    For every matrix dimension in ``sizes`` this times:
      * "base":   numpy.dot on the plain integer matrix,
      * "pseudo": numpy.dot on the PseudoBinary object matrix,
      * "setOr":  n - dot(1 - A, 1 - A), the sum of ORs computed through
                  the complements with an ordinary integer dot product.

    Args:
        execCount: number of multiplications per single timing.
        sizes: dimensions of the square test matrices.
        measCount: number of timings taken per approach and dimension.

    Returns:
        A tuple (baseResults, pseudoResults, setOrResults); each is a dict
        mapping the matrix dimension to a TestRes holding its timings.
    """
    # Fixed seed so every run measures the same matrices.
    random.seed(1)

    print("Start to initialize test data")
    start = time.time()
    testCases = [TestCase(n) for n in sizes]
    end = time.time()
    print("Test data initialization complete in ",
          str(datetime.timedelta(seconds=end-start)))

    baseResults = {}
    pseudoResults = {}
    setOrResults = {}

    for tc in testCases:
        print("Test case for",tc.n)

        # The closures below are only called within this iteration, so they
        # always see the current tc.
        def base():
            return numpy.dot(tc.base,tc.base)

        baseResults[tc.n] = _measureRepeated(
            base,execCount,measCount,"base")

        def pseudo():
            return numpy.dot(tc.pseudo,tc.pseudo)

        pseudoResults[tc.n] = _measureRepeated(
            pseudo,execCount,measCount,"pseudo")

        # ones is a length-n vector; it broadcasts against the n x n matrix.
        # The complement is computed once, outside the timed function.
        ones = numpy.ones(tc.n)
        dotInput = ones-tc.base

        def setOr():
            return ones*tc.n-numpy.dot(dotInput,dotInput)

        setOrResults[tc.n] = _measureRepeated(
            setOr,execCount,measCount,"setOr")

    return baseResults,pseudoResults,setOrResults

def isClose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True when a and b differ by no more than the allowed tolerance.

    The allowed tolerance is the larger of ``abs_tol`` and ``rel_tol``
    times the larger magnitude of the two values.
    """
    # https://stackoverflow.com/a/33024979/5771861
    difference = abs(a-b)
    largerMagnitude = max(abs(a), abs(b))
    tolerance = max(rel_tol * largerMagnitude, abs_tol)
    return difference <= tolerance

def formatSeconds(s):
    """Format a duration given in seconds concisely, to millisecond accuracy.

    Components that are zero are left out, and every component that is kept
    is followed by a single space. For example 302645.5 gives
    "3 d 12 h 4 m 5 s 500 milliS ".

    Args:
        s: duration in seconds (int or float).

    Returns:
        The formatted string, or "0" when every component is zero.
    """
    whole, fraction = divmod(s, 1)
    # divmod on a float yields floats; convert to int so components print
    # as "5 s" rather than "5.0 s".
    whole = int(whole)
    days, remainder = divmod(whole, 86400)
    hours, remainder = divmod(remainder, 3600)
    minutes, seconds = divmod(remainder, 60)
    # Truncate rather than round so the value can never reach 1000.
    milli = int(fraction * 1000)

    components = (
        ("{} d ", days),
        ("{} h ", hours),
        ("{} m ", minutes),
        ("{} s ", seconds),
        ("{} milliS ", milli),
    )
    # All values are ints here, so an exact zero test is sufficient.
    rv = "".join(fmt.format(value) for fmt, value in components if value)

    return rv if rv else "0"

def plotResults(results,color,label):
    """Plot the mean of every measurement series on a log-y axis.

    Args:
        results: dict mapping an x value to an object with a mean() method.
        color: colour passed to both the line and the scatter markers.
        label: legend label for the line.
    """
    # Sort by key so x values and their means stay aligned and ordered.
    ordered = sorted(results.items())
    xValues = [key for key, _ in ordered]
    meanValues = [series.mean() for _, series in ordered]

    plt.semilogy(xValues,meanValues,color,label=label)
    plt.scatter(xValues,meanValues,c=color)

    # Label every data point with its formatted duration.
    for xValue, meanValue in zip(xValues, meanValues):
        plt.annotate(str(formatSeconds(meanValue)),(xValue,meanValue))

# Number of multiplications timed per single measurement.
multiplicationCount = 1000
# Run the whole benchmark; each result maps matrix dimension -> timings.
baseResults,pseudoResults,setOrResults = measureAndPrint(multiplicationCount)

# One curve per approach: red = base, green = pseudo, blue = setOr.
plotResults(baseResults,"r","base")
plotResults(pseudoResults,"g","pseudo")
plotResults(setOrResults,"b","setOr")
plt.legend(loc="upper left")
plt.title("numpy.dot() performance measurements")
plt.ylabel("Mean seconds taken by {} multiplications".format(multiplicationCount))
plt.xlabel("Dimension of square matrix")

def yFmt(val,pos):
    """Tick-label callback: render a y value as a concise duration.

    ``pos`` is accepted to fit the two-argument callback signature and is
    not used.
    """
    tickText = formatSeconds(val)
    return tickText

# Show the y-axis tick labels as formatted durations instead of raw seconds.
axes = plt.gca()
yaxis = axes.get_yaxis()
yaxis.set_major_formatter(tick.FuncFormatter(yFmt))

# Display the finished figure.
plt.show()

答案 1 :(得分:0)

我确实想出了一个替代解决方案,它只是应用集合论并使用np.dot。

# (x or y) == 1 - (1 - x) * (1 - y) for 0/1 values, so the sum of ORs over
# the shared dimension k is  k - dot(1 - a, 1 - b).
# Each complement is taken against the matrix's own shape: a single
# (2, 3) array of ones cannot be subtracted from the (3, 2) matrix b.
a_arr = np.asarray(a)
b_arr = np.asarray(b)
c = a_arr.shape[1] - np.dot(1 - a_arr, 1 - b_arr)