为什么Python在数值计算上这么慢?

时间:2014-04-21 19:25:03

标签: python performance

考虑下面的代码,它用于解决计算几何中的"圆穿刺问题"(circle stabbing problem),即找到一条与尽可能多的互不相交的圆相交的直线。

这里的主要问题是:为什么Python代码比C++代码慢42倍?是因为我对Python的使用不当,还是因为Python在算术和数学运算上本身就比C++慢?有什么办法可以让它快一点吗?

首先是Python代码:

from __future__ import division
from math import sqrt, atan2, pi
from sys import stdin

__author__ = "Sahand Saba"


# Tolerance under which a coordinate difference or a squared distance is
# treated as zero.
EPS = 1e-6


class Point(object):
    """A point in the plane whose coordinates are stored as floats."""
    __slots__ = ['x', 'y']

    def __init__(self, xx=0.0, yy=0.0):
        # Coerce here so that strings or ints handed in by the caller never
        # leak into later arithmetic.
        self.x = float(xx)
        self.y = float(yy)

    def angle(self):
        """
        Return the counter-clockwise angle between the positive x-axis and
        this point, normalised into the [0, 2*pi) range.
        """
        theta = atan2(self.y, self.x)
        # atan2 yields values in [-pi, pi]; lift the negative half by a
        # full turn.
        return 2 * pi + theta if theta < 0 else theta


class Circle(object):
    """A circle described by its center (a Point) and a float radius."""
    __slots__ = ['center', 'radius']

    def __init__(self, center, radius):
        self.center = center
        self.radius = float(radius)

    def common_tangents(self, other):
        """
        Compute the points of this circle whose tangent lines are also
        tangent to ``other``.

        The result is ``[[p1, p2], [q1, q2]]`` of Point objects given as
        offsets from this circle's center (they satisfy x^2 + y^2 = r1^2).
        p1 and p2 belong to the external common tangents, q1 and q2 to the
        internal ones. p1 and q1 lie on the left-side of the line connecting
        this circle's center to ``other``'s center, p2 and q2 on the
        right-side of it. When the two centers (nearly) coincide both inner
        lists are returned empty.
        """
        dx = self.center.x - other.center.x
        dy = self.center.y - other.center.y
        r1 = self.radius
        r2 = other.radius
        r1_sq = r1 * r1
        r1_r2 = r1 * r2
        # One constant term per tangent family: external first, then internal.
        offsets = (r1_sq - r1_r2, r1_sq + r1_r2)
        dx_sq = dx * dx
        dy_sq = dy * dy
        dist_sq = dy_sq + dx_sq
        tangents = [[], []]
        if abs(dist_sq) < EPS:
            return tangents

        if abs(dy) < EPS:
            # The centers share (almost) the same y, so dividing by dy is not
            # possible. Recover y from x^2 + y^2 = r1^2 instead; the two
            # points of a family mirror each other across the x-axis.
            sign = -1 if dx < 0 else 1
            for points, offset in zip(tangents, offsets):
                x = (-offset * dx) / dist_sq
                y = -sign * sqrt(r1_sq - x * x)
                points.append(Point(x, y))
                points.append(Point(x, -y))
            return tangents

        # General case: solve the quadratic for x, then obtain y from
        # dy*y + dx*x + offset = 0, which is safe because dy is not ~0.
        sign = -1 if dy < 0 else 1
        for points, offset in zip(tangents, offsets):
            offset_sq = offset * offset
            root = sqrt(offset_sq * dx_sq
                        - dist_sq * (offset_sq - dy_sq * r1_sq))
            shift = sign * root
            for x in ((-offset * dx + shift) / dist_sq,
                      (-offset * dx - shift) / dist_sq):
                points.append(Point(x, -(dx * x + offset) / dy))
        return tangents


def add_events(A, p, q):
    """
    Append a pair of sweep events for the angular interval from p to q.

    An opening event ``(p.angle(), 1, p)`` and a closing event
    ``(q.angle(), -1, q)`` are appended to A, in that order. Returns 1 when
    the opening angle is greater than the closing angle (presumably meaning
    the interval wraps past angle zero), otherwise 0.
    """
    opening, closing = p.angle(), q.angle()
    A.extend([(opening, 1, p), (closing, -1, q)])
    return int(opening > closing)


def max_intersecting_line(C):
    """
    Find a line that crosses as many circles of C as possible.

    Returns a tuple (m, c, p): m is the maximal number of circles in C that
    any line can intersect, c is a circle of C and p is a point on c such
    that the tangent line to c at p intersects m circles in C.
    """
    best = 1
    best_circle = C[0]
    best_point = Point(C[0].radius, 0.0)
    for c1 in C:
        count = 1
        events = []

        for c2 in C:
            if c2 is c1:
                continue
            external, internal = c1.common_tangents(c2)
            wrap_a = add_events(events, internal[0], external[0])
            wrap_b = add_events(events, external[1], internal[1])
            # Count c2 up front when either of its intervals is already
            # open at the start of the sweep.
            count += max(wrap_a, wrap_b)

        if count > best:
            best = count
            best_point = Point(c1.radius, 0.0)
            best_circle = c1

        # Sweep the tangent direction around c1 in increasing angle order.
        events.sort(key=lambda event: event[0])
        for _, step, offset in events:
            count += step
            if count > best:
                best = count
                best_point = Point(c1.center.x + offset.x,
                                   c1.center.y + offset.y)
                best_circle = c1
    return best, best_circle, best_point


if __name__ == '__main__':
    # Input: the number of test cases T; each test case is a circle count n
    # followed by n lines holding "x y r". For every test case the maximal
    # number of circles a single line can intersect is printed.
    T = int(stdin.readline())
    # range() and print(...) with a single argument behave the same on
    # Python 2 and Python 3, unlike xrange and the print statement.
    for __ in range(T):
        n = int(stdin.readline())
        C = []
        for _ in range(n):
            # split() with no argument tolerates repeated blanks and the
            # trailing newline, which split(' ') does not.
            x, y, r = stdin.readline().split()
            C.append(Circle(Point(x, y), r))
        print(max_intersecting_line(C)[0])

几乎逐行等效的C++代码:

#include <iostream>
#include <vector>
#include <algorithm>
#include <cmath>

using namespace std;

// Tolerance under which a coordinate difference is treated as zero.
double EPS = 1e-6;

// A point in the plane with double coordinates.
class Point {
    public:
        double x, y;

        Point(double x=0.0, double y=0.0) : x(x), y(y) {}

        // Counter-clockwise angle between the positive x-axis and this
        // point, normalised into the [0, 2*pi) range.
        double angle() const {
            const double theta = atan2(y, x);
            // atan(1) * 8 equals 2*pi; negative angles are lifted by a full turn.
            return theta < 0 ? atan(1) * 8.0 + theta : theta;
        }
};

// A sweep event: an angle together with the amount (+1 or -1 in this file)
// by which the running circle count changes at that angle.
class Event {
    public:
        double angle;
        // Stored as int: it is constructed from an int and only ever added
        // to an int counter, so a double member served no purpose.
        int count;

        Event(double angle = 0, int count = 1) : angle(angle), count(count) {}

        // Events are ordered by angle only.
        bool operator<(const Event &o) const {
            return angle < o.angle;
        }
};

// The four tangent points produced by Circle::commonTangents: two for the
// external common tangents and two for the internal ones.
struct CircleCircleTangents {
    Point external[2];
    Point internal[2];
};

// A circle given by its center and radius.
class Circle {
    public:
        Point center;
        double radius;

        // Initialisers are listed in declaration order (center, then radius),
        // which is the order in which the members are actually initialised.
        Circle(double x=0.0, double y=0.0, double r=1.0) : center(x,y), radius(r) {}

        // Returns the points of this circle (as offsets from its center,
        // i.e. satisfying x^2 + y^2 = r1^2) at which the tangent line is
        // also tangent to C2: two for the external common tangents and two
        // for the internal ones.
        // external[0] and internal[0] are guaranteed to be on the left-side of
        // the directed line connecting C1.center to C2.center
        CircleCircleTangents commonTangents(const Circle& C2) const {
            const Circle& C1 = *this;
            double mu = C1.center.x - C2.center.x;
            double eta = C1.center.y - C2.center.y;
            double r1 = C1.radius;
            double r2 = C2.radius;
            double r1r1 = r1 * r1;
            double r1r2 = r1 * r2;
            double delta1 = r1r1 - r1r2;
            double delta2 = r1r1 + r1r2;
            double D = eta*eta + mu*mu;
            CircleCircleTangents result;
            // std::fabs always selects the floating-point version, whereas an
            // unqualified abs may resolve to the integer overload depending
            // on which headers and using-directives are in effect.
            if (std::fabs(eta) < EPS){
                // Do not divide by eta! Use x^2 + y^2 = r^2 to find y.
                double dmu = mu < 0? -1 : 1;
                double x = (-delta1 * mu) / D;
                double y = -dmu * std::sqrt(r1r1 - x * x);
                result.external[0].x = x;
                result.external[0].y = y;
                result.external[1].x = x;
                result.external[1].y = -y;
                x = (-delta2 * mu) / D;
                y = -dmu * std::sqrt(r1r1 - x * x);
                result.internal[0].x = x;
                result.internal[0].y = y;
                result.internal[1].x = x;
                result.internal[1].y = -y;
            } else {
                // Dividing by eta is ok. Use mu*x + eta*y + delta = 0 to find y.
                double mumu = mu * mu;
                double etaeta = eta * eta;
                double dd1 = delta1 * delta1;
                double dd2 = delta2 * delta2;
                double deta = eta < 0? -1 : 1;
                double Delta1 = deta * std::sqrt(dd1 * mumu - D*(dd1 - etaeta * r1r1));
                double Delta2 = deta * std::sqrt(dd2 * mumu - D*(dd2 - etaeta * r1r1));
                double x = (-delta1 * mu + Delta1) / D;
                result.external[0].x = x;
                result.external[0].y = -(mu*x + delta1)/eta;
                x = (-delta1 * mu - Delta1) / D;
                result.external[1].x = x;
                result.external[1].y = -(mu*x + delta1)/eta;
                x = (-delta2 * mu + Delta2) / D;
                result.internal[0].x = x;
                result.internal[0].y = -(mu*x + delta2)/eta;
                x = (-delta2 * mu - Delta2) / D;
                result.internal[1].x = x;
                result.internal[1].y = -(mu*x + delta2)/eta;
            }
            return result;
        }
};

// Appends an opening event (+1) at the angle of p and a closing event (-1)
// at the angle of q to A, in that order. Returns true when the opening
// angle is greater than the closing angle.
bool add_events(vector<Event>& A, const Point& p, const Point& q) {
    const double opening = p.angle();
    const double closing = q.angle();
    A.push_back(Event(opening, 1));
    A.push_back(Event(closing, -1));
    return closing < opening;
}

// Given a list of circles, returns (m, c, p) where m is the maximal number of
// circles in C any line can intersect, and p is a point on a circle c in C
// such that the tangent line to c at p intersects m circles in C.
int max_intersecting_line(const Circle* C, int n) {
    int global_max = 1;
    vector<Event> A;
    for(int i = 0; i < n; i++) {
        const Circle& c1 = C[i];
        A.clear();
        int local_max = 1;
        for(int j = 0; j < n; j++) {
            if(j == i) continue;
            const Circle& c2 = C[j];
            CircleCircleTangents Q = c1.commonTangents(c2);
            bool t1 = add_events(A, Q.internal[0], Q.external[0]);
            bool t2 = add_events(A, Q.external[1], Q.internal[1]);
            if(t1 || t2) {
                local_max++;
            }
        }

        if (local_max > global_max) {
            global_max = local_max;
        }

        sort(A.begin(), A.end());
        for(int i = 0; i < A.size(); i++) {
            local_max += A[i].count;
            if(local_max > global_max) {
                global_max = local_max;
            }
        }
    }
    return global_max;
}

int main() {
    Circle C[2000];
    int T;
    cin >> T;
    for (int t = 0; t < T; t++) {
        int n;
        cin >> n;
        for (int i = 0; i < n; i++) {
            cin >> C[i].center.x >> C[i].center.y >> C[i].radius;
        }

        cout << max_intersecting_line(C, n) << endl;
    }
    return 0;
}

他们的表现差异:

$ time ./janeway < io/Janeway.in > /dev/null

real    0m8.436s
user    0m8.430s
sys 0m0.003s

$ time python janeway.py < io/Janeway.in > /dev/null

real    5m57.899s
user    5m57.217s
sys 0m0.165s

如您所见,C++代码大约快42倍。

(测试输入来自2013年ACM ICPC区域赛的题目。请参阅 http://www.acmicpc-pacnw.org/results.htm 中2013年的"Janeway"一题。)

编辑:这是Python代码的cProfile输出:

         799780565 function calls in 507.293 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 __future__.py:48(<module>)
        1    0.000    0.000    0.000    0.000 __future__.py:74(_Feature)
        7    0.000    0.000    0.000    0.000 __future__.py:75(__init__)
        1    0.047    0.047  507.293  507.293 janeway.py:1(<module>)
       25   63.703    2.548  507.207   20.288 janeway.py:103(max_intersecting_line)
        1    0.000    0.000    0.000    0.000 janeway.py:11(Point)
 24250014    5.671    0.000    5.671    0.000 janeway.py:116(<genexpr>)
 96926032    9.343    0.000    9.343    0.000 janeway.py:127(<lambda>)
 96955733   57.902    0.000   57.902    0.000 janeway.py:14(__init__)
 96926032   46.840    0.000   63.156    0.000 janeway.py:18(angle)
        1    0.000    0.000    0.000    0.000 janeway.py:29(Circle)
    18506    0.009    0.000    0.009    0.000 janeway.py:32(__init__)
 24231508  167.128    0.000  245.945    0.000 janeway.py:36(common_tangents)
 48463016   59.402    0.000  129.139    0.000 janeway.py:95(add_events)
 48463016    4.106    0.000    4.106    0.000 {abs}
 96926032   16.315    0.000   16.315    0.000 {math.atan2}
 48463016    4.908    0.000    4.908    0.000 {math.sqrt}
 24231508    9.483    0.000    9.483    0.000 {max}
193870570   18.503    0.000   18.503    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    18532    0.009    0.000    0.009    0.000 {method 'readline' of 'file' objects}
    18506   43.918    0.002   53.261    0.003 {method 'sort' of 'list' objects}
    18506    0.007    0.000    0.007    0.000 {method 'split' of 'str' objects}

3 个答案:

答案 0 :(得分:1)

Python是一种解释型语言,而C++是编译型语言。通常,对于每个算术表达式,例如"1 + 2",会在堆上创建三个对象:一个用于数字"1",一个用于数字"2",另一个用于结果"3"。而在C++中,编译之后它会归结为更简单的汇编操作。因此,对于大多数数值计算代码而言,出现如此大的性能差异是意料之中的。

在某些情况下,你可以通过使用numpy数组和numpy表达式来加快速度。有关详细信息,请参阅:http://wiki.scipy.org/PerformancePython

答案 1 :(得分:1)

在C ++中,编译器通常可以将算术运算转换为单个处理器指令。

在Python中,你面临双重打击。解释执行代码会给每个操作带来额外开销。不仅如此,Python还不能假设对象是数字,它必须先检查对象的类型,才能知道要执行哪种操作。例如,您可以对两个数字使用+将它们相加,也可以对两个字符串使用+将它们连接起来。Python无法提前知道变量是数字还是字符串。

答案 2 :(得分:0)

Python是一种解释型语言,而C++是编译型语言。要加速Python代码,您可以尝试PyPy、Cython或Shedskin。