Question

我有一个分别用 C++ 和 Java 编写的相同程序。C++ 版本使用 VS 2019，Java 版本使用 Eclipse 2019-03。

这是C ++程序。

    /// Forwards the scroll view to `animateCollectionView(_:)`.
    ///
    /// NOTE(review): the signature matches the `UIScrollViewDelegate`
    /// deceleration-start callback; confirm the enclosing type is registered
    /// as the scroll view's delegate.
    ///
    /// - Parameter scrollView: The scroll view handed on to the animation logic.
    func scrollViewWillBeginDecelerating(_ scrollView: UIScrollView) {
        animateCollectionView(scrollView)
    }

    /// Forwards the scroll view to `animateCollectionView(_:)`.
    ///
    /// NOTE(review): the signature matches the `UIScrollViewDelegate`
    /// end-of-drag callback; confirm the enclosing type is registered as the
    /// scroll view's delegate.
    ///
    /// - Parameters:
    ///   - scrollView: The scroll view handed on to the animation logic.
    ///   - decelerate: Not used by this implementation.
    func scrollViewDidEndDragging(_ scrollView: UIScrollView, willDecelerate decelerate: Bool) {
        animateCollectionView(scrollView)
    }

    /// Starts either the hide or the show transition for the given scroll view.
    ///
    /// Does nothing when `collectionViewIsAnimating` is already set. Otherwise
    /// the flag is raised and `hideCollectionView(_:)` is called when the
    /// collection view is not yet hidden and the vertical content offset is
    /// below `-(contentInset.top + triggerOffset)`; in every other case
    /// `showCollectionView(_:)` is called.
    ///
    /// - Parameter scrollView: The scroll view whose offset and inset are inspected.
    func animateCollectionView(_ scrollView: UIScrollView) {
        // Ignore calls that arrive while a transition is already running.
        if collectionViewIsAnimating { return }
        collectionViewIsAnimating = true

        // Offset (negative) the content must be pulled past to trigger hiding.
        let triggerThreshold: CGFloat = -(scrollView.contentInset.top + triggerOffset)
        let shouldHide = !collectionViewIsHidden && scrollView.contentOffset.y < triggerThreshold

        guard shouldHide else {
            showCollectionView(scrollView)
            return
        }
        hideCollectionView(scrollView)
    }

    /// Animates the scroll view's top content inset to the height of its
    /// bounds while restoring the content offset captured before the change.
    ///
    /// User interaction on the scroll view is disabled for the duration of
    /// the animation. When the animation ends, whether it ran to completion
    /// or was interrupted, interaction is re-enabled and
    /// `collectionViewIsAnimating` is cleared so later calls to
    /// `animateCollectionView(_:)` are not blocked forever.
    /// `collectionViewIsHidden` is only set when the animation finished.
    ///
    /// - Parameter scrollView: The scroll view whose inset is animated.
    func hideCollectionView(_ scrollView: UIScrollView) {

        // Block touches while the inset is being animated.
        scrollView.isUserInteractionEnabled = false
        let height = scrollView.bounds.height
        let initialOffset = scrollView.contentOffset

        print("***** opening")
        UIView.animate(withDuration: 0.3, delay: 0, usingSpringWithDamping: 0.7, initialSpringVelocity: 0, options: .curveEaseInOut, animations: {

            scrollView.contentInset = UIEdgeInsets(top: height, left: 0, bottom: 0, right: 0)
            // Re-apply the offset that was captured before the inset changed.
            scrollView.contentOffset = initialOffset

        }) { (finished) in

            // Always undo the temporary lock-out: if this only happened on
            // `finished == true`, an interrupted animation would leave the
            // scroll view untouchable and the animating flag stuck.
            scrollView.isUserInteractionEnabled = true
            self.collectionViewIsAnimating = false

            if finished {
                self.collectionViewIsHidden = true
            }
        }
    }

这是Java程序...

#define InputSize 500000

// Brute-force duplicate search: compares each element with every element
// after it (nested loops, so the number of comparisons grows quadratically
// with `size`).
//
//   input - array to scan; only read by this function
//   size  - number of elements of `input` to examine
//
// Returns the first value found at two different positions, or -1 when no
// two examined elements are equal (which is indistinguishable from a
// duplicated value of -1).
int FindDuplicate::FindDuplicateNaive(int* input, int size)
{
    int j;
    for (int i = 0; i < size-1; i++)
    {
        // Only positions after i need checking; earlier pairs were already
        // compared in previous outer iterations.
        for ( j= i+1; j < size; j++)
        {
            if (input[i] == input[j])
                return input[i];
        }
    }
    return -1;
}

// Builds the benchmark input: a heap-allocated array of `size` ints holding
// 1, 2, ..., size-1 followed by one extra element equal to size-1, so the
// only duplicated value sits in the last two slots.
//
//   size - number of elements to allocate
//
// Returns a pointer obtained from new[]; the caller is responsible for
// releasing it with delete[]. For size == 0 an empty allocation is returned
// and nothing is written to it.
int* FindDuplicate::CreateTestCase(int size)
{
    int* output = new int[size];
    int last = 0;
    for (; last < size - 1; last++)
    {
        output[last] = last + 1;
    }
    // Guard the final store: with size == 0 there is no slot to write to and
    // an unconditional output[0] = 0 would overrun the allocation.
    if (size > 0)
    {
        output[last] = last;
    }
    return output;
}

// Benchmark driver: builds the test array, times a single call to
// FindDuplicateNaive and prints the result followed by the elapsed time.
int main()
{
    int* input = FindDuplicate::CreateTestCase(InputSize);

    // steady_clock is monotonic, so the measured interval cannot be skewed
    // by wall-clock adjustments the way system_clock can.
    auto start = std::chrono::steady_clock::now(); // clock start
    int output = FindDuplicate::FindDuplicateNaive(input, InputSize);
    auto end = std::chrono::steady_clock::now();   // clock end

    // Console output happens after `end` is taken so that I/O is not timed.
    std::chrono::duration<double> elapsed_seconds = end - start;
    cout<<"Output is: "<<output<<endl;
    cout<< "elapsed time: " << elapsed_seconds.count() << "s\n";

    // CreateTestCase allocates with new[]; release it here.
    delete[] input;
}

看到同一个程序、同样的输入在 C++ 和 Java 中所花费的时间，您会感到震惊。

在Java中：

总耗时为：41.876秒
499999

在CPP中：

启用优化并处于发布模式后，

输出为：499999
耗用时间：64.0293s

对此有何想法，可能是什么原因？为什么Java花费41.876秒，而CPP花费64.0293秒？

Answer 1

由于这个循环不容易矢量化，大部分时间都花在了循环控制上。
借助在内层循环上使用 #pragma GCC unroll N 进行调查，可以看出循环展开能够解释 OP 得到的结果。

我获得这些平均结果（从计时中排除控制台）：

gcc 8.3, -O3, unroll 64    1.63s
gcc 8.3, -O3, unroll 32    1.66s
gcc 8.3, -O3, unroll 16    1.71s
gcc 8.3, -O3, unroll 8     1.81s
gcc 8.3, -O3, unroll 4     1.97s
gcc 8.3, -O3, unroll 2     2.33s
gcc 8.3, -O3, no unroll    3.06s
openjdk 10.0.2             1.93s

编辑：这些测试使用的是原始问题中的 InputSize = 100'000（此后问题中已改为 500'000）。

Answer 2

主要区别是循环展开。

Java 非常巧妙地展开了内层循环，而 GCC / clang / MSVC / ICC 却没有展开它（这是这些编译器错失的一项优化）。

如果您手动展开循环，则可以加快循环速度，使其具有与Java版本类似的速度，如下所示：

// Manually 4x-unrolled replacement for the inner loop of FindDuplicateNaive.
// Relies on `i`, `j`, `input` and `size` from the enclosing function.
// The bound `size-3` keeps the highest index touched, j+3, below `size`.
for ( j= i+1; j < size-3; j+=4)
{
    if (input[i] == input[j])
        return input[i];
    if (input[i] == input[j+1])
        return input[i];
    if (input[i] == input[j+2])
        return input[i];
    if (input[i] == input[j+3])
        return input[i];
}
// Remainder loop: continues from where the unrolled loop stopped and checks
// the trailing elements one at a time.
for (; j < size; j++)
{
    if (input[i] == input[j])
        return input[i];
}

为证明起见，这是Java版本的内部循环（8次展开）：

  0x00007f13a5113f60: mov     0x10(%rsi,%rdx,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f64: cmp     %ebx,%ecx
  0x00007f13a5113f66: je      0x7f13a5113fcb    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113f68: movsxd  %edx,%rdi
  0x00007f13a5113f6b: mov     0x14(%rsi,%rdi,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f6f: cmp     %ebx,%ecx
  0x00007f13a5113f71: je      0x7f13a5113fc9    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113f73: mov     0x18(%rsi,%rdi,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f77: cmp     %ebx,%ecx
  0x00007f13a5113f79: je      0x7f13a5113fed    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113f7b: mov     0x1c(%rsi,%rdi,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f7f: cmp     %ebx,%ecx
  0x00007f13a5113f81: je      0x7f13a5113ff2    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113f83: mov     0x20(%rsi,%rdi,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f87: cmp     %ebx,%ecx
  0x00007f13a5113f89: je      0x7f13a5113ff7    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113f8b: mov     0x24(%rsi,%rdi,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f8f: cmp     %ebx,%ecx
  0x00007f13a5113f91: je      0x7f13a5113ffc    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113f93: mov     0x28(%rsi,%rdi,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f97: cmp     %ebx,%ecx
  0x00007f13a5113f99: je      0x7f13a5114001    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113f9b: mov     0x2c(%rsi,%rdi,4),%ebx  ;*iaload
                                                ; - FindDuplicate::FindDuplicateNaive@25 (line 6)

  0x00007f13a5113f9f: cmp     %ebx,%ecx
  0x00007f13a5113fa1: je      0x7f13a5114006    ;*if_icmpne
                                                ; - FindDuplicate::FindDuplicateNaive@26 (line 6)

  0x00007f13a5113fa3: add     $0x8,%edx         ;*iinc
                                                ; - FindDuplicate::FindDuplicateNaive@33 (line 5)

  0x00007f13a5113fa6: cmp     %r8d,%edx
  0x00007f13a5113fa9: jl      0x7f13a5113f60    ;*if_icmpge
                                                ; - FindDuplicate::FindDuplicateNaive@17 (line 5)

Answer 3

这不是一个完整的答案，我无法解释为什么它在Java中实际上比C ++更快地运行。但是我可以解释一些阻碍您的C ++版本性能的事情。如果有人确实对性能的整体差异有实际的解释，请不要选择此为正确答案。

此答案已在 Meta 上讨论过，结论是暂时将其保留为部分答案是最好的选择。

首先也是最重要的一点，正如评论中提到的那样，Java 代码在测试时已经过优化，而在 C++ 中，您必须通过命令行参数指定优化级别（在 Visual Studio IDE 中则是以 Release 配置进行编译）。尽管这会带来很大的差异，但在我的测试中 Java 仍然领先（所有结果见底部）。

但是我想指出您的测试中的一个主要缺陷，在这个特定情况下这似乎并不重要，因为当您查看数字时，它几乎没有什么区别，但仍然很重要：输入输出操作会增加明显的延迟。为了进行准确的执行时比较，必须从计时器中排除两种语言的输入输出操作。尽管在这种情况下没什么区别，但让一种语言在计时器运行时同时执行功能和输出，而另一种语言仅执行功能，会使整个测试带有偏见且毫无意义。

要使其更类似于Java版本，请将c ++ main更改为

// Benchmark driver with console I/O kept outside the timed region: builds
// the test array, times a single call to FindDuplicateNaive, then prints
// the result and the elapsed time.
int main()
    {
    int* input = FindDuplicate::CreateTestCase(InputSize);

    int result;
    // steady_clock is monotonic, so the measured interval cannot be skewed
    // by wall-clock adjustments the way system_clock can.
    auto start = std::chrono::steady_clock::now(); //clock start
    result = FindDuplicate::FindDuplicateNaive(input, InputSize);
    auto end = std::chrono::steady_clock::now(); //clock end

    std::chrono::duration<double> elapsed_seconds = end - start;
    cout << "Output is: " << result << endl;
    cout << "elapsed time: " << elapsed_seconds.count() << "s\n";

    // CreateTestCase allocates with new[]; release it here.
    delete[] input;
    }

请注意，默认情况下，C++ 的控制台 I/O（iostream，cin/cout）比它本可以达到的速度还要慢，因为默认启用了与 C 的控制台 I/O（stdio，scanf/printf）的同步，以保证程序在同时使用 cout 和 printf 时不会出现奇怪的行为。您可以在此处（Here）了解关闭同步后 cout 的性能。您不仅在计时范围内使用了 I/O，而且还是在它性能最差的模式下使用的。

这是我的结果。尽管 Java 仍然占上风，但它显示了某些编译选项和 I/O 操作会给 C++ 带来多大的差异（对于单个 cout，关闭同步平均带来 0.03s 的差异，这比看起来要大）。所有以秒为单位的值都是 10 次测试的平均值。

1. Java print in timer                   1.52s
2. Java                                  1.36s
3. C++  debug, cout in timer            11.78s
4. C++  debug                           11.73s
5. C++  release, cout in timer           3.32s
6. C++  release cout synchronization off 3.29s
7. C++  release                          3.26s

我想让您明白，在所有这些测试中，只有 1 对 6 以及 2 对 7 的比较才有意义。无论您重复测试多少次，其余的（3、4、5）都会使比较带有偏差。

为什么C ++中的整数数组搜索循环比Java慢？

3 个答案: