如果没有第三方,处理这个2DArray会有什么更高效的方法?
#time
let ar = array2D[[5.0; 6.0; 7.0; 8.0]; [1.0; 2.0; 3.0; 4.0]]
[0..5000000]
let a2 = ar |> Array2D.mapi(fun rowi coli value -> (value + 1.6) * double(coli + 6) * double(rowi + 7))
答案 0 :(得分:7)
如果你运行上面的代码,它需要大约0ms,所以我真的取决于你调用它的上下文。如果你只是循环运行它1M次,那么我的机器需要大约600ms:
for i in 0 .. 1000000 do
let a2 = ar |> Array2D.mapi(fun rowi coli value ->
(value + 1.6) * double ((coli + 6) * (rowi + 7)))
()
这里,大部分时间都花在分配结果数组上 - 对于每次迭代,我们需要分配一个新的2D数组来存储结果。这为您提供了很好的功能属性(结果可以共享,因为它们没有发生变异),但这就是它需要更长时间的原因。
你可以使用一些变异并避免这种情况。这取决于上下文,所以这就是为什么你可能在这里找不到有用的答案。
例如,在这个仿真的1M循环示例中,我可以只分配一个数组来存储结果,然后重复写入:
let res = ar |> Array2D.map id
for i in 0 .. 1000000 do
for x in 0 .. ar.GetLength(0) - 1 do
for y in 0 .. ar.GetLength(1) - 1 do
res.[x, y] <- (ar.[x, y] + 1.6) * double ((x + 6) * (y + 7))
这大约需要100毫秒,因此可以让您了解分配的成本。但是,如果它可以破坏你的程序你不应该做这个改变,因为现在你正在使用可变数组......
答案 1 :(得分:2)
我对这个问题进行了一些测量,我认为这可能很有趣。
我创建了8个不同的测试用例并运行了3个不同大小的矩阵; 1000x1000,100x100和10x10。
此外,我还在x64和x86中运行了测试。
最后,我最终在两张图中显示了48个测试结果。 y轴是以毫秒为单位的执行时间。
Array2D.copy
Array2D.copy map id
一些观察
Array2D.zeroCreate
来创建新的矩阵。写这篇文章的时候,我意识到最好把它们都修改完毕。Array2D.zeroCreate
并使用Array2D.copy
y >= 0
比y < xl
更便宜。 for y = (yl - 1) downto 0
使用y > variable_that_is_always_minus_1
来检查循环结束。通过尾递归,我们可以强制y >= 0
用于生成测量值的代码。
open System
open System.IO
open System.Diagnostics
let clock =
let sw = Stopwatch ()
sw.Start ()
sw
let collectionCount () =
GC.CollectionCount 0 + GC.CollectionCount 1 + GC.CollectionCount 2
let timeIt (n : string) (outer : int) (a : unit -> 'T) : 'T*int64 =
printfn "Timing '%s'..." n
let v = a ()
let t = clock.ElapsedMilliseconds
for i in 1..outer do
a () |> ignore
let e = clock.ElapsedMilliseconds - t
printfn " took %d ms" e
v, e
[<EntryPoint>]
let main argv =
let random = Random 19740531
let total = 100000000
let outers = [|100;10000;1000000|]
use output = new StreamWriter ".\output.tsv"
"Dimensions\tName\tSum\tCollectionCounts\tMilliseconds" |> output.WriteLine
for outer in outers do
let inner = total / outer
let dim = inner |> float |> sqrt |> int32
let ar = Array2D.init dim dim (fun _ _ -> random.NextDouble ())
printfn "New test run, matrix dimensions are %dx%d" dim dim
let run = sprintf "%d_%d" dim dim
let perf_zero () : float[,] =
let xl = ar.GetLength(0)
let yl = ar.GetLength(1)
let res = Array2D.zeroCreate xl yl
res
let perf_copy () : float[,] =
Array2D.copy ar
let perf_id () : float[,] =
ar |> Array2D.map id
let perf_op () : float[,] =
ar |> Array2D.mapi(fun rowi coli value -> (value + 1.6) * double(coli + 6) * double(rowi + 7))
let perf_tp () : float[,] =
let res = ar |> Array2D.map id
for x in 0 .. ar.GetLength(0) - 1 do
for y in 0 .. ar.GetLength(1) - 1 do
res.[x, y] <- (ar.[x, y] + 1.6) * double ((x + 6) * (y + 7))
res
let perf_tpm () : float[,] =
let xl = ar.GetLength(0)
let yl = ar.GetLength(1)
let res = Array2D.zeroCreate xl yl
for x in 0 .. xl - 1 do
for y in 0 .. yl - 1 do
res.[x, y] <- (ar.[x, y] + 1.6) * double ((x + 6) * (y + 7))
res
let perf_tpmf () : float[,] =
let xl = ar.GetLength(0)
let yl = ar.GetLength(1)
let res = Array2D.zeroCreate xl yl
for y in 0 .. yl - 1 do
for x in 0 .. xl - 1 do
res.[x, y] <- (ar.[x, y] + 1.6) * double ((x + 6) * (y + 7))
res
let perf_tr () : float[,] =
let xl = ar.GetLength(0)
let yl = ar.GetLength(1)
let res = Array2D.zeroCreate xl yl
let rec loopy x y =
if y >= 0 then
res.[x, y] <- (ar.[x, y] + 1.6) * double ((x + 6) * (y + 7))
loopy x (y - 1)
else
()
and loopx x =
if x >= 0 then
loopy x (yl - 1)
loopx (x - 1)
else
()
loopx (xl - 1)
res
let testCases =
[|
"Creating Zero Matrix" , perf_zero
"Copying Matrix" , perf_copy
"Mapping Matrix with id" , perf_id
"Original Algorithm" , perf_op
"Tomas Petricek Algorithm" , perf_tp
"Modified Tomas Petricek Algorithm" , perf_tpm
"Reverse Algoritm" , perf_tr
"Flipped x,y Algoritm" , perf_tpmf
|]
for name, a in testCases do
let pcc = collectionCount ()
let vs, t = timeIt name outer a
let sum = ref 0.
vs |> Array2D.iter (fun v -> sum := !sum + v)
let dcc = collectionCount () - pcc
sprintf "%s\t%s\t%f\t%d\t%d" run name !sum dcc t |> output.WriteLine
0
答案 2 :(得分:1)
由于OP指出他的问题涉及像9x4这样的较小Matrix,我做了另一组指标。但是,由于我认为我以前的答案对尺寸较大的指标有一些有趣的观点,所以我决定创建一个新答案
我对这个问题进行了一些测量,我认为这可能很有趣。
我创建了9个不同的测试用例并在10x5矩阵上运行。所有测试都在Release(显然)/ x64中运行。
第一张图显示了以毫秒为单位的执行时间:
第二张图显示了测试运行期间GC集合的数量:
Array2D.zeroInit
固定大小矩阵是struct
,它使用unsafe
代码来避免GC分配。它是用C#编写的,但可以移植到F#。它不应被视为具有生产价值的代码,更像是对自己创作的灵感。
一些观察结果:
unsafe
代码Array2D.iteri
)具有最佳性能如果固定大小矩阵是OP的可行路径,我很难判断,但这是一个值得考虑的选项。
F#代码:
open System
open System.IO
open System.Diagnostics
open Unsafe
let clock =
let sw = Stopwatch ()
sw.Start ()
sw
let collectionCount () =
GC.CollectionCount 0 + GC.CollectionCount 1 + GC.CollectionCount 2
let createTimer (n : string) (a : unit -> 'T) (r : 'T -> 'TResult) : string*(int -> 'TResult*int64*int) =
n, fun outer ->
printfn "Timing '%s'..." n
let v = a () |> r
GC.Collect ()
GC.WaitForFullGCComplete () |> ignore
let pcc = collectionCount ()
let t = clock.ElapsedMilliseconds
for i in 1..outer do
a () |> ignore
let e = clock.ElapsedMilliseconds - t
let dcc = collectionCount () - pcc
printfn " took %d ms, collected %d times, result is %A" e dcc v
v, e, dcc
[<EntryPoint>]
let main argv =
let random = Random 19740531
let total = 300000000
use output = new StreamWriter ".\output.tsv"
"Name\tSum\tCollectionCounts\tMilliseconds" |> output.WriteLine
let cols = 5
let rows = 10
let inner = cols*rows
let outer = total / inner
let ar = Array2D.init rows cols (fun _ _ -> random.NextDouble ())
let mtx5x10 =
let mutable m = Matrix5x10 ()
ar |> Array2D.iteri (fun row col v -> (m.[col, row] <- v))
m
printfn "New test run, matrix dimensions are %dx%d" cols rows
let perf_zero () =
let xl = ar.GetLength(0)
let yl = ar.GetLength(1)
let res = Array2D.zeroCreate xl yl
res
let perf_copy () =
Array2D.copy ar
let perf_id () =
ar |> Array2D.map id
let perf_op () =
ar |> Array2D.mapi(fun rowi coli value -> (value + 1.6) * double(rowi + 6) * double(coli + 7))
let perf_tpm () =
let xl = ar.GetLength(0)
let yl = ar.GetLength(1)
let res = Array2D.zeroCreate xl yl
for x in 0 .. xl - 1 do
for y in 0 .. yl - 1 do
res.[x, y] <- (ar.[x, y] + 1.6) * double ((x + 6) * (y + 7))
res
let perf_fzero () =
let m = Matrix5x10()
m
let perf_fcopy () =
let m = mtx5x10
m
let perf_fs () =
let mutable m = Matrix5x10 ()
for row = 0 to Matrix5x10.Rows - 1 do
for col = 0 to Matrix5x10.Columns - 1 do
m.[col, row] <- (mtx5x10.[col, row] + 1.6) * double ((row + 6) * (col + 7))
m
let perf_fsui = Func<int, int, double, double> (fun col row v -> (v + 1.6) * double ((row + 6) * (col + 7)))
let perf_fsu () =
let mutable m = mtx5x10
m.Update perf_fsui
m
let sumArray vs =
let sum = ref 0.
vs |> Array2D.iter (fun v -> sum := !sum + v)
!sum
let sumMatrix (mtx : Matrix5x10) =
let sum = ref 0.
mtx.Update (fun _ _ v -> sum := !sum + v; v)
!sum
let testCases =
[|
createTimer "Creating Zero Matrix" perf_zero sumArray
createTimer "Copying Matrix" perf_copy sumArray
createTimer "Mapping Matrix with id" perf_id sumArray
createTimer "Original Algorithm" perf_op sumArray
createTimer "Tomas P Algorithm with Zero Init" perf_tpm sumArray
createTimer "Creating Zero Fixed Size Matrix" perf_fzero sumMatrix
createTimer "Copying Fixed Size Matrix" perf_fcopy sumMatrix
createTimer "Fixed Size Algorithm" perf_fs sumMatrix
createTimer "Fixed Size Updater" perf_fsu sumMatrix
|]
for name, a in testCases do
let sum, t, dcc = a outer
sprintf "%s\t%f\t%d\t%d" name sum dcc t |> output.WriteLine
0
C#代码(对于那些关心我用T4生成的代码):
namespace Unsafe
{
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
[StructLayout(LayoutKind.Sequential)]
public struct Matrix5x10
{
double m_c0_r0;
double m_c1_r0;
double m_c2_r0;
double m_c3_r0;
double m_c4_r0;
double m_c0_r1;
double m_c1_r1;
double m_c2_r1;
double m_c3_r1;
double m_c4_r1;
double m_c0_r2;
double m_c1_r2;
double m_c2_r2;
double m_c3_r2;
double m_c4_r2;
double m_c0_r3;
double m_c1_r3;
double m_c2_r3;
double m_c3_r3;
double m_c4_r3;
double m_c0_r4;
double m_c1_r4;
double m_c2_r4;
double m_c3_r4;
double m_c4_r4;
double m_c0_r5;
double m_c1_r5;
double m_c2_r5;
double m_c3_r5;
double m_c4_r5;
double m_c0_r6;
double m_c1_r6;
double m_c2_r6;
double m_c3_r6;
double m_c4_r6;
double m_c0_r7;
double m_c1_r7;
double m_c2_r7;
double m_c3_r7;
double m_c4_r7;
double m_c0_r8;
double m_c1_r8;
double m_c2_r8;
double m_c3_r8;
double m_c4_r8;
double m_c0_r9;
double m_c1_r9;
double m_c2_r9;
double m_c3_r9;
double m_c4_r9;
public const int Columns = 5;
public const int Rows = 10;
unsafe public double this[int x, int y]
{
[MethodImpl (MethodImplOptions.AggressiveInlining)]
get
{
var i = 5 * y + x;
if (i < 0 || i >= 50)
{
throw new IndexOutOfRangeException ("0 <= x <= 5 && 0 <= y <= 10");
}
fixed (double * ms = &m_c0_r0)
{
return ms[i];
}
}
[MethodImpl (MethodImplOptions.AggressiveInlining)]
set
{
var i = 5 * y + x;
if (i < 0 || i >= 50)
{
throw new IndexOutOfRangeException ("0 <= x <= 5 && 0 <= y <= 10");
}
fixed (double * ms = &m_c0_r0)
{
ms[i] = value;
}
}
}
public void Update (Func<int, int, double, double> updater)
{
if (updater == null)
{
return;
}
m_c0_r0 = updater (0, 0, m_c0_r0);
m_c1_r0 = updater (1, 0, m_c1_r0);
m_c2_r0 = updater (2, 0, m_c2_r0);
m_c3_r0 = updater (3, 0, m_c3_r0);
m_c4_r0 = updater (4, 0, m_c4_r0);
m_c0_r1 = updater (0, 1, m_c0_r1);
m_c1_r1 = updater (1, 1, m_c1_r1);
m_c2_r1 = updater (2, 1, m_c2_r1);
m_c3_r1 = updater (3, 1, m_c3_r1);
m_c4_r1 = updater (4, 1, m_c4_r1);
m_c0_r2 = updater (0, 2, m_c0_r2);
m_c1_r2 = updater (1, 2, m_c1_r2);
m_c2_r2 = updater (2, 2, m_c2_r2);
m_c3_r2 = updater (3, 2, m_c3_r2);
m_c4_r2 = updater (4, 2, m_c4_r2);
m_c0_r3 = updater (0, 3, m_c0_r3);
m_c1_r3 = updater (1, 3, m_c1_r3);
m_c2_r3 = updater (2, 3, m_c2_r3);
m_c3_r3 = updater (3, 3, m_c3_r3);
m_c4_r3 = updater (4, 3, m_c4_r3);
m_c0_r4 = updater (0, 4, m_c0_r4);
m_c1_r4 = updater (1, 4, m_c1_r4);
m_c2_r4 = updater (2, 4, m_c2_r4);
m_c3_r4 = updater (3, 4, m_c3_r4);
m_c4_r4 = updater (4, 4, m_c4_r4);
m_c0_r5 = updater (0, 5, m_c0_r5);
m_c1_r5 = updater (1, 5, m_c1_r5);
m_c2_r5 = updater (2, 5, m_c2_r5);
m_c3_r5 = updater (3, 5, m_c3_r5);
m_c4_r5 = updater (4, 5, m_c4_r5);
m_c0_r6 = updater (0, 6, m_c0_r6);
m_c1_r6 = updater (1, 6, m_c1_r6);
m_c2_r6 = updater (2, 6, m_c2_r6);
m_c3_r6 = updater (3, 6, m_c3_r6);
m_c4_r6 = updater (4, 6, m_c4_r6);
m_c0_r7 = updater (0, 7, m_c0_r7);
m_c1_r7 = updater (1, 7, m_c1_r7);
m_c2_r7 = updater (2, 7, m_c2_r7);
m_c3_r7 = updater (3, 7, m_c3_r7);
m_c4_r7 = updater (4, 7, m_c4_r7);
m_c0_r8 = updater (0, 8, m_c0_r8);
m_c1_r8 = updater (1, 8, m_c1_r8);
m_c2_r8 = updater (2, 8, m_c2_r8);
m_c3_r8 = updater (3, 8, m_c3_r8);
m_c4_r8 = updater (4, 8, m_c4_r8);
m_c0_r9 = updater (0, 9, m_c0_r9);
m_c1_r9 = updater (1, 9, m_c1_r9);
m_c2_r9 = updater (2, 9, m_c2_r9);
m_c3_r9 = updater (3, 9, m_c3_r9);
m_c4_r9 = updater (4, 9, m_c4_r9);
}
}
}