我希望将以下 C 代码转换为 F#(这是快速平方根倒数算法):
// Fast approximate inverse square root: returns an estimate of 1 / sqrt(number).
// The float's bit pattern is reinterpreted as an integer, shifted and subtracted
// from a magic constant to get a first guess, which is then refined once.
//
// The reinterpretation goes through a union of a float and an int rather than
// through a `long *` cast: `long` is 64 bits wide on LP64 platforms, so the cast
// would read past the 4-byte float and produce a wrong result there.
float Q_rsqrt( float number )
{
    // Compile-time guard: the array size is negative (a compile error) unless
    // int and float have the same size, which the bit trick relies on.
    typedef char q_rsqrt_int_matches_float[ ( sizeof( int ) == sizeof( float ) ) ? 1 : -1 ];
    union { float f; int i; } conv;
    float x2, y;

    x2 = number * 0.5F;
    conv.f = number;                        // Extract bit pattern
    conv.i = 0x5f3759df - ( conv.i >> 1 );  // Initial guess, computed on the bits
    y = conv.f;                             // Convert back to float.
    y = y * ( 1.5F - ( x2 * y * y ) );      // One refinement step
    return y;
}
答案 0 :(得分:3)
首先你应该自己做一些研究;如果之后仍然卡住,请具体说明你遇到了什么问题。这是 Kit Eason 给出的解决方案。
/// Approximates 1 / sqrt n for a float32.
/// The bytes of `n` are reinterpreted as an int32, shifted right by one and
/// subtracted from a magic constant to form a first guess, which is then
/// refined with a single correction step.
/// Each GetBytes call returns a byte array, so two arrays are created per call.
let fastInvSqrt (n : float32) : float32 =
    let magic = 0x5f3759df
    let halfN = n * 0.5f
    // float32 -> raw int32 bit pattern
    let inputBytes = System.BitConverter.GetBytes(n)
    let inputBits = System.BitConverter.ToInt32(inputBytes, 0)
    let guessBits = magic - (inputBits >>> 1)
    // raw int32 bit pattern -> float32
    let guessBytes = System.BitConverter.GetBytes(guessBits)
    let guess = System.BitConverter.ToSingle(guessBytes, 0)
    guess * (1.5f - (halfN * guess * guess))
// Example usage, with the values reported in the original answer:
let x = 4.0f |> fastInvSqrt
// Output: val x : float32 = 0.499153584f
let x' = 1.0 / sqrt 4.0
// Output: val x' : float = 0.5
答案 1 :(得分:1)
在性能和底层优化方面,在改动前后分别进行测量通常是个好主意。快速平方根倒数技巧非常酷,但它只是对平方根倒数的近似;问题在于如今是否真的还需要这样取巧的代码(在 DOOM 时代浮点性能很差,这个技巧在当时非常惊艳)。
无论如何,我搭建了一个简单的性能测试程序,用来比较朴素的实现、Kit Eason / lad2025 提供的解决方案,以及另一个不分配字节数组的解决方案。
open System
open System.Diagnostics
open System.Runtime.InteropServices
/// Constant from which the shifted bit pattern is subtracted in the fast variants.
[<Literal>]
let MAGIC_NUMBER = 0x5f3759df

/// The 1.5 term of the refinement step.
[<Literal>]
let THREE_HALVES = 1.5f

/// One half as a float32.
[<Literal>]
let HALF = 0.5f

/// Number of timed repetitions handed to timeIt for each test case.
[<Literal>]
let OUTER = 1000

/// Upper bound of the summation loop inside each test case.
[<Literal>]
let INNER = 10000
/// Straightforward inverse square root: one divided by the square root of x.
let inline invSqr (x : float32) : float32 =
    let root = sqrt x
    1.0f / root
/// Fast approximate inverse square root that converts between float32 and
/// int32 through BitConverter byte arrays.
let fInvSqr (x : float32) : float32 =
    let halfX = x * 0.5f
    // Allocates two byte arrays creating GC pressure ==> hurts performance
    let inputBytes = BitConverter.GetBytes(x)
    let inputBits = BitConverter.ToInt32(inputBytes, 0)
    let guessBits = MAGIC_NUMBER - (inputBits >>> 1)
    let guessBytes = BitConverter.GetBytes(guessBits)
    let guess = BitConverter.ToSingle(guessBytes, 0)
    guess * (THREE_HALVES - (halfX * guess * guess))
/// Overlays a float32 and an int32 at the same field offset, so a value
/// written through one field can be read back through the other.
/// Author's caveat: susceptible to race conditions & endianness issues.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type Bits =
    [<FieldOffset(0)>]
    val mutable f : float32

    [<FieldOffset(0)>]
    val mutable i : int32
/// Module-level scratch value that fInvSqr2 writes to and reads from.
let mutable bits = Bits ()

/// Fast approximate inverse square root that reinterprets the float32 through
/// the overlapping fields of `bits` instead of going through byte arrays.
/// Every call mutates the shared module-level `bits` value.
let fInvSqr2 (x : float32) : float32 =
    let halfX = x * 0.5F
    // Store the float, then replace its bit pattern with the initial guess.
    bits.f <- x
    bits.i <- MAGIC_NUMBER - (bits.i >>> 1)
    // Read the guess back as a float and refine it once.
    let guess = bits.f
    guess * (THREE_HALVES - (halfX * guess * guess))
/// Runs `a` once untimed, then `n` more times under a stopwatch.
/// Returns the elapsed milliseconds of the timed runs paired with the value
/// produced by the initial untimed run.
let timeIt n (a : unit -> 'T) : int64 * 'T =
    // The first invocation happens before the stopwatch starts; its value is returned.
    let firstResult = a ()
    let stopwatch = Stopwatch.StartNew ()
    for _ in 1 .. n do
        a () |> ignore
    stopwatch.Stop ()
    stopwatch.ElapsedMilliseconds, firstResult
/// Entry point: times each inverse-square-root variant and prints, per variant,
/// the elapsed milliseconds and the sum it computed over 1..INNER.
[<EntryPoint>]
let main argv =
    // Each case sums the function under test over 1..INNER. The loops are
    // written out per case so each lambda calls its function directly.
    let testCases =
        [|
            ("invSqr", (fun () ->
                let mutable total = 0.F
                for k in 1 .. INNER do
                    total <- total + invSqr (float32 k)
                total))
            ("fInvSqr", (fun () ->
                let mutable total = 0.F
                for k in 1 .. INNER do
                    total <- total + fInvSqr (float32 k)
                total))
            ("fInvSqr2", (fun () ->
                let mutable total = 0.F
                for k in 1 .. INNER do
                    total <- total + fInvSqr2 (float32 k)
                total))
        |]

    testCases
    |> Array.iter (fun (name, action) ->
        printfn "Running %s %d times..." name (OUTER*INNER)
        let elapsed, result = timeIt OUTER action
        printfn "... it took %d ms product result: %f" elapsed result)

    0
我机器上的性能测试结果:
Running invSqr 10000000 times...
... it took 78 ms product result: 198.544600
Running fInvSqr 10000000 times...
... it took 311 ms product result: 198.358200
Running fInvSqr2 10000000 times...
... it took 49 ms product result: 198.358200
Press any key to continue . . .
所以我们看到 fInvSqr 实际上比朴素的解决方案慢 3 倍,很可能是因为字节数组的分配。此外,GC 的开销也隐藏在这些数字中,可能会带来不确定的性能下降。
fInvSqr2 的表现似乎稍好一些,但它同样有缺点:
- Bits 技巧容易受到竞争条件的影响(可修复)
- Bits 技巧容易受到字节序(endianness)问题的影响(如果在字节序不同的 CPU 上运行程序,它可能会出错)

这点性能提升是否值得承担这些缺点?由于程序通常并不只由平方根倒数运算构成,实际获得的性能增益可能要小得多。我很难想象如今会有哪种场景让我出于性能原因选择快速平方根倒数技巧,不过这一切都取决于你的具体情况。