
时间:2012-12-24 18:13:19

标签: c# multiplication bit-shift




        // Two sample operands; their product is computed below in two different ways.
        float mul1 = 18.579434f;
        float mul2 = 34.307951f;

        // NOTE: in C# `^` is bitwise XOR, not exponentiation, so (2 ^ 32) evaluates
        // to 34 rather than 2 to the 32nd power. Each operand is scaled by 34 here.
        int shiftMul1 = (int)((2 ^ 32) * mul1);
        int shiftMul2 = (int)((2 ^ 32) * mul2);

        // Both operands are int, so `var` infers int and the product is 32-bit.
        var resultMul = shiftMul1 * shiftMul2;
        // NOTE: for an int left operand C# only uses the low 5 bits of the shift
        // count, so `>> 32` shifts by 0 and resultMul is assigned unchanged.
        float floatResShift = resultMul >> 32; // wrong value
        float floatResNormal = mul1 * mul2; //expected value




使用 32 位整数的定点运算计算 A·B(其中 A = 2.5,B = 8.4),将涉及以下步骤:


决定缩放因子。这在很大程度上取决于可能遇到的数值范围。由于本例中的数值很小,这一点并不重要,取 16 个小数位(小数点右边的位数)即可。缩放因子为 f = 2^16 = 65536。这种格式称为 Q15.16(小数点左边 15 位,右边 16 位,另有 1 位符号位)。


使用普通整数乘法将 Ai 与 Bi 相乘:Ri = Ai·Bi = 163840·550502 = 90194247680。结果之所以这么大,是因为 Ai 和 Bi 都已按 Q15.16 格式缩放,所以乘积实际上是 (A·f)·(B·f) = A·B·f²。注意这个中间结果已经超出 32 位整数的范围,必须用 64 位整数保存。


为了让结果回到 Q15.16 格式,必须再除以一次缩放因子。这同样可以用位移运算完成,但为简单起见这里使用除法:Ri / f = 90194247680 / 65536 = 1376255,这就是 Q15.16 格式下的结果。


要把该数转换回普通实数,只需将其转换为所需的类型,再除以一次缩放因子:1376255.0 / 65536.0 = 20.999985,接近预期值 21。


用缩放因子对数值进行缩放。在二进制运算中这可以用位移完成,但为简单起见这里使用乘法。Ai = A·f = 2.5·65536 = 163840;B·f = 8.4·65536 = 550502.4,截断为整数后得到 Bi = 550502。


要把该数转换回普通实数,只需将其转换为所需的类型,再除以一次缩放因子:1376255.0 / 65536.0 = 20.999985,接近预期值 21。










    // Fixed-point multiplication of two floats using 20 fractional bits per operand.
    float mul1 = 18.579434f;
    float mul2 = 34.307951f;

    // 1 << 20 is the exact power of two; no floating-point Math.Pow round trip needed.
    int scaleFactor = 1 << 20;

    // Convert straight to long so the scaled operands are not capped at 32 bits.
    long shiftMul1 = (long)(scaleFactor * mul1);
    long shiftMul2 = (long)(scaleFactor * mul2);

    // The product of two values with 20 fractional bits carries 40 fractional bits.
    long resultMul = shiftMul1 * shiftMul2;

    // Divide by 2^40 in floating point instead of shifting right by 40:
    // an integer shift would discard the whole fractional part (637 instead of ~637.42).
    float floatResShift = (float)(resultMul / (double)(1L << 40));
    float floatResNormal = mul1 * mul2; // the result floatResNormal almost same as floatResShift

2 个答案:

答案 0 :(得分:1)

// Operands to multiply.
float mul1 = 18.579434f;
float mul2 = 34.307951f;

// Fixed-point layout: 20 fractional bits, scale = 2^20.
int fractionBits = 20;
int halfBits = fractionBits / 2;
int scale = 1 << fractionBits;

// Scale both operands into fixed-point integers (truncating).
int fixedA = (int)(scale * mul1);
int fixedB = (int)(scale * mul2);

// Drop half of the fractional bits from each operand before multiplying,
// so the 32-bit product again carries exactly fractionBits fractional bits.
int reducedA = fixedA >> halfBits;
int reducedB = fixedB >> halfBits;
int resultMul = reducedA * reducedB;

// Reference result in plain floating point, then the fixed-point result scaled back.
float floatResNormal = mul1 * mul2;
float floatResShift = ((float)resultMul) / scale;

Console.WriteLine("{0} {1}", floatResNormal, floatResShift);


637,4223 637,4043

答案 1 :(得分:0)


Fixed Point Arithmetic structure and relevant methods. Simple fixed point structure included as well.

Created from information and code gathered here: http://stackoverflow.com/questions/605124/fixed-point-math-in-c

May be used for anything without permission.

To quote the original author (x4000 of stackoverflow.com):
"The accuracy of these functions as they are coded here is more than enough for my purposes, but if you need more you can increase the SHIFT AMOUNT on FInt.
Just be aware that if you do so, the constants on [trigonomic] functions will then need to be divided by 4096 and then multiplied by whatever your new SHIFT AMOUNT requires.
You're likely to run into some bugs if you do that and aren't careful, so be sure to run checks against the built-in Math functions to make sure that your results aren't
being put off by incorrectly adjusting a constant."

Code credit: x4000 of stackoverflow.com

Compiled into a usable source file by: Paul Bergeron

Date: 7/1/2009

More fixed point functions can be found written in Java here: http://home.comcast.net/~ohommes/MathFP/


/// <summary>
/// Signed fixed-point number stored in a 64-bit integer with
/// <see cref="SHIFT_AMOUNT"/> fractional bits (real value = RawValue / 2^SHIFT_AMOUNT).
/// </summary>
public struct FInt
{
    /// <summary>The underlying scaled integer.</summary>
    public long RawValue;
    public const int SHIFT_AMOUNT = 12; //12 is 4096

    /// <summary>Raw representation of 1.0 as a long.</summary>
    public const long One = 1 << SHIFT_AMOUNT;
    /// <summary>Raw representation of 1.0 as an int.</summary>
    public const int OneI = 1 << SHIFT_AMOUNT;
    /// <summary>The value 1.0 as an FInt.</summary>
    public static FInt OneF = new FInt( 1, true );

    #region Constructors
    /// <summary>
    /// Creates a value from an integer.
    /// </summary>
    /// <param name="StartingRawValue">Either a whole number or an already scaled raw value.</param>
    /// <param name="UseMultiple">
    /// true to treat <paramref name="StartingRawValue"/> as a whole number and scale it
    /// by 2^SHIFT_AMOUNT; false to store it unchanged as the raw value.
    /// </param>
    public FInt( long StartingRawValue, bool UseMultiple )
    {
        this.RawValue = UseMultiple ? StartingRawValue << SHIFT_AMOUNT : StartingRawValue;
    }

    /// <summary>
    /// Creates a value from a double, rounding to the nearest representable raw value.
    /// </summary>
    /// <param name="DoubleValue">The real number to convert.</param>
    public FInt( double DoubleValue )
    {
        DoubleValue *= (double)One;
        // Convert to long, not int: RawValue is 64-bit, and an int cast would
        // corrupt any value whose scaled form does not fit in 32 bits.
        this.RawValue = (long)Math.Round( DoubleValue );
    }
    #endregion

    /// <summary>Integer part obtained by shifting the fractional bits away.</summary>
    public int IntValue
    {
        get { return (int)( this.RawValue >> SHIFT_AMOUNT ); }
    }

    /// <summary>Returns the same value as <see cref="IntValue"/>.</summary>
    public int ToInt()
    {
        return (int)( this.RawValue >> SHIFT_AMOUNT );
    }

    /// <summary>Converts to a double by dividing the raw value by <see cref="One"/>.</summary>
    public double ToDouble()
    {
        return (double)this.RawValue / (double)One;
    }

    /// <summary>The additive inverse (negated value), not the reciprocal.</summary>
    public FInt Inverse
    {
        get { return new FInt( -this.RawValue, false ); }
    }

    #region FromParts
    /// <summary>
    /// Create a fixed-int number from parts.  For example, to create 1.5 pass in 1 and 500.
    /// </summary>
    /// <param name="PreDecimal">The number above the decimal.  For 1.5, this would be 1.</param>
    /// <param name="PostDecimal">The number below the decimal, to three digits.
    /// For 1.5, this would be 500. For 1.005, this would be 5.</param>
    /// <returns>A fixed-int representation of the number parts</returns>
    public static FInt FromParts( int PreDecimal, int PostDecimal )
    {
        FInt f = new FInt( PreDecimal );
        if ( PostDecimal != 0 )
        {
            // PostDecimal is expressed in thousandths, so scale it down by 1000.
            f.RawValue += ( new FInt( PostDecimal ) / 1000 ).RawValue;
        }

        return f;
    }
    #endregion

    #region *
    /// <summary>Multiplies the raw values and shifts right once to remove the doubled scale.</summary>
    public static FInt operator *( FInt one, FInt other )
    {
        return new FInt( ( one.RawValue * other.RawValue ) >> SHIFT_AMOUNT, false );
    }

    public static FInt operator *( FInt one, int multi )
    {
        return one * (FInt)multi;
    }

    public static FInt operator *( int multi, FInt one )
    {
        return one * (FInt)multi;
    }
    #endregion

    #region /
    /// <summary>Pre-shifts the dividend left so the quotient keeps SHIFT_AMOUNT fractional bits.</summary>
    public static FInt operator /( FInt one, FInt other )
    {
        return new FInt( ( one.RawValue << SHIFT_AMOUNT ) / ( other.RawValue ), false );
    }

    public static FInt operator /( FInt one, int divisor )
    {
        return one / (FInt)divisor;
    }

    public static FInt operator /( int divisor, FInt one )
    {
        return (FInt)divisor / one;
    }
    #endregion

    #region %
    public static FInt operator %( FInt one, FInt other )
    {
        return new FInt( ( one.RawValue ) % ( other.RawValue ), false );
    }

    public static FInt operator %( FInt one, int divisor )
    {
        return one % (FInt)divisor;
    }

    public static FInt operator %( int divisor, FInt one )
    {
        return (FInt)divisor % one;
    }
    #endregion

    #region +
    public static FInt operator +( FInt one, FInt other )
    {
        return new FInt( one.RawValue + other.RawValue, false );
    }

    public static FInt operator +( FInt one, int other )
    {
        return one + (FInt)other;
    }

    public static FInt operator +( int other, FInt one )
    {
        return one + (FInt)other;
    }
    #endregion

    #region -
    public static FInt operator -( FInt one, FInt other )
    {
        return new FInt( one.RawValue - other.RawValue, false );
    }

    public static FInt operator -( FInt one, int other )
    {
        return one - (FInt)other;
    }

    public static FInt operator -( int other, FInt one )
    {
        return (FInt)other - one;
    }
    #endregion

    #region ==
    public static bool operator ==( FInt one, FInt other )
    {
        return one.RawValue == other.RawValue;
    }

    public static bool operator ==( FInt one, int other )
    {
        return one == (FInt)other;
    }

    public static bool operator ==( int other, FInt one )
    {
        return (FInt)other == one;
    }
    #endregion

    #region !=
    public static bool operator !=( FInt one, FInt other )
    {
        return one.RawValue != other.RawValue;
    }

    public static bool operator !=( FInt one, int other )
    {
        return one != (FInt)other;
    }

    public static bool operator !=( int other, FInt one )
    {
        return (FInt)other != one;
    }
    #endregion

    #region >=
    public static bool operator >=( FInt one, FInt other )
    {
        return one.RawValue >= other.RawValue;
    }

    public static bool operator >=( FInt one, int other )
    {
        return one >= (FInt)other;
    }

    public static bool operator >=( int other, FInt one )
    {
        return (FInt)other >= one;
    }
    #endregion

    #region <=
    public static bool operator <=( FInt one, FInt other )
    {
        return one.RawValue <= other.RawValue;
    }

    public static bool operator <=( FInt one, int other )
    {
        return one <= (FInt)other;
    }

    public static bool operator <=( int other, FInt one )
    {
        return (FInt)other <= one;
    }
    #endregion

    #region >
    public static bool operator >( FInt one, FInt other )
    {
        return one.RawValue > other.RawValue;
    }

    public static bool operator >( FInt one, int other )
    {
        return one > (FInt)other;
    }

    public static bool operator >( int other, FInt one )
    {
        return (FInt)other > one;
    }
    #endregion

    #region <
    public static bool operator <( FInt one, FInt other )
    {
        return one.RawValue < other.RawValue;
    }

    public static bool operator <( FInt one, int other )
    {
        return one < (FInt)other;
    }

    public static bool operator <( int other, FInt one )
    {
        return (FInt)other < one;
    }
    #endregion

    /// <summary>Truncating conversion to int (drops the fractional bits).</summary>
    public static explicit operator int( FInt src )
    {
        return (int)( src.RawValue >> SHIFT_AMOUNT );
    }

    /// <summary>Converts a whole number to fixed point by scaling it.</summary>
    public static explicit operator FInt( int src )
    {
        return new FInt( src, true );
    }

    public static explicit operator FInt( long src )
    {
        return new FInt( src, true );
    }

    public static explicit operator FInt( ulong src )
    {
        return new FInt( (long)src, true );
    }

    /// <summary>Shifts the raw value left by <paramref name="Amount"/> bits.</summary>
    public static FInt operator <<( FInt one, int Amount )
    {
        return new FInt( one.RawValue << Amount, false );
    }

    /// <summary>Shifts the raw value right by <paramref name="Amount"/> bits.</summary>
    public static FInt operator >>( FInt one, int Amount )
    {
        return new FInt( one.RawValue >> Amount, false );
    }

    public override bool Equals( object obj )
    {
        if ( obj is FInt )
        {
            return ( (FInt)obj ).RawValue == this.RawValue;
        }
        else
        {
            return false;
        }
    }

    public override int GetHashCode()
    {
        return RawValue.GetHashCode();
    }

    /// <summary>Returns the raw (scaled) value as text, not the real-number value.</summary>
    public override string ToString()
    {
        return this.RawValue.ToString();
    }

    #region PI, DoublePI
    public static FInt PI = new FInt( 12868, false ); //PI x 2^12
    public static FInt TwoPIF = PI * 2; //radian equivalent of 360 degrees
    public static FInt PIOver180F = PI / (FInt)180; //PI / 180
    #endregion

    #region Sqrt
    /// <summary>
    /// Approximates the square root by repeatedly averaging the estimate k with f / k.
    /// </summary>
    /// <param name="f">The non-negative value to take the root of.</param>
    /// <param name="NumberOfIterations">How many refinement steps to run.</param>
    /// <returns>The approximated square root; zero for a zero input.</returns>
    /// <exception cref="ArithmeticException">
    /// <paramref name="f"/> is negative, or the estimate ended up negative.
    /// </exception>
    public static FInt Sqrt( FInt f, int NumberOfIterations )
    {
        if ( f.RawValue < 0 ) //NaN in Math.Sqrt
        {
            throw new ArithmeticException( "Input Error" );
        }
        if ( f.RawValue == 0 )
        {
            return (FInt)0;
        }

        // Initial estimate (f + 1) / 2; parenthesised because + binds tighter than >>.
        FInt k = ( f + FInt.OneF ) >> 1;
        for ( int i = 0; i < NumberOfIterations; i++ )
        {
            k = ( k + ( f / k ) ) >> 1;
        }

        if ( k.RawValue < 0 )
        {
            throw new ArithmeticException( "Overflow" );
        }
        else
        {
            return k;
        }
    }

    /// <summary>
    /// Square root with an iteration count chosen from the magnitude of the input
    /// (8, 12 or 16 iterations).
    /// </summary>
    public static FInt Sqrt( FInt f )
    {
        byte numberOfIterations = 8;
        if ( f.RawValue > 0x64000 )
        {
            numberOfIterations = 12;
        }
        if ( f.RawValue > 0x3e8000 )
        {
            numberOfIterations = 16;
        }
        return Sqrt( f, numberOfIterations );
    }
    #endregion

    #region Sin
    /// <summary>
    /// Table-driven sine. The input is wrapped with the raw constant 25736
    /// (the raw value of <see cref="TwoPIF"/>) before the lookup.
    /// </summary>
    /// <param name="i">The angle; presumably in radians, given the 2*PI wrap constant.</param>
    /// <returns>The looked-up (and, for small remainders, interpolated) sine value.</returns>
    public static FInt Sin( FInt i )
    {
        FInt j = (FInt)0;

        // Bring negative angles into the non-negative range one full turn at a time.
        while ( i < 0 )
        {
            i += new FInt( 25736, false );
        }
        if ( i > new FInt( 25736, false ) )
        {
            i %= new FInt( 25736, false );
        }

        // k is the table index derived from the angle; j is the remainder used for interpolation.
        FInt k = ( i * new FInt( 10, false ) ) / new FInt( 714, false );
        if ( i != 0 && i != new FInt( 6434, false ) && i != new FInt( 12868, false ) &&
            i != new FInt( 19302, false ) && i != new FInt( 25736, false ) )
        {
            j = ( i * new FInt( 100, false ) ) / new FInt( 714, false ) - k * new FInt( 10, false );
        }

        // Map the index into the first quadrant; the last two quadrants are negated.
        if ( k <= new FInt( 90, false ) )
        {
            return sin_lookup( k, j );
        }
        if ( k <= new FInt( 180, false ) )
        {
            return sin_lookup( new FInt( 180, false ) - k, j );
        }
        if ( k <= new FInt( 270, false ) )
        {
            return sin_lookup( k - new FInt( 180, false ), j ).Inverse;
        }
        else
        {
            return sin_lookup( new FInt( 360, false ) - k, j ).Inverse;
        }
    }

    /// <summary>
    /// Reads SIN_TABLE at raw index <paramref name="i"/>, linearly interpolating towards
    /// the next entry when the remainder <paramref name="j"/> has a raw value between 1 and 9.
    /// </summary>
    private static FInt sin_lookup( FInt i, FInt j )
    {
        if ( j > 0 && j < new FInt( 10, false ) && i < new FInt( 90, false ) )
        {
            return new FInt( SIN_TABLE[i.RawValue], false ) +
                ( ( new FInt( SIN_TABLE[i.RawValue + 1], false ) - new FInt( SIN_TABLE[i.RawValue], false ) ) /
                new FInt( 10, false ) ) * j;
        }
        else
        {
            return new FInt( SIN_TABLE[i.RawValue], false );
        }
    }

    // Sine values scaled by 4096 for indices 0..90. The final entry (index 90) is
    // required because Sin accepts k <= 90 and sin_lookup reads index i + 1 for i = 89.
    private static int[] SIN_TABLE = {
        0, 71, 142, 214, 285, 357, 428, 499, 570, 641,
        711, 781, 851, 921, 990, 1060, 1128, 1197, 1265, 1333,
        1400, 1468, 1534, 1600, 1665, 1730, 1795, 1859, 1922, 1985,
        2048, 2109, 2170, 2230, 2290, 2349, 2407, 2464, 2521, 2577,
        2632, 2686, 2740, 2793, 2845, 2896, 2946, 2995, 3043, 3091,
        3137, 3183, 3227, 3271, 3313, 3355, 3395, 3434, 3473, 3510,
        3547, 3582, 3616, 3649, 3681, 3712, 3741, 3770, 3797, 3823,
        3849, 3872, 3895, 3917, 3937, 3956, 3974, 3991, 4006, 4020,
        4033, 4045, 4056, 4065, 4073, 4080, 4086, 4090, 4093, 4095,
        4096
    };
    #endregion

    // Named wrapper around operator * so the nested polynomial in Asin reads as calls.
    private static FInt mul( FInt F1, FInt F2 )
    {
        return F1 * F2;
    }

    #region Cos, Tan, Asin
    /// <summary>Cosine computed as Sin of the input shifted by the raw constant 6435.</summary>
    public static FInt Cos( FInt i )
    {
        return Sin( i + new FInt( 6435, false ) );
    }

    /// <summary>Tangent computed as Sin / Cos.</summary>
    public static FInt Tan( FInt i )
    {
        return Sin( i ) / Cos( i );
    }

    /// <summary>
    /// Arcsine approximation: PI/2 - Sqrt(1 - |F|) * polynomial(|F|), negated for negative input.
    /// </summary>
    /// <exception cref="ArithmeticException">The magnitude of <paramref name="F"/> exceeds 1.</exception>
    public static FInt Asin( FInt F )
    {
        bool isNegative = F < 0;
        F = Abs( F );

        if ( F > FInt.OneF )
        {
            throw new ArithmeticException( "Bad Asin Input:" + F.ToDouble() );
        }

        // Nested (Horner-style) evaluation; each constant is pre-shifted by SHIFT_AMOUNT.
        FInt f1 = mul( mul( mul( mul( new FInt( 145103 >> FInt.SHIFT_AMOUNT, false ), F ) -
            new FInt( 599880 >> FInt.SHIFT_AMOUNT, false ), F ) +
            new FInt( 1420468 >> FInt.SHIFT_AMOUNT, false ), F ) -
            new FInt( 3592413 >> FInt.SHIFT_AMOUNT, false ), F ) +
            new FInt( 26353447 >> FInt.SHIFT_AMOUNT, false );
        FInt f2 = PI / new FInt( 2, true ) - ( Sqrt( FInt.OneF - F ) * f1 );

        return isNegative ? f2.Inverse : f2;
    }
    #endregion

    #region ATan, ATan2
    /// <summary>Arctangent computed as Asin( F / Sqrt( 1 + F*F ) ).</summary>
    public static FInt Atan( FInt F )
    {
        return Asin( F / Sqrt( FInt.OneF + ( F * F ) ) );
    }

    /// <summary>
    /// Two-argument arctangent of F1 / F2, choosing the result by the signs of both arguments.
    /// Returns zero when both arguments are zero.
    /// </summary>
    public static FInt Atan2( FInt F1, FInt F2 )
    {
        if ( F2.RawValue == 0 && F1.RawValue == 0 )
        {
            return (FInt)0;
        }

        FInt result = (FInt)0;
        if ( F2 > 0 )
        {
            result = Atan( F1 / F2 );
        }
        else if ( F2 < 0 )
        {
            if ( F1 >= 0 )
            {
                result = ( PI - Atan( Abs( F1 / F2 ) ) );
            }
            else
            {
                result = ( PI - Atan( Abs( F1 / F2 ) ) ).Inverse;
            }
        }
        else
        {
            // F2 == 0: the result is +PI/2 or -PI/2 depending on the sign of F1.
            result = ( F1 >= 0 ? PI : PI.Inverse ) / new FInt( 2, true );
        }

        return result;
    }
    #endregion

    #region Abs
    /// <summary>Absolute value.</summary>
    public static FInt Abs( FInt F )
    {
        if ( F < 0 )
        {
            return F.Inverse;
        }
        else
        {
            return F;
        }
    }
    #endregion
}

/// <summary>
/// A 2D point whose coordinates are fixed-point <see cref="FInt"/> values.
/// </summary>
public struct FPoint
{
    public FInt X;
    public FInt Y;

    /// <summary>Creates a point from its two fixed-point coordinates.</summary>
    public FPoint( FInt X, FInt Y )
    {
        this.X = X;
        this.Y = Y;
    }

    /// <summary>
    /// Converts a Point to an FPoint by casting each coordinate to FInt.
    /// </summary>
    /// <param name="p">The source point (Point is declared outside this listing).</param>
    public static FPoint FromPoint( Point p )
    {
        FPoint f = new FPoint();
        f.X = (FInt)p.X;
        f.Y = (FInt)p.Y;
        return f;
    }

    /// <summary>
    /// Converts an FPoint back to a Point using the integer part of each coordinate,
    /// so any fractional part is dropped.
    /// </summary>
    public static Point ToPoint( FPoint f )
    {
        return new Point( f.X.IntValue, f.Y.IntValue );
    }
}