
时间:2018-12-04 09:31:43

标签: c casting floating-point arm math.h


#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <stdio.h>
#include <float.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>

#define ARRAY_LENGTH                        17

typedef uint8_t     UI_8;
typedef uint16_t    UI_16;
typedef uint32_t    UI_32;
typedef uint64_t    UI_64;
typedef int8_t      SI_8;
typedef int16_t     SI_16;
typedef int32_t     SI_32;
typedef int64_t     SI_64;
typedef float       FL_32;
typedef double      FL_64;
typedef char        CHAR_8;
typedef bool        BOOL;

SI_32 BoolifyValue(const UI_64 input_value, BOOL * encoded_bool_array, const UI_16 array_length)
    SI_32 return_code = -1;

    UI_64 encode_mask = 0x0000000000000001, single_bit = 0;
    UI_8 bit_index = 0;
    UI_8 array_index = 0;
    const UI_8 array_max_index = array_length - 1;

    for (bit_index=0; bit_index < array_length; bit_index++) {
        /* Capture a single bit from the encoded_value */
        single_bit = (input_value >> bit_index) & encode_mask;
        /* We want to preserve the endianness of the value, so we start storing the bits from the end of the array */
        array_index = array_max_index - bit_index;
        /* Store the bit as a boolean in the encoded_bool_array */
        encoded_bool_array[array_index] = (BOOL)single_bit;
    return_code = 0;

    return return_code;

SI_32 CalculateEncodedValue(const FL_64 encode_divisor, const FL_64 meas_value, UI_64 * encoded_value)
    SI_32 return_code = 0;
    FL_64 temp_float = 0.0, limit = 0.0;

    /* Before performing division by floating point, we need to assess whether the result will overflow or not. */
    /* DBL_MAX is the maximum representable value for a float on the current platform, and encode_divisor
    shall be the divisor, so if we multiply them both we get the maximum value the dividend can take */
    limit = DBL_MAX * encode_divisor;
    if (((-limit) <= meas_value) && (meas_value <= limit)){
        /* Overflow will not occur */
        /* Encode the value, round it up and cast it to an integer type */
        temp_float = meas_value / encode_divisor;
        printf("Divided result = %.4f\n", temp_float);
        temp_float = rint(temp_float);
        printf("Rounded result = %.4f\n", temp_float);
        *encoded_value = (UI_64)temp_float;
        printf("Unsigned result = ""%" PRIu64 "\n", *encoded_value);

        return_code = 0;
        /* Division will overflow, so we truncate to the maximun value */
        *encoded_value = (UI_64)DBL_MAX;

        return_code = -1;

    return return_code;

SI_32 main(SI_32 argc, CHAR_8 *argv[])
    FL_64 input_float = -4.5018;
    UI_64 output_value = 0;
    const FL_64 acc_den       = 0.00152587890625;      //ldexp(100, -16)
    BOOL array[ARRAY_LENGTH] = {0};
    if(argc != 2){
        printf("Usage: test_program <float>");
        return 1;
    printf("Test program started, input value initialized to: %.4f\n", input_float);
    input_float = atof(argv[1]);
    printf("Test program started, input value is: %f\n", input_float);
    if (CalculateEncodedValue(acc_den, input_float, &output_value) < 0) {
    printf("Result---->""%" PRIu64 "\n", output_value);
    printf("Printing boolean: \n");
    (void)BoolifyValue(output_value, array, ARRAY_LENGTH);
    for (size_t i = 0; i <  ARRAY_LENGTH; i++) {
        if(array[i] == true){

    return 0;

在i386机器上使用gcc test_program.c -o test_program -lm进行编译会产生以下结果:

root@i386-machine:/$ ./test_program -4.5074
Test program started, input value initialized to: -4.5018
Test program started, input value is: -4.507400
Divided result = -2953.9697
Rounded result = -2954.0000
Unsigned result = 18446744073709548662
Printing boolean: 


root@arm-machine:~# ./test_program -4.5074
Test program started, input value initialized to: -4.5018
Test program started, input value is: -4.507400
Divided result = -2953.9697
Rounded result = -2954.0000
Unsigned result = 0
Printing boolean: 

我在这里看到的是,在i386上,强制转换*encoded_value = (UI_64)temp_float;将值转换为2的补码(这是我想要的),而在ARM上将其转换为零。

问题是双重的: 1)为什么呢? 2)我该怎么做才能轻松地从四舍五入的双中得到那个二的补码?

0 个答案:
