Question

我想做 16 位 × 32 位的乘法（MUL）运算，但只能使用 32 位寄存器。由于结果为 48 位，可以用两个 32 位寄存器来保存。我想要能解决这个问题的 C 代码！我有一个 32 位 × 32 位、输出为 64 位的 MUL 函数，但由于符号（sign）的问题，我无法直接正确地使用它。例如，16 位的 -1 是 0xFFFF，而 32 位的 -1 是 0xFFFFFFFF。我将把这段代码用于 MUL 的 LLVM 转换。

Answer 1

你想做这样的事吗？

#include <inttypes.h>

/*
 * Multiply a 16-bit unsigned value by a 32-bit unsigned value using only
 * 32-bit arithmetic, and return the 48-bit product in two pieces.
 *
 * rhs is split into two 16-bit halves (two "digits" in base 65536) and each
 * half is multiplied by lhs, exactly like schoolbook long multiplication.
 *
 *   top    - receives bits 47..32 of the product
 *   bottom - receives bits 31..0 of the product
 *   lhs    - 16-bit multiplicand
 *   rhs    - 32-bit multiplier
 */
void multiply(uint16_t*top,uint32_t*bottom, uint16_t lhs,uint32_t rhs){

    /* lhs * low half of rhs: at most 0xFFFF * 0xFFFF, which fits in 32 bits.
       The cast keeps the multiplication unsigned regardless of how uint16_t
       would otherwise be promoted. */
    uint32_t low=(uint32_t)lhs*(rhs&0xFFFF);
    /* lhs * high half of rhs plus the carry out of `low`:
       at most 0xFFFE0001 + 0xFFFE = 0xFFFEFFFF, so it cannot overflow. */
    uint32_t high=(uint32_t)lhs*(rhs>>16)+(low>>16);
    /* Low 16 bits of `high` become bits 31..16 of the result. */
    *bottom=(high<<16)|(low&0xFFFF);
    /* Upper 16 bits of `high` are bits 47..32 of the result. */
    *top=(uint16_t)(high>>16);
}

当你意识到这其实是在 65536（2 ** 16）进制下，用一个一位数去乘一个两位数时，问题就简单多了。

我只使用64位检查并显示输出。乘法运算为32位。

这是测试工具：

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Multiply a 16-bit unsigned value by a 32-bit unsigned value using only
 * 32-bit arithmetic, and return the 48-bit product in two pieces.
 *
 * rhs is split into two 16-bit halves (two "digits" in base 65536) and each
 * half is multiplied by lhs, exactly like schoolbook long multiplication.
 *
 *   top    - receives bits 47..32 of the product
 *   bottom - receives bits 31..0 of the product
 *   lhs    - 16-bit multiplicand
 *   rhs    - 32-bit multiplier
 */
void multiply(uint16_t*top,uint32_t*bottom, uint16_t lhs,uint32_t rhs){

    /* lhs * low half of rhs: at most 0xFFFF * 0xFFFF, which fits in 32 bits.
       The cast keeps the multiplication unsigned regardless of how uint16_t
       would otherwise be promoted. */
    uint32_t low=(uint32_t)lhs*(rhs&0xFFFF);
    /* lhs * high half of rhs plus the carry out of `low`:
       at most 0xFFFE0001 + 0xFFFE = 0xFFFEFFFF, so it cannot overflow. */
    uint32_t high=(uint32_t)lhs*(rhs>>16)+(low>>16);
    /* Low 16 bits of `high` become bits 31..16 of the result. */
    *bottom=(high<<16)|(low&0xFFFF);
    /* Upper 16 bits of `high` are bits 47..32 of the result. */
    *top=(uint16_t)(high>>16);
}

/*
 * Pack the two halves of a 48-bit value into a single 64-bit integer:
 * `top` lands in bits 47..32 and `bottom` in bits 31..0.
 */
uint64_t encode64(uint16_t top,uint32_t bottom){
    uint64_t packed=top;
    packed<<=32;
    packed|=bottom;
    return packed;
}

/*
 * Run multiply() on one operand pair and compare its 48-bit result with a
 * reference product computed directly in 64-bit arithmetic.
 *
 * Returns 0 when the two agree; otherwise prints the operands, the value
 * obtained and the value expected, and returns 1.
 */
int check(uint16_t lhs,uint32_t rhs){
    uint16_t upper;
    uint32_t lower;

    multiply(&upper,&lower,lhs,rhs);
    const uint64_t actual=encode64(upper,lower);

    /* Widen both operands first so the reference multiplication is 64-bit. */
    const uint64_t reference=(uint64_t)lhs*(uint64_t)rhs;

    if(actual!=reference){
        printf("%" PRIu16 "*%" PRIu32 "==%" PRIu64 "!=%" PRIu64 "\n",lhs,rhs,actual,reference);
        return 1;
    }
    return 0;
}

/*
 * Test driver: runs check() over a fixed list of operand pairs, then prints
 * one sample product. Exits with EXIT_SUCCESS only if every check passed.
 */
int main(void) {
    /* Operand pairs fed to check(), in execution order. */
    static const struct {
        uint16_t lhs;
        uint32_t rhs;
    } cases[]={
        {2U,20UL},
        {0xFFFF,0xFFFFFFFF},
        {768U,565354767UL},
        {26434U,566534767UL},
        {26434U,690789UL},
        {5678U,9767889UL},
        {3674U,784367UL},
        {0,690789ULL},
        {0,0xFFFFFFFF},
        {0xFFFF,0},
        {0xFFFF,1},
        {1,0xFFFFFFFF},
        {0x2,0xAFFFFFFF},
    };
    const uint16_t lhs=58989;
    const uint32_t rhs=5978342;
    uint16_t top;
    uint32_t bottom;
    int error=0;
    size_t i;

    /* Each failing case contributes 1 to the error count. */
    for(i=0;i<sizeof cases/sizeof cases[0];++i){
        error+=check(cases[i].lhs,cases[i].rhs);
    }

    /* Sample multiplication whose result is printed unconditionally. */
    multiply(&top,&bottom,lhs,rhs);
    printf("%" PRIu16 "*%" PRIu32 "==%" PRIu64 "\n",lhs,rhs,encode64(top,bottom));

    if(error!=0){
        printf("\nErrors=%d\n",error);
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

Answer 2

困难的部分在于如何定义 16 位、32 位和 64 位整数，因为旧版本的 C 语言（*）并没有对此作出规定……而 int、long 和 long long 的位宽也没有明确的定义。

假设你有int16_t，int32_t和int64_t，你可以做

/*
 * Signed 16-bit x 32-bit multiplication, returned as a 48-bit two's-complement
 * value stored in the low 48 bits of an int64_t (bits 63..48 are zero).
 *
 *   val1 - signed 16-bit operand
 *   val2 - signed 32-bit operand
 *
 * Returns the product truncated to 48 bits; a negative product therefore
 * comes back as a positive int64_t whose low 48 bits hold its
 * two's-complement encoding.
 */
int64_t product16_35(int16_t val1, int32_t val2) {
    /* Widen before multiplying so the full product is computed in 64 bits,
       sign-extended to 64 bits. */
    const int64_t product = (int64_t)val1 * (int64_t)val2;
    /* Keep exactly 48 bits: twelve hex F digits (the previous mask had
       fourteen, i.e. it kept 56 bits). */
    return product & 0x0000FFFFFFFFFFFFLL;
}

（*）这些类型是 C99 的一部分，而在 MSVC 中只有 2010 之后的版本（> 2010）才提供。

根据OP评论编辑

如果你想把结果放在一个16位整数（高位部分）和一个32位整数中，这里有一个小的变化：

/*
 * A 48-bit signed integer split into a 16-bit upper part and a 32-bit
 * lower part. The typedef lets `int48` be used as a type name in C as well
 * as in C++.
 */
typedef struct int48 {
    int16_t h;  /* bits 47..32; carries the sign of the 48-bit value */
    uint32_t l; /* bits 31..0; sign has no meaning for the lower part */
} int48;

/*
 * Signed 16-bit x 32-bit multiplication returning the 48-bit product as an
 * int48 (upper 16 bits in .h, lower 32 bits in .l).
 *
 *   val1 - signed 16-bit operand
 *   val2 - signed 32-bit operand
 */
int48 product16_35(int16_t val1, int32_t val2) {
    int48 res48;
    /* Widen first so the multiplication is carried out in 64 bits. */
    const int64_t product = (int64_t)val1 * (int64_t)val2;
    /* Work on the unsigned bit pattern, truncated to exactly 48 bits, so the
       shift and masks below are well defined. */
    const uint64_t bits = (uint64_t)product & 0x0000FFFFFFFFFFFFULL;
    res48.l = (uint32_t)(bits & 0xFFFFFFFFu);
    /* Reinterpret the top 16 bits as signed so .h is negative exactly when
       the product is (on two's-complement targets). */
    res48.h = (int16_t)(uint16_t)(bits >> 32);
    return res48;
}

当然，我也可以只用 32 位运算和移位，手工实现 16 位 × 32 位的乘法。但这样做肯定不如直接让编译器执行 64 位运算来得高效。

Answer 3

将16位寄存器符号扩展为32位寄存器，然后使用带符号的32位x 32位乘法。

Answer 4

这是 32 × 32 的 MUL。如果有人了解 LLVM，这段代码会对他们有所帮助。对于 16 位的操作数，只需先做符号扩展，然后再调用这个函数。

/*static*/
/**
 * Emits IR that multiplies two values as signed integers and yields the
 * product as two halves: the low ("right") half and the high ("left") half.
 *
 * The low half is a plain `mul` of the operands. The high half is built from
 * four 16x16 partial products (unsigned high word), then corrected for sign
 * by subtracting the other operand whenever an operand is negative.
 *
 * NOTE(review): the shifts by 16 and 31 and the 0xFFFF masks assume both
 * operands are i32 values -- confirm at the call sites.
 *
 * @param tempFirstOp     first operand
 * @param tempSecondOp    second operand
 * @param result_Right32  out: value holding the low half of the product
 * @param result_Left32   out: value holding the high half of the product
 * @param irBuilder       builder used to emit every instruction
 * @return always BIN2VM_STATUS_SUCCESS
 */
enum bin2vm_status_codes bin2vm::IrModuleWriter::getSignedMul32_Result64bit(llvm::Value* tempFirstOp,llvm::Value* tempSecondOp,llvm::Value** result_Right32,llvm::Value** result_Left32, IRBuilder* irBuilder )
{
  oef_debug_print(( "bin2vm::IrModuleWriter::getSignedMul32_Result64bit(): ENTERED\n" ));

  // Constant 1, compared against the extracted sign bits further down.
  llvm::ConstantInt* const one32 = irBuilder->getInt32(1);

  // Low half: the ordinary (wrapping) multiplication.
  llvm::Value* const lowHalf = irBuilder->CreateMul(tempFirstOp,tempSecondOp,"mulResult");

  // High half, see
  // http://stackoverflow.com/questions/22845801/32-bit-signed-multiplication-without-using-64-bit-data-type
  // Split each operand into its low ("right") and high ("left") 16 bits.
  llvm::Value* const aLo = irBuilder->CreateAnd(tempFirstOp,0x0000FFFF,"firstOp_right");
  llvm::Value* const aHi = irBuilder->CreateLShr(tempFirstOp, 16, "firstOp_left");
  llvm::Value* const bLo = irBuilder->CreateAnd(tempSecondOp,0x0000FFFF,"secondOp_right");
  llvm::Value* const bHi = irBuilder->CreateLShr(tempSecondOp, 16, "secondOp_left");

  // The four 16x16 partial products.
  llvm::Value* const p0 = irBuilder->CreateMul(aLo,bLo,"partialProduct_0");
  llvm::Value* const p1 = irBuilder->CreateMul(aLo,bHi,"partialProduct_1");
  llvm::Value* const p2 = irBuilder->CreateMul(aHi,bLo,"partialProduct_2");
  llvm::Value* const p3 = irBuilder->CreateMul(aHi,bHi,"partialProduct_3");

  // Only the upper 16 bits of p0 can carry into the high half; p1 and p2 are
  // split into their upper and lower 16 bits.
  llvm::Value* const p0Hi = irBuilder->CreateLShr(p0,16,"partialProduct_0");
  llvm::Value* const p1Hi = irBuilder->CreateLShr(p1,16,"partialProduct_1_left");
  llvm::Value* const p1Lo = irBuilder->CreateAnd(p1,0x0000FFFF, "partialProduct_1_right");
  llvm::Value* const p2Hi = irBuilder->CreateLShr(p2,16,"partialProduct_2_left");
  llvm::Value* const p2Lo = irBuilder->CreateAnd(p2,0x0000FFFF, "partialProduct_2_right");

  // carry = ((p0 >> 16) + (uint16_t)p1 + (uint16_t)p2) >> 16
  llvm::Value* const midSum1 = irBuilder->CreateAdd(p0Hi,p1Lo,"sumPartial_temp");
  llvm::Value* const midSum2 = irBuilder->CreateAdd(midSum1,p2Lo,"sumPartial_temp");
  llvm::Value* const carry = irBuilder->CreateLShr(midSum2,16,"sumPartial_temp");

  // Unsigned high half = p3 + (p2 >> 16) + (p1 >> 16) + carry
  llvm::Value* const high1 = irBuilder->CreateAdd(carry,p3,"sumPartial");
  llvm::Value* const high2 = irBuilder->CreateAdd(high1,p2Hi,"sumPartial");
  llvm::Value* const highUnsigned = irBuilder->CreateAdd(high2,p1Hi,"sumPartial");

  // Signed correction candidates:
  //   highUnsigned - ((op1 < 0) ? op2 : 0) - ((op2 < 0) ? op1 : 0)
  llvm::Value* const highIfOp1Neg = irBuilder->CreateSub(highUnsigned,tempSecondOp,"sumPartial_op1Neg");
  llvm::Value* const highIfOp2Neg = irBuilder->CreateSub(highUnsigned,tempFirstOp,"sumPartial_op2Neg");
  llvm::Value* const highIfBothNeg = irBuilder->CreateSub(highIfOp1Neg,tempFirstOp,"sumPartial_bothNeg");

  // Sign tests: shift bit 31 down to bit 0 and compare with 1.
  llvm::Value* const signBit1 = irBuilder->CreateLShr(tempFirstOp,31,"bool_Op1IsNeg");
  llvm::Value* const op1Negative = irBuilder->CreateICmpEQ(signBit1, one32,"bool_Op1IsNeg");
  llvm::Value* const signBit2 = irBuilder->CreateLShr(tempSecondOp,31,"bool_Op2IsNeg");
  llvm::Value* const op2Negative = irBuilder->CreateICmpEQ(signBit2, one32,"bool_Op2IsNeg");
  llvm::Value* const signBitsBoth = irBuilder->CreateAnd(signBit1,signBit2,"bothNeg");
  llvm::Value* const bothNegative = irBuilder->CreateICmpEQ(signBitsBoth,one32,"bool_bothNeg");

  // Chain of selects; a later select overrides an earlier one, so the
  // "both negative" case wins when it applies.
  llvm::Value* const pick1 = irBuilder->CreateSelect(op1Negative,highIfOp1Neg,highUnsigned);
  llvm::Value* const pick2 = irBuilder->CreateSelect(op2Negative,highIfOp2Neg,pick1);
  llvm::Value* const highHalf = irBuilder->CreateSelect(bothNegative,highIfBothNeg,pick2);

  *result_Right32 = lowHalf;
  *result_Left32 = highHalf;

  return BIN2VM_STATUS_SUCCESS;
}

16位* 32位MUL，48位结果

4 个答案: