我想做 16 位 × 32 位的乘法(MUL)运算,但只能使用 32 位寄存器。由于结果是 48 位,可以用两个 32 位寄存器来保存。我想要用 C 代码来解决这个问题!我已经有一个 32 位 × 32 位、输出 64 位的 MUL 函数,但由于符号(正负号)问题,我无法直接正确地使用它。例如,16 位的 -1 是 0xFFFF,而 32 位的 -1 是 0xFFFFFFFF。我将把这段代码用于 MUL 指令的 LLVM 转换。
答案 0 :(得分:3)
你想做这样的事吗?
#include <inttypes.h>
/*
 * Multiply an unsigned 16-bit value by an unsigned 32-bit value using only
 * 32-bit arithmetic, producing the 48-bit product split in two parts.
 *
 *   top    - receives bits 47..32 of the product
 *   bottom - receives bits 31..0 of the product
 *   lhs    - 16-bit multiplicand
 *   rhs    - 32-bit multiplier
 *
 * The multiplier is treated as two base-65536 "digits"; each digit is
 * multiplied by lhs and the carry of the low partial product is folded
 * into the high one.
 */
void multiply(uint16_t*top,uint32_t*bottom, uint16_t lhs,uint32_t rhs){
    /* lhs * low digit of rhs: at most 0xFFFF*0xFFFF, so it fits in 32 bits. */
    const uint32_t low=(uint32_t)lhs*(rhs&0xFFFF);
    /* lhs * high digit of rhs plus the carry from the low partial product.
       Worst case 0xFFFE0001 + 0xFFFE = 0xFFFEFFFF, so no 32-bit overflow. */
    const uint32_t high=(uint32_t)lhs*(rhs>>16)+(low>>16);
    /* Low 16 bits of 'high' become bits 31..16 of the result. */
    *bottom=(high<<16)|(low&0xFFFF);
    /* Remaining upper 16 bits of 'high' are bits 47..32 of the result. */
    *top=(uint16_t)(high>>16);
}
当你意识到这其实是在以 65536(2^16)为基数、用一个“一位数”去乘一个“两位数”时,问题就简单多了。
64 位类型只用于校验和显示输出;乘法运算本身全部以 32 位完成。
这是测试工具:
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Multiply an unsigned 16-bit value by an unsigned 32-bit value using only
 * 32-bit arithmetic, producing the 48-bit product split in two parts.
 *
 *   top    - receives bits 47..32 of the product
 *   bottom - receives bits 31..0 of the product
 *   lhs    - 16-bit multiplicand
 *   rhs    - 32-bit multiplier
 *
 * The multiplier is treated as two base-65536 "digits"; each digit is
 * multiplied by lhs and the carry of the low partial product is folded
 * into the high one.
 */
void multiply(uint16_t*top,uint32_t*bottom, uint16_t lhs,uint32_t rhs){
    /* lhs * low digit of rhs: at most 0xFFFF*0xFFFF, so it fits in 32 bits. */
    const uint32_t low=(uint32_t)lhs*(rhs&0xFFFF);
    /* lhs * high digit of rhs plus the carry from the low partial product.
       Worst case 0xFFFE0001 + 0xFFFE = 0xFFFEFFFF, so no 32-bit overflow. */
    const uint32_t high=(uint32_t)lhs*(rhs>>16)+(low>>16);
    /* Low 16 bits of 'high' become bits 31..16 of the result. */
    *bottom=(high<<16)|(low&0xFFFF);
    /* Remaining upper 16 bits of 'high' are bits 47..32 of the result. */
    *top=(uint16_t)(high>>16);
}
/*
 * Pack a 16-bit upper part and a 32-bit lower part into one 64-bit value:
 * 'top' lands in bits 47..32 and 'bottom' in bits 31..0.
 */
uint64_t encode64(uint16_t top,uint32_t bottom){
    uint64_t packed=top;   /* widen first so the shift happens in 64 bits */
    packed<<=32;
    packed|=bottom;
    return packed;
}
/*
 * Verify multiply() for one input pair against a native 64-bit product.
 *
 * Returns 0 when the 48-bit result reassembled by encode64() equals
 * (uint64_t)lhs * rhs; otherwise prints the operands, the obtained value and
 * the expected value, and returns 1 so callers can sum up failures.
 */
int check(uint16_t lhs,uint32_t rhs){
    uint16_t hi16;
    uint32_t lo32;
    multiply(&hi16,&lo32,lhs,rhs);
    const uint64_t result=encode64(hi16,lo32);
    /* Reference value: widen both operands before multiplying. */
    const uint64_t expect=(uint64_t)lhs*(uint64_t)rhs;
    if(result!=expect){
        /* Spaces around the PRI* macros keep them from being parsed as
           user-defined-literal suffixes when compiled as C++11 or later. */
        printf("%" PRIu16 "*%" PRIu32 "==%" PRIu64 "!=%" PRIu64 "\n",lhs,rhs,result,expect);
        return 1;
    }
    return 0;
}
/*
 * Test driver: runs check() over a fixed list of operand pairs (including
 * the zero, one and all-ones boundary values), then prints one sample
 * product. Exits with EXIT_FAILURE if any check failed.
 */
int main(void) {
    /* Operand pairs are evaluated in this order. */
    static const struct { uint16_t lhs; uint32_t rhs; } cases[]={
        {2U,20UL},
        {0xFFFF,0xFFFFFFFF},
        {768U,565354767UL},
        {26434U,566534767UL},
        {26434U,690789UL},
        {5678U,9767889UL},
        {3674U,784367UL},
        {0,690789ULL},
        {0,0xFFFFFFFF},
        {0xFFFF,0},
        {0xFFFF,1},
        {1,0xFFFFFFFF},
        {0x2,0xAFFFFFFF},
    };
    int error=0;
    for(size_t i=0;i<sizeof cases/sizeof cases[0];++i){
        error+=check(cases[i].lhs,cases[i].rhs);
    }

    /* Sample multiplication that is always printed. */
    const uint16_t lhs=58989;
    const uint32_t rhs=5978342;
    uint16_t top;
    uint32_t bottom;
    multiply(&top,&bottom,lhs,rhs);
    const uint64_t result=encode64(top,bottom);
    /* Spaces around the PRI* macros keep them from being parsed as
       user-defined-literal suffixes when compiled as C++11 or later. */
    printf("%" PRIu16 "*%" PRIu32 "==%" PRIu64 "\n",lhs,rhs,result);

    if(error!=0){
        printf("\nErrors=%d\n",error);
    }
    return error==0?EXIT_SUCCESS:EXIT_FAILURE;
}
答案 1 :(得分:1)
困难的部分在于如何定义 16 位、32 位和 64 位整数,因为旧版本的 C 语言(*)并没有规定固定宽度的整数类型,而 int、long 和 long long 的宽度也没有被明确定义。
假设你有int16_t,int32_t和int64_t,你可以做
/*
 * Signed 16-bit x 32-bit multiply returning the 48-bit product.
 *
 * Both operands are sign-extended to 64 bits and multiplied; the result is
 * then truncated to its low 48 bits, so a negative product is returned as
 * its 48-bit two's-complement pattern with bits 63..48 cleared
 * (e.g. -1 * 1 yields 0x0000FFFFFFFFFFFF).
 */
int64_t product16_35(int16_t val1, int32_t val2) {
    /* Widen before multiplying so the multiplication is done in 64 bits. */
    const int64_t wide = (int64_t)val1 * (int64_t)val2;
    /* Keep exactly 48 bits: twelve hex F digits, not fourteen. */
    return wide & 0x0000FFFFFFFFFFFFLL;
}
(*)固定宽度整数类型是 C99 的一部分,MSVC 仅在 2010 之后的版本中才提供。
根据OP评论编辑
如果你想把结果放在一个16位整数(高位部分)和一个32位整数中,这里有一个小的变化:
/*
 * 48-bit product split into a 16-bit upper part and a 32-bit lower part.
 * The typedef makes the bare name `int48` usable in C as well as C++.
 */
typedef struct int48 {
    int16_t h;   /* bits 47..32 of the product (carries the sign) */
    uint32_t l;  /* bits 31..0; sign has no meaning for the lower part */
} int48;

/*
 * Signed 16-bit x 32-bit multiply returning the 48-bit product as an int48.
 *
 * Both operands are sign-extended to 64 bits and multiplied; the low 48 bits
 * of the two's-complement result are then split into res.l (bits 31..0) and
 * res.h (bits 47..32).
 */
int48 product16_35(int16_t val1, int32_t val2) {
    int48 res48;
    /* Widen before multiplying so the multiplication is done in 64 bits. */
    const int64_t wide = (int64_t)val1 * (int64_t)val2;
    /* Keep exactly 48 bits: twelve hex F digits, not fourteen. */
    const uint64_t bits48 = (uint64_t)wide & 0x0000FFFFFFFFFFFFULL;
    res48.l = (uint32_t)(bits48 & 0xFFFFFFFFu);
    /* Reinterpret the upper 16 bits as signed so negative products keep
       their sign in h. */
    res48.h = (int16_t)(uint16_t)(bits48 >> 32);
    return res48;
}
当然,我也可以只用 32 位运算和移位,手工实现 16 位 × 32 位的乘积。但与直接让编译器执行 64 位运算相比,这样做的效率肯定更低。
答案 2 :(得分:0)
将16位寄存器符号扩展为32位寄存器,然后使用带符号的32位x 32位乘法。
答案 3 :(得分:0)
这是 32 位 × 32 位的 MUL。如果有人了解 LLVM,这段代码会对他们有所帮助。对于 16 位操作数,只需先做符号扩展,再调用这个函数即可。
/**
 * Emits IR that computes a signed 32 x 32 -> 64-bit product as two separate
 * 32-bit halves, without using a 64-bit integer type in the generated code.
 *
 * NOTE(review): assumes both operands are i32 values -- the shifts by 16/31
 * and the comparisons against getInt32(1) only make sense for that width;
 * confirm against callers.
 *
 * @param tempFirstOp     first multiplicand
 * @param tempSecondOp    second multiplicand
 * @param result_Right32  out: value holding the low half of the product
 * @param result_Left32   out: value holding the high half of the product
 * @param irBuilder       builder used to emit every instruction
 * @return always BIN2VM_STATUS_SUCCESS
 */
/*static*/
enum bin2vm_status_codes bin2vm::IrModuleWriter::getSignedMul32_Result64bit(llvm::Value* tempFirstOp,llvm::Value* tempSecondOp,llvm::Value** result_Right32,llvm::Value** result_Left32, IRBuilder* irBuilder )
{
    oef_debug_print(( "bin2vm::IrModuleWriter::getSignedMul32_Result64bit(): ENTERED\n" ));

    llvm::ConstantInt* const one32 = irBuilder->getInt32(1);

    // Low half: a plain multiply of the two operands.
    llvm::Value* const lowWord = irBuilder->CreateMul(tempFirstOp,tempSecondOp,"mulResult");

    // High half, unsigned first: split each operand into 16-bit halves.
    // See http://stackoverflow.com/questions/22845801/32-bit-signed-multiplication-without-using-64-bit-data-type
    llvm::Value* const aLo = irBuilder->CreateAnd(tempFirstOp,0x0000FFFF,"firstOp_right");
    llvm::Value* const aHi = irBuilder->CreateLShr(tempFirstOp, 16, "firstOp_left");
    llvm::Value* const bLo = irBuilder->CreateAnd(tempSecondOp,0x0000FFFF,"secondOp_right");
    llvm::Value* const bHi = irBuilder->CreateLShr(tempSecondOp, 16, "secondOp_left");

    // Four 16 x 16 partial products.
    llvm::Value* const p0 = irBuilder->CreateMul(aLo,bLo,"partialProduct_0");
    llvm::Value* const p1 = irBuilder->CreateMul(aLo,bHi,"partialProduct_1");
    llvm::Value* const p2 = irBuilder->CreateMul(aHi,bLo,"partialProduct_2");
    llvm::Value* const p3 = irBuilder->CreateMul(aHi,bHi,"partialProduct_3");

    // Only the upper half of p0 can carry into the high word; p1 and p2 are
    // each split into an upper half and a lower half.
    llvm::Value* const p0Hi = irBuilder->CreateLShr(p0,16,"partialProduct_0");
    llvm::Value* const p1Hi = irBuilder->CreateLShr(p1,16,"partialProduct_1_left");
    llvm::Value* const p1Lo = irBuilder->CreateAnd(p1,0x0000FFFF, "partialProduct_1_right");
    llvm::Value* const p2Hi = irBuilder->CreateLShr(p2,16,"partialProduct_2_left");
    llvm::Value* const p2Lo = irBuilder->CreateAnd(p2,0x0000FFFF, "partialProduct_2_right");

    // carry = ((p0 >> 16) + (uint16_t)p1 + (uint16_t)p2) >> 16
    llvm::Value* carry = irBuilder->CreateAdd(p0Hi,p1Lo,"sumPartial_temp");
    carry = irBuilder->CreateAdd(carry,p2Lo,"sumPartial_temp");
    carry = irBuilder->CreateLShr(carry,16,"sumPartial_temp");

    // unsignedHigh = p3 + (p2 >> 16) + (p1 >> 16) + carry
    llvm::Value* unsignedHigh = irBuilder->CreateAdd(carry,p3,"sumPartial");
    unsignedHigh = irBuilder->CreateAdd(unsignedHigh,p2Hi,"sumPartial");
    unsignedHigh = irBuilder->CreateAdd(unsignedHigh,p1Hi,"sumPartial");

    // Signed correction:
    //   high = unsignedHigh - ((op1 < 0) ? op2 : 0) - ((op2 < 0) ? op1 : 0)
    // All three corrected candidates are emitted; selects pick one below.
    llvm::Value* const highIfOp1Neg = irBuilder->CreateSub(unsignedHigh,tempSecondOp,"sumPartial_op1Neg");
    llvm::Value* const highIfOp2Neg = irBuilder->CreateSub(unsignedHigh,tempFirstOp,"sumPartial_op2Neg");
    llvm::Value* const highIfBothNeg = irBuilder->CreateSub(highIfOp1Neg,tempFirstOp,"sumPartial_bothNeg");

    // Sign tests: shift each operand right by 31 and compare against 1.
    llvm::Value* const sign1 = irBuilder->CreateLShr(tempFirstOp,31,"bool_Op1IsNeg");
    llvm::Value* const op1Negative = irBuilder->CreateICmpEQ(sign1, one32,"bool_Op1IsNeg");
    llvm::Value* const sign2 = irBuilder->CreateLShr(tempSecondOp,31,"bool_Op2IsNeg");
    llvm::Value* const op2Negative = irBuilder->CreateICmpEQ(sign2, one32,"bool_Op2IsNeg");
    llvm::Value* const signBoth = irBuilder->CreateAnd(sign1,sign2,"bothNeg");
    llvm::Value* const bothNegative = irBuilder->CreateICmpEQ(signBoth,one32,"bool_bothNeg");

    // Select chain: later selects override earlier ones, so the "both
    // negative" candidate wins whenever both sign tests are true.
    llvm::Value* highWord = irBuilder->CreateSelect(op1Negative,highIfOp1Neg,unsignedHigh);
    highWord = irBuilder->CreateSelect(op2Negative,highIfOp2Neg,highWord);
    highWord = irBuilder->CreateSelect(bothNegative,highIfBothNeg,highWord);

    *result_Right32 = lowWord;
    *result_Left32 = highWord;
    return BIN2VM_STATUS_SUCCESS;
}