我不太清楚怎样才能用 for 循环在 FPGA 中最好地复现一段 C 代码(这已经不是我第一次卡在这类问题上了)。
C代码片段如下所示:
/* Calls dot_product with the address of corr_sum, a window of sample_data_buffer
 * starting at sample_index + d_circ_buf_size - sync_pattern_size, the start of
 * sync_pattern, and the length sync_pattern_size.
 * NOTE(review): dot_product is not shown here; presumably it stores the complex
 * dot product of the two windows in corr_sum -- confirm against its definition. */
dot_product(&corr_sum, &sample_data_buffer[sample_index+d_circ_buf_size-sync_pattern_size], &sync_pattern[0], sync_pattern_size);
/* Accumulate abs(corr_sum) into the running total abs_corr_sum. */
abs_corr_sum += abs(corr_sum);
非常直截了当:它计算两个复数向量的点积,并把结果的绝对值累加起来。
我尝试这样复现它:
// Combinational-style block that recomputes corr_sum and abs_corr_sum from
// scratch each time sample_index changes.
// NOTE(review): only sample_index is in the sensitivity list, so in simulation
// a change to the sample buffers, the sync pattern or the size signals alone
// does not re-evaluate this block.
always @(sample_index)
begin
// for each incoming sample
abs_corr_sum = 64'd0;
corr_sum = 64'd0;
for (index2 = 0; index2 < sync_pattern_size; index2 = index2 + 1'b1)
begin
// Per-tap term I*I + Q*Q for tap index2. corr_sum is overwritten (not
// accumulated) on every iteration, so after the loop it holds only the last tap.
corr_sum = sample_data_buffer_I[index2+sample_index+circ_buf_size-sync_pattern_size] * sync_pattern_I[index2]
+ sample_data_buffer_Q[index2+sample_index+circ_buf_size-sync_pattern_size] * sync_pattern_Q[index2];
//this is my quick and dirty abs(corr_sum) summer
// Adds the magnitude of this tap's term to abs_corr_sum, i.e. a sum of
// per-tap absolute values.
// NOTE(review): the declarations are not visible here; (corr_sum < 0) can only
// be true if corr_sum is declared signed -- confirm.
// NOTE(review): presumably the C dot_product sums all taps before abs() is
// applied once, which would differ from this per-tap abs -- verify.
abs_corr_sum = (corr_sum < 0) ? abs_corr_sum + ~$signed(corr_sum)+1 : abs_corr_sum + corr_sum;
end // for (index2 = 0; index2 < sync_pattern_size; index2 = index2 + 1'b1)
end //always @(sample_index)
这看起来是对的吗?我没有得到我期待的结果;虽然问题可能在其他地方,但我认为这一部分是最可能的罪魁祸首。
答案 0 :(得分:2)
要将来自带有循环,条件等的算法的代码转换为Verilog的可综合形式,您需要将其转换为FSM。
例如,与你想要实现的功能类似的 for 循环如下:
/* Reference loop: for each of the N elements, compute
 * sample_I*sync_I + sample_Q*sync_Q and add its absolute value to abscorsum. */
int sample_I[N], sync_I[N]; // assume 32-bit ints, 2-complement numbers.
int sample_Q[N], sync_Q[N];
int i, corsum, abscorsum = 0; // only abscorsum is initialised here
for (i=0;i<N;i++)
{
corsum = sample_I[i] * sync_I[i] + sample_Q[i] * sync_Q[i]; // per-element term, overwritten each pass
abscorsum += abs(corsum); // running sum of per-element magnitudes
}
首先,把各条语句按时隙分组,这样就能看出哪些操作可以在同一个时钟周期(即同一个状态)内完成,然后为每个时隙分配一个状态:
1)
i = 0
abscorsum = 0
goto 2)
2)
if i!=N
corsum = sample_I[i] * sync_I[i]
goto 3)
else
goto 5)
3)
corsum = corsum + sample_Q[i] * sync_Q[i]
i = i + 1
goto 4)
4)
if (corsum >= 0)
abscorsum = abscorsum + corsum
else
abscorsum = abscorsum + (-corsum)
goto 2)
5)
STOP
状态2和3可以合并为单个状态,但这会迫使综合器推断出两个乘法器;此外,由此产生的组合路径传播延迟可能非常大,从而限制该设计所能达到的时钟频率。因此,我把点积计算拆成两部分,每部分只使用一次乘法运算。如果加以指示,综合器可以只使用一个乘法器并让这两次运算共享它,因为它们发生在不同的时钟周期中。
转换为此模块: http://www.edaplayground.com/x/MEG
信号 rst 用于指示模块开始操作。模块通过拉高 finish 来表示操作结束且输出(abscorrsum)有效。
sample_i、sync_i、sample_q 和 sync_q 使用存储器块建模,i 是要读取的元素的地址。大多数综合器会为这些向量推断出块 RAM,因为它们每一个都只在一个状态下被读取,并且始终使用同一个地址信号。
// corrdotprod: FSM that accumulates |sample_i*sync_i + sample_q*sync_q| over
// N elements, one element every three clock cycles.
//
// Parameters:
//   N          number of elements to process.
// Ports:
//   clk        clock; all state is updated on the rising edge.
//   rst        synchronous, active-high. While high the FSM is held in STATE1
//              and finish is cleared; the run starts when rst goes low.
//   i          index of the element currently requested; the surrounding logic
//              must present element i on the four data inputs.
//   sample_i, sync_i, sample_q, sync_q
//              32-bit two's-complement data for element i.
//   abscorrsum running sum of absolute values; valid when finish is 1.
//   finish     set to 1 once all N elements have been accumulated; stays 1
//              until the next reset.
module corrdotprod #(parameter N = 4) (
    input  wire        clk,
    input  wire        rst,
    output reg  [31:0] i,
    input  wire [31:0] sample_i,
    input  wire [31:0] sync_i,
    input  wire [31:0] sample_q,
    input  wire [31:0] sync_q,
    output reg  [31:0] abscorrsum,
    output reg         finish
);
    // State encodings are internal to the module, hence localparam.
    localparam
        STATE1 = 3'd1,  // initialise index and accumulator
        STATE2 = 3'd2,  // loop test + first product (I * I)
        STATE3 = 3'd3,  // second product (Q * Q) + index increment
        STATE4 = 3'd4,  // add |corrsum| to the accumulator
        STATE5 = 3'd5;  // done

    reg [31:0] corrsum;
    reg [2:0]  state;

    always @(posedge clk) begin
        if (rst == 1'b1) begin
            state  <= STATE1;
            // Drop the "done" flag as soon as a new run is requested so a
            // stale result is never reported as valid during/after reset.
            finish <= 1'b0;
        end
        else begin
            case (state)
                STATE1:
                    begin
                        i          <= 0;
                        abscorrsum <= 0;
                        finish     <= 1'b0;
                        state      <= STATE2;
                    end
                STATE2:
                    begin
                        if (i != N) begin
                            // Product is truncated to 32 bits; the low 32 bits are the
                            // same for signed and unsigned operands.
                            corrsum <= sample_i * sync_i; // synthesizer deals with multiplication
                            state   <= STATE3;
                        end
                        else begin
                            state <= STATE5;
                        end
                    end
                STATE3:
                    begin
                        corrsum <= corrsum + sample_q * sync_q; // this product can share the multiplier as above
                        i       <= i + 1;
                        state   <= STATE4;
                    end
                STATE4:
                    begin
                        if (corrsum[31] == 1'b0) // remember: 2-complement
                            abscorrsum <= abscorrsum + corrsum;
                        else
                            abscorrsum <= abscorrsum + (~corrsum + 1); // negate: invert and add one
                        state <= STATE2;
                    end
                STATE5:
                    finish <= 1'b1;
            endcase
        end
    end
endmodule
可以使用这个简单的测试平台进行测试:
// Testbench for corrdotprod: feeds four fixed I/Q sample/sync pairs through
// the module, prints the module result, recomputes the same sum behaviourally,
// prints it, and reports whether the two agree.
module tb;
    reg clk;
    reg rst;
    // Data memories addressed by the DUT's index output i.
    reg [31:0] sample_i[0:3];
    reg [31:0] sync_i[0:3];
    reg [31:0] sample_q[0:3];
    reg [31:0] sync_q[0:3];
    wire [31:0] i;
    wire [31:0] abscorrsum;
    wire        finish;   // declared explicitly instead of relying on an implicit net

    corrdotprod #(.N(4)) uut (
        .clk        (clk),
        .rst        (rst),
        .i          (i),
        .sample_i   (sample_i[i]),
        .sync_i     (sync_i[i]),
        .sample_q   (sample_q[i]),
        .sync_q     (sync_q[i]),
        .abscorrsum (abscorrsum),
        .finish     (finish)
    );

    integer tb_i, tb_corrsum, tb_abscorrsum;

    initial begin
        $dumpfile ("dump.vcd");
        $dumpvars (0, tb.uut);

        sample_i[0] = 1;
        sample_i[1] = 2;
        sample_i[2] = 3;
        sample_i[3] = 4;

        sync_i[0] = 2;
        sync_i[1] = -2;
        sync_i[2] = 2;
        sync_i[3] = -2;

        sample_q[0] = -1;
        sample_q[1] = -2;
        sample_q[2] = -3;
        sample_q[3] = -4;

        sync_q[0] = 3;
        sync_q[1] = -3;
        sync_q[2] = 3;
        sync_q[3] = -3;

        clk = 0;
        rst = 1;
        #30;
        rst = 0;

        // Block until the DUT reports completion.
        wait (finish == 1);
        $display ("ABSCORRSUM = %d\n", abscorrsum);

        // Testing result from module
        tb_abscorrsum = 0;
        for (tb_i = 0; tb_i < 4; tb_i = tb_i + 1) begin
            tb_corrsum = sample_i[tb_i] * sync_i[tb_i] + sample_q[tb_i] * sync_q[tb_i];
            if (tb_corrsum < 0)
                tb_corrsum = -tb_corrsum;
            tb_abscorrsum = tb_abscorrsum + tb_corrsum;
        end
        $display ("TB_ABSCORRSUM = %d\n", tb_abscorrsum);

        // Self-check so a mismatch is not missed by eye.
        if (abscorrsum !== tb_abscorrsum)
            $display ("MISMATCH: module and testbench results differ\n");
        else
            $display ("MATCH: module and testbench results agree\n");
        $finish;
    end

    // 10-time-unit clock. The delay precedes the read of clk, so the first
    // toggle samples clk at t=5, after the initial block has set it to 0
    // (reading it at t=0 would race with that assignment).
    always #5 clk = ~clk;
endmodule