我想在Verilog中实现以下Matlab代码:
symBuf = [symBuf(numFFT/2+1:end); zeros(numFFT/2,1)];
symBuf(KF+(1:KF)) = symBuf(KF+(1:KF)) + txSymb;
这是一个简单的重叠和添加操作。
这是我的实施:
module overlap
#(K = 3,
FFT = 128
)
(
input signed [15:0] symbInReal ,
input signed [15:0] symbInImag ,
input clock ,
input reset ,
input readyIn ,
input validIn ,
input lastIn ,
output signed [15:0] outReal ,
output signed [15:0] outImag ,
output reg lastOut ,
output wire readyOut ,
output reg validOut
);
reg signed [15:0] previousSymbolReal [2*FFT*K-1:0] ;
reg signed [15:0] previousSymbolImag [2*FFT*K-1:0] ;
reg signed [15:0] txSymbolBuffReal [K*FFT-1:0] ;
reg signed [15:0] txSymbolBuffImag [K*FFT-1:0] ;
reg [15:0] counter ;
reg [1:0] state ;
reg [3:0] nextstate ;
reg [15:0] clockcount ;
reg signed [15:0] outputValueReal ;
reg signed [15:0] outputValueImag ;
reg [15:0] buffcount ;
reg [7:0] symboutcount ;
reg [7:0] symbincount ;
reg last ;
reg lastvalidout ;
wire lastout ;
integer i;
initial begin
for (i=0; i<2*FFT*K ; i = i + 1) begin
previousSymbolReal[i] = 0;
previousSymbolImag[i] = 0;
end
end
always@(posedge clock) begin
if(~reset) begin
counter <= 0;
end else begin
counter <= counter +1;
if(nextstate != state)
counter <= 0;
end
end
always@(*) begin
if(~reset) begin
nextstate = 0;
end else begin
nextstate = state;
if(readyIn) begin
case(state)
4'd0: begin
if(validIn || last) begin
nextstate = 1;
end
end
4'd1: begin
if (counter == (FFT*K-2)) begin
nextstate = 2;
end
end
4'd2: begin
nextstate = 0;
end
endcase
end
end
end
always@(posedge clock) begin
if(~reset) begin
state <= 0;
end else begin
if(readyIn)
state <= nextstate;
end
end
always@(posedge clock) begin
if(~reset) begin
clockcount <= 0;
symboutcount <= 0;
lastOut <= 0;
end else begin
if(readyIn) begin
clockcount <= clockcount +1 ;
case(state)
4'd0: begin
validOut <= 0;
clockcount <= 0;
lastOut <= 0;
end
4'd1: begin
if(~lastvalidout)
validOut <= 1;
outputValueReal <= previousSymbolReal[clockcount+ FFT/2];
outputValueImag <= previousSymbolImag[clockcount+ FFT/2];
end
4'd2: begin
outputValueReal <= previousSymbolReal[clockcount + FFT/2];
outputValueImag <= previousSymbolImag[clockcount + FFT/2];
clockcount <= 0;
if(~lastvalidout)
validOut <= 1;
if(symboutcount == symbincount + 1 && last)
lastOut <= 1;
symboutcount <= symboutcount +1 ;
end
endcase
end
end
end
assign readyOut = readyIn;
genvar M;
generate
for(M=0;M<K*FFT;M=M+1) begin
always@(posedge clock) begin
if(state==2) begin
previousSymbolReal[M] <= previousSymbolReal[M+FFT/2];
previousSymbolImag[M] <= previousSymbolImag[M+FFT/2];
end
end
end
for(M=K*FFT;M<2*K*FFT-FFT/2;M=M+1) begin
always@(posedge clock) begin
if(state==2) begin
previousSymbolReal[M] <= previousSymbolReal[M+FFT/2]+txSymbolBuffReal[M-K*FFT];
previousSymbolImag[M] <= previousSymbolImag[M+FFT/2]+txSymbolBuffImag[M-K*FFT];
end
end
end
for(M=2*K*FFT-FFT/2;M<2*K*FFT;M=M+1) begin
always@(posedge clock) begin
if(state==2) begin
previousSymbolReal[M] <= txSymbolBuffReal[M-K*FFT];
previousSymbolImag[M] <= txSymbolBuffImag[M-K*FFT];
end
end
end
endgenerate
always@(posedge clock) begin
if(~reset) begin
buffcount <= 0;
symbincount <= 0;
last <= 0;
end else begin
if(validIn) begin
txSymbolBuffReal[buffcount] <= symbInReal;
txSymbolBuffImag[buffcount] <= symbInImag;
buffcount <= buffcount +1;
if(buffcount == K*FFT-1) begin
symbincount <= symbincount + 1;
buffcount <= 0;
end
if(lastIn)
last <= 1;
end
end
end
always@(posedge clock) begin
if(~reset)
lastvalidout <= 0;
else begin
if(last && lastOut)
lastvalidout <= 1;
end
end
assign outReal = outputValueReal;
assign outImag = outputValueImag;
endmodule
这里的问题是我有4个巨大的阵列,占用了我FPGA中的4倍。 因此,我希望能够使用Block RAM。但是,由于执行的读写操作次数,我认为不可能。
有人有解决方案吗?
答案 0 :(得分:0)
但是,由于执行的读写操作次数,我认为不可能。
正确。至少,并非没有对您的设计进行重大更改。
典型的Block RAM元素每个时钟周期只能读取或写入一个(或有时两个)值,但是generate
循环正在尝试一次更新RAM中的每个元素!
要使此操作使用块RAM,您需要实现状态机以在每个时钟周期更新一个元素,并对其他状态进行排序操作,直到更新完成。
如果要加速此操作,您可以将阵列拆分为多个Block RAM,以便可以并行更新多个值。 (您需要仔细考虑需要读/写哪些元素以避免冲突。)