更改代码以使用Block RAM

时间:2017-06-03 21:06:00

标签: verilog

我想在Verilog中实现以下Matlab代码:

symBuf = [symBuf(numFFT/2+1:end); zeros(numFFT/2,1)];
symBuf(KF+(1:KF)) = symBuf(KF+(1:KF)) + txSymb;

这是一个简单的重叠相加(overlap-add)操作。

这是我的实现:

// overlap: streaming overlap-and-add buffer.
//
// Hardware counterpart of the Matlab fragment
//     symBuf = [symBuf(numFFT/2+1:end); zeros(numFFT/2,1)];
//     symBuf(KF+(1:KF)) = symBuf(KF+(1:KF)) + txSymb;
//
// Parameters
//   K, FFT        : one input symbol is K*FFT complex samples; the overlap
//                   buffer holds 2*K*FFT samples and is shifted by FFT/2
//                   samples per processed symbol.
//
// Ports
//   symbInReal/symbInImag : input sample, captured whenever validIn is high.
//   clock                 : all registers are clocked on the rising edge.
//   reset                 : ACTIVE-LOW synchronous reset (logic tests ~reset).
//   readyIn               : downstream ready; the state machine, the output
//                           registers and the buffer shift only advance
//                           while it is high.
//   validIn / lastIn      : input qualifier / marks the final input sample.
//   outReal/outImag       : registered output sample.
//   lastOut, validOut     : output framing flags.
//   readyOut              : readyIn passed straight through.
//
// State machine
//   S_IDLE   (0) : wait for validIn (or for `last`, to keep flushing).
//   S_OUTPUT (1) : stream previousSymbol[clockcount + FFT/2] to the output.
//   S_SHIFT  (2) : emit one more sample, then shift the whole buffer by
//                  FFT/2 and add the buffered input symbol in one clock.
//
// NOTE: the single-cycle shift in S_SHIFT reads and writes every array
// element at once, so previousSymbol* are built from registers and cannot
// be mapped onto block RAM without serialising that update.
// NOTE(review): while flushing after `last`, txSymbolBuff* still holds the
// final input symbol and is added again on every S_SHIFT -- confirm that
// this is the intended tail behaviour.
module overlap
#(parameter K = 3,
  parameter FFT = 128
)
(
input signed [15:0] symbInReal      ,
input signed [15:0] symbInImag      ,
input clock                         ,
input reset                         ,
input readyIn                       ,
input validIn                       ,
input lastIn                        ,
output signed [15:0] outReal        ,
output signed [15:0] outImag        ,
output reg lastOut                  ,
output wire readyOut                ,
output reg validOut
);

// State encoding (same numeric values as the original literals 0/1/2).
localparam [1:0] S_IDLE   = 2'd0;
localparam [1:0] S_OUTPUT = 2'd1;
localparam [1:0] S_SHIFT  = 2'd2;

// Overlap buffer (2*K*FFT samples) and staging buffer for the incoming symbol.
reg signed [15:0] previousSymbolReal [2*FFT*K-1:0]      ;
reg signed [15:0] previousSymbolImag [2*FFT*K-1:0]      ;
reg signed [15:0] txSymbolBuffReal [K*FFT-1:0]          ;
reg signed [15:0] txSymbolBuffImag [K*FFT-1:0]          ;
reg [15:0] counter                                      ; // cycles spent in the current state
reg [1:0] state                                         ;
reg [1:0] nextstate                                     ; // same width as state
reg [15:0] clockcount                                   ; // read offset into previousSymbol*
reg signed [15:0] outputValueReal                       ;
reg signed [15:0] outputValueImag                       ;
reg [15:0] buffcount                                    ; // write index into txSymbolBuff*
reg [7:0] symboutcount                                  ; // symbols emitted (wraps at 256)
reg [7:0] symbincount                                   ; // symbols received (wraps at 256)
reg last                                                ; // sticky: lastIn has been seen
reg lastvalidout                                        ; // sticky: lastOut has been emitted

  // Power-up contents of the overlap buffer (there is no reset path for it).
  integer i;
  initial begin
    for (i=0; i<2*FFT*K ; i = i + 1) begin
            previousSymbolReal[i] = 0;
            previousSymbolImag[i] = 0;
    end
end

// Per-state cycle counter. It only advances while readyIn is high, exactly
// like the state register, so a stalled cycle can never step over the
// `counter == FFT*K-2` comparison used to leave S_OUTPUT.
always@(posedge clock) begin
    if(~reset) begin
        counter <= 0;
    end else if(readyIn) begin
        if(nextstate != state)
            counter <= 0;
        else
            counter <= counter + 1;
    end
end

// Next-state logic (combinational).
always@(*) begin
    if(~reset) begin
        nextstate = S_IDLE;
    end else begin
        nextstate = state;
        if(readyIn) begin
            case(state)
                S_IDLE: begin
                        if(validIn || last) begin
                            nextstate = S_OUTPUT;
                        end
                      end
                S_OUTPUT: begin
                          if (counter == (FFT*K-2)) begin
                            nextstate = S_SHIFT;
                          end
                  end
                S_SHIFT: begin
                            nextstate = S_IDLE;
                      end
                // Encoding 3 is unused; fall back to idle instead of locking up.
                default: begin
                            nextstate = S_IDLE;
                      end
            endcase
        end
    end
end

// State register.
always@(posedge clock) begin
    if(~reset) begin
        state <= S_IDLE;
    end else begin
        if(readyIn)
            state <= nextstate;
    end
end

// Output registers and framing flags.
always@(posedge clock) begin
    if(~reset) begin
        clockcount <= 0;
        symboutcount <= 0;
        lastOut <= 0;
        validOut <= 0;   // keep the handshake flag defined straight out of reset
    end else begin
        if(readyIn) begin
            // Default increment; the S_IDLE and S_SHIFT branches override it.
            clockcount <= clockcount +1 ;
            case(state)
                S_IDLE: begin
                        validOut <= 0;
                        clockcount <= 0;
                        lastOut <= 0;
                      end
                S_OUTPUT: begin
                        if(~lastvalidout)
                            validOut <= 1;
                        outputValueReal <= previousSymbolReal[clockcount+ FFT/2];
                        outputValueImag <= previousSymbolImag[clockcount+ FFT/2];
                      end
                S_SHIFT: begin
                        outputValueReal <= previousSymbolReal[clockcount + FFT/2];
                        outputValueImag <= previousSymbolImag[clockcount + FFT/2];
                        clockcount <= 0;
                        if(~lastvalidout)
                            validOut <= 1;
                        if(symboutcount == symbincount + 1 && last)
                            lastOut <= 1;
                        symboutcount <= symboutcount +1 ;
                      end
                default: ;
             endcase
         end
     end
 end

assign readyOut = readyIn;

 // Buffer update, performed for every element in the same clock while in
 // S_SHIFT. Gated by readyIn because the state register is: without the
 // gate a stalled S_SHIFT would shift and accumulate once per stalled clock.
 genvar M;
 generate
    // Lower half: plain shift down by FFT/2.
    for(M=0;M<K*FFT;M=M+1) begin : g_shift
        always@(posedge clock) begin
            if(state==S_SHIFT && readyIn) begin
                previousSymbolReal[M] <= previousSymbolReal[M+FFT/2];
                previousSymbolImag[M] <= previousSymbolImag[M+FFT/2];
            end
        end
    end
    // Upper half except the last FFT/2 entries: shift and add the new symbol.
    for(M=K*FFT;M<2*K*FFT-FFT/2;M=M+1) begin : g_shift_add
        always@(posedge clock) begin
            if(state==S_SHIFT && readyIn) begin
                previousSymbolReal[M] <= previousSymbolReal[M+FFT/2]+txSymbolBuffReal[M-K*FFT];
                previousSymbolImag[M] <= previousSymbolImag[M+FFT/2]+txSymbolBuffImag[M-K*FFT];
            end
        end
    end
    // Last FFT/2 entries: nothing is shifted in (zeros), so only the new symbol remains.
    for(M=2*K*FFT-FFT/2;M<2*K*FFT;M=M+1) begin : g_load
        always@(posedge clock) begin
            if(state==S_SHIFT && readyIn) begin
                previousSymbolReal[M] <= txSymbolBuffReal[M-K*FFT];
                previousSymbolImag[M] <= txSymbolBuffImag[M-K*FFT];
            end
        end
    end
 endgenerate

 // Input capture: store each valid sample, count completed symbols and
 // latch the end-of-stream marker.
 always@(posedge clock) begin
    if(~reset) begin
        buffcount <= 0;
        symbincount <= 0;
        last <= 0;
    end else begin
        if(validIn) begin
            txSymbolBuffReal[buffcount] <= symbInReal;
            txSymbolBuffImag[buffcount] <= symbInImag;
            buffcount <= buffcount +1;
            if(buffcount == K*FFT-1) begin
                symbincount <= symbincount + 1;
                buffcount <= 0;
            end
            if(lastIn)
                last <= 1;
        end
    end
 end

 // Once lastOut has been asserted after end-of-stream, stop raising validOut.
 always@(posedge clock) begin
    if(~reset)
        lastvalidout <= 0;
    else begin
        if(last && lastOut)
            lastvalidout <= 1;
    end
 end


 assign outReal = outputValueReal;
 assign outImag = outputValueImag;

endmodule

这里的问题是我有4个巨大的数组,它们占用的资源达到了我FPGA可用资源的4倍。因此,我希望能够使用Block RAM。但是,由于每个时钟周期需要执行的读写操作次数太多,我认为这不可能。

有人有解决方案吗?

1 个答案:

答案 0 :(得分:0)

  

但是,由于执行的读写操作次数,我认为不可能。

正确。至少,并非没有对您的设计进行重大更改。

典型的Block RAM元素每个时钟周期只能读取或写入一个(或有时两个)值,但是generate循环正在尝试一次更新RAM中的每个元素!

要使此操作使用块RAM,您需要实现一个状态机,在每个时钟周期只更新一个元素,并安排好对其他状态的操作顺序,使其等到更新完成之后再进行。

如果要加速此操作,您可以将数组拆分到多个Block RAM中,以便并行更新多个值。(您需要仔细考虑每个周期要读/写哪些元素,以避免端口冲突。)