Question

我正在尝试用VHDL实现一个8位阵列乘法器，采用的是阵列乘法器的标准结构。我有一个接收A（被乘数）和B（乘数）的BDF文件，其中有一个名为“adder”的模块，用来把A与B的各个部分积相加。问题出在求和的输出上：它给出的数值远小于正确结果。

上图是我的主要BDF。

上图显示了从各个and_array模块到加法器的连接。

加法器代码：

LIBRARY IEEE;
USE IEEE.std_logic_1164.ALL;
USE IEEE.std_logic_unsigned.ALL;
use IEEE.std_logic_arith.ALL;
use IEEE.numeric_std.ALL;

-- adder: combinational block that receives the eight 8-bit AND-array rows of
-- an 8x8 multiplier and is meant to output their weighted sum.
-- In architecture arch1, row i_INk is placed k bit positions to the left
-- (zero-filled) before the rows are combined.
ENTITY adder IS
  PORT (i_IN0   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 0, placed at bits 7..0
        i_IN1   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 1, placed at bits 8..1
        i_IN2   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 2, placed at bits 9..2
        i_IN3   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 3, placed at bits 10..3
        i_IN4   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 4, placed at bits 11..4
        i_IN5   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 5, placed at bits 12..5
        i_IN6   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 6, placed at bits 13..6
        i_IN7   : IN  STD_LOGIC_VECTOR(7 downto 0);-- row 7, placed at bits 14..7
        o_Q     : OUT STD_LOGIC_VECTOR(15 DOWNTO 0);-- bits 14..0 = per-bit sum, bit 15 = final carry
        o_COUT  : OUT STD_LOGIC);-- final carry (same value that drives o_Q(15))
END adder;

-- arch1: behavioural attempt at adding the eight shifted partial-product rows.
-- Each row is widened to 15 bits at its own shift position, then a single
-- ripple loop combines one bit of every row per iteration.
-- This is the code the question is about; it is kept as posted. The
-- NOTE(review) comments below mark the lines that make the result too small.
architecture arch1 of adder is 
begin 
    process(i_IN0, i_IN1, i_IN2, i_IN3, i_IN4, i_IN5, i_IN6, i_IN7)
      variable soma:std_logic_vector(14 downto 0); -- per-bit result of the loop below
      variable aux0:std_logic_vector(14 downto 0); -- i_IN0 shifted left by 0, zero-filled
      variable aux1:std_logic_vector(14 downto 0); -- i_IN1 shifted left by 1, zero-filled
      variable aux2:std_logic_vector(14 downto 0); -- i_IN2 shifted left by 2, zero-filled
      variable aux3:std_logic_vector(14 downto 0); -- i_IN3 shifted left by 3, zero-filled
      variable aux4:std_logic_vector(14 downto 0); -- i_IN4 shifted left by 4, zero-filled
      variable aux5:std_logic_vector(14 downto 0); -- i_IN5 shifted left by 5, zero-filled
      variable aux6:std_logic_vector(14 downto 0); -- i_IN6 shifted left by 6, zero-filled
      variable aux7:std_logic_vector(14 downto 0); -- i_IN7 shifted left by 7, zero-filled
      variable c:std_logic; -- running carry, one bit wide
      BEGIN
         -- Place every 8-bit row into a 15-bit word at the position it has in
         -- the array-multiplier diagram; all other bits are forced to '0'.
         aux0(7 downto 0) := i_IN0; aux0(14 downto 8) := "0000000";
         aux1(0) := '0'; aux1(8 downto 1) := i_IN1; aux1(14 downto 9) := "000000";
         aux2(1 downto 0) := "00";aux2(9 downto 2) := i_IN2; aux2(14 downto 10) := "00000";
         aux3(2 downto 0) := "000";aux3(10 downto 3) := i_IN3; aux3(14 downto 11) := "0000";
         aux4(3 downto 0) := "0000";aux4(11 downto 4) := i_IN4; aux4(14 downto 12) := "000";
         aux5(4 downto 0) := "00000";aux5(12 downto 5) := i_IN5; aux5(14 downto 13) := "00";
         aux6(5 downto 0) := "000000"; aux6(13 downto 6) := i_IN6; aux6(14) := '0';
         aux7(6 downto 0) := "0000000"; aux7(14 downto 7) := i_IN7;
    -- Bit-by-bit summation loop, adapted by the author from an 8-bit
    -- two-operand ripple adder.
         c := '0';
            for i in 0 to 14 loop
                -- NOTE(review): aux0 is indexed with the literal 1 here, while every
                -- other operand (and aux0 itself on the carry line) uses i.
                soma(i) := aux0(1) xor aux1(i) xor aux2(i) xor aux3(i) xor aux4(i) xor aux5(i) xor aux6(i) xor aux7(i) xor c;
                -- NOTE(review): up to nine '1' bits (eight rows plus c) can meet in
                -- one column, so the column total can reach 9. One sum bit plus this
                -- one-bit carry can only represent 0..3; the carry is set only when
                -- all eight bits are '1' or when the parity is odd and c is set, so
                -- most multi-bit carries are lost.
                c := (aux0(i) and aux1(i) and aux2(i) and aux3(i) and aux4(i) and aux5(i) and aux6(i) and aux7(i)) or ((aux0(i) xor aux1(i) xor aux2(i) xor aux3(i) xor aux4(i) xor aux5(i) xor aux6(i) xor aux7(i)) and c);
            end loop;
            o_COUT <= c; 
            o_Q(15) <= c; -- the final carry becomes the top bit of the 16-bit output
            o_Q(14 downto 0) <= soma;
    end process;
end arch1;

AND ARRAY CODE：

LIBRARY IEEE;
USE IEEE.std_logic_1164.ALL;
USE IEEE.std_logic_unsigned.ALL;
use IEEE.std_logic_arith.ALL;

-- and_array: one row of the multiplier's AND plane.
-- Every bit of the 8-bit multiplicand is ANDed with the same multiplier bit,
-- giving one unshifted partial-product row.
ENTITY and_array IS
  PORT (
    i_MULTIPLICANDO : IN  STD_LOGIC_VECTOR(7 downto 0);  -- multiplicand word
    i_MULTIPLICADOR : IN  STD_LOGIC;                     -- one multiplier bit
    o_Q             : OUT STD_LOGIC_VECTOR(7 DOWNTO 0)   -- partial-product row
  );
END and_array;

ARCHITECTURE arch_1 OF and_array IS
BEGIN
    -- A single combinational process recomputes all eight output bits
    -- whenever either input changes.
    mask_bits: PROCESS (i_MULTIPLICANDO, i_MULTIPLICADOR)
    BEGIN
        FOR k IN 0 TO 7 LOOP
            o_Q(k) <= i_MULTIPLICANDO(k) AND i_MULTIPLICADOR;
        END LOOP;
    END PROCESS mask_bits;
END arch_1;

我已经对这两个模块分别做了仿真：and_array模块工作正常，错误出现在加法器的仿真中，仿真波形见下图：

我正在模拟以下输入：

        Multiplicand = 1 1 1 1 0 1 0 1 (245 dec)
        Multiplier   = 1 0 1 0 1 1 1 1 (175 dec)

此乘积的正确值为42875（245 × 175），而在我的仿真中显示为24899

我在加法器中使用以下逻辑：

                   1 1 1 1 0 1 0 1
                   1 0 1 0 1 1 1 1
                   -----------------
      x x x x x x x 1 1 1 1 0 1 0 1     p1
      x x x x x x 1 1 1 1 0 1 0 1 x     p2
      x x x x x 1 1 1 1 0 1 0 1 x x     p3
      x x x x 1 1 1 1 0 1 0 1 x x x     p4 
      x x x 0 0 0 0 0 0 0 0 x x x x     p5
      x x 1 1 1 1 0 1 0 1 x x x x x     p6
      x 0 0 0 0 0 0 0 0 x x x x x x     p7
      1 1 1 1 0 1 0 1 x x x x x x x     p8
_________________________________________

在加法器模块中，我把所有的“X”都替换成0再求和（相当于移位）。在加法器模块的循环中，p1到p8是逐位相加的。错误可能出在这个循环或进位上，但我尝试了多种写法，结果总是不对。

有人知道如何解决这个问题吗？

下面是主电路抽象图像：

Answer 1

此问题的解决方案是使用半加器和全加器来代替上述问题中的加法器。

使用了下面的逻辑：

总共使用了：

64个与门；8个半加器；48个全加器。

Answer 2

这种行为模型源自Vahid的幻灯片63：

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- multiplier_array: unsigned 8 x 8 -> 16 bit combinational multiplier.
entity multiplier_array is
    port (
        a: in  std_logic_vector (7 downto 0);   -- multiplicand
        b: in  std_logic_vector (7 downto 0);   -- multiplier
        p: out std_logic_vector (15 downto 0)   -- product a * b
    );
end entity;

-- and_array: behavioural array multiplier.
-- Stage 1 replicates each multiplier bit across eight positions (b0..b7).
-- Stage 2 masks the multiplicand with each replica (pp1..pp8).
-- Stage 3 adds the partial products at their bit weights into s.
architecture and_array of multiplier_array is

    -- bN holds b(N) copied into all eight bit positions.
    signal b0, b1, b2, b3, b4, b5, b6, b7: unsigned (7 downto 0);

    -- ppK is the unshifted partial product for multiplier bit b(K-1).
    signal pp1, pp2, pp3, pp4, pp5, pp6, pp7, pp8: unsigned (7 downto 0);

    -- 16-bit running total of the weighted partial products.
    signal s: unsigned (15 downto 0);

begin

    -- Replicate one multiplier bit, then AND it with the multiplicand.
    b0  <= (others => b(0));
    pp1 <= b0 and unsigned (a);

    b1  <= (others => b(1));
    pp2 <= b1 and unsigned (a);

    b2  <= (others => b(2));
    pp3 <= b2 and unsigned (a);

    b3  <= (others => b(3));
    pp4 <= b3 and unsigned (a);

    b4  <= (others => b(4));
    pp5 <= b4 and unsigned (a);

    b5  <= (others => b(5));
    pp6 <= b5 and unsigned (a);

    b6  <= (others => b(6));
    pp7 <= b6 and unsigned (a);

    b7  <= (others => b(7));
    pp8 <= b7 and unsigned (a);

    -- Weighted sum. Appending K-1 zeros to ppK shifts it to its bit weight.
    -- numeric_std "+" returns a result as long as its longer operand, and the
    -- additions are evaluated left to right. Starting with the 16-bit term
    -- ('0' & pp8 & 7 zeros) therefore keeps every later addition 16 bits wide.
    -- "&" has the same precedence as "+", hence the parentheses.
    s <= ('0' & pp8 & "0000000")
       + (pp7 & "000000")
       + (pp6 & "00000")
       + (pp5 & "0000")
       + (pp4 & "000")
       + (pp3 & "00")
       + (pp2 & '0')
       + pp1;

    p <= std_logic_vector (s);

end architecture;

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- mult_tb: self-contained testbench for work.multiplier_array.
-- It applies one operand pair (245, 175) at 10 ns and reports the operands,
-- the DUT product and the product computed with numeric_std "*".
entity mult_tb is
end entity;

architecture foo of mult_tb is
    -- Both operands start at zero so the DUT sees defined inputs from time 0.
    signal a, b:    std_logic_vector (7 downto 0) := (others =>'0');
    signal p:       std_logic_vector (15 downto 0);

begin
DUT:
    entity work.multiplier_array
        port map (
            a => a,
            b => b,
            p => p
        );

-- Drives the single test vector, waits another 10 ns, then suspends forever.
STIMULI:
    process
    begin
        wait for 10 ns;
        a <= std_logic_vector (to_unsigned(245, a'length));
        b <= std_logic_vector (to_unsigned(175, b'length));
        wait for 10 ns;
        wait;
    end process;

-- Reports on every event on p; the time guard suppresses the events that
-- occur before the stimulus is applied.
MONITOR:
    process (p)
    begin
        if now > 9 ns then
            report "a = " & natural'image (to_integer(unsigned(a)));
            report "b = " & natural'image (to_integer(unsigned(b)));
            report "product = " & natural'image (to_integer(unsigned(p)));
            -- Reference value from the numeric_std multiply operator.
            report "expected product = " & 
                    natural'image (to_integer(unsigned(a) * unsigned(b)));
        end if;
    end process;

end architecture;

加法满足交换律和结合律，因此可以颠倒各项相加的顺序，把最宽的操作数作为第一次加法的左操作数。numeric_std的“+”运算符的结果长度取两个操作数中较长者的长度，所以让最左边的操作数为16位，就确定了其后所有加法的宽度。请注意，连接运算符“&”与加法运算符的优先级相同，因此各个连接表达式都用括号括起来，以防止边界（长度不匹配）错误。

运行包含的测试平台mult_tb时：

multiplier_array.vhdl:113:13:@10ns:(report note): a = 245
multiplier_array.vhdl:114:13:@10ns:(report note): b = 175
multiplier_array.vhdl:115:13:@10ns:(report note): product = 42875
multiplier_array.vhdl:116:13:@10ns:(report note): expected product = 42875

行为模型在综合之后应当给出与结构化实现相同的结果，不过要注意综合过程中会进行优化。

此外，VHDL-2008支持的标量与数组之间的逻辑运算符可以省去信号b0到b7。

IEEE Std 1076-2008,9.2运算符，9.2.2逻辑运算符，第3段：

如果两个操作数都是一维数组，则操作数应为相同长度的数组，操作在数组的匹配元素上执行，结果是与左操作数具有相同索引范围的数组。如果一个操作数是标量而另一个操作数是一维数组，则使用数组操作数的每个元素对标量操作数执行操作。结果是一个与数组操作数具有相同索引范围的数组。

这使得VHDL-2008的模型可以稍微简单一些。-2008版本的std_logic_1164和numeric_std程序包都支持标量/向量逻辑运算。

受到“仅1加法器”评论的启发，可以使用汉明加权（例如popcount行为函数）生成正确的结果。

在阅读了关于FPGA的VHDL中的Hamming权重实现，并勾画出向量后，需要8个不同的加权网络（长度为3,4,5,6,7,8,9和10），多个实例用于长度为4,7,8和10。

与部分积加法器相比，汉明重量方案在面积和传播延迟上都不占优势。如果用便于理解的方式来描述这些加权网络，综合工具在耗费大量CPU时间之后，得到的很可能还是部分积加法器。输入个数不是2的幂的汉明重量函数似乎还会造成低效的进位传播。

这是一个实现汉明加权的架构：

-- popcount: column-compression multiplier.
-- Every product column k is reduced with a population count of the
-- partial-product bits that fall into that column plus the carries produced
-- by lower columns. For a column count sumK:
--   sumK(0) is product bit p(K),
--   sumK(1) is a carry into column K+1,
--   sumK(2) is a carry into column K+2,
--   sumK(3) is a carry into column K+3.
-- Every carry bit of every column must be consumed exactly once.
architecture popcount of multiplier_array is

    -- Counts the '1' elements of inp and returns the count as a len-bit
    -- vector. After time 0, the first element that is neither '0' nor '1'
    -- aborts the count with a report and a zero result; at time 0 such
    -- elements are silently not counted.
    function popcount (inp: in  std_logic_vector; len: in natural)  
            return std_logic_vector is
        variable r: integer := 0;

    begin
        for i in inp'range loop
            if inp(i) = '1' then
                r := r + 1;
            elsif inp(i) /= '0' and now > 0 ns then
                report "WARNING: function popcount encountered metavalue " &
                        "returned zero";
                r := 0;
                exit;                
            end if;
        end loop;
        return std_logic_vector (to_unsigned(r, len));
    end function;

    -- bN: multiplier bit b(N) replicated over eight positions.
    signal b0:          unsigned (7 downto 0);
    signal b1:          unsigned (7 downto 0);
    signal b2:          unsigned (7 downto 0);
    signal b3:          unsigned (7 downto 0);
    signal b4:          unsigned (7 downto 0);
    signal b5:          unsigned (7 downto 0);
    signal b6:          unsigned (7 downto 0);
    signal b7:          unsigned (7 downto 0);

    -- ppK: unshifted partial product for multiplier bit b(K-1).
    signal pp1:         unsigned (7 downto 0);
    signal pp2:         unsigned (7 downto 0);   
    signal pp3:         unsigned (7 downto 0); 
    signal pp4:         unsigned (7 downto 0);
    signal pp5:         unsigned (7 downto 0);
    signal pp6:         unsigned (7 downto 0);   
    signal pp7:         unsigned (7 downto 0); 
    signal pp8:         unsigned (7 downto 0);

    signal c1:          std_logic;      -- carry out of column 1

    -- Column counts; each width is just large enough for the column's
    -- maximum number of inputs.
    signal sum2:        std_logic_vector(2 downto 0);
    signal sum3:        std_logic_vector(2 downto 0);
    signal sum4:        std_logic_vector(2 downto 0);
    signal sum5:        std_logic_vector(3 downto 0);
    signal sum6:        std_logic_vector(3 downto 0);
    signal sum7:        std_logic_vector(3 downto 0);
    signal sum8:        std_logic_vector(3 downto 0);
    signal sum9:        std_logic_vector(3 downto 0);
    signal sum10:       std_logic_vector(3 downto 0);
    signal sum11:       std_logic_vector(2 downto 0);
    signal sum12:       std_logic_vector(2 downto 0);
    signal sum13:       std_logic_vector(2 downto 0);
    signal sum14:       std_logic_vector(1 downto 0);

    signal c15:         std_logic;      -- carry out of column 15 (overflow flag)

begin

    -- AND Array Multiplies

    b0 <= (others => b(0));
    b1 <= (others => b(1));
    b2 <= (others => b(2));
    b3 <= (others => b(3));
    b4 <= (others => b(4));
    b5 <= (others => b(5));
    b6 <= (others => b(6));
    b7 <= (others => b(7));

    pp1 <= unsigned (a) and b0;

    pp2 <= unsigned (a) and b1;

    pp3 <= unsigned (a) and b2;

    pp4 <= unsigned (a) and b3;

    pp5 <= unsigned (a) and b4;

    pp6 <= unsigned (a) and b5;

    pp7 <= unsigned (a) and b6;

    pp8 <= unsigned (a) and b7;


    p(0) <= pp1(0);   -- nothing to do here
    p(1) <= pp1(1) xor pp2(0);
    c1   <= pp1(1) and pp2(0);  -- single carry bit

    sum2 <= popcount ((pp1(2), pp2(1), pp3(0), c1), sum2'length);

    p(2) <= sum2(0);

    sum3 <= popcount ((pp1(3), pp2(2), pp3(1), pp4(0), sum2(1)), sum3'length); 

    p(3) <= sum3(0);

    sum4 <= popcount ((pp1(4), pp2(3), pp3(2), pp4(1), pp5(0), 
                       sum2(2), sum3(1)), sum4'length);

    p(4) <= sum4(0);

    sum5 <= popcount ((pp1(5), pp2(4), pp3(3), pp4(2), pp5(1), pp6(0),
                       sum3(2), sum4(1)), sum5'length);

    p(5) <= sum5(0);

    sum6 <= popcount ((pp1(6), pp2(5), pp3(4), pp4(3), pp5(2), pp6(1), pp7(0),
                        sum4(2), sum5(1)), sum6'length);

    p(6) <= sum6(0);

    sum7 <= popcount ((pp1(7), pp2(6), pp3(5), pp4(4), pp5(3), pp6(2), pp7(1),
                       pp8(0), sum5(2), sum6(1)), sum7'length);

    p(7) <= sum7(0);

    sum8 <= popcount ((pp2(7), pp3(6), pp4(5), pp5(4), pp6(3), pp7(2), pp8(1),
                      sum5(3), sum6(2), sum7(1)), sum8'length);

    p(8) <= sum8(0);

    sum9 <= popcount ((pp3(7), pp4(6), pp5(5), pp6(4), pp7(3), pp8(2),
                      sum6(3), sum7(2), sum8(1)), sum9'length);

    p(9) <= sum9(0);   

    sum10 <= popcount ((pp4(7), pp5(6), pp6(5), pp7(4), pp8(3), 
                      sum7(3), sum8(2), sum9(1)), sum10'length);

    p(10) <= sum10(0);   

    sum11 <= popcount ((pp5(7), pp6(6), pp7(5), pp8(4),
                      sum8(3), sum9(2), sum10(1)), sum11'length);

    p(11) <= sum11(0);       

    sum12 <= popcount ((pp6(7), pp7(6), pp8(5), 
                      sum9(3), sum10(2), sum11(1)), sum12'length);

    p(12) <= sum12(0); 

    -- Column 13 also receives sum10(3), the weight-8 carry of column 10.
    -- That carry is reachable (e.g. 255 * 252 makes column 10 count 8) and
    -- has to be included here so it is not lost.
    sum13 <= popcount ((pp7(7), pp8(6),  
                      sum10(3), sum11(2), sum12(1)), sum13'length);

    -- Product bit 13 is the low bit of column 13's own count, not the carry
    -- coming out of column 12.
    p(13) <= sum13(0);    

    sum14 <= popcount ((pp8(7),  
                      sum12(2), sum13(1)), sum14'length);

    p(14)  <= sum14(0);


    p(15) <=  sum13(2) xor sum14(1);

    c15 <= sum13(2) and sum14(1);  -- overflow

end architecture;

multiplier_array.vhdl:277:13:@11ns:(report note): a = 245
multiplier_array.vhdl:278:13:@11ns:(report note): b = 175
multiplier_array.vhdl:279:13:@11ns:(report note): product = 42875
multiplier_array.vhdl:280:13:@11ns:(report note): expected product = 42875

它给出了相同的答案。

已修改测试平台中的监视器进程以跳过产品的中间值：

-- Replacement monitor: instead of reacting to every event on p, it samples
-- once at 11 ns (1 ns after the stimulus is applied at 10 ns), so
-- intermediate values of the product are never reported.
MONITOR:
    process 
    begin
            wait for 11 ns;
            report "a = " & natural'image (to_integer(unsigned(a)));
            report "b = " & natural'image (to_integer(unsigned(b)));
            report "product = " & natural'image (to_integer(unsigned(p)));
            -- Reference value from the numeric_std multiply operator.
            report "expected product = " & 
                    natural'image (to_integer(unsigned(a) * unsigned(b)));
            wait;   -- report once, then suspend forever
    end process;

另请注意，由于有两个输入对乘积的最高位有贡献，我还生成了一个可用于检测错误的进位（溢出）信号。

Answer 3

我认为主要问题在于用于进行求和的按位代码：

        -- Quoted unchanged from the question's adder: each iteration XORs one
        -- bit of all eight shifted rows with a one-bit carry c.
        for i in 0 to 14 loop
            -- aux0 is indexed with the literal 1 here; all other operands use i.
            soma(i) := aux0(1) xor aux1(i) xor aux2(i) xor aux3(i) xor aux4(i) xor aux5(i) xor aux6(i) xor aux7(i) xor c;
            -- c is a single bit although up to nine bits can be set in one column.
            c := (aux0(i) and aux1(i) and aux2(i) and aux3(i) and aux4(i) and aux5(i) and aux6(i) and aux7(i)) or ((aux0(i) xor aux1(i) xor aux2(i) xor aux3(i) xor aux4(i) xor aux5(i) xor aux6(i) xor aux7(i)) and c);
        end loop;

这里的c只有一位，远远不足以承载9个项相加产生的进位（8个aux位加上进位；而一旦把进位充分加宽，反馈回来的项还会更多）。即使假设只有这九个位，它们的和最大也可达9，需要四位二进制数才能表示，因此进位至少要三位。只要相加的项超过三个，就会出现这种情况；全加器把1+1+1求和为2+1（其中一位的权重更高），已经用尽了其输出的全部取值。由于每一级都会减少项数，全加器可以分层级联；但更大的加法器必须使用更多的输出位（例如，5输入加法器可以产生2+2+1，7输入加法器可以产生4+2+1）。数一数阵列乘法器原理图中各列之间的进位线，就能看到这一点：中间各列的进位线更多，因为两个因子中有更多的位对这些列有贡献。

8位阵列乘法器VHDL（输出错误）

3 个答案: