2022年1月23日 星期日

FPGA加減乘除速度對比


本文大略測試FPGA的運算速度，模擬所用的FPGA為DE2-115開發板上的Cyclone IV EP4CE115F29C7。

測試的運算速度分別有加、減、乘、除,其中,除法計算的部分,又區分為直接除法以及利用倒數把除法運算改為乘法運算。

Verilog 程式碼如下。有三個輸入 A、B、C，運算有加 Add(A+B)、減 Sub(A-B)、乘 Mul(A*B)、除 Div(A*B/C)，以及用倒數乘法取代除法的 Div_mul(A*B*(1/C))。其中，輸入 C 經過 module reciprocal 查表轉換為 32 位元的倒數 1/C（表中數值為實際倒數放大 2^31 倍的結果，也就是將該數值除以 2^31 才是實際大小；C=0 時查表輸出為 0）。

  1. // counter: purely combinational arithmetic block used to compare operators.
  2. // It produces A+B, A-B, A*B, A*B/C through the "/" operator, and A*B/C
  3. // obtained by multiplying A*B with a 2^31-scaled reciprocal of C that is
  4. // supplied by the reciprocal module.
  5. module counter(
  6.   input  [7:0]  iA,
  7.   input  [7:0]  iB,
  8.   input  [7:0]  iC,
  9.   output [7:0]  oAdd,            // iA + iB, low 8 bits only (carry is dropped)
  10.   output [7:0]  oSub,            // iA - iB, low 8 bits only (wraps on borrow)
  11.   output [15:0] oMul,            // full 16-bit product iA * iB
  12.   output [15:0] oDiv,            // (iA * iB) / iC via the division operator (truncating)
  13.   output [7:0]  oDiv_mul,        // reciprocal-based quotient, rounded, low 8 bits only
  14.   output [31:0] ot_div,          // debug tap: low 32 bits of the 48-bit scaled product
  15.   output [31:0] ot_reciprocal_C  // debug tap: reciprocal value looked up for iC
  16. );

  17. wire [31:0] t_reciprocal_C;     // reciprocal of iC, scaled by 2^31
  18. wire [15:0] prod_ab;            // iA * iB; 255 * 255 fits in 16 bits
  19. wire [47:0] t_div;              // prod_ab * t_reciprocal_C, i.e. quotient scaled by 2^31
  20. wire [15:0] quot_int;           // integer part of the scaled quotient
  21. wire        round_up;           // most significant fraction bit (fraction >= 1/2)

  22. // Reciprocal lookup for the divisor.
  23. reciprocal u_reciprocal(
  24.   .i_num(iC),
  25.   .o_recip(t_reciprocal_C)
  26. );

  27. // Add / subtract / multiply.
  28. assign prod_ab = iA * iB;
  29. assign oAdd    = iA + iB;
  30. assign oSub    = iA - iB;
  31. assign oMul    = prod_ab;

  32. // Division with the "/" operator. NOTE(review): iC == 0 is not guarded here.
  33. assign oDiv = prod_ab / iC;

  34. // Division as multiplication: drop the 31 fraction bits of the scaled
  35. // product and round to nearest using bit 30.
  36. // NOTE(review): oDiv truncates while this path rounds, and only the low
  37. // 8 bits of the 16-bit quotient reach oDiv_mul -- confirm both are intended.
  38. assign t_div    = prod_ab * t_reciprocal_C;
  39. assign quot_int = t_div[46:31];
  40. assign round_up = t_div[30];
  41. assign oDiv_mul = round_up ? quot_int + 1 : quot_int;

  42. // Debug taps. ot_div keeps only t_div[31:0]; the upper 16 bits are dropped.
  43. assign ot_reciprocal_C = t_reciprocal_C;
  44. assign ot_div          = t_div;

  45. endmodule 


                                                        module reciprocal的程式碼如下

                                                        // reciprocal: combinational lookup table that maps an 8-bit value to a
                                                        // 32-bit fixed-point reciprocal. Per the table, 1 maps to 2^31 and 2 maps
                                                        // to 2^30, i.e. each entry is 1/i_num scaled by 2^31.
                                                        // NOTE(review): non-exact entries appear truncated rather than rounded
                                                        // (e.g. 3 -> 0x2AAAAAAA); only a sample of entries was checked.
                                                        1. module reciprocal(
                                                        2.   input wire [7:0]  i_num,  // value whose reciprocal is looked up (0..255)
                                                        3.   output reg[31:0]  o_recip  // table entry selected by i_num
                                                        4. );

                                                        5. always @(*)  // combinational: no clock, output follows i_num
                                                        6. begin
                                                        7.   case (i_num)
                                                        8.     8'd000: o_recip = 32'b00000000000000000000000000000000;  // 0 has no reciprocal; the table returns 0
                                                        9.     8'd001: o_recip = 32'b10000000000000000000000000000000;  // 2^31, i.e. 1/1 scaled by 2^31
                                                        10.     8'd002: o_recip = 32'b01000000000000000000000000000000;
                                                        11.     8'd003: o_recip = 32'b00101010101010101010101010101010;
                                                        12.     8'd004: o_recip = 32'b00100000000000000000000000000000;
                                                        13.     8'd005: o_recip = 32'b00011001100110011001100110011001;
                                                        14.     8'd006: o_recip = 32'b00010101010101010101010101010101;
                                                        15.     8'd007: o_recip = 32'b00010010010010010010010010010010;
                                                        16.     8'd008: o_recip = 32'b00010000000000000000000000000000;
                                                        17.     8'd009: o_recip = 32'b00001110001110001110001110001110;
                                                        18.     8'd010: o_recip = 32'b00001100110011001100110011001100;
                                                        19.     8'd011: o_recip = 32'b00001011101000101110100010111010;
                                                        20.     8'd012: o_recip = 32'b00001010101010101010101010101010;
                                                        21.     8'd013: o_recip = 32'b00001001110110001001110110001001;
                                                        22.     8'd014: o_recip = 32'b00001001001001001001001001001001;
                                                        23.     8'd015: o_recip = 32'b00001000100010001000100010001000;
                                                        24.     8'd016: o_recip = 32'b00001000000000000000000000000000;
                                                        25.     8'd017: o_recip = 32'b00000111100001111000011110000111;
                                                        26.     8'd018: o_recip = 32'b00000111000111000111000111000111;
                                                        27.     8'd019: o_recip = 32'b00000110101111001010000110101111;
                                                        28.     8'd020: o_recip = 32'b00000110011001100110011001100110;
                                                        29.     8'd021: o_recip = 32'b00000110000110000110000110000110;
                                                        30.     8'd022: o_recip = 32'b00000101110100010111010001011101;
                                                        31.     8'd023: o_recip = 32'b00000101100100001011001000010110;
                                                        32.     8'd024: o_recip = 32'b00000101010101010101010101010101;
                                                        33.     8'd025: o_recip = 32'b00000101000111101011100001010001;
                                                        34.     8'd026: o_recip = 32'b00000100111011000100111011000100;
                                                        35.     8'd027: o_recip = 32'b00000100101111011010000100101111;
                                                        36.     8'd028: o_recip = 32'b00000100100100100100100100100100;
                                                        37.     8'd029: o_recip = 32'b00000100011010011110111001011000;
                                                        38.     8'd030: o_recip = 32'b00000100010001000100010001000100;
                                                        39.     8'd031: o_recip = 32'b00000100001000010000100001000010;
                                                        40.     8'd032: o_recip = 32'b00000100000000000000000000000000;
                                                        41.     8'd033: o_recip = 32'b00000011111000001111100000111110;
                                                        42.     8'd034: o_recip = 32'b00000011110000111100001111000011;
                                                        43.     8'd035: o_recip = 32'b00000011101010000011101010000011;
                                                        44.     8'd036: o_recip = 32'b00000011100011100011100011100011;
                                                        45.     8'd037: o_recip = 32'b00000011011101011001111100100010;
                                                        46.     8'd038: o_recip = 32'b00000011010111100101000011010111;
                                                        47.     8'd039: o_recip = 32'b00000011010010000011010010000011;
                                                        48.     8'd040: o_recip = 32'b00000011001100110011001100110011;
                                                        49.     8'd041: o_recip = 32'b00000011000111110011100000110001;
                                                        50.     8'd042: o_recip = 32'b00000011000011000011000011000011;
                                                        51.     8'd043: o_recip = 32'b00000010111110100000101111101000;
                                                        52.     8'd044: o_recip = 32'b00000010111010001011101000101110;
                                                        53.     8'd045: o_recip = 32'b00000010110110000010110110000010;
                                                        54.     8'd046: o_recip = 32'b00000010110010000101100100001011;
                                                        55.     8'd047: o_recip = 32'b00000010101110010011000100000101;
                                                        56.     8'd048: o_recip = 32'b00000010101010101010101010101010;
                                                        57.     8'd049: o_recip = 32'b00000010100111001011110000010100;
                                                        58.     8'd050: o_recip = 32'b00000010100011110101110000101000;
                                                        59.     8'd051: o_recip = 32'b00000010100000101000001010000010;
                                                        60.     8'd052: o_recip = 32'b00000010011101100010011101100010;
                                                        61.     8'd053: o_recip = 32'b00000010011010100100001110011111;
                                                        62.     8'd054: o_recip = 32'b00000010010111101101000010010111;
                                                        63.     8'd055: o_recip = 32'b00000010010100111100100000100101;
                                                        64.     8'd056: o_recip = 32'b00000010010010010010010010010010;
                                                        65.     8'd057: o_recip = 32'b00000010001111101110000010001111;
                                                        66.     8'd058: o_recip = 32'b00000010001101001111011100101100;
                                                        67.     8'd059: o_recip = 32'b00000010001010110110001111001011;
                                                        68.     8'd060: o_recip = 32'b00000010001000100010001000100010;
                                                        69.     8'd061: o_recip = 32'b00000010000110010010111000101001;
                                                        70.     8'd062: o_recip = 32'b00000010000100001000010000100001;
                                                        71.     8'd063: o_recip = 32'b00000010000010000010000010000010;
                                                        72.     8'd064: o_recip = 32'b00000010000000000000000000000000;
                                                        73.     8'd065: o_recip = 32'b00000001111110000001111110000001;
                                                        74.     8'd066: o_recip = 32'b00000001111100000111110000011111;
                                                        75.     8'd067: o_recip = 32'b00000001111010010001001100011010;
                                                        76.     8'd068: o_recip = 32'b00000001111000011110000111100001;
                                                        77.     8'd069: o_recip = 32'b00000001110110101110011000000111;
                                                        78.     8'd070: o_recip = 32'b00000001110101000001110101000001;
                                                        79.     8'd071: o_recip = 32'b00000001110011011000010101101000;
                                                        80.     8'd072: o_recip = 32'b00000001110001110001110001110001;
                                                        81.     8'd073: o_recip = 32'b00000001110000001110000001110000;
                                                        82.     8'd074: o_recip = 32'b00000001101110101100111110010001;
                                                        83.     8'd075: o_recip = 32'b00000001101101001110100000011011;
                                                        84.     8'd076: o_recip = 32'b00000001101011110010100001101011;
                                                        85.     8'd077: o_recip = 32'b00000001101010011000111011110110;
                                                        86.     8'd078: o_recip = 32'b00000001101001000001101001000001;
                                                        87.     8'd079: o_recip = 32'b00000001100111101100100011101001;
                                                        88.     8'd080: o_recip = 32'b00000001100110011001100110011001;
                                                        89.     8'd081: o_recip = 32'b00000001100101001000101100001111;
                                                        90.     8'd082: o_recip = 32'b00000001100011111001110000011000;
                                                        91.     8'd083: o_recip = 32'b00000001100010101100101110010000;
                                                        92.     8'd084: o_recip = 32'b00000001100001100001100001100001;
                                                        93.     8'd085: o_recip = 32'b00000001100000011000000110000001;
                                                        94.     8'd086: o_recip = 32'b00000001011111010000010111110100;
                                                        95.     8'd087: o_recip = 32'b00000001011110001010010011001000;
                                                        96.     8'd088: o_recip = 32'b00000001011101000101110100010111;
                                                        97.     8'd089: o_recip = 32'b00000001011100000010111000000101;
                                                        98.     8'd090: o_recip = 32'b00000001011011000001011011000001;
                                                        99.     8'd091: o_recip = 32'b00000001011010000001011010000001;
                                                        100.     8'd092: o_recip = 32'b00000001011001000010110010000101;
                                                        101.     8'd093: o_recip = 32'b00000001011000000101100000010110;
                                                        102.     8'd094: o_recip = 32'b00000001010111001001100010000010;
                                                        103.     8'd095: o_recip = 32'b00000001010110001110110100100011;
                                                        104.     8'd096: o_recip = 32'b00000001010101010101010101010101;
                                                        105.     8'd097: o_recip = 32'b00000001010100011101000001111110;
                                                        106.     8'd098: o_recip = 32'b00000001010011100101111000001010;
                                                        107.     8'd099: o_recip = 32'b00000001010010101111110101101010;
                                                        108.     8'd100: o_recip = 32'b00000001010001111010111000010100;
                                                        109.     8'd101: o_recip = 32'b00000001010001000110111110000110;
                                                        110.     8'd102: o_recip = 32'b00000001010000010100000101000001;
                                                        111.     8'd103: o_recip = 32'b00000001001111100010001011001011;
                                                        112.     8'd104: o_recip = 32'b00000001001110110001001110110001;
                                                        113.     8'd105: o_recip = 32'b00000001001110000001001110000001;
                                                        114.     8'd106: o_recip = 32'b00000001001101010010000111001111;
                                                        115.     8'd107: o_recip = 32'b00000001001100100011111000110100;
                                                        116.     8'd108: o_recip = 32'b00000001001011110110100001001011;
                                                        117.     8'd109: o_recip = 32'b00000001001011001001111110110100;
                                                        118.     8'd110: o_recip = 32'b00000001001010011110010000010010;
                                                        119.     8'd111: o_recip = 32'b00000001001001110011010100001011;
                                                        120.     8'd112: o_recip = 32'b00000001001001001001001001001001;
                                                        121.     8'd113: o_recip = 32'b00000001001000011111101101111000;
                                                        122.     8'd114: o_recip = 32'b00000001000111110111000001000111;
                                                        123.     8'd115: o_recip = 32'b00000001000111001111000001101010;
                                                        124.     8'd116: o_recip = 32'b00000001000110100111101110010110;
                                                        125.     8'd117: o_recip = 32'b00000001000110000001000110000001;
                                                        126.     8'd118: o_recip = 32'b00000001000101011011000111100101;
                                                        127.     8'd119: o_recip = 32'b00000001000100110101110010000001;
                                                        128.     8'd120: o_recip = 32'b00000001000100010001000100010001;
                                                        129.     8'd121: o_recip = 32'b00000001000011101100111101010110;
                                                        130.     8'd122: o_recip = 32'b00000001000011001001011100010100;
                                                        131.     8'd123: o_recip = 32'b00000001000010100110100000010000;
                                                        132.     8'd124: o_recip = 32'b00000001000010000100001000010000;
                                                        133.     8'd125: o_recip = 32'b00000001000001100010010011011101;
                                                        134.     8'd126: o_recip = 32'b00000001000001000001000001000001;
                                                        135.     8'd127: o_recip = 32'b00000001000000100000010000001000;
                                                        136.     8'd128: o_recip = 32'b00000001000000000000000000000000;
                                                        137.     8'd129: o_recip = 32'b00000000111111100000001111111000;
                                                        138.     8'd130: o_recip = 32'b00000000111111000000111111000000;
                                                        139.     8'd131: o_recip = 32'b00000000111110100010001100101100;
                                                        140.     8'd132: o_recip = 32'b00000000111110000011111000001111;
                                                        141.     8'd133: o_recip = 32'b00000000111101100110000000111101;
                                                        142.     8'd134: o_recip = 32'b00000000111101001000100110001101;
                                                        143.     8'd135: o_recip = 32'b00000000111100101011100111010110;
                                                        144.     8'd136: o_recip = 32'b00000000111100001111000011110000;
                                                        145.     8'd137: o_recip = 32'b00000000111011110010111010110111;
                                                        146.     8'd138: o_recip = 32'b00000000111011010111001100000011;
                                                        147.     8'd139: o_recip = 32'b00000000111010111011110110110010;
                                                        148.     8'd140: o_recip = 32'b00000000111010100000111010100000;
                                                        149.     8'd141: o_recip = 32'b00000000111010000110010110101100;
                                                        150.     8'd142: o_recip = 32'b00000000111001101100001010110100;
                                                        151.     8'd143: o_recip = 32'b00000000111001010010010110011000;
                                                        152.     8'd144: o_recip = 32'b00000000111000111000111000111000;
                                                        153.     8'd145: o_recip = 32'b00000000111000011111110001111000;
                                                        154.     8'd146: o_recip = 32'b00000000111000000111000000111000;
                                                        155.     8'd147: o_recip = 32'b00000000110111101110100101011100;
                                                        156.     8'd148: o_recip = 32'b00000000110111010110011111001000;
                                                        157.     8'd149: o_recip = 32'b00000000110110111110101101100001;
                                                        158.     8'd150: o_recip = 32'b00000000110110100111010000001101;
                                                        159.     8'd151: o_recip = 32'b00000000110110010000000110110010;
                                                        160.     8'd152: o_recip = 32'b00000000110101111001010000110101;
                                                        161.     8'd153: o_recip = 32'b00000000110101100010101110000000;
                                                        162.     8'd154: o_recip = 32'b00000000110101001100011101111011;
                                                        163.     8'd155: o_recip = 32'b00000000110100110110100000001101;
                                                        164.     8'd156: o_recip = 32'b00000000110100100000110100100000;
                                                        165.     8'd157: o_recip = 32'b00000000110100001011011010011111;
                                                        166.     8'd158: o_recip = 32'b00000000110011110110010001110100;
                                                        167.     8'd159: o_recip = 32'b00000000110011100001011010001010;
                                                        168.     8'd160: o_recip = 32'b00000000110011001100110011001100;
                                                        169.     8'd161: o_recip = 32'b00000000110010111000011100100111;
                                                        170.     8'd162: o_recip = 32'b00000000110010100100010110000111;
                                                        171.     8'd163: o_recip = 32'b00000000110010010000011111011010;
                                                        172.     8'd164: o_recip = 32'b00000000110001111100111000001100;
                                                        173.     8'd165: o_recip = 32'b00000000110001101001100000001100;
                                                        174.     8'd166: o_recip = 32'b00000000110001010110010111001000;
                                                        175.     8'd167: o_recip = 32'b00000000110001000011011100101111;
                                                        176.     8'd168: o_recip = 32'b00000000110000110000110000110000;
                                                        177.     8'd169: o_recip = 32'b00000000110000011110010010111011;
                                                        178.     8'd170: o_recip = 32'b00000000110000001100000011000000;
                                                        179.     8'd171: o_recip = 32'b00000000101111111010000000101111;
                                                        180.     8'd172: o_recip = 32'b00000000101111101000001011111010;
                                                        181.     8'd173: o_recip = 32'b00000000101111010110100100010000;
                                                        182.     8'd174: o_recip = 32'b00000000101111000101001001100100;
                                                        183.     8'd175: o_recip = 32'b00000000101110110011111011100111;
                                                        184.     8'd176: o_recip = 32'b00000000101110100010111010001011;
                                                        185.     8'd177: o_recip = 32'b00000000101110010010000101000011;
                                                        186.     8'd178: o_recip = 32'b00000000101110000001011100000010;
                                                        187.     8'd179: o_recip = 32'b00000000101101110000111110111011;
                                                        188.     8'd180: o_recip = 32'b00000000101101100000101101100000;
                                                        189.     8'd181: o_recip = 32'b00000000101101010000100111100110;
                                                        190.     8'd182: o_recip = 32'b00000000101101000000101101000000;
                                                        191.     8'd183: o_recip = 32'b00000000101100110000111101100011;
                                                        192.     8'd184: o_recip = 32'b00000000101100100001011001000010;
                                                        193.     8'd185: o_recip = 32'b00000000101100010001111111010011;
                                                        194.     8'd186: o_recip = 32'b00000000101100000010110000001011;
                                                        195.     8'd187: o_recip = 32'b00000000101011110011101011011101;
                                                        196.     8'd188: o_recip = 32'b00000000101011100100110001000001;
                                                        197.     8'd189: o_recip = 32'b00000000101011010110000000101011;
                                                        198.     8'd190: o_recip = 32'b00000000101011000111011010010001;
                                                        199.     8'd191: o_recip = 32'b00000000101010111000111101101001;
                                                        200.     8'd192: o_recip = 32'b00000000101010101010101010101010;
                                                        201.     8'd193: o_recip = 32'b00000000101010011100100001001010;
                                                        202.     8'd194: o_recip = 32'b00000000101010001110100000111111;
                                                        203.     8'd195: o_recip = 32'b00000000101010000000101010000000;
                                                        204.     8'd196: o_recip = 32'b00000000101001110010111100000101;
                                                        205.     8'd197: o_recip = 32'b00000000101001100101010111000100;
                                                        206.     8'd198: o_recip = 32'b00000000101001010111111010110101;
                                                        207.     8'd199: o_recip = 32'b00000000101001001010100111001111;
                                                        208.     8'd200: o_recip = 32'b00000000101000111101011100001010;
                                                        209.     8'd201: o_recip = 32'b00000000101000110000011001011110;
                                                        210.     8'd202: o_recip = 32'b00000000101000100011011111000011;
                                                        211.     8'd203: o_recip = 32'b00000000101000010110101100110001;
                                                        212.     8'd204: o_recip = 32'b00000000101000001010000010100000;
                                                        213.     8'd205: o_recip = 32'b00000000100111111101100000001001;
                                                        214.     8'd206: o_recip = 32'b00000000100111110001000101100101;
                                                        215.     8'd207: o_recip = 32'b00000000100111100100110010101101;
                                                        216.     8'd208: o_recip = 32'b00000000100111011000100111011000;
                                                        217.     8'd209: o_recip = 32'b00000000100111001100100011100001;
                                                        218.     8'd210: o_recip = 32'b00000000100111000000100111000000;
                                                        219.     8'd211: o_recip = 32'b00000000100110110100110001101111;
                                                        220.     8'd212: o_recip = 32'b00000000100110101001000011100111;
                                                        221.     8'd213: o_recip = 32'b00000000100110011101011100100010;
                                                        222.     8'd214: o_recip = 32'b00000000100110010001111100011010;
                                                        223.     8'd215: o_recip = 32'b00000000100110000110100011001000;
                                                        224.     8'd216: o_recip = 32'b00000000100101111011010000100101;
                                                        225.     8'd217: o_recip = 32'b00000000100101110000000100101110;
                                                        226.     8'd218: o_recip = 32'b00000000100101100100111111011010;
                                                        227.     8'd219: o_recip = 32'b00000000100101011010000000100101;
                                                        228.     8'd220: o_recip = 32'b00000000100101001111001000001001;
                                                        229.     8'd221: o_recip = 32'b00000000100101000100010110000000;
                                                        230.     8'd222: o_recip = 32'b00000000100100111001101010000101;
                                                        231.     8'd223: o_recip = 32'b00000000100100101111000100010011;
                                                        232.     8'd224: o_recip = 32'b00000000100100100100100100100100;
                                                        233.     8'd225: o_recip = 32'b00000000100100011010001010110011;
                                                        234.     8'd226: o_recip = 32'b00000000100100001111110110111100;
                                                        235.     8'd227: o_recip = 32'b00000000100100000101101000111000;
                                                        236.     8'd228: o_recip = 32'b00000000100011111011100000100011;
                                                        237.     8'd229: o_recip = 32'b00000000100011110001011101111001;
                                                        238.     8'd230: o_recip = 32'b00000000100011100111100000110101;
                                                        239.     8'd231: o_recip = 32'b00000000100011011101101001010010;
                                                        240.     8'd232: o_recip = 32'b00000000100011010011110111001011;
                                                        241.     8'd233: o_recip = 32'b00000000100011001010001010011100;
                                                        242.     8'd234: o_recip = 32'b00000000100011000000100011000000;
                                                        243.     8'd235: o_recip = 32'b00000000100010110111000000110100;
                                                        244.     8'd236: o_recip = 32'b00000000100010101101100011110010;
                                                        245.     8'd237: o_recip = 32'b00000000100010100100001011111000;
                                                        246.     8'd238: o_recip = 32'b00000000100010011010111001000000;
                                                        247.     8'd239: o_recip = 32'b00000000100010010001101011000111;
                                                        248.     8'd240: o_recip = 32'b00000000100010001000100010001000;
                                                        249.     8'd241: o_recip = 32'b00000000100001111111011110000000;
                                                        250.     8'd242: o_recip = 32'b00000000100001110110011110101011;
                                                        251.     8'd243: o_recip = 32'b00000000100001101101100100000101;
                                                        252.     8'd244: o_recip = 32'b00000000100001100100101110001010;
                                                        253.     8'd245: o_recip = 32'b00000000100001011011111100110111;
                                                        254.     8'd246: o_recip = 32'b00000000100001010011010000001000;
                                                        255.     8'd247: o_recip = 32'b00000000100001001010100111111001;
                                                        256.     8'd248: o_recip = 32'b00000000100001000010000100001000;
                                                        257.     8'd249: o_recip = 32'b00000000100000111001100100110000;
                                                        258.     8'd250: o_recip = 32'b00000000100000110001001001101110;
                                                        259.     8'd251: o_recip = 32'b00000000100000101000110010111111;
                                                        260.     8'd252: o_recip = 32'b00000000100000100000100000100000;
                                                        261.     8'd253: o_recip = 32'b00000000100000011000010010001101;
                                                        262.     8'd254: o_recip = 32'b00000000100000010000001000000100;
                                                        263.     8'd255: o_recip = 32'b00000000100000001000000010000000;
                                                        264. default:o_recip = 32'b00000000000000000000000000000000;  // all 256 values are listed above; this arm only catches inputs containing x/z bits
                                                        265.   endcase
                                                        266. end

                                                        267. endmodule 

                                                        測試用testbench 如下:

                                                        1. `timescale 1ns/100ps

                                                        2. // Stimulus-only testbench for the counter module: it drives iA, iB and iC
                                                        3. // so the delay of each output can be read off the waveform. Nothing is
                                                        4. // checked automatically and the simulation is never stopped from here.
                                                        5. module counter_tb;

                                                        6.   // Inputs driven into the DUT
                                                        7.   reg  [7:0]  iA;
                                                        8.   reg  [7:0]  iB;
                                                        9.   reg  [7:0]  iC;

                                                        10.   // Outputs observed from the DUT
                                                        11.   wire [7:0]  oAdd;
                                                        12.   wire [7:0]  oSub;
                                                        13.   wire [15:0] oMul;
                                                        14.   wire [15:0] oDiv;
                                                        15.   wire [7:0]  oDiv_mul;
                                                        16.   wire [31:0] ot_div;
                                                        17.   wire [31:0] ot_reciprocal_C;

                                                        18.   // Device under test, connected by name
                                                        19.   counter u (
                                                        20.     .iA(iA), .iB(iB), .iC(iC),
                                                        21.     .oAdd(oAdd), .oSub(oSub),
                                                        22.     .oMul(oMul), .oDiv(oDiv),
                                                        23.     .oDiv_mul(oDiv_mul),
                                                        24.     .ot_div(ot_div),
                                                        25.     .ot_reciprocal_C(ot_reciprocal_C)
                                                        26.   );

                                                        27.   // iA timeline: 10 at t=0, written again as 10 at t=50ns, 20 at t=250ns.
                                                        28.   // iB and iC only receive their starting values here.
                                                        29.   initial
                                                        30.   begin
                                                        31.     #0
                                                        32.     iA = 8'd10;
                                                        33.     iB = 8'd2;
                                                        34.     iC = 8'd1;
                                                        35.     #50
                                                        36.     iA = 8'd10;
                                                        37.     #200
                                                        38.     iA = 8'd20;
                                                        39.   end

                                                        40.   // Every 50ns step iB by 2 and iC by 1 (both are 8 bits wide and wrap).
                                                        41.   always
                                                        42.   begin
                                                        43.     #50;
                                                        44.     iB <= iB + 8'd2;
                                                        45.     iC <= iC + 8'd1;
                                                        46.   end

                                                        47. endmodule 

                                                                                                                                            ModelSim 後模擬執行結果


                                                                                                                                            上面三個是輸入input A, B, C。接著兩行分別為加減,計算耗時約10ns。

                                                                                                                                            第六行為A*B,耗時約16ns。

                                                                                                                                            第七行是用除法 A*B/C,最長約花35ns。

                                                                                                                                            第八行先把C換成倒數 1/C,用乘法計算 A*B*(1/C),最長約花25ns。

                                                                                                                                            第九行為C查表求倒數1/C,約花15ns。

                                                                                                                                            第十行為A*B*(1/C)的完整結果,共有48 bits,最長約花25ns。第八行實為第十行輸出除2^31且經四捨五入後的結果。

                                                                                                                                            另,倒數的數值轉換係以MATLAB程式輸出而成,MATLAB程式如下:

                                                                                                                                            1. % Generates reciprocal.txt: the case entries for the Verilog reciprocal
                                                                                                                                            2. % lookup table. Entry j holds floor(2^(bit_num-1)/j) written as a
                                                                                                                                            3. % bit_num-bit binary literal, i.e. 1/j in fixed point, truncated rather
                                                                                                                                            4. % than rounded.
                                                                                                                                            5. bit_num = 32;
                                                                                                                                            6. scale = 2^(bit_num-1);  % weight of the MSB, so 1/1 maps to 100...0

                                                                                                                                            7. fid = fopen('reciprocal.txt','w');
                                                                                                                                            8. if fid < 0
                                                                                                                                            9.     error('Cannot open reciprocal.txt for writing.');
                                                                                                                                            10. end

                                                                                                                                            11. % Divisor 0 has no reciprocal; the table stores all zeros for it.
                                                                                                                                            12. fprintf(fid, '    8''d000: o_recip = %d''b%s;\n', bit_num, repmat('0', 1, bit_num));

                                                                                                                                            13. % dec2bin pads to bit_num digits, giving the same leading 0 and truncated
                                                                                                                                            14. % fraction bits as extracting the fraction one bit at a time, without
                                                                                                                                            15. % the extra loop iteration that wrote past the end of the bit buffer.
                                                                                                                                            16. for j = 1:255
                                                                                                                                            17.     fprintf(fid, '    8''d%03d: o_recip = %d''b%s;\n', j, bit_num, dec2bin(floor(scale/j), bit_num));
                                                                                                                                            18. end

                                                                                                                                            19. % The entries assign o_recip so they match the reciprocal module's output.
                                                                                                                                            20. fprintf(fid, 'default: o_recip = %d''b%s;', bit_num, repmat('0', 1, bit_num));
                                                                                                                                            21. fclose(fid);

                                                                                                                                            沒有留言:

                                                                                                                                            張貼留言