本文大略測試FPGA的執行速度,模擬的FPGA為DE2 115的Cyclone IV EP4CE115F29C7。
測試的運算速度分別有加、減、乘、除,其中,除法計算的部分,又區分為直接除法以及利用倒數把除法運算改為乘法運算。
Verilog 程式碼如下。有三個輸入A, B, C,運算有加Add(A+B)、減Sub(A-B)、乘Mul(A*B)、除Div(A*B/C)。其中,輸入C經過module reciprocal查表轉換為32位元的定點倒數1/C(查表值為實際1/C放大2^31倍後取整數的結果,也就是查表值除以2^31才是實際的數值大小)。
- // counter: purely combinational arithmetic block used to compare operator delays.
- //   oAdd     = iA + iB          (8-bit result, carry-out is dropped)
- //   oSub     = iA - iB          (8-bit result, wraps modulo 256)
- //   oMul     = iA * iB          (full 16-bit product)
- //   oDiv     = iA * iB / iC     (integer divide; iC == 0 yields x in simulation)
- //   oDiv_mul = round(iA * iB * (1/iC)) using the reciprocal lookup table
- //   ot_div / ot_reciprocal_C expose the intermediate values for observation.
- module counter(
-     input [7:0] iA, iB, iC,
-     output [7:0] oAdd, oSub,
-     output [15:0] oMul,
-     output [15:0] oDiv,
-     output [7:0] oDiv_mul,
-     output [31:0] ot_div,
-     output [31:0] ot_reciprocal_C
- );
-     // Table output for iC: the reciprocal scaled by 2^31.
-     wire [31:0] recip_scaled;
-     // iA * iB * recip_scaled, i.e. the quotient scaled by 2^31.
-     wire [47:0] quot_scaled;
-     // Integer part of the scaled quotient and the first fractional bit.
-     wire [15:0] quot_floor = quot_scaled[46:31];
-     wire        round_up   = quot_scaled[30];
-     // Reciprocal lookup (combinational ROM).
-     reciprocal u_reciprocal(
-         .o_recip(recip_scaled),
-         .i_num(iC)
-     );
-     // Direct operators.
-     assign oAdd = iA + iB;
-     assign oSub = iA - iB;
-     assign oMul = iA * iB;
-     assign oDiv = iA * iB / iC;
-     // Division replaced by multiplication with the scaled reciprocal.
-     assign quot_scaled = iA * iB * recip_scaled;
-     // Round half up on bit 30. NOTE(review): the rounded value is 16 bits wide
-     // but oDiv_mul is 8 bits, so quotients above 255 are truncated.
-     assign oDiv_mul = round_up ? quot_floor + 1 : quot_floor;
-     // Debug taps. NOTE(review): ot_div is 32 bits, so only the low 32 bits of
-     // the 48-bit scaled quotient are visible on this port.
-     assign ot_reciprocal_C = recip_scaled;
-     assign ot_div = quot_scaled;
- endmodule
- module reciprocal( // Combinational lookup table mapping an 8-bit value to its reciprocal scaled by 2^31.
- input wire [7:0] i_num, // value whose reciprocal is looked up (0..255)
- output reg[31:0] o_recip // table value; divide by 2^31 to get the real 1/i_num
- );
- always @(*) // no clock: o_recip follows i_num combinationally
- begin
- case (i_num) // one entry per input value; spot-checked entries equal floor(2^31 / i_num)
- 8'd000: o_recip = 32'b00000000000000000000000000000000; // 1/0 is undefined; the table returns 0
- 8'd001: o_recip = 32'b10000000000000000000000000000000; // 2^31, i.e. 1.0 in this scaling
- 8'd002: o_recip = 32'b01000000000000000000000000000000; // 2^30, i.e. 0.5
- 8'd003: o_recip = 32'b00101010101010101010101010101010; // non-exact reciprocals are truncated, not rounded
- 8'd004: o_recip = 32'b00100000000000000000000000000000;
- 8'd005: o_recip = 32'b00011001100110011001100110011001;
- 8'd006: o_recip = 32'b00010101010101010101010101010101;
- 8'd007: o_recip = 32'b00010010010010010010010010010010;
- 8'd008: o_recip = 32'b00010000000000000000000000000000;
- 8'd009: o_recip = 32'b00001110001110001110001110001110;
- 8'd010: o_recip = 32'b00001100110011001100110011001100;
- 8'd011: o_recip = 32'b00001011101000101110100010111010;
- 8'd012: o_recip = 32'b00001010101010101010101010101010;
- 8'd013: o_recip = 32'b00001001110110001001110110001001;
- 8'd014: o_recip = 32'b00001001001001001001001001001001;
- 8'd015: o_recip = 32'b00001000100010001000100010001000;
- 8'd016: o_recip = 32'b00001000000000000000000000000000;
- 8'd017: o_recip = 32'b00000111100001111000011110000111;
- 8'd018: o_recip = 32'b00000111000111000111000111000111;
- 8'd019: o_recip = 32'b00000110101111001010000110101111;
- 8'd020: o_recip = 32'b00000110011001100110011001100110;
- 8'd021: o_recip = 32'b00000110000110000110000110000110;
- 8'd022: o_recip = 32'b00000101110100010111010001011101;
- 8'd023: o_recip = 32'b00000101100100001011001000010110;
- 8'd024: o_recip = 32'b00000101010101010101010101010101;
- 8'd025: o_recip = 32'b00000101000111101011100001010001;
- 8'd026: o_recip = 32'b00000100111011000100111011000100;
- 8'd027: o_recip = 32'b00000100101111011010000100101111;
- 8'd028: o_recip = 32'b00000100100100100100100100100100;
- 8'd029: o_recip = 32'b00000100011010011110111001011000;
- 8'd030: o_recip = 32'b00000100010001000100010001000100;
- 8'd031: o_recip = 32'b00000100001000010000100001000010;
- 8'd032: o_recip = 32'b00000100000000000000000000000000;
- 8'd033: o_recip = 32'b00000011111000001111100000111110;
- 8'd034: o_recip = 32'b00000011110000111100001111000011;
- 8'd035: o_recip = 32'b00000011101010000011101010000011;
- 8'd036: o_recip = 32'b00000011100011100011100011100011;
- 8'd037: o_recip = 32'b00000011011101011001111100100010;
- 8'd038: o_recip = 32'b00000011010111100101000011010111;
- 8'd039: o_recip = 32'b00000011010010000011010010000011;
- 8'd040: o_recip = 32'b00000011001100110011001100110011;
- 8'd041: o_recip = 32'b00000011000111110011100000110001;
- 8'd042: o_recip = 32'b00000011000011000011000011000011;
- 8'd043: o_recip = 32'b00000010111110100000101111101000;
- 8'd044: o_recip = 32'b00000010111010001011101000101110;
- 8'd045: o_recip = 32'b00000010110110000010110110000010;
- 8'd046: o_recip = 32'b00000010110010000101100100001011;
- 8'd047: o_recip = 32'b00000010101110010011000100000101;
- 8'd048: o_recip = 32'b00000010101010101010101010101010;
- 8'd049: o_recip = 32'b00000010100111001011110000010100;
- 8'd050: o_recip = 32'b00000010100011110101110000101000;
- 8'd051: o_recip = 32'b00000010100000101000001010000010;
- 8'd052: o_recip = 32'b00000010011101100010011101100010;
- 8'd053: o_recip = 32'b00000010011010100100001110011111;
- 8'd054: o_recip = 32'b00000010010111101101000010010111;
- 8'd055: o_recip = 32'b00000010010100111100100000100101;
- 8'd056: o_recip = 32'b00000010010010010010010010010010;
- 8'd057: o_recip = 32'b00000010001111101110000010001111;
- 8'd058: o_recip = 32'b00000010001101001111011100101100;
- 8'd059: o_recip = 32'b00000010001010110110001111001011;
- 8'd060: o_recip = 32'b00000010001000100010001000100010;
- 8'd061: o_recip = 32'b00000010000110010010111000101001;
- 8'd062: o_recip = 32'b00000010000100001000010000100001;
- 8'd063: o_recip = 32'b00000010000010000010000010000010;
- 8'd064: o_recip = 32'b00000010000000000000000000000000;
- 8'd065: o_recip = 32'b00000001111110000001111110000001;
- 8'd066: o_recip = 32'b00000001111100000111110000011111;
- 8'd067: o_recip = 32'b00000001111010010001001100011010;
- 8'd068: o_recip = 32'b00000001111000011110000111100001;
- 8'd069: o_recip = 32'b00000001110110101110011000000111;
- 8'd070: o_recip = 32'b00000001110101000001110101000001;
- 8'd071: o_recip = 32'b00000001110011011000010101101000;
- 8'd072: o_recip = 32'b00000001110001110001110001110001;
- 8'd073: o_recip = 32'b00000001110000001110000001110000;
- 8'd074: o_recip = 32'b00000001101110101100111110010001;
- 8'd075: o_recip = 32'b00000001101101001110100000011011;
- 8'd076: o_recip = 32'b00000001101011110010100001101011;
- 8'd077: o_recip = 32'b00000001101010011000111011110110;
- 8'd078: o_recip = 32'b00000001101001000001101001000001;
- 8'd079: o_recip = 32'b00000001100111101100100011101001;
- 8'd080: o_recip = 32'b00000001100110011001100110011001;
- 8'd081: o_recip = 32'b00000001100101001000101100001111;
- 8'd082: o_recip = 32'b00000001100011111001110000011000;
- 8'd083: o_recip = 32'b00000001100010101100101110010000;
- 8'd084: o_recip = 32'b00000001100001100001100001100001;
- 8'd085: o_recip = 32'b00000001100000011000000110000001;
- 8'd086: o_recip = 32'b00000001011111010000010111110100;
- 8'd087: o_recip = 32'b00000001011110001010010011001000;
- 8'd088: o_recip = 32'b00000001011101000101110100010111;
- 8'd089: o_recip = 32'b00000001011100000010111000000101;
- 8'd090: o_recip = 32'b00000001011011000001011011000001;
- 8'd091: o_recip = 32'b00000001011010000001011010000001;
- 8'd092: o_recip = 32'b00000001011001000010110010000101;
- 8'd093: o_recip = 32'b00000001011000000101100000010110;
- 8'd094: o_recip = 32'b00000001010111001001100010000010;
- 8'd095: o_recip = 32'b00000001010110001110110100100011;
- 8'd096: o_recip = 32'b00000001010101010101010101010101;
- 8'd097: o_recip = 32'b00000001010100011101000001111110;
- 8'd098: o_recip = 32'b00000001010011100101111000001010;
- 8'd099: o_recip = 32'b00000001010010101111110101101010;
- 8'd100: o_recip = 32'b00000001010001111010111000010100;
- 8'd101: o_recip = 32'b00000001010001000110111110000110;
- 8'd102: o_recip = 32'b00000001010000010100000101000001;
- 8'd103: o_recip = 32'b00000001001111100010001011001011;
- 8'd104: o_recip = 32'b00000001001110110001001110110001;
- 8'd105: o_recip = 32'b00000001001110000001001110000001;
- 8'd106: o_recip = 32'b00000001001101010010000111001111;
- 8'd107: o_recip = 32'b00000001001100100011111000110100;
- 8'd108: o_recip = 32'b00000001001011110110100001001011;
- 8'd109: o_recip = 32'b00000001001011001001111110110100;
- 8'd110: o_recip = 32'b00000001001010011110010000010010;
- 8'd111: o_recip = 32'b00000001001001110011010100001011;
- 8'd112: o_recip = 32'b00000001001001001001001001001001;
- 8'd113: o_recip = 32'b00000001001000011111101101111000;
- 8'd114: o_recip = 32'b00000001000111110111000001000111;
- 8'd115: o_recip = 32'b00000001000111001111000001101010;
- 8'd116: o_recip = 32'b00000001000110100111101110010110;
- 8'd117: o_recip = 32'b00000001000110000001000110000001;
- 8'd118: o_recip = 32'b00000001000101011011000111100101;
- 8'd119: o_recip = 32'b00000001000100110101110010000001;
- 8'd120: o_recip = 32'b00000001000100010001000100010001;
- 8'd121: o_recip = 32'b00000001000011101100111101010110;
- 8'd122: o_recip = 32'b00000001000011001001011100010100;
- 8'd123: o_recip = 32'b00000001000010100110100000010000;
- 8'd124: o_recip = 32'b00000001000010000100001000010000;
- 8'd125: o_recip = 32'b00000001000001100010010011011101;
- 8'd126: o_recip = 32'b00000001000001000001000001000001;
- 8'd127: o_recip = 32'b00000001000000100000010000001000;
- 8'd128: o_recip = 32'b00000001000000000000000000000000;
- 8'd129: o_recip = 32'b00000000111111100000001111111000;
- 8'd130: o_recip = 32'b00000000111111000000111111000000;
- 8'd131: o_recip = 32'b00000000111110100010001100101100;
- 8'd132: o_recip = 32'b00000000111110000011111000001111;
- 8'd133: o_recip = 32'b00000000111101100110000000111101;
- 8'd134: o_recip = 32'b00000000111101001000100110001101;
- 8'd135: o_recip = 32'b00000000111100101011100111010110;
- 8'd136: o_recip = 32'b00000000111100001111000011110000;
- 8'd137: o_recip = 32'b00000000111011110010111010110111;
- 8'd138: o_recip = 32'b00000000111011010111001100000011;
- 8'd139: o_recip = 32'b00000000111010111011110110110010;
- 8'd140: o_recip = 32'b00000000111010100000111010100000;
- 8'd141: o_recip = 32'b00000000111010000110010110101100;
- 8'd142: o_recip = 32'b00000000111001101100001010110100;
- 8'd143: o_recip = 32'b00000000111001010010010110011000;
- 8'd144: o_recip = 32'b00000000111000111000111000111000;
- 8'd145: o_recip = 32'b00000000111000011111110001111000;
- 8'd146: o_recip = 32'b00000000111000000111000000111000;
- 8'd147: o_recip = 32'b00000000110111101110100101011100;
- 8'd148: o_recip = 32'b00000000110111010110011111001000;
- 8'd149: o_recip = 32'b00000000110110111110101101100001;
- 8'd150: o_recip = 32'b00000000110110100111010000001101;
- 8'd151: o_recip = 32'b00000000110110010000000110110010;
- 8'd152: o_recip = 32'b00000000110101111001010000110101;
- 8'd153: o_recip = 32'b00000000110101100010101110000000;
- 8'd154: o_recip = 32'b00000000110101001100011101111011;
- 8'd155: o_recip = 32'b00000000110100110110100000001101;
- 8'd156: o_recip = 32'b00000000110100100000110100100000;
- 8'd157: o_recip = 32'b00000000110100001011011010011111;
- 8'd158: o_recip = 32'b00000000110011110110010001110100;
- 8'd159: o_recip = 32'b00000000110011100001011010001010;
- 8'd160: o_recip = 32'b00000000110011001100110011001100;
- 8'd161: o_recip = 32'b00000000110010111000011100100111;
- 8'd162: o_recip = 32'b00000000110010100100010110000111;
- 8'd163: o_recip = 32'b00000000110010010000011111011010;
- 8'd164: o_recip = 32'b00000000110001111100111000001100;
- 8'd165: o_recip = 32'b00000000110001101001100000001100;
- 8'd166: o_recip = 32'b00000000110001010110010111001000;
- 8'd167: o_recip = 32'b00000000110001000011011100101111;
- 8'd168: o_recip = 32'b00000000110000110000110000110000;
- 8'd169: o_recip = 32'b00000000110000011110010010111011;
- 8'd170: o_recip = 32'b00000000110000001100000011000000;
- 8'd171: o_recip = 32'b00000000101111111010000000101111;
- 8'd172: o_recip = 32'b00000000101111101000001011111010;
- 8'd173: o_recip = 32'b00000000101111010110100100010000;
- 8'd174: o_recip = 32'b00000000101111000101001001100100;
- 8'd175: o_recip = 32'b00000000101110110011111011100111;
- 8'd176: o_recip = 32'b00000000101110100010111010001011;
- 8'd177: o_recip = 32'b00000000101110010010000101000011;
- 8'd178: o_recip = 32'b00000000101110000001011100000010;
- 8'd179: o_recip = 32'b00000000101101110000111110111011;
- 8'd180: o_recip = 32'b00000000101101100000101101100000;
- 8'd181: o_recip = 32'b00000000101101010000100111100110;
- 8'd182: o_recip = 32'b00000000101101000000101101000000;
- 8'd183: o_recip = 32'b00000000101100110000111101100011;
- 8'd184: o_recip = 32'b00000000101100100001011001000010;
- 8'd185: o_recip = 32'b00000000101100010001111111010011;
- 8'd186: o_recip = 32'b00000000101100000010110000001011;
- 8'd187: o_recip = 32'b00000000101011110011101011011101;
- 8'd188: o_recip = 32'b00000000101011100100110001000001;
- 8'd189: o_recip = 32'b00000000101011010110000000101011;
- 8'd190: o_recip = 32'b00000000101011000111011010010001;
- 8'd191: o_recip = 32'b00000000101010111000111101101001;
- 8'd192: o_recip = 32'b00000000101010101010101010101010;
- 8'd193: o_recip = 32'b00000000101010011100100001001010;
- 8'd194: o_recip = 32'b00000000101010001110100000111111;
- 8'd195: o_recip = 32'b00000000101010000000101010000000;
- 8'd196: o_recip = 32'b00000000101001110010111100000101;
- 8'd197: o_recip = 32'b00000000101001100101010111000100;
- 8'd198: o_recip = 32'b00000000101001010111111010110101;
- 8'd199: o_recip = 32'b00000000101001001010100111001111;
- 8'd200: o_recip = 32'b00000000101000111101011100001010;
- 8'd201: o_recip = 32'b00000000101000110000011001011110;
- 8'd202: o_recip = 32'b00000000101000100011011111000011;
- 8'd203: o_recip = 32'b00000000101000010110101100110001;
- 8'd204: o_recip = 32'b00000000101000001010000010100000;
- 8'd205: o_recip = 32'b00000000100111111101100000001001;
- 8'd206: o_recip = 32'b00000000100111110001000101100101;
- 8'd207: o_recip = 32'b00000000100111100100110010101101;
- 8'd208: o_recip = 32'b00000000100111011000100111011000;
- 8'd209: o_recip = 32'b00000000100111001100100011100001;
- 8'd210: o_recip = 32'b00000000100111000000100111000000;
- 8'd211: o_recip = 32'b00000000100110110100110001101111;
- 8'd212: o_recip = 32'b00000000100110101001000011100111;
- 8'd213: o_recip = 32'b00000000100110011101011100100010;
- 8'd214: o_recip = 32'b00000000100110010001111100011010;
- 8'd215: o_recip = 32'b00000000100110000110100011001000;
- 8'd216: o_recip = 32'b00000000100101111011010000100101;
- 8'd217: o_recip = 32'b00000000100101110000000100101110;
- 8'd218: o_recip = 32'b00000000100101100100111111011010;
- 8'd219: o_recip = 32'b00000000100101011010000000100101;
- 8'd220: o_recip = 32'b00000000100101001111001000001001;
- 8'd221: o_recip = 32'b00000000100101000100010110000000;
- 8'd222: o_recip = 32'b00000000100100111001101010000101;
- 8'd223: o_recip = 32'b00000000100100101111000100010011;
- 8'd224: o_recip = 32'b00000000100100100100100100100100;
- 8'd225: o_recip = 32'b00000000100100011010001010110011;
- 8'd226: o_recip = 32'b00000000100100001111110110111100;
- 8'd227: o_recip = 32'b00000000100100000101101000111000;
- 8'd228: o_recip = 32'b00000000100011111011100000100011;
- 8'd229: o_recip = 32'b00000000100011110001011101111001;
- 8'd230: o_recip = 32'b00000000100011100111100000110101;
- 8'd231: o_recip = 32'b00000000100011011101101001010010;
- 8'd232: o_recip = 32'b00000000100011010011110111001011;
- 8'd233: o_recip = 32'b00000000100011001010001010011100;
- 8'd234: o_recip = 32'b00000000100011000000100011000000;
- 8'd235: o_recip = 32'b00000000100010110111000000110100;
- 8'd236: o_recip = 32'b00000000100010101101100011110010;
- 8'd237: o_recip = 32'b00000000100010100100001011111000;
- 8'd238: o_recip = 32'b00000000100010011010111001000000;
- 8'd239: o_recip = 32'b00000000100010010001101011000111;
- 8'd240: o_recip = 32'b00000000100010001000100010001000;
- 8'd241: o_recip = 32'b00000000100001111111011110000000;
- 8'd242: o_recip = 32'b00000000100001110110011110101011;
- 8'd243: o_recip = 32'b00000000100001101101100100000101;
- 8'd244: o_recip = 32'b00000000100001100100101110001010;
- 8'd245: o_recip = 32'b00000000100001011011111100110111;
- 8'd246: o_recip = 32'b00000000100001010011010000001000;
- 8'd247: o_recip = 32'b00000000100001001010100111111001;
- 8'd248: o_recip = 32'b00000000100001000010000100001000;
- 8'd249: o_recip = 32'b00000000100000111001100100110000;
- 8'd250: o_recip = 32'b00000000100000110001001001101110;
- 8'd251: o_recip = 32'b00000000100000101000110010111111;
- 8'd252: o_recip = 32'b00000000100000100000100000100000;
- 8'd253: o_recip = 32'b00000000100000011000010010001101;
- 8'd254: o_recip = 32'b00000000100000010000001000000100;
- 8'd255: o_recip = 32'b00000000100000001000000010000000; // smallest non-zero entry in the table
- default:o_recip = 32'b00000000000000000000000000000000; // all 256 values are listed above, so this only catches x/z inputs in simulation
- endcase
- end
- endmodule
測試用testbench 如下:
- `timescale 1ns/100ps
- // counter_tb: stimulus-only testbench for the counter module.
- // Drives iA/iB/iC and leaves all outputs to be inspected in the waveform viewer;
- // there are no self-checks and no $finish, so the run length is set by the simulator.
- module counter_tb;
-     // Stimulus registers.
-     reg [7:0] iA, iB, iC;
-     // Observed outputs (declaration order kept so waveform rows keep their positions).
-     wire [7:0] oAdd, oSub;
-     wire [15:0] oMul;
-     wire [15:0] oDiv;
-     wire [7:0] oDiv_mul;
-     wire [31:0] ot_reciprocal_C, ot_div;
-     // Device under test.
-     counter u(
-         .iA(iA), .iB(iB), .iC(iC),
-         .oAdd(oAdd), .oSub(oSub),
-         .oMul(oMul), .oDiv(oDiv),
-         .oDiv_mul(oDiv_mul),
-         .ot_div(ot_div),
-         .ot_reciprocal_C(ot_reciprocal_C)
-     );
-     // iA schedule: 10 from t=0 (re-written at t=50), then 20 from t=250.
-     initial
-     begin
-         #0;
-         iA = 10;
-         iB = 2;
-         iC = 1;
-         #50;
-         iA = 10;
-         #200;
-         iA = 20;
-     end
-     // Every 50 time units step iB by 2 and iC by 1 (8-bit wrap-around).
-     initial
-         forever #50
-         begin
-             iB <= iB + 2;
-             iC <= iC + 1;
-         end
- endmodule
Modelsim 後模擬執行結果
上面三個是輸入input A, B, C。接著兩行分別為加減,計算耗時約10ns。
第六行為A*B,耗時約16ns。
第七行是用除法 A*B/C,最長約花35ns。
第八行先把C換成倒數 1/C,用乘法計算 A*B*(1/C),最長約花25ns。
第九行為C查表求倒數1/C,約花15ns。
第十行為A*B*(1/C)的乘積t_div,最長約花25ns。t_div在模組內部共有48 bits,但依上列程式碼,輸出埠ot_div宣告為32 bits,故該行顯示的是t_div的低32位元。第八行實為t_div除2^31且經四捨五入後的結果(輸出埠oDiv_mul為8 bits,故只保留商的低8位元)。
另,倒數的數值轉換係以matlab程式輸出而成,matlab程式如下:
- % Generate the case-table body of the Verilog "reciprocal" module.
- % Each line maps an 8-bit value j to floor(2^31 / j), written as a 32-bit
- % binary literal. The output is written to reciprocal.txt.
- %
- % Changes from the earlier version of this script:
- %   * the emitted signal name is o_recip, matching the Verilog module, so the
- %     text can be pasted into the case statement without hand editing;
- %   * the bit string comes from floor/dec2bin instead of a manual loop that
- %     ran one iteration past the preallocated 32-element buffer;
- %   * fopen failure is reported instead of failing later inside fprintf;
- %   * the final "default" line ends with a newline.
- out_name  = 'reciprocal.txt';
- bit_num   = 32;            % width of every table entry
- frac_bits = bit_num - 1;   % entries are the real reciprocal scaled by 2^31
- fid = fopen(out_name, 'w');
- if fid < 0
-     error('Cannot open %s for writing.', out_name);
- end
- zero_bits = repmat('0', 1, bit_num);
- % 1/0 is undefined; the table stores 0 for it.
- fprintf(fid, ' 8''d000: o_recip = %d''b%s;\n', bit_num, zero_bits);
- for j = 1:255
-     % Truncate (not round) the scaled reciprocal. Scaling by a power of two is
-     % exact in double precision, so this yields the same bits as extracting
-     % the first 31 fractional bits of 1/j one at a time.
-     scaled = floor(2^frac_bits / j);
-     fprintf(fid, ' 8''d%03d: o_recip = %d''b%s;\n', j, bit_num, dec2bin(scaled, bit_num));
- end
- % Fallback entry for the Verilog case statement.
- fprintf(fid, 'default: o_recip = %d''b%s;\n', bit_num, zero_bits);
- fclose(fid);
沒有留言:
張貼留言