基于迭代单元的不恢复余数开方器

基本算法

与恢复余数开方器类似,不恢复余数开方器也是通过迭代完成运算的,基本算法的伪代码如下所示

1
2
3
4
5
6
7
8
9
10
11
Ra = 被开方数(位宽2W)
Re = 余数(初值为0)
Dout = 0
for i in W-1 -> 0 {
if(Re >= 0) {
Re = {Re,Ra[2i + 1],Ra[2i]} - {Dout,2'b01}
} else {
Re = {Re,Ra[2i + 1],Ra[2i]} + {Dout,2'b11}
}
Dout = {Dout,!Re[MSB]}
}

迭代单元

基本算法

迭代单元的基本算法即基本算法中for循环包裹的部分:

1
2
3
4
5
6
7
8
9
10
input Re = 上一余数
input Dout = 上一结果
if(Re >= 0) {
Re = {Re,Ra[2i + 1],Ra[2i]} - {Dout,2'b01}
} else {
Re = {Re,Ra[2i + 1],Ra[2i]} + {Dout,2'b11}
}
Dout = {Dout,!Re[MSB]}
output 本次余数 = Re
output 本次结果 = Dout

RTL代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// One registered stage of the non-restoring square-root extractor.
//
// Parameters:
//   WIDTH - bit width of the root; the radicand is 2 * WIDTH bits wide.
//   STEP  - selects the radicand bit pair radicand[2 * STEP +: 2] consumed
//           by this stage.
//
// Ports:
//   clk            - clock; outputs update on the rising edge.
//   rst_n          - asynchronous reset, active low; clears both outputs.
//   radicand       - full radicand; only the pair selected by STEP is read.
//   last_dout      - partial root produced by the previous stage.
//   remainder_din  - partial remainder from the previous stage; bit
//                    2 * WIDTH is used as its sign.
//   this_dout      - last_dout shifted left by one with the new root bit
//                    appended (registered).
//   remainder_dout - updated partial remainder (registered).
module norestore_square_cell #(
    parameter WIDTH = 4,
    parameter STEP = 0
)(
    input clk, // Clock
    input rst_n, // Asynchronous reset active low

    input [2 * WIDTH - 1:0] radicand,
    input [WIDTH - 1:0] last_dout,
    input [2 * WIDTH:0] remainder_din,

    output reg [WIDTH - 1:0] this_dout,
    output reg [2 * WIDTH:0] remainder_dout
);

    // Index of the sign bit of the remainder vectors.
    localparam SIGN = 2 * WIDTH;

    // Sign of the incoming remainder decides between add and subtract.
    wire prev_negative = remainder_din[SIGN];

    // Radicand bit pair brought down in this stage.
    wire [1:0] pair = radicand[2 * STEP +: 2];

    // Incoming remainder shifted left by two (sign bit kept) with the pair appended.
    wire [SIGN:0] shifted_rem = {prev_negative, remainder_din[SIGN - 3:0], pair};

    // Trial operand: {last_dout, 2'b01} when the remainder is non-negative,
    // {last_dout, 2'b11} when it is negative; zero-extended to the full width.
    wire [SIGN:0] trial = {last_dout, prev_negative, 1'b1};

    // Non-negative remainder: subtract the trial value; negative: add it back.
    wire [SIGN:0] next_rem = prev_negative ? (shifted_rem + trial)
                                           : (shifted_rem - trial);

    // The new root bit is 1 exactly when the updated remainder is non-negative.
    wire next_bit = ~next_rem[SIGN];

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            this_dout      <= {WIDTH{1'b0}};
            remainder_dout <= {(2 * WIDTH + 1){1'b0}};
        end else begin
            this_dout      <= {last_dout[WIDTH - 2:0], next_bit};
            remainder_dout <= next_rem;
        end
    end

endmodule

顶层模块

顶层模块根据位宽参数生成多级迭代单元完成算法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// Pipelined non-restoring square-root extractor.
//
// Chains WIDTH instances of norestore_square_cell; stage i consumes the
// radicand bit pair [2*i+1 : 2*i], starting with the most significant pair
// (i = WIDTH - 1) and ending with the least significant one (i = 0).
//
// Each stage registers its outputs, so the partial root/remainder seen by
// stage i is (WIDTH - 1 - i) clocks older than the `radicand` input. The
// radicand is therefore delayed alongside the partial results, one register
// per stage, so that every stage reads the bit pair belonging to the same
// operand as its partial root. This allows a new radicand to be accepted
// every clock; for an input that is held stable the result is unchanged.
//
// Parameters:
//   WIDTH - bit width of the root; the radicand is 2 * WIDTH bits wide.
//
// Ports:
//   clk      - clock.
//   rst_n    - asynchronous reset, active low.
//   radicand - value whose square root is extracted.
//   dout     - root bits produced by the final stage, WIDTH clocks after the
//              corresponding radicand was sampled.
module square_extractor #(
    parameter WIDTH = 4
)(
    input clk, // Clock
    input rst_n, // Asynchronous reset active low

    input [2 * WIDTH - 1:0] radicand,

    output [WIDTH - 1:0] dout
    // output [2 * WIDTH - 1:0]remainder
);

genvar i;
generate
    for (i = WIDTH - 1; i >= 0; i = i - 1) begin : square
        wire [2 * WIDTH:0] remainder_dout, remainder_din;
        wire [WIDTH - 1:0] this_dout, last_dout;

        // Radicand aligned with this stage's partial root/remainder inputs.
        wire [2 * WIDTH - 1:0] radicand_in;
        // Copy of radicand_in registered in step with this stage's outputs,
        // handed to the next stage. Unused in stage 0.
        reg [2 * WIDTH - 1:0] radicand_q;

        if (i == WIDTH - 1) begin
            // First stage: starts from a zero remainder and an empty root,
            // and reads the module input directly.
            assign radicand_in = radicand;
            assign remainder_din = 'b0;
            assign last_dout = 'b0;
        end else begin
            // Later stages continue from the previous (higher-index) stage.
            assign radicand_in = square[i + 1].radicand_q;
            assign remainder_din = square[i + 1].remainder_dout;
            assign last_dout = square[i + 1].this_dout;
        end

        always @(posedge clk or negedge rst_n) begin
            if (~rst_n) begin
                radicand_q <= 'b0;
            end else begin
                radicand_q <= radicand_in;
            end
        end

        norestore_square_cell #(
            .WIDTH(WIDTH),
            .STEP(i)
        ) u_square_cell (
            .clk(clk), // Clock
            .rst_n(rst_n), // Asynchronous reset active low

            .radicand(radicand_in),
            .last_dout(last_dout),
            .remainder_din(remainder_din),

            .this_dout(this_dout),
            .remainder_dout(remainder_dout)
        );
    end
endgenerate

assign dout = square[0].this_dout;
// assign remainder = square[0].remainder_dout;

endmodule

TestBench

由于本算法无法获得正确余数,在验证时分别计算输出数据 dout 的平方和 dout + 1 的平方:若输入 radicand 满足 dout² ≤ radicand < (dout + 1)²,则判定结果正确

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Self-checking random testbench for square_extractor.
//
// Repeatedly drives a random radicand, holds it for 4 * WIDTH clocks, then
// checks that dout is the integer square root:
//     dout^2 <= radicand < (dout + 1)^2
// The remainder is not checked because the DUT does not export it.
// Stops the simulation on the first mismatch; otherwise runs until the
// simulator is stopped externally.
module tb_square (
);

parameter WIDTH = 4;

logic clk; // Clock
logic rst_n; // Asynchronous reset active low

logic [2 * WIDTH - 1:0] radicand;

logic [WIDTH - 1:0] dout;
// logic [2 * WIDTH - 1:0] remainder;

square_extractor #(
    .WIDTH(WIDTH)
) dut (
    .clk(clk), // Clock
    .rst_n(rst_n), // Asynchronous reset active low

    .radicand(radicand),

    .dout(dout)
    // .remainder(remainder)
);

// Free-running clock, period 100 time units.
initial begin
    clk = 0;
    forever begin
        #50 clk = ~clk;
    end
end

// Short active-low reset pulse before the first rising clock edge.
initial begin
    rst_n = 1'b1;
    #5 rst_n = 1'b0;
    #10 rst_n = 1'b1;
end

logic [2 * WIDTH - 1:0] dout_ex;   // dout zero-extended to the radicand width
logic [2 * WIDTH:0] sq_lo;         // dout^2
logic [2 * WIDTH:0] sq_hi;         // (dout + 1)^2, may equal 2^(2*WIDTH)
initial begin
    radicand = 'b0;
    forever begin
        @(negedge clk);
        // Largest legal radicand is 2^(2*WIDTH) - 1; the bound is inclusive.
        radicand = (2 * WIDTH)'($urandom_range(0, 2 ** (2 * WIDTH) - 1));
        // Hold the input long enough for the result to reach dout.
        repeat (4 * WIDTH) begin
            @(negedge clk);
        end
        dout_ex = dout;
        // Both products are evaluated at the (2*WIDTH + 1)-bit width of the
        // targets, so (dout + 1)^2 cannot overflow.
        sq_lo = dout_ex * dout_ex;
        sq_hi = (dout_ex + 1'b1) * (dout_ex + 1'b1);
        if ((sq_hi > radicand) && (sq_lo <= radicand)) begin
            $display("successfully");
        end else begin
            $display("failed");
            $display("radicand = %0d, dout = %0d", radicand, dout);
            $stop;
        end
    end
end

endmodule