基于迭代单元的不恢复余数开方器

基本算法

与恢复余数开方器类似，不恢复余数开方器也是通过迭代完成运算的，基本算法的伪代码如下所示：

Ra = 被开方数(位宽2W)
Re = 余数(初值为0)
Dout = 0
for i in W - 1 -> 0 {
  if(Re >= 0) {
    Re = {Re,Ra[2i + 1],Ra[2i]} - {Dout,2'b01}
  } else {
    Re = {Re,Ra[2i + 1],Ra[2i]} + {Dout,2'b11}
  }
  Dout = {Dout,!Re[MSB]}
}

迭代单元

基本算法

迭代单元的基本算法即基本算法中for循环包裹的部分：

input Re = 上一余数
input Dout = 上一结果
if(Re >= 0) {
  Re = {Re,Ra[2i + 1],Ra[2i]} - {Dout,2'b01}
} else {
  Re = {Re,Ra[2i + 1],Ra[2i]} + {Dout,2'b11}
}
Dout = {Dout,!Re[MSB]}
output 本次余数 = Re
output 本次结果 = Dout

RTL代码

// One registered iteration of the non-restoring square-root algorithm.
//
// Parameters
//   WIDTH : number of result bits; the radicand is 2*WIDTH bits wide.
//   STEP  : index of the radicand bit pair (bits [2*STEP+1:2*STEP]) that this
//           stage shifts into the partial remainder.
//
// Ports
//   radicand       : full radicand; only the bit pair selected by STEP is used.
//   last_dout      : partial root coming from the previous iteration.
//   remainder_din  : partial remainder from the previous iteration; its top
//                    bit selects between the add and the subtract path.
//   this_dout      : registered partial root, extended by one new bit.
//   remainder_dout : registered partial remainder after this iteration.
module norestore_square_cell #(
	parameter WIDTH = 4,
	parameter STEP = 0
)(
	input clk,    // Clock
	input rst_n,  // Asynchronous reset active low

	input [2 * WIDTH - 1:0]radicand,
	input [WIDTH - 1:0]last_dout,
	input [2 * WIDTH:0]remainder_din,

	output reg [WIDTH - 1:0]this_dout,
	output reg [2 * WIDTH:0]remainder_dout
);

// Index of the top (sign) bit of the remainder vectors.
localparam SIGN = 2 * WIDTH;

// Top bit of the incoming remainder: 1 selects the add path, 0 the subtract path.
wire rem_is_negative = remainder_din[SIGN];

// Incoming remainder shifted left by two with the selected radicand bit pair
// appended; the top bit is carried over unchanged.
wire [SIGN:0]shifted_rem;
assign shifted_rem = {rem_is_negative,remainder_din[SIGN - 3:0],radicand[2 * STEP +:2]};

// Trial operands built from the partial root (zero-extended to the remainder width).
wire [SIGN:0]sub_operand;
wire [SIGN:0]add_operand;
assign sub_operand = {last_dout,2'b01};
assign add_operand = {last_dout,2'b11};

// Both candidate remainders are formed; the incoming top bit picks one.
wire [SIGN:0]diff_rem;
wire [SIGN:0]sum_rem;
wire [SIGN:0]next_rem;
assign diff_rem = shifted_rem - sub_operand;
assign sum_rem = shifted_rem + add_operand;
assign next_rem = rem_is_negative ? sum_rem : diff_rem;

always @(posedge clk or negedge rst_n) begin
	if(~rst_n) begin
		this_dout <= 'b0;
		remainder_dout <= 'b0;
	end else begin
		// The new root bit is the inverse of the new remainder's top bit.
		this_dout <= {last_dout[WIDTH - 2:0],~next_rem[SIGN]};
		remainder_dout <= next_rem;
	end
end

endmodule

顶层模块

顶层模块根据位宽参数生成多级迭代单元完成算法

// Pipelined non-restoring square-root extractor.
//
// Chains WIDTH instances of norestore_square_cell. Stage WIDTH-1 runs first
// and consumes the most significant radicand bit pair; stage 0 runs last and
// its partial root is the final result.
//
// Every stage is registered, so the radicand is delayed alongside the partial
// results: each stage receives the operand that belongs to the partial
// root/remainder arriving from the previous stage. This lets a new radicand be
// applied every clock. The result for a given radicand appears on dout WIDTH
// clock edges after it was presented (unchanged from holding the input
// constant for WIDTH clocks).
//
// Parameters
//   WIDTH : number of result bits; the radicand is 2*WIDTH bits wide.
//
// Ports
//   radicand : unsigned value to take the square root of.
//   dout     : integer square root, taken from the last stage.
module square_extractor #(
	parameter WIDTH = 4
)(
	input clk,    // Clock
	input rst_n,  // Asynchronous reset active low

	input [2 * WIDTH - 1:0]radicand,

	output [WIDTH - 1:0]dout
	// output [2 * WIDTH - 1:0]remainder
);

genvar i;
generate
	for (i = WIDTH - 1; i >= 0; i = i - 1) begin:square
		wire [2 * WIDTH:0]remainder_dout,remainder_din;
		wire [WIDTH - 1:0]this_dout,last_dout;
		// Radicand as seen by this stage, and its registered copy that is
		// handed to the next stage together with this stage's results.
		wire [2 * WIDTH - 1:0]radicand_in;
		reg [2 * WIDTH - 1:0]radicand_q;
		if(i == WIDTH - 1) begin
			// First stage: start from a zero remainder and an empty root.
			assign remainder_din = 'b0;
			assign last_dout = 'b0;
			assign radicand_in = radicand;
		end else begin
			// Later stages: take everything from the previous stage's registers.
			assign remainder_din = square[i + 1].remainder_dout;
			assign last_dout = square[i + 1].this_dout;
			assign radicand_in = square[i + 1].radicand_q;
		end

		// Keep the operand aligned with the registered outputs of this stage.
		// The copy in stage 0 has no consumer.
		always @(posedge clk or negedge rst_n) begin
			if(~rst_n) begin
				radicand_q <= 'b0;
			end else begin
				radicand_q <= radicand_in;
			end
		end

		norestore_square_cell #(
			.WIDTH(WIDTH),
			.STEP(i)
		) u_square_cell (
			.clk(clk),    // Clock
			.rst_n(rst_n),  // Asynchronous reset active low

			.radicand(radicand_in),
			.last_dout(last_dout),
			.remainder_din(remainder_din),

			.this_dout(this_dout),
			.remainder_dout(remainder_dout)
		);
	end
endgenerate

assign dout = square[0].this_dout;
// assign remainder = square[0].remainder_dout;

endmodule

TestBench

由于本算法无法获得正确余数，验证时分别计算输出数据 dout 的平方和 dout + 1 的平方；若输入满足 dout² ≤ radicand < (dout + 1)²，则判定结果正确

// Self-checking testbench for square_extractor.
//
// Applies a random radicand, holds it for 4*WIDTH clock cycles, then checks
// that dout is the integer square root:
//     dout**2 <= radicand < (dout + 1)**2
// The remainder is not checked. Simulation stops at the first mismatch.
module tb_square (
);

parameter WIDTH = 4;

logic clk;    // Clock
logic rst_n;  // Asynchronous reset active low

logic [2 * WIDTH - 1:0]radicand;

logic [WIDTH - 1:0]dout;
logic [2 * WIDTH - 1:0]remainder; // only needed if the remainder port below is re-enabled

square_extractor #(
	.WIDTH(WIDTH)
) dut (
	.clk(clk),    // Clock
	.rst_n(rst_n),  // Asynchronous reset active low

	.radicand(radicand),

	.dout(dout)
	// .remainder(remainder)
);

// Free-running clock, 100 time units per period.
initial begin
	clk = 0;
	forever begin
		#50 clk = ~clk;
	end
end

// Short asynchronous reset pulse that ends before the first rising clock edge.
initial begin
	rst_n = 1'b1;
	#5 rst_n = 1'b0;
	#10 rst_n = 1'b1;
end

// dout widened to the radicand width for the comparison below.
logic [2 * WIDTH - 1:0]dout_ex;
initial begin
	radicand = 'b0;
	forever begin
		@(negedge clk);
		// $urandom_range bounds are inclusive, so the upper bound is the
		// largest representable radicand, 2**(2*WIDTH) - 1.
		radicand = (2 * WIDTH)'($urandom_range(0,(2 ** (2 * WIDTH)) - 1));
		repeat(4 * WIDTH) begin
			@(negedge clk);
		end
		// Plain assignment zero-extends dout to 2*WIDTH bits.
		dout_ex = dout;
		// The literal 1 is 32 bits wide, so (dout_ex + 1) ** 2 is evaluated
		// at 32 bits rather than at 2*WIDTH bits.
		if(((dout_ex + 1) ** 2 > radicand) && (dout_ex ** 2 <= radicand)) begin
			$display("successfully");
		end else begin
			$display("failed");
			$stop;
		end
	end
end

endmodule