引言
推排序常常应用在操作系统的任务调度中,尝试使用硬件对堆排序进行实现,在实现的过程中不使用function
和tasks
语法,即真·硬件实现
参考的博客
也就这一个博客有介绍
堆排序的Verilog
实现
原理
堆排序还需要复习一遍吗? 我肯定是要的
菜鸟-堆排序
图解排序算法(三)之堆排序
可以看到,推排序很复杂,所以我们需要祭出我们的FSM(有限状态机)
首先,将整个堆排序分为两个阶段:
- 构建大根堆或小根堆
- 从最后一个节点开始,和根节点交换,并维护大根堆
我们这里统一构建大根堆
大根堆的构建
直接上流程:
-
从第一个非叶子节点开始,读取左右孩子的值;
-
比较大小,确定是否需要交换,以及交换的数据;
-
写入或不写入,如果这个节点是根节点,那么构建结束。
-
如果写入了,需要对子树进行判断,也就是保证子树也是大根堆
还是很简单的,就是需要在读写时注意控制,以及节点编号的判断与更改
交换数据,进行排序
流程如下:
- 从最后一个节点开始;
- 交换根节点和这个节点的值;
- 维护大根堆
3.1 从根节点开始,读取左右孩子的值;
3.2 比较大小,确定是否需要交换,以及交换的数据;
3.3 写入或不写入,如果这个节点是叶子节点,那么维护结束。 - 如果这个节点已经是根节点,排序结束。
我们可以发现在这个第三步和之前构建的步骤是相似的,虽然我很想优化掉它,但是能力不太够
实现
这个部分最容易出现的问题就是读写,然后自己也是调试了好几天才弄出来文章来源:https://www.toymoban.com/news/detail-418561.html
顶层代码
module top
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8
)
(
input clk,rst,
output [ADDR_WIDTH:1] addr1,addr2,addr3, //read addr
output [ADDR_WIDTH-1:0] w_addr1,w_addr2,
output [DATA_WIDTH-1:0] data1,data2,data3, //data
output we,re,w_ok,r_ok, //enable write and read
output [DATA_WIDTH-1:0] w_data1,w_data2 //write data
);
heap_sort hp
(.clk(clk),.rst(rst),.w_ok(w_ok),.r_ok(r_ok),.re(re),
.data1(data1),.data2(data2),.data3(data3),
.addr1(addr1),.addr2(addr2),.addr3(addr3),
.we(we),.w_data1(w_data1),.w_data2(w_data2),
.w_addr1(w_addr1),.w_addr2(w_addr2));
ram ram01
(.clk(clk),.we(we),.re(re),.w_ok(w_ok),.r_ok(r_ok),
.addr1(addr1),.addr2(addr2),.addr3(addr3),
.w_data1(w_data1),.w_data2(w_data2),
.w_addr1(w_addr1),.w_addr2(w_addr2),
.r_data1(data1),.r_data2(data2),.r_data3(data3));
endmodule
排序部分状态机算法
module heap_sort
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8,
parameter FSM_STATE = 5
)
(
input clk,rst,
input [DATA_WIDTH-1:0] data1,data2,data3, //data from ram
input w_ok,r_ok, //finish read and write
output reg [ADDR_WIDTH:1] addr1,addr2,addr3, //read addr
output reg [ADDR_WIDTH-1:0] w_addr1,w_addr2,
output reg we,re, //enable write and read
output reg [DATA_WIDTH-1:0] w_data1,w_data2 //write data
);
parameter IDLE = 0;
parameter BUILD_READ = 1;
parameter BUILD_COMPARED = 2;
parameter BUILD_WRITE = 3;
parameter UPKEEP_READ = 4;
parameter UPKEEP_COMPARED = 5;
parameter UPKEEP_WRITE = 6;
parameter SORT_READY = 7;
parameter SORT_CHANGE = 8;
parameter SORT_CWRITE = 9;
parameter SORT_READ = 10;
parameter SORT_COMPARED = 11;
parameter SORT_WRITE = 12;
parameter COMPLETED = 13;
parameter ERROR = 14;
reg [FSM_STATE-1:0] state;
reg [ADDR_WIDTH-1:0] i,j,k;
reg lf;
always @(posedge clk) begin
if (rst) begin
state <= 0;
i <= 0;
j <= 0;
k <= 0;
lf <= 0;
end
else begin
case (state)
IDLE:begin
state <= BUILD_READ;
i <= ADDR_WIDTH / 2;
j <= ADDR_WIDTH;
k <= ADDR_WIDTH;
end
BUILD_READ:begin
//disable write
we = 0;
//condition of ending build
if(i)begin
state <= BUILD_COMPARED;
re = 1;
addr1 <= i;
addr2 <= 2 * i;
if(i * 2 + 1 <= ADDR_WIDTH) addr3 <= i * 2 + 1;
else addr3 <= 1;
end
else begin
state <= SORT_READY;
re <= 0;
end
end
BUILD_COMPARED:begin
// big-node
if(r_ok) begin
if(i * 2 + 1 <= ADDR_WIDTH) begin
if(data1 <= data3 && data2 <= data3) begin
w_data1 <= data3;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr3;
k <= addr3;
state = BUILD_WRITE;
end
else if(data1 <= data2 && data3 < data2) begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
k <= addr2;
state = BUILD_WRITE;
end
else begin
state = BUILD_READ;
i = i - 1;
end
end
else begin
if(data1 <= data2)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state = BUILD_WRITE;
end
else begin
state = BUILD_READ;
i = i - 1;
end
end
re <= 0;
end
end
BUILD_WRITE:begin
we = 1;
state <= UPKEEP_READ;
i = i - 1;
end
UPKEEP_READ:begin
//leaf node
if(k > ADDR_WIDTH / 2 ) state <= BUILD_READ;
else begin
state <= UPKEEP_COMPARED;
re = 1;
end
//disable write
we = 0;
addr1 <= k;
addr2 <= 2 * k;
if(k * 2 + 1 <= ADDR_WIDTH) addr3 <= k * 2 + 1;
else addr3 <= 1;
end
UPKEEP_COMPARED:begin
if(r_ok) begin
if(k * 2 + 1 <= ADDR_WIDTH) begin
if(data1 <= data3 && data2 <= data3) begin
w_data1 <= data3;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr3;
k <= addr3;
state <= UPKEEP_WRITE;
end
else if(data1 <= data2 && data3 < data2) begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
k <= addr2;
state <= UPKEEP_WRITE;
end
else state <= BUILD_READ;
end
else begin
if(data1 <= data2)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
k <= ADDR_WIDTH;
state <= UPKEEP_WRITE;
end
else state <= BUILD_READ;
end
re <= 0;
end
end
UPKEEP_WRITE:begin
we = 1;
state <= UPKEEP_READ;
end
SORT_READY:begin
we = 0;
//condition of ending sort
if(j) state <= SORT_CHANGE;
else state <= COMPLETED;
re <= 1;
addr1 <= 1;
addr2 <= j;
k <= 1;
end
SORT_CHANGE:begin
if(r_ok)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state <= SORT_CWRITE;
re <= 0;
end
end
SORT_CWRITE: begin
we = 1;
state <= SORT_READ;
j = j - 1;
end
SORT_READ:begin
//disable write
we = 0;
if(k <= j / 2) begin
state <= SORT_COMPARED;
addr1 <= k;
addr2 <= 2 * k;
if(2 * k + 1 <= j) addr3 <= 2 * k + 1;
re <= 1;
end
else begin
state <= SORT_READY;
end
end
SORT_COMPARED:begin
// big-node
if(r_ok) begin
if(k * 2 + 1 <= j) begin
if(data1 <= data3 && data2 <= data3) begin
w_data1 <= data3;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr3;
state <= SORT_WRITE;
lf <= 1;
end
else if(data1 <= data2 && data3 < data2) begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state <= SORT_WRITE;
lf <= 0;
end
else state <= SORT_READY;
end
else begin
if(data1 <= data2)begin
w_data1 <= data2;
w_data2 <= data1;
w_addr1 <= addr1;
w_addr2 <= addr2;
state = SORT_WRITE;
lf <= 0;
end
else state <= SORT_READY;
end
re <= 0;
end
end
SORT_WRITE:begin
we = 1;
state <= SORT_READ;
if(lf) k = k * 2 + 1;
else k = k * 2;
end
COMPLETED:begin
state <= COMPLETED;
end
default:begin
state <= ERROR;
end
endcase
end
end
endmodule
寄存器数组部分代码
module ram
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8
)
(
input [ADDR_WIDTH-1:0] addr1,addr2,addr3,
input [ADDR_WIDTH-1:0] w_addr1,w_addr2,
input we,clk,re,
input [DATA_WIDTH-1:0] w_data1,w_data2,
output reg [DATA_WIDTH-1:0] r_data1,r_data2,r_data3,
output reg w_ok,r_ok
);
reg [DATA_WIDTH-1:0] data[ADDR_WIDTH:1];
initial begin
$readmemh("data.mem",data); //需要创建对应的文件,用于填充数据
end
always @(posedge clk) begin
if (we ) begin
data[w_addr1] <= w_data1;
data[w_addr2] <= w_data2;
w_ok <= 1;
end
else w_ok <= 0;
if(re) begin
r_data1 <= data[addr1];
r_data2 <= data[addr2];
r_data3 <= data[addr3];
r_ok <= 1;
end
else r_ok <= 0;
end
endmodule
激励模块
module top_tb
#(
parameter ADDR_WIDTH = 12,
parameter DATA_WIDTH = 8
)();
reg clk,rst;
wire [ADDR_WIDTH:1] addr1,addr2,addr3; //read addr
wire [DATA_WIDTH-1:0] data1,data2,data3; //data
wire we; //enable write
wire [DATA_WIDTH-1:0] w_data1,w_data2; //write data
initial begin
clk <= 0;
rst <= 1;
#10;
clk <= 1;
#10;
rst <= 0;
forever begin
clk <= ~clk;
#50;
end
end
top tp(clk,rst,addr1,addr2,addr3,data1,data2,data3,we,w_data1,w_data2);
endmodule
缺陷
实现过程中,最大的缺陷是排序的数不能太多,以为I/O
端口有限文章来源地址https://www.toymoban.com/news/detail-418561.html
到了这里,关于推排序 Verilog实现原理的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!