依據此過程,可以將CPU指令集分爲四類:
(IF:取指令;ID:譯碼;EX:執行;MEM:存取數據;WB:寫回寄存器)
模式1:
指令:LOAD
IF |
ID |
EX |
MEM |
WB |
模式2:
指令:STORE,JMPR,BZ,BNZ,BN,BNN,BC,BNC
IF |
ID |
EX |
MEM |
NOP |
模式3:
指令:CMP,JUMP,NOP
XXXX |
XXXX |
XXXX |
NOP |
NOP |
模式4:
指令: LDIH,ADD,ADDI,ADDC,SUB,SUBI,SUBC,AND,OR,XOR,SLL,SRL,SLA,SRA
IF |
ID |
EX |
NOP |
WB |
在模式4中,流水線第四級(MEM)沒有任何實質性的操作,故可以將流水線第五級(WB階段)提前一級執行(即略過第四級,By Passing)。當然,在提前第五級之前,要先判斷上一條指令是否會對WB階段進行操作:當上一條指令對WB階段有操作時,若此時把當前指令的第五級提前,兩條指令會在同一週期進入WB階段,從而與上一條指令產生衝突。而對於上一條指令爲模式2的情況,由於上一條指令處於第四級時,當前指令還處於第三級,尚未進行旁路,從而不會與旁路過程產生衝突。對於模式3,由於對流水線第五級不做任何操作,所以無需提前。
代碼如下:
// Instruction opcodes: 5-bit values that PCPU compares against bits [15:11]
// of its instruction registers (id_ir / ex_ir / mem_ir / wb_ir).
// HALT reaching wb_ir sends PCPU's control FSM back to `idle.
`define NOP 5'b00000
`define HALT 5'b00001
`define LOAD 5'b00010
`define STORE 5'b00011
// Opcodes below (LDIH .. CMP) each select an add or subtract expression
// in PCPU's ALU case statement.
`define LDIH 5'b10000
`define ADD 5'b01000
`define ADDI 5'b01001
`define ADDC 5'b10001
`define SUB 5'b01010
`define SUBI 5'b01011
`define SUBC 5'b10010
`define CMP 5'b01100
//Logical / shift
`define AND 5'b01101
`define OR 5'b01110
`define XOR 5'b01111
`define SLL 5'b00100
`define SRL 5'b00111
`define SLA 5'b00101
`define SRA 5'b00110
//Control
// JUMP loads pc from the instruction's low 8 bits; JMPR and the B* opcodes
// load pc from the low 8 bits of the ALU result (see PCPU's IF stage).
`define JUMP 5'b11000
`define JMPR 5'b11001
`define BZ 5'b11010
`define BNZ 5'b11011
`define BN 5'b11100
`define BNN 5'b11101
`define BC 5'b11110
`define BNC 5'b11111
//Control-FSM state encodings (values of PCPU's state / next_state registers)
`define idle 1'b0
`define exec 1'b1
/**********************************************************/
// PCPU: 16-bit, five-stage (IF / ID / EX / MEM / WB) pipelined CPU with
// operand forwarding, a load-use stall, and a "by-pass" path that lets
// instructions with no MEM-stage work hand their result straight from EX
// to the WB register.
//
// Instruction word layout, as decoded below:
//   [15:11] opcode   [10:8] r1   [6:4] r2   [2:0] r3
//   [7:0]   8-bit immediate      [3:0] 4-bit immediate (shift count / offset)
//
// Ports:
//   clk        - not referenced inside this module (kept for interface compatibility)
//   mclk       - clock for every pipeline register
//   reset      - asynchronous, active-high reset
//   enable     - must be 1 for the control FSM to stay in `exec
//   start      - together with enable, moves the FSM from `idle to `exec
//   i_datain   - instruction word read at address i_addr
//   d_datain   - data word read from data memory
//   d_we       - data-memory write enable (set one stage after a STORE leaves EX)
//   d_addr     - data-memory address (low 8 bits of the EX/MEM result reg_C)
//   d_dataout  - data-memory write data
//   i_addr     - instruction address (the program counter)
module PCPU(
    input wire clk, mclk, reset, enable, start,
    input wire [15:0] i_datain, d_datain,
    output reg d_we,
    output reg [7:0] d_addr,// pc,
    output reg [15:0] d_dataout,
    output wire [7:0] i_addr
);
    /**********************************************************/
    // State and pipeline registers
    reg        state;
    reg        next_state = 1'b0;
    reg [7:0]  pc;
    reg [15:0] gr[7:0];                         // general-purpose register file
    reg [15:0] id_ir, wb_ir, mem_ir;            // per-stage instruction registers
    reg [15:0] reg_A, reg_B, smdr=16'b0, ex_ir; // ALU operands, store data, EX instruction
    reg [15:0] reg_C, reg_C1;                   // EX/MEM result, MEM/WB result
    reg [15:0] smdr1, ALU0;                     // store data (MEM), combinational ALU output
    reg        dw, zf, nf, cf, in;              // store-pending, zero/negative/carry flags, carry-in
    // Declared here, ahead of its first use in the EX stage.
    reg        can_be_passed;

    /**********************************************************/
    assign i_addr = pc;

    // Opcode fields of the instruction at each pipeline position.
    wire [4:0] if_op  = i_datain[15:11];
    wire [4:0] id_op  = id_ir[15:11];
    wire [4:0] ex_op  = ex_ir[15:11];
    wire [4:0] mem_op = mem_ir[15:11];
    wire [4:0] wb_op  = wb_ir[15:11];

    /**********************************************************/
    // Opcode classification helpers

    // Conditional branches (BZ/BNZ/BN/BNN/BC/BNC).
    function is_cond_branch;
        input [4:0] op;
        begin
            is_cond_branch = (op==`BZ)||(op==`BNZ)||(op==`BN)||(op==`BNN)
                           ||(op==`BC)||(op==`BNC);
        end
    endfunction

    // Register-register operations whose second operand is r3.
    function is_alu3;
        input [4:0] op;
        begin
            is_alu3 = (op==`ADD)||(op==`ADDC)||(op==`SUB)||(op==`SUBC)
                    ||(op==`CMP)||(op==`AND)||(op==`OR)||(op==`XOR);
        end
    endfunction

    // Shift operations (operand r2, count from the 4-bit immediate).
    function is_shift;
        input [4:0] op;
        begin
            is_shift = (op==`SLL)||(op==`SRL)||(op==`SLA)||(op==`SRA);
        end
    endfunction

    // Instructions whose ALU operand A is read from r1.
    function uses_r1_as_A;
        input [4:0] op;
        begin
            uses_r1_as_A = is_cond_branch(op)||(op==`ADDI)||(op==`SUBI)
                         ||(op==`LDIH)||(op==`JMPR);
        end
    endfunction

    // Instructions whose ALU operand A is read from r2.
    function uses_r2_as_A;
        input [4:0] op;
        begin
            uses_r2_as_A = (op==`LOAD)||(op==`STORE)||is_alu3(op)||is_shift(op);
        end
    endfunction

    // "Mode 4" instructions: produce a register result in EX and do nothing in MEM.
    function is_mode4;
        input [4:0] op;
        begin
            is_mode4 = (op==`LDIH)||(op==`ADD)||(op==`ADDI)||(op==`ADDC)
                     ||(op==`SUB)||(op==`SUBI)||(op==`SUBC)
                     ||(op==`AND)||(op==`OR)||(op==`XOR)||is_shift(op);
        end
    endfunction

    // Instructions that write gr[r1] in the WB stage (same set the WB stage tests).
    function writes_gr;
        input [4:0] op;
        begin
            writes_gr = (op==`LOAD)||is_mode4(op);
        end
    endfunction

    // Value of register r as seen by the ID stage, taking the newest
    // in-flight result when a later stage is about to write r.
    // Only instructions that really write the register file are forwarded
    // from; STORE / JMPR / branches carry an r1 field but never write it, so
    // their ALU results (addresses / targets) must not be forwarded.
    // A LOAD in EX has no data yet and is skipped (the IF stage stalls for it).
    function [15:0] read_operand;
        input [2:0] r;
        begin
            if ((r==ex_ir[10:8]) && is_mode4(ex_ir[15:11]))
                read_operand = ALU0;
            else if ((r==mem_ir[10:8]) && writes_gr(mem_ir[15:11]))
            begin
                if (mem_ir[15:11]==`LOAD) read_operand = d_datain;
                else read_operand = reg_C;
            end
            else if ((r==wb_ir[10:8]) && writes_gr(wb_ir[15:11]))
                read_operand = reg_C1;
            else
                read_operand = gr[r];
        end
    endfunction

    // A branch/JMPR sitting in MEM redirects the PC when its condition holds.
    // Shared by the IF stage and by the ID-stage operand clearing so that all
    // three places agree (BNC tests the carry flag everywhere).
    wire branch_taken =
           ((mem_op==`BZ) &&(zf==1'b1)) || ((mem_op==`BNZ)&&(zf==1'b0))
        || ((mem_op==`BN) &&(nf==1'b1)) || ((mem_op==`BNN)&&(nf==1'b0))
        || ((mem_op==`BC) &&(cf==1'b1)) || ((mem_op==`BNC)&&(cf==1'b0))
        || (mem_op==`JMPR);

    // Load-use hazard: the LOAD in ID writes r1 (id_ir[10:8]) and the
    // instruction being fetched reads that register as r3, r2 or r1.
    wire [2:0] ld_rd = id_ir[10:8];
    wire load_use_hazard =
           (id_op==`LOAD)
        && (if_op!=`JUMP) && (if_op!=`NOP) && (if_op!=`HALT) && (if_op!=`LOAD)
        && (   ((ld_rd==i_datain[2:0])  && is_alu3(if_op))
            || ((ld_rd==i_datain[6:4])  && ((if_op==`STORE)||is_alu3(if_op)||is_shift(if_op)))
            || ((ld_rd==i_datain[10:8]) && ((if_op==`STORE)||uses_r1_as_A(if_op))) );

    /**********************************************************/
    //CPU_Control
    always @ (posedge mclk or posedge reset)
    begin
        if(reset)
            state <= `idle;
        else
            state <= next_state;
    end

    // Next-state logic (combinational, hence blocking assignments).
    always @ (*)
    begin
        case(state)
            `idle: begin
                if((enable==1'b1)&&(start==1'b1)) next_state = `exec;
                else next_state = `idle;
            end
            `exec: begin
                // Stop when disabled or when a HALT reaches the WB register.
                if((enable==1'b0)||(wb_op==`HALT)) next_state = `idle;
                else next_state = `exec;
            end
            default: next_state = `idle;
        endcase
    end

    /*******************************************************************************/
    //CPU_IF
    always @ (posedge mclk or posedge reset)
    begin
        if(reset)
        begin
            id_ir <= 16'b0;
            pc <= 8'b0;
        end
        else if(state == `exec)
        begin
            if(branch_taken)
            begin
                // Branch target is the low byte of the EX/MEM result.
                pc <= reg_C[7:0];
                id_ir <= i_datain;
            end
            else if(id_op==`JUMP)
            begin
                pc <= id_ir[7:0];
                id_ir <= i_datain;
            end
            else if(load_use_hazard)
            begin
                // Hold the PC for one cycle and insert an explicit NOP bubble
                // (a defined value rather than X) behind the LOAD.
                pc <= pc;
                id_ir <= {`NOP, 11'b0};
            end
            else
            begin
                // Normal fetch. This also covers a LOAD followed by an
                // instruction that does not depend on it, which must not
                // freeze the pipeline.
                pc <= pc + 1'b1;
                id_ir <= i_datain;
            end
        end
        // In `idle the PC and id_ir simply hold their values.
    end

    /*******************************************************************************/
    //CPU_ID
    always @ (posedge mclk or posedge reset)
    begin
        if(reset)
        begin
            reg_A <= 16'b0;
            reg_B <= 16'b0;
            smdr <= 16'b0;
            ex_ir <= 16'b0;
        end
        else if(state == `exec)
        begin
            ex_ir <= id_ir;

            /********************* Operand A (with forwarding) *********************/
            if(uses_r1_as_A(id_op))
                reg_A <= read_operand(id_ir[10:8]);   //r1
            else if(uses_r2_as_A(id_op))
                reg_A <= read_operand(id_ir[6:4]);    //r2
            else if(branch_taken)
                reg_A <= 16'b0;

            /********************* Operand B (with forwarding) *********************/
            if(id_op==`LDIH)
                reg_B <= {id_ir[7:0], 8'b0000_0000};          // immediate into the high byte
            else if((id_op==`STORE)||(id_op==`LOAD)||is_shift(id_op))
                reg_B <= {12'b0000_0000_0000, id_ir[3:0]};    // 4-bit immediate
            else if(is_cond_branch(id_op)||(id_op==`ADDI)||(id_op==`SUBI)
                  ||(id_op==`JUMP)||(id_op==`JMPR))
                reg_B <= {8'b0000_0000, id_ir[7:0]};          // 8-bit immediate
            else if(is_alu3(id_op))
                reg_B <= read_operand(id_ir[2:0]);            //r3
            else if(branch_taken)
                reg_B <= 16'b0;

            /********************* Store data (with forwarding) *********************/
            if(id_op==`STORE)
                smdr <= read_operand(id_ir[10:8]);
        end
    end

    /*******************************************************************************/
    //CPU_EX
    always @ (posedge mclk or posedge reset)
    begin
        if(reset)
        begin
            dw <= 1'b0;
            zf <= 1'b0;
            nf <= 1'b0;
            mem_ir <= 16'b0;
            reg_C <= 16'b0;
            smdr1 <= 16'b0;
        end
        else if(state == `exec)
        begin
            /********************* PASS SELECT *********************/
            // While the by-pass flag is set the EX/MEM registers hold their
            // previous contents; the MEM-stage block takes ex_ir / ALU0 directly.
            if(!can_be_passed)
            begin
                reg_C <= ALU0;
                mem_ir <= ex_ir;
                smdr1 <= smdr;
            end
            /************************* END *************************/
            // Mode-4 instructions and CMP update the zero / negative flags.
            if(is_mode4(ex_op)||(ex_op==`CMP))
            begin
                //Get zf
                if(ALU0==16'b0) zf <= 1'b1;
                else zf <= 1'b0;
                //Get nf
                if(ALU0[15]==1'b1) nf <= 1'b1;
                else nf <= 1'b0;
            end
            else begin zf <= zf; nf <= nf; end

            if(ex_op == `STORE)
            begin
                dw <= 1'b1;
            end
            else begin dw <= 1'b0; end
        end
    end

    /*******************************************************************************/
    //CPU_ALU
    // Combinational ALU. The block is sensitive to every signal it reads
    // (including the carry-in `in` and `reset`), and the arithmetic shifts
    // take the signed view of reg_A directly instead of going through a
    // separately-updated copy.
    // Opcodes that do not assign cf / ALU0 leave them holding their last value.
    // NOTE(review): cf is driven from whatever instruction is in EX, including
    // the address arithmetic of branches, while BC/BNC are resolved from
    // mem_ir one stage later - confirm that this is the intended carry flag.
    always @ (*)
    begin
        if(reset) {cf, ALU0} = 17'b0;
        else
            case(ex_ir[15:11])
                `LOAD, `STORE, `LDIH, `ADD, `ADDI,
                `JMPR, `BZ, `BNZ, `BN, `BNN, `BC, `BNC:
                        {cf, ALU0} = reg_A + reg_B;
                `ADDC:  {cf, ALU0} = reg_A + reg_B + in;
                `SUB, `SUBI, `CMP:
                        {cf, ALU0} = reg_A - reg_B;
                `SUBC:  {cf, ALU0} = reg_A - reg_B - in;
                `AND:   ALU0 = (reg_A & reg_B);
                `OR:    ALU0 = (reg_A | reg_B);
                `XOR:   ALU0 = (reg_A ^ reg_B);
                `SLL:   ALU0 = (reg_A << reg_B[3:0]);
                `SRL:   ALU0 = (reg_A >> reg_B[3:0]);
                `SLA:   ALU0 = ($signed(reg_A) <<< reg_B[3:0]);
                `SRA:   ALU0 = ($signed(reg_A) >>> reg_B[3:0]);
                default: ; // cf and ALU0 keep their previous values
            endcase
    end

    /*******************************************************************************/
    /************************** PASS OR NOT **************************/
    // By-pass decision: set when the instruction in EX is a mode-4 instruction
    // and the instruction in MEM is not a LOAD (which needs the WB register
    // itself on the next edge).
    // NOTE(review): this flag is registered on the same clock edge that
    // advances ex_ir, so when it is read it was computed from the previous
    // occupant of EX - confirm that the one-cycle lag is intended.
    always @ (posedge mclk or posedge reset)
    begin
        if(reset)
            begin can_be_passed <= 1'b0; end
        else if(is_mode4(ex_op))
        begin
            if(mem_op == `LOAD)
                begin can_be_passed <= 1'b0; end
            else
                begin can_be_passed <= 1'b1; end
        end
        else
            begin can_be_passed <= 1'b0; end
    end
    /************************** END **************************/

    /*******************************************************************************/
    //CPU_MEM
    always @ (posedge mclk or posedge reset)
    begin
        if(reset)
        begin
            in <= 1'b0;
            wb_ir <= 16'b0;
            reg_C1 <= 16'b0;
            d_we <= 1'b0;
            d_addr <= 8'b0;
            d_dataout <= 16'b0;
        end
        else if(state == `exec)
        begin
            in <= cf;                 // carry-in for the next ADDC / SUBC
            d_we <= dw;
            d_addr <= reg_C[7:0];
            d_dataout <= smdr1;
            /********************* PASS SELECT *********************/
            // With the by-pass flag set, WB receives the EX-stage instruction
            // and ALU result directly; a LOAD in MEM always supplies d_datain.
            if(can_be_passed) wb_ir <= ex_ir;
            else wb_ir <= mem_ir;
            if(mem_op==`LOAD) reg_C1 <= d_datain;
            else if(can_be_passed) reg_C1 <= ALU0;
            else reg_C1 <= reg_C;
            /************************* END *************************/
        end
    end

    /*******************************************************************************/
    //CPU_WB
    always @ (posedge mclk or posedge reset)
    begin
        if(reset)
        begin
            gr[0] <= 16'b0;
            gr[1] <= 16'b0;
            gr[2] <= 16'b0;
            gr[3] <= 16'b0;
            gr[4] <= 16'b0;
            gr[5] <= 16'b0;
            gr[6] <= 16'b0;
            gr[7] <= 16'b0;
        end
        else if(state == `exec)
        begin
            // Only LOAD and the mode-4 instructions write the register file.
            if(writes_gr(wb_op))
                begin gr[wb_ir[10:8]] <= reg_C1; end
        end
    end
    /*******************************************************************************/
endmodule
優化前後的XPOWER測試結果比較:
使用“By Passing”優化後動態功率減小爲優化前的55%。