Skip to content

Commit e7a39d9

Browse files
khovgsermazz
authored and committed
[hardware] Implement QLR module in Snitch
1 parent 58d6ecc commit e7a39d9

File tree

10 files changed

+1156
-33
lines changed

10 files changed

+1156
-33
lines changed

Bender.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ sources:
3535
- hardware/src/axi2mem.sv
3636
- hardware/src/bootrom.sv
3737
- hardware/src/latch_scm.sv
38+
- hardware/src/snitch_qlr.sv
39+
- hardware/src/snitch_qlr_group.sv
3840
# Level 1
3941
- hardware/src/mempool_tile.sv
4042
# Level 2

config/config.mk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ xqueue ?= 0
6565
# XQueue extension's queue size in each memory bank (in words)
6666
xqueue_size ?= 0
6767

68+
#########################
69+
## QLR configuration ##
70+
#########################
71+
72+
# QLR FIFO size, forwarded to the RTL as QLR_FIFO_SIZE
qlr_fifo_size ?= 0
# hardware/Makefile forwards these two unconditionally as -DQLR_MAX_REQUESTS=...
# and -DQLR_MAX_RF_READS=...; without a default the defines expand to empty
# text and break the `ifdef-guarded localparams in mempool_pkg.
qlr_max_requests ?= 0
qlr_max_rf_reads ?= 0
73+
6874
################################
6975
## Optional functionalities ##
7076
################################

config/systolic.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,11 @@ xqueue ?= 1
3939
# - software queues emulation (size measured in queue entries)
4040
# - hardware xqueue's queue in each memory bank (size measured in words)
4141
xqueue_size ?= 4
42+
43+
#########################
44+
## QLR configuration ##
45+
#########################
46+
47+
qlr_fifo_size ?= 4
48+
qlr_max_requests ?= 4095
49+
qlr_max_rf_reads ?= 7

hardware/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ vlog_defs += -DRO_LINE_WIDTH=$(ro_line_width)
9696
vlog_defs += -DDMAS_PER_GROUP=$(dmas_per_group)
9797
vlog_defs += -DAXI_HIER_RADIX=$(axi_hier_radix) -DAXI_MASTERS_PER_GROUP=$(axi_masters_per_group)
9898
vlog_defs += -DSEQ_MEM_SIZE=$(seq_mem_size) -DXQUEUE=$(xqueue) -DXQUEUE_SIZE=$(xqueue_size)
99+
vlog_defs += -DQLR_FIFO_SIZE=$(qlr_fifo_size) -DQLR_MAX_REQUESTS=$(qlr_max_requests) -DQLR_MAX_RF_READS=$(qlr_max_rf_reads)
99100

100101
# Traffic generation enabled
101102
ifdef tg

hardware/deps/snitch/src/snitch.sv

Lines changed: 144 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ module snitch
1919
parameter bit RVE = 0, // Reduced-register Extension
2020
parameter bit RVM = 1, // Enable Integer Multiplication & Division Extension
2121
parameter int RegNrWritePorts = 2, // Implement one or two write ports into the register file
22-
parameter bit Xqueue = 0
22+
parameter bit Xqueue = 0,
23+
parameter bit Qlr = 0
2324
) (
2425
input logic clk_i,
2526
input logic rst_i,
@@ -249,6 +250,10 @@ module snitch
249250
logic dst_ready;
250251
logic opa_ready, opb_ready, opc_ready;
251252
logic dstrd_ready, dstrs1_ready;
253+
logic [2**RegWidth-1:0] sb;
254+
logic [3:0] qlr_sb_enabled;
255+
logic [3:0] qlr_sb;
256+
logic [3:0] qlr_ready;
252257

253258
always_comb begin
254259
sb_d = sb_q;
@@ -258,15 +263,41 @@ module snitch
258263
if (retire_acc) sb_d[acc_pid_i[RegWidth-1:0]] = 1'b0;
259264
sb_d[0] = 1'b0;
260265
end
261-
// TODO(zarubaf): This can probably be described a bit more efficient
262-
assign opa_ready = (opa_select != Reg) | ~sb_q[rs1];
263-
assign opb_ready = ((opb_select != Reg & opb_select != SImmediate) | ~sb_q[rs2]) & ((opb_select != RegRd) | ~sb_q[rd]);
264-
assign opc_ready = ((opc_select != Reg) | ~sb_q[rd]) & ((opc_select != RegRs2) | ~sb_q[rs2]);
265-
assign operands_ready = opa_ready & opb_ready & opc_ready;
266-
// either we are not using the destination register or we need to make
267-
// sure that its destination operand is not marked busy in the scoreboard.
268-
assign dstrd_ready = ~uses_rd | (uses_rd & ~sb_q[rd]);
269-
assign dstrs1_ready = ~uses_rs1 | (uses_rs1 & ~sb_q[rs1]);
266+
267+
if (Qlr) begin: gen_sb_qlr
  // Effective scoreboard seen by the issue logic: select the internal
  // scoreboard or the scoreboard provided by the QLRs. Register indices
  // 5, 6, 7 and 28 are each paired with one QLR (index 0..3); every other
  // register uses the internal scoreboard bit unchanged.
  // TODO(khovg): Make the assigned QLR register tags configurable
  always_comb begin
    for (int ii = 0; ii < 2**RegWidth; ii++) begin
      unique case (ii)
        // QLR-provided bit when qlr_sb_enabled is set; otherwise the internal
        // bit, additionally held busy while the QLR reports not ready.
        5:  sb[ii] = qlr_sb_enabled[0] ? qlr_sb[0] : (sb_q[ii] | ~qlr_ready[0]);
        6:  sb[ii] = qlr_sb_enabled[1] ? qlr_sb[1] : (sb_q[ii] | ~qlr_ready[1]);
        7:  sb[ii] = qlr_sb_enabled[2] ? qlr_sb[2] : (sb_q[ii] | ~qlr_ready[2]);
        28: sb[ii] = qlr_sb_enabled[3] ? qlr_sb[3] : (sb_q[ii] | ~qlr_ready[3]);
        default: sb[ii] = sb_q[ii];
      endcase
    end
  end
end else begin: gen_sb
  // Without QLRs the effective scoreboard is the internal scoreboard.
  assign sb = sb_q;
end

// Operand readiness, evaluated once on the effective scoreboard so that the
// QLR and non-QLR configurations cannot drift apart.
// TODO(zarubaf): This can probably be described a bit more efficiently
assign opa_ready = (opa_select != Reg) | ~sb[rs1];
assign opb_ready = ((opb_select != Reg & opb_select != SImmediate) | ~sb[rs2]) & ((opb_select != RegRd) | ~sb[rd]);
assign opc_ready = ((opc_select != Reg) | ~sb[rd]) & ((opc_select != RegRs2) | ~sb[rs2]);
assign operands_ready = opa_ready & opb_ready & opc_ready;
// either we are not using the destination register or we need to make
// sure that its destination operand is not marked busy in the scoreboard.
assign dstrd_ready = ~uses_rd | (uses_rd & ~sb[rd]);
assign dstrs1_ready = ~uses_rs1 | (uses_rs1 & ~sb[rs1]);
300+
270301
assign dst_ready = dstrd_ready & dstrs1_ready;
271302

272303
assign valid_instr = (inst_ready_i & inst_valid_o) & operands_ready & dst_ready;
@@ -1584,29 +1615,115 @@ module snitch
15841615
end
15851616
/* verilator lint_on WIDTH */
15861617

1618+
// --------------------
1619+
// QLR
1620+
// --------------------
1621+
// intermediate signals (QLRs <-> LSU)
1622+
logic [RegWidth-1:0] lsu_req_tag;
1623+
logic lsu_req_write;
1624+
logic lsu_req_signed;
1625+
logic [31:0] lsu_req_addr;
1626+
logic [31:0] lsu_req_data;
1627+
logic [1:0] lsu_req_size;
1628+
logic [3:0] lsu_req_amo;
1629+
logic lsu_req_qlr;
1630+
logic lsu_req_valid;
1631+
logic lsu_req_ready;
1632+
logic [31:0] lsu_resp_data;
1633+
logic [RegWidth-1:0] lsu_resp_tag;
1634+
logic lsu_resp_error;
1635+
logic lsu_resp_qlr;
1636+
logic lsu_resp_valid;
1637+
logic lsu_resp_ready;
1638+
1639+
// instruction usage bits
1640+
logic instr_reads_rs1, instr_reads_rs2, instr_reads_rd;
1641+
1642+
// group of QLRs
1643+
// The QLR group sits between the core's load/store interface and the LSU:
// core requests enter on lsu_q*_i, requests towards the LSU leave on
// lsu_out_*, LSU responses return on lsu_in_* and are handed to the core on
// lsu_p*. When QLRs are disabled the core is wired straight to the LSU.
if (Qlr) begin: gen_qlr_group
  snitch_qlr_group #(
    .NumWritePorts ( RegNrWritePorts     ),
    .tag_t         ( logic[RegWidth-1:0] )
  ) i_qlr_group (
    .clk_i                               ,
    .rst_ni            ( ~rst_i          ),
    // core -> QLR group (request)
    .lsu_qtag_i        ( rd              ),
    .lsu_qwrite_i      ( is_store        ),
    .lsu_qsigned_i     ( is_signed       ),
    .lsu_qaddr_i       ( lsu_qaddr       ),
    .lsu_qdata_i       ( gpr_rdata[1]    ),
    .lsu_qsize_i       ( ls_size         ),
    .lsu_qamo_i        ( ls_amo          ),
    .lsu_qvalid_i      ( lsu_qvalid      ),
    .lsu_qready_o      ( lsu_qready      ),
    // QLR group -> LSU (request)
    .lsu_out_tag_o     ( lsu_req_tag     ),
    .lsu_out_write_o   ( lsu_req_write   ),
    .lsu_out_signed_o  ( lsu_req_signed  ),
    .lsu_out_addr_o    ( lsu_req_addr    ),
    .lsu_out_data_o    ( lsu_req_data    ),
    .lsu_out_size_o    ( lsu_req_size    ),
    .lsu_out_amo_o     ( lsu_req_amo     ),
    .lsu_out_qlr_o     ( lsu_req_qlr     ),
    .lsu_out_valid_o   ( lsu_req_valid   ),
    .lsu_out_ready_i   ( lsu_req_ready   ),
    // LSU -> QLR group (response)
    .lsu_in_data_i     ( lsu_resp_data   ),
    .lsu_in_tag_i      ( lsu_resp_tag    ),
    .lsu_in_error_i    ( lsu_resp_error  ),
    .lsu_in_qlr_i      ( lsu_resp_qlr    ),
    .lsu_in_valid_i    ( lsu_resp_valid  ),
    .lsu_in_ready_o    ( lsu_resp_ready  ),
    // QLR group -> core (response)
    .lsu_pdata_o       ( ld_result       ),
    .lsu_ptag_o        ( lsu_rd          ),
    .lsu_perror_o      (                 ), // ignored for the moment
    .lsu_pvalid_o      ( lsu_pvalid      ),
    .lsu_pready_i      ( lsu_pready      ),
    // instruction register usage and scoreboard exchange
    .instr_rs1_i       ( rs1             ),
    .instr_rs2_i       ( rs2             ),
    .instr_rd_i        ( rd              ),
    .instr_reads_rs1_i ( instr_reads_rs1 ),
    .instr_reads_rs2_i ( instr_reads_rs2 ),
    .instr_reads_rd_i  ( instr_reads_rd  ),
    .instr_executed_i  ( ~stall          ),
    .qlr_sb_enabled_o  ( qlr_sb_enabled  ),
    .qlr_sb_o          ( qlr_sb          ),
    // register file write port observation
    .rf_in_data_i      ( gpr_wdata       ),
    .rf_in_tag_i       ( gpr_waddr       ),
    .rf_in_vld_i       ( gpr_we          ),
    .qlr_ready_o       ( qlr_ready       )
  );

  // instruction decoding
  assign instr_reads_rs1 = (opa_select == Reg);
  assign instr_reads_rs2 = (opb_select inside {Reg, SImmediate}) | (opc_select == RegRs2);
  assign instr_reads_rd  = (opb_select == RegRd) | (opc_select == Reg);
end else begin: gen_no_qlr_group
  // No QLRs: connect the core directly to the LSU, exactly as before the
  // QLR group was inserted. Without this branch the LSU request inputs and
  // the core-side handshake/response signals would be undriven.
  assign lsu_req_tag    = rd;
  assign lsu_req_write  = is_store;
  assign lsu_req_signed = is_signed;
  assign lsu_req_addr   = lsu_qaddr;
  assign lsu_req_data   = gpr_rdata[1];
  assign lsu_req_size   = ls_size;
  assign lsu_req_amo    = ls_amo;
  assign lsu_req_qlr    = 1'b0;          // no request originates from a QLR
  assign lsu_req_valid  = lsu_qvalid;
  assign lsu_qready     = lsu_req_ready;

  assign ld_result      = lsu_resp_data;
  assign lsu_rd         = lsu_resp_tag;
  assign lsu_pvalid     = lsu_resp_valid;
  assign lsu_resp_ready = lsu_pready;

  // QLR-only signals are unused in this configuration; tie them off so no
  // net is left floating.
  assign instr_reads_rs1 = 1'b0;
  assign instr_reads_rs2 = 1'b0;
  assign instr_reads_rd  = 1'b0;
  assign qlr_sb_enabled  = '0;
  assign qlr_sb          = '0;
  assign qlr_ready       = '1;
end
1700+
15871701
// --------------------
15881702
// LSU
15891703
// --------------------
15901704
snitch_lsu #(
15911705
.tag_t ( logic[RegWidth-1:0] ),
1592-
.NumOutstandingLoads ( snitch_pkg::NumIntOutstandingLoads )
1706+
.NumOutstandingLoads ( snitch_pkg::NumIntOutstandingLoads ),
1707+
.Qlr ( Qlr )
15931708
) i_snitch_lsu (
1594-
.clk_i ,
1595-
.rst_i ,
1596-
.lsu_qtag_i ( rd ),
1597-
.lsu_qwrite ( is_store ),
1598-
.lsu_qsigned ( is_signed ),
1599-
.lsu_qaddr_i ( lsu_qaddr ),
1600-
.lsu_qdata_i ( gpr_rdata[1] ),
1601-
.lsu_qsize_i ( ls_size ),
1602-
.lsu_qamo_i ( ls_amo ),
1603-
.lsu_qvalid_i ( lsu_qvalid ),
1604-
.lsu_qready_o ( lsu_qready ),
1605-
.lsu_pdata_o ( ld_result ),
1606-
.lsu_ptag_o ( lsu_rd ),
1607-
.lsu_perror_o ( ), // ignored for the moment
1608-
.lsu_pvalid_o ( lsu_pvalid ),
1609-
.lsu_pready_i ( lsu_pready ),
1709+
.clk_i ,
1710+
.rst_i ,
1711+
.lsu_qtag_i ( lsu_req_tag ),
1712+
.lsu_qwrite ( lsu_req_write ),
1713+
.lsu_qsigned ( lsu_req_signed ),
1714+
.lsu_qaddr_i ( lsu_req_addr ),
1715+
.lsu_qdata_i ( lsu_req_data ),
1716+
.lsu_qsize_i ( lsu_req_size ),
1717+
.lsu_qamo_i ( lsu_req_amo ),
1718+
.lsu_qqlr_i ( lsu_req_qlr ),
1719+
.lsu_qvalid_i ( lsu_req_valid ),
1720+
.lsu_qready_o ( lsu_req_ready ),
1721+
.lsu_pdata_o ( lsu_resp_data ),
1722+
.lsu_ptag_o ( lsu_resp_tag ),
1723+
.lsu_perror_o ( lsu_resp_error ),
1724+
.lsu_pqlr_o ( lsu_resp_qlr ),
1725+
.lsu_pvalid_o ( lsu_resp_valid ),
1726+
.lsu_pready_i ( lsu_resp_ready ),
16101727
.data_qaddr_o ,
16111728
.data_qwrite_o ,
16121729
.data_qdata_o ,

hardware/deps/snitch/src/snitch_lsu.sv

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ module snitch_lsu
1313
parameter type tag_t = logic [4:0],
1414
parameter int unsigned NumOutstandingLoads = 1,
1515
parameter bit NaNBox = 0,
16+
parameter bit Qlr = 0,
1617
// Dependent parameters. DO NOT CHANGE.
1718
localparam int unsigned IdWidth = idx_width(NumOutstandingLoads)
1819
) (
@@ -26,12 +27,14 @@ module snitch_lsu
2627
input logic [31:0] lsu_qdata_i,
2728
input logic [1:0] lsu_qsize_i,
2829
input logic [3:0] lsu_qamo_i,
30+
input logic lsu_qqlr_i,
2931
input logic lsu_qvalid_i,
3032
output logic lsu_qready_o,
3133
// response channel
3234
output logic [31:0] lsu_pdata_o,
3335
output tag_t lsu_ptag_o,
3436
output logic lsu_perror_o,
37+
output logic lsu_pqlr_o,
3538
output logic lsu_pvalid_o,
3639
input logic lsu_pready_i,
3740
// Memory Interface Channel
@@ -62,6 +65,7 @@ module snitch_lsu
6265
logic sign_ext;
6366
logic [1:0] offset;
6467
logic [1:0] size;
68+
logic qlr;
6569
} metadata_t;
6670

6771
// ----------------
@@ -108,7 +112,8 @@ module snitch_lsu
108112
tag: lsu_qtag_i,
109113
sign_ext: lsu_qsigned,
110114
offset: lsu_qaddr_i[1:0],
111-
size: lsu_qsize_i
115+
size: lsu_qsize_i,
116+
qlr: lsu_qqlr_i
112117
};
113118

114119
assign resp_metadata = metadata_q[resp_id];
@@ -187,6 +192,9 @@ module snitch_lsu
187192
assign lsu_ptag_o = resp_metadata.tag;
188193
assign lsu_pvalid_o = data_pvalid_i && !resp_metadata.write;
189194
assign data_pready_o = lsu_pready_i || resp_metadata.write;
195+
// Forward the QLR flag stored in the metadata of the responding request.
// Without QLR support the output is tied low instead of being left undriven.
if (Qlr) begin: gen_pqlr
  assign lsu_pqlr_o = resp_metadata.qlr;
end else begin: gen_no_pqlr
  assign lsu_pqlr_o = 1'b0;
end
190198

191199
// ----------------
192200
// SEQUENTIAL

hardware/src/mempool_cc.sv

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,12 @@ module mempool_cc
5757

5858
// Snitch Integer Core
5959
snitch #(
60-
.BootAddr ( BootAddr ),
61-
.MTVEC ( MTVEC ),
62-
.RVE ( RVE ),
63-
.RVM ( RVM ),
64-
.Xqueue ( mempool_pkg::Xqueue )
60+
.BootAddr ( BootAddr ),
61+
.MTVEC ( MTVEC ),
62+
.RVE ( RVE ),
63+
.RVM ( RVM ),
64+
.Xqueue ( mempool_pkg::Xqueue ),
65+
.Qlr ( mempool_pkg::QlrEnabled )
6566
) i_snitch (
6667
.clk_i ,
6768
.rst_i ,

hardware/src/mempool_pkg.sv

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,23 @@ package mempool_pkg;
264264
// Size of xqueues in words (must be a power of two)
265265
localparam int unsigned XQueueSize = `ifdef XQUEUE_SIZE `XQUEUE_SIZE `else 0 `endif;
266266

267+
/********************
268+
* QLR PARAMETERS *
269+
********************/
270+
271+
// Design constants
272+
localparam int unsigned NumQlrsPerCore = 4;
273+
localparam logic [31:0] QlrConfigMask = 32'b0100_0000_0000_0001_????_????_????_????;
274+
275+
// DO NOT CHANGE: Assigned register tags (t0 = x5, t1 = x6, t2 = x7, t3 = x28)
276+
localparam logic [NumQlrsPerCore-1:0][4:0] QlrTags = {5'(28), 5'(7), 5'(6), 5'(5)};
277+
278+
// Configurable parameters
279+
localparam int unsigned QlrFifoSize = `ifdef QLR_FIFO_SIZE `QLR_FIFO_SIZE `else 0 `endif;
280+
localparam int unsigned QlrMaxRequests = `ifdef QLR_MAX_REQUESTS `QLR_MAX_REQUESTS `else 0 `endif;
281+
localparam int unsigned QlrMaxRfReads = `ifdef QLR_MAX_RF_READS `QLR_MAX_RF_READS `else 0 `endif;
282+
// QLRs are enabled only when a non-zero FIFO size is configured
// (the previous `!QlrFifoSize` enabled them for size 0 and disabled them otherwise).
localparam bit QlrEnabled = (QlrFifoSize != 0);
283+
267284
/*****************
268285
* ADDRESS MAP *
269286
*****************/

0 commit comments

Comments
 (0)