Jakob Viketoft
2014-11-17 09:41:27 UTC
Hello!
We have developed a new memory controller to use with the openrisc core, but using it in Linux caused a bus error. After lots of digging trying to find the fault with the new memory controller, it turns out that there is a bug in the or1200 exception handling. I don't know exactly why only the new memory controller uncovered this bug, but when looking at the present code I'm a bit puzzled as to why it didn't seem to occur for anyone else.
I don't know if everyone else stopped using this core, but I thought it would be good to put the fix out there if someone else could benefit from it and also perhaps to get some more eyes onto whether this was the right fix or not. Since the bugzilla that's been used before seem to have gone off the deep end, I post this in this e-mail in the hope it's visible enough.
I would also like to add a few words of how I see the nature of the bug. When an ITLB miss occur, the internal icpu_err signal is held high which in turn trigger the ITLB miss exception. This in turn sets the FSM in the exception module going which (among other things) generate the extend_flush signal. The FSM is supposed to wait until such time as any outstanding instruction access is finished and so flushing this as well before going to the exception vector. However, the icpu_error signal coming from the MMU is held high a bit too long and caught by the flushpipe_r always block which dutifully sets flushpipe_r, this triggers genpc_freeze to go high which in turn moves the FSM in the exception module along. This lowers the extend_flush (and hence flushpipe signal) regardless of how the instruction fetch is going. If the instruction fetch is slower than the length of the flushpipe signal, the instruction won't be flushed and instead executed. Our bus error stems from the situ
ation when this instruction is a load or store where it's virtual address is now used as a physical address (with no recipient) since the IMMU have been turned on at the start of the exception. I have corrected the bug by putting a condition of genpc_freeze's ability to move the exception FSM along if there is an ongoing wishbone instruction fetch at the start of the exception.
I might add that the new memory controller uses a round-robin scheme for arbitration and that there's more going on on the buses to it apart from cpu.
Anyway, the bug fix is inlined below, comments are welcome.
Best regards,
/Jakob
diff --git a/hdl/or1200_cpu.v b/hdl/or1200_cpu.v
index de5a854..e422d3e 100644
--- a/hdl/or1200_cpu.v
+++ b/hdl/or1200_cpu.v
@@ -81,7 +81,7 @@ module or1200_cpu(
boot_adr_sel_i,
// Interrupt & tick exceptions
- sig_int, sig_tick, sig_trap,
+ sig_int, sig_tick, sig_trap, iwb_cyc_i, iwb_ack_i, iwb_err_i,
// SPR interface
supv, spr_addr, spr_dat_cpu, spr_dat_pic, spr_dat_tt, spr_dat_pm,
@@ -213,6 +213,13 @@ input mtspr_dc_done;
input sig_int;
input sig_tick;
output sig_trap;
+
+//
+// External instruction fetch signals
+//
+input iwb_cyc_i;
+input iwb_ack_i;
+input iwb_err_i;
//
// Register file parity error indicator
@@ -858,6 +865,9 @@ or1200_except or1200_except(
.sig_fp(sig_fp),
.fpcsr_fpee(fpcsr[`OR1200_FPCSR_FPEE]),
.ex_branch_taken(ex_branch_taken),
+ .iext_cycstb_i(iwb_cyc_i),
+ .iext_ack_i(iwb_ack_i),
+ .iext_err_i(iwb_err_i),
.icpu_ack_i(icpu_ack_i),
.icpu_err_i(icpu_err_i),
.dcpu_ack_i(dcpu_ack_i),
diff --git a/hdl/or1200_except.v b/hdl/or1200_except.v
index d7cc6bd..da6b2a6 100644
--- a/hdl/or1200_except.v
+++ b/hdl/or1200_except.v
@@ -78,8 +78,8 @@ module or1200_except
except_stop, except_trig, ex_void, abort_mvspr, branch_op, spr_dat_ppc,
spr_dat_npc, datain, du_dsr, epcr_we, eear_we, esr_we, pc_we, epcr, eear,
du_dmr1, du_hwbkpt, du_hwbkpt_ls_r, esr, sr_we, to_sr, sr, lsu_addr,
- abort_ex, icpu_ack_i, icpu_err_i, dcpu_ack_i, dcpu_err_i, sig_fp, fpcsr_fpee,
- dsx
+ abort_ex, iext_cycstb_i, iext_ack_i, iext_err_i, icpu_ack_i, icpu_err_i,
+ dcpu_ack_i, dcpu_err_i, sig_fp, fpcsr_fpee, dsx
);
@@ -144,6 +144,9 @@ output [31:0] spr_dat_ppc;
output [31:0] spr_dat_npc;
output abort_ex;
output abort_mvspr;
+input iext_cycstb_i;
+input iext_ack_i;
+input iext_err_i;
input icpu_ack_i;
input icpu_err_i;
input dcpu_ack_i;
@@ -168,6 +171,7 @@ reg [2:0] ex_exceptflags;
reg [`OR1200_EXCEPTFSM_WIDTH-1:0] state;
reg extend_flush;
reg extend_flush_last;
+reg iext_access;
reg ex_dslot /* verilator public */;
reg delayed1_ex_dslot;
reg delayed2_ex_dslot;
@@ -461,6 +465,7 @@ assign except_flushpipe = |except_trig & ~|state;
state <= `OR1200_EXCEPTFSM_IDLE;
except_type <= `OR1200_EXCEPT_NONE;
extend_flush <= 1'b0;
+ iext_access <= 1'b0;
epcr <= 32'b0;
eear <= 32'b0;
esr <= {2'h1, {`OR1200_SR_WIDTH-3{1'b0}}, 1'b1};
@@ -477,6 +482,12 @@ assign except_flushpipe = |except_trig & ~|state;
if (except_flushpipe) begin
state <= `OR1200_EXCEPTFSM_FLU1;
extend_flush <= 1'b1;
+ if (iext_cycstb_i && !iext_ack_i && !iext_err_i) begin
+ // An instruction access is under way when exception arrives
+ iext_access <= 1'b1;
+ end else begin
+ iext_access <= 1'b0;
+ end
esr <= sr_we ? to_sr : sr;
casez (except_trig)
`ifdef OR1200_EXCEPT_ITLBMISS
@@ -628,8 +639,11 @@ assign except_flushpipe = |except_trig & ~|state;
esr <= {datain[`OR1200_SR_WIDTH-1], 1'b1, datain[`OR1200_SR_WIDTH-3:0]};
end
`OR1200_EXCEPTFSM_FLU1:
- if (icpu_ack_i | icpu_err_i | genpc_freeze)
- state <= `OR1200_EXCEPTFSM_FLU2;
+ // Wait here until a possible ongoing access is finished
+ if (icpu_ack_i | icpu_err_i | (!iext_access & genpc_freeze)) begin
+ iext_access <= 1'b0;
+ state <= `OR1200_EXCEPTFSM_FLU2;
+ end
`OR1200_EXCEPTFSM_FLU2:
`ifdef OR1200_EXCEPT_TRAP
if (except_type == `OR1200_EXCEPT_TRAP) begin
diff --git a/hdl/or1200_top.v b/hdl/or1200_top.v
index 947ab4f..5243b0c 100644
--- a/hdl/or1200_top.v
+++ b/hdl/or1200_top.v
@@ -728,6 +728,9 @@ or1200_cpu(
.sig_int(sig_int),
.sig_tick(sig_tick),
.sig_trap(sig_trap),
+ .iwb_cyc_i(iwb_cyc_o),
+ .iwb_ack_i(iwb_ack_i),
+ .iwb_err_i(iwb_err_i),
// SPRs
.supv(supv),
We have developed a new memory controller to use with the openrisc core, but using it in Linux caused a bus error. After lots of digging trying to find the fault with the new memory controller, it turns out that there is a bug in the or1200 exception handling. I don't know exactly why only the new memory controller uncovered this bug, but when looking at the present code I'm a bit puzzled as to why it didn't seem to occur for anyone else.
I don't know if everyone else stopped using this core, but I thought it would be good to put the fix out there if someone else could benefit from it and also perhaps to get some more eyes onto whether this was the right fix or not. Since the bugzilla that's been used before seem to have gone off the deep end, I post this in this e-mail in the hope it's visible enough.
I would also like to add a few words of how I see the nature of the bug. When an ITLB miss occur, the internal icpu_err signal is held high which in turn trigger the ITLB miss exception. This in turn sets the FSM in the exception module going which (among other things) generate the extend_flush signal. The FSM is supposed to wait until such time as any outstanding instruction access is finished and so flushing this as well before going to the exception vector. However, the icpu_error signal coming from the MMU is held high a bit too long and caught by the flushpipe_r always block which dutifully sets flushpipe_r, this triggers genpc_freeze to go high which in turn moves the FSM in the exception module along. This lowers the extend_flush (and hence flushpipe signal) regardless of how the instruction fetch is going. If the instruction fetch is slower than the length of the flushpipe signal, the instruction won't be flushed and instead executed. Our bus error stems from the situ
ation when this instruction is a load or store where it's virtual address is now used as a physical address (with no recipient) since the IMMU have been turned on at the start of the exception. I have corrected the bug by putting a condition of genpc_freeze's ability to move the exception FSM along if there is an ongoing wishbone instruction fetch at the start of the exception.
I might add that the new memory controller uses a round-robin scheme for arbitration and that there's more going on on the buses to it apart from cpu.
Anyway, the bug fix is inlined below, comments are welcome.
Best regards,
/Jakob
diff --git a/hdl/or1200_cpu.v b/hdl/or1200_cpu.v
index de5a854..e422d3e 100644
--- a/hdl/or1200_cpu.v
+++ b/hdl/or1200_cpu.v
@@ -81,7 +81,7 @@ module or1200_cpu(
boot_adr_sel_i,
// Interrupt & tick exceptions
- sig_int, sig_tick, sig_trap,
+ sig_int, sig_tick, sig_trap, iwb_cyc_i, iwb_ack_i, iwb_err_i,
// SPR interface
supv, spr_addr, spr_dat_cpu, spr_dat_pic, spr_dat_tt, spr_dat_pm,
@@ -213,6 +213,13 @@ input mtspr_dc_done;
input sig_int;
input sig_tick;
output sig_trap;
+
+//
+// External instruction fetch signals
+//
+input iwb_cyc_i;
+input iwb_ack_i;
+input iwb_err_i;
//
// Register file parity error indicator
@@ -858,6 +865,9 @@ or1200_except or1200_except(
.sig_fp(sig_fp),
.fpcsr_fpee(fpcsr[`OR1200_FPCSR_FPEE]),
.ex_branch_taken(ex_branch_taken),
+ .iext_cycstb_i(iwb_cyc_i),
+ .iext_ack_i(iwb_ack_i),
+ .iext_err_i(iwb_err_i),
.icpu_ack_i(icpu_ack_i),
.icpu_err_i(icpu_err_i),
.dcpu_ack_i(dcpu_ack_i),
diff --git a/hdl/or1200_except.v b/hdl/or1200_except.v
index d7cc6bd..da6b2a6 100644
--- a/hdl/or1200_except.v
+++ b/hdl/or1200_except.v
@@ -78,8 +78,8 @@ module or1200_except
except_stop, except_trig, ex_void, abort_mvspr, branch_op, spr_dat_ppc,
spr_dat_npc, datain, du_dsr, epcr_we, eear_we, esr_we, pc_we, epcr, eear,
du_dmr1, du_hwbkpt, du_hwbkpt_ls_r, esr, sr_we, to_sr, sr, lsu_addr,
- abort_ex, icpu_ack_i, icpu_err_i, dcpu_ack_i, dcpu_err_i, sig_fp, fpcsr_fpee,
- dsx
+ abort_ex, iext_cycstb_i, iext_ack_i, iext_err_i, icpu_ack_i, icpu_err_i,
+ dcpu_ack_i, dcpu_err_i, sig_fp, fpcsr_fpee, dsx
);
@@ -144,6 +144,9 @@ output [31:0] spr_dat_ppc;
output [31:0] spr_dat_npc;
output abort_ex;
output abort_mvspr;
+input iext_cycstb_i;
+input iext_ack_i;
+input iext_err_i;
input icpu_ack_i;
input icpu_err_i;
input dcpu_ack_i;
@@ -168,6 +171,7 @@ reg [2:0] ex_exceptflags;
reg [`OR1200_EXCEPTFSM_WIDTH-1:0] state;
reg extend_flush;
reg extend_flush_last;
+reg iext_access;
reg ex_dslot /* verilator public */;
reg delayed1_ex_dslot;
reg delayed2_ex_dslot;
@@ -461,6 +465,7 @@ assign except_flushpipe = |except_trig & ~|state;
state <= `OR1200_EXCEPTFSM_IDLE;
except_type <= `OR1200_EXCEPT_NONE;
extend_flush <= 1'b0;
+ iext_access <= 1'b0;
epcr <= 32'b0;
eear <= 32'b0;
esr <= {2'h1, {`OR1200_SR_WIDTH-3{1'b0}}, 1'b1};
@@ -477,6 +482,12 @@ assign except_flushpipe = |except_trig & ~|state;
if (except_flushpipe) begin
state <= `OR1200_EXCEPTFSM_FLU1;
extend_flush <= 1'b1;
+ if (iext_cycstb_i && !iext_ack_i && !iext_err_i) begin
+ // An instruction access is under way when exception arrives
+ iext_access <= 1'b1;
+ end else begin
+ iext_access <= 1'b0;
+ end
esr <= sr_we ? to_sr : sr;
casez (except_trig)
`ifdef OR1200_EXCEPT_ITLBMISS
@@ -628,8 +639,11 @@ assign except_flushpipe = |except_trig & ~|state;
esr <= {datain[`OR1200_SR_WIDTH-1], 1'b1, datain[`OR1200_SR_WIDTH-3:0]};
end
`OR1200_EXCEPTFSM_FLU1:
- if (icpu_ack_i | icpu_err_i | genpc_freeze)
- state <= `OR1200_EXCEPTFSM_FLU2;
+ // Wait here until a possible ongoing access is finished
+ if (icpu_ack_i | icpu_err_i | (!iext_access & genpc_freeze)) begin
+ iext_access <= 1'b0;
+ state <= `OR1200_EXCEPTFSM_FLU2;
+ end
`OR1200_EXCEPTFSM_FLU2:
`ifdef OR1200_EXCEPT_TRAP
if (except_type == `OR1200_EXCEPT_TRAP) begin
diff --git a/hdl/or1200_top.v b/hdl/or1200_top.v
index 947ab4f..5243b0c 100644
--- a/hdl/or1200_top.v
+++ b/hdl/or1200_top.v
@@ -728,6 +728,9 @@ or1200_cpu(
.sig_int(sig_int),
.sig_tick(sig_tick),
.sig_trap(sig_trap),
+ .iwb_cyc_i(iwb_cyc_o),
+ .iwb_ack_i(iwb_ack_i),
+ .iwb_err_i(iwb_err_i),
// SPRs
.supv(supv),