From 388444011a6d3e8fef844860d61e30156734c200 Mon Sep 17 00:00:00 2001 From: tanxiaofei Date: Tue, 15 Jan 2019 09:34:48 +0800 Subject: [PATCH] scsi: hisi_sas: fix the issue of losing directly attached disk when hot-plug Hot-plugging the mini-SAS cable of a directly attached hard disk backplane may cause a disk to be lost. We have run this test with several types of SATA disks from different vendors, and only two models from Seagate have this problem: ST4000NM0035-1V4107 and ST3000VM002-1ET166. The root cause is that the disk doesn't send a D2H frame after OOB has finished. The SAS controller issues the phyup interrupt only when the D2H frame is received; otherwise it keeps waiting indefinitely. When this issue happens, we can find the disk again with a link reset. To fix this issue, we set up a timer after OOB has finished. If the PHY is not up within 20s, do a link reset. Note: the 20s timeout is an empirical value. Feature or Bugfix: Bugfix Signed-off-by: tanxiaofei Reviewed-by: chenxiang Signed-off-by: Yang Yingliang --- drivers/scsi/hisi_sas/hisi_sas.h | 1 + drivers/scsi/hisi_sas/hisi_sas_main.c | 2 ++ drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 49 +++++++++++++++++++++----- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 3672bd599133..86cbc8c96c4d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -141,6 +141,7 @@ struct hisi_sas_phy { struct asd_sas_phy sas_phy; struct sas_identify identify; struct completion *reset_completion; + struct timer_list timer; spinlock_t lock; u64 port_id; /* from hw */ u64 frame_rcvd_size; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 37ae3ed106eb..f048f9063a0d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -920,6 +920,8 @@ static void hisi_sas_phy_init(struct hisi_hba *hisi_hba, int phy_no) INIT_WORK(&phy->works[i], hisi_sas_phye_fns[i]);
spin_lock_init(&phy->lock); + + timer_setup(&phy->timer, NULL, 0); } static void hisi_sas_port_notify_formed(struct asd_sas_phy *sas_phy) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index e3e4f3dafb09..f76c092e43f4 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1378,6 +1378,7 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) struct device *dev = hisi_hba->dev; unsigned long flags; + del_timer(&phy->timer); hisi_sas_phy_write32(hisi_hba, phy_no, PHYCTRL_PHY_ENA_MSK, 1); port_id = hisi_sas_read32(hisi_hba, PHY_PORT_NUM_MA); @@ -1479,9 +1480,11 @@ static irqreturn_t phy_up_v3_hw(int phy_no, struct hisi_hba *hisi_hba) static irqreturn_t phy_down_v3_hw(int phy_no, struct hisi_hba *hisi_hba) { + struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; u32 phy_state, sl_ctrl, txid_auto; struct device *dev = hisi_hba->dev; + del_timer(&phy->timer); hisi_sas_phy_write32(hisi_hba, phy_no, PHYCTRL_NOT_RDY_MSK, 1); phy_state = hisi_sas_read32(hisi_hba, PHY_STATE); @@ -1672,6 +1675,41 @@ static void handle_chl_int2_v3_hw(struct hisi_hba *hisi_hba, int phy_no) hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT2, irq_value); } +#define WAIT_PHYUP_TIMEOUT_V3_HW 20 +static void wait_phyup_timedout_v3_hw(struct timer_list *t) +{ + struct hisi_sas_phy *phy = from_timer(phy, t, timer); + struct hisi_hba *hisi_hba = phy->hisi_hba; + struct device *dev = hisi_hba->dev; + int phy_no = phy->sas_phy.id; + + dev_warn(dev, "phy%d wait phyup timeout, issuing link reset\n", phy_no); + hisi_sas_notify_phy_event(phy, HISI_PHYE_LINK_RESET); +} + +static void handle_chl_int0_v3_hw(struct hisi_hba *hisi_hba, int phy_no) +{ + u32 irq_value0 = hisi_sas_phy_read32(hisi_hba, phy_no, CHL_INT0); + struct device *dev = hisi_hba->dev; + + if (irq_value0 & CHL_INT0_PHY_RDY_MSK) { + struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; + + if (!timer_pending(&phy->timer)) { + dev_dbg(dev, "phy%d 
OOB ready\n", phy_no); + phy->timer.function = wait_phyup_timedout_v3_hw; + phy->timer.expires = jiffies + + WAIT_PHYUP_TIMEOUT_V3_HW * HZ; + add_timer(&phy->timer); + } + } + + hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT0, + irq_value0 & (~CHL_INT0_SL_RX_BCST_ACK_MSK) + & (~CHL_INT0_SL_PHY_ENABLE_MSK) + & (~CHL_INT0_NOT_RDY_MSK)); +} + static irqreturn_t int_chnl_int_v3_hw(int irq_no, void *p) { struct hisi_hba *hisi_hba = p; @@ -1682,8 +1720,8 @@ static irqreturn_t int_chnl_int_v3_hw(int irq_no, void *p) & 0xeeeeeeee; while (irq_msk) { - u32 irq_value0 = hisi_sas_phy_read32(hisi_hba, phy_no, - CHL_INT0); + if (irq_msk & (2 << (phy_no * 4))) + handle_chl_int0_v3_hw(hisi_hba, phy_no); if (irq_msk & (4 << (phy_no * 4))) handle_chl_int1_v3_hw(hisi_hba, phy_no); @@ -1691,13 +1729,6 @@ static irqreturn_t int_chnl_int_v3_hw(int irq_no, void *p) if (irq_msk & (8 << (phy_no * 4))) handle_chl_int2_v3_hw(hisi_hba, phy_no); - if (irq_msk & (2 << (phy_no * 4)) && irq_value0) { - hisi_sas_phy_write32(hisi_hba, phy_no, - CHL_INT0, irq_value0 - & (~CHL_INT0_SL_RX_BCST_ACK_MSK) - & (~CHL_INT0_SL_PHY_ENABLE_MSK) - & (~CHL_INT0_NOT_RDY_MSK)); - } irq_msk &= ~(0xe << (phy_no * 4)); phy_no++; } -- GitLab