未验证 提交 5e652296 编写于 作者: O openeuler-ci-bot 提交者: Gitee

!497 SAS-related bugfix

Merge Pull Request from: @xia-bing1 
 
1. In the NCQ scenario, if multiple I/Os are delivered and one of the I/Os is faulty, a group of slow disks will always occur.
2. During I/O running, a null pointer exception occurs during the pressure test when a disk is removed.
3. When the length of the DMA Setup frame returned by the disk is abnormal, a group of slow disks are triggered. 
 
Link:https://gitee.com/openeuler/kernel/pulls/497 

Reviewed-by: Jialin Zhang <zhangjialin11@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
......@@ -105,6 +105,7 @@ enum {
enum dev_status {
HISI_SAS_DEV_INIT,
HISI_SAS_DEV_NORMAL,
HISI_SAS_DEV_NCQ_ERR,
};
enum {
......@@ -660,7 +661,8 @@ extern void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy,
gfp_t gfp_flags);
extern void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba,
struct sas_task *task,
struct hisi_sas_slot *slot);
struct hisi_sas_slot *slot,
bool need_lock);
extern void hisi_sas_init_mem(struct hisi_hba *hisi_hba);
extern void hisi_sas_rst_work_handler(struct work_struct *work);
extern void hisi_sas_sync_rst_work_handler(struct work_struct *work);
......
......@@ -215,7 +215,7 @@ static void hisi_sas_slot_index_init(struct hisi_hba *hisi_hba)
}
void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, struct sas_task *task,
struct hisi_sas_slot *slot)
struct hisi_sas_slot *slot, bool need_lock)
{
int device_id = slot->device_id;
struct hisi_sas_device *sas_dev = &hisi_hba->devices[device_id];
......@@ -244,9 +244,13 @@ void hisi_sas_slot_task_free(struct hisi_hba *hisi_hba, struct sas_task *task,
}
}
spin_lock(&sas_dev->lock);
list_del_init(&slot->entry);
spin_unlock(&sas_dev->lock);
if (need_lock) {
spin_lock(&sas_dev->lock);
list_del_init(&slot->entry);
spin_unlock(&sas_dev->lock);
} else {
list_del_init(&slot->entry);
}
memset(slot, 0, offsetof(struct hisi_sas_slot, buf));
......@@ -696,7 +700,7 @@ static int hisi_sas_init_device(struct domain_device *device)
case SAS_END_DEVICE:
int_to_scsilun(0, &lun);
tmf_task.tmf = TMF_CLEAR_TASK_SET;
tmf_task.tmf = TMF_ABORT_TASK_SET;
while (retry-- > 0) {
rc = hisi_sas_debug_issue_ssp_tmf(device, lun.scsi_lun,
&tmf_task);
......@@ -1024,7 +1028,7 @@ static void hisi_sas_port_notify_formed(struct asd_sas_phy *sas_phy)
}
static void hisi_sas_do_release_task(struct hisi_hba *hisi_hba, struct sas_task *task,
struct hisi_sas_slot *slot)
struct hisi_sas_slot *slot, bool need_lock)
{
if (task) {
unsigned long flags;
......@@ -1042,7 +1046,7 @@ static void hisi_sas_do_release_task(struct hisi_hba *hisi_hba, struct sas_task
spin_unlock_irqrestore(&task->task_state_lock, flags);
}
hisi_sas_slot_task_free(hisi_hba, task, slot);
hisi_sas_slot_task_free(hisi_hba, task, slot, need_lock);
}
static void hisi_sas_release_task(struct hisi_hba *hisi_hba,
......@@ -1051,8 +1055,11 @@ static void hisi_sas_release_task(struct hisi_hba *hisi_hba,
struct hisi_sas_slot *slot, *slot2;
struct hisi_sas_device *sas_dev = device->lldd_dev;
spin_lock(&sas_dev->lock);
list_for_each_entry_safe(slot, slot2, &sas_dev->list, entry)
hisi_sas_do_release_task(hisi_hba, slot->task, slot);
hisi_sas_do_release_task(hisi_hba, slot->task, slot, false);
spin_unlock(&sas_dev->lock);
}
void hisi_sas_release_tasks(struct hisi_hba *hisi_hba)
......@@ -1680,6 +1687,7 @@ static int hisi_sas_abort_task(struct sas_task *task)
struct hisi_sas_tmf_task tmf_task;
struct domain_device *device = task->dev;
struct hisi_sas_device *sas_dev = device->lldd_dev;
struct hisi_sas_slot *slot = task->lldd_task;
struct hisi_hba *hisi_hba;
struct device *dev;
int rc = TMF_RESP_FUNC_FAILED;
......@@ -1693,7 +1701,6 @@ static int hisi_sas_abort_task(struct sas_task *task)
spin_lock_irqsave(&task->task_state_lock, flags);
if (task->task_state_flags & SAS_TASK_STATE_DONE) {
struct hisi_sas_slot *slot = task->lldd_task;
struct hisi_sas_cq *cq;
if (slot) {
......@@ -1711,9 +1718,8 @@ static int hisi_sas_abort_task(struct sas_task *task)
task->task_state_flags |= SAS_TASK_STATE_ABORTED;
spin_unlock_irqrestore(&task->task_state_lock, flags);
if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
if (slot && task->task_proto & SAS_PROTOCOL_SSP) {
struct scsi_cmnd *cmnd = task->uldd_task;
struct hisi_sas_slot *slot = task->lldd_task;
u16 tag = slot->idx;
int rc2;
......@@ -1741,11 +1747,13 @@ static int hisi_sas_abort_task(struct sas_task *task)
*/
if (rc == TMF_RESP_FUNC_COMPLETE && rc2 != TMF_RESP_FUNC_SUCC) {
if (task->lldd_task)
hisi_sas_do_release_task(hisi_hba, task, slot);
hisi_sas_do_release_task(hisi_hba, task, slot, true);
}
} else if (task->task_proto & SAS_PROTOCOL_SATA ||
task->task_proto & SAS_PROTOCOL_STP) {
if (task->dev->dev_type == SAS_SATA_DEV) {
struct ata_queued_cmd *qc = task->uldd_task;
rc = hisi_sas_internal_task_abort(hisi_hba, device,
HISI_SAS_INT_ABT_DEV,
0, false);
......@@ -1754,11 +1762,21 @@ static int hisi_sas_abort_task(struct sas_task *task)
goto out;
}
hisi_sas_dereg_device(hisi_hba, device);
rc = hisi_sas_softreset_ata_disk(device);
/*
* If an ATA internal command times out in ATA EH, it
* need to execute soft reset, so check the scsicmd
*/
if ((sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR) &&
qc && qc->scsicmd) {
hisi_sas_do_release_task(hisi_hba, task, slot, true);
rc = TMF_RESP_FUNC_COMPLETE;
} else {
rc = hisi_sas_softreset_ata_disk(device);
}
}
} else if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SMP) {
} else if (slot && task->task_proto & SAS_PROTOCOL_SMP) {
/* SMP */
struct hisi_sas_slot *slot = task->lldd_task;
u32 tag = slot->idx;
struct hisi_sas_cq *cq = &hisi_hba->cq[slot->dlvry_queue];
......@@ -1885,8 +1903,12 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device)
{
struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
struct device *dev = hisi_hba->dev;
struct hisi_sas_device *sas_dev = device->lldd_dev;
int rc;
if (sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR)
sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
rc = hisi_sas_internal_task_abort(hisi_hba, device,
HISI_SAS_INT_ABT_DEV, 0, false);
if (rc < 0) {
......
......@@ -1259,7 +1259,11 @@ static void slot_complete_v1_hw(struct hisi_hba *hisi_hba,
slot_err_v1_hw(hisi_hba, task, slot);
if (unlikely(slot->abort)) {
sas_task_abort(task);
if (dev_is_sata(device) && task->ata_task.use_ncq)
sas_ata_device_link_abort(device, true);
else
sas_task_abort(task);
return;
}
goto out;
......@@ -1309,7 +1313,7 @@ static void slot_complete_v1_hw(struct hisi_hba *hisi_hba,
}
out:
hisi_sas_slot_task_free(hisi_hba, task, slot);
hisi_sas_slot_task_free(hisi_hba, task, slot, true);
if (task->task_done)
task->task_done(task);
......
......@@ -2405,7 +2405,11 @@ static void slot_complete_v2_hw(struct hisi_hba *hisi_hba,
error_info[2], error_info[3]);
if (unlikely(slot->abort)) {
sas_task_abort(task);
if (dev_is_sata(device) && task->ata_task.use_ncq)
sas_ata_device_link_abort(device, true);
else
sas_task_abort(task);
return;
}
goto out;
......@@ -2465,7 +2469,7 @@ static void slot_complete_v2_hw(struct hisi_hba *hisi_hba,
}
task->task_state_flags |= SAS_TASK_STATE_DONE;
spin_unlock_irqrestore(&task->task_state_lock, flags);
hisi_sas_slot_task_free(hisi_hba, task, slot);
hisi_sas_slot_task_free(hisi_hba, task, slot, true);
if (!is_internal && (task->task_proto != SAS_PROTOCOL_SMP)) {
spin_lock_irqsave(&device->done_lock, flags);
......
......@@ -403,6 +403,11 @@
#define CMPLT_HDR_CMPLT_MSK (0x3 << CMPLT_HDR_CMPLT_OFF)
#define CMPLT_HDR_ERROR_PHASE_OFF 2
#define CMPLT_HDR_ERROR_PHASE_MSK (0xff << CMPLT_HDR_ERROR_PHASE_OFF)
/* bit[9:2] Error Phase */
#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE_OFF \
8
#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE_MSK \
(0x1 << ERR_PHASE_RESPONSE_FRAME_REV_STAGE_OFF)
#define CMPLT_HDR_RSPNS_XFRD_OFF 10
#define CMPLT_HDR_RSPNS_XFRD_MSK (0x1 << CMPLT_HDR_RSPNS_XFRD_OFF)
#define CMPLT_HDR_RSPNS_GOOD_OFF 11
......@@ -422,8 +427,15 @@
#define CMPLT_HDR_DEV_ID_OFF 16
#define CMPLT_HDR_DEV_ID_MSK (0xffff << CMPLT_HDR_DEV_ID_OFF)
/* dw3 */
#define CMPLT_HDR_SATA_DISK_ERR_OFF 16
#define CMPLT_HDR_SATA_DISK_ERR_MSK (0x1 << CMPLT_HDR_SATA_DISK_ERR_OFF)
#define CMPLT_HDR_IO_IN_TARGET_OFF 17
#define CMPLT_HDR_IO_IN_TARGET_MSK (0x1 << CMPLT_HDR_IO_IN_TARGET_OFF)
/* bit[23:18] ERR_FIS_ATA_STATUS */
#define FIS_ATA_STATUS_ERR_OFF 18
#define FIS_ATA_STATUS_ERR_MSK (0x1 << FIS_ATA_STATUS_ERR_OFF)
#define FIS_TYPE_SDB_OFF 31
#define FIS_TYPE_SDB_MSK (0x1 << FIS_TYPE_SDB_OFF)
/* ITCT header */
/* qw0 */
......@@ -866,6 +878,7 @@ static void dereg_device_v3_hw(struct hisi_hba *hisi_hba,
cfg_abt_set_query_iptt = hisi_sas_read32(hisi_hba,
CFG_ABT_SET_QUERY_IPTT);
spin_lock(&sas_dev->lock);
list_for_each_entry_safe(slot, slot2, &sas_dev->list, entry) {
cfg_abt_set_query_iptt &= ~CFG_SET_ABORTED_IPTT_MSK;
cfg_abt_set_query_iptt |= (1 << CFG_SET_ABORTED_EN_OFF) |
......@@ -873,6 +886,7 @@ static void dereg_device_v3_hw(struct hisi_hba *hisi_hba,
hisi_sas_write32(hisi_hba, CFG_ABT_SET_QUERY_IPTT,
cfg_abt_set_query_iptt);
}
spin_unlock(&sas_dev->lock);
cfg_abt_set_query_iptt &= ~(1 << CFG_SET_ABORTED_EN_OFF);
hisi_sas_write32(hisi_hba, CFG_ABT_SET_QUERY_IPTT,
cfg_abt_set_query_iptt);
......@@ -2148,6 +2162,18 @@ static irqreturn_t fatal_axi_int_v3_hw(int irq_no, void *p)
return IRQ_HANDLED;
}
static bool is_ncq_err_v3_hw(struct hisi_sas_complete_v3_hdr *complete_hdr)
{
u32 dw0, dw3;
dw0 = le32_to_cpu(complete_hdr->dw0);
dw3 = le32_to_cpu(complete_hdr->dw3);
return (dw0 & ERR_PHASE_RESPONSE_FRAME_REV_STAGE_MSK) &&
(dw3 & FIS_TYPE_SDB_MSK) &&
(dw3 & FIS_ATA_STATUS_ERR_MSK);
}
static bool
slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
struct hisi_sas_slot *slot)
......@@ -2294,7 +2320,11 @@ static void slot_complete_v3_hw(struct hisi_hba *hisi_hba,
error_info[0], error_info[1],
error_info[2], error_info[3]);
if (unlikely(slot->abort)) {
sas_task_abort(task);
if (dev_is_sata(device) && task->ata_task.use_ncq)
sas_ata_device_link_abort(device, true);
else
sas_task_abort(task);
return;
}
goto out;
......@@ -2350,7 +2380,7 @@ static void slot_complete_v3_hw(struct hisi_hba *hisi_hba,
}
task->task_state_flags |= SAS_TASK_STATE_DONE;
spin_unlock_irqrestore(&task->task_state_lock, flags);
hisi_sas_slot_task_free(hisi_hba, task, slot);
hisi_sas_slot_task_free(hisi_hba, task, slot, true);
if (!is_internal && (task->task_proto != SAS_PROTOCOL_SMP)) {
spin_lock_irqsave(&device->done_lock, flags);
......@@ -2384,14 +2414,34 @@ static irqreturn_t cq_thread_v3_hw(int irq_no, void *p)
while (rd_point != wr_point) {
struct hisi_sas_complete_v3_hdr *complete_hdr;
struct device *dev = hisi_hba->dev;
u32 dw1;
u32 dw0, dw1, dw3;
int iptt;
complete_hdr = &complete_queue[rd_point];
dw0 = le32_to_cpu(complete_hdr->dw0);
dw1 = le32_to_cpu(complete_hdr->dw1);
dw3 = le32_to_cpu(complete_hdr->dw3);
iptt = dw1 & CMPLT_HDR_IPTT_MSK;
if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) {
if (unlikely((dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) &&
(dw3 & CMPLT_HDR_SATA_DISK_ERR_MSK)) {
int device_id = (dw1 & CMPLT_HDR_DEV_ID_MSK) >>
CMPLT_HDR_DEV_ID_OFF;
struct hisi_sas_itct *itct =
&hisi_hba->itct[device_id];
struct hisi_sas_device *sas_dev =
&hisi_hba->devices[device_id];
struct domain_device *device = sas_dev->sas_device;
dev_err(dev, "erroneous completion disk err dev id=%d sas_addr=0x%llx CQ hdr: 0x%x 0x%x 0x%x 0x%x\n",
device_id, itct->sas_addr, dw0, dw1,
complete_hdr->act, dw3);
if (is_ncq_err_v3_hw(complete_hdr))
sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
sas_ata_device_link_abort(device, true);
} else if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) {
slot = &hisi_hba->slot_info[iptt];
slot->cmplt_queue_slot = rd_point;
slot->cmplt_queue = queue;
......
......@@ -147,8 +147,8 @@ static void sas_ata_task_done(struct sas_task *task)
qc->flags |= ATA_QCFLAG_FAILED;
}
dev->sata_dev.fis[3] = 0x04; /* status err */
dev->sata_dev.fis[2] = ATA_ERR;
dev->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */
dev->sata_dev.fis[3] = ATA_ABORTED; /* tf error */
}
}
......@@ -875,3 +875,21 @@ void sas_ata_wait_eh(struct domain_device *dev)
ap = dev->sata_dev.ap;
ata_port_wait_eh(ap);
}
void sas_ata_device_link_abort(struct domain_device *device, bool force_reset)
{
struct ata_port *ap = device->sata_dev.ap;
struct ata_link *link = &ap->link;
unsigned long flags;
spin_lock_irqsave(ap->lock, flags);
device->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */
device->sata_dev.fis[3] = ATA_ABORTED; /* tf error */
link->eh_info.err_mask |= AC_ERR_DEV;
if (force_reset)
link->eh_info.action |= ATA_EH_RESET;
ata_link_abort(link);
spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(sas_ata_device_link_abort);
......@@ -33,6 +33,7 @@ void sas_probe_sata(struct asd_sas_port *port);
void sas_suspend_sata(struct asd_sas_port *port);
void sas_resume_sata(struct asd_sas_port *port);
void sas_ata_end_eh(struct ata_port *ap);
void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset);
int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline);
#else
......@@ -87,6 +88,11 @@ static inline void sas_ata_end_eh(struct ata_port *ap)
{
}
static inline void sas_ata_device_link_abort(struct domain_device *dev,
bool force_reset)
{
}
static inline int sas_ata_wait_after_reset(struct domain_device *dev,
unsigned long deadline)
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册