[SCSI] FC transport: fixes for workq deadlocks

As previously reported via Michael Reed, the FC transport took a hit in 2.6.15 (perhaps a little earlier) when we solved a recursion error. There are 2 deadlocks occurring: - With scan and the delete items sharing the same workq, flushing the workq for the delete code was getting it stalled behind a very long running scan code path. - There's a deadlock where scsi_remove_target() has to sit behind scsi_scan_target() due to contention over the scan_lock(). This patch resolves the 1st deadlock and significantly reduces the odds of the second. So far, we have only replicated the 2nd deadlock on a highly-parallel SMP system. More on the 2nd deadlock in a following email. This patch reworks the transport to: - Only use the scsi host workq for scanning - Use 2 other workq's internally. One for deletions, the other for scheduled deletions. Originally, we tried this with a single workq, but the occassional flushes of the scheduled queues was hitting the second deadlock with a slightly higher frequency. In the future, we'll look at the LLDD's and the transport to see if we can get rid of this extra overhead. - When moving to the other workq's we tightened up some object states and some lock handling. - Properly syncs adds/deletes - minor code cleanups - directly reference fc_host_attrs, rather than through attribute macros - flush the right workq on delayed work cancel failures. Large kudos to Michael Reed who has been working this issue for the last month. Signed-off-by: N James Bottomley <James.Bottomley@SteelEye.com>

[SCSI] FC transport: fixes for workq deadlocks
As previously reported via Michael Reed, the FC transport took a hit in 2.6.15 (perhaps a little earlier) when we solved a recursion error. There are 2 deadlocks occurring: - With scan and the delete items sharing the same workq, flushing the workq for the delete code was getting it stalled behind a very long running scan code path. - There's a deadlock where scsi_remove_target() has to sit behind scsi_scan_target() due to contention over the scan_lock(). This patch resolves the 1st deadlock and significantly reduces the odds of the second. So far, we have only replicated the 2nd deadlock on a highly-parallel SMP system. More on the 2nd deadlock in a following email. This patch reworks the transport to: - Only use the scsi host workq for scanning - Use 2 other workq's internally. One for deletions, the other for scheduled deletions. Originally, we tried this with a single workq, but the occassional flushes of the scheduled queues was hitting the second deadlock with a slightly higher frequency. In the future, we'll look at the LLDD's and the transport to see if we can get rid of this extra overhead. - When moving to the other workq's we tightened up some object states and some lock handling. - Properly syncs adds/deletes - minor code cleanups - directly reference fc_host_attrs, rather than through attribute macros - flush the right workq on delayed work cancel failures. Large kudos to Michael Reed who has been working this issue for the last month. Signed-off-by: N James Bottomley <James.Bottomley@SteelEye.com>
aedf3497 · James Smart · James Bottomley · b0d23648 · aedf3497 · aedf3497
展开全部显示空白变更内容
内联并排

Showing with 341 addition and 166 deletion

drivers/scsi/scsi_transport_fc.c drivers/scsi/scsi_transport_fc.c +311 -155

include/scsi/scsi_transport_fc.h include/scsi/scsi_transport_fc.h +30 -11

未找到文件。
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -202,12 +202,19 @@ struct fc_rport {	/* aka fc_starget_attrs */
 	/* internal data */
 	unsigned int channel;
 	u32 number;
+	u8 flags;
 	struct list_head peers;
 	struct device dev;
 	struct work_struct dev_loss_work;
 	struct work_struct scan_work;
+ 	struct work_struct stgt_delete_work;
+	struct work_struct rport_delete_work;
 } __attribute__((aligned(sizeof(unsigned long))));
+/* bit field values for struct fc_rport "flags" field: */
+#define FC_RPORT_DEVLOSS_PENDING	0x01
+#define FC_RPORT_SCAN_PENDING		0x02
 #define	dev_to_rport(d)				\
 	container_of(d, struct fc_rport, dev)
 #define transport_class_to_rport(classdev)	\
@@ -327,13 +334,16 @@ struct fc_host_attrs {
 	struct list_head rport_bindings;
 	u32 next_rport_number;
 	u32 next_target_id;
-	u8 flags;
- 	struct work_struct rport_del_work;
-};
-/* values for struct fc_host_attrs "flags" field: */
+	/* work queues for rport state manipulation */
-#define FC_SHOST_RPORT_DEL_SCHEDULED	0x01
+	char work_q_name[KOBJ_NAME_LEN];
+	struct workqueue_struct *work_q;
+	char devloss_work_q_name[KOBJ_NAME_LEN];
+	struct workqueue_struct *devloss_work_q;
+};
+#define shost_to_fc_host(x) \
+	((struct fc_host_attrs *)(x)->shost_data)
 #define fc_host_node_name(x) \
 	(((struct fc_host_attrs *)(x)->shost_data)->node_name)
@@ -375,10 +385,14 @@ struct fc_host_attrs {
 	(((struct fc_host_attrs *)(x)->shost_data)->next_rport_number)
 #define fc_host_next_target_id(x) \
 	(((struct fc_host_attrs *)(x)->shost_data)->next_target_id)
-#define fc_host_flags(x) \
+#define fc_host_work_q_name(x) \
-	(((struct fc_host_attrs *)(x)->shost_data)->flags)
+	(((struct fc_host_attrs *)(x)->shost_data)->work_q_name)
-#define fc_host_rport_del_work(x) \
+#define fc_host_work_q(x) \
-	(((struct fc_host_attrs *)(x)->shost_data)->rport_del_work)
+	(((struct fc_host_attrs *)(x)->shost_data)->work_q)
+#define fc_host_devloss_work_q_name(x) \
+	(((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name)
+#define fc_host_devloss_work_q(x) \
+	(((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q)
 /* The functions by which the transport class and the driver communicate */
@@ -461,10 +475,15 @@ fc_remote_port_chkready(struct fc_rport *rport)
 	switch (rport->port_state) {
 	case FC_PORTSTATE_ONLINE:
+		if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
 			result = 0;
+		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
+			result = DID_IMM_RETRY << 16;
+		else
+			result = DID_NO_CONNECT << 16;
 		break;
 	case FC_PORTSTATE_BLOCKED:
-		result = DID_BUS_BUSY << 16;
+		result = DID_IMM_RETRY << 16;
 		break;
 	default:
 		result = DID_NO_CONNECT << 16;