Unverified commit 152f2494 authored by Mark Brown

Optimize spi_sync path

Merge series from David Jander <david@protonic.nl>:

These patches optimize the spi_sync call for the common case that the
worker thread is idle and the queue is empty. They also open the
possibility of further optimizing the async path, since it no longer
needs to take the direct sync path into account.

As an example of the performance gain, on an i.MX8MM SoC with a SPI CAN
controller attached (MCP2518FD), the time the interrupt line stays
active (which corresponds roughly to the time it takes to send three
relatively short consecutive spi_sync messages) is reduced from 98us to
only 72us by this patch.

A note about message ordering:

This patch series should not change the behavior of message ordering when
coming from the same context. This means that if a client driver issues
one or more spi_async() messages immediately followed by a spi_sync()
message in the same context, it can still rely on these messages being
sent out in the order they were fired.
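
As a concrete illustration of that guarantee, the sketch below fires one
async message followed by a sync message from the same context; the async
transfer still goes out on the wire first. This example is not part of the
series itself: "struct my_chip", its buffers and the my_async_done()
callback are illustrative assumptions.

#include <linux/spi/spi.h>
#include <linux/string.h>

struct my_chip {
	struct spi_device	*spi;
	struct spi_message	async_msg;	/* must stay valid until my_async_done() runs */
	struct spi_transfer	async_xfer;
	u8			cmd_buf[4];
	u8			data_buf[8];
};

static void my_async_done(void *context)
{
	/* called from the controller's completion context */
}

static int my_chip_kick(struct my_chip *chip)
{
	struct spi_transfer sync_xfer = {
		.tx_buf = chip->data_buf,
		.len    = sizeof(chip->data_buf),
	};
	struct spi_message sync_msg;
	int ret;

	/* 1) queue a command asynchronously */
	memset(&chip->async_xfer, 0, sizeof(chip->async_xfer));
	chip->async_xfer.tx_buf = chip->cmd_buf;
	chip->async_xfer.len    = sizeof(chip->cmd_buf);
	spi_message_init(&chip->async_msg);
	spi_message_add_tail(&chip->async_xfer, &chip->async_msg);
	chip->async_msg.complete = my_async_done;
	chip->async_msg.context  = chip;

	ret = spi_async(chip->spi, &chip->async_msg);
	if (ret)
		return ret;

	/*
	 * 2) immediately follow up with a sync transfer from the same
	 *    context; it is queued behind the async message, so ordering
	 *    is preserved.
	 */
	spi_message_init(&sync_msg);
	spi_message_add_tail(&sync_xfer, &sync_msg);
	return spi_sync(chip->spi, &sync_msg);
}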
@@ -1549,6 +1549,103 @@ static void spi_idle_runtime_pm(struct spi_controller *ctlr)
 	}
 }
 
+static int __spi_pump_transfer_message(struct spi_controller *ctlr,
+				       struct spi_message *msg, bool was_busy)
+{
+	struct spi_transfer *xfer;
+	int ret;
+
+	if (!was_busy && ctlr->auto_runtime_pm) {
+		ret = pm_runtime_get_sync(ctlr->dev.parent);
+		if (ret < 0) {
+			pm_runtime_put_noidle(ctlr->dev.parent);
+			dev_err(&ctlr->dev, "Failed to power device: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	if (!was_busy)
+		trace_spi_controller_busy(ctlr);
+
+	if (!was_busy && ctlr->prepare_transfer_hardware) {
+		ret = ctlr->prepare_transfer_hardware(ctlr);
+		if (ret) {
+			dev_err(&ctlr->dev,
+				"failed to prepare transfer hardware: %d\n",
+				ret);
+
+			if (ctlr->auto_runtime_pm)
+				pm_runtime_put(ctlr->dev.parent);
+
+			msg->status = ret;
+			spi_finalize_current_message(ctlr);
+
+			return ret;
+		}
+	}
+
+	trace_spi_message_start(msg);
+
+	if (ctlr->prepare_message) {
+		ret = ctlr->prepare_message(ctlr, msg);
+		if (ret) {
+			dev_err(&ctlr->dev, "failed to prepare message: %d\n",
+				ret);
+			msg->status = ret;
+			spi_finalize_current_message(ctlr);
+			return ret;
+		}
+		msg->prepared = true;
+	}
+
+	ret = spi_map_msg(ctlr, msg);
+	if (ret) {
+		msg->status = ret;
+		spi_finalize_current_message(ctlr);
+		return ret;
+	}
+
+	if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) {
+		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+			xfer->ptp_sts_word_pre = 0;
+			ptp_read_system_prets(xfer->ptp_sts);
+		}
+	}
+
+	/*
+	 * Drivers implementation of transfer_one_message() must arrange for
+	 * spi_finalize_current_message() to get called. Most drivers will do
+	 * this in the calling context, but some don't. For those cases, a
+	 * completion is used to guarantee that this function does not return
+	 * until spi_finalize_current_message() is done accessing
+	 * ctlr->cur_msg.
+	 * Use of the following two flags enable to opportunistically skip the
+	 * use of the completion since its use involves expensive spin locks.
+	 * In case of a race with the context that calls
+	 * spi_finalize_current_message() the completion will always be used,
+	 * due to strict ordering of these flags using barriers.
+	 */
+	WRITE_ONCE(ctlr->cur_msg_incomplete, true);
+	WRITE_ONCE(ctlr->cur_msg_need_completion, false);
+	reinit_completion(&ctlr->cur_msg_completion);
+	smp_wmb(); /* make these available to spi_finalize_current_message */
+
+	ret = ctlr->transfer_one_message(ctlr, msg);
+	if (ret) {
+		dev_err(&ctlr->dev,
+			"failed to transfer one message from queue\n");
+		return ret;
+	} else {
+		WRITE_ONCE(ctlr->cur_msg_need_completion, true);
+		smp_mb(); /* see spi_finalize_current_message()... */
+		if (READ_ONCE(ctlr->cur_msg_incomplete))
+			wait_for_completion(&ctlr->cur_msg_completion);
+	}
+
+	return 0;
+}
+
 /**
  * __spi_pump_messages - function which processes spi message queue
  * @ctlr: controller to process queue for
@@ -1564,34 +1661,25 @@ static void spi_idle_runtime_pm(struct spi_controller *ctlr)
  */
 static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 {
-	struct spi_transfer *xfer;
 	struct spi_message *msg;
 	bool was_busy = false;
 	unsigned long flags;
 	int ret;
 
+	/* Take the IO mutex */
+	mutex_lock(&ctlr->io_mutex);
+
 	/* Lock queue */
 	spin_lock_irqsave(&ctlr->queue_lock, flags);
 
 	/* Make sure we are not already running a message */
-	if (ctlr->cur_msg) {
-		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-		return;
-	}
-
-	/* If another context is idling the device then defer */
-	if (ctlr->idling) {
-		kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
-		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-		return;
-	}
+	if (ctlr->cur_msg)
+		goto out_unlock;
 
 	/* Check if the queue is idle */
 	if (list_empty(&ctlr->queue) || !ctlr->running) {
-		if (!ctlr->busy) {
-			spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-			return;
-		}
+		if (!ctlr->busy)
+			goto out_unlock;
 
 		/* Defer any non-atomic teardown to the thread */
 		if (!in_kthread) {
@@ -1599,17 +1687,16 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 			    !ctlr->unprepare_transfer_hardware) {
 				spi_idle_runtime_pm(ctlr);
 				ctlr->busy = false;
+				ctlr->queue_empty = true;
 				trace_spi_controller_idle(ctlr);
 			} else {
 				kthread_queue_work(ctlr->kworker,
 						   &ctlr->pump_messages);
 			}
-			spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-			return;
+			goto out_unlock;
 		}
 
 		ctlr->busy = false;
-		ctlr->idling = true;
 		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 		kfree(ctlr->dummy_rx);
@@ -1624,9 +1711,8 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 		trace_spi_controller_idle(ctlr);
 
 		spin_lock_irqsave(&ctlr->queue_lock, flags);
-		ctlr->idling = false;
-		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-		return;
+		ctlr->queue_empty = true;
+		goto out_unlock;
 	}
 
 	/* Extract head of queue */
@@ -1640,81 +1726,23 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 	ctlr->busy = true;
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
-	mutex_lock(&ctlr->io_mutex);
-
-	if (!was_busy && ctlr->auto_runtime_pm) {
-		ret = pm_runtime_resume_and_get(ctlr->dev.parent);
-		if (ret < 0) {
-			dev_err(&ctlr->dev, "Failed to power device: %d\n",
-				ret);
-			mutex_unlock(&ctlr->io_mutex);
-			return;
-		}
-	}
-
-	if (!was_busy)
-		trace_spi_controller_busy(ctlr);
-
-	if (!was_busy && ctlr->prepare_transfer_hardware) {
-		ret = ctlr->prepare_transfer_hardware(ctlr);
-		if (ret) {
-			dev_err(&ctlr->dev,
-				"failed to prepare transfer hardware: %d\n",
-				ret);
-
-			if (ctlr->auto_runtime_pm)
-				pm_runtime_put(ctlr->dev.parent);
-
-			msg->status = ret;
-			spi_finalize_current_message(ctlr);
-			mutex_unlock(&ctlr->io_mutex);
-			return;
-		}
-	}
-
-	trace_spi_message_start(msg);
-
-	if (ctlr->prepare_message) {
-		ret = ctlr->prepare_message(ctlr, msg);
-		if (ret) {
-			dev_err(&ctlr->dev, "failed to prepare message: %d\n",
-				ret);
-			msg->status = ret;
-			spi_finalize_current_message(ctlr);
-			goto out;
-		}
-		ctlr->cur_msg_prepared = true;
-	}
-
-	ret = spi_map_msg(ctlr, msg);
-	if (ret) {
-		msg->status = ret;
-		spi_finalize_current_message(ctlr);
-		goto out;
-	}
-
-	if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) {
-		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-			xfer->ptp_sts_word_pre = 0;
-			ptp_read_system_prets(xfer->ptp_sts);
-		}
-	}
-
-	ret = ctlr->transfer_one_message(ctlr, msg);
-	if (ret) {
-		dev_err(&ctlr->dev,
-			"failed to transfer one message from queue: %d\n",
-			ret);
-		goto out;
-	}
+	ret = __spi_pump_transfer_message(ctlr, msg, was_busy);
+	if (!ret)
+		kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
 
-out:
+	ctlr->cur_msg = NULL;
+	ctlr->fallback = false;
+
 	mutex_unlock(&ctlr->io_mutex);
 
 	/* Prod the scheduler in case transfer_one() was busy waiting */
 	if (!ret)
 		cond_resched();
+	return;
+
+out_unlock:
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
+	mutex_unlock(&ctlr->io_mutex);
 }
 
 /**
@@ -1839,6 +1867,7 @@ static int spi_init_queue(struct spi_controller *ctlr)
 {
 	ctlr->running = false;
 	ctlr->busy = false;
+	ctlr->queue_empty = true;
 
 	ctlr->kworker = kthread_create_worker(0, dev_name(&ctlr->dev));
 	if (IS_ERR(ctlr->kworker)) {
@@ -1897,12 +1926,9 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
 {
 	struct spi_transfer *xfer;
 	struct spi_message *mesg;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&ctlr->queue_lock, flags);
 	mesg = ctlr->cur_msg;
-	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) {
 		list_for_each_entry(xfer, &mesg->transfers, transfer_list) {
@@ -1926,7 +1952,7 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
 	 */
 	spi_res_release(ctlr, mesg);
 
-	if (ctlr->cur_msg_prepared && ctlr->unprepare_message) {
+	if (mesg->prepared && ctlr->unprepare_message) {
 		ret = ctlr->unprepare_message(ctlr, mesg);
 		if (ret) {
 			dev_err(&ctlr->dev, "failed to unprepare message: %d\n",
@@ -1934,12 +1960,12 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
 		}
 	}
 
-	spin_lock_irqsave(&ctlr->queue_lock, flags);
-	ctlr->cur_msg = NULL;
-	ctlr->cur_msg_prepared = false;
-	ctlr->fallback = false;
-	kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
-	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
+	mesg->prepared = false;
+
+	WRITE_ONCE(ctlr->cur_msg_incomplete, false);
+	smp_mb(); /* See __spi_pump_transfer_message()... */
+	if (READ_ONCE(ctlr->cur_msg_need_completion))
+		complete(&ctlr->cur_msg_completion);
 
 	trace_spi_message_done(mesg);
@@ -2042,6 +2068,7 @@ static int __spi_queued_transfer(struct spi_device *spi,
 	msg->status = -EINPROGRESS;
 
 	list_add_tail(&msg->queue, &ctlr->queue);
+	ctlr->queue_empty = false;
 	if (!ctlr->busy && need_pump)
 		kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
@@ -3025,6 +3052,7 @@ int spi_register_controller(struct spi_controller *ctlr)
 	}
 	ctlr->bus_lock_flag = 0;
 	init_completion(&ctlr->xfer_completion);
+	init_completion(&ctlr->cur_msg_completion);
 	if (!ctlr->max_dma_len)
 		ctlr->max_dma_len = INT_MAX;
@@ -3937,6 +3965,39 @@ static int spi_async_locked(struct spi_device *spi, struct spi_message *message)
 
 }
 
+static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct spi_message *msg)
+{
+	bool was_busy;
+	int ret;
+
+	mutex_lock(&ctlr->io_mutex);
+
+	was_busy = ctlr->busy;
+
+	ctlr->cur_msg = msg;
+	ret = __spi_pump_transfer_message(ctlr, msg, was_busy);
+	if (ret)
+		goto out;
+
+	ctlr->cur_msg = NULL;
+	ctlr->fallback = false;
+
+	if (!was_busy) {
+		kfree(ctlr->dummy_rx);
+		ctlr->dummy_rx = NULL;
+		kfree(ctlr->dummy_tx);
+		ctlr->dummy_tx = NULL;
+		if (ctlr->unprepare_transfer_hardware &&
+		    ctlr->unprepare_transfer_hardware(ctlr))
+			dev_err(&ctlr->dev,
+				"failed to unprepare transfer hardware\n");
+		spi_idle_runtime_pm(ctlr);
+	}
+
+out:
+	mutex_unlock(&ctlr->io_mutex);
+}
+
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -3955,51 +4016,51 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
 	DECLARE_COMPLETION_ONSTACK(done);
 	int status;
 	struct spi_controller *ctlr = spi->controller;
-	unsigned long flags;
 
 	status = __spi_validate(spi, message);
 	if (status != 0)
 		return status;
 
-	message->complete = spi_complete;
-	message->context = &done;
 	message->spi = spi;
 
 	SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync);
 	SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync);
 
 	/*
-	 * If we're not using the legacy transfer method then we will
-	 * try to transfer in the calling context so special case.
-	 * This code would be less tricky if we could remove the
-	 * support for driver implemented message queues.
+	 * Checking queue_empty here only guarantees async/sync message
+	 * ordering when coming from the same context. It does not need to
+	 * guard against reentrancy from a different context. The io_mutex
+	 * will catch those cases.
 	 */
-	if (ctlr->transfer == spi_queued_transfer) {
-		spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags);
+	if (READ_ONCE(ctlr->queue_empty)) {
+		message->actual_length = 0;
+		message->status = -EINPROGRESS;
 
 		trace_spi_message_submit(message);
 
-		status = __spi_queued_transfer(spi, message, false);
+		SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync_immediate);
+		SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync_immediate);
 
-		spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
-	} else {
-		status = spi_async_locked(spi, message);
-	}
+		__spi_transfer_message_noqueue(ctlr, message);
+
+		return message->status;
+	}
 
-	if (status == 0) {
-		/* Push out the messages in the calling context if we can */
-		if (ctlr->transfer == spi_queued_transfer) {
-			SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics,
-						       spi_sync_immediate);
-			SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics,
-						       spi_sync_immediate);
-			__spi_pump_messages(ctlr, false);
-		}
+	/*
+	 * There are messages in the async queue that could have originated
+	 * from the same context, so we need to preserve ordering.
+	 * Therefor we send the message to the async queue and wait until they
+	 * are completed.
+	 */
+	message->complete = spi_complete;
+	message->context = &done;
+	status = spi_async_locked(spi, message);
+	if (status == 0) {
 		wait_for_completion(&done);
 		status = message->status;
 	}
 	message->context = NULL;
 
 	return status;
 }
...
@@ -383,10 +383,14 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @pump_messages: work struct for scheduling work to the message pump
  * @queue_lock: spinlock to syncronise access to message queue
  * @queue: message queue
- * @idling: the device is entering idle state
  * @cur_msg: the currently in-flight message
- * @cur_msg_prepared: spi_prepare_message was called for the currently
- *                    in-flight message
+ * @cur_msg_completion: a completion for the current in-flight message
+ * @cur_msg_incomplete: Flag used internally to opportunistically skip
+ *	the @cur_msg_completion. This flag is used to check if the driver has
+ *	already called spi_finalize_current_message().
+ * @cur_msg_need_completion: Flag used internally to opportunistically skip
+ *	the @cur_msg_completion. This flag is used to signal the context that
+ *	is running spi_finalize_current_message() that it needs to complete()
  * @cur_msg_mapped: message has been mapped for DMA
  * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip
  *           selected
@@ -463,6 +467,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @irq_flags: Interrupt enable state during PTP system timestamping
  * @fallback: fallback to pio if dma transfer return failure with
  *	SPI_TRANS_FAIL_NO_START.
+ * @queue_empty: signal green light for opportunistically skipping the queue
+ *	for spi_sync transfers.
  *
  * Each SPI controller can communicate with one or more @spi_device
  * children. These make a small bus, sharing MOSI, MISO and SCK signals
@@ -616,12 +622,13 @@ struct spi_controller {
 	spinlock_t		queue_lock;
 	struct list_head	queue;
 	struct spi_message	*cur_msg;
-	bool			idling;
+	struct completion	cur_msg_completion;
+	bool			cur_msg_incomplete;
+	bool			cur_msg_need_completion;
 	bool			busy;
 	bool			running;
 	bool			rt;
 	bool			auto_runtime_pm;
-	bool			cur_msg_prepared;
 	bool			cur_msg_mapped;
 	char			last_cs;
 	bool			last_cs_mode_high;
@@ -680,6 +687,9 @@ struct spi_controller {
 	/* Interrupt enable state during PTP system timestamping */
 	unsigned long		irq_flags;
 
+	/* Flag for enabling opportunistic skipping of the queue in spi_sync */
+	bool			queue_empty;
+
 };
 
 static inline void *spi_controller_get_devdata(struct spi_controller *ctlr)
@@ -988,6 +998,7 @@ struct spi_transfer {
  * @queue: for use by whichever driver currently owns the message
  * @state: for use by whichever driver currently owns the message
  * @resources: for resource management when the spi message is processed
+ * @prepared: spi_prepare_message was called for the this message
  *
  * A @spi_message is used to execute an atomic sequence of data transfers,
  * each represented by a struct spi_transfer. The sequence is "atomic"
@@ -1037,6 +1048,9 @@ struct spi_message {
 	/* list of spi_res reources when the spi message is processed */
 	struct list_head	resources;
 
+	/* spi_prepare_message was called for this message */
+	bool			prepared;
+
 };
 
 static inline void spi_message_init_no_memset(struct spi_message *m)
...
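
For readers unfamiliar with the flag/completion handshake used in
__spi_pump_transfer_message() and spi_finalize_current_message() above, the
following stand-alone C11 sketch mirrors the same pattern in user space: a
POSIX semaphore stands in for the completion and seq_cst atomics stand in for
the explicit smp_mb()/smp_wmb() barriers. It is an analogue of the idea under
those assumptions, not kernel code.

/* Build with: cc -pthread handshake.c */
#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool msg_incomplete;
static atomic_bool need_completion;
static sem_t completion;	/* plays the role of cur_msg_completion */

/* The "finalizer": whichever context finishes the message. */
static void *finalize_current_message(void *arg)
{
	(void)arg;
	/* ... the transfer would be completed here ... */
	atomic_store(&msg_incomplete, false);
	/* seq_cst store above + load below order like smp_mb() */
	if (atomic_load(&need_completion))
		sem_post(&completion);
	return NULL;
}

int main(void)
{
	pthread_t finalizer;

	sem_init(&completion, 0, 0);

	/* Pump side: publish the flags before starting the transfer. */
	atomic_store(&msg_incomplete, true);
	atomic_store(&need_completion, false);

	/* "transfer_one_message()": may finalize in another context. */
	pthread_create(&finalizer, NULL, finalize_current_message, NULL);

	/*
	 * Transfer started; only fall back to the (expensive) completion
	 * wait if the finalizer has not already run.
	 */
	atomic_store(&need_completion, true);
	if (atomic_load(&msg_incomplete))
		sem_wait(&completion);

	pthread_join(finalizer, NULL);
	printf("message finalized, pump may reuse cur_msg\n");
	sem_destroy(&completion);
	return 0;
}

Because both sides place a full barrier between their store and their load, at
least one side is guaranteed to observe the other's write: either the pump sees
the message already finalized and skips the wait, or the finalizer sees that a
waiter is (or will be) pending and posts the completion.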