diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 00f3fa6df71406b6eb5b6d64cb071f65a05e5b84..282846965080ee90ac4388618de7a4b2a2fbedae 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -20,6 +20,46 @@
 #include <asm/smp.h>
 #include <asm/ipi.h>
 
+/*
+ * The following permit choosing broadcast IPI shortcut v.s sending IPI only
+ * to online cpus via the send_IPI_mask varient.
+ * The mask version is my preferred option, since it eliminates a lot of
+ * other extra code that would need to be written to cleanup intrs sent
+ * to a CPU while offline.
+ *
+ * Sending broadcast introduces lots of trouble in CPU hotplug situations.
+ * These IPI's are delivered to cpu's irrespective of their offline status
+ * and could pickup stale intr data when these CPUS are turned online.
+ *
+ * Not using broadcast is a cleaner approach IMO, but Andi Kleen disagrees with
+ * the idea of not using broadcast IPI's anymore. Hence the run time check
+ * is introduced, on his request so we can choose an alternate mechanism.
+ *
+ * Initial wacky performance tests that collect cycle counts show
+ * no increase in using mask v.s broadcast version. In fact they seem
+ * identical in terms of cycle counts.
+ *
+ * if we need to use broadcast, we need to do the following.
+ *
+ * cli;
+ * hold call_lock;
+ * clear any pending IPI, just ack and clear all pending intr
+ * set cpu_online_map;
+ * release call_lock;
+ * sti;
+ *
+ * The complicated dummy irq processing shown above is not required if
+ * we didnt sent IPI's to wrong CPU's in the first place.
+ *
+ * - Ashok Raj <ashok.raj@intel.com>
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+#define DEFAULT_SEND_IPI	(1)
+#else
+#define DEFAULT_SEND_IPI	(0)
+#endif
+
+static int no_broadcast=DEFAULT_SEND_IPI;
 
 static cpumask_t flat_target_cpus(void)
 {
@@ -79,28 +119,37 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
 	local_irq_restore(flags);
 }
 
-static void flat_send_IPI_allbutself(int vector)
+static inline void __local_flat_send_IPI_allbutself(int vector)
 {
-	cpumask_t mask;
-	/*
-	 * if there are no other CPUs in the system then
-	 * we get an APIC send error if we try to broadcast.
-	 * thus we have to avoid sending IPIs in this case.
-	 */
-	int this_cpu = get_cpu();
-
-	mask = cpu_online_map;
-	cpu_clear(this_cpu, mask);
+	if (no_broadcast) {
+		cpumask_t mask = cpu_online_map;
+		int this_cpu = get_cpu();
 
-	if (cpus_weight(mask) >= 1)
+		cpu_clear(this_cpu, mask);
 		flat_send_IPI_mask(mask, vector);
+		put_cpu();
+	}
+	else
+		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
+}
 
-	put_cpu();
+static inline void __local_flat_send_IPI_all(int vector)
+{
+	if (no_broadcast)
+		flat_send_IPI_mask(cpu_online_map, vector);
+	else
+		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+static void flat_send_IPI_allbutself(int vector)
+{
+	if (((num_online_cpus()) - 1) >= 1)
+		__local_flat_send_IPI_allbutself(vector);
 }
 
 static void flat_send_IPI_all(int vector)
 {
-	flat_send_IPI_mask(cpu_online_map, vector);
+	__local_flat_send_IPI_all(vector);
 }
 
 static int flat_apic_id_registered(void)
@@ -121,6 +170,16 @@ static unsigned int phys_pkg_id(int index_msb)
 	return ((ebx >> 24) & 0xFF) >> index_msb;
 }
 
+static __init int no_ipi_broadcast(char *str)
+{
+	get_option(&str, &no_broadcast);
+	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
+											"IPI Broadcast");
+	return 1;
+}
+
+__setup("no_ipi_broadcast", no_ipi_broadcast);
+
 struct genapic apic_flat =  {
 	.name = "flat",
 	.int_delivery_mode = dest_LowestPrio,
@@ -135,3 +194,12 @@ struct genapic apic_flat =  {
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
 };
+
+static int __init print_ipi_mode(void)
+{
+	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
+											"Shortcut");
+	return 0;
+}
+
+late_initcall(print_ipi_mode);