[openib-general] [PATCH][12/12] InfiniBand/mthca: remove x86 SSE pessimization

Roland Dreier
Sun Jan 23 22:14:24 PST 2005


Get rid of the x86 SSE code for atomic 64-bit writes to doorbell
registers.  Saving/setting CR0 plus a clts instruction are too
expensive for it to ever be a win, and the config option was just
confusing.

Signed-off-by: Roland Dreier <roland at topspin.com>

--- linux-bk.orig/drivers/infiniband/hw/mthca/Kconfig   2005-01-23 08:30:27.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/Kconfig        2005-01-23 21:00:44.744520064 -0800
@@ -14,13 +14,3 @@
          This option causes the mthca driver produce a bunch of debug
          messages.  Select this is you are developing the driver or
          trying to diagnose a problem.
-
-config INFINIBAND_MTHCA_SSE_DOORBELL
-       bool "SSE doorbell code"
-       depends on INFINIBAND_MTHCA && X86 && !X86_64
-       default n
-       ---help---
-         This option will have the mthca driver use SSE instructions
-         to ring hardware doorbell registers.  This may improve
-         performance for some workloads, but the driver will not run
-         on processors without SSE instructions.
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_main.c      2005-01-23 20:58:55.771086544 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c   2005-01-23 21:00:44.745519912 -0800
@@ -40,10 +40,6 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 
-#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
-#include <asm/cpufeature.h>
-#endif
-
 #include "mthca_dev.h"
 #include "mthca_config_reg.h"
 #include "mthca_cmd.h"
@@ -1117,22 +1113,6 @@
 {
        int ret;
 
-       /*
-        * TODO: measure whether dynamically choosing doorbell code at
-        * runtime affects our performance.  Is there a "magic" way to
-        * choose without having to follow a function pointer every
-        * time we ring a doorbell?
-        */
-#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
-       if (!cpu_has_xmm) {
-               printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n");
-               printk(KERN_ERR PFX "the current CPU does not support SSE.\n");
-               printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL "
-                      "and recompile.\n");
-               return -ENODEV;
-       }
-#endif
-
        ret = pci_register_driver(&mthca_driver);
        return ret < 0 ? ret : 0;
 }
--- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_doorbell.h  2005-01-23 08:30:38.000000000 -0800
+++ linux-bk/drivers/infiniband/hw/mthca/mthca_doorbell.h       2005-01-23 21:00:44.746519760 -0800
@@ -32,9 +32,7 @@
  * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
-#include <linux/preempt.h>
 
 #define MTHCA_RD_DOORBELL      0x00
 #define MTHCA_SEND_DOORBELL    0x10
@@ -59,51 +57,13 @@
        __raw_writeq(*(u64 *) val, dest);
 }
 
-#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL)
-/* Use SSE to write 64 bits atomically without a lock. */
-
-#define MTHCA_DECLARE_DOORBELL_LOCK(name)
-#define MTHCA_INIT_DOORBELL_LOCK(ptr)    do { } while (0)
-#define MTHCA_GET_DOORBELL_LOCK(ptr)      (NULL)
-
-static inline unsigned long mthca_get_fpu(void)
-{
-       unsigned long cr0;
-
-       preempt_disable();
-       asm volatile("mov %%cr0,%0; clts" : "=r" (cr0));
-       return cr0;
-}
-
-static inline void mthca_put_fpu(unsigned long cr0)
-{
-       asm volatile("mov %0,%%cr0" : : "r" (cr0));
-       preempt_enable();
-}
-
-static inline void mthca_write64(u32 val[2], void __iomem *dest,
-                                spinlock_t *doorbell_lock)
-{
-       /* i386 stack is aligned to 8 bytes, so this should be OK: */
-       u8 xmmsave[8] __attribute__((aligned(8)));
-       unsigned long cr0;
-
-       cr0 = mthca_get_fpu();
-
-       asm volatile (
-               "movlps %%xmm0,(%0); \n\t"
-               "movlps (%1),%%xmm0; \n\t"
-               "movlps %%xmm0,(%2); \n\t"
-               "movlps (%0),%%xmm0; \n\t"
-               :
-               : "r" (xmmsave), "r" (val), "r" (dest)
-               : "memory" );
-
-       mthca_put_fpu(cr0);
-}
-
 #else
-/* Just fall back to a spinlock to protect the doorbell */
+
+/*
+ * Just fall back to a spinlock to protect the doorbell if
+ * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
+ * MMIO writes.
+ */
 
 #define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
 #define MTHCA_INIT_DOORBELL_LOCK(ptr)     spin_lock_init(ptr)




More information about the openib-general mailing list