author    Andi Kleen <ak@muc.de>  2002-10-11 06:07:19 -0700
committer Linus Torvalds <torvalds@home.transmeta.com>  2002-10-11 06:07:19 -0700
commit    08f895c3998348daf924ba233b3584bbba57be2b (patch)
tree      fc96f6e0c83f9acef04b65ddcee4aeff3fd5b366
parent    b4655acd91583dc201e47b3e346f9ab686dda385 (diff)
[PATCH] Efficient bswab64 for i386
Due to some bugs in byteorder/generic.h, Linux would always use the handcoded
C swab64 for 64bit ntohq or cpu_to_be64. The C version is very inefficient and
expands to 30+ instructions of horrible code. This hurts on filesystems that
use big-endian on-disk data structures with 64bit data types.

This patch adds an assembly-optimized swab64 to fix it. Now bswab64 is 4
instructions when your CPU supports bswap and 9 when it doesn't. Tests were
done with gcc 3.2; results may differ on older gcc.

This is good for ~600 bytes of code size reduction in XFS (gcc 3.2):

            text   data   bss     dec    hex  filename
Before:   503199   3296  1682  508177  7c111  fs/xfs/xfs.o
After:    502543   3296  1682  507521  7be81  fs/xfs/xfs.o

It should also be faster. Also some minor cleanups in the file.
-rw-r--r--  include/asm-i386/byteorder.h  |  34
1 file changed, 27 insertions(+), 7 deletions(-)
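To make the trick concrete, here is a minimal user-space sketch in plain C of
the same approach the patch takes in its fallback path: byte-swap each 32-bit
half, then exchange the halves. The names swab32/swab64 and the demo program
are illustrative only and are not part of the patch or the kernel headers.

#include <stdio.h>
#include <stdint.h>

/* Portable byte swap of a 32-bit value. */
static uint32_t swab32(uint32_t x)
{
	return ((x & 0x000000ffU) << 24) |
	       ((x & 0x0000ff00U) <<  8) |
	       ((x & 0x00ff0000U) >>  8) |
	       ((x & 0xff000000U) >> 24);
}

/* 64-bit byte swap built from two 32-bit swaps plus an exchange of the
 * halves, mirroring the #else path of the patch below. */
static uint64_t swab64(uint64_t val)
{
	uint32_t lo = (uint32_t)val;
	uint32_t hi = (uint32_t)(val >> 32);

	return ((uint64_t)swab32(lo) << 32) | swab32(hi);
}

int main(void)
{
	uint64_t x = 0x0123456789abcdefULL;

	/* expect efcdab8967452301 */
	printf("%016llx -> %016llx\n",
	       (unsigned long long)x, (unsigned long long)swab64(x));
	return 0;
}

On a CPU with bswap, a compiler may reduce each 32-bit swap to a single
instruction; the inline assembly in the patch simply guarantees that code
shape instead of hoping for it.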
diff --git a/include/asm-i386/byteorder.h b/include/asm-i386/byteorder.h
index bbfb629fae26..b164317e3c37 100644
--- a/include/asm-i386/byteorder.h
+++ b/include/asm-i386/byteorder.h
@@ -24,21 +24,41 @@ static __inline__ __const__ __u32 ___arch__swab32(__u32 x)
 	return x;
 }
+/* gcc should generate this for open coded C now too. May be worth switching to
+   it because inline assembly cannot be scheduled. -AK */
 static __inline__ __const__ __u16 ___arch__swab16(__u16 x)
 {
-	__asm__("xchgb %b0,%h0"		/* swap bytes */ \
-		: "=q" (x) \
-		: "0" (x)); \
+	__asm__("xchgb %b0,%h0"		/* swap bytes */
+		: "=q" (x)
+		: "0" (x));
 	return x;
 }
+
+static inline __u64 ___arch__swab64(__u64 val)
+{
+	union {
+		struct { __u32 a,b; } s;
+		__u64 u;
+	} v;
+	v.u = val;
+#ifdef CONFIG_X86_BSWAP
+	asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
+	    : "=r" (v.s.a), "=r" (v.s.b)
+	    : "0" (v.s.a), "1" (v.s.b));
+#else
+	v.s.a = ___arch__swab32(v.s.a);
+	v.s.b = ___arch__swab32(v.s.b);
+	asm("xchgl %0,%1" : "=r" (v.s.a), "=r" (v.s.b) : "0" (v.s.a), "1" (v.s.b));
+#endif
+	return v.u;
+}
+
+#define __arch__swab64(x) ___arch__swab64(x)
 #define __arch__swab32(x) ___arch__swab32(x)
 #define __arch__swab16(x) ___arch__swab16(x)
-#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
-# define __BYTEORDER_HAS_U64__
-# define __SWAB_64_THRU_32__
-#endif
+#define __BYTEORDER_HAS_U64__
 #endif /* __GNUC__ */
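The "-AK" comment added above swab16 notes the trade-off this patch lives
with: inline assembly pins down the exact instructions but cannot be scheduled
by the compiler, whereas open-coded C can. As a rough illustration of the
open-coded alternative the comment refers to (swab16_c is a hypothetical name,
not a kernel symbol):

#include <stdint.h>

/* Open-coded C byte swap of a 16-bit value.  A sufficiently recent gcc
 * can compile this down to a single rotate or xchgb with no inline
 * assembly, and remains free to schedule it among other instructions. */
static inline uint16_t swab16_c(uint16_t x)
{
	return (uint16_t)((x << 8) | (x >> 8));
}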