From 842accd12472f563523ba92ac4fe540ae71c99e9 Mon Sep 17 00:00:00 2001
From: barracuda156
Date: Tue, 29 Aug 2023 22:34:19 +0800
Subject: [PATCH] Revert "Remove configurability of PPC spinlock assembly code."

Upstream has broken the PowerPC build in
8ded65682bee2a1c04392a88e0df0f4fc7552623. Revert that.

diff --git configure configure
index 028434b56e..7c10905dfd 100755
--- configure
+++ configure
@@ -15336,7 +15336,39 @@ $as_echo "#define HAVE_X86_64_POPCNTQ 1" >>confdefs.h
     fi
   ;;
   ppc*|powerpc*)
-    # On PPC, check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports lwarx hint bit" >&5
+$as_echo_n "checking whether assembler supports lwarx hint bit... " >&6; }
+if ${pgac_cv_have_ppc_mutex_hint+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+int a = 0; int *p = &a; int r;
+	 __asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_ppc_mutex_hint=yes
+else
+  pgac_cv_have_ppc_mutex_hint=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_ppc_mutex_hint" >&5
+$as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
+  if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
+
+$as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
+
+  fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
 $as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
 if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
diff --git configure.ac configure.ac
index f8ab273674..d3e402dfc0 100644
--- configure.ac
+++ configure.ac
@@ -1677,7 +1677,18 @@ case $host_cpu in
     fi
   ;;
   ppc*|powerpc*)
-    # On PPC, check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
+    AC_CACHE_CHECK([whether assembler supports lwarx hint bit],
+                   [pgac_cv_have_ppc_mutex_hint],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+      [int a = 0; int *p = &a; int r;
+       __asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));])],
+    [pgac_cv_have_ppc_mutex_hint=yes],
+    [pgac_cv_have_ppc_mutex_hint=no])])
+    if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
+      AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
+    fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
     AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
                    [pgac_cv_have_i_constraint__builtin_constant_p],
     [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
diff --git src/include/pg_config.h.in src/include/pg_config.h.in
index 85150f90b2..31674bde4e 100644
--- src/include/pg_config.h.in
+++ src/include/pg_config.h.in
@@ -349,6 +349,9 @@
 /* Define to 1 if you have the `posix_fallocate' function. */
 #undef HAVE_POSIX_FALLOCATE
 
+/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
+#undef HAVE_PPC_LWARX_MUTEX_HINT
+
 /* Define to 1 if you have the `ppoll' function. */
 #undef HAVE_PPOLL
 
diff --git src/include/pg_config_manual.h src/include/pg_config_manual.h
index 844c3e0f09..5ee2c46267 100644
--- src/include/pg_config_manual.h
+++ src/include/pg_config_manual.h
@@ -227,6 +227,32 @@
  */
 #define DEFAULT_EVENT_SOURCE	"PostgreSQL"
 
+/*
+ * On PPC machines, decide whether to use the mutex hint bit in LWARX
+ * instructions.  Setting the hint bit will slightly improve spinlock
+ * performance on POWER6 and later machines, but does nothing before that,
+ * and will result in illegal-instruction failures on some pre-POWER4
+ * machines.  By default we use the hint bit when building for 64-bit PPC,
+ * which should be safe in nearly all cases.  You might want to override
+ * this if you are building 32-bit code for a known-recent PPC machine.
+ */
+#ifdef HAVE_PPC_LWARX_MUTEX_HINT	/* must have assembler support in any case */
+#if defined(__ppc64__) || defined(__powerpc64__)
+#define USE_PPC_LWARX_MUTEX_HINT
+#endif
+#endif
+
+/*
+ * On PPC machines, decide whether to use LWSYNC instructions in place of
+ * ISYNC and SYNC.  This provides slightly better performance, but will
+ * result in illegal-instruction failures on some pre-POWER4 machines.
+ * By default we use LWSYNC when building for 64-bit PPC, which should be
+ * safe in nearly all cases.
+ */
+#if defined(__ppc64__) || defined(__powerpc64__)
+#define USE_PPC_LWSYNC
+#endif
+
 /*
  * Assumed cache line size.  This doesn't affect correctness, but can be used
  * for low-level optimizations.  Currently, this is used to pad some data
diff --git src/include/port/atomics/arch-ppc.h src/include/port/atomics/arch-ppc.h
index 35a79042c0..eb64513626 100644
--- src/include/port/atomics/arch-ppc.h
+++ src/include/port/atomics/arch-ppc.h
@@ -90,12 +90,12 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 		(int32) *expected >= PG_INT16_MIN)
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	lwarx	%0,0,%5,1	\n"
+			"	lwarx	%0,0,%5		\n"
 			"	cmpwi	%0,%3		\n"
-			"	bne	$+12		\n" /* branch to lwsync */
+			"	bne	$+12		\n" /* branch to isync */
 			"	stwcx.	%4,0,%5		\n"
 			"	bne	$-16		\n" /* branch to lwarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			"	mfcr	%1		\n"
 			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 			:	"i"(*expected), "r"(newval), "r"(&ptr->value)
@@ -104,12 +104,12 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 #endif
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	lwarx	%0,0,%5,1	\n"
+			"	lwarx	%0,0,%5		\n"
 			"	cmpw	%0,%3		\n"
-			"	bne	$+12		\n" /* branch to lwsync */
+			"	bne	$+12		\n" /* branch to isync */
 			"	stwcx.	%4,0,%5		\n"
 			"	bne	$-16		\n" /* branch to lwarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			"	mfcr	%1		\n"
 			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 			:	"r"(*expected), "r"(newval), "r"(&ptr->value)
@@ -138,11 +138,11 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	lwarx	%1,0,%4,1	\n"
+			"	lwarx	%1,0,%4		\n"
 			"	addi	%0,%1,%3	\n"
 			"	stwcx.	%0,0,%4		\n"
 			"	bne	$-12		\n" /* branch to lwarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			:	"=&r"(_t), "=&b"(res), "+m"(ptr->value)
 			:	"i"(add_), "r"(&ptr->value)
 			:	"memory", "cc");
@@ -150,11 +150,11 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 #endif
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	lwarx	%1,0,%4,1	\n"
+			"	lwarx	%1,0,%4		\n"
 			"	add	%0,%1,%3	\n"
 			"	stwcx.	%0,0,%4		\n"
 			"	bne	$-12		\n" /* branch to lwarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			:	"=&r"(_t), "=&r"(res), "+m"(ptr->value)
 			:	"r"(add_), "r"(&ptr->value)
 			:	"memory", "cc");
@@ -180,12 +180,12 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 		(int64) *expected >= PG_INT16_MIN)
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	ldarx	%0,0,%5,1	\n"
+			"	ldarx	%0,0,%5		\n"
 			"	cmpdi	%0,%3		\n"
-			"	bne	$+12		\n" /* branch to lwsync */
+			"	bne	$+12		\n" /* branch to isync */
 			"	stdcx.	%4,0,%5		\n"
 			"	bne	$-16		\n" /* branch to ldarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			"	mfcr	%1		\n"
 			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 			:	"i"(*expected), "r"(newval), "r"(&ptr->value)
@@ -194,12 +194,12 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 #endif
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	ldarx	%0,0,%5,1	\n"
+			"	ldarx	%0,0,%5		\n"
 			"	cmpd	%0,%3		\n"
-			"	bne	$+12		\n" /* branch to lwsync */
+			"	bne	$+12		\n" /* branch to isync */
 			"	stdcx.	%4,0,%5		\n"
 			"	bne	$-16		\n" /* branch to ldarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			"	mfcr	%1		\n"
 			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 			:	"r"(*expected), "r"(newval), "r"(&ptr->value)
@@ -224,11 +224,11 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	ldarx	%1,0,%4,1	\n"
+			"	ldarx	%1,0,%4		\n"
 			"	addi	%0,%1,%3	\n"
 			"	stdcx.	%0,0,%4		\n"
 			"	bne	$-12		\n" /* branch to ldarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			:	"=&r"(_t), "=&b"(res), "+m"(ptr->value)
 			:	"i"(add_), "r"(&ptr->value)
 			:	"memory", "cc");
@@ -236,11 +236,11 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 #endif
 		__asm__ __volatile__(
 			"	sync				\n"
-			"	ldarx	%1,0,%4,1	\n"
+			"	ldarx	%1,0,%4		\n"
 			"	add	%0,%1,%3	\n"
 			"	stdcx.	%0,0,%4		\n"
 			"	bne	$-12		\n" /* branch to ldarx */
-			"	lwsync				\n"
+			"	isync				\n"
 			:	"=&r"(_t), "=&r"(res), "+m"(ptr->value)
 			:	"r"(add_), "r"(&ptr->value)
 			:	"memory", "cc");
diff --git src/include/storage/s_lock.h src/include/storage/s_lock.h
index cc83d561b2..0877cf65b0 100644
--- src/include/storage/s_lock.h
+++ src/include/storage/s_lock.h
@@ -435,8 +435,7 @@ typedef unsigned int slock_t;
  *
  * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
  * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * But if the spinlock is in ordinary memory, we can use lwsync instead for
- * better performance.
+ * On newer machines, we can use lwsync instead for better performance.
  *
  * Ordinarily, we'd code the branches here using GNU-style local symbols, that
  * is "1f" referencing "1:" and so on.  But some people run gcc on AIX with
@@ -451,15 +450,23 @@ tas(volatile slock_t *lock)
 	int			_res;
 
 	__asm__ __volatile__(
+#ifdef USE_PPC_LWARX_MUTEX_HINT
 "	lwarx	%0,0,%3,1	\n"
+#else
+"	lwarx	%0,0,%3		\n"
+#endif
 "	cmpwi	%0,0		\n"
 "	bne	$+16		\n"	/* branch to li %1,1 */
 "	addi	%0,%0,1		\n"
 "	stwcx.	%0,0,%3		\n"
-"	beq	$+12		\n"	/* branch to lwsync */
+"	beq	$+12		\n"	/* branch to lwsync/isync */
 "	li	%1,1		\n"
 "	b	$+12		\n"	/* branch to end of asm sequence */
+#ifdef USE_PPC_LWSYNC
 "	lwsync				\n"
+#else
+"	isync				\n"
+#endif
 "	li	%1,0		\n"
 
 :	"=&b"(_t), "=r"(_res), "+m"(*lock)
@@ -470,14 +477,23 @@ tas(volatile slock_t *lock)
 
 /*
  * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * But we can use lwsync instead for better performance.
+ * On newer machines, we can use lwsync instead for better performance.
  */
+#ifdef USE_PPC_LWSYNC
 #define S_UNLOCK(lock)	\
 do \
 { \
 	__asm__ __volatile__ ("	lwsync \n" ::: "memory"); \
 	*((volatile slock_t *) (lock)) = 0; \
 } while (0)
+#else
+#define S_UNLOCK(lock)	\
+do \
+{ \
+	__asm__ __volatile__ ("	sync \n" ::: "memory"); \
+	*((volatile slock_t *) (lock)) = 0; \
+} while (0)
+#endif /* USE_PPC_LWSYNC */
 
 #endif /* powerpc */
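
---
Note (not part of the patch itself): the configure probe above boils down to
the standalone C program below, shown here as a sketch for checking whether a
given toolchain accepts the lwarx mutex hint bit before building. The asm
statement is taken verbatim from the patch's AC_LANG_PROGRAM body; the file
name lwarx_hint_test.c is illustrative, not from the patch.

/*
 * Compiling this (e.g. "cc -c lwarx_hint_test.c") succeeds only if the
 * assembler accepts the four-operand form "lwarx RT,RA,RB,EH"; EH=1 is the
 * mutex hint that older assemblers targeting pre-POWER4 CPUs may reject,
 * which is exactly what HAVE_PPC_LWARX_MUTEX_HINT records.
 */
int
main(void)
{
	int		a = 0;
	int	   *p = &a;
	int		r;

	/* load-and-reserve with the mutex hint bit set */
	__asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));
	return 0;
}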