以下 16 字节原子操作是否正确实现?有没有更好的选择?

/*
 * 16-byte value operated on with `lock cmpxchg16b`.
 *
 * `low` is the half the instruction exchanges through rax/rbx and `high` is
 * the half exchanged through rdx/rcx. cmpxchg16b raises #GP when its memory
 * operand is not 16-byte aligned, so the alignment is forced on the type
 * instead of being left to every declaration site.
 */
typedef struct {
    uintptr_t low;
    uintptr_t high;
} __attribute__((aligned(16))) uint128_atomic;


/*
 * Atomically read the 16-byte value at *atomic (relaxed ordering).
 *
 * The read is done with `lock cmpxchg16b`, using zero for both the comparand
 * (rdx:rax) and the replacement (rcx:rbx):
 *   - if memory holds zero, zero is written back and rdx:rax stays zero;
 *   - otherwise the current contents are loaded into rdx:rax.
 * Either way rdx:rax ends up holding the value and memory is logically
 * unchanged.
 *
 * cmpxchg16b performs a write cycle on its destination regardless of the
 * comparison result, so *atomic must live in writable, 16-byte-aligned
 * memory even though the pointer is const-qualified. The operand is
 * therefore declared "+m" so the compiler knows it is written.
 *
 * Returns the value observed at *atomic.
 */
uint128_atomic load_relaxed(uint128_atomic const *atomic)
{
    uintptr_t lo = 0;
    uintptr_t hi = 0;
    uint128_atomic ret;

    /* __asm__ (not asm) so the code also builds in strict -std=c99/c11 mode. */
    __asm__ volatile("lock cmpxchg16b %0"
                     : "+m"(*(uint128_atomic *)atomic), "+a"(lo), "+d"(hi)
                     : "b"((uintptr_t)0), "c"((uintptr_t)0)
                     : "cc");

    ret.low = lo;
    ret.high = hi;
    return ret;
}

/*
 * 16-byte compare-and-exchange with relaxed ordering.
 *
 * If *atomic equals *expected, *atomic is replaced by `desired` and true is
 * returned. Otherwise *atomic is left alone, the value actually found there
 * is stored into *expected, and false is returned.
 *
 * *atomic must be 16-byte aligned (cmpxchg16b requirement).
 */
bool cmpexch_weak_relaxed(
    uint128_atomic *atomic,
    uint128_atomic *expected,
    uint128_atomic desired)
{
    bool matched;
    /*
     * cmpxchg16b overwrites rdx:rax with the current memory contents when the
     * comparison fails, so both halves must be read-write operands.
     */
    uintptr_t lo = expected->low;
    uintptr_t hi = expected->high;

    __asm__ volatile("lock cmpxchg16b %1\n\t"
                     "setz %0"
                     : "=q"(matched), "+m"(*atomic), "+a"(lo), "+d"(hi)
                     : "b"(desired.low), "c"(desired.high)
                     : "cc");

    if (!matched) {
        /* Report the value that was actually seen, for the caller's retry. */
        expected->low = lo;
        expected->high = hi;
    }
    return matched;
}

/*
 * Atomically store `val` into *atomic (relaxed ordering).
 *
 * The store is a compare-and-exchange loop. The initial snapshot is only a
 * guess and may be torn or stale, because a failed cmpxchg16b reloads
 * rdx:rax with the real current value. The loop retries until the exchange
 * succeeds; a single attempt would silently drop the store whenever *atomic
 * changed after the snapshot.
 *
 * *atomic must be 16-byte aligned (cmpxchg16b requirement).
 */
void store_relaxed(uint128_atomic *atomic, uint128_atomic val)
{
    uintptr_t lo = atomic->low;
    uintptr_t hi = atomic->high;
    unsigned char stored;

    do {
        /* rdx:rax are read-write: the instruction updates them on failure. */
        __asm__ volatile("lock cmpxchg16b %1\n\t"
                         "setz %0"
                         : "=q"(stored), "+m"(*atomic), "+a"(lo), "+d"(hi)
                         : "b"(val.low), "c"(val.high)
                         : "cc");
    } while (!stored);
}

有关完整的工作示例,请查看:

https://godbolt.org/g/CemfSg

更新的实现可以在这里找到:https://godbolt.org/g/vGNQG5

最佳答案

在应用了 @PeterCordes、@David Wohlferd 和 @prl 的所有建议后,我想出了以下实现。非常感谢!

/*
 * 16-byte value operated on with `lock cmpxchg16b`.
 * `low` travels through rax/rbx and `high` through rdx/rcx in the asm
 * helpers. The type is forced to 16-byte alignment.
 */
typedef struct _uint128_atomic {
    volatile uint64_t low;
    volatile uint64_t high;
} __attribute__((aligned(16))) uint128_atomic;


/*
 * 16-byte compare-and-exchange with relaxed ordering.
 *
 * Returns true when *atomic matched *expected and was replaced by `desired`.
 * Otherwise returns false after writing the value found in *atomic back
 * into *expected.
 */
bool
cmpexch_weak_relaxed(
    uint128_atomic *atomic,
    uint128_atomic *expected,
    uint128_atomic desired)
{
    /* rdx:rax hold the comparand going in and the observed value on failure. */
    uint64_t seen_lo = expected->low;
    uint64_t seen_hi = expected->high;
    bool swapped;

    /* ZF is taken directly as the result through the flag-output constraint. */
    __asm__ volatile("lock cmpxchg16b %1"
                     : "=@ccz"(swapped), "+m"(*atomic), "+a"(seen_lo), "+d"(seen_hi)
                     : "b"(desired.low), "c"(desired.high)
                     : "cc");

    if (swapped)
        return true;

    expected->low = seen_lo;
    expected->high = seen_hi;
    return false;
}


/*
 * Atomically read the 16-byte value at *atomic (relaxed ordering).
 *
 * Uses `lock cmpxchg16b` with zero as both comparand (rdx:rax) and
 * replacement (rcx:rbx). Memory that holds zero is rewritten with zero;
 * any other value is loaded into rdx:rax. Either way rdx:rax ends up with
 * the current contents and memory is logically unchanged.
 *
 * cmpxchg16b issues a write to its destination regardless of the comparison
 * result, so *atomic must be in writable, 16-byte-aligned memory despite the
 * const qualifier. The operand is declared "+m" to tell the compiler so.
 *
 * Returns the value observed at *atomic.
 */
uint128_atomic
load_relaxed(uint128_atomic const *atomic)
{
    uint64_t lo = 0;
    uint64_t hi = 0;
    uint128_atomic ret;

    /*
     * The zeros for rbx/rcx are given a 64-bit type so the full registers are
     * pinned; an int-typed 0 only constrains the low 32 bits. Separate
     * "+a"/"+d" scalars replace the "A" pair constraint on a struct operand.
     */
    __asm__ volatile("lock cmpxchg16b %0"
                     : "+m"(*(uint128_atomic *)atomic), "+a"(lo), "+d"(hi)
                     : "b"((uint64_t)0), "c"((uint64_t)0)
                     : "cc");

    ret.low = lo;
    ret.high = hi;
    return ret;
}


/*
 * Atomically store `val` into *atomic (relaxed ordering).
 *
 * Takes a snapshot of *atomic and calls cmpexch_weak_relaxed repeatedly,
 * passing the snapshot as `expected`, until that call returns true.
 */
void
store_relaxed(uint128_atomic *atomic, uint128_atomic val)
{
    uint128_atomic seen = *atomic;

    for (;;) {
        if (cmpexch_weak_relaxed(atomic, &seen, val))
            return;
    }
}

请记住,该实现是特定于 GCC 的,并且不适用于 clang。clang 对 GCC 内联汇编的实现往好了说是次优的,往坏了说就是垃圾。
GCC 实现也可以在 Godbolt 的 Compiler Explorer 上找到(见此处)。
此处是一个次优但有效的 clang 实现。

关于c - x86_64 上的原子 16 字节操作,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/49214363/

10-11 18:22