A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
ÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
ÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
ÿÿÿÿ~xsemaphore();
ÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
ÿÿÿÿvoid acquire() noexcept;
ÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
private:
ÿÿÿÿstatic constexpr unsigned
ÿÿÿÿÿÿÿ MASK_BITS = 21,
ÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
ÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
ÿÿÿÿstatic constexpr uint64_t
ÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
ÿÿÿÿÿÿÿ COUNT_VALUE = 1,
ÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
ÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
ÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
ÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
ÿÿÿÿstatic constexpr std::memory_order
ÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
ÿÿÿÿÿÿÿ REL = std::memory_order_release,
ÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
ÿÿÿÿstd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
ÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
ÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial : MASK21; }
() )
{
}
void xsemaphore::acquire() noexcept
{
ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
ÿÿÿÿfor( ; ; )
ÿÿÿÿÿÿÿ if( (ref & MASK21) )
ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
COUNT_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
ÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
ÿÿÿÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿÿÿ }
ÿÿÿÿfor( ; ; )
ÿÿÿÿ{
ÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
ÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
ÿÿÿÿ}
}
void xsemaphore::release( uint32_t n ) noexcept
{
ÿÿÿÿif( !n )
ÿÿÿÿÿÿÿ return;
ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
ÿÿÿÿint64_t ahead;
ÿÿÿÿdo
ÿÿÿÿ{
ÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
ÿÿÿÿÿÿÿ ahead = n - waiters;
ÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
ÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
ÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿ niu = ref + beyond;
ÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
ÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
ÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
ÿÿÿÿif( ahead >= 0 )
ÿÿÿÿÿÿÿ m_counters.notify_all();
ÿÿÿÿelse
ÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
}
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
ÿÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
ÿÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
ÿÿÿÿÿ~xsemaphore();
ÿÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
ÿÿÿÿÿvoid acquire() noexcept;
ÿÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
private:
ÿÿÿÿÿstatic constexpr unsigned
ÿÿÿÿÿÿÿÿ MASK_BITS = 21,
ÿÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
ÿÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
ÿÿÿÿÿstatic constexpr uint64_t
ÿÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
ÿÿÿÿÿÿÿÿ COUNT_VALUE = 1,
ÿÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
ÿÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
ÿÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
ÿÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
ÿÿÿÿÿstatic constexpr std::memory_order
ÿÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
ÿÿÿÿÿÿÿÿ REL = std::memory_order_release,
ÿÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
ÿÿÿÿÿstd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
ÿÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
ÿÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial :
MASK21; } () )
{
}
void xsemaphore::acquire() noexcept
{
ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
ÿÿÿÿÿfor( ; ; )
ÿÿÿÿÿÿÿÿ if( (ref & MASK21) )
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
COUNT_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
ÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX,
RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
ÿÿÿÿÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿfor( ; ; )
ÿÿÿÿÿ{
ÿÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
NOTIFY_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
ÿÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
ÿÿÿÿÿ}
}
void xsemaphore::release( uint32_t n ) noexcept
{
ÿÿÿÿÿif( !n )
ÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
ÿÿÿÿÿint64_t ahead;
ÿÿÿÿÿdo
ÿÿÿÿÿ{
ÿÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
ÿÿÿÿÿÿÿÿ ahead = n - waiters;
ÿÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
ÿÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
ÿÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿÿ niu = ref + beyond;
ÿÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
ÿÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
ÿÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL,
RLX ) );
ÿÿÿÿÿif( ahead >= 0 )
ÿÿÿÿÿÿÿÿ m_counters.notify_all();
ÿÿÿÿÿelse
ÿÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
}
Again, "using namespace std" is imprecise programming.
Need to look at it when I have some more time. But, I still like the benaphore. Simple, works, elegant. No CAS loops in sight.
Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:
Need to look at it when I have some more time. But, I still like the
benaphore. Simple, works, elegant. No CAS loops in sight.
A benaphore isn't a semaphnore.
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:
Need to look at it when I have some more time. But, I still like the
benaphore. Simple, works, elegant. No CAS loops in sight.
A benaphore isn't a semaphnore.
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a semaphore. You, idiot? Humm...
A futex'd counting semaphore that doesn't suffer stolen wakeups[...]
(as with most implementations).
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of those loops...
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of
those loops...
A futex'd semaphore's performance isn't determined by the branch
prediction but by the speed of the cacheline-transfer between the
coress; this could be really slow. And sleeping inside the kernel
and being awakened by an intra processor interrupt is even two
magitudes slower.
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The logic
is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on LL/SC
logic, it ruins the loopless factor... Its simple. Your CAS infested
thing is not so simple... I understand it, but wow.
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
On 1/25/2026 9:26 PM, Bonita Montero wrote:
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups >>>>>>>> (as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
A Benaphore is a semaphore.
class fast_semaphore
{
public:
ÿÿÿ fast_semaphore(int count) noexcept
ÿÿÿ : m_count(count), m_semaphore(0) {}
ÿÿÿ void post()
ÿÿÿ {
ÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_release);
ÿÿÿÿÿÿÿ int count = m_count.fetch_add(1, std::memory_order_relaxed);
ÿÿÿÿÿÿÿ if (count < 0)
ÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.post();
ÿÿÿ }
ÿÿÿ void wait()
ÿÿÿ {
ÿÿÿÿÿÿÿ int count = m_count.fetch_sub(1, std::memory_order_relaxed);
ÿÿÿÿÿÿÿ if (count < 1)
ÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.wait();
ÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_acquire);
ÿÿÿ }
private:
ÿÿÿ std::atomic m_count;
ÿÿÿ semaphore m_semaphore;
};
Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
On 1/25/2026 9:26 PM, Bonita Montero wrote:
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups >>>>>>>>> (as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
on LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
A Benaphore is a semaphore.
A benaphore can allow only one thread to run. A semaphore can trigger
an arbitrary number of threads to run. So they're completely different.
class fast_semaphore
{
public:
ÿÿÿÿ fast_semaphore(int count) noexcept
ÿÿÿÿ : m_count(count), m_semaphore(0) {}
ÿÿÿÿ void post()
ÿÿÿÿ {
ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_release);
ÿÿÿÿÿÿÿÿ int count = m_count.fetch_add(1, std::memory_order_relaxed);
ÿÿÿÿÿÿÿÿ if (count < 0)
ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.post();
ÿÿÿÿ }
ÿÿÿÿ void wait()
ÿÿÿÿ {
ÿÿÿÿÿÿÿÿ int count = m_count.fetch_sub(1, std::memory_order_relaxed);
ÿÿÿÿÿÿÿÿ if (count < 1)
ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.wait();
ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_acquire);
ÿÿÿÿ }
private:
ÿÿÿÿ std::atomic m_count;
ÿÿÿÿ semaphore m_semaphore;
};
Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
On 1/25/2026 9:26 PM, Bonita Montero wrote:
Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
On 1/24/2026 11:55 PM, Bonita Montero wrote:
Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
On 1/23/2026 11:19 PM, Bonita Montero wrote:
Am 24.01.2026 um 03:53 schrieb Lynn McGuire:[...]
On 1/23/2026 12:06 PM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups >>>>>>>>> (as with most implementations).
Again, "using namespace std" is imprecise programming.
Idiot.
Pot kettle? You just tried to claim that a benaphore is not a
semaphore. You, idiot? Humm...
A benaphore is a combination of an atomic counter and a semaphore
to have a mutex.
A benaphore is basically atomic accounting using a fast path. The
logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
on LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
infested thing is not so simple... I understand it, but wow.
I know what a Benaphore is; it is not a semaphore but it has a
semaphore.
A Benaphore is a semaphore.
A benaphore can allow only one thread to run. A semaphore can trigger
an arbitrary number of threads to run. So they're completely different.
class fast_semaphore
{
public:
ÿÿÿÿ fast_semaphore(int count) noexcept
ÿÿÿÿ : m_count(count), m_semaphore(0) {}
ÿÿÿÿ void post()
ÿÿÿÿ {
ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_release);
ÿÿÿÿÿÿÿÿ int count = m_count.fetch_add(1, std::memory_order_relaxed);
ÿÿÿÿÿÿÿÿ if (count < 0)
ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.post();
ÿÿÿÿ }
ÿÿÿÿ void wait()
ÿÿÿÿ {
ÿÿÿÿÿÿÿÿ int count = m_count.fetch_sub(1, std::memory_order_relaxed);
ÿÿÿÿÿÿÿÿ if (count < 1)
ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.wait();
ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_acquire);
ÿÿÿÿ }
private:
ÿÿÿÿ std::atomic m_count;
ÿÿÿÿ semaphore m_semaphore;
};
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
ÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
ÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
ÿÿÿÿ~xsemaphore();
ÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
ÿÿÿÿvoid acquire() noexcept;
ÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
private:
ÿÿÿÿstatic constexpr unsigned
ÿÿÿÿÿÿÿ MASK_BITS = 21,
ÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
ÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
ÿÿÿÿstatic constexpr uint64_t
ÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
ÿÿÿÿÿÿÿ COUNT_VALUE = 1,
ÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
ÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
ÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
ÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
ÿÿÿÿstatic constexpr std::memory_order
ÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
ÿÿÿÿÿÿÿ REL = std::memory_order_release,
ÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
ÿÿÿÿstd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
ÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
ÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial : MASK21; }
() )
{
}
void xsemaphore::acquire() noexcept
{
ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
ÿÿÿÿfor( ; ; )
ÿÿÿÿÿÿÿ if( (ref & MASK21) )
ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
COUNT_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
ÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
ÿÿÿÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿÿÿ }
ÿÿÿÿfor( ; ; )
ÿÿÿÿ{
ÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
ÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
ÿÿÿÿ}
}
void xsemaphore::release( uint32_t n ) noexcept
{
ÿÿÿÿif( !n )
ÿÿÿÿÿÿÿ return;
ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
ÿÿÿÿint64_t ahead;
ÿÿÿÿdo
ÿÿÿÿ{
ÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
ÿÿÿÿÿÿÿ ahead = n - waiters;
ÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
ÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
ÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿ niu = ref + beyond;
ÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
ÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
ÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
ÿÿÿÿif( ahead >= 0 )
ÿÿÿÿÿÿÿ m_counters.notify_all();
ÿÿÿÿelse
ÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
}
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
ÿÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
ÿÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
ÿÿÿÿÿ~xsemaphore();
ÿÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
ÿÿÿÿÿvoid acquire() noexcept;
ÿÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
private:
ÿÿÿÿÿstatic constexpr unsigned
ÿÿÿÿÿÿÿÿ MASK_BITS = 21,
ÿÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
ÿÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
ÿÿÿÿÿstatic constexpr uint64_t
ÿÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
ÿÿÿÿÿÿÿÿ COUNT_VALUE = 1,
ÿÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
ÿÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
ÿÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
ÿÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
ÿÿÿÿÿstatic constexpr std::memory_order
ÿÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
ÿÿÿÿÿÿÿÿ REL = std::memory_order_release,
ÿÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
ÿÿÿÿÿstd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
ÿÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
ÿÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial :
MASK21; } () )
{
}
void xsemaphore::acquire() noexcept
{
ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
ÿÿÿÿÿfor( ; ; )
ÿÿÿÿÿÿÿÿ if( (ref & MASK21) )
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
COUNT_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
ÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();
Oh shit.
ÿÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX,
RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
ÿÿÿÿÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿfor( ; ; )
ÿÿÿÿÿ{
ÿÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
NOTIFY_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
ÿÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
ÿÿÿÿÿ}
}
void xsemaphore::release( uint32_t n ) noexcept
{
ÿÿÿÿÿif( !n )
ÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
ÿÿÿÿÿint64_t ahead;
ÿÿÿÿÿdo
ÿÿÿÿÿ{
ÿÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
ÿÿÿÿÿÿÿÿ ahead = n - waiters;
ÿÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
ÿÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
ÿÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
^^^^^^^^^^^^^^^^
Gotta love the abort here... ;^o
ÿÿÿÿÿÿÿÿ niu = ref + beyond;
ÿÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
ÿÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
ÿÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL,
RLX ) );
ÿÿÿÿÿif( ahead >= 0 )
ÿÿÿÿÿÿÿÿ m_counters.notify_all();
ÿÿÿÿÿelse
ÿÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
}
Sigh.
On 1/25/2026 1:17 AM, Bonita Montero wrote:
Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of
those loops...
A futex'd semaphore's performance isn't determined by the branch
prediction but by the speed of the cacheline-transfer between the
coress; this could be really slow. And sleeping inside the kernel
and being awakened by an intra processor interrupt is even two
magitudes slower.
I know how the futex works. Your loop here is interesting to me:
for( ; notifies; m_counters.notify_one(), --notifies );
I think you might be missing the forest for the trees?
Am 25.01.2026 um 23:39 schrieb Chris M. Thomasson:
On 1/25/2026 1:17 AM, Bonita Montero wrote:
Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
[...]
Are you trying to stress-test the CPU's branch prediction? All of
those loops...
A futex'd semaphore's performance isn't determined by the branch
prediction but by the speed of the cacheline-transfer between the
coress; this could be really slow. And sleeping inside the kernel
and being awakened by an intra processor interrupt is even two
magitudes slower.
I know how the futex works. Your loop here is interesting to me:
for( ; notifies; m_counters.notify_one(), --notifies );
This when there are less notfifies than thee are waiting threads.
Am 27.01.2026 um 22:21 schrieb Chris M. Thomasson:
On 1/23/2026 10:06 AM, Bonita Montero wrote:
A futex'd counting semaphore that doesn't suffer stolen wakeups
(as with most implementations).
// hpp
#pragma once
#include <atomic>
#include <cassert>
#define XSEMAPHORE_TRICKY
struct xsemaphore
{
ÿÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
ÿÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
ÿÿÿÿÿ~xsemaphore();
ÿÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
ÿÿÿÿÿvoid acquire() noexcept;
ÿÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
private:
ÿÿÿÿÿstatic constexpr unsigned
ÿÿÿÿÿÿÿÿ MASK_BITS = 21,
ÿÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
ÿÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
ÿÿÿÿÿstatic constexpr uint64_t
ÿÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
ÿÿÿÿÿÿÿÿ COUNT_VALUE = 1,
ÿÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
ÿÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
ÿÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
ÿÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
ÿÿÿÿÿstatic constexpr std::memory_order
ÿÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
ÿÿÿÿÿÿÿÿ REL = std::memory_order_release,
ÿÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
ÿÿÿÿÿstd::atomic_uint64_t m_counters;
};
inline xsemaphore::~xsemaphore()
{
#if defined(XSEMAPHORE_TRICKY)
ÿÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
#endif
}
// cpp
#include "xsemaphore.hpp"
#include <algorithm>
using namespace std;
xsemaphore::xsemaphore( uint32_t initial ) noexcept :
ÿÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial :
MASK21; } () )
{
}
void xsemaphore::acquire() noexcept
{
ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
ÿÿÿÿÿfor( ; ; )
ÿÿÿÿÿÿÿÿ if( (ref & MASK21) )
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
COUNT_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
ÿÿÿÿÿÿÿÿ else
ÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();
Oh shit.
ÿÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX,
RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿ {
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
ÿÿÿÿÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿÿÿÿ }
ÿÿÿÿÿfor( ; ; )
ÿÿÿÿÿ{
ÿÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
NOTIFY_VALUE, ACQ, RLX ) )
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
ÿÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
ÿÿÿÿÿ}
}
void xsemaphore::release( uint32_t n ) noexcept
{
ÿÿÿÿÿif( !n )
ÿÿÿÿÿÿÿÿ return;
ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
ÿÿÿÿÿint64_t ahead;
ÿÿÿÿÿdo
ÿÿÿÿÿ{
ÿÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
ÿÿÿÿÿÿÿÿ ahead = n - waiters;
ÿÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
ÿÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
ÿÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
^^^^^^^^^^^^^^^^
Gotta love the abort here... ;^o
It's ab abort beyond 2 ^ 21 threads.
ÿÿÿÿÿÿÿÿ niu = ref + beyond;
ÿÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
ÿÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
ÿÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
ÿÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL,
RLX ) );
ÿÿÿÿÿif( ahead >= 0 )
ÿÿÿÿÿÿÿÿ m_counters.notify_all();
ÿÿÿÿÿelse
ÿÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
}
Sigh.
Am 27.01.2026 um 22:15 schrieb Chris M. Thomasson:
I think you might be missing the forest for the trees?
Calling a benaphore a semaphore is not precise as necessary.
| Sysop: | Tetrazocine |
|---|---|
| Location: | Melbourne, VIC, Australia |
| Users: | 15 |
| Nodes: | 8 (0 / 8) |
| Uptime: | 71:17:39 |
| Calls: | 208 |
| Calls today: | 1 |
| Files: | 21,502 |
| Messages: | 81,120 |