• ChatGPT calls my code "subtile"

    From Bonita Montero@3:633/10 to All on Fri Jan 23 19:06:21 2026
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    /// Counting semaphore whose complete state is packed into one 64-bit
    /// atomic word, so every state transition is a single compare-exchange.
    ///
    /// Layout of m_counters (three fields of MASK_BITS bits each):
    ///   bits  0..20  count   - permits acquire() can take without blocking
    ///   bits 21..41  notify  - permits release() has handed to registered waiters
    ///   bits 42..62  waiters - threads that registered themselves in acquire()
    struct xsemaphore
    {
        // Not copyable: the atomic state word cannot be duplicated meaningfully.
        xsemaphore( const xsemaphore & ) = delete;
        xsemaphore &operator =( const xsemaphore & ) = delete;

        /// Creates the semaphore with `initial` permits (default: none).
        xsemaphore( uint32_t initial = 0 ) noexcept;
        ~xsemaphore();

        /// Takes one permit, blocking until one is available.
        void acquire() noexcept;
        /// Adds `n` permits (default: one).
        void release( uint32_t n = 1 ) noexcept;

    private:
        // Field width and the bit offsets of the notify / waiters fields.
        static constexpr unsigned MASK_BITS = 21;
        static constexpr unsigned NOTIFY_BASE = MASK_BITS;
        static constexpr unsigned WAIT_BASE = 2 * MASK_BITS;

        // Mask for one field value after it has been shifted down to bit 0.
        static constexpr uint64_t MASK21 = ( uint64_t( 1 ) << MASK_BITS ) - 1;

        // "One unit" of each field, expressed in the packed word.
        static constexpr uint64_t COUNT_VALUE = 1;
        static constexpr uint64_t NOTIFY_VALUE = uint64_t( 1 ) << NOTIFY_BASE;
        static constexpr uint64_t WAIT_VALUE = uint64_t( 1 ) << WAIT_BASE;

        // In-place masks of the notify / waiters fields.
        static constexpr uint64_t NOTIFY_MASK = MASK21 << NOTIFY_BASE;
        static constexpr uint64_t WAIT_MASK = MASK21 << WAIT_BASE;

        // Short aliases for the memory orders used by the member functions.
        static constexpr std::memory_order ACQ = std::memory_order_acquire;
        static constexpr std::memory_order REL = std::memory_order_release;
        static constexpr std::memory_order RLX = std::memory_order_relaxed;

        // Packed count / notify / waiters word described above.
        std::atomic_uint64_t m_counters;
    };

    /// Destructor. When XSEMAPHORE_TRICKY is defined (and assertions are
    /// enabled) it checks that the waiters field of the state word is zero,
    /// i.e. no thread is still registered as waiting on this semaphore.
    inline xsemaphore::~xsemaphore()
    {
    #ifdef XSEMAPHORE_TRICKY
        // Default (sequentially consistent) load, masked down to the waiters field.
        assert( ( m_counters.load() & WAIT_MASK ) == 0 );
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    /// Starts with `initial` permits in the count field and empty notify and
    /// waiters fields. Values above MASK21 (the largest count one field can
    /// hold) are clamped to MASK21.
    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
        m_counters( initial > MASK21 ? MASK21 : initial )
    {
    }

    /// Takes one permit, blocking while none is available.
    ///
    /// Phase 1 either takes a permit from the count field directly or, when the
    /// count is zero, registers the caller in the waiters field.
    /// Phase 2 (registered callers only) sleeps on the state word until the
    /// notify field is non-zero and one unit of it could be claimed.
    /// Calls abort() if the waiters field is already at its maximum.
    void xsemaphore::acquire() noexcept
    {
        uint64_t seen = m_counters.load( RLX );

        // Phase 1: take a free permit or register as a waiter.
        while( true )
        {
            if( ( seen & MASK21 ) != 0 )
            {
                // A permit is available: try to take it with acquire ordering.
                if( m_counters.compare_exchange_strong( seen, seen - COUNT_VALUE, ACQ, RLX ) )
                    return;
                // The CAS refreshed `seen`; re-evaluate from the top.
                continue;
            }
            // No permit: the waiters field must have room for one more thread.
            if( ( seen & WAIT_MASK ) == WAIT_MASK )
                abort();
            const uint64_t registered = seen + WAIT_VALUE;
            if( m_counters.compare_exchange_strong( seen, registered, RLX, RLX ) )
            {
                seen = registered;
                break;
            }
        }

        // Phase 2: wait for release() to hand over a permit via the notify field.
        while( true )
        {
            while( ( seen & NOTIFY_MASK ) != 0 )
            {
                // Claim one handed-over permit; a failed CAS refreshes `seen`.
                if( m_counters.compare_exchange_strong( seen, seen - NOTIFY_VALUE, ACQ, RLX ) )
                    return;
            }
            // Nothing handed over: block until the word differs from `seen`.
            m_counters.wait( seen, RLX );
            seen = m_counters.load( RLX );
        }
    }

    /// Adds `n` permits; does nothing when `n` is zero.
    ///
    /// Permits go to registered waiters first: up to `n` units are moved from
    /// the waiters field to the notify field, and only the remainder (if any)
    /// is added to the count field. Afterwards the sleeping threads are woken.
    /// Calls abort() if the count or the notify field would exceed MASK21.
    void xsemaphore::release( uint32_t n ) noexcept
    {
        if( n == 0 )
            return;

        uint64_t seen = m_counters.load( RLX );
        uint64_t handedOver;    // permits moved to the notify field
        bool coversAllWaiters;  // true when n >= number of registered waiters

        for( ; ; )
        {
            const uint64_t sleepers = ( seen >> WAIT_BASE ) & MASK21;
            coversAllWaiters = n >= sleepers;
            handedOver = coversAllWaiters ? sleepers : n;
            // Permits left over once every registered waiter has been served.
            const uint64_t surplus = coversAllWaiters ? n - sleepers : 0;

            // Neither field may grow beyond what fits into its bits.
            if( ( seen & MASK21 ) + surplus > MASK21 )
                abort();
            if( ( ( seen >> NOTIFY_BASE ) & MASK21 ) + handedOver > MASK21 )
                abort();

            const uint64_t updated = seen + surplus
                + ( handedOver << NOTIFY_BASE )
                - ( handedOver << WAIT_BASE );
            // Publish with release ordering; a failed CAS refreshes `seen`.
            if( m_counters.compare_exchange_strong( seen, updated, REL, RLX ) )
                break;
        }

        if( coversAllWaiters )
        {
            // Every registered waiter received a permit: wake them all.
            m_counters.notify_all();
        }
        else
        {
            // Only part of the waiters were served: one wake-up per permit.
            for( uint64_t woken = 0; woken != handedOver; ++woken )
                m_counters.notify_one();
        }
    }


    --- PyGate Linux v1.5.2
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Fri Jan 23 18:08:58 2026
    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).


    [...]

    Need to look at it when I have some more time. But, I still like the benaphore. Simple, works, elegant. No CAS loops in sight.

    --- PyGate Linux v1.5.2
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Lynn McGuire@3:633/10 to All on Fri Jan 23 20:53:52 2026
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    ÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
    ÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
    ÿÿÿÿ~xsemaphore();
    ÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
    ÿÿÿÿvoid acquire() noexcept;
    ÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
    private:
    ÿÿÿÿstatic constexpr unsigned
    ÿÿÿÿÿÿÿ MASK_BITS = 21,
    ÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
    ÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
    ÿÿÿÿstatic constexpr uint64_t
    ÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
    ÿÿÿÿÿÿÿ COUNT_VALUE = 1,
    ÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    ÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
    ÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    ÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
    ÿÿÿÿstatic constexpr std::memory_order
    ÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
    ÿÿÿÿÿÿÿ REL = std::memory_order_release,
    ÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
    ÿÿÿÿstd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    ÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    ÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial : MASK21; }
    () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
    ÿÿÿÿfor( ; ; )
    ÿÿÿÿÿÿÿ if( (ref & MASK21) )
    ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    COUNT_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
    ÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
    ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
    ÿÿÿÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿÿÿ }
    ÿÿÿÿfor( ; ; )
    ÿÿÿÿ{
    ÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
    ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
    ÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
    ÿÿÿÿ}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    ÿÿÿÿif( !n )
    ÿÿÿÿÿÿÿ return;
    ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
    ÿÿÿÿint64_t ahead;
    ÿÿÿÿdo
    ÿÿÿÿ{
    ÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    ÿÿÿÿÿÿÿ ahead = n - waiters;
    ÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
    ÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
    ÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿ niu = ref + beyond;
    ÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
    ÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
    ÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
    ÿÿÿÿif( ahead >= 0 )
    ÿÿÿÿÿÿÿ m_counters.notify_all();
    ÿÿÿÿelse
    ÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
    }

    Again, "using namespace std" is imprecise programming.

    And an amazing lack of comments.

    Lynn


    --- PyGate Linux v1.5.2
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Jan 24 08:19:10 2026
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    ÿÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
    ÿÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
    ÿÿÿÿÿ~xsemaphore();
    ÿÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
    ÿÿÿÿÿvoid acquire() noexcept;
    ÿÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
    private:
    ÿÿÿÿÿstatic constexpr unsigned
    ÿÿÿÿÿÿÿÿ MASK_BITS = 21,
    ÿÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
    ÿÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
    ÿÿÿÿÿstatic constexpr uint64_t
    ÿÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
    ÿÿÿÿÿÿÿÿ COUNT_VALUE = 1,
    ÿÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    ÿÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
    ÿÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    ÿÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
    ÿÿÿÿÿstatic constexpr std::memory_order
    ÿÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
    ÿÿÿÿÿÿÿÿ REL = std::memory_order_release,
    ÿÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
    ÿÿÿÿÿstd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    ÿÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    ÿÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial :
    MASK21; } () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
    ÿÿÿÿÿfor( ; ; )
    ÿÿÿÿÿÿÿÿ if( (ref & MASK21) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    COUNT_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
    ÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX,
    RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
    ÿÿÿÿÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿfor( ; ; )
    ÿÿÿÿÿ{
    ÿÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    NOTIFY_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
    ÿÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
    ÿÿÿÿÿ}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    ÿÿÿÿÿif( !n )
    ÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
    ÿÿÿÿÿint64_t ahead;
    ÿÿÿÿÿdo
    ÿÿÿÿÿ{
    ÿÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    ÿÿÿÿÿÿÿÿ ahead = n - waiters;
    ÿÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
    ÿÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
    ÿÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿÿ niu = ref + beyond;
    ÿÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
    ÿÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
    ÿÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL,
    RLX ) );
    ÿÿÿÿÿif( ahead >= 0 )
    ÿÿÿÿÿÿÿÿ m_counters.notify_all();
    ÿÿÿÿÿelse
    ÿÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
    }

    Again, "using namespace std" is imprecise programming.

    Idiot.

    --- PyGate Linux v1.5.2
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Jan 24 08:19:45 2026
    Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:

    Need to look at it when I have some more time. But, I still like the benaphore. Simple, works, elegant. No CAS loops in sight.

    A benaphore isn't a semaphore.


    --- PyGate Linux v1.5.2
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Sat Jan 24 13:35:35 2026
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:

    Need to look at it when I have some more time. But, I still like the
    benaphore. Simple, works, elegant. No CAS loops in sight.

    A benaphore isn't a semaphnore.

    Are you sure about that? Have you even looked at it?

    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Sat Jan 24 13:36:32 2026
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a semaphore.
    You, idiot? Humm...

    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Sat Jan 24 23:29:41 2026
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:08 schrieb Chris M. Thomasson:

    Need to look at it when I have some more time. But, I still like the
    benaphore. Simple, works, elegant. No CAS loops in sight.

    A benaphore isn't a semaphnore.


    Fwiw, here is a version from a while back:

    https://vorbrodt.blog/2019/02/05/fast-semaphore/

    Now for the slow path, a slow semaphore would work fine. Use a platform semaphore (sem_t, HANDLE, etc.) in the slow path.

    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sun Jan 25 08:55:34 2026
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Sat Jan 24 23:59:27 2026
    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]

    Are you trying to stress-test the CPU's branch prediction? All of those loops...

    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sun Jan 25 10:17:04 2026
    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    cores; this could be really slow. And sleeping inside the kernel
    and being awakened by an inter-processor interrupt is even two
    orders of magnitude slower.


    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Sun Jan 25 12:34:01 2026
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The logic
    is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on LL/SC
    logic, it ruins the loopless factor... It's simple. Your CAS infested
    thing is not so simple... I understand it, but wow.

    Benaphore:
    __________________
    /// Benaphore-style semaphore: an atomic counter serves the uncontended
    /// case, and the underlying `semaphore` is only used when a thread has to
    /// block or a blocked thread has to be released.
    ///
    /// m_count >= 0 : that many permits are available.
    /// m_count <  0 : -m_count wait() calls have not yet been matched by post().
    ///
    /// NOTE(review): `semaphore` is declared outside this snippet; it is
    /// assumed to be a blocking counting semaphore with post()/wait() - confirm.
    class fast_semaphore
    {
    public:
        /// @param count initial number of permits; the slow-path semaphore starts at 0.
        fast_semaphore(int count) noexcept
        : m_count(count), m_semaphore(0) {}

        /// Adds one permit.
        void post()
        {
            // Release fence followed by a relaxed increment.
            std::atomic_thread_fence(std::memory_order_release);
            int count = m_count.fetch_add(1, std::memory_order_relaxed);
            // A negative previous value means a wait() call is still unmatched.
            if (count < 0)
                m_semaphore.post();
        }

        /// Takes one permit, blocking on the slow-path semaphore if none was available.
        void wait()
        {
            int count = m_count.fetch_sub(1, std::memory_order_relaxed);
            // Previous value below 1: no permit was available, so block.
            if (count < 1)
                m_semaphore.wait();
            // Acquire fence after the relaxed decrement (and the slow-path wait, if taken).
            std::atomic_thread_fence(std::memory_order_acquire);
        }

    private:
        // The template argument is required: class template argument deduction
        // does not apply to non-static data members, so a bare `std::atomic`
        // does not compile.
        std::atomic<int> m_count;
        semaphore m_semaphore;
    };
    __________________

    Pretty simple. Actually, I "think" on the wait the membar can be removed
    on the slow path because the m_semaphore.wait() should have acquire
    implied.

    void wait()
    {
    int count = m_count.fetch_sub(1, std::memory_order_relaxed);

    if (count < 1)
    {
    m_semaphore.wait();
    // acquire implied...
    }

    else {
    std::atomic_thread_fence(std::memory_order_acquire);
    }
    }


    Humm. should work okay. Not sure how much it buys us, but, well, there
    it is. ;^D


    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Sun Jan 25 14:39:06 2026
    On 1/25/2026 1:17 AM, Bonita Montero wrote:
    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of
    those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    coress; this could be really slow. And sleeping inside the kernel
    and being awakened by an intra processor interrupt is even two
    magitudes slower.


    I know how the futex works. Your loop here is interesting to me:

    for( ; notifies; m_counters.notify_one(), --notifies );

    --- PyGate Linux v1.5.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Mon Jan 26 06:26:52 2026
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The logic
    is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on LL/SC
    logic, it ruins the loopless factor... Its simple. Your CAS infested
    thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Mon Jan 26 23:57:23 2026
    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
    LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    /// Benaphore-style semaphore: an atomic counter serves the uncontended
    /// case, and the underlying `semaphore` is only used when a thread has to
    /// block or a blocked thread has to be released.
    ///
    /// m_count >= 0 : that many permits are available.
    /// m_count <  0 : -m_count wait() calls have not yet been matched by post().
    ///
    /// NOTE(review): `semaphore` is declared outside this snippet; it is
    /// assumed to be a blocking counting semaphore with post()/wait() - confirm.
    class fast_semaphore
    {
    public:
        /// @param count initial number of permits; the slow-path semaphore starts at 0.
        fast_semaphore(int count) noexcept
        : m_count(count), m_semaphore(0) {}

        /// Adds one permit.
        void post()
        {
            // Release fence followed by a relaxed increment.
            std::atomic_thread_fence(std::memory_order_release);
            int count = m_count.fetch_add(1, std::memory_order_relaxed);
            // A negative previous value means a wait() call is still unmatched.
            if (count < 0)
                m_semaphore.post();
        }

        /// Takes one permit, blocking on the slow-path semaphore if none was available.
        void wait()
        {
            int count = m_count.fetch_sub(1, std::memory_order_relaxed);
            // Previous value below 1: no permit was available, so block.
            if (count < 1)
                m_semaphore.wait();
            // Acquire fence after the relaxed decrement (and the slow-path wait, if taken).
            std::atomic_thread_fence(std::memory_order_acquire);
        }

    private:
        // The template argument is required: class template argument deduction
        // does not apply to non-static data members, so a bare `std::atomic`
        // does not compile.
        std::atomic<int> m_count;
        semaphore m_semaphore;
    };

    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Tue Jan 27 09:33:01 2026
    Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based on
    LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    A benaphore can allow only one thread to run. A semaphore can trigger
    an arbitrary number of threads to run. So they're completely different.


    class fast_semaphore
    {
    public:
    ÿÿÿ fast_semaphore(int count) noexcept
    ÿÿÿ : m_count(count), m_semaphore(0) {}

    ÿÿÿ void post()
    ÿÿÿ {
    ÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_release);
    ÿÿÿÿÿÿÿ int count = m_count.fetch_add(1, std::memory_order_relaxed);
    ÿÿÿÿÿÿÿ if (count < 0)
    ÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.post();
    ÿÿÿ }

    ÿÿÿ void wait()
    ÿÿÿ {
    ÿÿÿÿÿÿÿ int count = m_count.fetch_sub(1, std::memory_order_relaxed);
    ÿÿÿÿÿÿÿ if (count < 1)
    ÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.wait();
    ÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_acquire);
    ÿÿÿ }

    private:
    ÿÿÿ std::atomic m_count;
    ÿÿÿ semaphore m_semaphore;
    };


    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Tue Jan 27 12:50:06 2026
    On 1/27/2026 12:33 AM, Bonita Montero wrote:
    Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
    on LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    A benaphore can allow only one thread to run. A semaphore can trigger
    an arbitrary number of threads to run. So they're completely different.

    You're being rather pedantic... ;^) post and wait are standard semaphore operations. post_n can be implemented. It's all in the accounting. So, my
    example does not have, say, post_n, but it can be implemented. Without
    using any CAS and/or loops. Way better than other semaphore impls I have seen...

    Just thinking off the top of my head, it might be something like, typing
    in the newsreader sorry for any typos. I have old code on some hd's in storage. Joe Seigh had some fun logic back in comp.programming.thread a
    long time ago:
    ____________
    void post_n(int n)
    {
    std::atomic_thread_fence(std::memory_order_release);
    int old_count = m_count.fetch_add(n, std::memory_order_relaxed);

    // If old_count was negative, there were waiters.
    if (old_count < 0)
    {
    // Calculate how many actually need a signal.
    // If we have 5 waiters (count == -5) and we post 10,
    // we only signal 5.
    int to_signal = std::min(-old_count, n);
    m_semaphore.post_n(to_signal);
    }
    }
    ____________



    class fast_semaphore
    {
    public:
    ÿÿÿÿ fast_semaphore(int count) noexcept
    ÿÿÿÿ : m_count(count), m_semaphore(0) {}

    ÿÿÿÿ void post()
    ÿÿÿÿ {
    ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_release);
    ÿÿÿÿÿÿÿÿ int count = m_count.fetch_add(1, std::memory_order_relaxed);
    ÿÿÿÿÿÿÿÿ if (count < 0)
    ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.post();
    ÿÿÿÿ }

    ÿÿÿÿ void wait()
    ÿÿÿÿ {
    ÿÿÿÿÿÿÿÿ int count = m_count.fetch_sub(1, std::memory_order_relaxed);
    ÿÿÿÿÿÿÿÿ if (count < 1)
    ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.wait();
    ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_acquire);
    ÿÿÿÿ }

    private:
    ÿÿÿÿ std::atomic m_count;
    ÿÿÿÿ semaphore m_semaphore;
    };



    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Tue Jan 27 13:15:14 2026
    On 1/27/2026 12:33 AM, Bonita Montero wrote:
    Am 27.01.2026 um 08:57 schrieb Chris M. Thomasson:
    On 1/25/2026 9:26 PM, Bonita Montero wrote:
    Am 25.01.2026 um 21:34 schrieb Chris M. Thomasson:
    On 1/24/2026 11:55 PM, Bonita Montero wrote:
    Am 24.01.2026 um 22:36 schrieb Chris M. Thomasson:
    On 1/23/2026 11:19 PM, Bonita Montero wrote:
    Am 24.01.2026 um 03:53 schrieb Lynn McGuire:
    On 1/23/2026 12:06 PM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).
    [...]
    Again, "using namespace std" is imprecise programming.

    Idiot.

    Pot kettle? You just tried to claim that a benaphore is not a
    semaphore. You, idiot? Humm...

    A benaphore is a combination of an atomic counter and a semaphore
    to have a mutex.

    A benaphore is basically atomic accounting using a fast path. The
    logic is loopless, well, wrt LOCK XADD. If that LOCK XADD is based
    on LL/SC logic, it ruins the loopless factor... Its simple. Your CAS
    infested thing is not so simple... I understand it, but wow.

    I know what a Benaphore is; it is not a semaphore but it has a
    semaphore.

    A Benaphore is a semaphore.

    A benaphore can allow only one thread to run. A semaphore can trigger
    an arbitrary number of threads to run. So they're completely different.

    I think you might be missing the forest for the trees?




    class fast_semaphore
    {
    public:
    ÿÿÿÿ fast_semaphore(int count) noexcept
    ÿÿÿÿ : m_count(count), m_semaphore(0) {}

    ÿÿÿÿ void post()
    ÿÿÿÿ {
    ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_release);
    ÿÿÿÿÿÿÿÿ int count = m_count.fetch_add(1, std::memory_order_relaxed);
    ÿÿÿÿÿÿÿÿ if (count < 0)
    ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.post();
    ÿÿÿÿ }

    ÿÿÿÿ void wait()
    ÿÿÿÿ {
    ÿÿÿÿÿÿÿÿ int count = m_count.fetch_sub(1, std::memory_order_relaxed);
    ÿÿÿÿÿÿÿÿ if (count < 1)
    ÿÿÿÿÿÿÿÿÿÿÿÿ m_semaphore.wait();
    ÿÿÿÿÿÿÿÿ std::atomic_thread_fence(std::memory_order_acquire);
    ÿÿÿÿ }

    private:
    ÿÿÿÿ std::atomic m_count;
    ÿÿÿÿ semaphore m_semaphore;
    };



    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Tue Jan 27 13:21:15 2026
    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    ÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
    ÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
    ÿÿÿÿ~xsemaphore();
    ÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
    ÿÿÿÿvoid acquire() noexcept;
    ÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
    private:
    ÿÿÿÿstatic constexpr unsigned
    ÿÿÿÿÿÿÿ MASK_BITS = 21,
    ÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
    ÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
    ÿÿÿÿstatic constexpr uint64_t
    ÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
    ÿÿÿÿÿÿÿ COUNT_VALUE = 1,
    ÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    ÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
    ÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    ÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
    ÿÿÿÿstatic constexpr std::memory_order
    ÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
    ÿÿÿÿÿÿÿ REL = std::memory_order_release,
    ÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
    ÿÿÿÿstd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    ÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    ÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial : MASK21; }
    () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
    ÿÿÿÿfor( ; ; )
    ÿÿÿÿÿÿÿ if( (ref & MASK21) )
    ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    COUNT_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
    ÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();

    Oh shit.


    ÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
    ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
    ÿÿÿÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿÿÿ }
    ÿÿÿÿfor( ; ; )
    ÿÿÿÿ{
    ÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
    ÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref - NOTIFY_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
    ÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
    ÿÿÿÿ}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    ÿÿÿÿif( !n )
    ÿÿÿÿÿÿÿ return;
    ÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
    ÿÿÿÿint64_t ahead;
    ÿÿÿÿdo
    ÿÿÿÿ{
    ÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    ÿÿÿÿÿÿÿ ahead = n - waiters;
    ÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
    ÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
    ÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿ abort();

    ^^^^^^^^^^^^^^^^


    Gotta love the abort here... ;^o


    ÿÿÿÿÿÿÿ niu = ref + beyond;
    ÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
    ÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
    ÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL, RLX ) );
    ÿÿÿÿif( ahead >= 0 )
    ÿÿÿÿÿÿÿ m_counters.notify_all();
    ÿÿÿÿelse
    ÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
    }


    Sigh.

    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Wed Jan 28 09:08:27 2026
    Am 27.01.2026 um 22:21 schrieb Chris M. Thomasson:
    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    ÿÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
    ÿÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
    ÿÿÿÿÿ~xsemaphore();
    ÿÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
    ÿÿÿÿÿvoid acquire() noexcept;
    ÿÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
    private:
    ÿÿÿÿÿstatic constexpr unsigned
    ÿÿÿÿÿÿÿÿ MASK_BITS = 21,
    ÿÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
    ÿÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
    ÿÿÿÿÿstatic constexpr uint64_t
    ÿÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
    ÿÿÿÿÿÿÿÿ COUNT_VALUE = 1,
    ÿÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    ÿÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
    ÿÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    ÿÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
    ÿÿÿÿÿstatic constexpr std::memory_order
    ÿÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
    ÿÿÿÿÿÿÿÿ REL = std::memory_order_release,
    ÿÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
    ÿÿÿÿÿstd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    ÿÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    ÿÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial :
    MASK21; } () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
    ÿÿÿÿÿfor( ; ; )
    ÿÿÿÿÿÿÿÿ if( (ref & MASK21) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    COUNT_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
    ÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();

    Oh shit.


    ÿÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX,
    RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
    ÿÿÿÿÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿfor( ; ; )
    ÿÿÿÿÿ{
    ÿÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    NOTIFY_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
    ÿÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
    ÿÿÿÿÿ}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    ÿÿÿÿÿif( !n )
    ÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
    ÿÿÿÿÿint64_t ahead;
    ÿÿÿÿÿdo
    ÿÿÿÿÿ{
    ÿÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    ÿÿÿÿÿÿÿÿ ahead = n - waiters;
    ÿÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
    ÿÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
    ÿÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿÿ abort();

    ^^^^^^^^^^^^^^^^


    Gotta love the abort here... ;^o

    It's an abort beyond 2 ^ 21 threads.



    ÿÿÿÿÿÿÿÿ niu = ref + beyond;
    ÿÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
    ÿÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
    ÿÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL,
    RLX ) );
    ÿÿÿÿÿif( ahead >= 0 )
    ÿÿÿÿÿÿÿÿ m_counters.notify_all();
    ÿÿÿÿÿelse
    ÿÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
    }


    Sigh.


    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Wed Jan 28 11:57:20 2026
    Am 25.01.2026 um 23:39 schrieb Chris M. Thomasson:
    On 1/25/2026 1:17 AM, Bonita Montero wrote:
    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of
    those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    cores; this could be really slow. And sleeping inside the kernel
    and being awakened by an inter-processor interrupt is even two
    orders of magnitude slower.


    I know how the futex works. Your loop here is interesting to me:

    for( ; notifies; m_counters.notify_one(), --notifies );

    This is for when there are fewer notifies than there are waiting threads.

    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Wed Jan 28 14:46:17 2026
    Am 27.01.2026 um 22:15 schrieb Chris M. Thomasson:

    I think you might be missing the forest for the trees?

    Calling a benaphore a semaphore is not as precise as necessary.

    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Wed Jan 28 12:49:44 2026
    On 1/28/2026 2:57 AM, Bonita Montero wrote:
    Am 25.01.2026 um 23:39 schrieb Chris M. Thomasson:
    On 1/25/2026 1:17 AM, Bonita Montero wrote:
    Am 25.01.2026 um 08:59 schrieb Chris M. Thomasson:

    On 1/23/2026 10:06 AM, Bonita Montero wrote:

    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    [...]

    Are you trying to stress-test the CPU's branch prediction? All of
    those loops...

    A futex'd semaphore's performance isn't determined by the branch
    prediction but by the speed of the cacheline-transfer between the
    cores; this could be really slow. And sleeping inside the kernel
    and being awakened by an inter-processor interrupt is even two
    orders of magnitude slower.


    I know how the futex works. Your loop here is interesting to me:

    for( ; notifies; m_counters.notify_one(), --notifies );

    This is for when there are fewer notifies than there are waiting threads.

    Hummm... Your logic is WAY over engineered?

    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Wed Jan 28 12:50:20 2026
    On 1/28/2026 12:08 AM, Bonita Montero wrote:
    Am 27.01.2026 um 22:21 schrieb Chris M. Thomasson:
    On 1/23/2026 10:06 AM, Bonita Montero wrote:
    A futex'd counting semaphore that doesn't suffer stolen wakeups
    (as with most implementations).

    // hpp

    #pragma once
    #include <atomic>
    #include <cassert>

    #define XSEMAPHORE_TRICKY

    struct xsemaphore
    {
    ÿÿÿÿÿxsemaphore( uint32_t initial = 0 ) noexcept;
    ÿÿÿÿÿxsemaphore( const xsemaphore & ) = delete;
    ÿÿÿÿÿ~xsemaphore();
    ÿÿÿÿÿxsemaphore &operator =( const xsemaphore & ) = delete;
    ÿÿÿÿÿvoid acquire() noexcept;
    ÿÿÿÿÿvoid release( uint32_t n = 1 ) noexcept;
    private:
    ÿÿÿÿÿstatic constexpr unsigned
    ÿÿÿÿÿÿÿÿ MASK_BITS = 21,
    ÿÿÿÿÿÿÿÿ NOTIFY_BASE = MASK_BITS,
    ÿÿÿÿÿÿÿÿ WAIT_BASE = 2 * MASK_BITS;
    ÿÿÿÿÿstatic constexpr uint64_t
    ÿÿÿÿÿÿÿÿ MASK21 = 0x1FFFFF,
    ÿÿÿÿÿÿÿÿ COUNT_VALUE = 1,
    ÿÿÿÿÿÿÿÿ NOTIFY_VALUE = 1ull << NOTIFY_BASE,
    ÿÿÿÿÿÿÿÿ WAIT_VALUE = 1ull << WAIT_BASE,
    ÿÿÿÿÿÿÿÿ NOTIFY_MASK = MASK21 << NOTIFY_BASE,
    ÿÿÿÿÿÿÿÿ WAIT_MASK = MASK21 << WAIT_BASE;
    ÿÿÿÿÿstatic constexpr std::memory_order
    ÿÿÿÿÿÿÿÿ ACQ = std::memory_order_acquire,
    ÿÿÿÿÿÿÿÿ REL = std::memory_order_release,
    ÿÿÿÿÿÿÿÿ RLX = std::memory_order_relaxed;
    ÿÿÿÿÿstd::atomic_uint64_t m_counters;
    };

    inline xsemaphore::~xsemaphore()
    {
    #if defined(XSEMAPHORE_TRICKY)
    ÿÿÿÿÿassert(!((m_counters >> WAIT_BASE) & MASK21));
    #endif
    }

    // cpp

    #include "xsemaphore.hpp"
    #include <algorithm>

    using namespace std;

    xsemaphore::xsemaphore( uint32_t initial ) noexcept :
    ÿÿÿÿÿm_counters( [&]ÿÿÿ { return initial <= MASK21 ? initial :
    MASK21; } () )
    {
    }

    void xsemaphore::acquire() noexcept
    {
    ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu;
    ÿÿÿÿÿfor( ; ; )
    ÿÿÿÿÿÿÿÿ if( (ref & MASK21) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    COUNT_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ continue;
    ÿÿÿÿÿÿÿÿ else
    ÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( (ref & WAIT_MASK) == WAIT_MASK )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ abort();

    Oh shit.


    ÿÿÿÿÿÿÿÿÿÿÿÿ niu = ref + WAIT_VALUE;
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, niu, RLX,
    RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ {
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ref = niu;
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ break;
    ÿÿÿÿÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿÿÿÿ }
    ÿÿÿÿÿfor( ; ; )
    ÿÿÿÿÿ{
    ÿÿÿÿÿÿÿÿ while( (ref & NOTIFY_MASK) )
    ÿÿÿÿÿÿÿÿÿÿÿÿ if( m_counters.compare_exchange_strong( ref, ref -
    NOTIFY_VALUE, ACQ, RLX ) )
    ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿÿÿÿ m_counters.wait( ref, RLX );
    ÿÿÿÿÿÿÿÿ ref = m_counters.load( RLX );
    ÿÿÿÿÿ}
    }

    void xsemaphore::release( uint32_t n ) noexcept
    {
    ÿÿÿÿÿif( !n )
    ÿÿÿÿÿÿÿÿ return;
    ÿÿÿÿÿuint64_t ref = m_counters.load( RLX ), niu, notifies;
    ÿÿÿÿÿint64_t ahead;
    ÿÿÿÿÿdo
    ÿÿÿÿÿ{
    ÿÿÿÿÿÿÿÿ uint64_t waiters = (ref >> WAIT_BASE) & MASK21;
    ÿÿÿÿÿÿÿÿ ahead = n - waiters;
    ÿÿÿÿÿÿÿÿ notifies = ahead >= 0 ? waiters : n;
    ÿÿÿÿÿÿÿÿ uint64_t beyond = ahead >= 0 ? ahead : 0;
    ÿÿÿÿÿÿÿÿ if( (ref & MASK21) + beyond > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿÿ abort();

    ^^^^^^^^^^^^^^^^


    Gotta love the abort here... ;^o

    It's an abort beyond 2 ^ 21 threads.

    Shit happens... ;^o





    ÿÿÿÿÿÿÿÿ niu = ref + beyond;
    ÿÿÿÿÿÿÿÿ if( ((ref >> NOTIFY_BASE) & MASK21) + notifies > MASK21 )
    ÿÿÿÿÿÿÿÿÿÿÿÿ abort();
    ÿÿÿÿÿÿÿÿ niu += notifies << NOTIFY_BASE;
    ÿÿÿÿÿÿÿÿ niu -= notifies << WAIT_BASE;
    ÿÿÿÿÿ} while( !m_counters.compare_exchange_strong( ref, niu, REL,
    RLX ) );
    ÿÿÿÿÿif( ahead >= 0 )
    ÿÿÿÿÿÿÿÿ m_counters.notify_all();
    ÿÿÿÿÿelse
    ÿÿÿÿÿÿÿÿ for( ; notifies; m_counters.notify_one(), --notifies );
    }


    Sigh.



    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Wed Jan 28 12:53:01 2026
    On 1/28/2026 5:46 AM, Bonita Montero wrote:
    Am 27.01.2026 um 22:15 schrieb Chris M. Thomasson:

    I think you might be missing the forest for the trees?

    Calling a benaphore a semaphore is not as precise as necessary.

    How about a fast-pathed semaphore? ;^)

    Even in the post n case:
    ____________
    void post_n(int n)
    {
    std::atomic_thread_fence(std::memory_order_release);
    int old_count = m_count.fetch_add(n, std::memory_order_relaxed);

    // If old_count was negative, there were waiters.
    if (old_count < 0)
    {
    // Calculate how many actually need a signal.
    // If we have 5 waiters (count == -5) and we post 10,
    // we only signal 5.
    int to_signal = std::min(-old_count, n);
    m_semaphore.post_n(to_signal);
    }
    }
    ____________

    Loopless, no CAS, just accounting.

    --- PyGate Linux v1.5.6
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)