• A simple futex'd benaphore

    From Bonita Montero@3:633/10 to All on Fri Mar 27 15:17:36 2026
    // Header:

    #pragma once
    #include <atomic>

    // A benaphore: a mutex built from an atomic lock counter plus a
    // futex-style wakeup flag.  The first locker takes the fast path
    // (one atomic RMW); contended lockers block in wait() until an
    // unlocker publishes a wakeup token through m_WakeUp.
    struct benaphore
    {
    benaphore() noexcept = default;
    benaphore( const benaphore & ) = delete;
    void operator =( const benaphore & ) = delete;
    // Acquire the lock, blocking if it is already held.
    void lock() noexcept;
    // Release the lock, waking one waiter if any are parked.
    void unlock() noexcept;
    private:
    // Holders plus waiters; zero means the lock is free.
    std::atomic_uint32_t m_lockCnt = 0;
    // Wakeup token handed from unlock() to exactly one waiter.
    // Explicitly initialized: before C++20 a default-constructed
    // std::atomic_bool holds an indeterminate value.
    std::atomic_bool m_WakeUp = false;
    void wait() noexcept;
    void wakeUp() noexcept;
    };

    // Register interest by bumping the counter; a previous value of
    // zero means the lock was free and the fast path succeeded.  Any
    // other value means another thread holds it, so park in wait().
    inline void benaphore::lock() noexcept
    {
        auto prev = m_lockCnt.fetch_add( 1, std::memory_order_acquire );
        if( prev != 0 ) [[unlikely]]
            wait();
    }

    // Drop our claim on the counter.  A previous value above one means
    // at least one thread is parked in wait() and must be handed the
    // wakeup token.
    inline void benaphore::unlock() noexcept
    {
        auto before = m_lockCnt.fetch_sub( 1, std::memory_order_release );
        if( before > 1 ) [[unlikely]]
            wakeUp();
    }

    // Cpp:

    #include "benaphore.hpp"

    // Slow path of lock(): park until an unlocker hands us the wakeup
    // token stored in m_WakeUp.  The token is consumed with a CAS so
    // that exactly one of possibly several waiters wins each release.
    void benaphore::wait() noexcept
    {
    using namespace std;
    // Snapshot of the flag; refreshed by the failed CAS below.
    bool ref = m_WakeUp.load( memory_order_relaxed );
    for( ; ; )
    if( ref )
    // Token appears available: try to consume it (true -> false).
    if( m_WakeUp.compare_exchange_strong( ref, false, memory_order_acquire, memory_order_relaxed ) )
    return;
    else;
    // The empty else above pins the dangling-else; on CAS failure ref
    // now holds the freshly observed value and the loop retries.
    else
    {
    // No token yet: block until m_WakeUp leaves the 'false' state,
    // then assume it became true and race to consume it again.
    m_WakeUp.wait( false, memory_order_relaxed );
    ref = true;
    }
    }

    // Publish the wakeup token, then poke one thread parked in
    // m_WakeUp.wait().  The release store pairs with the acquire CAS
    // in wait().
    void benaphore::wakeUp() noexcept
    {
        using std::memory_order_release;
        m_WakeUp.store( true, memory_order_release );
        m_WakeUp.notify_one();
    }

    --- PyGate Linux v1.5.13
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Mon Apr 6 19:24:35 2026
    Here's the final benaphore - with a templated option for recursive
    access.

    #pragma once
    #include <atomic>
    #include <cassert>
    #include <chrono>
    #include <cstdint>
    #include <limits>
    #include <semaphore>
    #include "thread_id.hpp"

    #if defined(__clang__)
    #pragma clang diagnostic push
    #pragma clang diagnostic ignored "-Wdangling-else"
    #endif

    // Benaphore, optionally recursive.  The non-recursive variant needs
    // only an atomic owner/waiter counter; the recursive variant adds
    // the owning thread's id and a recursion depth.
    template<bool Recursive = false>
    struct benaphore
    {
    benaphore() noexcept = default;
    benaphore( const benaphore & ) = delete;
    void operator =( const benaphore & ) = delete;
    void lock() noexcept;
    void unlock() noexcept;
    private:
    struct non_recursive
    {
    // Holders plus waiters; zero means the lock is free.
    std::atomic_uint32_t lockCnt = 0;
    };
    struct recursive : non_recursive
    {
    // Id of the current owner, thread_id() when unowned.
    // NOTE(review): thread_id/thread_self come from "thread_id.hpp",
    // which is not visible here.
    std::atomic<thread_id> threadId = thread_id();
    // Nesting depth beyond the first lock(); touched only by the owner.
    unsigned recCount = 0;
    };
    // Select the member set matching the template parameter.
    using counter = std::conditional_t<Recursive, recursive, non_recursive>;
    counter m_counter;
    // Wakeup token passed from unlock() to exactly one waiter.
    std::atomic_bool m_wakeUp = false;
    };

    // Acquire the lock.  Recursive variant: re-entry by the current
    // owner just bumps the depth counter.  Otherwise bump the counter
    // and, if the lock was not free, loop consuming the wakeup token.
    template<bool Recursive>
    void benaphore<Recursive>::lock() noexcept
    {
    using namespace std;
    if constexpr( Recursive )
    // Relaxed is enough: only the owner can observe its own id here.
    if( m_counter.threadId.load( memory_order_relaxed ) == thread_self() )
    {
    ++m_counter.recCount;
    return;
    }
    // A previous value of 0 means we took the lock on the fast path.
    if( m_counter.lockCnt.fetch_add( 1, memory_order_acquire ) > 0 ) [[unlikely]]
    // Try to consume the token (true -> false); while it is absent,
    // park until m_wakeUp leaves the 'false' state.  Note 'ref = true'
    // re-seeds the expected value on every CAS attempt.
    for( bool ref; !m_wakeUp.compare_exchange_strong( ref = true, false,
    memory_order_acquire, memory_order_relaxed ); )
    m_wakeUp.wait( false, memory_order_relaxed );
    if constexpr( Recursive )
    {
    // We are now the owner: record our id and reset the nesting depth.
    m_counter.threadId.store( thread_self(), memory_order_relaxed );
    m_counter.recCount = 0;
    }
    }

    // Release the lock.  Recursive variant: nested unlocks only
    // decrement the depth; the final unlock clears the owner id first.
    template<bool Recursive>
    void benaphore<Recursive>::unlock() noexcept
    {
    using namespace std;
    if constexpr( Recursive )
    {
    if( m_counter.threadId.load( memory_order_relaxed ) == thread_self()
    && m_counter.recCount )
    {
    --m_counter.recCount;
    return;
    }
    // Unlocking a benaphore one does not own is a usage error; the
    // timed variant below performs the same check.
    assert(m_counter.threadId == thread_self());
    m_counter.threadId.store( thread_id() );
    m_counter.recCount = 0;
    }
    // A previous count above one means at least one waiter is parked:
    // publish the wakeup token and wake a thread.
    if( m_counter.lockCnt.fetch_sub( 1, memory_order_release ) > 1 ) [[unlikely]]
    {
    m_wakeUp.store( true, memory_order_release );
    m_wakeUp.notify_one();
    }
    }

    // Optional explicit instantiation so a test translation unit can
    // force full compilation of the recursive variant.
    #if defined(TPL_TEST_INSTANTIATE)
    template struct benaphore<true>;
    #endif

    // Benaphore with timeout support, built on std::binary_semaphore.
    // lckdWaiting packs the whole state into one atomic int32: the sign
    // bit (LockFlag) marks 'locked', the low bits count parked waiters.
    template<bool Recursive = false, typename Clock = std::chrono::steady_clock> struct timed_benaphore
    {
    using time_point = Clock::time_point;
    using duration = Clock::duration;
    timed_benaphore() noexcept = default;
    timed_benaphore( const timed_benaphore & ) = delete;
    void operator =( const timed_benaphore & ) = delete;
    void lock() noexcept;
    // Both try_* calls return false if the lock could not be taken
    // before the timeout/deadline.
    bool try_lock_for( duration dur ) noexcept;
    bool try_lock_until( time_point deadline ) noexcept;
    void unlock() noexcept;
    private:
    // Sign bit of lckdWaiting: set while the lock is held.
    static constexpr int32_t LockFlag = std::numeric_limits<int32_t>::min();
    struct non_recursive
    {
    // LockFlag | waiter-count; see class comment.
    std::atomic_int32_t lckdWaiting = 0;
    };
    struct recursive : non_recursive
    {
    // Owner's id, thread_id() when unowned (from "thread_id.hpp").
    std::atomic<thread_id> threadId = thread_id();
    // Nesting depth beyond the first lock; touched only by the owner.
    unsigned recCount = 0;
    };
    using synch = std::conditional_t<Recursive, recursive, non_recursive>;
    synch m_synch;
    // Waiters park here; unlock releases one permit per handover.
    std::binary_semaphore m_wakeUp { 0 };
    // Common engine for lock()/try_lock_*; calcDeadline yields the
    // absolute time at which to give up waiting.
    bool tryLock( auto calcDeadline ) noexcept;
    };

    // Blocking lock: a deadline of time_point::max() never expires, so
    // tryLock() cannot time out and the result can be ignored.
    template<bool Recursive, typename Clock>
    inline void timed_benaphore<Recursive, Clock>::lock() noexcept
    {
        auto never = []() noexcept { return time_point::max(); };
        tryLock( never );
    }

    // Relative-timeout variant: translate the duration into an absolute
    // deadline once, saturating at time_point::max().  A negative
    // duration is treated as 'wait forever' (NOTE: standard timed
    // mutexes would instead time out immediately in that case).
    template<bool Recursive, typename Clock>
    bool timed_benaphore<Recursive, Clock>::try_lock_for( duration dur )
    noexcept
    {
        auto toDeadline = [dur]() noexcept -> time_point
        {
            constexpr time_point forever = time_point::max();
            if( dur.count() < 0 )
                return forever;
            time_point now = Clock::now();
            // Saturate instead of letting now + dur overflow.
            if( dur > forever - now )
                return forever;
            return now + dur;
        };
        return tryLock( toDeadline );
    }

    // Absolute-deadline variant: hand the caller's deadline through to
    // the engine unchanged.
    template<bool Recursive, typename Clock>
    bool timed_benaphore<Recursive, Clock>::try_lock_until( time_point
    deadline ) noexcept
    {
        auto fixedDeadline = [deadline]() noexcept { return deadline; };
        return tryLock( fixedDeadline );
    }

    // Release the lock.  Clearing the sign bit publishes the unlock; if
    // the remaining low bits are non-zero, waiters are parked on the
    // semaphore and one permit is handed over.
    template<bool Recursive, typename Clock>
    void timed_benaphore<Recursive, Clock>::unlock() noexcept
    {
    using namespace std;
    if constexpr( Recursive )
    {
    if( m_synch.threadId.load( memory_order_relaxed ) == thread_self() &&
    m_synch.recCount )
    {
    --m_synch.recCount;
    return;
    }
    // Only the owner may perform the final unlock.
    assert(m_synch.threadId == thread_self());
    m_synch.threadId.store( thread_id() );
    m_synch.recCount = 0;
    }
    // The lock flag (sign bit) must be set on entry.
    assert(m_synch.lckdWaiting < 0);
    // fetch_and clears LockFlag; the old value's low bits are the
    // waiter count at release time.
    if( (m_synch.lckdWaiting.fetch_and( ~LockFlag, memory_order_release ) &
    ~LockFlag) ) [[unlikely]]
    m_wakeUp.release();
    }

    // Engine behind lock()/try_lock_*.  State machine on lckdWaiting:
    // while the sign bit is clear the lock is free and we CAS ourselves
    // in (removing our waiter registration, if any, in the same CAS);
    // otherwise we register as a waiter once and park on the semaphore
    // until the deadline supplied by calcDeadline.
    template<bool Recursive, typename Clock>
    bool timed_benaphore<Recursive, Clock>::tryLock( auto calcDeadline )
    noexcept
    {
    using namespace std;
    using namespace chrono;
    if constexpr( Recursive )
    // Owner re-entry: just deepen the recursion, no atomics needed.
    if( m_synch.threadId.load( memory_order_relaxed ) == thread_self() )
    {
    ++m_synch.recCount;
    return true;
    }
    // True once we are counted in lckdWaiting's waiter bits.
    bool waiter = false;
    // time_point::min() is a sentinel: the real deadline is computed
    // lazily, only when we first have to wait.
    time_point deadline = time_point::min();
    for( int32_t ref = m_synch.lckdWaiting.load( memory_order_relaxed ); ; )
    {
    // Sign bit clear: lock is free.  Take it, subtracting ourselves
    // from the waiter count if we had registered (waiter is 0 or 1).
    if( ref >= 0 )
    if( m_synch.lckdWaiting.compare_exchange_strong( ref, LockFlag | (ref
    - waiter), memory_order_acquire, memory_order_relaxed ) )
    break;
    else
    continue;
    // Lock is held: register as a waiter exactly once.
    if( !waiter && !m_synch.lckdWaiting.compare_exchange_strong( ref, ref
    + 1, memory_order_relaxed, memory_order_relaxed ) )
    continue;
    waiter = true;
    // Negative epoch count marks the 'not yet computed' sentinel.
    if( deadline.time_since_epoch().count() < 0 )
    deadline = calcDeadline();
    if( !m_wakeUp.try_acquire_until( deadline ) )
    {
    // Timed out: withdraw our waiter registration and report failure.
    m_synch.lckdWaiting.fetch_sub( 1, memory_order_relaxed );
    return false;
    }
    ref = m_synch.lckdWaiting.load( memory_order_relaxed );
    }
    if constexpr( Recursive )
    {
    // Lock acquired: record ownership for recursive re-entry.
    m_synch.threadId.store( thread_self(), memory_order_relaxed );
    m_synch.recCount = 0;
    }
    return true;
    }

    #if defined(__clang__)
    #pragma clang diagnostic pop
    #endif

    ---------

    And the code includes a version with timeouts (see above).

    --- PyGate Linux v1.5.13
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Chris M. Thomasson@3:633/10 to All on Mon Apr 6 15:39:54 2026
    On 4/6/2026 10:24 AM, Bonita Montero wrote:
    Here's the final benaphore - with a templated option for recursive
    access.
    [...]

    fast path semaphores are fun:


    Even in the post n case:
    ____________
    // Batch post for a fast-path semaphore: publish prior writes with a
    // release fence, then add n permits in a single relaxed RMW.  A
    // negative old count is the number of parked waiters.  (Quoted
    // snippet: m_count and m_semaphore are members of a class not shown
    // in this post.)
    void post_n(int n)
    {
    std::atomic_thread_fence(std::memory_order_release);
    int old_count = m_count.fetch_add(n, std::memory_order_relaxed);

    // If old_count was negative, there were waiters.
    if (old_count < 0)
    {
    // Calculate how many actually need a signal.
    // If we have 5 waiters (count == -5) and we post 10,
    // we only signal 5.
    int to_signal = std::min(-old_count, n);
    m_semaphore.post_n(to_signal);
    }
    }
    ____________

    Loopless, no CAS, just accounting.


    --- PyGate Linux v1.5.13
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Tue Apr 7 08:46:31 2026
    Am 07.04.2026 um 00:39 schrieb Chris M. Thomasson:

    fast path semaphores are fun:
    ...
    std::atomic_thread_fence(std::memory_order_release);
    Semaphore posting has an included thread-fence, at least with
    std::semaphore<N> and all operating system dependent semaphores.
    But usually you have an atomic operation before you use a sema-
    phore and you could apply a proper fence to this atomic op.


    --- PyGate Linux v1.5.13
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)