• Unicode...

    From Michael Sanders@3:633/10 to All on Fri Nov 14 21:03:38 2025
    Well, I finally got bitten by Unicode.

    Managed a work around, but I don't have enough experience
    with Unicode to know just exactly what I'm doing...

    #include <stdio.h>
    #include <string.h>

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if (*p < 0x80) { w++; p++; } // ASCII 1-byte
    else if ((*p & 0xE0) == 0xC0) { w++; p += 2; } // 2-byte UTF-8
    else if ((*p & 0xF0) == 0xE0) { w++; p += 3; } // 3-byte UTF-8
    else if ((*p & 0xF8) == 0xF0) { w++; p += 4; } // 4-byte UTF-8
    else { w++; p++; } // fallback
    }

    return w;
    }

    int main(void) {
    const char *s = "‚lan";
    printf("string: %s\n", s);
    printf("strlen: %d\n", strlen(s)); // 4
    printf("utf8_width: %d\n", utf8_width(s)); //5

    return 0;
    }

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Kaz Kylheku@3:633/10 to All on Fri Nov 14 21:20:43 2025
    On 2025-11-14, Michael Sanders <porkchop@invalid.foo> wrote:
    Well, I finally got bitten by Unicode.

    Managed a work around, but I don't have enough experience
    with Unicode to know just exactly what I'm doing...

    #include <stdio.h>
    #include <string.h>

    static int utf8_width(const char *s) {

    By width do you mean code point count?

    This is easily confusable for "display width" which is a concept
    of how many columns a Unicode string needs on a monospaced display
    or printer.

    If you ever edit UTF-8 in your Vim or whatever, you will see that
    certain, e.g. East Asian characters occupy two character positions.

    Kazinator's TXR language:

    This is the TXR Lisp interactive listener of TXR 302.
    Quit with :quit or Ctrl-D on an empty line. Ctrl-X ? for cheatsheet.
    (len "??????")
    6
    (display-width "??????")
    12
    (coded-length "??????")
    18

    The length (in terms of code points, not characters) is 6.

    Length is tricky, because code points are not characters; it depends
    on how you define it. In Unicode there are "grapheme clusters":
    combinations of code points making one character.

    The display width is 12: all characters are East Asian so take up
    two character cell widths on a monospaced terminal display.

    The coded-length is 18: 18 UTF-8 bytes. I didn't call the function utf8-length, because the project only supports UTF-8 encoding.

    All text-I/O is UTF-8 and that cannot be turned off.

    --
    TXR Programming Language: http://nongnu.org/txr
    Cygnal: Cygwin Native Application Library: http://kylheku.com/cygnal
    Mastodon: @Kazinator@mstdn.ca

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Fri Nov 14 21:46:40 2025
    On Fri, 14 Nov 2025 21:20:43 -0000 (UTC), Kaz Kylheku wrote:

    By width do you mean code point count?

    This is easily confusable for "display width" which is a concept
    of how many columns a Unicode string needs on a monospaced display
    or printer.

    Well maybe naively I mean the string's length per char...

    My 'fix' (cop-out - ouch!) for now? I'm staying 7bit clean:
    0x00?0x7F only, tinybase will reject anything at/above: 0x80
    for a user query.

    Kazinator's TXR language:

    Kazinator! chuckle.

    [...]

    I'll study the issue more, that's certainly the bulk of
    the problem, I need to read up on Unicode.

    Thank's Kaz, always interesting stuff from you.
    Here? Baby steps, but nevertheless steps...

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Richard Tobin@3:633/10 to All on Fri Nov 14 23:23:15 2025
    In article <10f85f9$33pck$1@dont-email.me>,
    Michael Sanders <porkchop@invalid.foo> wrote:
    const char *s = "‚lan";
    printf("string: %s\n", s);
    printf("strlen: %d\n", strlen(s)); // 4
    printf("utf8_width: %d\n", utf8_width(s)); //5

    I think you have those numbers the wrong way round.

    -- Richard

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Fri Nov 14 23:51:38 2025
    On Fri, 14 Nov 2025 23:23:15 -0000 (UTC), Richard Tobin wrote:

    In article <10f85f9$33pck$1@dont-email.me>,
    Michael Sanders <porkchop@invalid.foo> wrote:
    const char *s = "‚lan";
    printf("string: %s\n", s);
    printf("strlen: %d\n", strlen(s)); // 4
    printf("utf8_width: %d\n", utf8_width(s)); //5

    I think you have those numbers the wrong way round.

    I'm working on it Richard, little by little...

    Just discovered %z too.

    #include <stdio.h>
    #include <string.h>

    static int is_ascii_7bit(const char *s) {
    const unsigned char *p = (const unsigned char *)s;
    while (*p) {
    if (*p >= 0x80) return 0; // reject immediately
    p++;
    }
    return 1;
    }

    static size_t ascii_width(const char *s) { return strlen(s); }

    int main(void) {
    const char *s = "‚lan"; // NOT 7-bit clean
    printf("string: %s\n", s);
    printf("strlen: %zu\n", strlen(s));
    printf("7bit OK?: %s\n", is_ascii_7bit(s) ? "YES" : "NO");
    printf("ascii_width: %zu\n", ascii_width(s));
    return 0;
    }

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Keith Thompson@3:633/10 to All on Fri Nov 14 16:11:38 2025
    Michael Sanders <porkchop@invalid.foo> writes:
    Well, I finally got bitten by Unicode.

    Managed a work around, but I don't have enough experience
    with Unicode to know just exactly what I'm doing...

    #include <stdio.h>
    #include <string.h>

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if (*p < 0x80) { w++; p++; } // ASCII 1-byte
    else if ((*p & 0xE0) == 0xC0) { w++; p += 2; } // 2-byte UTF-8
    else if ((*p & 0xF0) == 0xE0) { w++; p += 3; } // 3-byte UTF-8
    else if ((*p & 0xF8) == 0xF0) { w++; p += 4; } // 4-byte UTF-8
    else { w++; p++; } // fallback
    }

    return w;
    }

    int main(void) {
    const char *s = "‚lan";
    printf("string: %s\n", s);
    printf("strlen: %d\n", strlen(s)); // 4
    printf("utf8_width: %d\n", utf8_width(s)); //5

    return 0;
    }

    I haven't really looked at the algorithm, but strlen returns a result
    of type size_t, so the correct format in the second printf call is
    "%zu", not "%d".

    It would make sense for utf8_width to return size_t, which would
    mean that the format in the third printf call would also be "%zu".

    --
    Keith Thompson (The_Other_Keith) Keith.S.Thompson+u@gmail.com
    void Void(void) { Void(); } /* The recursive call of the void */

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Keith Thompson@3:633/10 to All on Fri Nov 14 16:12:36 2025
    Michael Sanders <porkchop@invalid.foo> writes:
    On Fri, 14 Nov 2025 21:20:43 -0000 (UTC), Kaz Kylheku wrote:
    By width do you mean code point count?

    This is easily confusable for "display width" which is a concept
    of how many columns a Unicode string needs on a monospaced display
    or printer.

    Well maybe naively I mean the string's length per char...

    Can you rephrase that? I can't figure out what "the string's length per
    char" means.

    [...]

    --
    Keith Thompson (The_Other_Keith) Keith.S.Thompson+u@gmail.com
    void Void(void) { Void(); } /* The recursive call of the void */

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 00:46:36 2025
    On Fri, 14 Nov 2025 16:12:36 -0800, Keith Thompson wrote:

    Well maybe naively I mean the string's length per char...

    Can you rephrase that? I can't figure out what "the string's length per char" means.

    I just want the length of the string, where each character within that
    string equals 1 & I want one way to get the length of any string.

    My mind is melting today due to this tagged block:

    tags: book, Wind, Sand and Stars, Antoine de Saint-Exup‚ry, 1939, Memoir

    Title: Wind, Sand and Stars
    ISBN: 978-0156027496
    Author: Antoine de Saint-Exup‚ry
    Year: 1939
    Publisher: Mariner Books

    Synopsis: "Wind, Sand and Stars" is a poetic meditation on the adventures and wonders of flight. Drawing from his own experiences as an aviator, Saint-Exup‚ry recounts thrilling tales of navigating the open skies, while also delving into philosophical reflections on the nature of adventure, friendship, and the human spirit. The memoir serves as a testament to the profound emotions evoked by the sheer beauty and danger of flight.

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 00:49:29 2025
    On Fri, 14 Nov 2025 16:11:38 -0800, Keith Thompson wrote:

    I haven't really looked at the algorithm, but strlen returns a result
    of type size_t, so the correct format in the second printf call is
    "%zu", not "%d".

    Yes thank you, just getting familiar with %z on this end.

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Keith Thompson@3:633/10 to All on Fri Nov 14 18:47:51 2025
    Michael Sanders <porkchop@invalid.foo> writes:
    On Fri, 14 Nov 2025 16:12:36 -0800, Keith Thompson wrote:
    Well maybe naively I mean the string's length per char...
    Can you rephrase that? I can't figure out what "the string's length per
    char" means.

    I just want the length of the string, where each character within that
    string equals 1 & I want one way to get the length of any string.

    That sounds exactly like strlen(), unless you mean something else by "character".

    [...]

    --
    Keith Thompson (The_Other_Keith) Keith.S.Thompson+u@gmail.com
    void Void(void) { Void(); } /* The recursive call of the void */

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 15 05:51:55 2025
    Am 14.11.2025 um 22:03 schrieb Michael Sanders:
    Well, I finally got bitten by Unicode.

    Managed a work around, but I don't have enough experience
    with Unicode to know just exactly what I'm doing...

    #include <stdio.h>
    #include <string.h>

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if (*p < 0x80) { w++; p++; } // ASCII 1-byte
    else if ((*p & 0xE0) == 0xC0) { w++; p += 2; } // 2-byte UTF-8
    else if ((*p & 0xF0) == 0xE0) { w++; p += 3; } // 3-byte UTF-8
    else if ((*p & 0xF8) == 0xF0) { w++; p += 4; } // 4-byte UTF-8
    else { w++; p++; } // fallback
    }

    return w;
    }

    int main(void) {
    const char *s = "‚lan";
    printf("string: %s\n", s);
    printf("strlen: %d\n", strlen(s)); // 4
    printf("utf8_width: %d\n", utf8_width(s)); //5

    return 0;
    }

    Try this idea written in C++ in C:

    size_t utf8Width( span<char>::iterator it )
    {
    ˙ ˙ size_t w = 0;
    ˙ ˙ for( ; *it; ++w )
    ˙ ˙ ˙ ˙ if( int head = countl_zero( (unsigned char)~*it ); head <= 3 ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ ++it;
    ˙ ˙ return w;
    }


    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 15 06:24:39 2025
    A little bugfix and a perfect style:

    #include <iostream>
    #include <bit>
    #include <span>
    #include <optional>

    using namespace std;

    optional<size_t> utf8Width( u8string_view str )
    {
    ˙ ˙ size_t w = 0;
    ˙ ˙ for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]
    ˙ ˙ ˙ ˙ if( size_t head = countl_zero( (unsigned char)~*it ); head <= 4
    && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ return nullopt;
    ˙ ˙ return w;
    }

    int main()
    {
    ˙ ˙ cout << *utf8Width( u8"Hello, ??!" ) << endl;
    }


    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Mikko@3:633/10 to All on Sat Nov 15 12:47:03 2025
    On 2025-11-14 21:03:38 +0000, Michael Sanders said:

    Well, I finally got bitten by Unicode.

    Managed a work around, but I don't have enough experience
    with Unicode to know just exactly what I'm doing...

    #include <stdio.h>
    #include <string.h>

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if (*p < 0x80) { w++; p++; } // ASCII 1-byte
    else if ((*p & 0xE0) == 0xC0) { w++; p += 2; } // 2-byte UTF-8
    else if ((*p & 0xF0) == 0xE0) { w++; p += 3; } // 3-byte UTF-8
    else if ((*p & 0xF8) == 0xF0) { w++; p += 4; } // 4-byte UTF-8
    else { w++; p++; } // fallback
    }

    return w;
    }

    The code above may cause problems if the argument string is not well
    formed UTF-8. For example, the zero terminator coud be missed. Of
    course an invalid tring can be expected to cause problems anyway but
    some errors are harder to debug than others.

    Another way is

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if ((*p & 0xC0) != 0x80) w++; // count the first bytes of each character
    }

    return w;
    }

    One could also add a check that each character has the right number of
    bytes of the right kind and if not regard that as the end of the string.

    --
    Mikko


    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 19:06:57 2025
    On Sat, 15 Nov 2025 05:51:55 +0100, Bonita Montero wrote:

    Try this idea written in C++ in C:

    size_t utf8Width( span<char>::iterator it )
    {
    ˙ ˙ size_t w = 0;
    ˙ ˙ for( ; *it; ++w )
    ˙ ˙ ˙ ˙ if( int head = countl_zero( (unsigned char)~*it ); head <= 3 ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ ++it;
    ˙ ˙ return w;
    }

    Thank you Bonita!

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 19:09:16 2025
    On Sat, 15 Nov 2025 12:47:03 +0200, Mikko wrote:

    On 2025-11-14 21:03:38 +0000, Michael Sanders said:

    Well, I finally got bitten by Unicode.

    Managed a work around, but I don't have enough experience
    with Unicode to know just exactly what I'm doing...

    #include <stdio.h>
    #include <string.h>

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if (*p < 0x80) { w++; p++; } // ASCII 1-byte
    else if ((*p & 0xE0) == 0xC0) { w++; p += 2; } // 2-byte UTF-8
    else if ((*p & 0xF0) == 0xE0) { w++; p += 3; } // 3-byte UTF-8
    else if ((*p & 0xF8) == 0xF0) { w++; p += 4; } // 4-byte UTF-8
    else { w++; p++; } // fallback
    }

    return w;
    }

    The code above may cause problems if the argument string is not well
    formed UTF-8. For example, the zero terminator coud be missed. Of
    course an invalid tring can be expected to cause problems anyway but
    some errors are harder to debug than others.

    Another way is

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if ((*p & 0xC0) != 0x80) w++; // count the first bytes of each character
    }

    return w;
    }

    One could also add a check that each character has the right number of
    bytes of the right kind and if not regard that as the end of the string.

    Excellent I've added your reply to my notes, thank you Mikko.

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 19:10:26 2025
    On Fri, 14 Nov 2025 18:47:51 -0800, Keith Thompson wrote:

    I just want the length of the string, where each character within that
    string equals 1 & I want one way to get the length of any string.

    That sounds exactly like strlen(), unless you mean something else by "character".

    Thank you Keith, I've gotten it fixed.

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 19:14:02 2025
    On Fri, 14 Nov 2025 21:03:38 -0000 (UTC), Michael Sanders wrote:

    Well, I finally got bitten by Unicode.

    [...]

    Thanks to everyone for their help.

    Fixed the problem & now I surround matched query with <angle brackets>...

    ./tinybase -s='*exup‚ry' data/books.tbf

    FILE: data/books.tbf
    LINE: 362
    BLOCK: 25
    CRC-8: 0x7f
    QUERY: *exup‚ry
    MATCH: Antoine de Saint-Exup‚ry

    TAGS: book, Wind, Sand and Stars, <Antoine de Saint-Exup‚ry>, 1939, Memoir

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 19:28:04 2025
    On Sat, 15 Nov 2025 06:24:39 +0100, Bonita Montero wrote:

    A little bugfix and a perfect style:

    #include <iostream>
    #include <bit>
    #include <span>
    #include <optional>

    using namespace std;

    optional<size_t> utf8Width( u8string_view str )
    {
    ˙ ˙ size_t w = 0;
    ˙ ˙ for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]
    ˙ ˙ ˙ ˙ if( size_t head = countl_zero( (unsigned char)~*it ); head <= 4
    && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ return nullopt;
    ˙ ˙ return w;
    }

    int main()
    {
    ˙ ˙ cout << *utf8Width( u8"Hello, ??!" ) << endl;
    }

    Very nice!

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 20:16:36 2025
    On Sat, 15 Nov 2025 19:14:02 -0000 (UTC), Michael Sanders wrote:

    Thanks to everyone for their help.

    Fixed the problem & now I surround matched query with <angle brackets>...

    And fixed in tag analytics too!

    before:

    TAG HITS PERCENT

    Anne Frank 1 1.89%
    Antoine de Saint-Exup‚ry 1 1.89%
    Arthur C. Clarke 1 1.89%

    after:

    TAG HITS PERCENT

    Anne Frank 1 1.89%
    Antoine de Saint-Exup‚ry 1 1.89%
    Arthur C. Clarke 1 1.89%

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Keith Thompson@3:633/10 to All on Sat Nov 15 13:51:20 2025
    Michael Sanders <porkchop@invalid.foo> writes:
    On Fri, 14 Nov 2025 18:47:51 -0800, Keith Thompson wrote:
    I just want the length of the string, where each character within that
    string equals 1 & I want one way to get the length of any string.

    That sounds exactly like strlen(), unless you mean something else by
    "character".

    Thank you Keith, I've gotten it fixed.

    Fixed how? I still don't know what you meant by "length of the string".

    --
    Keith Thompson (The_Other_Keith) Keith.S.Thompson+u@gmail.com
    void Void(void) { Void(); } /* The recursive call of the void */

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sat Nov 15 22:31:51 2025
    On Sat, 15 Nov 2025 13:51:20 -0800, Keith Thompson wrote:

    Fixed how? I still don't know what you meant by "length of the string".

    The fix was (Kaz gave me the clue I needed)...

    - not byte-length
    - not character count
    - but 'terminal display width' (like wcwidth / wcswidth)

    I cannot use system wcwidth() because Windows doesn't have it
    & Tinybase must be fully portable...

    full blown c source demo (url may wrap):

    <https://drive.google.com/file/d/18P3IsogDArWAiAgCsg1ob1Ja7Ff8nDGQ/view?usp=sharing>

    Hope it helps whoever comes across it:

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sun Nov 16 00:38:05 2025
    On Sat, 15 Nov 2025 06:24:39 +0100, Bonita Montero wrote:

    A little bugfix and a perfect style:

    #include <iostream>
    #include <bit>
    #include <span>
    #include <optional>

    using namespace std;

    optional<size_t> utf8Width( u8string_view str )
    {
    ˙ ˙ size_t w = 0;
    ˙ ˙ for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]
    ˙ ˙ ˙ ˙ if( size_t head = countl_zero( (unsigned char)~*it ); head <= 4
    && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ return nullopt;
    ˙ ˙ return w;
    }

    int main()
    {
    ˙ ˙ cout << *utf8Width( u8"Hello, ??!" ) << endl;
    }

    Very excited. =) It is very useful for someone to use
    native characters...

    ./tinybase -s='??' data/books.tbf

    FILE: data/books.tbf
    LINE: 1
    BLOCK: 1
    CRC-8: 0xf2
    QUERY: ??
    MATCH: ??

    TAGS: book, 1984, George Orwell, 1949, Dystopian Fiction, <??>

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Mikko@3:633/10 to All on Sun Nov 16 11:22:54 2025
    On 2025-11-15 19:09:16 +0000, Michael Sanders said:

    On Sat, 15 Nov 2025 12:47:03 +0200, Mikko wrote:

    On 2025-11-14 21:03:38 +0000, Michael Sanders said:

    Well, I finally got bitten by Unicode.

    Managed a work around, but I don't have enough experience
    with Unicode to know just exactly what I'm doing...

    #include <stdio.h>
    #include <string.h>

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if (*p < 0x80) { w++; p++; } // ASCII 1-byte
    else if ((*p & 0xE0) == 0xC0) { w++; p += 2; } // 2-byte UTF-8
    else if ((*p & 0xF0) == 0xE0) { w++; p += 3; } // 3-byte UTF-8
    else if ((*p & 0xF8) == 0xF0) { w++; p += 4; } // 4-byte UTF-8
    else { w++; p++; } // fallback
    }

    return w;
    }

    The code above may cause problems if the argument string is not well
    formed UTF-8. For example, the zero terminator coud be missed. Of
    course an invalid tring can be expected to cause problems anyway but
    some errors are harder to debug than others.

    Another way is

    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if ((*p & 0xC0) != 0x80) w++; // count the first bytes of each character
    }

    return w;
    }

    One could also add a check that each character has the right number of
    bytes of the right kind and if not regard that as the end of the string.

    Excellent I've added your reply to my notes, thank you Mikko.

    You are welcome.

    --
    Mikko


    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sun Nov 16 20:30:46 2025
    Because someone reached out & helped me:

    <https://drive.google.com/file/d/18P3IsogDArWAiAgCsg1ob1Ja7Ff8nDGQ/view?usp=sharing>

    (earnest thanks I've hit the ground running), I'll gladly pay it forward...

    Hybrid Unicode/ASCII sorting:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int sort_order = 0; // 0 = sort ascending A-Z, 1 = sort descending Z-A

    int utf8_decode(const char *s, unsigned int *cp_out) {
    const unsigned char *p = (const unsigned char *)s;
    unsigned char c0 = p[0];

    if (c0 < 0x80) { *cp_out = c0; return 1; }

    if ((c0 & 0xE0) == 0xC0) {
    unsigned char c1 = p[1];
    if ((c1 & 0xC0) != 0x80) { *cp_out = c0; return 1; }
    *cp_out = ((c0 & 0x1F) << 6) | (c1 & 0x3F);
    return 2;
    }

    if ((c0 & 0xF0) == 0xE0) {
    unsigned char c1 = p[1], c2 = p[2];
    if ((c1 & 0xC0) != 0x80 || (c2 & 0xC0) != 0x80) {
    *cp_out = c0; return 1;
    }
    *cp_out = ((c0 & 0x0F) << 12) |
    ((c1 & 0x3F) << 6) |
    (c2 & 0x3F);
    return 3;
    }

    if ((c0 & 0xF8) == 0xF0) {
    unsigned char c1 = p[1], c2 = p[2], c3 = p[3];
    if ((c1 & 0xC0) != 0x80 ||
    (c2 & 0xC0) != 0x80 ||
    (c3 & 0xC0) != 0x80) {
    *cp_out = c0; return 1;
    }
    *cp_out = ((c0 & 0x07) << 18) |
    ((c1 & 0x3F) << 12) |
    ((c2 & 0x3F) << 6) |
    (c3 & 0x3F);
    return 4;
    }

    *cp_out = c0;
    return 1;
    }

    int utf8_cmp(const char *sa, const char *sb) {
    while (*sa && *sb) {
    unsigned int ca, cb;
    int la = utf8_decode(sa, &ca);
    int lb = utf8_decode(sb, &cb);
    if (ca != cb) return (ca < cb) ? -1 : 1;
    sa += la;
    sb += lb;
    }
    if (*sa == '\0' && *sb == '\0') return 0;
    return (*sa == '\0') ? -1 : 1;
    }

    // qsort comparator wrapper
    static int cmp_wrap(const void *A, const void *B) {
    const char *a = *(const char * const *)A;
    const char *b = *(const char * const *)B;
    int r = utf8_cmp(a, b);
    return sort_order ? -r : r;
    }

    int main(void) {
    /* Test set with ASCII, accented chars, Chinese, emoji */
    const char *items[] = {
    "Apple",
    "Banana",
    "?rbol",
    "??",
    "?",
    "? Emoji",
    "Zebra",
    "™sterreich",
    " baco",
    "ę???ŕ",
    NULL
    };

    int count = 0;
    while (items[count]) count++;

    printf("UTF-8 SORT (ASCENDING):\n\n");
    sort_order = 0;
    qsort(items, count, sizeof(char *), cmp_wrap);
    for (int i = 0; i < count; i++) printf(" %s\n", items[i]);

    printf("\nUTF-8 SORT (DESCENDING):\n\n");
    sort_order = 1;
    qsort(items, count, sizeof(char *), cmp_wrap);
    for (int i = 0; i < count; i++) printf(" %s\n", items[i]);

    return 0;
    }

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Keith Thompson@3:633/10 to All on Sun Nov 16 16:13:25 2025
    Michael Sanders <porkchop@invalid.foo> writes:
    [...]
    static int sort_order = 0; // 0 = sort ascending A-Z, 1 = sort descending Z-A
    [...]

    If you use an enum rather than an int, you won't need comments
    explaining what the values mean.

    --
    Keith Thompson (The_Other_Keith) Keith.S.Thompson+u@gmail.com
    void Void(void) { Void(); } /* The recursive call of the void */

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Mon Nov 17 23:49:04 2025
    On Fri, 14 Nov 2025 21:03:38 -0000 (UTC), Michael Sanders wrote:

    Well, I finally got bitten by Unicode.

    [...]

    Smallest Unicode test I can manage. Might prove handy in some contexts:

    #include <locale.h>
    #include <string.h>
    #include <stdio.h>

    int got_unicode(void){
    char *l = setlocale(LC_CTYPE,"");
    return (l && strstr(l,"UTF-8"));
    }

    #define U(uni, asc) (got_unicode() ? (uni) : (asc))

    int main(void){
    printf("%s\n", U("Unicode OK: ?", "No Unicode."));
    return 0;
    }

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From James Kuyper@3:633/10 to All on Tue Nov 18 14:27:53 2025
    On 2025-11-17 18:49, Michael Sanders wrote:
    On Fri, 14 Nov 2025 21:03:38 -0000 (UTC), Michael Sanders wrote:

    Well, I finally got bitten by Unicode.

    [...]

    Smallest Unicode test I can manage. Might prove handy in some contexts:

    #include <locale.h>
    #include <string.h>
    #include <stdio.h>

    int got_unicode(void){
    char *l = setlocale(LC_CTYPE,"");
    return (l && strstr(l,"UTF-8"));
    }

    #define U(uni, asc) (got_unicode() ? (uni) : (asc))

    int main(void){
    printf("%s\n", U("Unicode OK: ?", "No Unicode."));
    return 0;
    }

    Could you identify which document guarantees that every Unicode locale
    contains "UTF-8"? Do you know what the domain of applicability of that
    document is? It apparently does not cover my Ubuntu Linux system. The
    command "locale -a" provides a list of all supported locales. Here's
    what it says:

    C
    C.utf8
    en_AG
    en_AG.utf8
    en_AU.utf8
    en_BW.utf8
    en_CA.utf8
    en_DK.utf8
    en_GB.utf8
    en_HK.utf8
    en_IE.utf8
    en_IL
    en_IL.utf8
    en_IN
    en_IN.utf8
    en_NG
    en_NG.utf8
    en_NZ.utf8
    en_PH.utf8
    en_SG.utf8
    en_US.utf8
    en_ZA.utf8
    en_ZM
    en_ZM.utf8
    en_ZW.utf8
    es_AR.utf8
    es_BO.utf8
    es_CL.utf8
    es_CO.utf8
    es_CR.utf8
    es_CU
    es_CU.utf8
    es_DO.utf8
    es_EC.utf8
    es_ES.utf8
    es_GT.utf8
    es_HN.utf8
    es_MX.utf8
    es_NI.utf8
    es_PA.utf8
    es_PE.utf8
    es_PR.utf8
    es_PY.utf8
    es_SV.utf8
    es_US.utf8
    es_UY.utf8
    es_VE.utf8
    POSIX
    ru_RU.utf8
    ru_UA.utf8
    uk_UA.utf8
    zh_HK.utf8
    zh_TW.utf8

    Are you aware that there's many other Unicode encodings, including
    UTF-16, UTF-32, GB18030, BOCU, SCSU, UTF-EBCDIC, and UTF-7.

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Tue Nov 18 20:17:24 2025
    On Tue, 18 Nov 2025 14:27:53 -0500, James Kuyper wrote:

    Could you identify which document guarantees that every Unicode locale contains "UTF-8"? Do you know what the domain of applicability of that document is? It apparently does not cover my Ubuntu Linux system. The
    command "locale -a" provides a list of all supported locales. Here's
    what it says:

    [...]

    Hi James, umm 'guarantees'? No no... It does NOT verify:

    - whether the environment actually supports UTF8 fully
    - whether multibyte functions are enabled
    - whether the terminal supports UTF8
    - whether the C library supports UTF8 normalization
    (combining characters, etc. but it seems to work well here)

    To be sure: It's not a UTF-8 capability test. It's only a
    locale-string check. So it likely misses many valid UTF8
    locale variants...

    Here I'm running any mixture of: Windows/BSD/Linix Mint LMDE.

    The best I can tell you at this stage is that it works on my end,
    not a very satisfying reply I'm sure you'd agree. But till I learn
    more about the issue that's the best I can offer.

    If you manage an improvement, please do post it here in the group
    so I can learn more too.

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Tue Nov 18 20:40:58 2025
    On Tue, 18 Nov 2025 20:17:24 -0000 (UTC), Michael Sanders wrote:

    If you manage an improvement, please do post it here in the group
    so I can learn more too.

    /*
    * Robust UTF-8 capability test?
    *
    * This test checks:
    * 1. Locale reports UTF-8
    * 2. Wide-character conversion works (mbrtowc)
    * 3. Terminal accepts UTF-8 output (optional: write test char)
    *
    * Result returned:
    * 0 = No UTF-8 support detected
    * 1 = UTF-8 *likely* supported
    */

    #include <stdio.h>
    #include <string.h>
    #include <locale.h>
    #include <wchar.h>
    #include <errno.h>

    /* return 1 if UTF-8 capable, else 0 */

    int utf8_capable(void) {
    /* 1 check locale */
    const char *loc = setlocale(LC_CTYPE, "");
    if (!loc) return 0;
    if (!strstr(loc, "UTF-8") && !strstr(loc, "utf8")) return 0;

    /* 2 check UTF-8 decoding with mbrtowc */
    {
    const char *test = "?"; /* E2 9C 93 */
    wchar_t wc = 0;
    mbstate_t st;
    memset(&st, 0, sizeof(st));

    size_t n = mbrtowc(&wc, test, strlen(test), &st);

    if (n == (size_t)-1 || n == (size_t)-2) return 0; /* decode error */
    if (wc == 0) return 0; /* returned null? impossible for ? */
    }

    /* 3 ensure terminal accepts UTF-8 by writing */
    if (fwrite("?", 3, 1, stdout) != 1) {
    return 0; /* write failed ? suppress error message, just say no */
    }

    return 1;
    }

    int main(void) {
    if (utf8_capable()) printf("\nUTF-8 OK\n");
    else printf("\nNOT UTF-8\n");
    return 0;
    }

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Wed Nov 19 11:56:55 2025
    Am 15.11.2025 um 20:28 schrieb Michael Sanders:
    On Sat, 15 Nov 2025 06:24:39 +0100, Bonita Montero wrote:

    A little bugfix and a perfect style:

    #include <iostream>
    #include <bit>
    #include <span>
    #include <optional>

    using namespace std;

    optional<size_t> utf8Width( u8string_view str )
    {
    ˙ ˙ size_t w = 0;
    ˙ ˙ for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]
    ˙ ˙ ˙ ˙ if( size_t head = countl_zero( (unsigned char)~*it ); head <= 4
    && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ return nullopt;
    ˙ ˙ return w;
    }

    int main()
    {
    ˙ ˙ cout << *utf8Width( u8"Hello, ??!" ) << endl;
    }
    Very nice!

    #include <iostream>
    #include <string_view>
    #include <bit>

    using namespace std;

    template<bool Validate = false, typename View>
    ˙ ˙ requires std::same_as<View, string_view> || std::same_as<View, u8string_view>
    size_t utf8Width( View str )
    {
    ˙ ˙ size_t rem = str.end() - str.begin(), w = 0, chunk;
    ˙ ˙ for( auto it = str.begin(); rem; rem -= chunk, ++w ) [[likely]]
    ˙ ˙ {
    ˙ ˙ ˙ ˙ chunk = countl_one( (unsigned char)*it ) + 1;
    ˙ ˙ ˙ ˙ if constexpr( Validate )
    ˙ ˙ ˙ ˙ ˙ ˙ if( (*it & 0xC0) == 0x80 || chunk > 5 || rem < chunk ) [[unlikely]]
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ return -1;
    ˙ ˙ ˙ ˙ auto end = it + chunk;
    ˙ ˙ ˙ ˙ if constexpr( !Validate )
    ˙ ˙ ˙ ˙ ˙ ˙ it = end;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ while( ++it != end )
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ if( (unsigned char)(*it & 0xC0) != 0x80 )
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ return -1;
    ˙ ˙ }
    ˙ ˙ return w;
    }

    int main()
    {
    ˙ ˙ char8_t strU8[] = u8"Hello, ??!";
    ˙ ˙ string_view sv( (char *)strU8 );
    ˙ ˙ cout << utf8Width<false>( sv ) << endl;
    ˙ ˙ cout << utf8Width<true>( sv ) << endl;
    ˙ ˙ u8string_view svU8( strU8 );
    ˙ ˙ cout << utf8Width<false>( svU8 ) << endl;
    ˙ ˙ cout << utf8Width<true>( svU8 ) << endl;
    }

    Even cooler. Now the code accepts usual string_views as well as u8string_views.
    And if you supply a boolean temlpate parameter before the ()-parameter which
    is true the data is verified to be a valid UTF-8 string. If you supply false
    or omit the parameter the string isn't valiedated.


    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From James Kuyper@3:633/10 to All on Wed Nov 19 09:08:10 2025
    On 2025-11-18 15:17, Michael Sanders wrote:
    On Tue, 18 Nov 2025 14:27:53 -0500, James Kuyper wrote:

    Could you identify which document guarantees that every Unicode locale
    contains "UTF-8"? Do you know what the domain of applicability of that
    document is? It apparently does not cover my Ubuntu Linux system. The
    command "locale -a" provides a list of all supported locales. Here's
    what it says:

    [...]

    Hi James, umm 'guarantees'? No no... It does NOT verify:

    - whether the environment actually supports UTF8 fully
    - whether multibyte functions are enabled
    - whether the terminal supports UTF8
    - whether the C library supports UTF8 normalization
    (combining characters, etc. but it seems to work well here)

    To be sure: It's not a UTF-8 capability test. It's only a
    locale-string check. So it likely misses many valid UTF8
    locale variants...

    If intended for use by anyone other than yourself, you should document
    it's limitations in that regard, either with in-code comments or in user documentation.

    Here I'm running any mixture of: Windows/BSD/Linix Mint LMDE.

    The best I can tell you at this stage is that it works on my end,
    not a very satisfying reply I'm sure you'd agree. But till I learn
    more about the issue that's the best I can offer.

    If you manage an improvement, please do post it here in the group
    so I can learn more too.

    There might be documents specifying locale naming standards, but I'm not
    aware of any. In the absence of such standards, or on systems not
    covered by such standards, there's not much you can do about this.

    If your targets include Linux Mint, there's a chance the locale names
    might be similar to those on my Ubuntu Linux system - but I'm no expert
    on the differences between Linux distributions. If so, you should make
    the "UTF" search case-insensitive, and make the '-' optional, which
    would add considerable complexity to what is currently a very simple
    routine.

    I'm curious - if you're interested in Unicode, why are you not making
    any use of the Unicode support available in the current version of C?
    Does your code need to work under older versions of C?

    Since C2023, a conforming implementation of C is required to support
    character constants and string literals that use UTF-8, UTF-16, and
    UTF-32 encodings when prefixed with u8, u or U, respectively. Those use
    the char8_t, char16_t, and char32_t types. Also new in C2023 is
    mbrtoc8() and c8rtomb().
    Those prefixes and types go back to C2011, where it was optional whether
    they used those encodings. There were pre#defined macros which could be
    queried to determine whether or not they did. Routines for converting
    between those types and multi-byte strings or wchar_t also go back to
    that time.

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Bäuerle@3:633/10 to All on Wed Nov 19 15:29:37 2025
    James Kuyper wrote:
    On 2025-11-18 15:17, Michael Sanders wrote:
    On Tue, 18 Nov 2025 14:27:53 -0500, James Kuyper wrote:

    Could you identify which document guarantees that every Unicode locale contains "UTF-8"? Do you know what the domain of applicability of that document is? It apparently does not cover my Ubuntu Linux system. The command "locale -a" provides a list of all supported locales. Here's
    what it says:

    [...]

    Hi James, umm 'guarantees'? No no... It does NOT verify:

    - whether the environment actually supports UTF8 fully
    - whether multibyte functions are enabled
    - whether the terminal supports UTF8
    - whether the C library supports UTF8 normalization
    (combining characters, etc. but it seems to work well here)

    To be sure: It's not a UTF-8 capability test. It's only a
    locale-string check. So it likely misses many valid UTF8
    locale variants...

    If intended for use by anyone other than yourself, you should document
    it's limitations in that regard, either with in-code comments or in user documentation.

    Here I'm running any mixture of: Windows/BSD/Linix Mint LMDE.

    The best I can tell you at this stage is that it works on my end,
    not a very satisfying reply I'm sure you'd agree. But till I learn
    more about the issue that's the best I can offer.

    If you manage an improvement, please do post it here in the group
    so I can learn more too.

    There might be documents specifying locale naming standards, but I'm not aware of any. [...]

    POSIX.1-2024 documents one for the XSI extension in Section 8.2: <https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap08.html#tag_08_02>
    |
    | If the locale value has the form:
    |
    | language[_territory][.codeset]
    |
    | it refers to an implementation-provided locale, where settings of
    | language, territory, and codeset are implementation-defined.


    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Wed Nov 19 19:22:52 2025
    On Wed, 19 Nov 2025 09:08:10 -0500, James Kuyper wrote:

    If your targets include Linux Mint, there's a chance the locale names
    might be similar to those on my Ubuntu Linux system - but I'm no expert
    on the differences between Linux distributions. If so, you should make
    the "UTF" search case-insensitive, and make the '-' optional, which
    would add considerable complexity to what is currently a very simple
    routine.

    Thanks for the info. Cases-insensitive routines were posted in this
    sub-thread yesterday. But I'll repost it here.

    James straight up, I'm simply trying to learn. I appreciate your
    comments but I need examples of what to do - that's how my mind works...

    /*
    * Robust UTF-8 capability test?
    *
    * This test checks:
    * 1. Locale reports UTF-8
    * 2. Wide-character conversion works (mbrtowc)
    * 3. Terminal accepts UTF-8 output (optional: write test char)
    *
    * Result returned:
    * 0 = No UTF-8 support detected
    * 1 = UTF-8 *likely* supported
    */

    #include <stdio.h>
    #include <string.h>
    #include <locale.h>
    #include <wchar.h>
    #include <errno.h>

    /* return 1 if UTF-8 capable, else 0 */

    int utf8_capable(void) {
    /* 1 check locale */
    const char *loc = setlocale(LC_CTYPE, "");
    if (!loc) return 0;
    if (!strstr(loc, "UTF-8") && !strstr(loc, "utf8")) return 0;

    /* 2 check UTF-8 decoding with mbrtowc */
    {
    const char *test = "?"; /* E2 9C 93 */
    wchar_t wc = 0;
    mbstate_t st;
    memset(&st, 0, sizeof(st));

    size_t n = mbrtowc(&wc, test, strlen(test), &st);

    if (n == (size_t)-1 || n == (size_t)-2) return 0; /* decode error */
    if (wc == 0) return 0; /* returned null? impossible for ? */
    }

    /* 3 ensure terminal accepts UTF-8 by writing */
    if (fwrite("?", 3, 1, stdout) != 1) {
    return 0; /* write failed ? suppress error message, just say no */
    }

    return 1;
    }

    int main(void) {
    if (utf8_capable()) printf("\nUTF-8 OK\n");
    else printf("\nNOT UTF-8\n");
    return 0;
    }

    --
    :wq
    Mike Sanders





    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Fri Nov 21 02:21:19 2025
    On Wed, 19 Nov 2025 11:56:55 +0100, Bonita Montero wrote:

    [...]

    Even cooler. Now the code accepts usual string_views as well as u8string_views.
    And if you supply a boolean temlpate parameter before the ()-parameter which is true the data is verified to be a valid UTF-8 string. If you supply false or omit the parameter the string isn't valiedated.

    Hi Bonita! These are nice c++/c examples you've provided.

    Thanks for your input, I appreciate your code & remarks.

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Fri Nov 21 11:10:39 2025
    Am 21.11.2025 um 03:21 schrieb Michael Sanders:
    Hi Bonita! These are nice c++/c examples you've provided.
    Thanks for your input, I appreciate your code & remarks.

    That's an even cooler solution with AVX-512 and without validation.

    size_t utf8Width( const char *s )
    {
    ˙ ˙ __m512i
    ˙ ˙ ˙ ˙ zero = _mm512_setzero_si512(),
    ˙ ˙ ˙ ˙ oneMask = _mm512_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ oneHead = zero,
    ˙ ˙ ˙ ˙ twoMask = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ twoHead = _mm512_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ threeMask = _mm512_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ threeHead = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ fourMask = _mm512_set1_epi8( (char)0xF8 ),
    ˙ ˙ ˙ ˙ fourHead = _mm512_set1_epi8( (char)0xF0 );
    ˙ ˙ uintptr_t
    ˙ ˙ ˙ ˙ up = (uintptr_t)s,
    ˙ ˙ ˙ ˙ base = up & ~(uintptr_t)63;
    ˙ ˙ unsigned skip = (unsigned)(up - base);
    ˙ ˙ s = (char *)base;
    ˙ ˙ size_t count = 0;
    ˙ ˙ size_t i = 0;
    ˙ ˙ uint64_t nzMatches;
    ˙ ˙ do
    ˙ ˙ {
    ˙ ˙ ˙ ˙ __m512i chunk = _mm512_loadu_si512( (void *)s );
    ˙ ˙ ˙ ˙ nzMatches = ~(_mm512_cmpeq_epi8_mask( chunk, zero ) >> skip);
    ˙ ˙ ˙ ˙ nzMatches = nzMatches != -1 ? (1ull << countr_one( nzMatches ))
    - 1 : -1;
    ˙ ˙ ˙ ˙ uint64_t
    ˙ ˙ ˙ ˙ ˙ ˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    oneMask ), oneHead ) >> skip & nzMatches,
    ˙ ˙ ˙ ˙ ˙ ˙ two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    twoMask ), twoHead ) >> skip & nzMatches,
    ˙ ˙ ˙ ˙ ˙ ˙ three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, threeMask ), threeHead ) >> skip & nzMatches,
    ˙ ˙ ˙ ˙ ˙ ˙ four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    fourMask ), fourHead ) >> skip & nzMatches;
    ˙ ˙ ˙ ˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙ ˙ ˙ ˙ skip = 0;
    ˙ ˙ } while( nzMatches == -1 );
    ˙ ˙ return count;
    }


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Fri Nov 21 17:03:10 2025
    On 15/11/2025 05:24, Bonita Montero wrote:
    A little bugfix and a perfect style:

    #include <iostream>
    #include <bit>
    #include <span>
    #include <optional>

    using namespace std;

    optional<size_t> utf8Width( u8string_view str )
    {
    ˙ ˙ size_t w = 0;
    ˙ ˙ for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]
    ˙ ˙ ˙ ˙ if( size_t head = countl_zero( (unsigned char)~*it ); head <= 4
    && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ return nullopt;
    ˙ ˙ return w;
    }

    int main()
    {
    ˙ ˙ cout << *utf8Width( u8"Hello, ??!" ) << endl;
    }


    The trouble with this is that I haven't a clue how it works or what
    those extras do, or how they impact on performance.

    A version in C is given below. This is much more straightforward. It
    doesn't verify anything, but then I don't know if yours does either.

    As for performance: I duplicated that test string to form one 104 times
    as long, then called that function one million times. Here are the timings:

    C gcc-O2 1.06 seconds
    C bcc 1.17 seconds
    C tcc 2.81 seconds

    C++ g++-O2 4.6 seconds
    C++ g++-O0 19 seconds

    --------------------------

    size_t utf8width(char* s) {
    size_t length;
    int c, n;

    length=0;
    while (c=*s) {
    if ((c & 0x80) == 0) n = 1;
    else if ((c & 0xE0) == 0xC0) n = 2;
    else if ((c & 0xF0) == 0xE0) n = 3;
    else n = 4;
    s += n;
    ++length;
    }
    return length;
    }


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Fri Nov 21 17:39:45 2025
    On Fri, 21 Nov 2025 17:03:10 +0000, bart wrote:

    size_t utf8width(char* s) {
    size_t length;
    int c, n;

    length=0;
    while (c=*s) {
    if ((c & 0x80) == 0) n = 1;
    else if ((c & 0xE0) == 0xC0) n = 2;
    else if ((c & 0xF0) == 0xE0) n = 3;
    else n = 4;
    s += n;
    ++length;
    }
    return length;
    }

    A variant based on your take:

    size_t utf8width(char *s) {
    size_t len = 0;
    unsigned char c;

    while ((c = (unsigned char)*s))
    s += (c < 0x80) ? 1 :
    (c < 0xE0) ? 2 :
    (c < 0xF0) ? 3 : 4,
    ++len;

    return len;
    }

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 06:39:20 2025
    Am 21.11.2025 um 18:03 schrieb bart:
    On 15/11/2025 05:24, Bonita Montero wrote:
    A little bugfix and a perfect style:

    #include <iostream>
    #include <bit>
    #include <span>
    #include <optional>

    using namespace std;

    optional<size_t> utf8Width( u8string_view str )
    {
    ˙˙ ˙ size_t w = 0;
    ˙˙ ˙ for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]
    ˙˙ ˙ ˙ ˙ if( size_t head = countl_zero( (unsigned char)~*it ); head
    <= 4 && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
    ˙˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙˙ ˙ ˙ ˙ else
    ˙˙ ˙ ˙ ˙ ˙ ˙ return nullopt;
    ˙˙ ˙ return w;
    }

    int main()
    {
    ˙˙ ˙ cout << *utf8Width( u8"Hello, ??!" ) << endl;
    }


    The trouble with this is that I haven't a clue how it works or what
    those extras do, or how they impact on performance.

    A version in C is given below. This is much more straightforward. It
    doesn't verify anything, but then I don't know if yours does either.

    As for performance: I duplicated that test string to form one 104
    times as long, then called that function one million times. Here are
    the timings:

    ˙ C˙˙ gcc-O2˙˙˙˙ 1.06˙˙ seconds
    ˙ C˙˙ bcc˙˙˙˙˙˙˙ 1.17˙˙ seconds
    ˙ C˙˙ tcc˙˙˙˙˙˙˙ 2.81˙˙ seconds

    ˙ C++ g++-O2˙˙˙˙ 4.6˙˙ seconds
    ˙ C++ g++-O0˙˙˙ 19˙˙˙˙ seconds

    --------------------------

    size_t utf8width(char* s) {
    ˙˙˙ size_t length;
    ˙˙˙ int c, n;

    ˙˙˙ length=0;
    ˙˙˙ while (c=*s) {
    ˙˙˙˙˙˙˙ if ((c & 0x80) == 0) n = 1;
    ˙˙˙˙˙˙˙ else if ((c & 0xE0) == 0xC0) n = 2;
    ˙˙˙˙˙˙˙ else if ((c & 0xF0) == 0xE0) n = 3;
    ˙˙˙˙˙˙˙ else n = 4;
    ˙˙˙˙˙˙˙ s += n;
    ˙˙˙˙˙˙˙ ++length;
    ˙˙˙ }
    ˙˙˙ return length;
    }

    Take a string of a number of UTF-8 characters with a proper
    mixed chunk-lengths.


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sat Nov 22 11:55:32 2025
    On 22/11/2025 05:39, Bonita Montero wrote:
    Am 21.11.2025 um 18:03 schrieb bart:
    On 15/11/2025 05:24, Bonita Montero wrote:
    A little bugfix and a perfect style:

    #include <iostream>
    #include <bit>
    #include <span>
    #include <optional>

    using namespace std;

    optional<size_t> utf8Width( u8string_view str )
    {
    ˙˙ ˙ size_t w = 0;
    ˙˙ ˙ for( auto it = str.begin(); it != str.end(); ++w ) [[likely]]
    ˙˙ ˙ ˙ ˙ if( size_t head = countl_zero( (unsigned char)~*it ); head
    <= 4 && (size_t)(str.end() - it) >= head + 1 ) [[likely]]
    ˙˙ ˙ ˙ ˙ ˙ ˙ it += head + 1;
    ˙˙ ˙ ˙ ˙ else
    ˙˙ ˙ ˙ ˙ ˙ ˙ return nullopt;
    ˙˙ ˙ return w;
    }

    int main()
    {
    ˙˙ ˙ cout << *utf8Width( u8"Hello, ??!" ) << endl;
    }


    The trouble with this is that I haven't a clue how it works or what
    those extras do, or how they impact on performance.

    A version in C is given below. This is much more straightforward. It
    doesn't verify anything, but then I don't know if yours does either.

    As for performance: I duplicated that test string to form one 104
    times as long, then called that function one million times. Here are
    the timings:

    ˙ C˙˙ gcc-O2˙˙˙˙ 1.06˙˙ seconds
    ˙ C˙˙ bcc˙˙˙˙˙˙˙ 1.17˙˙ seconds
    ˙ C˙˙ tcc˙˙˙˙˙˙˙ 2.81˙˙ seconds

    ˙ C++ g++-O2˙˙˙˙ 4.6˙˙ seconds
    ˙ C++ g++-O0˙˙˙ 19˙˙˙˙ seconds

    --------------------------

    size_t utf8width(char* s) {
    ˙˙˙ size_t length;
    ˙˙˙ int c, n;

    ˙˙˙ length=0;
    ˙˙˙ while (c=*s) {
    ˙˙˙˙˙˙˙ if ((c & 0x80) == 0) n = 1;
    ˙˙˙˙˙˙˙ else if ((c & 0xE0) == 0xC0) n = 2;
    ˙˙˙˙˙˙˙ else if ((c & 0xF0) == 0xE0) n = 3;
    ˙˙˙˙˙˙˙ else n = 4;
    ˙˙˙˙˙˙˙ s += n;
    ˙˙˙˙˙˙˙ ++length;
    ˙˙˙ }
    ˙˙˙ return length;
    }

    Take a string of a number of UTF-8 characters with a proper
    mixed chunk-lengths.

    OK. I took the Wikipedia article on China, /in Chinese/ and extract the
    source text.

    This was a file of 2179489 bytes. I got these results:

    My C: 1969415 chars (reading text from a file)
    Your C++: 1956525 chars (embedded u8"" string)

    There's a small discrepancy, so I used an online service, which gave me:

    1965068 chars

    My figure is somewhat closer than yours. However this site also reported
    the size of the pasted-in text slightly differently, so I looked for a source-code version, which I found at:

    https://www.daemonology.net/blog/2008-06-05-faster-utf8-strlen.html

    That gave me:

    1969415 chars

    Which is exactly my figure.

    BTW, repeating the count 1000 times, took this amount of time:

    C++ -O2 8.5 seconds
    C gcc-O2 2.1 seconds (my version)
    C gcc-O2 0.4 seconds (fast online version)
    C tcc 1.1 seconds (fast version)

    So even Tiny C wipes the floor with optimised C++ with its fancy
    libraries and advanced string types, and using ordinary C strings and
    standard C (that __builtin_prefetch line wasn't used).

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 14:10:46 2025
    This code with AVX512BW and BMI1 is 13,5 times faster than yours on my Zen4-PC.

    size_t utf8Width2( const char *s )

    {
    ˙ ˙ __m512i const
    ˙ ˙ ˙ ˙ ZERO = _mm512_setzero_si512(),
    ˙ ˙ ˙ ˙ ONE_MASK = _mm512_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ ONE_HEAD = ZERO,
    ˙ ˙ ˙ ˙ TWO_MASK = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ TWO_HEAD = _mm512_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ THREE_MASK = _mm512_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ THREE_HEAD = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ FOUR_MASK = _mm512_set1_epi8( (char)0xF8 ),
    ˙ ˙ ˙ ˙ FOUR_HEAD = _mm512_set1_epi8( (char)0xF0 );
    ˙ ˙ uintptr_t
    ˙ ˙ ˙ ˙ begin = (uintptr_t)s,
    ˙ ˙ ˙ ˙ base = begin & -64;
    ˙ ˙ s = (char *)base;
    ˙ ˙ size_t count = 0;
    ˙ ˙ __m512i chunk;
    ˙ ˙ uint64_t nzMask;
    ˙ ˙ auto doChunk = [&]() L_FORCEINLINE
    ˙ ˙ {
    ˙ ˙ ˙ ˙ uint64_t
    ˙ ˙ ˙ ˙ ˙ ˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    ONE_MASK ), ONE_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    TWO_MASK ), TWO_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, THREE_MASK ), THREE_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    FOUR_MASK ), FOUR_HEAD ) & nzMask;
    ˙ ˙ ˙ ˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙ ˙ };
    ˙ ˙ chunk = _mm512_loadu_si512( s );
    ˙ ˙ unsigned head = (unsigned)(begin - base);
    ˙ ˙ nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO ) >> head;
    ˙ ˙ unsigned ones = countr_one( nzMask );
    ˙ ˙ nzMask &= ones < 64 ? (1ull << ones) - 1 : -1;
    ˙ ˙ nzMask <<= head;
    ˙ ˙ doChunk();
    ˙ ˙ if( (int64_t)nzMask >= 0 )
    ˙ ˙ ˙ ˙ return count;
    ˙ ˙ for( ; ; )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ s += 64;
    ˙ ˙ ˙ ˙ chunk = _mm512_loadu_si512( s );
    ˙ ˙ ˙ ˙ nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO );
    ˙ ˙ ˙ ˙ ones = countr_one( nzMask );
    ˙ ˙ ˙ ˙ nzMask = ones < 64 ? (1ull << ones) - 1 : -1;
    ˙ ˙ ˙ ˙ if( !nzMask )
    ˙ ˙ ˙ ˙ ˙ ˙ break;
    ˙ ˙ ˙ ˙ doChunk();
    ˙ ˙ }
    ˙ ˙ return count;
    }

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sat Nov 22 13:38:27 2025
    On 22/11/2025 13:10, Bonita Montero wrote:
    This code with AVX512BW and BMI1 is 13,5 times faster than yours on my Zen4-PC.

    size_t utf8Width2( const char *s )

    {
    ˙ ˙ __m512i const
    ˙ ˙ ˙ ˙ ZERO = _mm512_setzero_si512(),
    ˙ ˙ ˙ ˙ ONE_MASK = _mm512_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ ONE_HEAD = ZERO,
    ˙ ˙ ˙ ˙ TWO_MASK = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ TWO_HEAD = _mm512_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ THREE_MASK = _mm512_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ THREE_HEAD = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ FOUR_MASK = _mm512_set1_epi8( (char)0xF8 ),
    ˙ ˙ ˙ ˙ FOUR_HEAD = _mm512_set1_epi8( (char)0xF0 );
    ˙ ˙ uintptr_t
    ˙ ˙ ˙ ˙ begin = (uintptr_t)s,
    ˙ ˙ ˙ ˙ base = begin & -64;
    ˙ ˙ s = (char *)base;
    ˙ ˙ size_t count = 0;
    ˙ ˙ __m512i chunk;
    ˙ ˙ uint64_t nzMask;
    ˙ ˙ auto doChunk = [&]() L_FORCEINLINE
    ˙ ˙ {
    ˙ ˙ ˙ ˙ uint64_t
    ˙ ˙ ˙ ˙ ˙ ˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    ONE_MASK ), ONE_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    TWO_MASK ), TWO_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, THREE_MASK ), THREE_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, FOUR_MASK ), FOUR_HEAD ) & nzMask;
    ˙ ˙ ˙ ˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙ ˙ };
    ˙ ˙ chunk = _mm512_loadu_si512( s );
    ˙ ˙ unsigned head = (unsigned)(begin - base);
    ˙ ˙ nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO ) >> head;
    ˙ ˙ unsigned ones = countr_one( nzMask );
    ˙ ˙ nzMask &= ones < 64 ? (1ull << ones) - 1 : -1;
    ˙ ˙ nzMask <<= head;
    ˙ ˙ doChunk();
    ˙ ˙ if( (int64_t)nzMask >= 0 )
    ˙ ˙ ˙ ˙ return count;
    ˙ ˙ for( ; ; )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ s += 64;
    ˙ ˙ ˙ ˙ chunk = _mm512_loadu_si512( s );
    ˙ ˙ ˙ ˙ nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO );
    ˙ ˙ ˙ ˙ ones = countr_one( nzMask );
    ˙ ˙ ˙ ˙ nzMask = ones < 64 ? (1ull << ones) - 1 : -1;
    ˙ ˙ ˙ ˙ if( !nzMask )
    ˙ ˙ ˙ ˙ ˙ ˙ break;
    ˙ ˙ ˙ ˙ doChunk();
    ˙ ˙ }
    ˙ ˙ return count;
    }


    Doesn't compile, even after I add suitable *intrin headers.

    I took out L_FORCEINLINE (not recognised); added std:: to countr_one,
    but it still gave me errors like this:

    C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h: In
    lambda function: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    42 | _mm_popcnt_u64 (unsigned long long __X)
    | ^~~~~~~~~~~~~~


    You have to give complete compilable code or have only simple
    dependencies like stdio.h.



    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 15:08:16 2025
    Am 22.11.2025 um 14:38 schrieb bart:
    Doesn't compile, even after I add suitable *intrin headers.
    I took out L_FORCEINLINE (not recognised); added std:: to countr_one,
    but it still gave me errors like this: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h: In
    lambda function: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    ˙˙ 42 | _mm_popcnt_u64 (unsigned long long __X)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~
    You have to give complete compilable code or have only simple
    dependencies like stdio.h.

    Try __attribute__((always_inline)) instead. The code requires enabled
    AVX512 compilation
    with g++ and a AVX512-compatible CPU (Intel since Skylake-X Xeons, AMD
    since Zen4).
    If you want to test for an older CPU you can stick with the below code,
    which is AVX2.
    On my CPU this is only seven times faster than yours. AVX-512 really
    rocks the house.

    size_t utf8Width256( const char *s )
    {
    ˙ ˙ __m256i const
    ˙ ˙ ˙ ˙ ZERO = _mm256_setzero_si256(),
    ˙ ˙ ˙ ˙ ONE_MASK = _mm256_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ ONE_HEAD = ZERO,
    ˙ ˙ ˙ ˙ TWO_MASK = _mm256_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ TWO_HEAD = _mm256_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ THREE_MASK = _mm256_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ THREE_HEAD = _mm256_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ FOUR_MASK = _mm256_set1_epi8( (char)0xF8 ),
    ˙ ˙ ˙ ˙ FOUR_HEAD = _mm256_set1_epi8( (char)0xF0 );
    ˙ ˙ uintptr_t
    ˙ ˙ ˙ ˙ begin = (uintptr_t)s,
    ˙ ˙ ˙ ˙ base = begin & -32;
    ˙ ˙ s = (char *)base;
    ˙ ˙ size_t count = 0;
    ˙ ˙ __m256i chunk;
    ˙ ˙ uint32_t nzMask;
    ˙ ˙ auto doChunk = [&]() L_FORCEINLINE
    ˙ ˙ {
    ˙ ˙ ˙ ˙ uint32_t
    ˙ ˙ ˙ ˙ ˙ ˙ one = _mm256_movemask_epi8( _mm256_cmpeq_epi8(
    _mm256_and_si256( chunk, ONE_MASK ), ONE_HEAD ) ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ two = _mm256_movemask_epi8( _mm256_cmpeq_epi8(
    _mm256_and_si256( chunk, TWO_MASK ), TWO_HEAD ) ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ three = _mm256_movemask_epi8( _mm256_cmpeq_epi8( _mm256_and_si256( chunk, THREE_MASK ), THREE_HEAD ) ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ four = _mm256_movemask_epi8( _mm256_cmpeq_epi8(
    _mm256_and_si256( chunk, FOUR_MASK ), FOUR_HEAD ) ) & nzMask;
    ˙ ˙ ˙ ˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙ ˙ };

    ˙ ˙ chunk = _mm256_loadu_si256( (__m256i *)s );
    ˙ ˙ unsigned head = (unsigned)(begin - base);
    ˙ ˙ nzMask = ~_mm256_movemask_epi8( _mm256_cmpeq_epi8( chunk, ZERO ) )
    head;
    ˙ ˙ unsigned ones = countr_one( nzMask );
    ˙ ˙ nzMask &= ones < 32 ? (1ull << ones) - 1 : -1;
    ˙ ˙ nzMask <<= head;
    ˙ ˙ doChunk();
    ˙ ˙ if( (int32_t)nzMask >= 0 )
    ˙ ˙ ˙ ˙ return count;
    ˙ ˙ for( ; ; )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ s += 32;
    ˙ ˙ ˙ ˙ chunk = _mm256_loadu_si256( (__m256i *)s );
    ˙ ˙ ˙ ˙ nzMask = ~_mm256_movemask_epi8( _mm256_cmpeq_epi8( chunk, ZERO ) );
    ˙ ˙ ˙ ˙ ones = countr_one( nzMask );
    ˙ ˙ ˙ ˙ nzMask = ones < 32 ? (1ull << ones) - 1 : -1;
    ˙ ˙ ˙ ˙ if( !nzMask )
    ˙ ˙ ˙ ˙ ˙ ˙ break;
    ˙ ˙ ˙ ˙ doChunk();
    ˙ ˙ }
    ˙ ˙ return count;
    }




    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sat Nov 22 14:28:11 2025
    On 22/11/2025 14:08, Bonita Montero wrote:
    Am 22.11.2025 um 14:38 schrieb bart:
    Doesn't compile, even after I add suitable *intrin headers.
    I took out L_FORCEINLINE (not recognised); added std:: to countr_one,
    but it still gave me errors like this:
    C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h: In
    lambda function:
    C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int
    _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    ˙˙ 42 | _mm_popcnt_u64 (unsigned long long __X)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~
    You have to give complete compilable code or have only simple
    dependencies like stdio.h.

    Try __attribute__((always_inline)) instead. The code requires enabled
    AVX512 compilation
    with g++ and a AVX512-compatible CPU (Intel since Skylake-X Xeons, AMD
    since Zen4).
    If you want to test for an older CPU you can stick with the below code, which is AVX2.

    Still doesn't work. I'm using g++ 14.1.0. It doesn't like 'countr_one'
    with or without std::

    Would it hurt to post a complete, compilable program? Plus the compiler invocation if it needs anything unusual.

    It only needs a minimal main() routine which I can tweak to my test
    input. Unless all it needs to use it is a call to utf8Width("abc") which returns a simple integer.

    But ATM my C version is still faster!



    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 15:51:07 2025
    Am 22.11.2025 um 15:28 schrieb bart:
    Still doesn't work. I'm using g++ 14.1.0. It doesn't like 'countr_one'
    with or without std::
    -std=c++20
    Would it hurt to post a complete, compilable program? Plus the
    compiler invocation if it needs anything unusual.
    I'm using Visual C++ or clang-cl (MSVC-compatible clang).
    I guess with g++ / clang you need <x86intrin.h>
    It only needs a minimal main() routine which I can tweak to my test
    input. Unless all it needs to use it is a call to utf8Width("abc")
    which returns a simple integer.
    It works the same as your code, i.e. it takes a char-pointer.
    But ATM my C version is still faster!
    For sure not that fast as my AVX (seven times) / AVX-512 (13,5 times)
    version.

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 16:05:51 2025
    Take this and -mavx512bw and -std=c++23.

    #include <iostream>
    #include <string_view>
    #include <bit>
    #include <algorithm>
    #include <random>
    #include <array>
    #include <span>
    #include <chrono>
    #if defined(_MSC_VER)
    ˙ ˙ #include <intrin.h>
    #elif defined(__GNUC__) || defined(__clang__)
    ˙ ˙ #include <x86intrin.h>
    #endif
    #include "inline.h"

    #if defined(_MSC_VER) && !defined(__clang__)
    ˙ ˙ #pragma warning(disable: 26815) // dangling pointer
    #endif

    using namespace std;
    using namespace chrono;

    template<bool Validate = false, typename View>
    ˙ ˙ requires std::same_as<View, string_view> || std::same_as<View, u8string_view>
    NOINLINE size_t utf8Width( View str )
    {
    ˙ ˙ ptrdiff_t rem = str.end() - str.begin(), w = 0, width;
    ˙ ˙ for( auto it = str.begin(); rem > 0; rem -= width, ++w ) [[likely]]
    ˙ ˙ {
    ˙ ˙ ˙ ˙ width = countl_one( (unsigned char)*it );
    ˙ ˙ ˙ ˙ width += (size_t)!width;
    ˙ ˙ ˙ ˙ if constexpr( Validate )
    ˙ ˙ ˙ ˙ ˙ ˙ if( (*it & 0xC0) == 0x80 || width > min( 4Z, rem ) )
    [[unlikely]]
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ return -1;
    ˙ ˙ ˙ ˙ auto end = it + width;
    ˙ ˙ ˙ ˙ if constexpr( !Validate )
    ˙ ˙ ˙ ˙ ˙ ˙ it = end;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ while( ++it != end )
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ if( (*it & 0xC0) != 0x80 )
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ return -1;
    ˙ ˙ }
    ˙ ˙ if constexpr( Validate )
    ˙ ˙ ˙ ˙ if( rem )
    ˙ ˙ ˙ ˙ ˙ ˙ return -1;
    ˙ ˙ return w;
    }

    NOINLINE size_t utf8widthC( char const *str )
    {
    ˙ ˙ size_t length = 0, n;
    ˙ ˙ for( char8_t c; (c = *str); ++length )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ if( (c & 0x80) == 0 )
    ˙ ˙ ˙ ˙ ˙ ˙ n = 1;
    ˙ ˙ ˙ ˙ else if( (c & 0xE0) == 0xC0 )
    ˙ ˙ ˙ ˙ ˙ ˙ n = 2;
    ˙ ˙ ˙ ˙ else if( (c & 0xF0) == 0xE0 )
    ˙ ˙ ˙ ˙ ˙ ˙ n = 3;
    ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ n = 4;
    ˙ ˙ ˙ ˙ n += (size_t)!n;
    ˙ ˙ ˙ ˙ str += n;
    ˙ ˙ }
    ˙ ˙ return length;
    }

    NOINLINE size_t utf8Width512( const char *s )
    {
    ˙ ˙ __m512i const
    ˙ ˙ ˙ ˙ ZERO = _mm512_setzero_si512(),
    ˙ ˙ ˙ ˙ ONE_MASK = _mm512_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ ONE_HEAD = ZERO,
    ˙ ˙ ˙ ˙ TWO_MASK = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ TWO_HEAD = _mm512_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ THREE_MASK = _mm512_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ THREE_HEAD = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ FOUR_MASK = _mm512_set1_epi8( (char)0xF8 ),
    ˙ ˙ ˙ ˙ FOUR_HEAD = _mm512_set1_epi8( (char)0xF0 );
    ˙ ˙ uintptr_t
    ˙ ˙ ˙ ˙ begin = (uintptr_t)s,
    ˙ ˙ ˙ ˙ base = begin & -64;
    ˙ ˙ s = (char *)base;
    ˙ ˙ size_t count = 0;
    ˙ ˙ __m512i chunk;
    ˙ ˙ uint64_t nzMask;
    ˙ ˙ auto doChunk = [&]() L_FORCEINLINE
    ˙ ˙ {
    ˙ ˙ ˙ ˙ uint64_t
    ˙ ˙ ˙ ˙ ˙ ˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    ONE_MASK ), ONE_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    TWO_MASK ), TWO_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, THREE_MASK ), THREE_HEAD ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    FOUR_MASK ), FOUR_HEAD ) & nzMask;
    ˙ ˙ ˙ ˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙ ˙ };
    ˙ ˙ chunk = _mm512_loadu_si512( s );
    ˙ ˙ unsigned head = (unsigned)(begin - base);
    ˙ ˙ nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO ) >> head;
    ˙ ˙ unsigned ones = countr_one( nzMask );
    ˙ ˙ nzMask &= ones < 64 ? (1ull << ones) - 1 : -1;
    ˙ ˙ nzMask <<= head;
    ˙ ˙ doChunk();
    ˙ ˙ if( (int64_t)nzMask >= 0 )
    ˙ ˙ ˙ ˙ return count;
    ˙ ˙ for( ; ; )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ s += 64;
    ˙ ˙ ˙ ˙ chunk = _mm512_loadu_si512( s );
    ˙ ˙ ˙ ˙ nzMask = ~_mm512_cmpeq_epi8_mask( chunk, ZERO );
    ˙ ˙ ˙ ˙ ones = countr_one( nzMask );
    ˙ ˙ ˙ ˙ nzMask = ones < 64 ? (1ull << ones) - 1 : -1;
    ˙ ˙ ˙ ˙ if( !nzMask )
    ˙ ˙ ˙ ˙ ˙ ˙ break;
    ˙ ˙ ˙ ˙ doChunk();
    ˙ ˙ }
    ˙ ˙ return count;
    }

    NOINLINE size_t utf8Width256( const char *s )
    {
    ˙ ˙ __m256i const
    ˙ ˙ ˙ ˙ ZERO = _mm256_setzero_si256(),
    ˙ ˙ ˙ ˙ ONE_MASK = _mm256_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ ONE_HEAD = ZERO,
    ˙ ˙ ˙ ˙ TWO_MASK = _mm256_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ TWO_HEAD = _mm256_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ THREE_MASK = _mm256_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ THREE_HEAD = _mm256_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ FOUR_MASK = _mm256_set1_epi8( (char)0xF8 ),
    ˙ ˙ ˙ ˙ FOUR_HEAD = _mm256_set1_epi8( (char)0xF0 );
    ˙ ˙ uintptr_t
    ˙ ˙ ˙ ˙ begin = (uintptr_t)s,
    ˙ ˙ ˙ ˙ base = begin & -32;
    ˙ ˙ s = (char *)base;
    ˙ ˙ size_t count = 0;
    ˙ ˙ __m256i chunk;
    ˙ ˙ uint32_t nzMask;
    ˙ ˙ auto doChunk = [&]() L_FORCEINLINE
    ˙ ˙ {
    ˙ ˙ ˙ ˙ uint32_t
    ˙ ˙ ˙ ˙ ˙ ˙ one = _mm256_movemask_epi8( _mm256_cmpeq_epi8(
    _mm256_and_si256( chunk, ONE_MASK ), ONE_HEAD ) ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ two = _mm256_movemask_epi8( _mm256_cmpeq_epi8(
    _mm256_and_si256( chunk, TWO_MASK ), TWO_HEAD ) ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ three = _mm256_movemask_epi8( _mm256_cmpeq_epi8( _mm256_and_si256( chunk, THREE_MASK ), THREE_HEAD ) ) & nzMask,
    ˙ ˙ ˙ ˙ ˙ ˙ four = _mm256_movemask_epi8( _mm256_cmpeq_epi8(
    _mm256_and_si256( chunk, FOUR_MASK ), FOUR_HEAD ) ) & nzMask;
    ˙ ˙ ˙ ˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two ) + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙ ˙ };
    ˙ ˙ chunk = _mm256_loadu_si256( (__m256i *)s );
    ˙ ˙ unsigned head = (unsigned)(begin - base);
    ˙ ˙ nzMask = ~_mm256_movemask_epi8( _mm256_cmpeq_epi8( chunk, ZERO ) )
    head;
    ˙ ˙ unsigned ones = countr_one( nzMask );
    ˙ ˙ nzMask &= ones < 32 ? (1ull << ones) - 1 : -1;
    ˙ ˙ nzMask <<= head;
    ˙ ˙ doChunk();
    ˙ ˙ if( (int32_t)nzMask >= 0 )
    ˙ ˙ ˙ ˙ return count;
    ˙ ˙ for( ; ; )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ s += 32;
    ˙ ˙ ˙ ˙ chunk = _mm256_loadu_si256( (__m256i *)s );
    ˙ ˙ ˙ ˙ nzMask = ~_mm256_movemask_epi8( _mm256_cmpeq_epi8( chunk, ZERO ) );
    ˙ ˙ ˙ ˙ ones = countr_one( nzMask );
    ˙ ˙ ˙ ˙ nzMask = ones < 32 ? (1ull << ones) - 1 : -1;
    ˙ ˙ ˙ ˙ if( !nzMask )
    ˙ ˙ ˙ ˙ ˙ ˙ break;
    ˙ ˙ ˙ ˙ doChunk();
    ˙ ˙ }
    ˙ ˙ return count;
    }

    int main()
    {
    ˙ ˙ constexpr unsigned
    ˙ ˙ ˙ ˙ TYPE1_BITS = 7,
    ˙ ˙ ˙ ˙ TYPE2_BITS = 11,
    ˙ ˙ ˙ ˙ TYPE3_BITS = 16,
    ˙ ˙ ˙ ˙ TYPE4_BITS = 21;
    ˙ ˙ constexpr char32_t
    ˙ ˙ ˙ ˙ TYPE1_END = 1 << TYPE1_BITS,
    ˙ ˙ ˙ ˙ TYPE2_END = 1 << TYPE2_BITS,
    ˙ ˙ ˙ ˙ TYPE3_END = 1 << TYPE3_BITS,
    ˙ ˙ ˙ ˙ TYPE4_END = 1 << TYPE4_BITS;
    ˙ ˙ using urand = uniform_int_distribution<unsigned>;
    ˙ ˙ mt19937_64 mt;
    ˙ ˙ uniform_int_distribution<size_t> rndWidth( 1, 4 );
    ˙ ˙ urand rawRanges[4] =
    ˙ ˙ {
    ˙ ˙ ˙ ˙ urand( 1, TYPE1_END - 1 ),
    ˙ ˙ ˙ ˙ urand( TYPE1_END, TYPE2_END - 1 ),
    ˙ ˙ ˙ ˙ urand( TYPE2_END, TYPE3_END - 1 ),
    ˙ ˙ ˙ ˙ urand( TYPE3_END, TYPE4_END - 1 )
    ˙ ˙ };
    ˙ ˙ span ranges( rawRanges );
    ˙ ˙ char8_t rawTypeHeads[4] { 0, 0xC0, 0xE0, 0xF0 };
    ˙ ˙ span typeHeads( rawTypeHeads );
    ˙ ˙ constexpr size_t BUF_MIN = 0x10000;
    ˙ ˙ u8string u8Str( BUF_MIN + 3, (char8_t)0 );
    ˙ ˙ using u8s_it = u8string::iterator;
    ˙ ˙ u8s_it
    ˙ ˙ ˙ ˙ itChar = u8Str.begin(),
    ˙ ˙ ˙ ˙ itCharEnd = itChar + BUF_MIN;
    ˙ ˙ for( size_t width, type; itChar < itCharEnd; itChar += width )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ width = rndWidth( mt );
    ˙ ˙ ˙ ˙ type = width - 1;
    ˙ ˙ ˙ ˙ char32_t c = (ranges[type])( mt );
    ˙ ˙ ˙ ˙ for( u8s_it itTail = itChar + width; --itTail > itChar; c >>= 6 )
    ˙ ˙ ˙ ˙ ˙ ˙ *itTail = (char8_t)(0x80 | c & 0x3F);
    ˙ ˙ ˙ ˙ *itChar = typeHeads[type] | (char8_t)c;
    ˙ ˙ }
    ˙ ˙ u8Str.resize( itChar - u8Str.begin() );
    #if defined(NDEBUG)
    ˙ ˙ constexpr size_t ROUNDS = 100'000;
    #else
    ˙ ˙ constexpr size_t ROUNDS = 1'000;
    #endif
    ˙ ˙ auto bench = [&]( char const *what, auto fn )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ auto start = high_resolution_clock::now();
    ˙ ˙ ˙ ˙ for( size_t r = ROUNDS; r; --r )
    ˙ ˙ ˙ ˙ ˙ ˙ fn( u8Str );
    ˙ ˙ ˙ ˙ double secs = (double)duration_cast<nanoseconds>( high_resolution_clock::now() - start ).count() / 1.0e9;
    ˙ ˙ ˙ ˙ cout << what << secs << endl;
    ˙ ˙ };
    ˙ ˙ size_t (*volatile utf8widthC)( char const * ) = ::utf8widthC;
    ˙ ˙ size_t (*volatile utf8Width256)(const char *s) = ::utf8Width256;
    ˙ ˙ size_t (*volatile utf8Width512)(const char *s) = ::utf8Width512;
    ˙ ˙ size_t total = 0;
    ˙ ˙ bench( "my: ", [&]( u8string const &str ) { total += utf8Width256(
    (char *)str.c_str() ); } );
    ˙ ˙ bench( "nerd: ", [&]( u8string const &str ) { total += utf8widthC(
    (char *)str.c_str() ); } );
    ˙ ˙ return (int)total;

    }


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sat Nov 22 16:35:13 2025
    On 22/11/2025 15:05, Bonita Montero wrote:
    Take this and -mavx512bw and -std=c++23.

    #include <iostream>
    #include <string_view>
    #include <bit>
    #include <algorithm>
    #include <random>
    #include <array>
    #include <span>
    #include <chrono>
    #if defined(_MSC_VER)
    ˙ ˙ #include <intrin.h>
    #elif defined(__GNUC__) || defined(__clang__)
    ˙ ˙ #include <x86intrin.h>
    #endif
    #include "inline.h"

    I don't have 'inline.h'. If I comment that out, then I get the errors
    below from 'g++ -std=c++23 prog.c', also with -Wno-inline.

    Your code seems incredibly fragile.

    c.cpp: In function 'size_t utf8Width512(const char*)':
    c.cpp:72:37: warning: AVX512F vector return without AVX512F enabled
    changes the ABI [-Wpsabi]
    72 | ZERO = _mm512_setzero_si512(),
    | ^
    c.cpp: In function 'size_t utf8Width256(const char*)':
    c.cpp:123:37: warning: AVX vector return without AVX enabled changes the
    ABI [-Wpsabi]
    123 | ZERO = _mm256_setzero_si256(),
    | ^
    In file included from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/x86gprintrin.h:73,
    from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/x86intrin.h:27,
    from c.cpp:13: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h: In
    lambda function: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    42 | _mm_popcnt_u64 (unsigned long long __X)
    | ^~~~~~~~~~~~~~
    c.cpp:95:106: note: called from here
    95 | count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    |
    ~~~~~~~~~~~~~~^~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    42 | _mm_popcnt_u64 (unsigned long long __X)
    | ^~~~~~~~~~~~~~
    c.cpp:95:80: note: called from here
    95 | count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    |
    ~~~~~~~~~~~~~~^~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    42 | _mm_popcnt_u64 (unsigned long long __X)
    | ^~~~~~~~~~~~~~
    c.cpp:95:56: note: called from here
    95 | count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    | ~~~~~~~~~~~~~~^~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    42 | _mm_popcnt_u64 (unsigned long long __X)
    | ^~~~~~~~~~~~~~
    c.cpp:95:32: note: called from here
    95 | count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    | ~~~~~~~~~~~~~~^~~~~~~
    In file included from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/immintrin.h:65,
    from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/x86intrin.h:32: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option mismatch
    1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:94:42: note: called from here
    94 | four = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, FOUR_MASK ), FOUR_HEAD ) & nzMask;
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    In file included from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/immintrin.h:55: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~
    c.cpp:94:42: note: called from here
    94 | four = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, FOUR_MASK ), FOUR_HEAD ) & nzMask;
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option mismatch
    1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:93:43: note: called from here
    93 | three = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, THREE_MASK ), THREE_HEAD ) & nzMask,
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~
    c.cpp:93:43: note: called from here
    93 | three = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, THREE_MASK ), THREE_HEAD ) & nzMask,
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option mismatch
    1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:92:41: note: called from here
    92 | two = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, TWO_MASK ), TWO_HEAD ) & nzMask,
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~
    c.cpp:92:41: note: called from here
    92 | two = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, TWO_MASK ), TWO_HEAD ) & nzMask,
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option mismatch
    1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:91:41: note: called from here
    91 | one = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, ONE_MASK ), ONE_HEAD ) & nzMask,
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    | ^~~~~~~~~~~~~~~~
    c.cpp:91:41: note: called from here
    91 | one = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, ONE_MASK ), ONE_HEAD ) & nzMask,
    | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 18:13:07 2025
    You can compile the code with -mavx512bw.
    This is "inline.h":

    #if !defined(INLINE_HEADER)
    ˙ ˙ #define INLINE_HEADER

    ˙ ˙ #if !defined(NOINLINE)
    ˙ ˙ ˙ ˙ #if defined(__GNUC__) || defined(__clang__)
    ˙ ˙ ˙ ˙ ˙ ˙ #define NOINLINE __attribute__((noinline))
    ˙ ˙ ˙ ˙ #elif defined(_MSC_VER)
    ˙ ˙ ˙ ˙ ˙ ˙ #define NOINLINE __declspec(noinline)
    ˙ ˙ ˙ ˙ #elif
    ˙ ˙ ˙ ˙ ˙ ˙ #define NOINLINE
    ˙ ˙ ˙ ˙ #endif
    ˙ ˙ #endif

    ˙ ˙ #if defined(__GNUC__)
    ˙ ˙ ˙ ˙ #pragma GCC diagnostic ignored "-Wattributes"
    ˙ ˙ #endif

    ˙ ˙ #if !defined(FORCEINLINE)
    ˙ ˙ ˙ ˙ #if (defined(__GNUC__) || defined(__clang__))
    ˙ ˙ ˙ ˙ ˙ ˙ #define FORCEINLINE __attribute__((always_inline)) inline
    ˙ ˙ ˙ ˙ #elif defined(_MSC_VER)
    ˙ ˙ ˙ ˙ ˙ ˙ #define FORCEINLINE __forceinline
    ˙ ˙ ˙ ˙ #elif
    ˙ ˙ ˙ ˙ ˙ ˙ #define FORCEINLINE inline
    ˙ ˙ ˙ ˙ #endif
    ˙ ˙ #endif

    ˙ ˙ #if !defined(L_FORCEINLINE)
    ˙ ˙ ˙ ˙ #if defined(__GNUC__) || defined(__clang__)
    ˙ ˙ ˙ ˙ ˙ ˙ #define L_FORCEINLINE __attribute__((always_inline))
    ˙ ˙ ˙ ˙ #elif defined(_MSC_VER)
    ˙ ˙ ˙ ˙ ˙ ˙ #define L_FORCEINLINE [[msvc::forceinline]]
    ˙ ˙ ˙ ˙ #elif
    ˙ ˙ ˙ ˙ ˙ ˙ #define L_FORCEINLINE
    ˙ ˙ ˙ ˙ #endif
    ˙ ˙ #endif
    #endif

    Am 22.11.2025 um 17:35 schrieb bart:
    On 22/11/2025 15:05, Bonita Montero wrote:
    Take this and -mavx512bw and -std=c++23.

    #include <iostream>
    #include <string_view>
    #include <bit>
    #include <algorithm>
    #include <random>
    #include <array>
    #include <span>
    #include <chrono>
    #if defined(_MSC_VER)
    ˙˙ ˙ #include <intrin.h>
    #elif defined(__GNUC__) || defined(__clang__)
    ˙˙ ˙ #include <x86intrin.h>
    #endif
    #include "inline.h"

    I don't have 'inline.h'. If I comment that out, then I get the errors
    below from 'g++ -std=c++23 prog.c', also with -Wno-inline.

    Your code seems incredibly fragile.

    c.cpp: In function 'size_t utf8Width512(const char*)':
    c.cpp:72:37: warning: AVX512F vector return without AVX512F enabled
    changes the ABI [-Wpsabi]
    ˙˙ 72 |˙˙˙˙˙˙˙˙ ZERO = _mm512_setzero_si512(),
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^
    c.cpp: In function 'size_t utf8Width256(const char*)':
    c.cpp:123:37: warning: AVX vector return without AVX enabled changes
    the ABI [-Wpsabi]
    ˙ 123 |˙˙˙˙˙˙˙˙ ZERO = _mm256_setzero_si256(),
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^
    In file included from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/x86gprintrin.h:73,
    ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/x86intrin.h:27,
    ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ from c.cpp:13: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h: In
    lambda function: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    ˙˙ 42 | _mm_popcnt_u64 (unsigned long long __X)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~
    c.cpp:95:106: note: called from here
    ˙˙ 95 |˙˙˙˙˙˙˙˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙˙˙˙˙ | ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ~~~~~~~~~~~~~~^~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    ˙˙ 42 | _mm_popcnt_u64 (unsigned long long __X)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~
    c.cpp:95:80: note: called from here
    ˙˙ 95 |˙˙˙˙˙˙˙˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙˙˙˙˙ | ˙~~~~~~~~~~~~~~^~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    ˙˙ 42 | _mm_popcnt_u64 (unsigned long long __X)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~
    c.cpp:95:56: note: called from here
    ˙˙ 95 |˙˙˙˙˙˙˙˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~^~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/popcntintrin.h:42:1:
    error: inlining failed in call to 'always_inline' 'long long int _mm_popcnt_u64(long long unsigned int)': target specific option mismatch
    ˙˙ 42 | _mm_popcnt_u64 (unsigned long long __X)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~
    c.cpp:95:32: note: called from here
    ˙˙ 95 |˙˙˙˙˙˙˙˙ count += _mm_popcnt_u64( one ) + _mm_popcnt_u64( two )
    + _mm_popcnt_u64( three ) + _mm_popcnt_u64( four );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ~~~~~~~~~~~~~~^~~~~~~
    In file included from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/immintrin.h:65,
    ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/x86intrin.h:32: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option
    mismatch
    ˙1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:94:42: note: called from here
    ˙˙ 94 |˙˙˙˙˙˙˙˙˙˙˙˙ four = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, FOUR_MASK ), FOUR_HEAD ) & nzMask;
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    In file included from C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/immintrin.h:55: C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~
    c.cpp:94:42: note: called from here
    ˙˙ 94 |˙˙˙˙˙˙˙˙˙˙˙˙ four = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, FOUR_MASK ), FOUR_HEAD ) & nzMask;
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option
    mismatch
    ˙1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:93:43: note: called from here
    ˙˙ 93 |˙˙˙˙˙˙˙˙˙˙˙˙ three = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, THREE_MASK ), THREE_HEAD ) & nzMask,
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~
    c.cpp:93:43: note: called from here
    ˙˙ 93 |˙˙˙˙˙˙˙˙˙˙˙˙ three = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, THREE_MASK ), THREE_HEAD ) & nzMask,
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option
    mismatch
    ˙1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:92:41: note: called from here
    ˙˙ 92 |˙˙˙˙˙˙˙˙˙˙˙˙ two = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, TWO_MASK ), TWO_HEAD ) & nzMask,
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~
    c.cpp:92:41: note: called from here
    ˙˙ 92 |˙˙˙˙˙˙˙˙˙˙˙˙ two = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, TWO_MASK ), TWO_HEAD ) & nzMask,
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512bwintrin.h:1716:1: error: inlining failed in call to 'always_inline' '__mmask64 _mm512_cmpeq_epi8_mask(__m512i, __m512i)': target specific option
    mismatch
    ˙1716 | _mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~~~~~~~
    c.cpp:91:41: note: called from here
    ˙˙ 91 |˙˙˙˙˙˙˙˙˙˙˙˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, ONE_MASK ), ONE_HEAD ) & nzMask,
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ C:/tdm/lib/gcc/x86_64-w64-mingw32/14.1.0/include/avx512fintrin.h:10651:1: error: inlining failed in call to 'always_inline' '__m512i _mm512_and_si512(__m512i, __m512i)': target specific option mismatch
    10651 | _mm512_and_si512 (__m512i __A, __m512i __B)
    ˙˙˙˙˙ | ^~~~~~~~~~~~~~~~
    c.cpp:91:41: note: called from here
    ˙˙ 91 |˙˙˙˙˙˙˙˙˙˙˙˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, ONE_MASK ), ONE_HEAD ) & nzMask,
    ˙˙˙˙˙ | ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~



    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sat Nov 22 17:35:12 2025
    On 22/11/2025 17:13, Bonita Montero wrote:
    You can compile the code with -mavx512bw.
    This is "inline.h":

    But I now get, from:

    g++ =std=c++23 -mavx512bw -O2 c.cpp

    the errors shown below. I tried -fconcepts too.

    So, what also do I need? (So far you're not selling C++ very well!)

    ---------------------------------

    c.cpp:33:54: warning: use of C++23 'make_signed_t<size_t>' integer constant
    33 | if( (*it & 0xC0) == 0x80 || width > min( 4Z, rem )
    ) [[unlikely]]
    | ^~
    c.cpp:24:5: error: 'requires' does not name a type
    24 | requires std::same_as<View, string_view> ||
    std::same_as<View, u8string_view>
    | ^~~~~~~~
    c.cpp:24:5: note: 'requires' only available with '-std=c++20' or
    '-fconcepts'
    c.cpp: In function 'size_t utf8widthC(const char*)':
    c.cpp:52:10: error: 'char8_t' was not declared in this scope; did you
    mean 'wchar_t'?
    52 | for( char8_t c; (c = *str); ++length )
    | ^~~~~~~
    | wchar_t
    c.cpp:52:22: error: 'c' was not declared in this scope
    52 | for( char8_t c; (c = *str); ++length )
    | ^
    c.cpp: In function 'size_t utf8Width512(const char*)':
    c.cpp:99:21: error: 'countr_one' was not declared in this scope
    99 | unsigned ones = countr_one( nzMask );
    | ^~~~~~~~~~
    c.cpp: In function 'size_t utf8Width256(const char*)':
    c.cpp:150:21: error: 'countr_one' was not declared in this scope
    150 | unsigned ones = countr_one( nzMask );
    | ^~~~~~~~~~
    c.cpp: In function 'int main()':
    c.cpp:192:5: error: 'span' was not declared in this scope
    192 | span ranges( rawRanges );
    | ^~~~
    c.cpp:192:5: note: 'std::span' is only available from C++20 onwards c.cpp:193:5: error: 'char8_t' was not declared in this scope; did you
    mean 'wchar_t'?
    193 | char8_t rawTypeHeads[4] { 0, 0xC0, 0xE0, 0xF0 };
    | ^~~~~~~
    | wchar_t
    c.cpp:194:9: error: expected ';' before 'typeHeads'
    194 | span typeHeads( rawTypeHeads );
    | ^~~~~~~~~~
    | ;
    c.cpp:196:5: error: 'u8string' was not declared in this scope
    196 | u8string u8Str( BUF_MIN + 3, (char8_t)0 );
    | ^~~~~~~~
    c.cpp:196:5: note: 'std::u8string' is only available from C++20 onwards c.cpp:197:20: error: 'u8string' does not name a type
    197 | using u8s_it = u8string::iterator;
    | ^~~~~~~~
    c.cpp:198:5: error: 'u8s_it' was not declared in this scope
    198 | u8s_it
    | ^~~~~~
    c.cpp:201:30: error: 'itChar' was not declared in this scope
    201 | for( size_t width, type; itChar < itCharEnd; itChar += width )
    | ^~~~~~
    c.cpp:201:39: error: 'itCharEnd' was not declared in this scope
    201 | for( size_t width, type; itChar < itCharEnd; itChar += width )
    | ^~~~~~~~~
    c.cpp:205:23: error: 'ranges' was not declared in this scope; did you
    mean 'rawRanges'?
    205 | char32_t c = (ranges[type])( mt );
    | ^~~~~~
    | rawRanges
    c.cpp:206:20: error: expected ';' before 'itTail'
    206 | for( u8s_it itTail = itChar + width; --itTail > itChar;
    c >>= 6 )
    | ^~~~~~~
    | ;
    c.cpp:206:48: error: 'itTail' was not declared in this scope
    206 | for( u8s_it itTail = itChar + width; --itTail > itChar;
    c >>= 6 )
    | ^~~~~~
    c.cpp:208:19: error: 'typeHeads' was not declared in this scope
    208 | *itChar = typeHeads[type] | (char8_t)c;
    | ^~~~~~~~~
    c.cpp:210:5: error: 'u8Str' was not declared in this scope
    210 | u8Str.resize( itChar - u8Str.begin() );
    | ^~~~~
    c.cpp:210:19: error: 'itChar' was not declared in this scope
    210 | u8Str.resize( itChar - u8Str.begin() );
    | ^~~~~~
    c.cpp:228:25: error: 'u8string' is not a type
    228 | bench( "my: ", [&]( u8string const &str ) { total += utf8Width256( (char *)str.c_str() ); } );
    | ^~~~~~~~
    c.cpp: In lambda function:
    c.cpp:228:84: error: request for member 'c_str' in 'str', which is of non-class type 'const int'
    228 | bench( "my: ", [&]( u8string const &str ) { total += utf8Width256( (char *)str.c_str() ); } );
    |
    ^~~~~
    c.cpp: In function 'int main()':
    c.cpp:229:27: error: 'u8string' is not a type
    229 | bench( "nerd: ", [&]( u8string const &str ) { total += utf8widthC( (char *)str.c_str() ); } );
    | ^~~~~~~~
    c.cpp: In lambda function:
    c.cpp:229:84: error: request for member 'c_str' in 'str', which is of non-class type 'const int'
    229 | bench( "nerd: ", [&]( u8string const &str ) { total += utf8widthC( (char *)str.c_str() ); } );
    |
    ^~~~~



    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sat Nov 22 17:39:19 2025
    On 22/11/2025 17:35, bart wrote:
    On 22/11/2025 17:13, Bonita Montero wrote:
    You can compile the code with -mavx512bw.
    This is "inline.h":

    But I now get, from:

    ˙ g++ =std=c++23 -mavx512bw -O2 c.cpp

    the errors shown below. I tried -fconcepts too.

    So, what also do I need? (So far you're not selling C++ very well!)


    Wait, there's a "=std" in that command line instead of "-std".
    Apparently it is not an error (?).

    Anyway, it now compiles, and I can do some tests.

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 18:44:02 2025
    A lot of errors look like that you haven't enable at C++23 properly.
    Can you install a current g++ ? Maybe the newest from the repository
    is sufficient.

    Am 22.11.2025 um 18:35 schrieb bart:
    On 22/11/2025 17:13, Bonita Montero wrote:
    You can compile the code with -mavx512bw.
    This is "inline.h":

    But I now get, from:

    ˙ g++ =std=c++23 -mavx512bw -O2 c.cpp

    the errors shown below. I tried -fconcepts too.

    So, what also do I need? (So far you're not selling C++ very well!)

    ---------------------------------

    c.cpp:33:54: warning: use of C++23 'make_signed_t<size_t>' integer
    constant
    ˙˙ 33 |˙˙˙˙˙˙˙˙˙˙˙˙ if( (*it & 0xC0) == 0x80 || width > min( 4Z, rem )
    ) [[unlikely]]
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~
    c.cpp:24:5: error: 'requires' does not name a type
    ˙˙ 24 |˙˙˙˙ requires std::same_as<View, string_view> ||
    std::same_as<View, u8string_view>
    ˙˙˙˙˙ |˙˙˙˙ ^~~~~~~~
    c.cpp:24:5: note: 'requires' only available with '-std=c++20' or '-fconcepts'
    c.cpp: In function 'size_t utf8widthC(const char*)':
    c.cpp:52:10: error: 'char8_t' was not declared in this scope; did you
    mean 'wchar_t'?
    ˙˙ 52 |˙˙˙˙ for( char8_t c; (c = *str); ++length )
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙ ^~~~~~~
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙ wchar_t
    c.cpp:52:22: error: 'c' was not declared in this scope
    ˙˙ 52 |˙˙˙˙ for( char8_t c; (c = *str); ++length )
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^
    c.cpp: In function 'size_t utf8Width512(const char*)':
    c.cpp:99:21: error: 'countr_one' was not declared in this scope
    ˙˙ 99 |˙˙˙˙ unsigned ones = countr_one( nzMask );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~~~~
    c.cpp: In function 'size_t utf8Width256(const char*)':
    c.cpp:150:21: error: 'countr_one' was not declared in this scope
    ˙ 150 |˙˙˙˙ unsigned ones = countr_one( nzMask );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~~~~
    c.cpp: In function 'int main()':
    c.cpp:192:5: error: 'span' was not declared in this scope
    ˙ 192 |˙˙˙˙ span ranges( rawRanges );
    ˙˙˙˙˙ |˙˙˙˙ ^~~~
    c.cpp:192:5: note: 'std::span' is only available from C++20 onwards c.cpp:193:5: error: 'char8_t' was not declared in this scope; did you
    mean 'wchar_t'?
    ˙ 193 |˙˙˙˙ char8_t rawTypeHeads[4] { 0, 0xC0, 0xE0, 0xF0 };
    ˙˙˙˙˙ |˙˙˙˙ ^~~~~~~
    ˙˙˙˙˙ |˙˙˙˙ wchar_t
    c.cpp:194:9: error: expected ';' before 'typeHeads'
    ˙ 194 |˙˙˙˙ span typeHeads( rawTypeHeads );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙ ^~~~~~~~~~
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙ ;
    c.cpp:196:5: error: 'u8string' was not declared in this scope
    ˙ 196 |˙˙˙˙ u8string u8Str( BUF_MIN + 3, (char8_t)0 );
    ˙˙˙˙˙ |˙˙˙˙ ^~~~~~~~
    c.cpp:196:5: note: 'std::u8string' is only available from C++20 onwards c.cpp:197:20: error: 'u8string' does not name a type
    ˙ 197 |˙˙˙˙ using u8s_it = u8string::iterator;
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~~
    c.cpp:198:5: error: 'u8s_it' was not declared in this scope
    ˙ 198 |˙˙˙˙ u8s_it
    ˙˙˙˙˙ |˙˙˙˙ ^~~~~~
    c.cpp:201:30: error: 'itChar' was not declared in this scope
    ˙ 201 |˙˙˙˙ for( size_t width, type; itChar < itCharEnd; itChar +=
    width )
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~
    c.cpp:201:39: error: 'itCharEnd' was not declared in this scope
    ˙ 201 |˙˙˙˙ for( size_t width, type; itChar < itCharEnd; itChar +=
    width )
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~~~
    c.cpp:205:23: error: 'ranges' was not declared in this scope; did you
    mean 'rawRanges'?
    ˙ 205 |˙˙˙˙˙˙˙˙ char32_t c = (ranges[type])( mt );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ rawRanges
    c.cpp:206:20: error: expected ';' before 'itTail'
    ˙ 206 |˙˙˙˙˙˙˙˙ for( u8s_it itTail = itChar + width; --itTail >
    itChar; c >>= 6 )
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ;
    c.cpp:206:48: error: 'itTail' was not declared in this scope
    ˙ 206 |˙˙˙˙˙˙˙˙ for( u8s_it itTail = itChar + width; --itTail >
    itChar; c >>= 6 )
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~
    c.cpp:208:19: error: 'typeHeads' was not declared in this scope
    ˙ 208 |˙˙˙˙˙˙˙˙ *itChar = typeHeads[type] | (char8_t)c;
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~~~
    c.cpp:210:5: error: 'u8Str' was not declared in this scope
    ˙ 210 |˙˙˙˙ u8Str.resize( itChar - u8Str.begin() );
    ˙˙˙˙˙ |˙˙˙˙ ^~~~~
    c.cpp:210:19: error: 'itChar' was not declared in this scope
    ˙ 210 |˙˙˙˙ u8Str.resize( itChar - u8Str.begin() );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~
    c.cpp:228:25: error: 'u8string' is not a type
    ˙ 228 |˙˙˙˙ bench( "my: ", [&]( u8string const &str ) { total += utf8Width256( (char *)str.c_str() ); } );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~~
    c.cpp: In lambda function:
    c.cpp:228:84: error: request for member 'c_str' in 'str', which is of non-class type 'const int'
    ˙ 228 |˙˙˙˙ bench( "my: ", [&]( u8string const &str ) { total += utf8Width256( (char *)str.c_str() ); } );
    ˙˙˙˙˙ | ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~
    c.cpp: In function 'int main()':
    c.cpp:229:27: error: 'u8string' is not a type
    ˙ 229 |˙˙˙˙ bench( "nerd: ", [&]( u8string const &str ) { total += utf8widthC( (char *)str.c_str() ); } );
    ˙˙˙˙˙ |˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~~~~
    c.cpp: In lambda function:
    c.cpp:229:84: error: request for member 'c_str' in 'str', which is of non-class type 'const int'
    ˙ 229 |˙˙˙˙ bench( "nerd: ", [&]( u8string const &str ) { total += utf8widthC( (char *)str.c_str() ); } );
    ˙˙˙˙˙ | ˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙ ^~~~~




    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sat Nov 22 19:28:32 2025
    On 22/11/2025 17:44, Bonita Montero wrote:
    A lot of errors look like that you haven't enable at C++23 properly.
    Can you install a current g++ ? Maybe the newest from the repository
    is sufficient.


    I said in a followup that I'd typed =std instead of -std, which didn't generate any error from the compiler.

    But I managed to compile it. However the long program with a complicated main() just crashed trying to run it, sometime before it got to the
    actual UTF8 bit.

    So I applied those headers and options to the first mm512
    single-function version you posted. There I only had to add std:: to
    those countr.one's.

    I used this test driver

    int main() {
    size_t n = 0;
    n = utf8Width("Hello, ??!" );
    printf("%zu\n", n);
    }

    And it crashes inside that function.

    It's all just too damn complicated, sorry. It might well be fast, but
    that's no good if it is troublesome to build and run for anyone else.

    Another factor is this: each build, even at -O0, takes 3 whole seconds
    on my machine. That must be a huge pile of junk it is including.

    Building my C version takes some 1/20th of a second (even gcc takes only
    0.3 seconds).


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Sat Nov 22 20:59:08 2025
    For me the following code works:

    ˙ ˙ ˙ ˙ size_t n = 0;
    ˙ ˙ ˙ ˙ n = utf8Width( string_view( "Hello, ??!" ) );
    ˙ ˙ ˙ ˙ printf( "%zu\n", n );
    ˙ ˙ ˙ ˙ return 0;

    But this is the templated code for the Non-AVX-version.
    Try utf8Width256 for the AVX version and˙utf8Width56
    for the AVX-512 version.˙Do you have any IDE like CLion ?

    Am 22.11.2025 um 20:28 schrieb bart:
    On 22/11/2025 17:44, Bonita Montero wrote:
    A lot of errors look like that you haven't enable at C++23 properly.
    Can you install a current g++ ? Maybe the newest from the repository
    is sufficient.


    I said in a followup that I'd typed =std instead of -std, which didn't generate any error from the compiler.

    But I managed to compile it. However the long program with a
    complicated main() just crashed trying to run it, sometime before it
    got to the actual UTF8 bit.

    So I applied those headers and options to the first mm512
    single-function version you posted. There I only had to add std:: to
    those countr.one's.

    I used this test driver

    ˙ int main() {
    ˙˙˙˙˙ size_t n = 0;
    ˙˙˙˙˙ n = utf8Width("Hello, ??!" );
    ˙˙˙˙˙ printf("%zu\n", n);
    ˙ }

    And it crashes inside that function.

    It's all just too damn complicated, sorry. It might well be fast, but
    that's no good if it is troublesome to build and run for anyone else.

    Another factor is this: each build, even at -O0, takes 3 whole seconds
    on my machine. That must be a huge pile of junk it is including.

    Building my C version takes some 1/20th of a second (even gcc takes
    only 0.3 seconds).



    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Keith Thompson@3:633/10 to All on Sat Nov 22 15:24:54 2025
    bart <bc@freeuk.com> writes:
    On 22/11/2025 17:35, bart wrote:
    On 22/11/2025 17:13, Bonita Montero wrote:
    You can compile the code with -mavx512bw.
    This is "inline.h":
    But I now get, from:
    ˙ g++ =std=c++23 -mavx512bw -O2 c.cpp
    the errors shown below. I tried -fconcepts too.
    So, what also do I need? (So far you're not selling C++ very well!)

    Wait, there's a "=std" in that command line instead of
    "-std". Apparently it is not an error (?).
    [...]

    It seems that gcc and g++ interpret any unrecognized command line
    argument as the name of a "linker input file".

    BTW, comp.lang.c++ is down the hall, just past the water cooler.

    --
    Keith Thompson (The_Other_Keith) Keith.S.Thompson+u@gmail.com
    void Void(void) { Void(); } /* The recursive call of the void */

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From bart@3:633/10 to All on Sun Nov 23 00:14:58 2025
    On 22/11/2025 23:24, Keith Thompson wrote:
    bart <bc@freeuk.com> writes:
    On 22/11/2025 17:35, bart wrote:
    On 22/11/2025 17:13, Bonita Montero wrote:
    You can compile the code with -mavx512bw.
    This is "inline.h":
    But I now get, from:
    ˙ g++ =std=c++23 -mavx512bw -O2 c.cpp
    the errors shown below. I tried -fconcepts too.
    So, what also do I need? (So far you're not selling C++ very well!)

    Wait, there's a "=std" in that command line instead of
    "-std". Apparently it is not an error (?).
    [...]

    It seems that gcc and g++ interpret any unrecognized command line
    argument as the name of a "linker input file".

    It looks like it compiles any source code first, so won't get around to reporting an error if that compilation fails.

    BTW, comp.lang.c++ is down the hall, just past the water cooler.


    This was supposed be about comparing a C approach to C++. Except there
    were problems in getting the 'fast' C++ code to compile and then to run.

    I think I'll stick with the simple C version which can also be trivially ported to any language as there are no heavy dependencies.


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Philipp Klaus Krause@3:633/10 to All on Sun Nov 23 12:42:20 2025
    Am 14.11.25 um 22:03 schrieb Michael Sanders:
    static int utf8_width(const char *s) {
    int w = 0;
    const unsigned char *p = (const unsigned char *)s;

    while (*p) {
    if (*p < 0x80) { w++; p++; } // ASCII 1-byte
    else if ((*p & 0xE0) == 0xC0) { w++; p += 2; } // 2-byte UTF-8
    else if ((*p & 0xF0) == 0xE0) { w++; p += 3; } // 3-byte UTF-8
    else if ((*p & 0xF8) == 0xF0) { w++; p += 4; } // 4-byte UTF-8
    else { w++; p++; } // fallback
    }

    return w;
    }
    Do you need this to work under non-UTF-8 locales? If you only need that
    length when the locale is UTF-8, why not just use mblen from stdlib.h?

    Philipp


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From David Brown@3:633/10 to All on Sun Nov 23 13:32:18 2025
    On 23/11/2025 01:14, bart wrote:
    On 22/11/2025 23:24, Keith Thompson wrote:
    bart <bc@freeuk.com> writes:
    On 22/11/2025 17:35, bart wrote:
    On 22/11/2025 17:13, Bonita Montero wrote:
    You can compile the code with -mavx512bw.
    This is "inline.h":
    But I now get, from:
    ˙ ˙ g++ =std=c++23 -mavx512bw -O2 c.cpp
    the errors shown below. I tried -fconcepts too.
    So, what also do I need? (So far you're not selling C++ very well!)

    Wait, there's a "=std" in that command line instead of
    "-std". Apparently it is not an error (?).
    [...]

    It seems that gcc and g++ interpret any unrecognized command line
    argument as the name of a "linker input file".

    It looks like it compiles any source code first, so won't get around to reporting an error if that compilation fails.

    Correct. Compile first, then link - that has to be the order of
    business. So if the compilation fails, gcc or g++ (which are just
    "driver" programs that start the real compiler, assembler, and linker)
    doesn't get a far as trying to link, and thus doesn't get as far as
    looking to see if this mysterious "=std=c++23" file exists or not.

    gcc will happily complain when you give it an incorrect or unknown
    option, but it has to recognise that it /is/ an option!


    BTW, comp.lang.c++ is down the hall, just past the water cooler.


    This was supposed be about comparing a C approach to C++. Except there
    were problems in getting the 'fast' C++ code to compile and then to run.

    I think I'll stick with the simple C version which can also be trivially ported to any language as there are no heavy dependencies.


    Bonita has a years-long habit of interrupting C discussions with C++ distractions. I agree that sometimes a comparison to different
    languages is relevant in a C discussion, but for C++ details it is
    better to move over to c.l.c++.

    However, this code is neither C nor C++ - it is x86 assembly, wrapped in
    some C++ and a whole lot of Bonita-specific stuff. I've no idea if
    there is a suitable forum for that!


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Michael Sanders@3:633/10 to All on Sun Nov 23 22:05:35 2025
    On Sun, 23 Nov 2025 12:42:20 +0100, Philipp Klaus Krause wrote:

    Do you need this to work under non-UTF-8 locales? If you only need that length when the locale is UTF-8, why not just use mblen from stdlib.h?

    Two reasons...

    - mblen() does not return the display width of utf8 characters

    - portability...

    What I needed to accomplish was using a printf() expression where
    columns were uniformly padded/aligned by the longest string
    (whether utf8 or ascii) in the 1st column. Example...

    123 foo
    1 foo
    12345 foo
    12 foo

    After studying *cough* stealing *cough* any/all examples I could
    come across, I cobbled this composite together:

    #include <stdio.h>
    #include <string.h>

    int utf8_display_width(const char *s) {
    int w = 0;

    while (*s) {
    unsigned char b = *s;
    unsigned cp;
    int n;

    // UTF-8 decoder
    if (b <= 0x7F) { // 1-byte ASCII
    cp = b;
    n = 1;
    } else if (b >= 0xC0 && b <= 0xDF) { // 2-byte
    cp = ((b & 0x1F) << 6) |
    (s[1] & 0x3F);
    n = 2;
    } else if (b >= 0xE0 && b <= 0xEF) { // 3-byte
    cp = ((b & 0x0F) << 12) |
    ((s[1] & 0x3F) << 6) |
    (s[2] & 0x3F);
    n = 3;
    } else if (b >= 0xF0 && b <= 0xF7) { // 4-byte
    cp = ((b & 0x07) << 18) |
    ((s[1] & 0x3F) << 12) |
    ((s[2] & 0x3F) << 6) |
    (s[3] & 0x3F);
    n = 4;
    } else { // invalid, treat as 1-byte
    cp = b;
    n = 1;
    }

    // display width
    if (cp >= 0x0300 && cp <= 0x036F) {} // combining marks like e? (zero width)
    else if ( // double-width characters...
    (cp >= 0x1100 && cp <= 0x115F) || // hangul jamo
    (cp >= 0x2E80 && cp <= 0xA4CF) || // cjk radicals & unified ideographs
    (cp >= 0xAC00 && cp <= 0xD7A3) || // hangul syllables
    (cp >= 0xF900 && cp <= 0xFAFF) || // cjk compatibility ideographs
    (cp >= 0x1F300 && cp <= 0x1FAFF) // emoji + symbols
    ) { w += 2; }
    // exceptional wide characters (unicode requirement I've read elsewhere)
    else if (cp == 0x2329 || cp == 0x232A) { w += 2; }
    else { w += 1; } // normal width for everything else

    s += n;
    }

    return w;
    }

    int main(void) {
    const char *tests[] = {
    "hello",
    "Caf‚",
    "??",
    "?",
    "?",
    NULL
    };

    // find maximum display width in 1st column
    int maxw = 0;
    for (int i = 0; tests[i]; i++) {
    int w = utf8_display_width(tests[i]);
    if (w > maxw) maxw = w;
    }

    // total padding after each 1st column + 3 spaces
    int total_pad = maxw + 3;

    for (int i = 0; tests[i]; i++) {

    int w = utf8_display_width(tests[i]);
    int sl = strlen(tests[i]);
    printf("%s", tests[i]);
    int pad = total_pad - w;
    while (pad-- > 0) putchar(' ');
    printf("strlen: %d utf8 display width: %d\n", sl, w);
    }

    return 0;
    }

    // eof

    --
    :wq
    Mike Sanders

    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)
  • From Bonita Montero@3:633/10 to All on Wed Nov 26 19:42:09 2025
    I've developed a UTF-8 width function with AVX-512 that can validate
    for a proper number of extension bytes after the header bytes. The
    validation is done with bit-masks delivered from AVX-intrinsics,
    i.e. without loops.
    The code accepts a basic_string_view with a chacacter widh of one
    byte (all three char-types and char8_t). It's about 20 times faster
    than a pure validation basing on non-vectored code.
    I'll make an AVX (without 512) version so ghat you can test the code.

    template<bool Validate, typename Char, typename Traits>
    ˙ ˙ requires is_integral_v<Char> && (sizeof(Char) == 1)
    size_t utf8Width512( basic_string_view<Char, Traits> str )
    {
    ˙ ˙ if( str.empty() )
    ˙ ˙ ˙ ˙ return 0;
    ˙ ˙ constexpr uint64_t ALL_ONES = -1;
    ˙ ˙ __m512i const
    ˙ ˙ ˙ ˙ oneMask = _mm512_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ oneHead = _mm512_setzero_si512();
    ˙ ˙ uintptr_t
    ˙ ˙ ˙ ˙ uBegin = (uintptr_t)to_address( str.begin() ),
    ˙ ˙ ˙ ˙ uEnd = (uintptr_t)to_address( str.end() );
    ˙ ˙ using span_t = span<__m512i>;
    ˙ ˙ span<__m512i> range64( (__m512i *)(uBegin & -64), (__m512i *)(uEnd
    + 63 & -64) );
    ˙ ˙ size_t
    ˙ ˙ ˙ ˙ head = uBegin & 63,
    ˙ ˙ ˙ ˙ tail = uEnd & 63;
    ˙ ˙ size_t n = 0;
    ˙ ˙ uint64_t mask;
    ˙ ˙ if constexpr( Validate )
    ˙ ˙ {
    ˙ ˙ ˙ ˙ __m512i const
    ˙ ˙ ˙ ˙ ˙ ˙ extendMask = _mm512_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ ˙ ˙ extendHead = _mm512_set1_epi8( (char)0x80 ),
    ˙ ˙ ˙ ˙ ˙ ˙ twoMask = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ ˙ ˙ twoHead = _mm512_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ ˙ ˙ threeMask = _mm512_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ ˙ ˙ threeHead = _mm512_set1_epi8( (char)0xE0 ),
    ˙ ˙ ˙ ˙ ˙ ˙ fourMask = _mm512_set1_epi8( (char)0xF8 ),
    ˙ ˙ ˙ ˙ ˙ ˙ fourHead = _mm512_set1_epi8( (char)0xF0 ),
    ˙ ˙ ˙ ˙ ˙ ˙ invalid = fourMask;
    ˙ ˙ ˙ ˙ uint64_t one = 0, extend = 0, extendPrev = 0, two = 0, three =
    0, four = 0;
    ˙ ˙ ˙ ˙ auto doChunk = [&]( span_t::iterator it64 ) L_FORCEINLINE
    ˙ ˙ ˙ ˙ {
    ˙ ˙ ˙ ˙ ˙ ˙ (void)(it64 + 1);
    ˙ ˙ ˙ ˙ ˙ ˙ __m512i chunk = _mm512_load_si512( to_address( it64 ) );
    ˙ ˙ ˙ ˙ ˙ ˙ if( _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    invalid ), invalid ) & mask ) [[unlikely]]
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ return false;
    ˙ ˙ ˙ ˙ ˙ ˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    oneMask ), oneHead ) & mask;
    ˙ ˙ ˙ ˙ ˙ ˙ extend = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, extendMask ), extendHead ) & mask;
    ˙ ˙ ˙ ˙ ˙ ˙ two = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    twoMask ), twoHead ) & mask;
    ˙ ˙ ˙ ˙ ˙ ˙ three = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, threeMask ), threeHead ) & mask;
    ˙ ˙ ˙ ˙ ˙ ˙ four = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk,
    fourMask ), fourHead ) & mask;
    ˙ ˙ ˙ ˙ ˙ ˙ auto shrd = []( uint64_t left, uint64_t right, unsigned n ) L_FORCEINLINE { return˙ left << 64 - n | right >> n; };
    ˙ ˙ ˙ ˙ ˙ ˙ uint64_t
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ extend2 = shrd( extendPrev, extend, 1 ),
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ extend3 = shrd( extendPrev, extend, 2 ) & extend2,
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ extend4 = shrd( extendPrev, extend, 3 ) & extend3,
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ beyond = shrd( extendPrev, extend, 4 ) & extend4,
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ err;
    ˙ ˙ ˙ ˙ ˙ ˙ err = (two & extend2) ^ two;
    ˙ ˙ ˙ ˙ ˙ ˙ err |= (three & extend3) ^ three;
    ˙ ˙ ˙ ˙ ˙ ˙ err |= (four & extend4) ^ four;
    ˙ ˙ ˙ ˙ ˙ ˙ err |= one & extend2;
    ˙ ˙ ˙ ˙ ˙ ˙ err |= two & extend3;
    ˙ ˙ ˙ ˙ ˙ ˙ err |= three & extend4;
    ˙ ˙ ˙ ˙ ˙ ˙ err |= four & beyond;
    ˙ ˙ ˙ ˙ ˙ ˙ if( err ) [[unlikely]]
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ return false;
    ˙ ˙ ˙ ˙ ˙ ˙ n += popcount( one | two | three | four );
    ˙ ˙ ˙ ˙ ˙ ˙ extendPrev = extend;
    ˙ ˙ ˙ ˙ ˙ ˙ return true;
    ˙ ˙ ˙ ˙ };
    ˙ ˙ ˙ ˙ span_t::iterator it64 = range64.end();
    ˙ ˙ ˙ ˙ mask = tail ? ~(ALL_ONES << tail) : ALL_ONES;
    ˙ ˙ ˙ ˙ while( it64 > range64.begin() + (size_t)(bool)head )
    ˙ ˙ ˙ ˙ ˙ ˙ if( doChunk( --it64 ) ) [[likely]]
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ mask = ALL_ONES;
    ˙ ˙ ˙ ˙ ˙ ˙ else
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ return -1;
    ˙ ˙ ˙ ˙ if( head ) [[likely]]
    ˙ ˙ ˙ ˙ {
    ˙ ˙ ˙ ˙ ˙ ˙ mask &= ALL_ONES << head;
    ˙ ˙ ˙ ˙ ˙ ˙ doChunk( it64 );
    ˙ ˙ ˙ ˙ }
    ˙ ˙ ˙ ˙ if( countr_zero( extendPrev ) < countr_zero( one | two | three
    | four ) ) [[unlikely]]
    ˙ ˙ ˙ ˙ ˙ ˙ return -1;
    ˙ ˙ ˙ ˙ return n;
    ˙ ˙ }
    ˙ ˙ else
    ˙ ˙ {
    ˙ ˙ ˙ ˙ __m512i const
    ˙ ˙ ˙ ˙ ˙ ˙ mask24 = _mm512_set1_epi8( (char)0xC0 ),
    ˙ ˙ ˙ ˙ ˙ ˙ head24 = mask24;
    ˙ ˙ ˙ ˙ auto doChunk = [&]( span_t::iterator it64 ) L_FORCEINLINE
    ˙ ˙ ˙ ˙ {
    ˙ ˙ ˙ ˙ ˙ ˙ (void)(it64 + 1);
    ˙ ˙ ˙ ˙ ˙ ˙ __m512i chunk = _mm512_load_si512( to_address( it64 ) );
    ˙ ˙ ˙ ˙ ˙ ˙ uint64_t
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ one = _mm512_cmpeq_epi8_mask( _mm512_and_si512( chunk, oneMask ), oneHead ) & mask,
    ˙ ˙ ˙ ˙ ˙ ˙ ˙ ˙ twoAndMore = _mm512_cmpeq_epi8_mask( _mm512_and_si512(
    chunk, mask24 ), head24 ) & mask;
    ˙ ˙ ˙ ˙ ˙ ˙ n += popcount( one | twoAndMore );
    ˙ ˙ ˙ ˙ };
    ˙ ˙ ˙ ˙ span_t::iterator it64 = range64.begin();
    ˙ ˙ ˙ ˙ mask = ALL_ONES << head;;
    ˙ ˙ ˙ ˙ for( ; it64 != range64.end() - (bool)tail; ++it64 )
    ˙ ˙ ˙ ˙ {
    ˙ ˙ ˙ ˙ ˙ ˙ doChunk( it64);
    ˙ ˙ ˙ ˙ ˙ ˙ mask = -1;
    ˙ ˙ ˙ ˙ }
    ˙ ˙ ˙ ˙ if( !tail )
    ˙ ˙ ˙ ˙ ˙ ˙ return n;
    ˙ ˙ ˙ ˙ mask &= ~(ALL_ONES << tail);
    ˙ ˙ ˙ ˙ doChunk( it64 );
    ˙ ˙ ˙ ˙ return n;
    ˙ ˙ }
    }


    --- PyGate Linux v1.5.1
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)