working on it

This commit is contained in:
Dawid Sobczak 2025-04-05 10:55:40 +01:00
parent 56a6e78765
commit 35a88970c2
1094 changed files with 51093 additions and 51 deletions

View file

@ -0,0 +1,7 @@
#include <stdio.h>
#include <wchar.h>
/*
 * Convert a single-byte character to its wide-character equivalent.
 *
 * Only values below 128 are accepted, and they map to themselves.  The
 * comparison is done in unsigned arithmetic, so EOF and every other
 * negative argument compares as a large value and is rejected as well.
 *
 * Returns the wide character, or WEOF when c has no single-byte mapping.
 */
wint_t btowc(int c)
{
    /* WEOF (not stdio's EOF) is the wint_t failure sentinel. */
    return c<128U ? (wint_t)c : WEOF;
}

View file

@ -0,0 +1,47 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/* Decodes UTF-8 byte-by-byte. The c argument must be initialized to 0
* to begin decoding; when finished it will contain the Unicode scalar
* value decoded. Return value is 1 if finished, 0 if in-progress, and
* -1 if an invalid sequence was encountered. After an invalid sequence,
* the state (in c) automatically resets to 0 if a continuation byte was
* expected to facilitate a calling idiom of immediately retrying a
* failed decode call after processing the invalid sequence. If the
* second try fails, the byte is invalid as a starter as well.
*
* A trivial usage idiom is:
* while (src<end && (n=decode(dst, *src))>=0) 1[dst+=n]=0, src++;
*/
int decode(unsigned *c, unsigned b)
{
    unsigned state = *c;

    if (state == 0) {
        /* No sequence in progress: b has to be a starter byte. */
        if (b < 0x80) {
            /* Single-byte character; the value is the byte itself. */
            *c = b;
            return 1;
        }
        if (b-SA >= SB-SA) {
            /* Not a byte that has an entry in the lead-byte table. */
            *c = FAILSTATE;
            return -1;
        }
        /* Load the initial state for this lead byte. */
        *c = bittab[b-SA];
        return 0;
    }

    /* Mid-sequence: b must fall in the range encoded in the state. */
    if (OOB(state,b)) {
        *c = 0;
        return -1;
    }

    /* Fold in six payload bits; bit 31 clear means the value is complete. */
    state = state<<6 | b-0x80;
    *c = state;
    return !(state&(1U<<31));
}

View file

@ -0,0 +1,60 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <inttypes.h>
#include "internal.h"
/*
 * Initial decoder states, one per UTF-8 lead byte.  Each state is built from
 * R(lo,hi) range constants (defined in internal.h):
 *
 *   - the top bits hold the range the NEXT byte must fall in;
 *   - further R() values shifted right by 6, 12, ... describe the ranges of
 *     the following continuation bytes (the decoders shift the state left by
 *     6 for every byte consumed, which moves the next range into place);
 *   - the low bits (x) are the payload bits carried by the lead byte.
 *
 * C/D cover two-byte leads (0xc2..0xdf), E three-byte leads (0xe0..0xef) and
 * F four-byte leads (0xf0 and up).
 */
#define C(x) ( x<2 ? -1 : ( R(0x80,0xc0) | x ) )
#define D(x) C((x+16))
/* 0xe0 must be followed by 0xa0..0xbf (no overlong forms); 0xed must be
 * followed by 0x80..0x9f (no UTF-16 surrogates). */
#define E(x) ( ( x==0 ? R(0xa0,0xc0) : \
                 x==0xd ? R(0x80,0xa0) : \
                 R(0x80,0xc0) ) \
             | ( R(0x80,0xc0) >> 6 ) \
             | x )
#ifdef I_FAILED_TO_RTFM_RFC3629
/* Pre-RFC 3629 mode: also accept the obsolete 5- and 6-byte forms. */
#define F0(x) (( x==0 ? R(0x90,0xc0) : \
                 R(0x80,0xc0) ) \
             | ( R(0x80,0xc0) >> 6 ) \
             | ( R(0x80,0xc0) >> 12 ) \
             | x )
#define F8(x) (( x==0 ? R(0xa0,0xc0) : \
                 R(0x80,0xc0) ) \
             | ( R(0x80,0xc0) >> 6 ) \
             | ( R(0x80,0xc0) >> 12 ) \
             | ( R(0x80,0xc0) >> 18 ) \
             | x )
#define FC(x) (( x==0 ? R(0x88,0xc0) : \
                 R(0x80,0xc0) ) \
             | ( R(0x80,0xc0) >> 6 ) \
             | ( R(0x80,0xc0) >> 12 ) \
             | ( R(0x80,0xc0) >> 18 ) \
             | ( R(0x80,0xc0) >> 24 ) \
             | x )
#define F(x) ( x<8 ? F0(x) : x<12 ? F8((x&3)) : x<14 ? FC((x&1)) : -1 )
#else
/* RFC 3629 mode: 0xf0 must be followed by 0x90..0xbf (no overlong forms) and
 * 0xf4 by 0x80..0x8f only, so that nothing above U+10FFFF can be decoded. */
#define F(x) ( ( x>=5 ? 0 : \
                 x==0 ? R(0x90,0xc0) : \
                 x==4 ? R(0x80,0x90) : \
                 R(0x80,0xc0) ) \
             | ( R(0x80,0xc0) >> 6 ) \
             | ( R(0x80,0xc0) >> 12 ) \
             | x )
#endif
/*
 * Table of initial decoder states for lead bytes.  The first entry is
 * C(0x2), and the decoders index this table with (lead byte - SA), so the
 * entries run consecutively from the first lead byte upward: the C() and
 * D() rows, then the E() row, then the F() entries.  The extra F() entries
 * are only compiled in when I_FAILED_TO_RTFM_RFC3629 is defined.
 */
const uint32_t bittab[] = {
C(0x2),C(0x3),C(0x4),C(0x5),C(0x6),C(0x7),
C(0x8),C(0x9),C(0xa),C(0xb),C(0xc),C(0xd),C(0xe),C(0xf),
D(0x0),D(0x1),D(0x2),D(0x3),D(0x4),D(0x5),D(0x6),D(0x7),
D(0x8),D(0x9),D(0xa),D(0xb),D(0xc),D(0xd),D(0xe),D(0xf),
E(0x0),E(0x1),E(0x2),E(0x3),E(0x4),E(0x5),E(0x6),E(0x7),
E(0x8),E(0x9),E(0xa),E(0xb),E(0xc),E(0xd),E(0xe),E(0xf),
F(0x0),F(0x1),F(0x2),F(0x3),F(0x4),
#ifdef I_FAILED_TO_RTFM_RFC3629
F(0x5),F(0x6),F(0x7),
F(0x8),F(0x9),F(0xa),F(0xb),F(0xc),F(0xd)
#endif
};

View file

@ -0,0 +1,61 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
/*
 * Shared definitions for the UTF-8 conversion functions.
 *
 * This header uses uint32_t and int32_t but includes nothing itself; every
 * file that includes it is expected to include <inttypes.h> first.
 */
#define LIBC
#ifndef LIBC
/* Standalone build: rename the functions so they do not conflict with libc */
#ifndef myprefix
#define myprefix fsmu8_
#endif
#define concat2(a,b) a ## b
#define concat(a,b) concat2(a,b)
#define prefix(b) concat(myprefix,b)
#undef mblen
#undef mbrlen
#undef mbrtowc
#undef mbsinit
#undef mbsnrtowcs
#undef mbsrtowcs
#undef mbstowcs
#undef wcrtomb
#undef wcsnrtombs
#undef wcsrtombs
#undef wcstombs
#undef wctomb
#define mblen prefix(mblen)
#define mbrlen prefix(mbrlen)
#define mbrtowc prefix(mbrtowc)
#define mbsinit prefix(mbsinit)
#define mbsnrtowcs prefix(mbsnrtowcs)
#define mbsrtowcs prefix(mbsrtowcs)
#define mbstowcs prefix(mbstowcs)
#define wcrtomb prefix(wcrtomb)
#define wcsnrtombs prefix(wcsnrtombs)
#define wcsrtombs prefix(wcsrtombs)
#define wcstombs prefix(wcstombs)
#define wctomb prefix(wctomb)
#define bittab prefix(bittab)
#else
#define bittab __fsmu8
#endif

/* Initial decoder state for each lead byte, indexed by (lead byte - SA). */
extern const uint32_t bittab[];

/* Upper 6 state bits are a negative integer offset to bound-check next byte */
/*    equivalent to: ( (b-0x80) | (b+offset) ) & ~0x3f      */
/* Nonzero when byte b is outside the range stored in state c. */
#define OOB(c,b) (((((b)>>3)-0x10)|(((b)>>3)+((int32_t)(c)>>26))) & ~7)

/* Interval [a,b). Either a must be 80 or b must be c0, lower 3 bits clear. */
/* The value is converted to uint32_t BEFORE shifting: left-shifting a
 * negative int is undefined behaviour and not a valid constant expression. */
#define R(a,b) ((uint32_t)((a)==0x80 ? 0x40-(b) : -(a)) << 23)

/* Empty interval: every following byte is rejected by OOB(). */
#define FAILSTATE R(0x80,0x80)

/* Lead bytes with a bittab entry are those in [SA,SB). */
#ifdef I_FAILED_TO_RTFM_RFC3629
#define SA 0xc2u
#define SB 0xfeu
#else
#define SA 0xc2u
#define SB 0xf5u
#endif

View file

@ -0,0 +1,17 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Length in bytes of the multibyte character at s, examining at most n
 * bytes.  Implemented as mbtowc() with the decoded character discarded.
 */
int mblen(const char *s, size_t n)
{
    wchar_t *const discard = 0;

    return mbtowc(discard, s, n);
}

View file

@ -0,0 +1,18 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Restartable mblen(): forwards to mbrtowc() with no output character.
 * When st is null, a conversion state private to this function is used.
 */
size_t mbrlen(const char *s, size_t n, mbstate_t *st)
{
    static unsigned internal;
    mbstate_t *state = st;

    if (!state)
        state = (mbstate_t *)&internal;
    return mbrtowc(0, s, n, state);
}

View file

@ -0,0 +1,58 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Decode one UTF-8 character from src, examining at most n bytes.
 *
 * wc  - receives the decoded character; may be null to discard it.
 * src - input bytes; null is treated as the empty string "" with n == 1.
 * st  - conversion state carried between calls; null selects a state
 *       private to this function.
 *
 * Returns the number of bytes consumed by the completed character, 0 when
 * the character is the null character, (size_t)-2 when the n bytes form an
 * incomplete but so far valid sequence (the progress is saved in *st), or
 * (size_t)-1 with errno set to EILSEQ on an invalid sequence (*st is then
 * set to FAILSTATE).
 */
size_t mbrtowc(wchar_t *wc, const char *src, size_t n, mbstate_t *st)
{
    static unsigned internal_state;
    unsigned c;
    const unsigned char *s = (const void *)src;
    /* Kept as size_t so that N-n stays correct for n above UINT_MAX. */
    const size_t N = n;
    /* Sink for the result when the caller does not want it. */
    wchar_t dummy;

    if (!st) st = (void *)&internal_state;
    c = *(unsigned *)st;

    if (!s) {
        s = (const void *)"";
        wc = &dummy;
        n = 1;
    } else if (!wc) wc = &dummy;

    if (!n) return -2;
    if (!c) {
        /* Initial state: ASCII is returned directly. */
        if (*s < 0x80) return !!(*wc = *s);
        /* bittab has entries for lead bytes in [SA,SB) only, so SB itself
         * must be rejected here (hence >=, not >). */
        if (*s-SA >= SB-SA) goto ilseq;
        c = bittab[*s++-SA]; n--;
    }

    if (n) {
        /* The first continuation byte has a lead-specific valid range. */
        if (OOB(c,*s)) goto ilseq;
loop:
        c = c<<6 | *s++-0x80; n--;
        /* Bit 31 clear: all continuation bytes have been folded in. */
        if (!(c&(1U<<31))) {
            *(unsigned *)st = 0;
            *wc = c;
            return N-n;
        }
        if (n) {
            /* Later continuation bytes only need to be 0x80..0xbf. */
            if (*s-0x80u >= 0x40) goto ilseq;
            goto loop;
        }
    }

    /* Ran out of input mid-character: save progress for the next call. */
    *(unsigned *)st = c;
    return -2;
ilseq:
    *(unsigned *)st = FAILSTATE;
    errno = EILSEQ;
    return -1;
}

View file

@ -0,0 +1,17 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Report whether *st describes an initial conversion state.
 *
 * Returns nonzero when st is a null pointer or when the state word it
 * points to is zero; returns 0 otherwise.
 */
int mbsinit(const mbstate_t *st)
{
    /* A null pointer counts as "initial state" and must not be read. */
    return !st || !*(const unsigned *)st;
}

View file

@ -0,0 +1,61 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include <stdio.h>
#include "internal.h"
/*
 * Convert at most n bytes of the multibyte string *src into at most wn wide
 * characters stored at wcs.
 *
 * wcs - output buffer; when null nothing is stored, only the count is
 *       computed (a scratch buffer is used internally) and *src is left
 *       untouched.
 * src - in/out cursor; on return (wcs non-null) it is null if the
 *       terminating null character was converted, otherwise it points at
 *       the first byte that was not converted.
 * st  - conversion state handed on to mbsrtowcs()/mbrtowc().
 *
 * Returns the number of wide characters produced, not counting a
 * terminating null, or (size_t)-1 if an invalid sequence was found.
 */
size_t mbsnrtowcs(wchar_t *wcs, const char **src, size_t n, size_t wn, mbstate_t *st)
{
    size_t l, cnt=0, n2;
    wchar_t *ws, wbuf[256];
    const char *s = *src;
    const char *chunk_start;

    if (!wcs) ws = wbuf, wn = sizeof wbuf / sizeof *wbuf;
    else ws = wcs;

    /* making sure output buffer size is at most n/4 will ensure
     * that mbsrtowcs never reads more than n input bytes. thus
     * we can use mbsrtowcs as long as it's practical.. */

    while ( s && wn && ( (n2=n/4)>=wn || n2>32 ) ) {
        if (n2>=wn) n2=wn;
        chunk_start = s;
        l = mbsrtowcs(ws, &s, n2, st);
        if (!(l+1)) {
            cnt = l;
            wn = 0;
            break;
        }
        if (ws != wbuf) {
            ws += l;
            wn -= l;
        }
        /* Charge the byte budget with the bytes actually consumed, not
         * with the number of wide characters requested.  A null s means
         * the terminator was reached, so nothing is left to read. */
        n = s ? n - (s - chunk_start) : 0;
        cnt += l;
    }
    /* Finish the tail one character at a time so n is never exceeded. */
    if (s) while (wn && n) {
        l = mbrtowc(ws, s, n, st);
        if (l+2<=2) {
            if (!(l+1)) {
                cnt = l;
                break;
            }
            if (!l) {
                /* Terminating null converted: report completion. */
                s = 0;
                break;
            }
            /* have to roll back partial character */
            if (st) *(unsigned *)st = 0;
            break;
        }
        s += l; n -= l;
        /* safe - this loop runs fewer than sizeof(wbuf)/8 times */
        ws++; wn--;
        cnt++;
    }
    if (wcs) *src = s;
    return cnt;
}

View file

@ -0,0 +1,121 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Convert the UTF-8 string at *src into wide characters.
 *
 * ws  - output buffer; when null the function only counts characters and
 *       never updates *src.
 * src - in/out cursor.  With ws non-null it is set to null once the
 *       terminating null character has been stored, or to the current read
 *       position when wn characters have been written or an invalid
 *       sequence is hit.
 * wn  - maximum number of wide characters to store (ignored and reused as
 *       the running count when ws is null).
 * st  - conversion state; when null a local state starting at 0 is used.
 *
 * Returns the number of wide characters stored (or counted), not including
 * the terminating null, or (size_t)-1 with errno set to EILSEQ and the
 * state set to FAILSTATE on an invalid sequence.
 */
size_t mbsrtowcs(wchar_t *ws, const char **src, size_t wn, mbstate_t *st)
{
unsigned c;
const unsigned char *s = (const void *)*src;
const wchar_t *wsorig = ws;
/* With no caller state, the local c doubles as the state object. */
if (!st) st = (void *)&c, c = 0;
else c = *(unsigned *)st;
/* A nonzero state means a character was left half-decoded: clear the
 * stored state and jump into the middle of the matching decode loop. */
if (c) {
*(unsigned *)st = 0;
if (!ws) {
wn = 0;
goto resume0;
}
goto resume;
}
/* Counting mode: wn is the number of characters seen so far. */
if (!ws) for (wn=0;;) {
if (*s-SA >= SB-SA) {
/* Skip ASCII: bytewise until s is 4-byte aligned, then a word at a
 * time while no byte in the word is zero or has its high bit set. */
/* NOTE(review): the word loop reads through a uint32_t pointer even if
 * the alignment loop stopped early on a non-ASCII byte, so the access
 * may be unaligned; it also type-puns the byte buffer - confirm this is
 * acceptable on the target. */
while (((uintptr_t)s&3) && *s-1u<0x7f) s++, wn++;
while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) s+=4, wn+=4;
while (*s-1u<0x7f) s++, wn++;
if (!*s) return wn;
if (*s-SA >= SB-SA) goto ilseq2;
}
c = bittab[*s++-SA];
/* Validate the continuation bytes without assembling the value; the
 * state is only shifted so that bit 31 tells when the sequence ends. */
do {
resume0:
/* Note: the s++ below is a separate statement, not part of the if. */
if (OOB(c,*s)) goto ilseq2; s++;
c <<= 6; if (!(c&(1U<<31))) break;
#ifdef I_FAILED_TO_RTFM_RFC3629
if (*s++-0x80u >= 0x40) goto ilseq2;
c <<= 6; if (!(c&(1U<<31))) break;
if (*s++-0x80u >= 0x40) goto ilseq2;
c <<= 6; if (!(c&(1U<<31))) break;
#endif
if (*s++-0x80u >= 0x40) goto ilseq2;
c <<= 6; if (!(c&(1U<<31))) break;
if (*s++-0x80u >= 0x40) goto ilseq2;
} while (0);
wn++; c = 0;
}
/* Converting mode: wn is the remaining space in ws. */
while (wn) {
if (*s-SA >= SB-SA) {
/* Copy ASCII through; the aligned word-at-a-time path is only tried
 * when at least 7 output slots remain. */
if (wn >= 7) {
while (((uintptr_t)s&3) && *s-1u<0x7f) {
*ws++ = *s++;
wn--;
}
while (wn>=4 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
*ws++ = *s++;
*ws++ = *s++;
*ws++ = *s++;
*ws++ = *s++;
wn -= 4;
}
}
while (wn && *s-1u<0x7f) {
*ws++ = *s++;
wn--;
}
if (!wn) break;
/* Terminator: store it, signal completion through *src, and return
 * the count without the terminator. */
if (!*s) {
*ws = 0;
*src = 0;
return ws-wsorig;
}
if (*s-SA >= SB-SA) goto ilseq;
}
c = bittab[*s++-SA];
/* Fold continuation bytes into c until bit 31 clears. */
do {
resume:
/* The first continuation byte is checked against the lead-specific
 * range held in the state; later ones only need to be 0x80..0xbf. */
if (OOB(c,*s)) goto ilseq;
c = (c<<6) | *s++-0x80;
if (!(c&(1U<<31))) break;
#ifdef I_FAILED_TO_RTFM_RFC3629
if (*s-0x80u >= 0x40) goto ilseq;
c = (c<<6) | *s++-0x80;
if (!(c&(1U<<31))) break;
if (*s-0x80u >= 0x40) goto ilseq;
c = (c<<6) | *s++-0x80;
if (!(c&(1U<<31))) break;
#endif
if (*s-0x80u >= 0x40) goto ilseq;
c = (c<<6) | *s++-0x80;
if (!(c&(1U<<31))) break;
if (*s-0x80u >= 0x40) goto ilseq;
c = (c<<6) | *s++-0x80;
} while (0);
*ws++ = c; wn--; c = 0;
}
/* Output space exhausted: report where reading stopped. */
*src = (const void *)s;
return ws-wsorig;
ilseq:
/* Converting mode reports the failure position; counting mode enters at
 * ilseq2 and leaves *src alone. */
*src = (const void *)s;
ilseq2:
/* enter permanently failing state */
*(unsigned *)st = FAILSTATE;
errno = EILSEQ;
return -1;
}

View file

@ -0,0 +1,18 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Non-restartable string conversion: runs mbsrtowcs() over s with a fresh,
 * zero-initialised conversion state.  The local copy of the s pointer
 * serves as the source cursor that mbsrtowcs() updates.
 */
size_t mbstowcs(wchar_t *ws, const char *s, size_t wn)
{
    mbstate_t initial = { 0 };
    const char **cursor = (void*)&s;

    return mbsrtowcs(ws, cursor, wn, &initial);
}

View file

@ -0,0 +1,19 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Non-restartable single-character decode: runs mbrtowc() with a fresh,
 * zero-initialised state.  An incomplete sequence (mbrtowc result -2) is
 * reported as -1, like an invalid one; every other result is passed on.
 */
int mbtowc(wchar_t *wc, const char *s, size_t n)
{
    mbstate_t st = { 0 };
    size_t result = mbrtowc(wc, s, n, &st);

    if (result == (size_t)-2)
        return -1;
    return result;
}

View file

@ -0,0 +1,38 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Encode the wide character wc as UTF-8 into s.
 *
 * Returns the number of bytes written (1 to 4).  A null s returns 1 without
 * writing anything.  Values in 0xd800..0xdfff and values of 0x110000 or
 * above cannot be encoded: errno is set to EILSEQ and (size_t)-1 returned.
 * The st argument is not used.
 */
size_t wcrtomb(char *s, wchar_t wc, mbstate_t *st)
{
    const unsigned u = (unsigned)wc;

    if (!s)
        return 1;

    /* One byte: 0xxxxxxx */
    if (u < 0x80) {
        s[0] = wc;
        return 1;
    }

    /* Two bytes: 110xxxxx 10xxxxxx */
    if (u < 0x800) {
        s[0] = 0xc0 | (wc>>6);
        s[1] = 0x80 | (wc&0x3f);
        return 2;
    }

    /* Three bytes, skipping the 0xd800..0xdfff gap. */
    if (u < 0xd800 || u-0xe000 < 0x2000) {
        s[0] = 0xe0 | (wc>>12);
        s[1] = 0x80 | ((wc>>6)&0x3f);
        s[2] = 0x80 | (wc&0x3f);
        return 3;
    }

    /* Four bytes: 0x10000..0x10ffff */
    if (u-0x10000 < 0x100000) {
        s[0] = 0xf0 | (wc>>18);
        s[1] = 0x80 | ((wc>>12)&0x3f);
        s[2] = 0x80 | ((wc>>6)&0x3f);
        s[3] = 0x80 | (wc&0x3f);
        return 4;
    }

    errno = EILSEQ;
    return -1;
}

View file

@ -0,0 +1,51 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Convert at most wn wide characters from *wcs into at most n bytes of
 * multibyte output at dst.
 *
 * dst - output buffer; when null nothing is stored, n is ignored, only the
 *       byte count is computed and *wcs is left untouched.
 * wcs - in/out cursor; on return (dst non-null) it is null if the
 *       terminating null was converted, otherwise it points at the first
 *       wide character that was not converted.
 * st  - unused; wcrtomb() is called without a state.
 *
 * Returns the number of bytes produced, not counting a terminating null,
 * or (size_t)-1 if a wide character cannot be encoded.
 */
size_t wcsnrtombs(char *dst, const wchar_t **wcs, size_t wn, size_t n, mbstate_t *st)
{
    const wchar_t *ws = *wcs;
    size_t cnt = 0, l, i;
    /* Holds one encoded character; same size wcsrtombs uses for this. */
    char tmp[4];

    (void)st;

    while (ws && wn) {
        /* Encode into scratch space first so dst is never overrun. */
        l = wcrtomb(tmp, *ws, 0);
        if (!(l+1)) {
            cnt = l;
            break;
        }
        if (dst) {
            /* Stop before a character that does not fit completely. */
            if (l > n) break;
            for (i = 0; i < l; i++) dst[i] = tmp[i];
            dst += l;
            n -= l;
        }
        if (!*ws) {
            /* Terminator handled: report completion, do not count it. */
            ws = 0;
            break;
        }
        ws++; wn--;
        cnt += l;
    }
    if (dst) *wcs = ws;
    return cnt;
}

View file

@ -0,0 +1,58 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Convert the wide string at *ws into at most n bytes of multibyte output
 * at s.
 *
 * s  - output buffer; when null only the required byte count is computed
 *      and *ws is left untouched.
 * ws - in/out cursor; set to null once the terminating null has been
 *      stored, otherwise advanced to the first wide character that was not
 *      converted.
 * st - unused; wcrtomb() is called without a state.
 *
 * Returns the number of bytes produced, not counting the terminating null,
 * or (size_t)-1 if a wide character cannot be encoded.
 */
size_t wcsrtombs(char *s, const wchar_t **ws, size_t n, mbstate_t *st)
{
    const wchar_t *ws2;
    char buf[4];
    size_t N = n, l;

    if (!s) {
        for (n=0, ws2=*ws; *ws2; ws2++) {
            /* Unsigned compare so negative values go through wcrtomb. */
            if (*ws2 >= 0x80u) {
                l = wcrtomb(buf, *ws2, 0);
                if (!(l+1)) return -1;
                n += l;
            } else n++;
        }
        return n;
    }

    /* With at least 4 bytes left any character fits: encode in place. */
    while (n>=4) {
        /* True for 0, for 0x80 and above, and for negative values. */
        if (**ws-1u >= 0x7fu) {
            if (!**ws) {
                *s = 0;
                *ws = 0;
                return N-n;
            }
            l = wcrtomb(s, **ws, 0);
            if (!(l+1)) return -1;
            s += l;
            n -= l;
        } else {
            *s++ = **ws;
            n--;
        }
        (*ws)++;
    }

    /* Fewer than 4 bytes left: measure each character before storing. */
    while (n) {
        if (**ws-1u >= 0x7fu) {
            if (!**ws) {
                *s = 0;
                *ws = 0;
                return N-n;
            }
            l = wcrtomb(buf, **ws, 0);
            if (!(l+1)) return -1;
            if (l>n) return N-n;
            wcrtomb(s, **ws, 0);
            s += l;
            n -= l;
        } else {
            *s++ = **ws;
            n--;
        }
        (*ws)++;
    }

    /* Buffer full before the terminator: *ws stays at the resume point. */
    return N-n;
}

View file

@ -0,0 +1,17 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Non-restartable wide-to-multibyte string conversion: forwards to
 * wcsrtombs() with no state, using the local copy of the ws pointer as the
 * source cursor.
 */
size_t wcstombs(char *s, const wchar_t *ws, size_t n)
{
    const wchar_t **cursor = &ws;

    return wcsrtombs(s, cursor, n, 0);
}

View file

@ -0,0 +1,8 @@
#include <stdio.h>
#include <wchar.h>
/*
 * Convert a wide character to its single-byte form.  Values below 128 map
 * to themselves; everything else yields EOF.
 */
int wctob(wint_t c)
{
    return c < 128U ? (int)c : EOF;
}

View file

@ -0,0 +1,18 @@
/*
* This code was written by Rich Felker in 2010; no copyright is claimed.
* This code is in the public domain. Attribution is appreciated but
* unnecessary.
*/
#include <stdlib.h>
#include <inttypes.h>
#include <wchar.h>
#include <errno.h>
#include "internal.h"
/*
 * Non-restartable single-character encode.  A null s returns 0; otherwise
 * the result of wcrtomb() with no state is returned.
 */
int wctomb(char *s, wchar_t wc)
{
    return s ? (int)wcrtomb(s, wc, 0) : 0;
}