Init
This commit is contained in:
182
cppdraft/text/c/strings.md
Normal file
182
cppdraft/text/c/strings.md
Normal file
@@ -0,0 +1,182 @@
|
||||
[text.c.strings]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.7 Null-terminated sequence utilities [text.c.strings]
|
||||
|
||||
### [28.7.1](#cctype.syn) Header <cctype> synopsis [[cctype.syn]](cctype.syn)
|
||||
|
||||
[🔗](#lib:isalnum)
|
||||
|
||||
namespace std {int isalnum(int c); int isalpha(int c); int isblank(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int tolower(int c); int toupper(int c);}
|
||||
|
||||
[1](#cctype.syn-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13029)
|
||||
|
||||
The contents and meaning of the header <cctype> are the same as the C standard library header [<ctype.h>](support.c.headers.general#header:%3cctype.h%3e "17.15.1 General [support.c.headers.general]")[.](#cctype.syn-1.sentence-1)
|
||||
|
||||
See also: ISO/IEC 9899:2024, 7.4
|
||||
|
||||
### [28.7.2](#cwctype.syn) Header <cwctype> synopsis [[cwctype.syn]](cwctype.syn)
|
||||
|
||||
[🔗](#lib:wint_t)
|
||||
|
||||
namespace std {using wint_t = *see below*; using wctrans_t = *see below*; using wctype_t = *see below*; int iswalnum(wint_t wc); int iswalpha(wint_t wc); int iswblank(wint_t wc); int iswcntrl(wint_t wc); int iswdigit(wint_t wc); int iswgraph(wint_t wc); int iswlower(wint_t wc); int iswprint(wint_t wc); int iswpunct(wint_t wc); int iswspace(wint_t wc); int iswupper(wint_t wc); int iswxdigit(wint_t wc); int iswctype(wint_t wc, wctype_t desc);
|
||||
wctype_t wctype(const char* property);
|
||||
wint_t towlower(wint_t wc);
|
||||
wint_t towupper(wint_t wc);
|
||||
wint_t towctrans(wint_t wc, wctrans_t desc);
|
||||
wctrans_t wctrans(const char* property);}
#define WEOF *see below*
|
||||
|
||||
[1](#cwctype.syn-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13087)
|
||||
|
||||
The contents and meaning of the header <cwctype> are the same as the C standard library header [<wctype.h>](support.c.headers.general#header:%3cwctype.h%3e "17.15.1 General [support.c.headers.general]")[.](#cwctype.syn-1.sentence-1)
|
||||
|
||||
See also: ISO/IEC 9899:2024, 7.32
|
||||
|
||||
### [28.7.3](#cwchar.syn) Header <cwchar> synopsis [[cwchar.syn]](cwchar.syn)
|
||||
|
||||
[🔗](#header:%3ccwchar%3e)
|
||||
|
||||
#define __STDC_VERSION_WCHAR_H__ 202311L
namespace std {using size_t = *see [[support.types.layout]](support.types.layout "17.2.4 Sizes, alignments, and offsets")*; // freestanding
using mbstate_t = *see below*; // freestanding
using wint_t = *see below*; // freestanding
struct tm; int fwprintf(FILE* stream, const wchar_t* format, ...); int fwscanf(FILE* stream, const wchar_t* format, ...); int swprintf(wchar_t* s, size_t n, const wchar_t* format, ...); int swscanf(const wchar_t* s, const wchar_t* format, ...); int vfwprintf(FILE* stream, const wchar_t* format, va_list arg); int vfwscanf(FILE* stream, const wchar_t* format, va_list arg); int vswprintf(wchar_t* s, size_t n, const wchar_t* format, va_list arg); int vswscanf(const wchar_t* s, const wchar_t* format, va_list arg); int vwprintf(const wchar_t* format, va_list arg); int vwscanf(const wchar_t* format, va_list arg); int wprintf(const wchar_t* format, ...); int wscanf(const wchar_t* format, ...);
|
||||
wint_t fgetwc(FILE* stream); wchar_t* fgetws(wchar_t* s, int n, FILE* stream);
|
||||
wint_t fputwc(wchar_t c, FILE* stream); int fputws(const wchar_t* s, FILE* stream); int fwide(FILE* stream, int mode);
|
||||
wint_t getwc(FILE* stream);
|
||||
wint_t getwchar();
|
||||
wint_t putwc(wchar_t c, FILE* stream);
|
||||
wint_t putwchar(wchar_t c);
|
||||
wint_t ungetwc(wint_t c, FILE* stream); double wcstod(const wchar_t* nptr, wchar_t** endptr); float wcstof(const wchar_t* nptr, wchar_t** endptr); long double wcstold(const wchar_t* nptr, wchar_t** endptr); long int wcstol(const wchar_t* nptr, wchar_t** endptr, int base); long long int wcstoll(const wchar_t* nptr, wchar_t** endptr, int base); unsigned long int wcstoul(const wchar_t* nptr, wchar_t** endptr, int base); unsigned long long int wcstoull(const wchar_t* nptr, wchar_t** endptr, int base); wchar_t* wcscpy(wchar_t* s1, const wchar_t* s2); // freestandingwchar_t* wcsncpy(wchar_t* s1, const wchar_t* s2, size_t n); // freestandingwchar_t* wmemcpy(wchar_t* s1, const wchar_t* s2, size_t n); // freestandingwchar_t* wmemmove(wchar_t* s1, const wchar_t* s2, size_t n); // freestandingwchar_t* wcscat(wchar_t* s1, const wchar_t* s2); // freestandingwchar_t* wcsncat(wchar_t* s1, const wchar_t* s2, size_t n); // freestandingint wcscmp(const wchar_t* s1, const wchar_t* s2); // freestandingint wcscoll(const wchar_t* s1, const wchar_t* s2); int wcsncmp(const wchar_t* s1, const wchar_t* s2, size_t n); // freestanding size_t wcsxfrm(wchar_t* s1, const wchar_t* s2, size_t n); int wmemcmp(const wchar_t* s1, const wchar_t* s2, size_t n); // freestandingconst wchar_t* wcschr(const wchar_t* s, wchar_t c); // freestanding; see [[library.c]](library.c "16.2 The C standard library")wchar_t* wcschr(wchar_t* s, wchar_t c); // freestanding; see [[library.c]](library.c "16.2 The C standard library") size_t wcscspn(const wchar_t* s1, const wchar_t* s2); // freestandingconst wchar_t* wcspbrk(const wchar_t* s1, const wchar_t* s2); // freestanding; see [[library.c]](library.c "16.2 The C standard library")wchar_t* wcspbrk(wchar_t* s1, const wchar_t* s2); // freestanding; see [[library.c]](library.c "16.2 The C standard library")const wchar_t* wcsrchr(const wchar_t* s, wchar_t c); // freestanding; see [[library.c]](library.c "16.2 The C standard library")wchar_t* wcsrchr(wchar_t* s, wchar_t 
c); // freestanding; see [[library.c]](library.c "16.2 The C standard library") size_t wcsspn(const wchar_t* s1, const wchar_t* s2); // freestandingconst wchar_t* wcsstr(const wchar_t* s1, const wchar_t* s2); // freestanding; see [[library.c]](library.c "16.2 The C standard library")wchar_t* wcsstr(wchar_t* s1, const wchar_t* s2); // freestanding; see [[library.c]](library.c "16.2 The C standard library")wchar_t* wcstok(wchar_t* s1, const wchar_t* s2, wchar_t** ptr); // freestandingconst wchar_t* wmemchr(const wchar_t* s, wchar_t c, size_t n); // freestanding; see [[library.c]](library.c "16.2 The C standard library")wchar_t* wmemchr(wchar_t* s, wchar_t c, size_t n); // freestanding; see [[library.c]](library.c "16.2 The C standard library") size_t wcslen(const wchar_t* s); // freestandingwchar_t* wmemset(wchar_t* s, wchar_t c, size_t n); // freestanding size_t wcsftime(wchar_t* s, size_t maxsize, const wchar_t* format, const tm* timeptr);
|
||||
wint_t btowc(int c); int wctob(wint_t c);
// [[c.mb.wcs]](#c.mb.wcs "28.7.5 Multibyte / wide string and character conversion functions"), multibyte / wide string and character conversion functions
int mbsinit(const mbstate_t* ps);
|
||||
size_t mbrlen(const char* s, size_t n, mbstate_t* ps);
|
||||
size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t wcrtomb(char* s, wchar_t wc, mbstate_t* ps);
|
||||
size_t mbsrtowcs(wchar_t* dst, const char** src, size_t len, mbstate_t* ps);
|
||||
size_t wcsrtombs(char* dst, const wchar_t** src, size_t len, mbstate_t* ps);}
#define NULL *see [[support.types.nullptr]](support.types.nullptr "17.2.3 Null pointers")* // freestanding
#define WCHAR_MAX *see below* // freestanding
#define WCHAR_MIN *see below* // freestanding
#define WEOF *see below* // freestanding
#define WCHAR_WIDTH *see below* // freestanding
|
||||
|
||||
[1](#cwchar.syn-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13244)
|
||||
|
||||
The contents and meaning of the header [<cwchar>](#header:%3ccwchar%3e "28.7.3 Header <cwchar> synopsis [cwchar.syn]") are the same as the C standard library header [<wchar.h>](support.c.headers.general#header:%3cwchar.h%3e "17.15.1 General [support.c.headers.general]"), except that it does not declare a type wchar_t[.](#cwchar.syn-1.sentence-1)
|
||||
|
||||
[2](#cwchar.syn-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13249)
|
||||
|
||||
[*Note [1](#cwchar.syn-note-1)*:
|
||||
|
||||
The functions wcschr, wcspbrk, wcsrchr, wcsstr, and wmemchr have different signatures in this document,
|
||||
but they have the same behavior as in the C standard library ([[library.c]](library.c "16.2 The C standard library"))[.](#cwchar.syn-2.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
See also: ISO/IEC 9899:2024, 7.31
|
||||
|
||||
### [28.7.4](#cuchar.syn) Header <cuchar> synopsis [[cuchar.syn]](cuchar.syn)
|
||||
|
||||
[🔗](#lib:mbstate_t_)
|
||||
|
||||
#define __STDC_VERSION_UCHAR_H__ 202311L
namespace std {using mbstate_t = *see below*; using size_t = *see [[support.types.layout]](support.types.layout "17.2.4 Sizes, alignments, and offsets")*;
|
||||
|
||||
size_t mbrtoc8(char8_t* pc8, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t c8rtomb(char* s, char8_t c8, mbstate_t* ps);
|
||||
size_t mbrtoc16(char16_t* pc16, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t c16rtomb(char* s, char16_t c16, mbstate_t* ps);
|
||||
size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t c32rtomb(char* s, char32_t c32, mbstate_t* ps);}
|
||||
|
||||
[1](#cuchar.syn-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13285)
|
||||
|
||||
The contents and meaning of the header <cuchar> are the same as the C standard library header [<uchar.h>](support.c.headers.general#header:%3cuchar.h%3e "17.15.1 General [support.c.headers.general]"),
|
||||
except that it does not declare types char8_t, char16_t, or char32_t[.](#cuchar.syn-1.sentence-1)
|
||||
|
||||
See also: ISO/IEC 9899:2024, 7.30
|
||||
|
||||
### [28.7.5](#c.mb.wcs) Multibyte / wide string and character conversion functions [[c.mb.wcs]](c.mb.wcs)
|
||||
|
||||
[1](#c.mb.wcs-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13295)
|
||||
|
||||
[*Note [1](#c.mb.wcs-note-1)*:
|
||||
|
||||
The headers [<cstdlib>](cstdlib.syn#header:%3ccstdlib%3e "17.2.2 Header <cstdlib> synopsis [cstdlib.syn]"), [<cuchar>](#header:%3ccuchar%3e "28.7.4 Header <cuchar> synopsis [cuchar.syn]"),
|
||||
and [<cwchar>](#header:%3ccwchar%3e "28.7.3 Header <cwchar> synopsis [cwchar.syn]") declare the functions described in this subclause[.](#c.mb.wcs-1.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[🔗](#lib:mbsinit_)
|
||||
|
||||
`int mbsinit(const mbstate_t* ps);
|
||||
int mblen(const char* s, size_t n);
|
||||
size_t mbstowcs(wchar_t* pwcs, const char* s, size_t n);
|
||||
size_t wcstombs(char* s, const wchar_t* pwcs, size_t n);
|
||||
`
|
||||
|
||||
[2](#c.mb.wcs-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13315)
|
||||
|
||||
*Effects*: These functions have the semantics specified in the C standard library[.](#c.mb.wcs-2.sentence-1)
|
||||
|
||||
See also: ISO/IEC 9899:2024, 7.22.7.1, 7.22.8, 7.29.6.2.1
|
||||
|
||||
[🔗](#lib:mbtowc)
|
||||
|
||||
`int mbtowc(wchar_t* pwc, const char* s, size_t n);
|
||||
int wctomb(char* s, wchar_t wchar);
|
||||
`
|
||||
|
||||
[3](#c.mb.wcs-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13330)
|
||||
|
||||
*Effects*: These functions have the semantics specified in the C standard library[.](#c.mb.wcs-3.sentence-1)
|
||||
|
||||
[4](#c.mb.wcs-4)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13334)
|
||||
|
||||
*Remarks*: Calls to these functions
|
||||
may introduce a data race ([[res.on.data.races]](res.on.data.races "16.4.6.10 Data race avoidance"))
|
||||
with other calls to the same function[.](#c.mb.wcs-4.sentence-1)
|
||||
|
||||
See also: ISO/IEC 9899:2024, 7.22.7
|
||||
|
||||
[🔗](#c.mb.wcs-itemdecl:3)
|
||||
|
||||
`size_t [mbrlen](#lib:mbrlen "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(const char* s, size_t n, mbstate_t* ps);
|
||||
size_t [mbrtowc](#lib:mbrtowc "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t [wcrtomb](#lib:wcrtomb "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char* s, wchar_t wc, mbstate_t* ps);
|
||||
size_t [mbrtoc8](#lib:mbrtoc8 "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char8_t* pc8, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t [c8rtomb](#lib:c8rtomb "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char* s, char8_t c8, mbstate_t* ps);
|
||||
size_t [mbrtoc16](#lib:mbrtoc16 "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char16_t* pc16, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t [c16rtomb](#lib:c16rtomb "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char* s, char16_t c16, mbstate_t* ps);
|
||||
size_t [mbrtoc32](#lib:mbrtoc32 "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char32_t* pc32, const char* s, size_t n, mbstate_t* ps);
|
||||
size_t [c32rtomb](#lib:c32rtomb "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char* s, char32_t c32, mbstate_t* ps);
|
||||
size_t [mbsrtowcs](#lib:mbsrtowcs "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(wchar_t* dst, const char** src, size_t len, mbstate_t* ps);
|
||||
size_t [wcsrtombs](#lib:wcsrtombs "28.7.5 Multibyte / wide string and character conversion functions [c.mb.wcs]")(char* dst, const wchar_t** src, size_t len, mbstate_t* ps);
|
||||
`
|
||||
|
||||
[5](#c.mb.wcs-5)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13358)
|
||||
|
||||
*Effects*: These functions have the semantics specified in the C standard library[.](#c.mb.wcs-5.sentence-1)
|
||||
|
||||
[6](#c.mb.wcs-6)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L13362)
|
||||
|
||||
*Remarks*: Calling these functions
|
||||
with an mbstate_t* argument that is a null pointer value
|
||||
may introduce a data race ([[res.on.data.races]](res.on.data.races "16.4.6.10 Data race avoidance"))
|
||||
with other calls to the same function
|
||||
with an mbstate_t* argument that is a null pointer value[.](#c.mb.wcs-6.sentence-1)
|
||||
|
||||
See also: ISO/IEC 9899:2024, 7.30.1, 7.31.6.3, 7.31.6.4
|
||||
742
cppdraft/text/encoding.md
Normal file
742
cppdraft/text/encoding.md
Normal file
@@ -0,0 +1,742 @@
|
||||
[text.encoding]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [text.encoding]
|
||||
|
||||
### [28.4.1](#syn) Header <text_encoding> synopsis [[text.encoding.syn]](text.encoding.syn)
|
||||
|
||||
[🔗](#header:%3ctext_encoding%3e)
|
||||
|
||||
namespace std {struct text_encoding;
// [[text.encoding.hash]](#hash "28.4.2.7 Hash support"), hash support
template<class T> struct hash; template<> struct hash<text_encoding>;}
|
||||
|
||||
### [28.4.2](#class) Class text_encoding [[text.encoding.class]](text.encoding.class)
|
||||
|
||||
#### [28.4.2.1](#overview) Overview [[text.encoding.overview]](text.encoding.overview)
|
||||
|
||||
[1](#overview-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5014)
|
||||
|
||||
The class text_encoding describes an interface
|
||||
for accessing the IANA Character Sets registry[[bib]](bibliography#bib:iana-charset "Bibliography")[.](#overview-1.sentence-1)
|
||||
|
||||
[🔗](#lib:text_encoding)
|
||||
|
||||
namespace std {struct text_encoding {static constexpr size_t max_name_length = 63;
// [[text.encoding.id]](#id "28.4.2.6 Enumeration text_encoding::id"), enumeration text_encoding::id
enum class id : int_least32_t {*see below*}; using enum id; constexpr text_encoding() = default; constexpr explicit text_encoding(string_view enc) noexcept; constexpr text_encoding(id i) noexcept; constexpr id mib() const noexcept; constexpr const char* name() const noexcept;
// [[text.encoding.aliases]](#aliases "28.4.2.5 Class text_encoding::aliases_view"), class text_encoding::aliases_view
struct aliases_view; constexpr aliases_view aliases() const noexcept; friend constexpr bool operator==(const text_encoding& a, const text_encoding& b) noexcept; friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept; static consteval text_encoding literal() noexcept; static text_encoding environment(); template<id i> static bool environment_is(); private: id *mib_* = id::unknown; // *exposition only*
char *name_*[max_name_length + 1] = {0}; // *exposition only*
static constexpr bool *comp-name*(string_view a, string_view b); // *exposition only*
};}
|
||||
|
||||
[2](#overview-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5057)
|
||||
|
||||
Class text_encoding is
|
||||
a trivially copyable type ([[basic.types.general]](basic.types.general#term.trivially.copyable.type "6.9.1 General"))[.](#overview-2.sentence-1)
|
||||
|
||||
#### [28.4.2.2](#general) General [[text.encoding.general]](text.encoding.general)
|
||||
|
||||
[1](#general-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5063)
|
||||
|
||||
A [*registered character encoding*](#def:encoding,registered_character "28.4.2.2 General [text.encoding.general]") is
|
||||
a character encoding scheme in the IANA Character Sets registry[.](#general-1.sentence-1)
|
||||
|
||||
[*Note [1](#general-note-1)*:
|
||||
|
||||
The IANA Character Sets registry uses the term “character sets”
|
||||
to refer to character encodings[.](#general-1.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
The primary name of a registered character encoding is
|
||||
the name of that encoding specified in the IANA Character Sets registry[.](#general-1.sentence-3)
|
||||
|
||||
[2](#general-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5073)
|
||||
|
||||
The set of known registered character encodings contains
|
||||
every registered character encoding
|
||||
specified in the IANA Character Sets registry except for the following:
|
||||
|
||||
- [(2.1)](#general-2.1)
|
||||
|
||||
NATS-DANO (33)
|
||||
|
||||
- [(2.2)](#general-2.2)
|
||||
|
||||
NATS-DANO-ADD (34)
|
||||
|
||||
[3](#general-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5082)
|
||||
|
||||
Each known registered character encoding
|
||||
is identified by an enumerator in text_encoding::id, and
|
||||
has a set of zero or more [*aliases*](#def:encoding,registered_character,alias "28.4.2.2 General [text.encoding.general]")[.](#general-3.sentence-1)
|
||||
|
||||
[4](#general-4)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5087)
|
||||
|
||||
The set of aliases of a known registered character encoding is an implementation-defined
|
||||
superset of the aliases specified in the IANA Character Sets registry[.](#general-4.sentence-1)
|
||||
|
||||
The set of aliases for US-ASCII includes “ASCII”[.](#general-4.sentence-2)
|
||||
|
||||
No two aliases or primary names of distinct registered character encodings
|
||||
are equivalent when compared by text_encoding::*comp-name*[.](#general-4.sentence-3)
|
||||
|
||||
[5](#general-5)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5095)
|
||||
|
||||
How a text_encoding object
|
||||
is determined to be representative of a character encoding scheme
|
||||
implemented in the translation or execution environment is implementation-defined[.](#general-5.sentence-1)
|
||||
|
||||
[6](#general-6)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5102)
|
||||
|
||||
An object e of type text_encoding such that e.mib() == text_encoding::id::unknown is false and e.mib() == text_encoding::id::other is false maintains the following invariants:
|
||||
|
||||
- [(6.1)](#general-6.1)
|
||||
|
||||
*e.name() == '\0' is false, and
|
||||
|
||||
- [(6.2)](#general-6.2)
|
||||
|
||||
e.mib() == text_encoding(e.name()).mib() is true[.](#general-6.sentence-1)
|
||||
|
||||
[7](#general-7)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5112)
|
||||
|
||||
*Recommended practice*:
|
||||
|
||||
- [(7.1)](#general-7.1)
|
||||
|
||||
Implementations should not consider registered encodings to be interchangeable[.](#general-7.1.sentence-1)
|
||||
[*Example [1](#general-example-1)*:
|
||||
Shift_JIS and Windows-31J denote different encodings[.](#general-7.1.sentence-2)
|
||||
— *end example*]
|
||||
|
||||
- [(7.2)](#general-7.2)
|
||||
|
||||
Implementations should not use the name of a registered encoding
|
||||
to describe another similar yet different non-registered encoding
|
||||
unless there is a precedent on that implementation[.](#general-7.2.sentence-1)
|
||||
[*Example [2](#general-example-2)*:
|
||||
Big5
|
||||
— *end example*]
|
||||
|
||||
#### [28.4.2.3](#members) Members [[text.encoding.members]](text.encoding.members)
|
||||
|
||||
[🔗](#lib:text_encoding,constructor)
|
||||
|
||||
`constexpr explicit text_encoding(string_view enc) noexcept;
|
||||
`
|
||||
|
||||
[1](#members-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5137)
|
||||
|
||||
*Preconditions*:
|
||||
|
||||
- [(1.1)](#members-1.1)
|
||||
|
||||
enc represents a string in the ordinary literal encoding
|
||||
consisting only of elements of the basic character set ([[lex.charset]](lex.charset "5.3.1 Character sets"))[.](#members-1.1.sentence-1)
|
||||
|
||||
- [(1.2)](#members-1.2)
|
||||
|
||||
enc.size() <= max_name_length is true[.](#members-1.2.sentence-1)
|
||||
|
||||
- [(1.3)](#members-1.3)
|
||||
|
||||
enc.contains('\0') is false[.](#members-1.3.sentence-1)
|
||||
|
||||
[2](#members-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5149)
|
||||
|
||||
*Postconditions*:
|
||||
|
||||
- [(2.1)](#members-2.1)
|
||||
|
||||
If there exists a primary name or alias a of a known registered character encoding such that *comp-name*(a, enc) is true, *mib_* has the value of the enumerator of id associated with that registered character encoding[.](#members-2.1.sentence-1)
|
||||
Otherwise, *mib_* == id::other is true[.](#members-2.1.sentence-2)
|
||||
|
||||
- [(2.2)](#members-2.2)
|
||||
|
||||
enc.compare(*name_*) == 0 is true[.](#members-2.2.sentence-1)
|
||||
|
||||
[🔗](#lib:text_encoding,constructor_)
|
||||
|
||||
`constexpr text_encoding(id i) noexcept;
|
||||
`
|
||||
|
||||
[3](#members-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5170)
|
||||
|
||||
*Preconditions*: i has the value of one of the enumerators of id[.](#members-3.sentence-1)
|
||||
|
||||
[4](#members-4)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5174)
|
||||
|
||||
*Postconditions*:
|
||||
|
||||
- [(4.1)](#members-4.1)
|
||||
|
||||
*mib_* == i is true[.](#members-4.1.sentence-1)
|
||||
|
||||
- [(4.2)](#members-4.2)
|
||||
|
||||
If (*mib_* == id::unknown || *mib_* == id::other) is true, strlen(*name_*) == 0 is true[.](#members-4.2.sentence-1)
|
||||
Otherwise, ranges::contains(aliases(), string_view(*name_*)) is true[.](#members-4.2.sentence-2)
|
||||
|
||||
[🔗](#lib:mib,text_encoding)
|
||||
|
||||
`constexpr id mib() const noexcept;
|
||||
`
|
||||
|
||||
[5](#members-5)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5195)
|
||||
|
||||
*Returns*: *mib_*[.](#members-5.sentence-1)
|
||||
|
||||
[🔗](#lib:name,text_encoding)
|
||||
|
||||
`constexpr const char* name() const noexcept;
|
||||
`
|
||||
|
||||
[6](#members-6)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5206)
|
||||
|
||||
*Returns*: *name_*[.](#members-6.sentence-1)
|
||||
|
||||
[7](#members-7)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5210)
|
||||
|
||||
*Remarks*: name() is an ntbs and
|
||||
accessing elements of *name_* outside of the range name()+[0, strlen(name()) + 1) is undefined behavior[.](#members-7.sentence-1)
|
||||
|
||||
[🔗](#lib:aliases,text_encoding)
|
||||
|
||||
`constexpr aliases_view aliases() const noexcept;
|
||||
`
|
||||
|
||||
Let r denote an instance of aliases_view[.](#members-sentence-1)
|
||||
|
||||
If *this represents a known registered character encoding, then:
|
||||
|
||||
- r.front() is the primary name of the registered character encoding,
|
||||
- r contains the aliases of the registered character encoding, and
|
||||
- r does not contain duplicate values when compared with strcmp[.](#members-sentence-2)
|
||||
|
||||
Otherwise, r is an empty range[.](#members-sentence-3)
|
||||
|
||||
[8](#members-8)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5236)
|
||||
|
||||
Each element in r is a non-null, non-empty ntbs encoded in the literal character encoding and
|
||||
comprising only characters from the basic character set[.](#members-8.sentence-1)
|
||||
|
||||
[9](#members-9)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5241)
|
||||
|
||||
*Returns*: r[.](#members-9.sentence-1)
|
||||
|
||||
[10](#members-10)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5245)
|
||||
|
||||
[*Note [1](#members-note-1)*:
|
||||
|
||||
The order of aliases in r is unspecified[.](#members-10.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[🔗](#lib:literal,text_encoding)
|
||||
|
||||
`static consteval text_encoding literal() noexcept;
|
||||
`
|
||||
|
||||
[11](#members-11)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5257)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#members-11.sentence-1)
|
||||
|
||||
[12](#members-12)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5261)
|
||||
|
||||
*Returns*: A text_encoding object representing
|
||||
the ordinary character literal encoding ([[lex.charset]](lex.charset "5.3.1 Character sets"))[.](#members-12.sentence-1)
|
||||
|
||||
[🔗](#lib:environment,text_encoding)
|
||||
|
||||
`static text_encoding environment();
|
||||
`
|
||||
|
||||
[13](#members-13)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5273)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#members-13.sentence-1)
|
||||
|
||||
[14](#members-14)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5277)
|
||||
|
||||
*Returns*: A text_encoding object representing
|
||||
the implementation-defined
|
||||
character encoding scheme of the environment[.](#members-14.sentence-1)
|
||||
|
||||
On a POSIX implementation, this is the encoding scheme associated with
|
||||
the POSIX locale denoted by the empty string ""[.](#members-14.sentence-2)
|
||||
|
||||
[15](#members-15)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5285)
|
||||
|
||||
[*Note [2](#members-note-2)*:
|
||||
|
||||
This function is not affected by calls to setlocale[.](#members-15.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[16](#members-16)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5290)
|
||||
|
||||
*Recommended practice*: Implementations should return a value that is not affected by calls to
|
||||
the POSIX function setenv and
|
||||
other functions which can modify the environment ([[support.runtime]](support.runtime "17.14 Other runtime support"))[.](#members-16.sentence-1)
|
||||
|
||||
[🔗](#lib:environment_is,text_encoding)
|
||||
|
||||
`template<id i>
|
||||
static bool environment_is();
|
||||
`
|
||||
|
||||
[17](#members-17)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5304)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#members-17.sentence-1)
|
||||
|
||||
[18](#members-18)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5308)
|
||||
|
||||
*Returns*: environment() == i[.](#members-18.sentence-1)
|
||||
|
||||
[🔗](#lib:comp-name,text_encoding)
|
||||
|
||||
`static constexpr bool comp-name(string_view a, string_view b);
|
||||
`
|
||||
|
||||
[19](#members-19)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5319)
|
||||
|
||||
*Returns*: true if the two strings a and b encoded in the ordinary literal encoding
|
||||
are equal, ignoring, from left-to-right,
|
||||
|
||||
- [(19.1)](#members-19.1)
|
||||
|
||||
all elements that are not digits or letters ([[character.seq.general]](character.seq.general "16.3.3.3.4.1 General")),
|
||||
|
||||
- [(19.2)](#members-19.2)
|
||||
|
||||
character case, and
|
||||
|
||||
- [(19.3)](#members-19.3)
|
||||
|
||||
any sequence of one or more 0 characters
|
||||
not immediately preceded by a numeric prefix, where
|
||||
a numeric prefix is a sequence consisting of
|
||||
a digit in the range [1, 9]
|
||||
optionally followed by one or more elements which are not digits or letters,
|
||||
|
||||
and false otherwise[.](#members-19.sentence-1)
|
||||
|
||||
[*Note [3](#members-note-3)*:
|
||||
|
||||
This comparison is identical to
|
||||
the “Charset Alias Matching” algorithm
|
||||
described in the Unicode Technical Standard 22[[bib]](bibliography#bib:unicode-charmap "Bibliography")[.](#members-19.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[*Example [1](#members-example-1)*: static_assert(*comp-name*("UTF-8", "utf8") == true); static_assert(*comp-name*("u.t.f-008", "utf8") == true); static_assert(*comp-name*("ut8", "utf8") == false); static_assert(*comp-name*("utf-80", "utf8") == false); — *end example*]
|
||||
|
||||
#### [28.4.2.4](#cmp) Comparison functions [[text.encoding.cmp]](text.encoding.cmp)
|
||||
|
||||
[🔗](#lib:operator==,text_encoding)
|
||||
|
||||
`friend constexpr bool operator==(const text_encoding& a, const text_encoding& b) noexcept;
|
||||
`
|
||||
|
||||
[1](#cmp-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5362)
|
||||
|
||||
*Returns*: If a.*mib_* == id::other && b.*mib_* == id::other is true,
|
||||
then *comp-name*(a.*name_*,
|
||||
b.*name_*)[.](#cmp-1.sentence-1)
|
||||
|
||||
Otherwise, a.*mib_* == b.*mib_*[.](#cmp-1.sentence-2)
|
||||
|
||||
[🔗](#lib:operator==,text_encoding_)
|
||||
|
||||
`friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept;
|
||||
`
|
||||
|
||||
[2](#cmp-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5376)
|
||||
|
||||
*Returns*: encoding.*mib_* == i[.](#cmp-2.sentence-1)
|
||||
|
||||
[3](#cmp-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5380)
|
||||
|
||||
*Remarks*: This operator induces an equivalence relation on its arguments
|
||||
if and only if i != id::other is true[.](#cmp-3.sentence-1)
|
||||
|
||||
#### [28.4.2.5](#aliases) Class text_encoding::aliases_view [[text.encoding.aliases]](text.encoding.aliases)
|
||||
|
||||
[🔗](#lib:aliases_view,text_encoding)
|
||||
|
||||
`struct text_encoding::aliases_view : ranges::view_interface<text_encoding::aliases_view> {
|
||||
constexpr implementation-defined begin() const;
|
||||
constexpr implementation-defined end() const;
|
||||
};
|
||||
`
|
||||
|
||||
[1](#aliases-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5399)
|
||||
|
||||
text_encoding::aliases_view models [copyable](concepts.object#concept:copyable "18.6 Object concepts [concepts.object]"), ranges::[view](range.view#concept:view "25.4.5 Views [range.view]"), ranges::[random_access_range](range.refinements#concept:random_access_range "25.4.6 Other range refinements [range.refinements]"), and ranges::[borrowed_range](range.range#concept:borrowed_range "25.4.2 Ranges [range.range]")[.](#aliases-1.sentence-1)
|
||||
|
||||
[*Note [1](#aliases-note-1)*:
|
||||
|
||||
text_encoding::aliases_view is not required to satisfy ranges::[common_range](range.refinements#concept:common_range "25.4.6 Other range refinements [range.refinements]"),
|
||||
nor [default_initializable](concept.default.init#concept:default_initializable "18.4.12 Concept default_initializable [concept.default.init]")[.](#aliases-1.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[2](#aliases-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5411)
|
||||
|
||||
Both ranges::range_value_t<text_encoding::aliases_view> and ranges::range_reference_t<text_encoding::aliases_view> denote const char*[.](#aliases-2.sentence-1)
|
||||
|
||||
[3](#aliases-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5417)
|
||||
|
||||
ranges::iterator_t<text_encoding::aliases_view> is a constexpr iterator ([[iterator.requirements.general]](iterator.requirements.general "24.3.1 General"))[.](#aliases-3.sentence-1)
|
||||
|
||||
#### [28.4.2.6](#id) Enumeration text_encoding::id [[text.encoding.id]](text.encoding.id)
|
||||
|
||||
[🔗](#lib:id,text_encoding)
|
||||
|
||||
namespace std {enum class text_encoding::id : int_least32_t { other = 1,
|
||||
unknown = 2,
|
||||
ASCII = 3,
|
||||
ISOLatin1 = 4,
|
||||
ISOLatin2 = 5,
|
||||
ISOLatin3 = 6,
|
||||
ISOLatin4 = 7,
|
||||
ISOLatinCyrillic = 8,
|
||||
ISOLatinArabic = 9,
|
||||
ISOLatinGreek = 10,
|
||||
ISOLatinHebrew = 11,
|
||||
ISOLatin5 = 12,
|
||||
ISOLatin6 = 13,
|
||||
ISOTextComm = 14,
|
||||
HalfWidthKatakana = 15,
|
||||
JISEncoding = 16,
|
||||
ShiftJIS = 17,
|
||||
EUCPkdFmtJapanese = 18,
|
||||
EUCFixWidJapanese = 19,
|
||||
ISO4UnitedKingdom = 20,
|
||||
ISO11SwedishForNames = 21,
|
||||
ISO15Italian = 22,
|
||||
ISO17Spanish = 23,
|
||||
ISO21German = 24,
|
||||
ISO60DanishNorwegian = 25,
|
||||
ISO69French = 26,
|
||||
ISO10646UTF1 = 27,
|
||||
ISO646basic1983 = 28,
|
||||
INVARIANT = 29,
|
||||
ISO2IntlRefVersion = 30,
|
||||
NATSSEFI = 31,
|
||||
NATSSEFIADD = 32,
|
||||
ISO10Swedish = 35,
|
||||
KSC56011987 = 36,
|
||||
ISO2022KR = 37,
|
||||
EUCKR = 38,
|
||||
ISO2022JP = 39,
|
||||
ISO2022JP2 = 40,
|
||||
ISO13JISC6220jp = 41,
|
||||
ISO14JISC6220ro = 42,
|
||||
ISO16Portuguese = 43,
|
||||
ISO18Greek7Old = 44,
|
||||
ISO19LatinGreek = 45,
|
||||
ISO25French = 46,
|
||||
ISO27LatinGreek1 = 47,
|
||||
ISO5427Cyrillic = 48,
|
||||
ISO42JISC62261978 = 49,
|
||||
ISO47BSViewdata = 50,
|
||||
ISO49INIS = 51,
|
||||
ISO50INIS8 = 52,
|
||||
ISO51INISCyrillic = 53,
|
||||
ISO54271981 = 54,
|
||||
ISO5428Greek = 55,
|
||||
ISO57GB1988 = 56,
|
||||
ISO58GB231280 = 57,
|
||||
ISO61Norwegian2 = 58,
|
||||
ISO70VideotexSupp1 = 59,
|
||||
ISO84Portuguese2 = 60,
|
||||
ISO85Spanish2 = 61,
|
||||
ISO86Hungarian = 62,
|
||||
ISO87JISX0208 = 63,
|
||||
ISO88Greek7 = 64,
|
||||
ISO89ASMO449 = 65,
|
||||
ISO90 = 66,
|
||||
ISO91JISC62291984a = 67,
|
||||
ISO92JISC62991984b = 68,
|
||||
ISO93JIS62291984badd = 69,
|
||||
ISO94JIS62291984hand = 70,
|
||||
ISO95JIS62291984handadd = 71,
|
||||
ISO96JISC62291984kana = 72,
|
||||
ISO2033 = 73,
|
||||
ISO99NAPLPS = 74,
|
||||
ISO102T617bit = 75,
|
||||
ISO103T618bit = 76,
|
||||
ISO111ECMACyrillic = 77,
|
||||
ISO121Canadian1 = 78,
|
||||
ISO122Canadian2 = 79,
|
||||
ISO123CSAZ24341985gr = 80,
|
||||
ISO88596E = 81,
|
||||
ISO88596I = 82,
|
||||
ISO128T101G2 = 83,
|
||||
ISO88598E = 84,
|
||||
ISO88598I = 85,
|
||||
ISO139CSN369103 = 86,
|
||||
ISO141JUSIB1002 = 87,
|
||||
ISO143IECP271 = 88,
|
||||
ISO146Serbian = 89,
|
||||
ISO147Macedonian = 90,
|
||||
ISO150 = 91,
|
||||
ISO151Cuba = 92,
|
||||
ISO6937Add = 93,
|
||||
ISO153GOST1976874 = 94,
|
||||
ISO8859Supp = 95,
|
||||
ISO10367Box = 96,
|
||||
ISO158Lap = 97,
|
||||
ISO159JISX02121990 = 98,
|
||||
ISO646Danish = 99,
|
||||
USDK = 100,
|
||||
DKUS = 101,
|
||||
KSC5636 = 102,
|
||||
Unicode11UTF7 = 103,
|
||||
ISO2022CN = 104,
|
||||
ISO2022CNEXT = 105,
|
||||
UTF8 = 106,
|
||||
ISO885913 = 109,
|
||||
ISO885914 = 110,
|
||||
ISO885915 = 111,
|
||||
ISO885916 = 112,
|
||||
GBK = 113,
|
||||
GB18030 = 114,
|
||||
OSDEBCDICDF0415 = 115,
|
||||
OSDEBCDICDF03IRV = 116,
|
||||
OSDEBCDICDF041 = 117,
|
||||
ISO115481 = 118,
|
||||
KZ1048 = 119,
|
||||
UCS2 = 1000,
|
||||
UCS4 = 1001,
|
||||
UnicodeASCII = 1002,
|
||||
UnicodeLatin1 = 1003,
|
||||
UnicodeJapanese = 1004,
|
||||
UnicodeIBM1261 = 1005,
|
||||
UnicodeIBM1268 = 1006,
|
||||
UnicodeIBM1276 = 1007,
|
||||
UnicodeIBM1264 = 1008,
|
||||
UnicodeIBM1265 = 1009,
|
||||
Unicode11 = 1010,
|
||||
SCSU = 1011,
|
||||
UTF7 = 1012,
|
||||
UTF16BE = 1013,
|
||||
UTF16LE = 1014,
|
||||
UTF16 = 1015,
|
||||
CESU8 = 1016,
|
||||
UTF32 = 1017,
|
||||
UTF32BE = 1018,
|
||||
UTF32LE = 1019,
|
||||
BOCU1 = 1020,
|
||||
UTF7IMAP = 1021,
|
||||
Windows30Latin1 = 2000,
|
||||
Windows31Latin1 = 2001,
|
||||
Windows31Latin2 = 2002,
|
||||
Windows31Latin5 = 2003,
|
||||
HPRoman8 = 2004,
|
||||
AdobeStandardEncoding = 2005,
|
||||
VenturaUS = 2006,
|
||||
VenturaInternational = 2007,
|
||||
DECMCS = 2008,
|
||||
PC850Multilingual = 2009,
|
||||
PCp852 = 2010,
|
||||
PC8CodePage437 = 2011,
|
||||
PC8DanishNorwegian = 2012,
|
||||
PC862LatinHebrew = 2013,
|
||||
PC8Turkish = 2014,
|
||||
IBMSymbols = 2015,
|
||||
IBMThai = 2016,
|
||||
HPLegal = 2017,
|
||||
HPPiFont = 2018,
|
||||
HPMath8 = 2019,
|
||||
HPPSMath = 2020,
|
||||
HPDesktop = 2021,
|
||||
VenturaMath = 2022,
|
||||
MicrosoftPublishing = 2023,
|
||||
Windows31J = 2024,
|
||||
GB2312 = 2025,
|
||||
Big5 = 2026,
|
||||
Macintosh = 2027,
|
||||
IBM037 = 2028,
|
||||
IBM038 = 2029,
|
||||
IBM273 = 2030,
|
||||
IBM274 = 2031,
|
||||
IBM275 = 2032,
|
||||
IBM277 = 2033,
|
||||
IBM278 = 2034,
|
||||
IBM280 = 2035,
|
||||
IBM281 = 2036,
|
||||
IBM284 = 2037,
|
||||
IBM285 = 2038,
|
||||
IBM290 = 2039,
|
||||
IBM297 = 2040,
|
||||
IBM420 = 2041,
|
||||
IBM423 = 2042,
|
||||
IBM424 = 2043,
|
||||
IBM500 = 2044,
|
||||
IBM851 = 2045,
|
||||
IBM855 = 2046,
|
||||
IBM857 = 2047,
|
||||
IBM860 = 2048,
|
||||
IBM861 = 2049,
|
||||
IBM863 = 2050,
|
||||
IBM864 = 2051,
|
||||
IBM865 = 2052,
|
||||
IBM868 = 2053,
|
||||
IBM869 = 2054,
|
||||
IBM870 = 2055,
|
||||
IBM871 = 2056,
|
||||
IBM880 = 2057,
|
||||
IBM891 = 2058,
|
||||
IBM903 = 2059,
|
||||
IBM904 = 2060,
|
||||
IBM905 = 2061,
|
||||
IBM918 = 2062,
|
||||
IBM1026 = 2063,
|
||||
IBMEBCDICATDE = 2064,
|
||||
EBCDICATDEA = 2065,
|
||||
EBCDICCAFR = 2066,
|
||||
EBCDICDKNO = 2067,
|
||||
EBCDICDKNOA = 2068,
|
||||
EBCDICFISE = 2069,
|
||||
EBCDICFISEA = 2070,
|
||||
EBCDICFR = 2071,
|
||||
EBCDICIT = 2072,
|
||||
EBCDICPT = 2073,
|
||||
EBCDICES = 2074,
|
||||
EBCDICESA = 2075,
|
||||
EBCDICESS = 2076,
|
||||
EBCDICUK = 2077,
|
||||
EBCDICUS = 2078,
|
||||
Unknown8BiT = 2079,
|
||||
Mnemonic = 2080,
|
||||
Mnem = 2081,
|
||||
VISCII = 2082,
|
||||
VIQR = 2083,
|
||||
KOI8R = 2084,
|
||||
HZGB2312 = 2085,
|
||||
IBM866 = 2086,
|
||||
PC775Baltic = 2087,
|
||||
KOI8U = 2088,
|
||||
IBM00858 = 2089,
|
||||
IBM00924 = 2090,
|
||||
IBM01140 = 2091,
|
||||
IBM01141 = 2092,
|
||||
IBM01142 = 2093,
|
||||
IBM01143 = 2094,
|
||||
IBM01144 = 2095,
|
||||
IBM01145 = 2096,
|
||||
IBM01146 = 2097,
|
||||
IBM01147 = 2098,
|
||||
IBM01148 = 2099,
|
||||
IBM01149 = 2100,
|
||||
Big5HKSCS = 2101,
|
||||
IBM1047 = 2102,
|
||||
PTCP154 = 2103,
|
||||
Amiga1251 = 2104,
|
||||
KOI7switched = 2105,
|
||||
BRF = 2106,
|
||||
TSCII = 2107,
|
||||
CP51932 = 2108,
|
||||
windows874 = 2109,
|
||||
windows1250 = 2250,
|
||||
windows1251 = 2251,
|
||||
windows1252 = 2252,
|
||||
windows1253 = 2253,
|
||||
windows1254 = 2254,
|
||||
windows1255 = 2255,
|
||||
windows1256 = 2256,
|
||||
windows1257 = 2257,
|
||||
windows1258 = 2258,
|
||||
TIS620 = 2259,
|
||||
CP50220 = 2260};}
|
||||
|
||||
[*Note [1](#id-note-1)*:
|
||||
|
||||
The text_encoding::id enumeration
|
||||
contains an enumerator for each known registered character encoding[.](#id-sentence-1)
|
||||
|
||||
For each encoding, the corresponding enumerator is derived from
|
||||
the alias beginning with “cs”, as follows
|
||||
|
||||
- csUnicode is mapped to text_encoding::id::UCS2,
|
||||
- csIBBM904 is mapped to text_encoding::id::IBM904, and
|
||||
- the “cs” prefix is removed from other names[.](#id-sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
#### [28.4.2.7](#hash) Hash support [[text.encoding.hash]](text.encoding.hash)
|
||||
|
||||
[🔗](#lib:hash,text_encoding)
|
||||
|
||||
`template<> struct hash<text_encoding>;
|
||||
`
|
||||
|
||||
[1](#hash-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5713)
|
||||
|
||||
The specialization is enabled ([[unord.hash]](unord.hash "22.10.19 Class template hash"))[.](#hash-1.sentence-1)
|
||||
42
cppdraft/text/encoding/aliases.md
Normal file
42
cppdraft/text/encoding/aliases.md
Normal file
@@ -0,0 +1,42 @@
|
||||
[text.encoding.aliases]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#aliases)
|
||||
|
||||
### 28.4.2 Class text_encoding [[text.encoding.class]](text.encoding.class#text.encoding.aliases)
|
||||
|
||||
#### 28.4.2.5 Class text_encoding::aliases_view [text.encoding.aliases]
|
||||
|
||||
[🔗](#lib:aliases_view,text_encoding)
|
||||
|
||||
`struct text_encoding::aliases_view : ranges::view_interface<text_encoding::aliases_view> {
|
||||
constexpr implementation-defined begin() const;
|
||||
constexpr implementation-defined end() const;
|
||||
};
|
||||
`
|
||||
|
||||
[1](#1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5399)
|
||||
|
||||
text_encoding::aliases_view models [copyable](concepts.object#concept:copyable "18.6 Object concepts [concepts.object]"), ranges::[view](range.view#concept:view "25.4.5 Views [range.view]"), ranges::[random_access_range](range.refinements#concept:random_access_range "25.4.6 Other range refinements [range.refinements]"), and ranges::[borrowed_range](range.range#concept:borrowed_range "25.4.2 Ranges [range.range]")[.](#1.sentence-1)
|
||||
|
||||
[*Note [1](#note-1)*:
|
||||
|
||||
text_encoding::aliases_view is not required to satisfy ranges::[common_range](range.refinements#concept:common_range "25.4.6 Other range refinements [range.refinements]"),
|
||||
nor [default_initializable](concept.default.init#concept:default_initializable "18.4.12 Concept default_initializable [concept.default.init]")[.](#1.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[2](#2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5411)
|
||||
|
||||
Both ranges::range_value_t<text_encoding::aliases_view> and ranges::range_reference_t<text_encoding::aliases_view> denote const char*[.](#2.sentence-1)
|
||||
|
||||
[3](#3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5417)
|
||||
|
||||
ranges::iterator_t<text_encoding::aliases_view> is a constexpr iterator ([[iterator.requirements.general]](iterator.requirements.general "24.3.1 General"))[.](#3.sentence-1)
|
||||
736
cppdraft/text/encoding/class.md
Normal file
736
cppdraft/text/encoding/class.md
Normal file
@@ -0,0 +1,736 @@
|
||||
[text.encoding.class]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#class)
|
||||
|
||||
### 28.4.2 Class text_encoding [text.encoding.class]
|
||||
|
||||
#### [28.4.2.1](#text.encoding.overview) Overview [[text.encoding.overview]](text.encoding.overview)
|
||||
|
||||
[1](#text.encoding.overview-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5014)
|
||||
|
||||
The class text_encoding describes an interface
|
||||
for accessing the IANA Character Sets registry[[bib]](bibliography#bib:iana-charset "Bibliography")[.](#text.encoding.overview-1.sentence-1)
|
||||
|
||||
[🔗](#lib:text_encoding)
|
||||
|
||||
namespace std {
struct text_encoding {
static constexpr size_t max_name_length = 63;
// [[text.encoding.id]](#text.encoding.id "28.4.2.6 Enumeration text_encoding::id"), enumeration text_encoding::id
enum class id : int_least32_t {*see below*};
using enum id;
constexpr text_encoding() = default;
constexpr explicit text_encoding(string_view enc) noexcept;
constexpr text_encoding(id i) noexcept;
constexpr id mib() const noexcept;
constexpr const char* name() const noexcept;
// [[text.encoding.aliases]](#text.encoding.aliases "28.4.2.5 Class text_encoding::aliases_view"), class text_encoding::aliases_view
struct aliases_view;
constexpr aliases_view aliases() const noexcept;
friend constexpr bool operator==(const text_encoding& a, const text_encoding& b) noexcept;
friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept;
static consteval text_encoding literal() noexcept;
static text_encoding environment();
template<id i> static bool environment_is();
private:
id *mib_* = id::unknown; // *exposition only*
char *name_*[max_name_length + 1] = {0}; // *exposition only*
static constexpr bool *comp-name*(string_view a, string_view b); // *exposition only*
};
}
|
||||
|
||||
[2](#text.encoding.overview-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5057)
|
||||
|
||||
Class text_encoding is
|
||||
a trivially copyable type ([[basic.types.general]](basic.types.general#term.trivially.copyable.type "6.9.1 General"))[.](#text.encoding.overview-2.sentence-1)
|
||||
|
||||
#### [28.4.2.2](#text.encoding.general) General [[text.encoding.general]](text.encoding.general)
|
||||
|
||||
[1](#text.encoding.general-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5063)
|
||||
|
||||
A [*registered character encoding*](#def:encoding,registered_character "28.4.2.2 General [text.encoding.general]") is
|
||||
a character encoding scheme in the IANA Character Sets registry[.](#text.encoding.general-1.sentence-1)
|
||||
|
||||
[*Note [1](#text.encoding.general-note-1)*:
|
||||
|
||||
The IANA Character Sets registry uses the term “character sets”
|
||||
to refer to character encodings[.](#text.encoding.general-1.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
The primary name of a registered character encoding is
|
||||
the name of that encoding specified in the IANA Character Sets registry[.](#text.encoding.general-1.sentence-3)
|
||||
|
||||
[2](#text.encoding.general-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5073)
|
||||
|
||||
The set of known registered character encodings contains
|
||||
every registered character encoding
|
||||
specified in the IANA Character Sets registry except for the following:
|
||||
|
||||
- [(2.1)](#text.encoding.general-2.1)
|
||||
|
||||
NATS-DANO (33)
|
||||
|
||||
- [(2.2)](#text.encoding.general-2.2)
|
||||
|
||||
NATS-DANO-ADD (34)
|
||||
|
||||
[3](#text.encoding.general-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5082)
|
||||
|
||||
Each known registered character encoding
|
||||
is identified by an enumerator in text_encoding::id, and
|
||||
has a set of zero or more [*aliases*](#def:encoding,registered_character,alias "28.4.2.2 General [text.encoding.general]")[.](#text.encoding.general-3.sentence-1)
|
||||
|
||||
[4](#text.encoding.general-4)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5087)
|
||||
|
||||
The set of aliases of a known registered character encoding is an implementation-defined
|
||||
superset of the aliases specified in the IANA Character Sets registry[.](#text.encoding.general-4.sentence-1)
|
||||
|
||||
The set of aliases for US-ASCII includes “ASCII”[.](#text.encoding.general-4.sentence-2)
|
||||
|
||||
No two aliases or primary names of distinct registered character encodings
|
||||
are equivalent when compared by text_encoding::*comp-name*[.](#text.encoding.general-4.sentence-3)
|
||||
|
||||
[5](#text.encoding.general-5)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5095)
|
||||
|
||||
How a text_encoding object
|
||||
is determined to be representative of a character encoding scheme
|
||||
implemented in the translation or execution environment is implementation-defined[.](#text.encoding.general-5.sentence-1)
|
||||
|
||||
[6](#text.encoding.general-6)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5102)
|
||||
|
||||
An object e of type text_encoding such that e.mib() == text_encoding::id::unknown is false and e.mib() == text_encoding::id::other is false maintains the following invariants:
|
||||
|
||||
- [(6.1)](#text.encoding.general-6.1)
|
||||
|
||||
*e.name() == '\0' is false, and
|
||||
|
||||
- [(6.2)](#text.encoding.general-6.2)
|
||||
|
||||
e.mib() == text_encoding(e.name()).mib() is true[.](#text.encoding.general-6.sentence-1)
|
||||
|
||||
[7](#text.encoding.general-7)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5112)
|
||||
|
||||
*Recommended practice*:
|
||||
|
||||
- [(7.1)](#text.encoding.general-7.1)
|
||||
|
||||
Implementations should not consider registered encodings to be interchangeable[.](#text.encoding.general-7.1.sentence-1)
|
||||
[*Example [1](#text.encoding.general-example-1)*:
|
||||
Shift_JIS and Windows-31J denote different encodings[.](#text.encoding.general-7.1.sentence-2)
|
||||
— *end example*]
|
||||
|
||||
- [(7.2)](#text.encoding.general-7.2)
|
||||
|
||||
Implementations should not use the name of a registered encoding
|
||||
to describe another similar yet different non-registered encoding
|
||||
unless there is a precedent on that implementation[.](#text.encoding.general-7.2.sentence-1)
|
||||
[*Example [2](#text.encoding.general-example-2)*:
|
||||
Big5
|
||||
— *end example*]
|
||||
|
||||
#### [28.4.2.3](#text.encoding.members) Members [[text.encoding.members]](text.encoding.members)
|
||||
|
||||
[🔗](#lib:text_encoding,constructor)
|
||||
|
||||
`constexpr explicit text_encoding(string_view enc) noexcept;
|
||||
`
|
||||
|
||||
[1](#text.encoding.members-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5137)
|
||||
|
||||
*Preconditions*:
|
||||
|
||||
- [(1.1)](#text.encoding.members-1.1)
|
||||
|
||||
enc represents a string in the ordinary literal encoding
|
||||
consisting only of elements of the basic character set ([[lex.charset]](lex.charset "5.3.1 Character sets"))[.](#text.encoding.members-1.1.sentence-1)
|
||||
|
||||
- [(1.2)](#text.encoding.members-1.2)
|
||||
|
||||
enc.size() <= max_name_length is true[.](#text.encoding.members-1.2.sentence-1)
|
||||
|
||||
- [(1.3)](#text.encoding.members-1.3)
|
||||
|
||||
enc.contains('\0') is false[.](#text.encoding.members-1.3.sentence-1)
|
||||
|
||||
[2](#text.encoding.members-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5149)
|
||||
|
||||
*Postconditions*:
|
||||
|
||||
- [(2.1)](#text.encoding.members-2.1)
|
||||
|
||||
If there exists a primary name or alias a of a known registered character encoding such that *comp-name*(a, enc) is true, *mib_* has the value of the enumerator of id associated with that registered character encoding[.](#text.encoding.members-2.1.sentence-1)
|
||||
Otherwise, *mib_* == id::other is true[.](#text.encoding.members-2.1.sentence-2)
|
||||
|
||||
- [(2.2)](#text.encoding.members-2.2)
|
||||
|
||||
enc.compare(*name_*) == 0 is true[.](#text.encoding.members-2.2.sentence-1)
|
||||
|
||||
[🔗](#lib:text_encoding,constructor_)
|
||||
|
||||
`constexpr text_encoding(id i) noexcept;
|
||||
`
|
||||
|
||||
[3](#text.encoding.members-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5170)
|
||||
|
||||
*Preconditions*: i has the value of one of the enumerators of id[.](#text.encoding.members-3.sentence-1)
|
||||
|
||||
[4](#text.encoding.members-4)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5174)
|
||||
|
||||
*Postconditions*:
|
||||
|
||||
- [(4.1)](#text.encoding.members-4.1)
|
||||
|
||||
*mib_* == i is true[.](#text.encoding.members-4.1.sentence-1)
|
||||
|
||||
- [(4.2)](#text.encoding.members-4.2)
|
||||
|
||||
If (*mib_* == id::unknown || *mib_* == id::other) is true, strlen(*name_*) == 0 is true[.](#text.encoding.members-4.2.sentence-1)
|
||||
Otherwise, ranges::contains(aliases(), string_view(*name_*)) is true[.](#text.encoding.members-4.2.sentence-2)
|
||||
|
||||
[🔗](#lib:mib,text_encoding)
|
||||
|
||||
`constexpr id mib() const noexcept;
|
||||
`
|
||||
|
||||
[5](#text.encoding.members-5)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5195)
|
||||
|
||||
*Returns*: *mib_*[.](#text.encoding.members-5.sentence-1)
|
||||
|
||||
[🔗](#lib:name,text_encoding)
|
||||
|
||||
`constexpr const char* name() const noexcept;
|
||||
`
|
||||
|
||||
[6](#text.encoding.members-6)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5206)
|
||||
|
||||
*Returns*: *name_*[.](#text.encoding.members-6.sentence-1)
|
||||
|
||||
[7](#text.encoding.members-7)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5210)
|
||||
|
||||
*Remarks*: name() is an ntbs and
|
||||
accessing elements of *name_* outside of the range name()+[0, strlen(name()) + 1) is undefined behavior[.](#text.encoding.members-7.sentence-1)
|
||||
|
||||
[🔗](#lib:aliases,text_encoding)
|
||||
|
||||
`constexpr aliases_view aliases() const noexcept;
|
||||
`
|
||||
|
||||
Let r denote an instance of aliases_view[.](#text.encoding.members-sentence-1)
|
||||
|
||||
If *this represents a known registered character encoding, then:
|
||||
|
||||
- r.front() is the primary name of the registered character encoding,
|
||||
- r contains the aliases of the registered character encoding, and
|
||||
- r does not contain duplicate values when compared with strcmp[.](#text.encoding.members-sentence-2)
|
||||
|
||||
Otherwise, r is an empty range[.](#text.encoding.members-sentence-3)
|
||||
|
||||
[8](#text.encoding.members-8)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5236)
|
||||
|
||||
Each element in r is a non-null, non-empty ntbs encoded in the literal character encoding and
|
||||
comprising only characters from the basic character set[.](#text.encoding.members-8.sentence-1)
|
||||
|
||||
[9](#text.encoding.members-9)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5241)
|
||||
|
||||
*Returns*: r[.](#text.encoding.members-9.sentence-1)
|
||||
|
||||
[10](#text.encoding.members-10)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5245)
|
||||
|
||||
[*Note [1](#text.encoding.members-note-1)*:
|
||||
|
||||
The order of aliases in r is unspecified[.](#text.encoding.members-10.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[🔗](#lib:literal,text_encoding)
|
||||
|
||||
`static consteval text_encoding literal() noexcept;
|
||||
`
|
||||
|
||||
[11](#text.encoding.members-11)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5257)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#text.encoding.members-11.sentence-1)
|
||||
|
||||
[12](#text.encoding.members-12)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5261)
|
||||
|
||||
*Returns*: A text_encoding object representing
|
||||
the ordinary character literal encoding ([[lex.charset]](lex.charset "5.3.1 Character sets"))[.](#text.encoding.members-12.sentence-1)
|
||||
|
||||
[🔗](#lib:environment,text_encoding)
|
||||
|
||||
`static text_encoding environment();
|
||||
`
|
||||
|
||||
[13](#text.encoding.members-13)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5273)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#text.encoding.members-13.sentence-1)
|
||||
|
||||
[14](#text.encoding.members-14)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5277)
|
||||
|
||||
*Returns*: A text_encoding object representing
|
||||
the implementation-defined
|
||||
character encoding scheme of the environment[.](#text.encoding.members-14.sentence-1)
|
||||
|
||||
On a POSIX implementation, this is the encoding scheme associated with
|
||||
the POSIX locale denoted by the empty string ""[.](#text.encoding.members-14.sentence-2)
|
||||
|
||||
[15](#text.encoding.members-15)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5285)
|
||||
|
||||
[*Note [2](#text.encoding.members-note-2)*:
|
||||
|
||||
This function is not affected by calls to setlocale[.](#text.encoding.members-15.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[16](#text.encoding.members-16)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5290)
|
||||
|
||||
*Recommended practice*: Implementations should return a value that is not affected by calls to
|
||||
the POSIX function setenv and
|
||||
other functions which can modify the environment ([[support.runtime]](support.runtime "17.14 Other runtime support"))[.](#text.encoding.members-16.sentence-1)
|
||||
|
||||
[🔗](#lib:environment_is,text_encoding)
|
||||
|
||||
`template<id i>
|
||||
static bool environment_is();
|
||||
`
|
||||
|
||||
[17](#text.encoding.members-17)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5304)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#text.encoding.members-17.sentence-1)
|
||||
|
||||
[18](#text.encoding.members-18)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5308)
|
||||
|
||||
*Returns*: environment() == i[.](#text.encoding.members-18.sentence-1)
|
||||
|
||||
[🔗](#lib:comp-name,text_encoding)
|
||||
|
||||
`static constexpr bool comp-name(string_view a, string_view b);
|
||||
`
|
||||
|
||||
[19](#text.encoding.members-19)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5319)
|
||||
|
||||
*Returns*: true if the two strings a and b encoded in the ordinary literal encoding
|
||||
are equal, ignoring, from left-to-right,
|
||||
|
||||
- [(19.1)](#text.encoding.members-19.1)
|
||||
|
||||
all elements that are not digits or letters ([[character.seq.general]](character.seq.general "16.3.3.3.4.1 General")),
|
||||
|
||||
- [(19.2)](#text.encoding.members-19.2)
|
||||
|
||||
character case, and
|
||||
|
||||
- [(19.3)](#text.encoding.members-19.3)
|
||||
|
||||
any sequence of one or more 0 characters
|
||||
not immediately preceded by a numeric prefix, where
|
||||
a numeric prefix is a sequence consisting of
|
||||
a digit in the range [1, 9]
|
||||
optionally followed by one or more elements which are not digits or letters,
|
||||
|
||||
and false otherwise[.](#text.encoding.members-19.sentence-1)
|
||||
|
||||
[*Note [3](#text.encoding.members-note-3)*:
|
||||
|
||||
This comparison is identical to
|
||||
the “Charset Alias Matching” algorithm
|
||||
described in the Unicode Technical Standard 22[[bib]](bibliography#bib:unicode-charmap "Bibliography")[.](#text.encoding.members-19.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[*Example [1](#text.encoding.members-example-1)*:
static_assert(*comp-name*("UTF-8", "utf8") == true);
static_assert(*comp-name*("u.t.f-008", "utf8") == true);
static_assert(*comp-name*("ut8", "utf8") == false);
static_assert(*comp-name*("utf-80", "utf8") == false);
— *end example*]
|
||||
|
||||
#### [28.4.2.4](#text.encoding.cmp) Comparison functions [[text.encoding.cmp]](text.encoding.cmp)
|
||||
|
||||
[🔗](#lib:operator==,text_encoding)
|
||||
|
||||
`friend constexpr bool operator==(const text_encoding& a, const text_encoding& b) noexcept;
|
||||
`
|
||||
|
||||
[1](#text.encoding.cmp-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5362)
|
||||
|
||||
*Returns*: If a.*mib_* == id::other && b.*mib_* == id::other is true,
|
||||
then *comp-name*(a.*name_*,
|
||||
b.*name_*)[.](#text.encoding.cmp-1.sentence-1)
|
||||
|
||||
Otherwise, a.*mib_* == b.*mib_*[.](#text.encoding.cmp-1.sentence-2)
|
||||
|
||||
[🔗](#lib:operator==,text_encoding_)
|
||||
|
||||
`friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept;
|
||||
`
|
||||
|
||||
[2](#text.encoding.cmp-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5376)
|
||||
|
||||
*Returns*: encoding.*mib_* == i[.](#text.encoding.cmp-2.sentence-1)
|
||||
|
||||
[3](#text.encoding.cmp-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5380)
|
||||
|
||||
*Remarks*: This operator induces an equivalence relation on its arguments
|
||||
if and only if i != id::other is true[.](#text.encoding.cmp-3.sentence-1)
|
||||
|
||||
#### [28.4.2.5](#text.encoding.aliases) Class text_encoding::aliases_view [[text.encoding.aliases]](text.encoding.aliases)
|
||||
|
||||
[🔗](#lib:aliases_view,text_encoding)
|
||||
|
||||
`struct text_encoding::aliases_view : ranges::view_interface<text_encoding::aliases_view> {
|
||||
constexpr implementation-defined begin() const;
|
||||
constexpr implementation-defined end() const;
|
||||
};
|
||||
`
|
||||
|
||||
[1](#text.encoding.aliases-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5399)
|
||||
|
||||
text_encoding::aliases_view models [copyable](concepts.object#concept:copyable "18.6 Object concepts [concepts.object]"), ranges::[view](range.view#concept:view "25.4.5 Views [range.view]"), ranges::[random_access_range](range.refinements#concept:random_access_range "25.4.6 Other range refinements [range.refinements]"), and ranges::[borrowed_range](range.range#concept:borrowed_range "25.4.2 Ranges [range.range]")[.](#text.encoding.aliases-1.sentence-1)
|
||||
|
||||
[*Note [1](#text.encoding.aliases-note-1)*:
|
||||
|
||||
text_encoding::aliases_view is not required to satisfy ranges::[common_range](range.refinements#concept:common_range "25.4.6 Other range refinements [range.refinements]"),
|
||||
nor [default_initializable](concept.default.init#concept:default_initializable "18.4.12 Concept default_initializable [concept.default.init]")[.](#text.encoding.aliases-1.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[2](#text.encoding.aliases-2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5411)
|
||||
|
||||
Both ranges::range_value_t<text_encoding::aliases_view> and ranges::range_reference_t<text_encoding::aliases_view> denote const char*[.](#text.encoding.aliases-2.sentence-1)
|
||||
|
||||
[3](#text.encoding.aliases-3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5417)
|
||||
|
||||
ranges::iterator_t<text_encoding::aliases_view> is a constexpr iterator ([[iterator.requirements.general]](iterator.requirements.general "24.3.1 General"))[.](#text.encoding.aliases-3.sentence-1)
|
||||
|
||||
#### [28.4.2.6](#text.encoding.id) Enumeration text_encoding::id [[text.encoding.id]](text.encoding.id)
|
||||
|
||||
[🔗](#lib:id,text_encoding)
|
||||
|
||||
namespace std {enum class text_encoding::id : int_least32_t { other = 1,
|
||||
unknown = 2,
|
||||
ASCII = 3,
|
||||
ISOLatin1 = 4,
|
||||
ISOLatin2 = 5,
|
||||
ISOLatin3 = 6,
|
||||
ISOLatin4 = 7,
|
||||
ISOLatinCyrillic = 8,
|
||||
ISOLatinArabic = 9,
|
||||
ISOLatinGreek = 10,
|
||||
ISOLatinHebrew = 11,
|
||||
ISOLatin5 = 12,
|
||||
ISOLatin6 = 13,
|
||||
ISOTextComm = 14,
|
||||
HalfWidthKatakana = 15,
|
||||
JISEncoding = 16,
|
||||
ShiftJIS = 17,
|
||||
EUCPkdFmtJapanese = 18,
|
||||
EUCFixWidJapanese = 19,
|
||||
ISO4UnitedKingdom = 20,
|
||||
ISO11SwedishForNames = 21,
|
||||
ISO15Italian = 22,
|
||||
ISO17Spanish = 23,
|
||||
ISO21German = 24,
|
||||
ISO60DanishNorwegian = 25,
|
||||
ISO69French = 26,
|
||||
ISO10646UTF1 = 27,
|
||||
ISO646basic1983 = 28,
|
||||
INVARIANT = 29,
|
||||
ISO2IntlRefVersion = 30,
|
||||
NATSSEFI = 31,
|
||||
NATSSEFIADD = 32,
|
||||
ISO10Swedish = 35,
|
||||
KSC56011987 = 36,
|
||||
ISO2022KR = 37,
|
||||
EUCKR = 38,
|
||||
ISO2022JP = 39,
|
||||
ISO2022JP2 = 40,
|
||||
ISO13JISC6220jp = 41,
|
||||
ISO14JISC6220ro = 42,
|
||||
ISO16Portuguese = 43,
|
||||
ISO18Greek7Old = 44,
|
||||
ISO19LatinGreek = 45,
|
||||
ISO25French = 46,
|
||||
ISO27LatinGreek1 = 47,
|
||||
ISO5427Cyrillic = 48,
|
||||
ISO42JISC62261978 = 49,
|
||||
ISO47BSViewdata = 50,
|
||||
ISO49INIS = 51,
|
||||
ISO50INIS8 = 52,
|
||||
ISO51INISCyrillic = 53,
|
||||
ISO54271981 = 54,
|
||||
ISO5428Greek = 55,
|
||||
ISO57GB1988 = 56,
|
||||
ISO58GB231280 = 57,
|
||||
ISO61Norwegian2 = 58,
|
||||
ISO70VideotexSupp1 = 59,
|
||||
ISO84Portuguese2 = 60,
|
||||
ISO85Spanish2 = 61,
|
||||
ISO86Hungarian = 62,
|
||||
ISO87JISX0208 = 63,
|
||||
ISO88Greek7 = 64,
|
||||
ISO89ASMO449 = 65,
|
||||
ISO90 = 66,
|
||||
ISO91JISC62291984a = 67,
|
||||
ISO92JISC62991984b = 68,
|
||||
ISO93JIS62291984badd = 69,
|
||||
ISO94JIS62291984hand = 70,
|
||||
ISO95JIS62291984handadd = 71,
|
||||
ISO96JISC62291984kana = 72,
|
||||
ISO2033 = 73,
|
||||
ISO99NAPLPS = 74,
|
||||
ISO102T617bit = 75,
|
||||
ISO103T618bit = 76,
|
||||
ISO111ECMACyrillic = 77,
|
||||
ISO121Canadian1 = 78,
|
||||
ISO122Canadian2 = 79,
|
||||
ISO123CSAZ24341985gr = 80,
|
||||
ISO88596E = 81,
|
||||
ISO88596I = 82,
|
||||
ISO128T101G2 = 83,
|
||||
ISO88598E = 84,
|
||||
ISO88598I = 85,
|
||||
ISO139CSN369103 = 86,
|
||||
ISO141JUSIB1002 = 87,
|
||||
ISO143IECP271 = 88,
|
||||
ISO146Serbian = 89,
|
||||
ISO147Macedonian = 90,
|
||||
ISO150 = 91,
|
||||
ISO151Cuba = 92,
|
||||
ISO6937Add = 93,
|
||||
ISO153GOST1976874 = 94,
|
||||
ISO8859Supp = 95,
|
||||
ISO10367Box = 96,
|
||||
ISO158Lap = 97,
|
||||
ISO159JISX02121990 = 98,
|
||||
ISO646Danish = 99,
|
||||
USDK = 100,
|
||||
DKUS = 101,
|
||||
KSC5636 = 102,
|
||||
Unicode11UTF7 = 103,
|
||||
ISO2022CN = 104,
|
||||
ISO2022CNEXT = 105,
|
||||
UTF8 = 106,
|
||||
ISO885913 = 109,
|
||||
ISO885914 = 110,
|
||||
ISO885915 = 111,
|
||||
ISO885916 = 112,
|
||||
GBK = 113,
|
||||
GB18030 = 114,
|
||||
OSDEBCDICDF0415 = 115,
|
||||
OSDEBCDICDF03IRV = 116,
|
||||
OSDEBCDICDF041 = 117,
|
||||
ISO115481 = 118,
|
||||
KZ1048 = 119,
|
||||
UCS2 = 1000,
|
||||
UCS4 = 1001,
|
||||
UnicodeASCII = 1002,
|
||||
UnicodeLatin1 = 1003,
|
||||
UnicodeJapanese = 1004,
|
||||
UnicodeIBM1261 = 1005,
|
||||
UnicodeIBM1268 = 1006,
|
||||
UnicodeIBM1276 = 1007,
|
||||
UnicodeIBM1264 = 1008,
|
||||
UnicodeIBM1265 = 1009,
|
||||
Unicode11 = 1010,
|
||||
SCSU = 1011,
|
||||
UTF7 = 1012,
|
||||
UTF16BE = 1013,
|
||||
UTF16LE = 1014,
|
||||
UTF16 = 1015,
|
||||
CESU8 = 1016,
|
||||
UTF32 = 1017,
|
||||
UTF32BE = 1018,
|
||||
UTF32LE = 1019,
|
||||
BOCU1 = 1020,
|
||||
UTF7IMAP = 1021,
|
||||
Windows30Latin1 = 2000,
|
||||
Windows31Latin1 = 2001,
|
||||
Windows31Latin2 = 2002,
|
||||
Windows31Latin5 = 2003,
|
||||
HPRoman8 = 2004,
|
||||
AdobeStandardEncoding = 2005,
|
||||
VenturaUS = 2006,
|
||||
VenturaInternational = 2007,
|
||||
DECMCS = 2008,
|
||||
PC850Multilingual = 2009,
|
||||
PCp852 = 2010,
|
||||
PC8CodePage437 = 2011,
|
||||
PC8DanishNorwegian = 2012,
|
||||
PC862LatinHebrew = 2013,
|
||||
PC8Turkish = 2014,
|
||||
IBMSymbols = 2015,
|
||||
IBMThai = 2016,
|
||||
HPLegal = 2017,
|
||||
HPPiFont = 2018,
|
||||
HPMath8 = 2019,
|
||||
HPPSMath = 2020,
|
||||
HPDesktop = 2021,
|
||||
VenturaMath = 2022,
|
||||
MicrosoftPublishing = 2023,
|
||||
Windows31J = 2024,
|
||||
GB2312 = 2025,
|
||||
Big5 = 2026,
|
||||
Macintosh = 2027,
|
||||
IBM037 = 2028,
|
||||
IBM038 = 2029,
|
||||
IBM273 = 2030,
|
||||
IBM274 = 2031,
|
||||
IBM275 = 2032,
|
||||
IBM277 = 2033,
|
||||
IBM278 = 2034,
|
||||
IBM280 = 2035,
|
||||
IBM281 = 2036,
|
||||
IBM284 = 2037,
|
||||
IBM285 = 2038,
|
||||
IBM290 = 2039,
|
||||
IBM297 = 2040,
|
||||
IBM420 = 2041,
|
||||
IBM423 = 2042,
|
||||
IBM424 = 2043,
|
||||
IBM500 = 2044,
|
||||
IBM851 = 2045,
|
||||
IBM855 = 2046,
|
||||
IBM857 = 2047,
|
||||
IBM860 = 2048,
|
||||
IBM861 = 2049,
|
||||
IBM863 = 2050,
|
||||
IBM864 = 2051,
|
||||
IBM865 = 2052,
|
||||
IBM868 = 2053,
|
||||
IBM869 = 2054,
|
||||
IBM870 = 2055,
|
||||
IBM871 = 2056,
|
||||
IBM880 = 2057,
|
||||
IBM891 = 2058,
|
||||
IBM903 = 2059,
|
||||
IBM904 = 2060,
|
||||
IBM905 = 2061,
|
||||
IBM918 = 2062,
|
||||
IBM1026 = 2063,
|
||||
IBMEBCDICATDE = 2064,
|
||||
EBCDICATDEA = 2065,
|
||||
EBCDICCAFR = 2066,
|
||||
EBCDICDKNO = 2067,
|
||||
EBCDICDKNOA = 2068,
|
||||
EBCDICFISE = 2069,
|
||||
EBCDICFISEA = 2070,
|
||||
EBCDICFR = 2071,
|
||||
EBCDICIT = 2072,
|
||||
EBCDICPT = 2073,
|
||||
EBCDICES = 2074,
|
||||
EBCDICESA = 2075,
|
||||
EBCDICESS = 2076,
|
||||
EBCDICUK = 2077,
|
||||
EBCDICUS = 2078,
|
||||
Unknown8BiT = 2079,
|
||||
Mnemonic = 2080,
|
||||
Mnem = 2081,
|
||||
VISCII = 2082,
|
||||
VIQR = 2083,
|
||||
KOI8R = 2084,
|
||||
HZGB2312 = 2085,
|
||||
IBM866 = 2086,
|
||||
PC775Baltic = 2087,
|
||||
KOI8U = 2088,
|
||||
IBM00858 = 2089,
|
||||
IBM00924 = 2090,
|
||||
IBM01140 = 2091,
|
||||
IBM01141 = 2092,
|
||||
IBM01142 = 2093,
|
||||
IBM01143 = 2094,
|
||||
IBM01144 = 2095,
|
||||
IBM01145 = 2096,
|
||||
IBM01146 = 2097,
|
||||
IBM01147 = 2098,
|
||||
IBM01148 = 2099,
|
||||
IBM01149 = 2100,
|
||||
Big5HKSCS = 2101,
|
||||
IBM1047 = 2102,
|
||||
PTCP154 = 2103,
|
||||
Amiga1251 = 2104,
|
||||
KOI7switched = 2105,
|
||||
BRF = 2106,
|
||||
TSCII = 2107,
|
||||
CP51932 = 2108,
|
||||
windows874 = 2109,
|
||||
windows1250 = 2250,
|
||||
windows1251 = 2251,
|
||||
windows1252 = 2252,
|
||||
windows1253 = 2253,
|
||||
windows1254 = 2254,
|
||||
windows1255 = 2255,
|
||||
windows1256 = 2256,
|
||||
windows1257 = 2257,
|
||||
windows1258 = 2258,
|
||||
TIS620 = 2259,
|
||||
CP50220 = 2260};}
|
||||
|
||||
[*Note [1](#text.encoding.id-note-1)*:
|
||||
|
||||
The text_encoding::id enumeration
|
||||
contains an enumerator for each known registered character encoding[.](#text.encoding.id-sentence-1)
|
||||
|
||||
For each encoding, the corresponding enumerator is derived from
|
||||
the alias beginning with “cs”, as follows
|
||||
|
||||
- csUnicode is mapped to text_encoding::id::UCS2,
|
||||
- csIBBM904 is mapped to text_encoding::id::IBM904, and
|
||||
- the “cs” prefix is removed from other names[.](#text.encoding.id-sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
#### [28.4.2.7](#text.encoding.hash) Hash support [[text.encoding.hash]](text.encoding.hash)
|
||||
|
||||
[🔗](#lib:hash,text_encoding)
|
||||
|
||||
`template<> struct hash<text_encoding>;
|
||||
`
|
||||
|
||||
[1](#text.encoding.hash-1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5713)
|
||||
|
||||
The specialization is enabled ([[unord.hash]](unord.hash "22.10.19 Class template hash"))[.](#text.encoding.hash-1.sentence-1)
|
||||
42
cppdraft/text/encoding/cmp.md
Normal file
42
cppdraft/text/encoding/cmp.md
Normal file
@@ -0,0 +1,42 @@
|
||||
[text.encoding.cmp]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#cmp)
|
||||
|
||||
### 28.4.2 Class text_encoding [[text.encoding.class]](text.encoding.class#text.encoding.cmp)
|
||||
|
||||
#### 28.4.2.4 Comparison functions [text.encoding.cmp]
|
||||
|
||||
[🔗](#lib:operator==,text_encoding)
|
||||
|
||||
`friend constexpr bool operator==(const text_encoding& a, const text_encoding& b) noexcept;
|
||||
`
|
||||
|
||||
[1](#1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5362)
|
||||
|
||||
*Returns*: If a.*mib_* == id::other && b.*mib_* == id::other is true,
|
||||
then *comp-name*(a.*name_*,
|
||||
b.*name_*)[.](#1.sentence-1)
|
||||
|
||||
Otherwise, a.*mib_* == b.*mib_*[.](#1.sentence-2)
|
||||
|
||||
[🔗](#lib:operator==,text_encoding_)
|
||||
|
||||
`friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept;
|
||||
`
|
||||
|
||||
[2](#2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5376)
|
||||
|
||||
*Returns*: encoding.*mib_* == i[.](#2.sentence-1)
|
||||
|
||||
[3](#3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5380)
|
||||
|
||||
*Remarks*: This operator induces an equivalence relation on its arguments
|
||||
if and only if i != id::other is true[.](#3.sentence-1)
|
||||
106
cppdraft/text/encoding/general.md
Normal file
106
cppdraft/text/encoding/general.md
Normal file
@@ -0,0 +1,106 @@
|
||||
[text.encoding.general]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#general)
|
||||
|
||||
### 28.4.2 Class text_encoding [[text.encoding.class]](text.encoding.class#text.encoding.general)
|
||||
|
||||
#### 28.4.2.2 General [text.encoding.general]
|
||||
|
||||
[1](#1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5063)
|
||||
|
||||
A [*registered character encoding*](#def:encoding,registered_character "28.4.2.2 General [text.encoding.general]") is
|
||||
a character encoding scheme in the IANA Character Sets registry[.](#1.sentence-1)
|
||||
|
||||
[*Note [1](#note-1)*:
|
||||
|
||||
The IANA Character Sets registry uses the term “character sets”
|
||||
to refer to character encodings[.](#1.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
The primary name of a registered character encoding is
|
||||
the name of that encoding specified in the IANA Character Sets registry[.](#1.sentence-3)
|
||||
|
||||
[2](#2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5073)
|
||||
|
||||
The set of known registered character encodings contains
|
||||
every registered character encoding
|
||||
specified in the IANA Character Sets registry except for the following:
|
||||
|
||||
- [(2.1)](#2.1)
|
||||
|
||||
NATS-DANO (33)
|
||||
|
||||
- [(2.2)](#2.2)
|
||||
|
||||
NATS-DANO-ADD (34)
|
||||
|
||||
[3](#3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5082)
|
||||
|
||||
Each known registered character encoding
|
||||
is identified by an enumerator in text_encoding::id, and
|
||||
has a set of zero or more [*aliases*](#def:encoding,registered_character,alias "28.4.2.2 General [text.encoding.general]")[.](#3.sentence-1)
|
||||
|
||||
[4](#4)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5087)
|
||||
|
||||
The set of aliases of a known registered character encoding is an implementation-defined
|
||||
superset of the aliases specified in the IANA Character Sets registry[.](#4.sentence-1)
|
||||
|
||||
The set of aliases for US-ASCII includes “ASCII”[.](#4.sentence-2)
|
||||
|
||||
No two aliases or primary names of distinct registered character encodings
|
||||
are equivalent when compared by text_encoding::*comp-name*[.](#4.sentence-3)
|
||||
|
||||
[5](#5)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5095)
|
||||
|
||||
How a text_encoding object
|
||||
is determined to be representative of a character encoding scheme
|
||||
implemented in the translation or execution environment is implementation-defined[.](#5.sentence-1)
|
||||
|
||||
[6](#6)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5102)
|
||||
|
||||
An object e of type text_encoding such that e.mib() == text_encoding::id::unknown is false and e.mib() == text_encoding::id::other is false maintains the following invariants:
|
||||
|
||||
- [(6.1)](#6.1)
|
||||
|
||||
*e.name() == '\0' is false, and
|
||||
|
||||
- [(6.2)](#6.2)
|
||||
|
||||
e.mib() == text_encoding(e.name()).mib() is true[.](#6.sentence-1)
|
||||
|
||||
[7](#7)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5112)
|
||||
|
||||
*Recommended practice*:
|
||||
|
||||
- [(7.1)](#7.1)
|
||||
|
||||
Implementations should not consider registered encodings to be interchangeable[.](#7.1.sentence-1)
|
||||
[*Example [1](#example-1)*:
|
||||
Shift_JIS and Windows-31J denote different encodings[.](#7.1.sentence-2)
|
||||
— *end example*]
|
||||
|
||||
- [(7.2)](#7.2)
|
||||
|
||||
Implementations should not use the name of a registered encoding
|
||||
to describe another similar yet different non-registered encoding
|
||||
unless there is a precedent on that implementation[.](#7.2.sentence-1)
|
||||
[*Example [2](#example-2)*:
|
||||
Big5
|
||||
— *end example*]
|
||||
20
cppdraft/text/encoding/hash.md
Normal file
20
cppdraft/text/encoding/hash.md
Normal file
@@ -0,0 +1,20 @@
|
||||
[text.encoding.hash]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#hash)
|
||||
|
||||
### 28.4.2 Class text_encoding [[text.encoding.class]](text.encoding.class#text.encoding.hash)
|
||||
|
||||
#### 28.4.2.7 Hash support [text.encoding.hash]
|
||||
|
||||
[🔗](#lib:hash,text_encoding)
|
||||
|
||||
`template<> struct hash<text_encoding>;
|
||||
`
|
||||
|
||||
[1](#1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5713)
|
||||
|
||||
The specialization is enabled ([[unord.hash]](unord.hash "22.10.19 Class template hash"))[.](#1.sentence-1)
|
||||
284
cppdraft/text/encoding/id.md
Normal file
284
cppdraft/text/encoding/id.md
Normal file
@@ -0,0 +1,284 @@
|
||||
[text.encoding.id]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#id)
|
||||
|
||||
### 28.4.2 Class text_encoding [[text.encoding.class]](text.encoding.class#text.encoding.id)
|
||||
|
||||
#### 28.4.2.6 Enumeration text_encoding::id [text.encoding.id]
|
||||
|
||||
[🔗](#lib:id,text_encoding)
|
||||
|
||||
namespace std {enum class text_encoding::id : int_least32_t { other = 1,
|
||||
unknown = 2,
|
||||
ASCII = 3,
|
||||
ISOLatin1 = 4,
|
||||
ISOLatin2 = 5,
|
||||
ISOLatin3 = 6,
|
||||
ISOLatin4 = 7,
|
||||
ISOLatinCyrillic = 8,
|
||||
ISOLatinArabic = 9,
|
||||
ISOLatinGreek = 10,
|
||||
ISOLatinHebrew = 11,
|
||||
ISOLatin5 = 12,
|
||||
ISOLatin6 = 13,
|
||||
ISOTextComm = 14,
|
||||
HalfWidthKatakana = 15,
|
||||
JISEncoding = 16,
|
||||
ShiftJIS = 17,
|
||||
EUCPkdFmtJapanese = 18,
|
||||
EUCFixWidJapanese = 19,
|
||||
ISO4UnitedKingdom = 20,
|
||||
ISO11SwedishForNames = 21,
|
||||
ISO15Italian = 22,
|
||||
ISO17Spanish = 23,
|
||||
ISO21German = 24,
|
||||
ISO60DanishNorwegian = 25,
|
||||
ISO69French = 26,
|
||||
ISO10646UTF1 = 27,
|
||||
ISO646basic1983 = 28,
|
||||
INVARIANT = 29,
|
||||
ISO2IntlRefVersion = 30,
|
||||
NATSSEFI = 31,
|
||||
NATSSEFIADD = 32,
|
||||
ISO10Swedish = 35,
|
||||
KSC56011987 = 36,
|
||||
ISO2022KR = 37,
|
||||
EUCKR = 38,
|
||||
ISO2022JP = 39,
|
||||
ISO2022JP2 = 40,
|
||||
ISO13JISC6220jp = 41,
|
||||
ISO14JISC6220ro = 42,
|
||||
ISO16Portuguese = 43,
|
||||
ISO18Greek7Old = 44,
|
||||
ISO19LatinGreek = 45,
|
||||
ISO25French = 46,
|
||||
ISO27LatinGreek1 = 47,
|
||||
ISO5427Cyrillic = 48,
|
||||
ISO42JISC62261978 = 49,
|
||||
ISO47BSViewdata = 50,
|
||||
ISO49INIS = 51,
|
||||
ISO50INIS8 = 52,
|
||||
ISO51INISCyrillic = 53,
|
||||
ISO54271981 = 54,
|
||||
ISO5428Greek = 55,
|
||||
ISO57GB1988 = 56,
|
||||
ISO58GB231280 = 57,
|
||||
ISO61Norwegian2 = 58,
|
||||
ISO70VideotexSupp1 = 59,
|
||||
ISO84Portuguese2 = 60,
|
||||
ISO85Spanish2 = 61,
|
||||
ISO86Hungarian = 62,
|
||||
ISO87JISX0208 = 63,
|
||||
ISO88Greek7 = 64,
|
||||
ISO89ASMO449 = 65,
|
||||
ISO90 = 66,
|
||||
ISO91JISC62291984a = 67,
|
||||
ISO92JISC62991984b = 68,
|
||||
ISO93JIS62291984badd = 69,
|
||||
ISO94JIS62291984hand = 70,
|
||||
ISO95JIS62291984handadd = 71,
|
||||
ISO96JISC62291984kana = 72,
|
||||
ISO2033 = 73,
|
||||
ISO99NAPLPS = 74,
|
||||
ISO102T617bit = 75,
|
||||
ISO103T618bit = 76,
|
||||
ISO111ECMACyrillic = 77,
|
||||
ISO121Canadian1 = 78,
|
||||
ISO122Canadian2 = 79,
|
||||
ISO123CSAZ24341985gr = 80,
|
||||
ISO88596E = 81,
|
||||
ISO88596I = 82,
|
||||
ISO128T101G2 = 83,
|
||||
ISO88598E = 84,
|
||||
ISO88598I = 85,
|
||||
ISO139CSN369103 = 86,
|
||||
ISO141JUSIB1002 = 87,
|
||||
ISO143IECP271 = 88,
|
||||
ISO146Serbian = 89,
|
||||
ISO147Macedonian = 90,
|
||||
ISO150 = 91,
|
||||
ISO151Cuba = 92,
|
||||
ISO6937Add = 93,
|
||||
ISO153GOST1976874 = 94,
|
||||
ISO8859Supp = 95,
|
||||
ISO10367Box = 96,
|
||||
ISO158Lap = 97,
|
||||
ISO159JISX02121990 = 98,
|
||||
ISO646Danish = 99,
|
||||
USDK = 100,
|
||||
DKUS = 101,
|
||||
KSC5636 = 102,
|
||||
Unicode11UTF7 = 103,
|
||||
ISO2022CN = 104,
|
||||
ISO2022CNEXT = 105,
|
||||
UTF8 = 106,
|
||||
ISO885913 = 109,
|
||||
ISO885914 = 110,
|
||||
ISO885915 = 111,
|
||||
ISO885916 = 112,
|
||||
GBK = 113,
|
||||
GB18030 = 114,
|
||||
OSDEBCDICDF0415 = 115,
|
||||
OSDEBCDICDF03IRV = 116,
|
||||
OSDEBCDICDF041 = 117,
|
||||
ISO115481 = 118,
|
||||
KZ1048 = 119,
|
||||
UCS2 = 1000,
|
||||
UCS4 = 1001,
|
||||
UnicodeASCII = 1002,
|
||||
UnicodeLatin1 = 1003,
|
||||
UnicodeJapanese = 1004,
|
||||
UnicodeIBM1261 = 1005,
|
||||
UnicodeIBM1268 = 1006,
|
||||
UnicodeIBM1276 = 1007,
|
||||
UnicodeIBM1264 = 1008,
|
||||
UnicodeIBM1265 = 1009,
|
||||
Unicode11 = 1010,
|
||||
SCSU = 1011,
|
||||
UTF7 = 1012,
|
||||
UTF16BE = 1013,
|
||||
UTF16LE = 1014,
|
||||
UTF16 = 1015,
|
||||
CESU8 = 1016,
|
||||
UTF32 = 1017,
|
||||
UTF32BE = 1018,
|
||||
UTF32LE = 1019,
|
||||
BOCU1 = 1020,
|
||||
UTF7IMAP = 1021,
|
||||
Windows30Latin1 = 2000,
|
||||
Windows31Latin1 = 2001,
|
||||
Windows31Latin2 = 2002,
|
||||
Windows31Latin5 = 2003,
|
||||
HPRoman8 = 2004,
|
||||
AdobeStandardEncoding = 2005,
|
||||
VenturaUS = 2006,
|
||||
VenturaInternational = 2007,
|
||||
DECMCS = 2008,
|
||||
PC850Multilingual = 2009,
|
||||
PCp852 = 2010,
|
||||
PC8CodePage437 = 2011,
|
||||
PC8DanishNorwegian = 2012,
|
||||
PC862LatinHebrew = 2013,
|
||||
PC8Turkish = 2014,
|
||||
IBMSymbols = 2015,
|
||||
IBMThai = 2016,
|
||||
HPLegal = 2017,
|
||||
HPPiFont = 2018,
|
||||
HPMath8 = 2019,
|
||||
HPPSMath = 2020,
|
||||
HPDesktop = 2021,
|
||||
VenturaMath = 2022,
|
||||
MicrosoftPublishing = 2023,
|
||||
Windows31J = 2024,
|
||||
GB2312 = 2025,
|
||||
Big5 = 2026,
|
||||
Macintosh = 2027,
|
||||
IBM037 = 2028,
|
||||
IBM038 = 2029,
|
||||
IBM273 = 2030,
|
||||
IBM274 = 2031,
|
||||
IBM275 = 2032,
|
||||
IBM277 = 2033,
|
||||
IBM278 = 2034,
|
||||
IBM280 = 2035,
|
||||
IBM281 = 2036,
|
||||
IBM284 = 2037,
|
||||
IBM285 = 2038,
|
||||
IBM290 = 2039,
|
||||
IBM297 = 2040,
|
||||
IBM420 = 2041,
|
||||
IBM423 = 2042,
|
||||
IBM424 = 2043,
|
||||
IBM500 = 2044,
|
||||
IBM851 = 2045,
|
||||
IBM855 = 2046,
|
||||
IBM857 = 2047,
|
||||
IBM860 = 2048,
|
||||
IBM861 = 2049,
|
||||
IBM863 = 2050,
|
||||
IBM864 = 2051,
|
||||
IBM865 = 2052,
|
||||
IBM868 = 2053,
|
||||
IBM869 = 2054,
|
||||
IBM870 = 2055,
|
||||
IBM871 = 2056,
|
||||
IBM880 = 2057,
|
||||
IBM891 = 2058,
|
||||
IBM903 = 2059,
|
||||
IBM904 = 2060,
|
||||
IBM905 = 2061,
|
||||
IBM918 = 2062,
|
||||
IBM1026 = 2063,
|
||||
IBMEBCDICATDE = 2064,
|
||||
EBCDICATDEA = 2065,
|
||||
EBCDICCAFR = 2066,
|
||||
EBCDICDKNO = 2067,
|
||||
EBCDICDKNOA = 2068,
|
||||
EBCDICFISE = 2069,
|
||||
EBCDICFISEA = 2070,
|
||||
EBCDICFR = 2071,
|
||||
EBCDICIT = 2072,
|
||||
EBCDICPT = 2073,
|
||||
EBCDICES = 2074,
|
||||
EBCDICESA = 2075,
|
||||
EBCDICESS = 2076,
|
||||
EBCDICUK = 2077,
|
||||
EBCDICUS = 2078,
|
||||
Unknown8BiT = 2079,
|
||||
Mnemonic = 2080,
|
||||
Mnem = 2081,
|
||||
VISCII = 2082,
|
||||
VIQR = 2083,
|
||||
KOI8R = 2084,
|
||||
HZGB2312 = 2085,
|
||||
IBM866 = 2086,
|
||||
PC775Baltic = 2087,
|
||||
KOI8U = 2088,
|
||||
IBM00858 = 2089,
|
||||
IBM00924 = 2090,
|
||||
IBM01140 = 2091,
|
||||
IBM01141 = 2092,
|
||||
IBM01142 = 2093,
|
||||
IBM01143 = 2094,
|
||||
IBM01144 = 2095,
|
||||
IBM01145 = 2096,
|
||||
IBM01146 = 2097,
|
||||
IBM01147 = 2098,
|
||||
IBM01148 = 2099,
|
||||
IBM01149 = 2100,
|
||||
Big5HKSCS = 2101,
|
||||
IBM1047 = 2102,
|
||||
PTCP154 = 2103,
|
||||
Amiga1251 = 2104,
|
||||
KOI7switched = 2105,
|
||||
BRF = 2106,
|
||||
TSCII = 2107,
|
||||
CP51932 = 2108,
|
||||
windows874 = 2109,
|
||||
windows1250 = 2250,
|
||||
windows1251 = 2251,
|
||||
windows1252 = 2252,
|
||||
windows1253 = 2253,
|
||||
windows1254 = 2254,
|
||||
windows1255 = 2255,
|
||||
windows1256 = 2256,
|
||||
windows1257 = 2257,
|
||||
windows1258 = 2258,
|
||||
TIS620 = 2259,
|
||||
CP50220 = 2260};}
|
||||
|
||||
[*Note [1](#note-1)*:
|
||||
|
||||
The text_encoding::id enumeration
|
||||
contains an enumerator for each known registered character encoding[.](#sentence-1)
|
||||
|
||||
For each encoding, the corresponding enumerator is derived from
|
||||
the alias beginning with “cs”, as follows
|
||||
|
||||
- csUnicode is mapped to text_encoding::id::UCS2,
|
||||
- csIBBM904 is mapped to text_encoding::id::IBM904, and
|
||||
- the “cs” prefix is removed from other names[.](#sentence-2)
|
||||
|
||||
— *end note*]
|
||||
257
cppdraft/text/encoding/members.md
Normal file
257
cppdraft/text/encoding/members.md
Normal file
@@ -0,0 +1,257 @@
|
||||
[text.encoding.members]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#members)
|
||||
|
||||
### 28.4.2 Class text_encoding [[text.encoding.class]](text.encoding.class#text.encoding.members)
|
||||
|
||||
#### 28.4.2.3 Members [text.encoding.members]
|
||||
|
||||
[🔗](#lib:text_encoding,constructor)
|
||||
|
||||
`constexpr explicit text_encoding(string_view enc) noexcept;
|
||||
`
|
||||
|
||||
[1](#1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5137)
|
||||
|
||||
*Preconditions*:
|
||||
|
||||
- [(1.1)](#1.1)
|
||||
|
||||
enc represents a string in the ordinary literal encoding
|
||||
consisting only of elements of the basic character set ([[lex.charset]](lex.charset "5.3.1 Character sets"))[.](#1.1.sentence-1)
|
||||
|
||||
- [(1.2)](#1.2)
|
||||
|
||||
enc.size() <= max_name_length is true[.](#1.2.sentence-1)
|
||||
|
||||
- [(1.3)](#1.3)
|
||||
|
||||
enc.contains('\0') is false[.](#1.3.sentence-1)
|
||||
|
||||
[2](#2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5149)
|
||||
|
||||
*Postconditions*:
|
||||
|
||||
- [(2.1)](#2.1)
|
||||
|
||||
If there exists a primary name or alias a of a known registered character encoding such that *comp-name*(a, enc) is true, *mib_* has the value of the enumerator of id associated with that registered character encoding[.](#2.1.sentence-1)
|
||||
Otherwise, *mib_* == id::other is true[.](#2.1.sentence-2)
|
||||
|
||||
- [(2.2)](#2.2)
|
||||
|
||||
enc.compare(*name_*) == 0 is true[.](#2.2.sentence-1)
|
||||
|
||||
[🔗](#lib:text_encoding,constructor_)
|
||||
|
||||
`constexpr text_encoding(id i) noexcept;
|
||||
`
|
||||
|
||||
[3](#3)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5170)
|
||||
|
||||
*Preconditions*: i has the value of one of the enumerators of id[.](#3.sentence-1)
|
||||
|
||||
[4](#4)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5174)
|
||||
|
||||
*Postconditions*:
|
||||
|
||||
- [(4.1)](#4.1)
|
||||
|
||||
*mib_* == i is true[.](#4.1.sentence-1)
|
||||
|
||||
- [(4.2)](#4.2)
|
||||
|
||||
If (*mib_* == id::unknown || *mib_* == id::other) is true, strlen(*name_*) == 0 is true[.](#4.2.sentence-1)
|
||||
Otherwise, ranges::contains(aliases(), string_view(*name_*)) is true[.](#4.2.sentence-2)
|
||||
|
||||
[🔗](#lib:mib,text_encoding)
|
||||
|
||||
`constexpr id mib() const noexcept;
|
||||
`
|
||||
|
||||
[5](#5)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5195)
|
||||
|
||||
*Returns*: *mib_*[.](#5.sentence-1)
|
||||
|
||||
[🔗](#lib:name,text_encoding)
|
||||
|
||||
`constexpr const char* name() const noexcept;
|
||||
`
|
||||
|
||||
[6](#6)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5206)
|
||||
|
||||
*Returns*: *name_*[.](#6.sentence-1)
|
||||
|
||||
[7](#7)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5210)
|
||||
|
||||
*Remarks*: name() is an ntbs and
|
||||
accessing elements of *name_* outside of the range name()+[0, strlen(name()) + 1) is undefined behavior[.](#7.sentence-1)
|
||||
|
||||
[🔗](#lib:aliases,text_encoding)
|
||||
|
||||
`constexpr aliases_view aliases() const noexcept;
|
||||
`
|
||||
|
||||
Let r denote an instance of aliases_view[.](#sentence-1)
|
||||
|
||||
If *this represents a known registered character encoding, then:
|
||||
|
||||
- r.front() is the primary name of the registered character encoding,
|
||||
- r contains the aliases of the registered character encoding, and
|
||||
- r does not contain duplicate values when compared with strcmp[.](#sentence-2)
|
||||
|
||||
Otherwise, r is an empty range[.](#sentence-3)
|
||||
|
||||
[8](#8)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5236)
|
||||
|
||||
Each element in r is a non-null, non-empty ntbs encoded in the literal character encoding and
|
||||
comprising only characters from the basic character set[.](#8.sentence-1)
|
||||
|
||||
[9](#9)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5241)
|
||||
|
||||
*Returns*: r[.](#9.sentence-1)
|
||||
|
||||
[10](#10)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5245)
|
||||
|
||||
[*Note [1](#note-1)*:
|
||||
|
||||
The order of aliases in r is unspecified[.](#10.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[🔗](#lib:literal,text_encoding)
|
||||
|
||||
`static consteval text_encoding literal() noexcept;
|
||||
`
|
||||
|
||||
[11](#11)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5257)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#11.sentence-1)
|
||||
|
||||
[12](#12)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5261)
|
||||
|
||||
*Returns*: A text_encoding object representing
|
||||
the ordinary character literal encoding ([[lex.charset]](lex.charset "5.3.1 Character sets"))[.](#12.sentence-1)
|
||||
|
||||
[🔗](#lib:environment,text_encoding)
|
||||
|
||||
`static text_encoding environment();
|
||||
`
|
||||
|
||||
[13](#13)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5273)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#13.sentence-1)
|
||||
|
||||
[14](#14)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5277)
|
||||
|
||||
*Returns*: A text_encoding object representing
|
||||
the implementation-defined
|
||||
character encoding scheme of the environment[.](#14.sentence-1)
|
||||
|
||||
On a POSIX implementation, this is the encoding scheme associated with
|
||||
the POSIX locale denoted by the empty string ""[.](#14.sentence-2)
|
||||
|
||||
[15](#15)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5285)
|
||||
|
||||
[*Note [2](#note-2)*:
|
||||
|
||||
This function is not affected by calls to setlocale[.](#15.sentence-1)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[16](#16)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5290)
|
||||
|
||||
*Recommended practice*: Implementations should return a value that is not affected by calls to
|
||||
the POSIX function setenv and
|
||||
other functions which can modify the environment ([[support.runtime]](support.runtime "17.14 Other runtime support"))[.](#16.sentence-1)
|
||||
|
||||
[🔗](#lib:environment_is,text_encoding)
|
||||
|
||||
`template<id i>
|
||||
static bool environment_is();
|
||||
`
|
||||
|
||||
[17](#17)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5304)
|
||||
|
||||
*Mandates*: CHAR_BIT == 8 is true[.](#17.sentence-1)
|
||||
|
||||
[18](#18)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5308)
|
||||
|
||||
*Returns*: environment() == i[.](#18.sentence-1)
|
||||
|
||||
[🔗](#lib:comp-name,text_encoding)
|
||||
|
||||
`static constexpr bool comp-name(string_view a, string_view b);
|
||||
`
|
||||
|
||||
[19](#19)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5319)
|
||||
|
||||
*Returns*: true if the two strings a and b encoded in the ordinary literal encoding
|
||||
are equal, ignoring, from left-to-right,
|
||||
|
||||
- [(19.1)](#19.1)
|
||||
|
||||
all elements that are not digits or letters ([[character.seq.general]](character.seq.general "16.3.3.3.4.1 General")),
|
||||
|
||||
- [(19.2)](#19.2)
|
||||
|
||||
character case, and
|
||||
|
||||
- [(19.3)](#19.3)
|
||||
|
||||
any sequence of one or more 0 characters
|
||||
not immediately preceded by a numeric prefix, where
|
||||
a numeric prefix is a sequence consisting of
|
||||
a digit in the range [1, 9]
|
||||
optionally followed by one or more elements which are not digits or letters,
|
||||
|
||||
and false otherwise[.](#19.sentence-1)
|
||||
|
||||
[*Note [3](#note-3)*:
|
||||
|
||||
This comparison is identical to
|
||||
the “Charset Alias Matching” algorithm
|
||||
described in the Unicode Technical Standard 22[[bib]](bibliography#bib:unicode-charmap "Bibliography")[.](#19.sentence-2)
|
||||
|
||||
— *end note*]
|
||||
|
||||
[*Example [1](#example-1)*: static_assert(*comp-name*("UTF-8", "utf8") == true); static_assert(*comp-name*("u.t.f-008", "utf8") == true); static_assert(*comp-name*("ut8", "utf8") == false); static_assert(*comp-name*("utf-80", "utf8") == false); — *end example*]
|
||||
27
cppdraft/text/encoding/overview.md
Normal file
27
cppdraft/text/encoding/overview.md
Normal file
@@ -0,0 +1,27 @@
|
||||
[text.encoding.overview]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#overview)
|
||||
|
||||
### 28.4.2 Class text_encoding [[text.encoding.class]](text.encoding.class#text.encoding.overview)
|
||||
|
||||
#### 28.4.2.1 Overview [text.encoding.overview]
|
||||
|
||||
[1](#1)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5014)
|
||||
|
||||
The class text_encoding describes an interface
|
||||
for accessing the IANA Character Sets registry[[bib]](bibliography#bib:iana-charset "Bibliography")[.](#1.sentence-1)
|
||||
|
||||
[🔗](#lib:text_encoding)
|
||||
|
||||
namespace std {
  struct text_encoding {
    static constexpr size_t max_name_length = 63;
    // [[text.encoding.id]](text.encoding.id "28.4.2.6 Enumeration text_encoding::id"), enumeration text_encoding::id
    enum class id : int_least32_t {
      *see below*
    };
    using enum id;
    constexpr text_encoding() = default;
    constexpr explicit text_encoding(string_view enc) noexcept;
    constexpr text_encoding(id i) noexcept;
    constexpr id mib() const noexcept;
    constexpr const char* name() const noexcept;
    // [[text.encoding.aliases]](text.encoding.aliases "28.4.2.5 Class text_encoding::aliases_view"), class text_encoding::aliases_view
    struct aliases_view;
    constexpr aliases_view aliases() const noexcept;
    friend constexpr bool operator==(const text_encoding& a, const text_encoding& b) noexcept;
    friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept;
    static consteval text_encoding literal() noexcept;
    static text_encoding environment();
    template<id i> static bool environment_is();
  private:
    id *mib_* = id::unknown; // *exposition only*
    char *name_*[max_name_length + 1] = {0}; // *exposition only*
    static constexpr bool *comp-name*(string_view a, string_view b); // *exposition only*
  };
}
|
||||
|
||||
[2](#2)
|
||||
|
||||
[#](http://github.com/Eelis/draft/tree/9adde4bc1c62ec234483e63ea3b70a59724c745a/source/text.tex#L5057)
|
||||
|
||||
Class text_encoding is
|
||||
a trivially copyable type ([[basic.types.general]](basic.types.general#term.trivially.copyable.type "6.9.1 General"))[.](#2.sentence-1)
|
||||
11
cppdraft/text/encoding/syn.md
Normal file
11
cppdraft/text/encoding/syn.md
Normal file
@@ -0,0 +1,11 @@
|
||||
[text.encoding.syn]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.4 Text encodings identification [[text.encoding]](text.encoding#syn)
|
||||
|
||||
### 28.4.1 Header <text_encoding> synopsis [text.encoding.syn]
|
||||
|
||||
[🔗](#header:%3ctext_encoding%3e)
|
||||
|
||||
namespace std {
  struct text_encoding;
  // [[text.encoding.hash]](text.encoding.hash "28.4.2.7 Hash support"), hash support
  template<class T> struct hash;
  template<> struct hash<text_encoding>;
}
|
||||
20
cppdraft/text/general.md
Normal file
20
cppdraft/text/general.md
Normal file
@@ -0,0 +1,20 @@
|
||||
[text.general]
|
||||
|
||||
# 28 Text processing library [[text]](./#text)
|
||||
|
||||
## 28.1 General [text.general]
|
||||
|
||||
This Clause describes components for dealing with text[.](#sentence-1)
|
||||
|
||||
These components are summarized in Table [89](#tab:text.summary "Table 89: Text library summary")[.](#sentence-2)
|
||||
|
||||
Table [89](#tab:text.summary) — Text library summary [[tab:text.summary]](./tab:text.summary)
|
||||
|
||||
| [🔗](#tab:text.summary-row-1) | **Subclause** | **Header** |
|
||||
| --- | --- | --- |
|
||||
| [🔗](#tab:text.summary-row-2)<br>[[charconv]](charconv "28.2 Primitive numeric conversions") | Primitive numeric conversions | <charconv> |
|
||||
| [🔗](#tab:text.summary-row-3)<br>[[localization]](localization "28.3 Localization library") | Localization library | <locale>, <clocale> |
|
||||
| [🔗](#tab:text.summary-row-4)<br>[[format]](format "28.5 Formatting") | Formatting | <format> |
|
||||
| [🔗](#tab:text.summary-row-5)<br>[[text.encoding]](text.encoding "28.4 Text encodings identification") | Text encodings identification | <text_encoding> |
|
||||
| [🔗](#tab:text.summary-row-6)<br>[[re]](re "28.6 Regular expressions library") | Regular expressions library | <regex> |
|
||||
| [🔗](#tab:text.summary-row-7)<br>[[text.c.strings]](text.c.strings "28.7 Null-terminated sequence utilities") | Null-terminated sequence utilities | <cctype>, <cstdlib>, <cuchar>, <cwchar>, <cwctype> |
|
||||
Reference in New Issue
Block a user