mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-05-29 18:51:15 -03:00
Fix wide char related tests on Cygwin
This makes the wide-character tests run by `./fish_tests` pass on systems where `sizeof(wchar_t)` is 2 (e.g., Cygwin). In doing so it corrects several problems in the underlying code in module *utf8.cpp*, such as its acceptance of five- and six-byte UTF-8 sequences. Those sequences were permitted by the original UTF-8 proposal but are not allowed by the adopted standard.
This commit is contained in:
45
src/utf8.cpp
45
src/utf8.cpp
@@ -26,8 +26,6 @@
|
||||
#define _SEQ2 0xc0
|
||||
#define _SEQ3 0xe0
|
||||
#define _SEQ4 0xf0
|
||||
#define _SEQ5 0xf8
|
||||
#define _SEQ6 0xfc
|
||||
|
||||
#define _BOM 0xfeff
|
||||
|
||||
@@ -37,8 +35,6 @@ typedef wchar_t utf8_wchar_t;
|
||||
|
||||
typedef std::basic_string<utf8_wchar_t> utf8_wstring_t;
|
||||
|
||||
bool is_wchar_ucs2() { return UTF8_WCHAR_MAX <= 0xFFFF; }
|
||||
|
||||
static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wstring_t *result,
|
||||
int flags);
|
||||
static size_t wchar_to_utf8_internal(const utf8_wchar_t *in, size_t insize, char *out,
|
||||
@@ -195,12 +191,6 @@ static size_t utf8_to_wchar_internal(const char *in, size_t insize, utf8_wstring
|
||||
} else if ((*p & 0xf8) == _SEQ4) {
|
||||
n = 4;
|
||||
high = (utf8_wchar_t)(*p & 0x07);
|
||||
} else if ((*p & 0xfc) == _SEQ5) {
|
||||
n = 5;
|
||||
high = (utf8_wchar_t)(*p & 0x03);
|
||||
} else if ((*p & 0xfe) == _SEQ6) {
|
||||
n = 6;
|
||||
high = (utf8_wchar_t)(*p & 0x01);
|
||||
} else {
|
||||
if ((flags & UTF8_IGNORE_ERROR) == 0) return 0;
|
||||
continue;
|
||||
@@ -298,12 +288,18 @@ static size_t wchar_to_utf8_internal(const utf8_wchar_t *in, size_t insize, char
|
||||
if ((flags & UTF8_IGNORE_ERROR) == 0) return 0;
|
||||
continue;
|
||||
}
|
||||
if (w_wide <= 0x0000007f) n = 1;
|
||||
else if (w_wide <= 0x000007ff) n = 2;
|
||||
else if (w_wide <= 0x0000ffff) n = 3;
|
||||
else if (w_wide <= 0x001fffff) n = 4;
|
||||
else if (w_wide <= 0x03ffffff) n = 5;
|
||||
else n = 6; /// if (w_wide <= 0x7fffffff)
|
||||
if (w_wide <= 0x0000007f)
|
||||
n = 1;
|
||||
else if (w_wide <= 0x000007ff)
|
||||
n = 2;
|
||||
else if (w_wide <= 0x0000ffff)
|
||||
n = 3;
|
||||
else if (w_wide <= 0x001fffff)
|
||||
n = 4;
|
||||
else if (w_wide <= 0x03ffffff)
|
||||
n = 5;
|
||||
else
|
||||
n = 6; /// if (w_wide <= 0x7fffffff)
|
||||
|
||||
total += n;
|
||||
|
||||
@@ -345,23 +341,6 @@ static size_t wchar_to_utf8_internal(const utf8_wchar_t *in, size_t insize, char
|
||||
p[0] = _SEQ4 | ((oc[1] & 0x1f) >> 2);
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
p[4] = _NXT | (oc[3] & 0x3f);
|
||||
p[3] = _NXT | (oc[3] >> 6) | ((oc[2] & 0x0f) << 2);
|
||||
p[2] = _NXT | ((oc[2] & 0xf0) >> 4) | ((oc[1] & 0x03) << 4);
|
||||
p[1] = _NXT | (oc[1] >> 2);
|
||||
p[0] = _SEQ5 | (oc[0] & 0x03);
|
||||
break;
|
||||
}
|
||||
case 6: {
|
||||
p[5] = _NXT | (oc[3] & 0x3f);
|
||||
p[4] = _NXT | (oc[3] >> 6) | ((oc[2] & 0x0f) << 2);
|
||||
p[3] = _NXT | (oc[2] >> 4) | ((oc[1] & 0x03) << 4);
|
||||
p[2] = _NXT | (oc[1] >> 2);
|
||||
p[1] = _NXT | (oc[0] & 0x3f);
|
||||
p[0] = _SEQ6 | ((oc[0] & 0x40) >> 6);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: do not check here for forbidden UTF-8 characters. They cannot appear here because
|
||||
|
||||
Reference in New Issue
Block a user