From 3b15e995e755888e68db72e21788263092672ab3 Mon Sep 17 00:00:00 2001
From: Johannes Altmanninger
Date: Sat, 1 Apr 2023 12:23:28 +0200
Subject: [PATCH] str2wcs: encode invalid Unicode characters in the private use area

Rust does not like invalid code points, so let's ease the transition by
treating them like byte sequences that do not map to any code point.

See https://github.com/fish-shell/fish-shell/pull/9688#discussion_r1155089596
---
 src/common.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/common.cpp b/src/common.cpp
index 854fbd3a8..c7ced748c 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -338,6 +338,8 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) {
             // Determine whether to encode this character with our crazy scheme.
             if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
                 use_encode_direct = true;
+            } else if ((wc >= 0xD800 && wc <= 0xDFFF) || static_cast<uint32_t>(wc) >= 0x110000) {
+                use_encode_direct = true;
             } else if (wc == INTERNAL_SEPARATOR) {
                 use_encode_direct = true;
             } else if (ret == static_cast<size_t>(-2)) {