From 583d771b106970863230f8df68d2c36b601db8b9 Mon Sep 17 00:00:00 2001 From: Fabian Homborg Date: Mon, 8 Jan 2018 11:05:02 +0100 Subject: [PATCH] Fix escape_string_var for UTF-16 We're now actually handling wchar_t here, so comparing the 0x80 bit would break for UTF-16, causing ASCII false-positives. Also simplifies a bit, since we no longer need a second variable. --- src/common.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index a7aaf6964..6dc4001af 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -857,23 +857,21 @@ static bool unescape_string_url(const wchar_t *in, wcstring *out) { static void escape_string_var(const wcstring &in, wcstring &out) { bool prev_was_hex_encoded = false; for (auto c1 : in) { - // This silliness is so we get the correct result whether chars are signed or unsigned. - unsigned int c2 = (unsigned int)c1 & 0xFF; - if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) { + if (c1 >= 0 && c1 <= 127 && isalnum(c1) && (!prev_was_hex_encoded || !is_hex_digit(c1))) { // ASCII alphanumerics don't need to be encoded. if (prev_was_hex_encoded) { out.push_back(L'_'); prev_was_hex_encoded = false; } - out.push_back((wchar_t)c2); - } else if (c2 == '_') { + out.push_back(c1); + } else if (c1 == L'_') { // Underscores are encoded by doubling them. out.append(L"__"); prev_was_hex_encoded = false; } else { // All other chars need to have their UTF-8 representation encoded in hex. wchar_t buf[4]; - swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2); + swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c1); out.append(buf); prev_was_hex_encoded = true; }