diff --git a/po/de.po b/po/de.po index c89ddb22f..5f1176ccf 100644 --- a/po/de.po +++ b/po/de.po @@ -1187,9 +1187,6 @@ msgstr "" msgid "Invalid arguments" msgstr "Ungültige Argumente" -msgid "Invalid code points not yet supported by printf" -msgstr "Ungültige Codepunkte werden von printf noch nicht unterstützt" - msgid "Invalid index value" msgstr "Ungültiger Indexwert" @@ -1292,6 +1289,10 @@ msgstr "Keine Funktion" msgid "Not a number" msgstr "Keine Zahl" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "Kein gültiges Unicodezeichen: %s" + msgid "Notifications about universal variable changes" msgstr "" @@ -1638,10 +1639,6 @@ msgstr "Unerwartetes Stringende, eckige Klammern passen nicht" msgid "Unexpected token" msgstr "Unerwartetes Token" -#, c-format -msgid "Unicode character out of range: \\%c%0*x" -msgstr "Unicodezeichen außerhalb des gültigen Bereichs: \\%c%0*x" - msgid "Unknown" msgstr "Unbekannt" diff --git a/po/en.po b/po/en.po index 626b35b88..410bf4c46 100644 --- a/po/en.po +++ b/po/en.po @@ -1185,9 +1185,6 @@ msgstr "" msgid "Invalid arguments" msgstr "" -msgid "Invalid code points not yet supported by printf" -msgstr "" - msgid "Invalid index value" msgstr "" @@ -1290,6 +1287,10 @@ msgstr "" msgid "Not a number" msgstr "" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "Not a valid Unicode character: %s" + msgid "Notifications about universal variable changes" msgstr "" @@ -1636,10 +1637,6 @@ msgstr "Unexpected end of string, square brackets do not match" msgid "Unexpected token" msgstr "" -#, c-format -msgid "Unicode character out of range: \\%c%0*x" -msgstr "Unicode character out of range: \\%c%0*x" - msgid "Unknown" msgstr "Unknown" diff --git a/po/fr.po b/po/fr.po index 1b0ef73ed..4f1b521f4 100644 --- a/po/fr.po +++ b/po/fr.po @@ -1316,9 +1316,6 @@ msgstr "" msgid "Invalid arguments" msgstr "" -msgid "Invalid code points not yet supported by printf" -msgstr "" - msgid "Invalid index value" msgstr "" @@ -1421,6 
+1418,10 @@ msgstr "Pas une fonction" msgid "Not a number" msgstr "" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "" + msgid "Notifications about universal variable changes" msgstr "" @@ -1767,10 +1768,6 @@ msgstr "Fin de chaîne inattendue, les crochets ne sont pas refermés" msgid "Unexpected token" msgstr "" -#, c-format -msgid "Unicode character out of range: \\%c%0*x" -msgstr "Caractère Unicode hors limite: \\%c%0*x" - msgid "Unknown" msgstr "Inconnu" diff --git a/po/pl.po b/po/pl.po index c7fe70ab3..8f06144fc 100644 --- a/po/pl.po +++ b/po/pl.po @@ -1181,9 +1181,6 @@ msgstr "" msgid "Invalid arguments" msgstr "" -msgid "Invalid code points not yet supported by printf" -msgstr "" - msgid "Invalid index value" msgstr "" @@ -1286,6 +1283,10 @@ msgstr "" msgid "Not a number" msgstr "" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "" + msgid "Notifications about universal variable changes" msgstr "" @@ -1632,10 +1633,6 @@ msgstr "" msgid "Unexpected token" msgstr "" -#, c-format -msgid "Unicode character out of range: \\%c%0*x" -msgstr "" - msgid "Unknown" msgstr "Nieznany" diff --git a/po/pt_BR.po b/po/pt_BR.po index cd2e1d5e9..38cd39d36 100644 --- a/po/pt_BR.po +++ b/po/pt_BR.po @@ -1186,9 +1186,6 @@ msgstr "" msgid "Invalid arguments" msgstr "" -msgid "Invalid code points not yet supported by printf" -msgstr "" - msgid "Invalid index value" msgstr "" @@ -1291,6 +1288,10 @@ msgstr "" msgid "Not a number" msgstr "" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "" + msgid "Notifications about universal variable changes" msgstr "" @@ -1637,10 +1638,6 @@ msgstr "Final inesperado de string, colchetes não batem" msgid "Unexpected token" msgstr "" -#, c-format -msgid "Unicode character out of range: \\%c%0*x" -msgstr "Caracter Unicode fora dos limites: \\%c%0*x" - msgid "Unknown" msgstr "Desconhecido" diff --git a/po/sv.po b/po/sv.po index 0599993c3..a5c3d130a 100644 --- a/po/sv.po +++ b/po/sv.po @@ -1182,9 +1182,6 
@@ msgstr "" msgid "Invalid arguments" msgstr "" -msgid "Invalid code points not yet supported by printf" -msgstr "" - msgid "Invalid index value" msgstr "" @@ -1287,6 +1284,10 @@ msgstr "" msgid "Not a number" msgstr "" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "" + msgid "Notifications about universal variable changes" msgstr "" @@ -1633,10 +1634,6 @@ msgstr "" msgid "Unexpected token" msgstr "" -#, c-format -msgid "Unicode character out of range: \\%c%0*x" -msgstr "Unicodetecken utanför giltigt spann: \\%c%0*x" - msgid "Unknown" msgstr "Okänd" diff --git a/po/zh_CN.po b/po/zh_CN.po index c15cd478f..63518c890 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1211,9 +1211,6 @@ msgstr "主题监视器的内部细节" msgid "Invalid arguments" msgstr "无效参数" -msgid "Invalid code points not yet supported by printf" -msgstr "尚未由 printf 支持的无效码点" - msgid "Invalid index value" msgstr "无效索引值" @@ -1316,6 +1313,10 @@ msgstr "不是一个函数" msgid "Not a number" msgstr "非数字" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "" + msgid "Notifications about universal variable changes" msgstr "通用变量变更通知" @@ -1662,10 +1663,6 @@ msgstr "字符串意外结束,方括号不匹配" msgid "Unexpected token" msgstr "意外的记号" -#, c-format -msgid "Unicode character out of range: \\%c%0*x" -msgstr "Unicode 字符超出范围:\\%c%0*x" - msgid "Unknown" msgstr "未知" diff --git a/po/zh_TW.po b/po/zh_TW.po index ee2a6bd62..6e3ae8562 100644 --- a/po/zh_TW.po +++ b/po/zh_TW.po @@ -1185,9 +1185,6 @@ msgstr "主題監聽的內部資訊" msgid "Invalid arguments" msgstr "引數無效" -msgid "Invalid code points not yet supported by printf" -msgstr "printf 尚不支援無效的碼位" - msgid "Invalid index value" msgstr "索引值無效" @@ -1290,6 +1287,10 @@ msgstr "不是函式" msgid "Not a number" msgstr "不是數字" +#, c-format +msgid "Not a valid Unicode character: %s" +msgstr "" + msgid "Notifications about universal variable changes" msgstr "通域變數變更通知" @@ -1637,10 +1638,6 @@ msgstr "非預期的字串結尾,中括弧不對稱" msgid "Unexpected token" msgstr "非預期的詞元" -#, c-format -msgid "Unicode character out of 
range: \\%c%0*x" -msgstr "Unicode 字元超出範圍:\\%c%0*x" - msgid "Unknown" msgstr "未知" diff --git a/src/builtins/printf.rs b/src/builtins/printf.rs index f01a52e02..53f6b1be3 100644 --- a/src/builtins/printf.rs +++ b/src/builtins/printf.rs @@ -50,7 +50,7 @@ use super::prelude::*; use crate::locale::{Locale, get_numeric_locale}; -use crate::wchar::encode_byte_to_char; +use crate::wchar::{decode_byte_from_char, encode_byte_to_char}; use crate::wutil::{ errors::Error, wcstod::wcstod, @@ -665,20 +665,26 @@ fn print_esc(&mut self, escstart: &wstr, octal_0: bool) -> usize { uni_value = uni_value * 16 + p.char_at(0).to_digit(16).unwrap(); p = &p[1..]; } - // N.B. we assume __STDC_ISO_10646__. - if uni_value > 0x10FFFF { - self.fatal_error(wgettext_fmt!( - "Unicode character out of range: \\%c%0*x", - esc_char, - exp_esc_length, - uni_value - )); - } else { - // TODO-RUST: if uni_value is a surrogate, we need to encode it using our PUA scheme. - if let Some(c) = char::from_u32(uni_value) { - self.append_output(c); - } else { - self.fatal_error(wgettext!("Invalid code points not yet supported by printf")); + match char::from_u32(uni_value) { + Some(c) => { + // Test if this character would be treated specially when decoding. + // If so, PUA-encode it. + if decode_byte_from_char(c).is_some() { + // A `char` represents a Unicode scalar value, which takes up at most 4 bytes when encoded in UTF-8. 
+ let mut converted = [0_u8; 4]; + for byte in c.encode_utf8(&mut converted).as_bytes() { + self.append_output(encode_byte_to_char(*byte)); + } + } else { + self.append_output(c); + } + } + None => { + let escaped_char_string = format!("\\{esc_char}{uni_value:0exp_esc_length$x}"); + self.fatal_error(wgettext_fmt!( + "Not a valid Unicode character: %s", + escaped_char_string + )); } } } else { diff --git a/tests/checks/printf.fish b/tests/checks/printf.fish index 06aa07a22..72c6038f1 100644 --- a/tests/checks/printf.fish +++ b/tests/checks/printf.fish @@ -172,3 +172,13 @@ printf '|%.1s|\n' '𒈙a' #CHECK: |𒈙| printf '|%3.3s|\n' '👨‍👨‍👧‍👧' #CHECK: | 👨‍👨‍👧‍👧| + +# Check handling of chars we use in our internal PUA encoding. +printf '\uf641' | display_bytes +# CHECK: 0000000 357 231 201 +# CHECK: 0000003 + +# UTF-8 representation of \uf641 +printf '%s' \xef\x99\x81 | display_bytes +# CHECK: 0000000 357 231 201 +# CHECK: 0000003