diff --git a/doc_src/language.rst b/doc_src/language.rst index 68446caa9..64e45aab8 100644 --- a/doc_src/language.rst +++ b/doc_src/language.rst @@ -1409,6 +1409,7 @@ The locale variables are: ``LANG``, ``LC_ALL``, ``LC_COLLATE``, ``LC_CTYPE``, `` The most common way to set the locale to use a command like ``set -gx LANG en_GB.utf8``, which sets the current locale to be the English language, as used in Great Britain, using the UTF-8 character set. That way any program that requires one setting differently can easily override just that and doesn't have to resort to LC_ALL. For a list of available locales on your system, try ``locale -a``. +Because it needs to handle output that might include multibyte characters (such as emoji), fish will try to set its own internal ``LC_CTYPE`` to one that is UTF-8-capable, even if it is given an effective ``LC_CTYPE`` of "C" (the default). This prevents problems with, for example, filenames shown in autosuggestions, even if the user started fish with ``LC_ALL=C``. To turn this handling off, set ``fish_allow_singlebyte_locale`` to "1". .. _builtin-overview: diff --git a/src/env_dispatch.cpp b/src/env_dispatch.cpp index d7e150682..b8bf6fea2 100644 --- a/src/env_dispatch.cpp +++ b/src/env_dispatch.cpp @@ -64,7 +64,7 @@ static const wcstring locale_variables[] = { L"LANG", L"LANGUAGE", L"LC_ALL", L"LC_ADDRESS", L"LC_COLLATE", L"LC_CTYPE", L"LC_IDENTIFICATION", L"LC_MEASUREMENT", L"LC_MESSAGES", L"LC_MONETARY", L"LC_NAME", L"LC_NUMERIC", L"LC_PAPER", L"LC_TELEPHONE", L"LC_TIME", - L"LOCPATH"}; + L"fish_allow_singlebyte_locale", L"LOCPATH"}; /// List of all curses environment variable names that might trigger (re)initializing the curses /// subsystem. @@ -556,6 +556,15 @@ static void init_curses(const environment_t &vars) { curses_initialized = true; } +static const char *utf8_locales[] = { + "C.UTF-8", + "en_US.UTF-8", + "en_GB.UTF-8", + "de_DE.UTF-8", + "C.utf8", + "UTF-8", +}; + /// Initialize the locale subsystem. 
static void init_locale(const environment_t &vars) { // We have to make a copy because the subsequent setlocale() call to change the locale will @@ -576,6 +585,28 @@ static void init_locale(const environment_t &vars) { } char *locale = setlocale(LC_ALL, ""); + + // Try to get a multibyte-capable encoding + // A "C" locale is broken for our purposes - any wchar functions will break on it. + // So we try *really really really hard* to not have one. + bool fix_locale = true; + if (auto allow_c = vars.get(L"fish_allow_singlebyte_locale")) { + fix_locale = allow_c.missing_or_empty() ? true : !bool_from_string(allow_c->as_string()); + } + if (fix_locale && MB_CUR_MAX == 1) { + FLOGF(env_locale, L"Have singlebyte locale, trying to fix"); + for (auto loc : utf8_locales) { + setlocale(LC_CTYPE, loc); + if (MB_CUR_MAX > 1) { + FLOGF(env_locale, L"Fixed locale: '%s'", loc); + break; + } + } + if (MB_CUR_MAX == 1) { + FLOGF(env_locale, L"Failed to fix locale"); + } + } + fish_setlocale(); FLOGF(env_locale, L"init_locale() setlocale(): '%s'", locale); diff --git a/tests/checks/locale.fish b/tests/checks/locale.fish index 01aff36b6..e23fc23f3 100644 --- a/tests/checks/locale.fish +++ b/tests/checks/locale.fish @@ -3,6 +3,9 @@ # see #7934. #REQUIRES: test -z "$GITHUB_WORKFLOW" +# We typically try to force a utf8-capable locale, +# this turns that off. +set -gx fish_allow_singlebyte_locale 1 # A function to display bytes, necessary because GNU and BSD implementations of `od` have different output. # We used to use xxd, but it's not available everywhere. See #3797.