From 6035e6cc8317600c3100fdf1070890c3e42715a7 Mon Sep 17 00:00:00 2001
From: Dan Albert
Date: Wed, 30 Jul 2014 10:53:48 -0700
Subject: [PATCH] Proper MB_CUR_MAX.

Previously this was hard coded to 4. This is only the case for UTF-8
locales.

As a side effect, this properly reports C.UTF-8 as the default locale
instead of C.

The new tests restore the thread's previous locale before freeing the
locales they installed with uselocale(3), so that later MB_CUR_MAX
lookups in the same thread never read a freed locale.

Change-Id: I7c73cc8fe6ffac61d211cd5f75287e36de06f4fc
(cherry picked from commit 1aec7c1a35b2d03038b194967d5ebdc8e2c24b80)
---
 libc/bionic/locale.cpp | 44 +++++++++++++++++++++++++++++++-------------
 libc/include/stdlib.h  |  3 ++-
 tests/locale_test.cpp  | 24 ++++++++++++++++++++++--
 tests/stdio_test.cpp   |  7 +++++++
 4 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/libc/bionic/locale.cpp b/libc/bionic/locale.cpp
index 363140e92..4c3fd7f31 100644
--- a/libc/bionic/locale.cpp
+++ b/libc/bionic/locale.cpp
@@ -30,10 +30,27 @@
 #include <pthread.h>
 #include <stdlib.h>
 
+#include "private/bionic_macros.h"
+
 // We currently support a single locale, the "C" locale (also known as "POSIX").
 
+static bool __bionic_current_locale_is_utf8 = true;
+
 struct __locale_t {
-  // Because we only support one locale, these are just tokens with no data.
+  size_t mb_cur_max;
+
+  __locale_t(size_t mb_cur_max) : mb_cur_max(mb_cur_max) {
+  }
+
+  __locale_t(const __locale_t* other) {
+    if (other == LC_GLOBAL_LOCALE) {
+      mb_cur_max = __bionic_current_locale_is_utf8 ? 4 : 1;
+    } else {
+      mb_cur_max = other->mb_cur_max;
+    }
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(__locale_t);
 };
 
 static pthread_once_t g_locale_once = PTHREAD_ONCE_INIT;
@@ -75,7 +92,16 @@ static void __locale_init() {
   g_locale.int_n_sign_posn = CHAR_MAX;
 }
 
-static bool __bionic_current_locale_is_utf8 = false;
+// Backs the MB_CUR_MAX macro. Uses the locale stored under g_uselocale_key for
+// the calling thread, falling back to the global locale when there is none.
+size_t __mb_cur_max() {
+  locale_t l = reinterpret_cast<locale_t>(pthread_getspecific(g_uselocale_key));
+  if (l == nullptr || l == LC_GLOBAL_LOCALE) {
+    return __bionic_current_locale_is_utf8 ? 4 : 1;
+  } else {
+    return l->mb_cur_max;
+  }
+}
 
 static bool __is_supported_locale(const char* locale) {
   return (strcmp(locale, "") == 0 ||
@@ -85,25 +111,17 @@ static bool __is_supported_locale(const char* locale) {
           strcmp(locale, "POSIX") == 0);
 }
 
-static locale_t __new_locale() {
-  return reinterpret_cast<locale_t>(malloc(sizeof(__locale_t)));
-}
-
 lconv* localeconv() {
   pthread_once(&g_locale_once, __locale_init);
   return &g_locale;
 }
 
 locale_t duplocale(locale_t l) {
-  locale_t clone = __new_locale();
-  if (clone != NULL && l != LC_GLOBAL_LOCALE) {
-    *clone = *l;
-  }
-  return clone;
+  return new __locale_t(l);
 }
 
 void freelocale(locale_t l) {
-  free(l);
+  delete l;
 }
 
 locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/) {
@@ -118,7 +136,7 @@ locale_t newlocale(int category_mask, const char* locale_name, locale_t /*base*/
     return NULL;
   }
 
-  return __new_locale();
+  return new __locale_t(strstr(locale_name, "UTF-8") != NULL ? 4 : 1);
 }
 
 char* setlocale(int category, const char* locale_name) {
diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
index e29fdba4c..a5eb3d15b 100644
--- a/libc/include/stdlib.h
+++ b/libc/include/stdlib.h
@@ -161,7 +161,8 @@ extern int mbtowc(wchar_t *, const char *, size_t);
 extern int wctomb(char *, wchar_t);
 extern size_t wcstombs(char *, const wchar_t *, size_t);
 
-#define MB_CUR_MAX 4U
+extern size_t __mb_cur_max(void);
+#define MB_CUR_MAX __mb_cur_max()
 
 __END_DECLS
 
diff --git a/tests/locale_test.cpp b/tests/locale_test.cpp
index 7d063f9dd..325f6ceda 100644
--- a/tests/locale_test.cpp
+++ b/tests/locale_test.cpp
@@ -48,8 +48,8 @@ TEST(locale, localeconv) {
 }
 
 TEST(locale, setlocale) {
-  EXPECT_STREQ("C", setlocale(LC_ALL, NULL));
-  EXPECT_STREQ("C", setlocale(LC_CTYPE, NULL));
+  EXPECT_STREQ("C.UTF-8", setlocale(LC_ALL, NULL));
+  EXPECT_STREQ("C.UTF-8", setlocale(LC_CTYPE, NULL));
 
   errno = 0;
   EXPECT_EQ(NULL, setlocale(-1, NULL));
@@ -105,3 +105,23 @@ TEST(locale, uselocale) {
 
   EXPECT_EQ(n, uselocale(NULL));
 }
+
+TEST(locale, mb_cur_max) {
+  // We can't reliably test the behavior with setlocale(3) or the behavior for
+  // initial program conditions because (unless we're the only test that was
+  // run), another test has almost certainly called uselocale(3) in this thread.
+  // See b/16685652.
+  locale_t cloc = newlocale(LC_ALL, "C", 0);
+  locale_t cloc_utf8 = newlocale(LC_ALL, "C.UTF-8", 0);
+
+  locale_t old_locale = uselocale(cloc);
+  ASSERT_EQ(1U, MB_CUR_MAX);
+  uselocale(cloc_utf8);
+  ASSERT_EQ(4U, MB_CUR_MAX);
+
+  // Restore the previous locale first: freeing a locale that is still
+  // installed for this thread would leave a dangling pointer behind.
+  uselocale(old_locale);
+  freelocale(cloc);
+  freelocale(cloc_utf8);
+}
diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp
index 18dae9c2f..bb86509c2 100644
--- a/tests/stdio_test.cpp
+++ b/tests/stdio_test.cpp
@@ -427,6 +427,9 @@ TEST(stdio, snprintf_negative_zero_5084292) {
 }
 
 TEST(stdio, snprintf_utf8_15439554) {
+  locale_t cloc = newlocale(LC_ALL, "C.UTF-8", 0);
+  locale_t old_locale = uselocale(cloc);
+
   // http://b/15439554
   char buf[BUFSIZ];
 
@@ -442,6 +445,10 @@ TEST(stdio, snprintf_utf8_15439554) {
   // 4-byte character.
   snprintf(buf, sizeof(buf), "%d\xf0\xa4\xad\xa2%d", 1, 2);
   EXPECT_STREQ("1𤭢2", buf);
+
+  // Restore the previous locale before freeing the one this test installed.
+  uselocale(old_locale);
+  freelocale(cloc);
 }
 
 TEST(stdio, fprintf_failures_7229520) {