diff options
Diffstat (limited to 'intl/localcharset.c')
-rw-r--r-- | intl/localcharset.c | 98 |
1 files changed, 64 insertions, 34 deletions
diff --git a/intl/localcharset.c b/intl/localcharset.c index cbcdaceb6..71be7c110 100644 --- a/intl/localcharset.c +++ b/intl/localcharset.c @@ -1,21 +1,19 @@ /* Determine a canonical name for the current locale's character encoding. - Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc. + Copyright (C) 2000-2006, 2008-2014 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU Library General Public License as published - by the Free Software Foundation; either version 2, or (at your option) - any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. - You should have received a copy of the GNU Library General Public - License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - USA. */ + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Written by Bruno Haible <[email protected]>. */ @@ -31,11 +29,12 @@ #include <stdlib.h> #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET -# define DARWIN7 /* Darwin 7 or newer, i.e. MacOS X 10.3 or newer */ +# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */ #endif #if defined _WIN32 || defined __WIN32__ -# define WIN32_NATIVE +# define WINDOWS_NATIVE +# include <locale.h> #endif #if defined __EMX__ @@ -45,7 +44,7 @@ # endif #endif -#if !defined WIN32_NATIVE +#if !defined WINDOWS_NATIVE # include <unistd.h> # if HAVE_LANGINFO_CODESET # include <langinfo.h> @@ -58,7 +57,7 @@ # define WIN32_LEAN_AND_MEAN # include <windows.h> # endif -#elif defined WIN32_NATIVE +#elif defined WINDOWS_NATIVE # define WIN32_LEAN_AND_MEAN # include <windows.h> #endif @@ -67,6 +66,11 @@ # include <os2.h> #endif +/* For MB_CUR_MAX_L */ +#if defined DARWIN7 +# include <xlocale.h> +#endif + #if ENABLE_RELOCATABLE # include "relocatable.h" #else @@ -84,7 +88,7 @@ #endif #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ - /* Win32, Cygwin, OS/2, DOS */ + /* Native Windows, Cygwin, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') #endif @@ -124,7 +128,7 @@ get_charset_aliases (void) cp = charset_aliases; if (cp == NULL) { -#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) +#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__) const char *dir; const char *base = "charset.alias"; char *file_name; @@ -229,8 +233,7 @@ get_charset_aliases (void) { /* Out of memory. */ res_size = 0; - if (old_res_ptr != NULL) - free (old_res_ptr); + free (old_res_ptr); break; } strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); @@ -310,7 +313,7 @@ get_charset_aliases (void) "DECKOREAN" "\0" "EUC-KR" "\0"; # endif -# if defined WIN32_NATIVE || defined __CYGWIN__ +# if defined WINDOWS_NATIVE || defined __CYGWIN__ /* To avoid the troubles of installing a separate file in the same directory as the DLL and of retrieving the DLL's directory at runtime, simply inline the aliases here. */ @@ -362,7 +365,7 @@ locale_charset (void) const char *codeset; const char *aliases; -#if !(defined WIN32_NATIVE || defined OS2) +#if !(defined WINDOWS_NATIVE || defined OS2) # if HAVE_LANGINFO_CODESET @@ -409,10 +412,10 @@ locale_charset (void) } } - /* Woe32 has a function returning the locale's codepage as a number: - GetACP(). This encoding is used by Cygwin, unless the user has set - the environment variable CYGWIN=codepage:oem (which very few people - do). + /* The Windows API has a function returning the locale's codepage as a + number: GetACP(). This encoding is used by Cygwin, unless the user + has set the environment variable CYGWIN=codepage:oem (which very few + people do). Output directed to console windows needs to be converted (to GetOEMCP() if the console is using a raster font, or to GetConsoleOutputCP() if it is using a TrueType font). Cygwin does @@ -455,18 +458,38 @@ locale_charset (void) # endif -#elif defined WIN32_NATIVE +#elif defined WINDOWS_NATIVE static char buf[2 + 10 + 1]; - /* Woe32 has a function returning the locale's codepage as a number: - GetACP(). - When the output goes to a console window, it needs to be provided in - GetOEMCP() encoding if the console is using a raster font, or in - GetConsoleOutputCP() encoding if it is using a TrueType font. - But in GUI programs and for output sent to files and pipes, GetACP() - encoding is the best bet. */ - sprintf (buf, "CP%u", GetACP ()); + /* The Windows API has a function returning the locale's codepage as + a number, but the value doesn't change according to what the + 'setlocale' call specified. So we use it as a last resort, in + case the string returned by 'setlocale' doesn't specify the + codepage. */ + char *current_locale = setlocale (LC_ALL, NULL); + char *pdot; + + /* If they set different locales for different categories, + 'setlocale' will return a semi-colon separated list of locale + values. To make sure we use the correct one, we choose LC_CTYPE. */ + if (strchr (current_locale, ';')) + current_locale = setlocale (LC_CTYPE, NULL); + + pdot = strrchr (current_locale, '.'); + if (pdot) + sprintf (buf, "CP%s", pdot + 1); + else + { + /* The Windows API has a function returning the locale's codepage as a + number: GetACP(). + When the output goes to a console window, it needs to be provided in + GetOEMCP() encoding if the console is using a raster font, or in + GetConsoleOutputCP() encoding if it is using a TrueType font. + But in GUI programs and for output sent to files and pipes, GetACP() + encoding is the best bet. */ + sprintf (buf, "CP%u", GetACP ()); + } codeset = buf; #elif defined OS2 @@ -545,5 +568,12 @@ locale_charset (void) if (codeset[0] == '\0') codeset = "ASCII"; +#ifdef DARWIN7 + /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8" + (the default codeset) does not work when MB_CUR_MAX is 1. */ + if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1) + codeset = "ASCII"; +#endif + return codeset; } |