aboutsummaryrefslogtreecommitdiffstats
path: root/intl/localcharset.c
diff options
context:
space:
mode:
Diffstat (limited to 'intl/localcharset.c')
-rw-r--r--intl/localcharset.c98
1 files changed, 64 insertions, 34 deletions
diff --git a/intl/localcharset.c b/intl/localcharset.c
index cbcdaceb6..71be7c110 100644
--- a/intl/localcharset.c
+++ b/intl/localcharset.c
@@ -1,21 +1,19 @@
/* Determine a canonical name for the current locale's character encoding.
- Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc.
+ Copyright (C) 2000-2006, 2008-2014 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU Library General Public License as published
- by the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
- USA. */
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Bruno Haible <[email protected]>. */
@@ -31,11 +29,12 @@
#include <stdlib.h>
#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
-# define DARWIN7 /* Darwin 7 or newer, i.e. MacOS X 10.3 or newer */
+# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
#endif
#if defined _WIN32 || defined __WIN32__
-# define WIN32_NATIVE
+# define WINDOWS_NATIVE
+# include <locale.h>
#endif
#if defined __EMX__
@@ -45,7 +44,7 @@
# endif
#endif
-#if !defined WIN32_NATIVE
+#if !defined WINDOWS_NATIVE
# include <unistd.h>
# if HAVE_LANGINFO_CODESET
# include <langinfo.h>
@@ -58,7 +57,7 @@
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# endif
-#elif defined WIN32_NATIVE
+#elif defined WINDOWS_NATIVE
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
@@ -67,6 +66,11 @@
# include <os2.h>
#endif
+/* For MB_CUR_MAX_L */
+#if defined DARWIN7
+# include <xlocale.h>
+#endif
+
#if ENABLE_RELOCATABLE
# include "relocatable.h"
#else
@@ -84,7 +88,7 @@
#endif
#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
- /* Win32, Cygwin, OS/2, DOS */
+ /* Native Windows, Cygwin, OS/2, DOS */
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
#endif
@@ -124,7 +128,7 @@ get_charset_aliases (void)
cp = charset_aliases;
if (cp == NULL)
{
-#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
+#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__)
const char *dir;
const char *base = "charset.alias";
char *file_name;
@@ -229,8 +233,7 @@ get_charset_aliases (void)
{
/* Out of memory. */
res_size = 0;
- if (old_res_ptr != NULL)
- free (old_res_ptr);
+ free (old_res_ptr);
break;
}
strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
@@ -310,7 +313,7 @@ get_charset_aliases (void)
"DECKOREAN" "\0" "EUC-KR" "\0";
# endif
-# if defined WIN32_NATIVE || defined __CYGWIN__
+# if defined WINDOWS_NATIVE || defined __CYGWIN__
/* To avoid the troubles of installing a separate file in the same
directory as the DLL and of retrieving the DLL's directory at
runtime, simply inline the aliases here. */
@@ -362,7 +365,7 @@ locale_charset (void)
const char *codeset;
const char *aliases;
-#if !(defined WIN32_NATIVE || defined OS2)
+#if !(defined WINDOWS_NATIVE || defined OS2)
# if HAVE_LANGINFO_CODESET
@@ -409,10 +412,10 @@ locale_charset (void)
}
}
- /* Woe32 has a function returning the locale's codepage as a number:
- GetACP(). This encoding is used by Cygwin, unless the user has set
- the environment variable CYGWIN=codepage:oem (which very few people
- do).
+ /* The Windows API has a function returning the locale's codepage as a
+ number: GetACP(). This encoding is used by Cygwin, unless the user
+ has set the environment variable CYGWIN=codepage:oem (which very few
+ people do).
Output directed to console windows needs to be converted (to
GetOEMCP() if the console is using a raster font, or to
GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
@@ -455,18 +458,38 @@ locale_charset (void)
# endif
-#elif defined WIN32_NATIVE
+#elif defined WINDOWS_NATIVE
static char buf[2 + 10 + 1];
- /* Woe32 has a function returning the locale's codepage as a number:
- GetACP().
- When the output goes to a console window, it needs to be provided in
- GetOEMCP() encoding if the console is using a raster font, or in
- GetConsoleOutputCP() encoding if it is using a TrueType font.
- But in GUI programs and for output sent to files and pipes, GetACP()
- encoding is the best bet. */
- sprintf (buf, "CP%u", GetACP ());
+ /* The Windows API has a function returning the locale's codepage as
+ a number, but the value doesn't change according to what the
+ 'setlocale' call specified. So we use it as a last resort, in
+ case the string returned by 'setlocale' doesn't specify the
+ codepage. */
+ char *current_locale = setlocale (LC_ALL, NULL);
+ char *pdot;
+
+ /* If they set different locales for different categories,
+ 'setlocale' will return a semi-colon separated list of locale
+ values. To make sure we use the correct one, we choose LC_CTYPE. */
+ if (strchr (current_locale, ';'))
+ current_locale = setlocale (LC_CTYPE, NULL);
+
+ pdot = strrchr (current_locale, '.');
+ if (pdot)
+ sprintf (buf, "CP%s", pdot + 1);
+ else
+ {
+ /* The Windows API has a function returning the locale's codepage as a
+ number: GetACP().
+ When the output goes to a console window, it needs to be provided in
+ GetOEMCP() encoding if the console is using a raster font, or in
+ GetConsoleOutputCP() encoding if it is using a TrueType font.
+ But in GUI programs and for output sent to files and pipes, GetACP()
+ encoding is the best bet. */
+ sprintf (buf, "CP%u", GetACP ());
+ }
codeset = buf;
#elif defined OS2
@@ -545,5 +568,12 @@ locale_charset (void)
if (codeset[0] == '\0')
codeset = "ASCII";
+#ifdef DARWIN7
+ /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
+ (the default codeset) does not work when MB_CUR_MAX is 1. */
+ if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
+ codeset = "ASCII";
+#endif
+
return codeset;
}