gdb/charset.c - gdb

Global variables defined

Data types defined

Functions defined

Macros defined

Source code

  1. /* Character set conversion support for GDB.

  2.    Copyright (C) 2001-2015 Free Software Foundation, Inc.

  3.    This file is part of GDB.

  4.    This program is free software; you can redistribute it and/or modify
  5.    it under the terms of the GNU General Public License as published by
  6.    the Free Software Foundation; either version 3 of the License, or
  7.    (at your option) any later version.

  8.    This program is distributed in the hope that it will be useful,
  9.    but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11.    GNU General Public License for more details.

  12.    You should have received a copy of the GNU General Public License
  13.    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

  14. #include "defs.h"
  15. #include "charset.h"
  16. #include "gdbcmd.h"
  17. #include "gdb_obstack.h"
  18. #include "gdb_wait.h"
  19. #include "charset-list.h"
  20. #include "vec.h"
  21. #include "environ.h"
  22. #include "arch-utils.h"
  23. #include "gdb_vecs.h"
  24. #include <ctype.h>

  25. #ifdef USE_WIN32API
  26. #include <windows.h>
  27. #endif

  28. /* How GDB's character set support works

  29.    GDB has three global settings:

  30.    - The `current host character set' is the character set GDB should
  31.      use in talking to the user, and which (hopefully) the user's
  32.      terminal knows how to display properly.  Most users should not
  33.      change this.

  34.    - The `current target character set' is the character set the
  35.      program being debugged uses.

  36.    - The `current target wide character set' is the wide character set
  37.      the program being debugged uses, that is, the encoding used for
  38.      wchar_t.

  39.    There are commands to set each of these, and mechanisms for
  40.    choosing reasonable default values.  GDB has a global list of
  41.    character sets that it can use as its host or target character
  42.    sets.

  43.    The header file `charset.h' declares various functions that
  44.    different pieces of GDB need to perform tasks like:

  45.    - printing target strings and characters to the user's terminal
  46.      (mostly target->host conversions),

  47.    - building target-appropriate representations of strings and
  48.      characters the user enters in expressions (mostly host->target
  49.      conversions),

  50.      and so on.

  51.    To avoid excessive code duplication and maintenance efforts,
  52.    GDB simply requires a capable iconv function.  Users on platforms
  53.    without a suitable iconv can use the GNU iconv library.  */


  54. #ifdef PHONY_ICONV

  55. /* Provide a phony iconv that does as little as possible.  Also,
  56.    arrange for there to be a single available character set.  */

  57. #undef GDB_DEFAULT_HOST_CHARSET
  58. #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
  59. #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
  60. #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
  61. #undef DEFAULT_CHARSET_NAMES
  62. #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

  63. #undef iconv_t
  64. #define iconv_t int
  65. #undef iconv_open
  66. #define iconv_open phony_iconv_open
  67. #undef iconv
  68. #define iconv phony_iconv
  69. #undef iconv_close
  70. #define iconv_close phony_iconv_close

  71. #undef ICONV_CONST
  72. #define ICONV_CONST const

  73. static iconv_t
  74. phony_iconv_open (const char *to, const char *from)
  75. {
  76.   /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
  77.      We allow conversions to wchar_t and the host charset.  */
  78.   if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
  79.       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
  80.     return -1;
  81.   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
  82.     return -1;

  83.   /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
  84.      used as a flag in calls to iconv.  */
  85.   return !strcmp (from, "UTF-32BE");
  86. }

  /* Phony replacement for iconv_close.  There is nothing to release,
     so ARG is ignored and 0 is always returned.  */

  static int
  phony_iconv_close (iconv_t arg)
  {
    (void) arg;

    return 0;
  }

  /* Phony replacement for iconv.

     UTF_FLAG is the value returned by phony_iconv_open: non-zero means
     the input is UTF-32BE, and each 4-byte big-endian unit is narrowed
     to a single output byte; zero means input bytes are copied to the
     output unchanged.

     *INBUF, *INBYTESLEFT, *OUTBUF and *OUTBYTESLEFT are advanced past
     whatever was consumed and produced.

     Returns 0 on success.  On failure returns (size_t) -1 and sets
     errno to:
     - EILSEQ if a UTF-32BE unit has a value of 256 or more,
     - E2BIG if the output buffer is full while input remains,
     - EINVAL if the UTF-32BE input ends with a partial unit.  */

  static size_t
  phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
               char **outbuf, size_t *outbytesleft)
  {
    if (utf_flag)
      {
        while (*inbytesleft >= 4)
          {
            size_t j;
            unsigned long c = 0;

            /* Assemble one big-endian 32-bit code unit.  */
            for (j = 0; j < 4; ++j)
              {
                c <<= 8;
                c += (*inbuf)[j] & 0xff;
              }

            if (c >= 256)
              {
                errno = EILSEQ;
                return -1;
              }
            /* Never store past the end of the caller's buffer.  */
            if (*outbytesleft < 1)
              {
                errno = E2BIG;
                return -1;
              }
            **outbuf = c & 0xff;
            ++*outbuf;
            --*outbytesleft;

            /* A whole 4-byte unit was consumed, not a single byte.  */
            *inbuf += 4;
            *inbytesleft -= 4;
          }
        /* Between 1 and 3 bytes left over: a truncated unit.  */
        if (*inbytesleft)
          {
            errno = EINVAL;
            return -1;
          }
      }
    else
      {
        /* In all other cases we simply copy input bytes to the
           output.  */
        size_t amt = *inbytesleft;

        if (amt > *outbytesleft)
          amt = *outbytesleft;
        memcpy (*outbuf, *inbuf, amt);
        *inbuf += amt;
        *outbuf += amt;
        *inbytesleft -= amt;
        *outbytesleft -= amt;
      }

    /* Input that did not fit in the output buffer.  */
    if (*inbytesleft)
      {
        errno = E2BIG;
        return -1;
      }

    /* The number of non-reversible conversions -- but they were all
       reversible.  */
    return 0;
  }

  146. #else /* PHONY_ICONV */

  147. /* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it
  148.    to ENOENT, while gnulib defines it to a different value.  Always
  149.    map ENOENT to gnulib's EILSEQ, leaving callers agnostic.  */

  150. static size_t
  151. gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft,
  152.            char **outbuf, size_t *outbytesleft)
  153. {
  154.   size_t ret;

  155.   ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft);
  156.   if (errno == ENOENT)
  157.     errno = EILSEQ;
  158.   return ret;
  159. }

  160. #undef iconv
  161. #define iconv gdb_iconv

  162. #endif /* PHONY_ICONV */


  163. /* The global lists of character sets and translations.  */


  164. #ifndef GDB_DEFAULT_TARGET_CHARSET
  165. #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
  166. #endif

  167. #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
  168. #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
  169. #endif

  170. static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
  171. static const char *host_charset_name = "auto";
  172. static void
  173. show_host_charset_name (struct ui_file *file, int from_tty,
  174.                         struct cmd_list_element *c,
  175.                         const char *value)
  176. {
  177.   if (!strcmp (value, "auto"))
  178.     fprintf_filtered (file,
  179.                       _("The host character set is \"auto; currently %s\".\n"),
  180.                       auto_host_charset_name);
  181.   else
  182.     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
  183. }

  184. static const char *target_charset_name = "auto";
  /* Print the target character set setting VALUE to FILE.  When VALUE
     is "auto", the charset that gdbarch_auto_charset reports for the
     current architecture is printed alongside it.  FROM_TTY and C are
     not used.  */

  static void
  show_target_charset_name (struct ui_file *file, int from_tty,
                            struct cmd_list_element *c, const char *value)
  {
    if (strcmp (value, "auto") != 0)
      {
        fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                          value);
        return;
      }

    fprintf_filtered (file,
                      _("The target character set is \"auto; "
                        "currently %s\".\n"),
                      gdbarch_auto_charset (get_current_arch ()));
  }

  198. static const char *target_wide_charset_name = "auto";
  /* Print the target wide character set setting VALUE to FILE.  When
     VALUE is "auto", the charset that gdbarch_auto_wide_charset reports
     for the current architecture is printed alongside it.  FROM_TTY and
     C are not used.  */

  static void
  show_target_wide_charset_name (struct ui_file *file,
                                 int from_tty,
                                 struct cmd_list_element *c,
                                 const char *value)
  {
    if (strcmp (value, "auto") != 0)
      {
        fprintf_filtered (file,
                          _("The target wide character set is \"%s\".\n"),
                          value);
        return;
      }

    fprintf_filtered (file,
                      _("The target wide character set is \"auto; "
                        "currently %s\".\n"),
                      gdbarch_auto_wide_charset (get_current_arch ()));
  }

  214. static const char *default_charset_names[] =
  215. {
  216.   DEFAULT_CHARSET_NAMES
  217.   0
  218. };

  219. static const char **charset_enum;


  220. /* If the target wide character set has big- or little-endian
  221.    variants, these are the corresponding names.  */
  222. static const char *target_wide_charset_be_name;
  223. static const char *target_wide_charset_le_name;

  224. /* The architecture for which the BE- and LE-names are valid.  */
  225. static struct gdbarch *be_le_arch;

  226. /* A helper function which sets the target wide big- and little-endian
  227.    character set names, if possible.  */

  228. static void
  229. set_be_le_names (struct gdbarch *gdbarch)
  230. {
  231.   int i, len;
  232.   const char *target_wide;

  233.   if (be_le_arch == gdbarch)
  234.     return;
  235.   be_le_arch = gdbarch;

  236.   target_wide_charset_le_name = NULL;
  237.   target_wide_charset_be_name = NULL;

  238.   target_wide = target_wide_charset_name;
  239.   if (!strcmp (target_wide, "auto"))
  240.     target_wide = gdbarch_auto_wide_charset (gdbarch);

  241.   len = strlen (target_wide);
  242.   for (i = 0; charset_enum[i]; ++i)
  243.     {
  244.       if (strncmp (target_wide, charset_enum[i], len))
  245.         continue;
  246.       if ((charset_enum[i][len] == 'B'
  247.            || charset_enum[i][len] == 'L')
  248.           && charset_enum[i][len + 1] == 'E'
  249.           && charset_enum[i][len + 2] == '\0')
  250.         {
  251.           if (charset_enum[i][len] == 'B')
  252.             target_wide_charset_be_name = charset_enum[i];
  253.           else
  254.             target_wide_charset_le_name = charset_enum[i];
  255.         }
  256.     }
  257. }

  258. /* 'Set charset', 'set host-charset', 'set target-charset', 'set
  259.    target-wide-charset', 'set charset' sfunc's.  */

  260. static void
  261. validate (struct gdbarch *gdbarch)
  262. {
  263.   iconv_t desc;
  264.   const char *host_cset = host_charset ();
  265.   const char *target_cset = target_charset (gdbarch);
  266.   const char *target_wide_cset = target_wide_charset_name;

  267.   if (!strcmp (target_wide_cset, "auto"))
  268.     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);

  269.   desc = iconv_open (target_wide_cset, host_cset);
  270.   if (desc == (iconv_t) -1)
  271.     error (_("Cannot convert between character sets `%s' and `%s'"),
  272.            target_wide_cset, host_cset);
  273.   iconv_close (desc);

  274.   desc = iconv_open (target_cset, host_cset);
  275.   if (desc == (iconv_t) -1)
  276.     error (_("Cannot convert between character sets `%s' and `%s'"),
  277.            target_cset, host_cset);
  278.   iconv_close (desc);

  279.   /* Clear the cache.  */
  280.   be_le_arch = NULL;
  281. }

  282. /* This is the sfunc for the 'set charset' command.  */
  283. static void
  284. set_charset_sfunc (char *charset, int from_tty,
  285.                    struct cmd_list_element *c)
  286. {
  287.   /* CAREFUL: set the target charset here as well.  */
  288.   target_charset_name = host_charset_name;
  289.   validate (get_current_arch ());
  290. }

  /* The sfunc for the 'set host-charset' command.  This is a thin
     wrapper that exists only because the callback must have this
     signature; it validates the charset settings for the current
     architecture.  CHARSET, FROM_TTY and C are not used.  */
  static void
  set_host_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
  {
    struct gdbarch *arch = get_current_arch ();

    validate (arch);
  }

  /* The sfunc for the 'set target-charset' command.  Validates the
     charset settings for the current architecture.  CHARSET, FROM_TTY
     and C are not used.  */
  static void
  set_target_charset_sfunc (char *charset, int from_tty,
                            struct cmd_list_element *c)
  {
    struct gdbarch *arch = get_current_arch ();

    validate (arch);
  }

  /* The sfunc for the 'set target-wide-charset' command.  Validates
     the charset settings for the current architecture.  CHARSET,
     FROM_TTY and C are not used.  */
  static void
  set_target_wide_charset_sfunc (char *charset, int from_tty,
                                 struct cmd_list_element *c)
  {
    struct gdbarch *arch = get_current_arch ();

    validate (arch);
  }

  313. /* sfunc for the 'show charset' command.  */
  314. static void
  315. show_charset (struct ui_file *file, int from_tty,
  316.               struct cmd_list_element *c,
  317.               const char *name)
  318. {
  319.   show_host_charset_name (file, from_tty, c, host_charset_name);
  320.   show_target_charset_name (file, from_tty, c, target_charset_name);
  321.   show_target_wide_charset_name (file, from_tty, c,
  322.                                  target_wide_charset_name);
  323. }


  324. /* Accessor functions.  */

  325. const char *
  326. host_charset (void)
  327. {
  328.   if (!strcmp (host_charset_name, "auto"))
  329.     return auto_host_charset_name;
  330.   return host_charset_name;
  331. }

  332. const char *
  333. target_charset (struct gdbarch *gdbarch)
  334. {
  335.   if (!strcmp (target_charset_name, "auto"))
  336.     return gdbarch_auto_charset (gdbarch);
  337.   return target_charset_name;
  338. }

  339. const char *
  340. target_wide_charset (struct gdbarch *gdbarch)
  341. {
  342.   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);

  343.   set_be_le_names (gdbarch);
  344.   if (byte_order == BFD_ENDIAN_BIG)
  345.     {
  346.       if (target_wide_charset_be_name)
  347.         return target_wide_charset_be_name;
  348.     }
  349.   else
  350.     {
  351.       if (target_wide_charset_le_name)
  352.         return target_wide_charset_le_name;
  353.     }

  354.   if (!strcmp (target_wide_charset_name, "auto"))
  355.     return gdbarch_auto_wide_charset (gdbarch);

  356.   return target_wide_charset_name;
  357. }


  358. /* Host character set management.  For the time being, we assume that
  359.    the host character set is some superset of ASCII.  */

  /* Map the host character C to the control character it names.  '?'
     maps to 0177; any other character is masked with 0237, which clears
     the 0100 and 0040 bits.  */

  char
  host_letter_to_control_character (char c)
  {
    return (c == '?') ? 0177 : (c & 0237);
  }

  /* Convert a host character, C, to its hex value.  C must already have
     been validated using isxdigit.

     Returns 0-9 for a decimal digit and 10-15 for 'a'-'f' or 'A'-'F'.
     Any other character trips the gdb_assert below.  */

  int
  host_hex_value (char c)
  {
    /* The <ctype.h> functions are only defined for EOF and values
       representable as unsigned char, so a plain char that may be
       negative has to be converted before it is passed in.  */
    if (isdigit ((unsigned char) c))
      return c - '0';
    if (c >= 'a' && c <= 'f')
      return 10 + c - 'a';
    gdb_assert (c >= 'A' && c <= 'F');
    return 10 + c - 'A';
  }


  379. /* Public character management functions.  */

  /* Cleanup callback that closes an iconv descriptor.  P points to the
     iconv_t to close.  */

  static void
  cleanup_iconv (void *p)
  {
    iconv_t *desc_ptr = p;
    iconv_t desc = *desc_ptr;

    iconv_close (desc);
  }

  387. void
  388. convert_between_encodings (const char *from, const char *to,
  389.                            const gdb_byte *bytes, unsigned int num_bytes,
  390.                            int width, struct obstack *output,
  391.                            enum transliterations translit)
  392. {
  393.   iconv_t desc;
  394.   struct cleanup *cleanups;
  395.   size_t inleft;
  396.   ICONV_CONST char *inp;
  397.   unsigned int space_request;

  398.   /* Often, the host and target charsets will be the same.  */
  399.   if (!strcmp (from, to))
  400.     {
  401.       obstack_grow (output, bytes, num_bytes);
  402.       return;
  403.     }

  404.   desc = iconv_open (to, from);
  405.   if (desc == (iconv_t) -1)
  406.     perror_with_name (_("Converting character sets"));
  407.   cleanups = make_cleanup (cleanup_iconv, &desc);

  408.   inleft = num_bytes;
  409.   inp = (ICONV_CONST char *) bytes;

  410.   space_request = num_bytes;

  411.   while (inleft > 0)
  412.     {
  413.       char *outp;
  414.       size_t outleft, r;
  415.       int old_size;

  416.       old_size = obstack_object_size (output);
  417.       obstack_blank (output, space_request);

  418.       outp = (char *) obstack_base (output) + old_size;
  419.       outleft = space_request;

  420.       r = iconv (desc, &inp, &inleft, &outp, &outleft);

  421.       /* Now make sure that the object on the obstack only includes
  422.          bytes we have converted.  */
  423.       obstack_blank_fast (output, -outleft);

  424.       if (r == (size_t) -1)
  425.         {
  426.           switch (errno)
  427.             {
  428.             case EILSEQ:
  429.               {
  430.                 int i;

  431.                 /* Invalid input sequence.  */
  432.                 if (translit == translit_none)
  433.                   error (_("Could not convert character "
  434.                            "to `%s' character set"), to);

  435.                 /* We emit escape sequence for the bytes, skip them,
  436.                    and try again.  */
  437.                 for (i = 0; i < width; ++i)
  438.                   {
  439.                     char octal[5];

  440.                     xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
  441.                     obstack_grow_str (output, octal);

  442.                     ++inp;
  443.                     --inleft;
  444.                   }
  445.               }
  446.               break;

  447.             case E2BIG:
  448.               /* We ran out of space in the output buffer.  Make it
  449.                  bigger next time around.  */
  450.               space_request *= 2;
  451.               break;

  452.             case EINVAL:
  453.               /* Incomplete input sequence.  FIXME: ought to report this
  454.                  to the caller somehow.  */
  455.               inleft = 0;
  456.               break;

  457.             default:
  458.               perror_with_name (_("Internal error while "
  459.                                   "converting character sets"));
  460.             }
  461.         }
  462.     }

  463.   do_cleanups (cleanups);
  464. }



  465. /* An iterator that returns host wchar_t's from a target string.  */
  466. struct wchar_iterator
  467. {
  468.   /* The underlying iconv descriptor.  */
  469.   iconv_t desc;

  470.   /* The input string.  This is updated as convert characters.  */
  471.   const gdb_byte *input;
  472.   /* The number of bytes remaining in the input.  */
  473.   size_t bytes;

  474.   /* The width of an input character.  */
  475.   size_t width;

  476.   /* The output buffer and its size.  */
  477.   gdb_wchar_t *out;
  478.   size_t out_size;
  479. };

  480. /* Create a new iterator.  */
  481. struct wchar_iterator *
  482. make_wchar_iterator (const gdb_byte *input, size_t bytes,
  483.                      const char *charset, size_t width)
  484. {
  485.   struct wchar_iterator *result;
  486.   iconv_t desc;

  487.   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
  488.   if (desc == (iconv_t) -1)
  489.     perror_with_name (_("Converting character sets"));

  490.   result = XNEW (struct wchar_iterator);
  491.   result->desc = desc;
  492.   result->input = input;
  493.   result->bytes = bytes;
  494.   result->width = width;

  495.   result->out = XNEW (gdb_wchar_t);
  496.   result->out_size = 1;

  497.   return result;
  498. }

  499. static void
  500. do_cleanup_iterator (void *p)
  501. {
  502.   struct wchar_iterator *iter = p;

  503.   iconv_close (iter->desc);
  504.   xfree (iter->out);
  505.   xfree (iter);
  506. }

  507. struct cleanup *
  508. make_cleanup_wchar_iterator (struct wchar_iterator *iter)
  509. {
  510.   return make_cleanup (do_cleanup_iterator, iter);
  511. }

  512. int
  513. wchar_iterate (struct wchar_iterator *iter,
  514.                enum wchar_iterate_result *out_result,
  515.                gdb_wchar_t **out_chars,
  516.                const gdb_byte **ptr,
  517.                size_t *len)
  518. {
  519.   size_t out_request;

  520.   /* Try to convert some characters.  At first we try to convert just
  521.      a single character.  The reason for this is that iconv does not
  522.      necessarily update its outgoing arguments when it encounters an
  523.      invalid input sequence -- but we want to reliably report this to
  524.      our caller so it can emit an escape sequence.  */
  525.   out_request = 1;
  526.   while (iter->bytes > 0)
  527.     {
  528.       ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
  529.       char *outptr = (char *) &iter->out[0];
  530.       const gdb_byte *orig_inptr = iter->input;
  531.       size_t orig_in = iter->bytes;
  532.       size_t out_avail = out_request * sizeof (gdb_wchar_t);
  533.       size_t num;
  534.       size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);

  535.       iter->input = (gdb_byte *) inptr;

  536.       if (r == (size_t) -1)
  537.         {
  538.           switch (errno)
  539.             {
  540.             case EILSEQ:
  541.               /* Invalid input sequence.  We still might have
  542.                  converted a character; if so, return it.  */
  543.               if (out_avail < out_request * sizeof (gdb_wchar_t))
  544.                 break;

  545.               /* Otherwise skip the first invalid character, and let
  546.                  the caller know about it.  */
  547.               *out_result = wchar_iterate_invalid;
  548.               *ptr = iter->input;
  549.               *len = iter->width;
  550.               iter->input += iter->width;
  551.               iter->bytes -= iter->width;
  552.               return 0;

  553.             case E2BIG:
  554.               /* We ran out of space.  We still might have converted a
  555.                  character; if so, return it.  Otherwise, grow the
  556.                  buffer and try again.  */
  557.               if (out_avail < out_request * sizeof (gdb_wchar_t))
  558.                 break;

  559.               ++out_request;
  560.               if (out_request > iter->out_size)
  561.                 {
  562.                   iter->out_size = out_request;
  563.                   iter->out = xrealloc (iter->out,
  564.                                         out_request * sizeof (gdb_wchar_t));
  565.                 }
  566.               continue;

  567.             case EINVAL:
  568.               /* Incomplete input sequence.  Let the caller know, and
  569.                  arrange for future calls to see EOF.  */
  570.               *out_result = wchar_iterate_incomplete;
  571.               *ptr = iter->input;
  572.               *len = iter->bytes;
  573.               iter->bytes = 0;
  574.               return 0;

  575.             default:
  576.               perror_with_name (_("Internal error while "
  577.                                   "converting character sets"));
  578.             }
  579.         }

  580.       /* We converted something.  */
  581.       num = out_request - out_avail / sizeof (gdb_wchar_t);
  582.       *out_result = wchar_iterate_ok;
  583.       *out_chars = iter->out;
  584.       *ptr = orig_inptr;
  585.       *len = orig_in - iter->bytes;
  586.       return num;
  587.     }

  588.   /* Really done.  */
  589.   *out_result = wchar_iterate_eof;
  590.   return -1;
  591. }


  592. /* The charset.c module initialization function.  */

  593. extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */

  594. static VEC (char_ptr) *charsets;

  595. #ifdef PHONY_ICONV

  596. static void
  597. find_charset_names (void)
  598. {
  599.   VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
  600.   VEC_safe_push (char_ptr, charsets, NULL);
  601. }

  602. #else /* PHONY_ICONV */

  603. /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
  604.    provides different symbols in the static and dynamic libraries.
  605.    So, configure may see libiconvlist but not iconvlist.  But, calling
  606.    iconvlist is the right thing to do and will work.  Hence we do a
  607.    check here but unconditionally call iconvlist below.  */
  608. #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)

  609. /* A helper function that adds some character sets to the vector of
  610.    all character sets.  This is a callback function for iconvlist.  */

  611. static int
  612. add_one (unsigned int count, const char *const *names, void *data)
  613. {
  614.   unsigned int i;

  615.   for (i = 0; i < count; ++i)
  616.     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));

  617.   return 0;
  618. }

  619. static void
  620. find_charset_names (void)
  621. {
  622.   iconvlist (add_one, NULL);
  623.   VEC_safe_push (char_ptr, charsets, NULL);
  624. }

  625. #else

  /* Return non-zero if LINE (output from iconv) should be ignored.
     Older iconv programs (e.g. 2.2.2) print a human readable
     introduction even when stdout is not a tty, while newer versions
     leave it out in that case.  A line is ignored when it contains any
     of the introduction fragments listed below.  */

  static int
  ignore_line_p (const char *line)
  {
    /* Fragments of the introduction text.  The match is a substring
       search (strstr), so a fragment may appear anywhere in the
       line.  */
    static const char * const ignore_lines[] =
      {
        "The following",
        "not necessarily",
        "the FROM and TO",
        "listed with several",
        NULL
      };
    const char * const *fragment;

    for (fragment = ignore_lines; *fragment != NULL; ++fragment)
      if (strstr (line, *fragment) != NULL)
        return 1;

    return 0;
  }

  651. static void
  652. find_charset_names (void)
  653. {
  654.   struct pex_obj *child;
  655.   char *args[3];
  656.   int err, status;
  657.   int fail = 1;
  658.   int flags;
  659.   struct gdb_environ *iconv_env;
  660.   char *iconv_program;

  661.   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
  662.      not a tty.  We need to recognize it and ignore it.  This text is
  663.      subject to translation, so force LANGUAGE=C.  */
  664.   iconv_env = make_environ ();
  665.   init_environ (iconv_env);
  666.   set_in_environ (iconv_env, "LANGUAGE", "C");
  667.   set_in_environ (iconv_env, "LC_ALL", "C");

  668.   child = pex_init (PEX_USE_PIPES, "iconv", NULL);

  669. #ifdef ICONV_BIN
  670.   {
  671.     char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
  672.                                               ICONV_BIN_RELOCATABLE);
  673.     iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
  674.     xfree (iconv_dir);
  675.   }
  676. #else
  677.   iconv_program = xstrdup ("iconv");
  678. #endif
  679.   args[0] = iconv_program;
  680.   args[1] = "-l";
  681.   args[2] = NULL;
  682.   flags = PEX_STDERR_TO_STDOUT;
  683. #ifndef ICONV_BIN
  684.   flags |= PEX_SEARCH;
  685. #endif
  686.   /* Note that we simply ignore errors here.  */
  687.   if (!pex_run_in_environment (child, flags,
  688.                                args[0], args, environ_vector (iconv_env),
  689.                                NULL, NULL, &err))
  690.     {
  691.       FILE *in = pex_read_output (child, 0);

  692.       /* POSIX says that iconv -l uses an unspecified format.  We
  693.          parse the glibc and libiconv formats; feel free to add others
  694.          as needed.  */

  695.       while (in != NULL && !feof (in))
  696.         {
  697.           /* The size of buf is chosen arbitrarily.  */
  698.           char buf[1024];
  699.           char *start, *r;
  700.           int len;

  701.           r = fgets (buf, sizeof (buf), in);
  702.           if (!r)
  703.             break;
  704.           len = strlen (r);
  705.           if (len <= 3)
  706.             continue;
  707.           if (ignore_line_p (r))
  708.             continue;

  709.           /* Strip off the newline.  */
  710.           --len;
  711.           /* Strip off one or two '/'s.  glibc will print lines like
  712.              "8859_7//", but also "10646-1:1993/UCS4/".  */
  713.           if (buf[len - 1] == '/')
  714.             --len;
  715.           if (buf[len - 1] == '/')
  716.             --len;
  717.           buf[len] = '\0';

  718.           /* libiconv will print multiple entries per line, separated
  719.              by spaces.  Older iconvs will print multiple entries per
  720.              line, indented by two spaces, and separated by ", "
  721.              (i.e. the human readable form).  */
  722.           start = buf;
  723.           while (1)
  724.             {
  725.               int keep_going;
  726.               char *p;

  727.               /* Skip leading blanks.  */
  728.               for (p = start; *p && *p == ' '; ++p)
  729.                 ;
  730.               start = p;
  731.               /* Find the next space, comma, or end-of-line.  */
  732.               for ( ; *p && *p != ' ' && *p != ','; ++p)
  733.                 ;
  734.               /* Ignore an empty result.  */
  735.               if (p == start)
  736.                 break;
  737.               keep_going = *p;
  738.               *p = '\0';
  739.               VEC_safe_push (char_ptr, charsets, xstrdup (start));
  740.               if (!keep_going)
  741.                 break;
  742.               /* Skip any extra spaces.  */
  743.               for (start = p + 1; *start && *start == ' '; ++start)
  744.                 ;
  745.             }
  746.         }

  747.       if (pex_get_status (child, 1, &status)
  748.           && WIFEXITED (status) && !WEXITSTATUS (status))
  749.         fail = 0;

  750.     }

  751.   xfree (iconv_program);
  752.   pex_free (child);
  753.   free_environ (iconv_env);

  754.   if (fail)
  755.     {
  756.       /* Some error occurred, so drop the vector.  */
  757.       free_char_ptr_vec (charsets);
  758.       charsets = NULL;
  759.     }
  760.   else
  761.     VEC_safe_push (char_ptr, charsets, NULL);
  762. }

  763. #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
  764. #endif /* PHONY_ICONV */

  765. /* The "auto" target charset used by default_auto_charset.  */
  766. static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;

  767. const char *
  768. default_auto_charset (void)
  769. {
  770.   return auto_target_charset_name;
  771. }

  772. const char *
  773. default_auto_wide_charset (void)
  774. {
  775.   return GDB_DEFAULT_TARGET_WIDE_CHARSET;
  776. }


  777. #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
  778. /* Macro used for UTF or UCS endianness suffix.  */
  779. #if WORDS_BIGENDIAN
  780. #define ENDIAN_SUFFIX "BE"
  781. #else
  782. #define ENDIAN_SUFFIX "LE"
  783. #endif

  784. /* The code below serves to generate a compile time error if
  785.    gdb_wchar_t type is not of size 2 nor 4, despite the fact that
  786.    macro __STDC_ISO_10646__ is defined.
  787.    This is better than a gdb_assert call, because GDB cannot handle
  788.    strings correctly if this size is different.  */

  789. extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
  790.                                        || sizeof (gdb_wchar_t) == 4)
  791.                                       ? 1 : -1];

  792. /* intermediate_encoding returns the charset used internally by
  793.    GDB to convert between target and host encodings. As the test above
  794.    compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
  795.    UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
  796.    otherwise an error is generated.  */

  797. const char *
  798. intermediate_encoding (void)
  799. {
  800.   iconv_t desc;
  801.   static const char *stored_result = NULL;
  802.   char *result;

  803.   if (stored_result)
  804.     return stored_result;
  805.   result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
  806.                        ENDIAN_SUFFIX);
  807.   /* Check that the name is supported by iconv_open.  */
  808.   desc = iconv_open (result, host_charset ());
  809.   if (desc != (iconv_t) -1)
  810.     {
  811.       iconv_close (desc);
  812.       stored_result = result;
  813.       return result;
  814.     }
  815.   /* Not valid, free the allocated memory.  */
  816.   xfree (result);
  817.   /* Second try, with UCS-2 type.  */
  818.   result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
  819.                        ENDIAN_SUFFIX);
  820.   /* Check that the name is supported by iconv_open.  */
  821.   desc = iconv_open (result, host_charset ());
  822.   if (desc != (iconv_t) -1)
  823.     {
  824.       iconv_close (desc);
  825.       stored_result = result;
  826.       return result;
  827.     }
  828.   /* Not valid, free the allocated memory.  */
  829.   xfree (result);
  830.   /* No valid charset found, generate error here.  */
  831.   error (_("Unable to find a vaild charset for string conversions"));
  832. }

  833. #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */

  834. void
  835. _initialize_charset (void)
  836. {
  837.   /* The first element is always "auto".  */
  838.   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
  839.   find_charset_names ();

  840.   if (VEC_length (char_ptr, charsets) > 1)
  841.     charset_enum = (const char **) VEC_address (char_ptr, charsets);
  842.   else
  843.     charset_enum = default_charset_names;

  844. #ifndef PHONY_ICONV
  845. #ifdef HAVE_LANGINFO_CODESET
  846.   /* The result of nl_langinfo may be overwritten later.  This may
  847.      leak a little memory, if the user later changes the host charset,
  848.      but that doesn't matter much.  */
  849.   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
  850.   /* Solaris will return `646' here -- but the Solaris iconv then does
  851.      not accept this.  Darwin (and maybe FreeBSD) may return "" here,
  852.      which GNU libiconv doesn't like (infinite loop).  */
  853.   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
  854.     auto_host_charset_name = "ASCII";
  855.   auto_target_charset_name = auto_host_charset_name;
  856. #elif defined (USE_WIN32API)
  857.   {
  858.     /* "CP" + x<=5 digits + paranoia.  */
  859.     static char w32_host_default_charset[16];

  860.     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
  861.               "CP%d", GetACP());
  862.     auto_host_charset_name = w32_host_default_charset;
  863.     auto_target_charset_name = auto_host_charset_name;
  864.   }
  865. #endif
  866. #endif

  867.   add_setshow_enum_cmd ("charset", class_support,
  868.                         charset_enum, &host_charset_name, _("\
  869. Set the host and target character sets."), _("\
  870. Show the host and target character sets."), _("\
  871. The `host character set' is the one used by the system GDB is running on.\n\
  872. The `target character set' is the one used by the program being debugged.\n\
  873. You may only use supersets of ASCII for your host character set; GDB does\n\
  874. not support any others.\n\
  875. To see a list of the character sets GDB supports, type `set charset <TAB>'."),
  876.                         /* Note that the sfunc below needs to set
  877.                            target_charset_name, because the 'set
  878.                            charset' command sets two variables.  */
  879.                         set_charset_sfunc,
  880.                         show_charset,
  881.                         &setlist, &showlist);

  882.   add_setshow_enum_cmd ("host-charset", class_support,
  883.                         charset_enum, &host_charset_name, _("\
  884. Set the host character set."), _("\
  885. Show the host character set."), _("\
  886. The `host character set' is the one used by the system GDB is running on.\n\
  887. You may only use supersets of ASCII for your host character set; GDB does\n\
  888. not support any others.\n\
  889. To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
  890.                         set_host_charset_sfunc,
  891.                         show_host_charset_name,
  892.                         &setlist, &showlist);

  893.   add_setshow_enum_cmd ("target-charset", class_support,
  894.                         charset_enum, &target_charset_name, _("\
  895. Set the target character set."), _("\
  896. Show the target character set."), _("\
  897. The `target character set' is the one used by the program being debugged.\n\
  898. GDB translates characters and strings between the host and target\n\
  899. character sets as needed.\n\
  900. To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
  901.                         set_target_charset_sfunc,
  902.                         show_target_charset_name,
  903.                         &setlist, &showlist);

  904.   add_setshow_enum_cmd ("target-wide-charset", class_support,
  905.                         charset_enum, &target_wide_charset_name,
  906.                         _("\
  907. Set the target wide character set."), _("\
  908. Show the target wide character set."), _("\
  909. The `target wide character set' is the one used by the program being debugged.\
  910. \nIn particular it is the encoding used by `wchar_t'.\n\
  911. GDB translates characters and strings between the host and target\n\
  912. character sets as needed.\n\
  913. To see a list of the character sets GDB supports, type\n\
  914. `set target-wide-charset'<TAB>"),
  915.                         set_target_wide_charset_sfunc,
  916.                         show_target_wide_charset_name,
  917.                         &setlist, &showlist);
  918. }