OS-7498: __ctype_mask[EOF] has been working by accident

Details

Issue Type:Bug
Priority:4 - Normal
Status:Resolved
Created at:2019-01-09T18:25:12.598Z
Updated at:2019-08-06T20:45:10.212Z

People

Created by:Former user
Reported by:Former user
Assigned to:Former user

Resolution

Fixed: A fix for this issue is checked into the tree and tested.
(Resolution Date: 2019-08-06T20:45:10.197Z)

Description

In booting COAL with a gcc7-compiled libc, some variants of sed no longer functioned in that they simply spin:

[root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# cat /etc/pkgsrc_version 
release: 2011Q4
architecture: i386
[root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# sed --version
GNU sed version 4.2.1
Copyright (C) 2009 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,
to the extent permitted by law.

GNU sed home page: <http://www.gnu.org/software/sed/>.
General help using GNU software: <http://www.gnu.org/gethelp/>.
E-mail bug reports to: <bug-gnu-utils@gnu.org>.
Be sure to include the word ``sed'' somewhere in the ``Subject:'' field.
[root@0a78c361-abaf-415c-9143-d6a2dafefbe8 (east-bay-1:cnapi0) ~]# echo foo | sed -e 's/foo/bar/'

Here is the code that we're spinning in:

in_nonblank:                    pushl  %ebp
in_nonblank+1:                  movl   %esp,%ebp
in_nonblank+3:                  subl   $0x8,%esp
in_nonblank+6:                  movl   %esi,%esi
in_nonblank+8:                  call   -0xad    <inchar>
in_nonblank+0xd:                movl   0x80830cc,%edx   <gsed`__ctype_mask>
in_nonblank+0x13:               testb  $0x40,(%edx,%eax,4)
in_nonblank+0x17:               jne    -0x11    <in_nonblank+8>
in_nonblank+0x19:               leave  
in_nonblank+0x1a:               ret    

The problem is that inchar is (properly) returning EOF (-1), and we are using that to dereference off of __ctype_mask:

> __ctype_mask/K 
gsed`__ctype_mask:
gsed`__ctype_mask:              fee1b7a0        
> fee1b7a0-4,4/Xna
libc.so.1`___trans_lower+0x3fc: ff              
libc.so.1`___ctype_mask:        20              
libc.so.1`___ctype_mask+4:      20              
libc.so.1`___ctype_mask+8:      20              
libc.so.1`___ctype_mask+0xc:    

The problem is that this code is (implicitly) relying on the padding that the compiler was providing in that it is expecting that {{__ctype_mask[1]}} is 0 - but gcc7 has structured this differently, putting ___trans_lower ahead of ___ctype_mask.

Ironically, the code in sed would have worked correctly (if accidentally) were it not for IRIX 4.0.5; here's the code in this ancient version of sed that defines ISBLANK:

/* handle misdesigned <ctype.h> macros (snarfed from lib/regex.c) */
/* Jim Meyering writes:

   "... Some ctype macros are valid only for character codes that
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
   using /bin/cc or gcc but without giving an ansi option).  So, all
   ctype uses should be through macros like ISPRINT...  If
   STDC_HEADERS is defined, then autoconf has verified that the ctype
   macros don't need to be guarded with references to isascii. ...
   Defining isascii to 1 should let any compiler worth its salt
   eliminate the && through constant folding."
   Solaris defines some of these symbols so we must undefine them first. */

#undef ISASCII
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
# define ISASCII(c) 1
#else
# define ISASCII(c) isascii(c)
#endif

#if defined isblank || defined HAVE_ISBLANK
# define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif

isascii -- even of this platform's vintage -- would have correctly returned 0 on EOF.

In any case, the fix is to expand ___ctype_mask and then point __ctype_mask to be &___ctype_mask[1]:

diff --git a/usr/src/lib/libc/port/locale/table.c b/usr/src/lib/libc/port/locale/table.c
index 3c0ce09901..2a6517a48f 100644
--- a/usr/src/lib/libc/port/locale/table.c
+++ b/usr/src/lib/libc/port/locale/table.c
@@ -39,7 +39,7 @@
 #include "mblocal.h"
 #include "_ctype.h"
 
-#define	_DEFRUNETYPE { \
+#define	_DEFRUNETYPE \
 	/* 00 */ \
 	_CTYPE_C, \
 	_CTYPE_C, \
@@ -183,10 +183,9 @@
 	_CTYPE_P|_CTYPE_R|_CTYPE_G, \
 	_CTYPE_P|_CTYPE_R|_CTYPE_G, \
 	_CTYPE_P|_CTYPE_R|_CTYPE_G, \
-	_CTYPE_C, \
-}
+	_CTYPE_C
 
-#define	_DEFMAPLOWER { \
+#define	_DEFMAPLOWER \
 	0x00,	0x01,	0x02,	0x03,	0x04,	0x05,	0x06,	0x07, \
 	0x08,	0x09,	0x0a,	0x0b,	0x0c,	0x0d,	0x0e,	0x0f, \
 	0x10,	0x11,	0x12,	0x13,	0x14,	0x15,	0x16,	0x17, \
@@ -218,10 +217,9 @@
 	0xe0,	0xe1,	0xe2,	0xe3,	0xe4,	0xe5,	0xe6,	0xe7, \
 	0xe8,	0xe9,	0xea,	0xeb,	0xec,	0xed,	0xee,	0xef, \
 	0xf0,	0xf1,	0xf2,	0xf3,	0xf4,	0xf5,	0xf6,	0xf7, \
-	0xf8,	0xf9,	0xfa,	0xfb,	0xfc,	0xfd,	0xfe,	0xff, \
-}
+	0xf8,	0xf9,	0xfa,	0xfb,	0xfc,	0xfd,	0xfe,	0xff
 
-#define	_DEFMAPUPPER { \
+#define	_DEFMAPUPPER \
 	0x00,	0x01,	0x02,	0x03,	0x04,	0x05,	0x06,	0x07, \
 	0x08,	0x09,	0x0a,	0x0b,	0x0c,	0x0d,	0x0e,	0x0f, \
 	0x10,	0x11,	0x12,	0x13,	0x14,	0x15,	0x16,	0x17, \
@@ -253,27 +251,32 @@
 	0xe0,	0xe1,	0xe2,	0xe3,	0xe4,	0xe5,	0xe6,	0xe7, \
 	0xe8,	0xe9,	0xea,	0xeb,	0xec,	0xed,	0xee,	0xef, \
 	0xf0,	0xf1,	0xf2,	0xf3,	0xf4,	0xf5,	0xf6,	0xf7, \
-	0xf8,	0xf9,	0xfa,	0xfb,	0xfc,	0xfd,	0xfe,	0xff, \
-}
+	0xf8,	0xf9,	0xfa,	0xfb,	0xfc,	0xfd,	0xfe,	0xff,
 
 _RuneLocale _DefaultRuneLocale = {
 	_RUNE_MAGIC_1,
 	"NONE",
-	_DEFRUNETYPE,
-	_DEFMAPLOWER,
-	_DEFMAPUPPER,
+	{ _DEFRUNETYPE },
+	{ _DEFMAPLOWER },
+	{ _DEFMAPUPPER },
 };
 
 /*
  * __ctype_mask, __trans_lower, and __trans_upper come from former _ctype.c and
  * have to stay pointers for binary compatibility, so we provide separate
- * storage for them, initialized to "C" locale contents by default.
+ * storage for them, initialized to "C" locale contents by default.  Note that
+ * legacy code may dereference __ctype_mask[-1] when checking against EOF,
+ * relying on that value to be 0.  To allow this, ___ctype_mask is expanded by
+ * one value and prepended with a leading 0, with __ctype_mask being set to
+ * point to ___ctype_mask[1].  (__trans_lower and __trans_upper do not suffer
+ * from this as EOF access was prevented in legacy code by a check against
+ * isascii(), which always returned 0 for EOF.)
  */
-static unsigned int ___ctype_mask[_CACHED_RUNES] = _DEFRUNETYPE;
-unsigned int *__ctype_mask = ___ctype_mask;
+static unsigned int ___ctype_mask[_CACHED_RUNES + 1] = { 0, _DEFRUNETYPE };
+unsigned int *__ctype_mask = &___ctype_mask[1];
 
-static int ___trans_lower[_CACHED_RUNES] = _DEFMAPLOWER;
+static int ___trans_lower[_CACHED_RUNES] = { _DEFMAPLOWER };
 int *__trans_lower = ___trans_lower;
 
-static int ___trans_upper[_CACHED_RUNES] = _DEFMAPUPPER;
+static int ___trans_upper[_CACHED_RUNES] = { _DEFMAPUPPER };
 int *__trans_upper = ___trans_upper;

Comments

Comment by Jira Bot
Created at 2019-02-05T19:27:59.616Z

illumos-joyent commit 5b248548c9ca0108aa36e7e3db7fbc46077bcf4b (branch master, by Bryan Cantrill)

OS-7498 __ctype_mask[EOF] has been working by accident
OS-7511 undefined behavior in lx_cap_update_priv() turns gcc7 demonic
OS-7517 -faggressive-loop-optimizations is too aggressive
OS-7518 array over-read in has_saved_fp()
OS-7535 allow platform builds on base-64 image
OS-7536 stop shipping poold(1M) and its cohorts
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>