4 files changed, 202 insertions, 61 deletions
diff --git a/util/ChangeLog b/util/ChangeLog
index 95b06019a..a451e4a9a 100644
--- a/util/ChangeLog
+++ b/util/ChangeLog
@@ -1,3 +1,14 @@
+Tue Aug 31 17:20:44 CEST 1999  Werner Koch  <[email protected]>
+
+
+	* strgutil.c (utf8_to_native): Implemented.
+	(check_utf8_string): Removed.
+
+	* miscutil.c (make_printable_string): Fixed possible buffer overflow.
+	(print_utf8_string): New.
+
+	* ttyio.c (tty_print_utf8_string): New.
+
 Mon Aug 30 20:38:33 CEST 1999  Werner Koch  <[email protected]>
 
 
diff --git a/util/miscutil.c b/util/miscutil.c
index 2b95d97d6..eb72415bb 100644
--- a/util/miscutil.c
+++ b/util/miscutil.c
@@ -194,6 +194,29 @@ print_string( FILE *fp, const byte *p, size_t n, int delim )
 }
 
 /****************
+ * Print a UTF-8 string to FP and filter all control characters out.
+ */
+void
+print_utf8_string( FILE *fp, const byte *p, size_t n )
+{
+    size_t i;
+    char *buf;
+
+    /* we can handle plain ascii simpler, so check for it first */
+    for(i=0; i < n; i++ ) {
+	if( p[i] & 0x80 )
+	    break;
+    }
+    if( i < n ) {
+	buf = utf8_to_native( p, n );
+	fputs( buf, fp );
+	m_free( buf );
+    }
+    else
+	print_string( fp, p, n, 0 );
+}
+
+/****************
  * This function returns a string which is suitable for printing
  * Caller must release it with m_free()
  */
@@ -211,7 +234,7 @@ make_printable_string( const byte *p, size_t n, int delim )
 		|| *p=='\v' || *p=='\b' || !*p )
 		buflen += 2;
 	    else
-		buflen += 3;
+		buflen += 4;
 	}
 	else
 	    buflen++;
diff --git a/util/strgutil.c b/util/strgutil.c
index 87eaad423..9ab63a047 100644
--- a/util/strgutil.c
+++ b/util/strgutil.c
@@ -376,77 +376,161 @@ native_to_utf8( const char *string )
 
 
 /****************
- * Convert string, which is in UTF8 to native encoding.  Replace
- * illegal encodings by some "\xnn".
+ * Convert STRING of LENGTH bytes from UTF-8 to native encoding.  Replace
+ * illegal encodings by some "\xnn" and quote all control characters.
  */
 char *
-utf8_to_native( const char *string )
+utf8_to_native( const char *string, size_t length )
 {
-  #if 0
+    int nleft;
+    int i;
+    byte encbuf[7];
+    int encidx;
     const byte *s;
     size_t n;
-    byte *buffer, *p;
-
-    /* quick check whether we actually have characters with bit 8 set */
-    for( s=string; *s; s++ )
-	if( *s & 0x80 )
-	    break;
-    if( !*s ) /* that is easy */
-	return m_strdup(string);
-
-    /* count the extended utf-8 characters */
-	110x xxxx
-	1110 xxxx
-	1111 0xxx
-    for( n=1, s=string; *s; s++ ) {
-	if( !(*s & 0x80) )
-	    n++;
-	else if( (*s & 0xe0) == 0xc0 )
-	    n += 2;
-	else if( (*s & 0xf0) == 0xe0 )
-	    n += 3;
-	else if( (*s & 0xf8) == 0xf0 )
-	    n += 4;
-	else
-	    n++; /* invalid encoding */
-    }
+    byte *buffer = NULL, *p = NULL;
+    unsigned long val = 0;
+    size_t slen;
+    int resync = 0;
+
+    /* 1. pass (p==NULL): count the extended utf-8 characters */
+    /* 2. pass (p!=NULL): create string */
+    for( ;; ) {
+	for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
+	    if( resync ) {
+		if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
+		    /* still invalid */
+		    if( p ) {
+			sprintf(p, "\\x%02x", *s );
+			p += 4;
+		    }
+		    n += 4;
+		    continue;
+		}
+		resync = 0;
+	    }
+	    if( !nleft ) {
+		if( !(*s & 0x80) ) { /* plain ascii */
+		    if( iscntrl( *s ) ) {
+			n++;
+			if( p )
+			    *p++ = '\\';
+			switch( *s ) {
+			  case '\n': n++; if( p ) *p++ = 'n'; break;
+			  case '\r': n++; if( p ) *p++ = 'r'; break;
+			  case '\f': n++; if( p ) *p++ = 'f'; break;
+			  case '\v': n++; if( p ) *p++ = 'v'; break;
+			  case '\b': n++; if( p ) *p++ = 'b'; break;
+			  case	 0 : n++; if( p ) *p++ = '0'; break;
+			  default: n += 3;
+				   sprintf( p, "x%02x", *s );
+				   p += 3;
+				   break;
+			}
+		    }
+		    else {
+			if( p ) *p++ = *s;
+			n++;
+		    }
+		}
+		else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
+		    val = *s & 0x1f;
+		    nleft = 1;
+		    encbuf[encidx=0] = *s;
+		}
+		else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
+		    val = *s & 0x0f;
+		    nleft = 2;
+		    encbuf[encidx=0] = *s;
+		}
+		else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
+		    val = *s & 0x07;
+		    nleft = 3;
+		    encbuf[encidx=0] = *s;
+		}
+		else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
+		    val = *s & 0x03;
+		    nleft = 4;
+		    encbuf[encidx=0] = *s;
+		}
+		else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
+		    val = *s & 0x01;
+		    nleft = 5;
+		    encbuf[encidx=0] = *s;
+		}
+		else {	/* invalid encoding: print as \xnn */
+		    if( p ) {
+			sprintf(p, "\\x%02x", *s );
+			p += 4;
+		    }
+		    n += 4;
+		    resync = 1;
+		}
+	    }
+	    else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
+		if( p ) {
+		    sprintf(p, "\\x%02x", *s );
+		    p += 4;
+		}
+		n += 4;
+		nleft = 0;
+		resync = 1;
+	    }
+	    else {
+		encbuf[++encidx] = *s;
+		val <<= 6;
+		val |= *s & 0x3f;
+		if( !--nleft ) { /* ready */
+		    if( active_charset ) { /* table lookup */
+			for(i=0; i < 128; i++ ) {
+			    if( active_charset[i] == val )
+				break;
+			}
+			if( i < 128 ) { /* we can print this one */
+			    if( p ) *p++ = i+128;
+			    n++;
+			}
+			else { /* we do not have a translation: print utf8 */
+			    if( p ) {
+				for(i=0; i < encidx; i++ ) {
+				    sprintf(p, "\\x%02x", encbuf[i] );
+				    p += 4;
+				}
+			    }
+			    n += encidx*4;
+			}
+		    }
+		    else { /* native set */
+			if( val >= 0x80 && val < 256 ) {
+			    n++;    /* we can simply print this character */
+			    if( p ) *p++ = val;
+			}
+			else { /* we do not have a translation: print utf8 */
+			    if( p ) {
+				for(i=0; i < encidx; i++ ) {
+				    sprintf(p, "\\x%02x", encbuf[i] );
+				    p += 4;
+				}
+			    }
+			    n += encidx*4;
+			}
+		    }
 
-    buffer = p = m_alloc( n );
-    for( s=string; *s; ) {
-	if( !(*s & 0x80) )
-	    *p++ = *s++;
-	else if( (*s & 0xe0) == 0xc0 ) {
-	    u32 val;
-	    if( (s[1] & 0xc0) != 0x80 )
-		;
-	    val = (*s << 6) | (s[1] & 0x3f);
+		}
+
+	    }
+	}
+	if( !buffer ) { /* allocate the buffer after the first pass */
+	    buffer = p = m_alloc( n + 1 );
+	}
+	else {
+	    *p = 0; /* make a string */
+	    return buffer;
 	}
-	else if( (*s & 0xf0) == 0xe0 )
-	    n += 3;
-	else if( (*s & 0xf8) == 0xf0 )
-	    n += 4;
-	else
-	    n++; /* invalid encoding */
     }
-   #endif
-     return m_strdup(string);
-
 }
 
 
-/****************
- * check whether string is a valid UTF8 string.
- * Returns 0 = Okay
- *	   1 = Too short
- *	   2 = invalid encoding
- */
-int
-check_utf8_string( const char *string )
-{
-    /*fixme */
-    return 0;
-}
-
 
 /*********************************************
  ********** missing string functions *********
diff --git a/util/ttyio.c b/util/ttyio.c
index 3f5eb7900..74f6ce0f6 100644
--- a/util/ttyio.c
+++ b/util/ttyio.c
@@ -235,6 +235,29 @@ tty_print_string( byte *p, size_t n )
   #endif
 }
 
+void
+tty_print_utf8_string( byte *p, size_t n )
+{
+    size_t i;
+    char *buf;
+
+    if (no_terminal)
+	return;
+
+    /* we can handle plain ascii simpler, so check for it first */
+    for(i=0; i < n; i++ ) {
+	if( p[i] & 0x80 )
+	    break;
+    }
+    if( i < n ) {
+	buf = utf8_to_native( p, n );
+	tty_printf("%s", buf );
+	m_free( buf );
+    }
+    else
+	tty_print_string( p, n );
+}
+