diff options
Diffstat (limited to '')
-rw-r--r-- | util/ChangeLog | 11 | ||||
-rw-r--r-- | util/miscutil.c | 25 | ||||
-rw-r--r-- | util/strgutil.c | 204 | ||||
-rw-r--r-- | util/ttyio.c | 23 |
4 files changed, 202 insertions, 61 deletions
diff --git a/util/ChangeLog b/util/ChangeLog
index 95b06019a..a451e4a9a 100644
--- a/util/ChangeLog
+++ b/util/ChangeLog
@@ -1,3 +1,14 @@
+Tue Aug 31 17:20:44 CEST 1999  Werner Koch  <[email protected]>
+
+
+	* strgutil.c (utf8_to_native): Implemented.
+	(check_utf8_string): Removed.
+
+	* miscutil.c (make_printable_string): Fixed possible buffer overflow.
+	(print_utf8_string): New.
+
+	* ttyio.c (tty_print_utf8_string): New.
+
 Mon Aug 30 20:38:33 CEST 1999  Werner Koch  <[email protected]>
 
 
diff --git a/util/miscutil.c b/util/miscutil.c
index 2b95d97d6..eb72415bb 100644
--- a/util/miscutil.c
+++ b/util/miscutil.c
@@ -194,6 +194,29 @@ print_string( FILE *fp, const byte *p, size_t n, int delim )
 }
 
 /****************
+ * Print an UTF8 string to FP and filter all control characters out.
+ */
+void
+print_utf8_string( FILE *fp, const byte *p, size_t n )
+{
+    size_t i;
+    char *buf;
+
+    /* we can handle plain ascii simpler, so check for it first */
+    for(i=0; i < n; i++ ) {
+        if( p[i] & 0x80 )
+            break;
+    }
+    if( i < n ) {
+        buf = utf8_to_native( p, n );
+        fputs( buf, fp );
+        m_free( buf );
+    }
+    else
+        print_string( fp, p, n, 0 );
+}
+
+/****************
  * This function returns a string which is suitable for printing
  * Caller must release it with m_free()
  */
@@ -211,7 +234,7 @@ make_printable_string( const byte *p, size_t n, int delim )
                 || *p=='\v' || *p=='\b' || !*p )
                 buflen += 2;
             else
-                buflen += 3;
+                buflen += 4;
         }
         else
             buflen++;
diff --git a/util/strgutil.c b/util/strgutil.c
index 87eaad423..9ab63a047 100644
--- a/util/strgutil.c
+++ b/util/strgutil.c
@@ -376,77 +376,161 @@ native_to_utf8( const char *string )
 /****************
- * Convert string, which is in UTF8 to native encoding.  Replace
- * illegal encodings by some "\xnn".
+ * Convert string, which is in UTF8 to native encoding.  Replace
+ * illegal encodings by some "\xnn" and quote all control characters
  */
 char *
-utf8_to_native( const char *string )
+utf8_to_native( const char *string, size_t length )
 {
-  #if 0
+    int nleft;       /* continuation bytes still expected */
+    int i;
+    byte encbuf[7];  /* raw bytes of the sequence being decoded */
+    int encidx;      /* index of the last byte stored in encbuf */
     const byte *s;
     size_t n;
-    byte *buffer, *p;
-
-    /* quick check whether we actually have characters with bit 8 set */
-    for( s=string; *s; s++ )
-        if( *s & 0x80 )
-            break;
-    if( !*s ) /* that is easy */
-        return m_strdup(string);
-
-    /* count the extended utf-8 characters */
-    110x xxxx
-    1110 xxxx
-    1111 0xxx
-    for( n=1, s=string; *s; s++ ) {
-        if( !(*s & 0x80) )
-            n++;
-        else if( (*s & 0xe0) == 0xc0 )
-            n += 2;
-        else if( (*s & 0xf0) == 0xe0 )
-            n += 3;
-        else if( (*s & 0xf8) == 0xf0 )
-            n += 4;
-        else
-            n++; /* invalid encoding */
-    }
+    byte *buffer = NULL, *p = NULL;
+    unsigned long val = 0;
+    size_t slen;
+    int resync = 0;  /* set after an invalid byte: skip to a valid start byte */
+
+    /* 1. pass (p==NULL): count the extended utf-8 characters */
+    /* 2. pass (p!=NULL): create string */
+    for( ;; ) {
+        for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
+            if( resync ) {
+                if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
+                    /* still invalid */
+                    if( p ) {
+                        sprintf(p, "\\x%02x", *s );
+                        p += 4;
+                    }
+                    n += 4;
+                    continue;
+                }
+                resync = 0;
+            }
+            if( !nleft ) {
+                if( !(*s & 0x80) ) { /* plain ascii */
+                    if( iscntrl( *s ) ) {
+                        n++;
+                        if( p )
+                            *p++ = '\\';
+                        switch( *s ) {
+                          case '\n': n++; if( p ) *p++ = 'n'; break;
+                          case '\r': n++; if( p ) *p++ = 'r'; break;
+                          case '\f': n++; if( p ) *p++ = 'f'; break;
+                          case '\v': n++; if( p ) *p++ = 'v'; break;
+                          case '\b': n++; if( p ) *p++ = 'b'; break;
+                          case 0   : n++; if( p ) *p++ = '0'; break;
+                          default: n += 3;
+                                   if( p )  /* 1. pass only counts: p is NULL */
+                                       p += sprintf( p, "x%02x", *s );
+                                   break;
+                        }
+                    }
+                    else {
+                        if( p ) *p++ = *s;
+                        n++;
+                    }
+                }
+                else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
+                    val = *s & 0x1f;
+                    nleft = 1;
+                    encbuf[encidx=0] = *s;
+                }
+                else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
+                    val = *s & 0x0f;
+                    nleft = 2;
+                    encbuf[encidx=0] = *s;
+                }
+                else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
+                    val = *s & 0x07;
+                    nleft = 3;
+                    encbuf[encidx=0] = *s;
+                }
+                else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
+                    val = *s & 0x03;
+                    nleft = 4;
+                    encbuf[encidx=0] = *s;
+                }
+                else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
+                    val = *s & 0x01;
+                    nleft = 5;
+                    encbuf[encidx=0] = *s;
+                }
+                else { /* invalid encoding: print as \xnn */
+                    if( p ) {
+                        sprintf(p, "\\x%02x", *s );
+                        p += 4;
+                    }
+                    n += 4;
+                    resync = 1;
+                }
+            }
+            else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
+                if( p ) {
+                    sprintf(p, "\\x%02x", *s );
+                    p += 4;
+                }
+                n += 4;
+                nleft = 0;
+                resync = 1;
+            }
+            else {
+                encbuf[++encidx] = *s;
+                val <<= 6;
+                val |= *s & 0x3f;
+                if( !--nleft ) { /* ready */
+                    if( active_charset ) { /* table lookup */
+                        for(i=0; i < 128; i++ ) {
+                            if( active_charset[i] == val )
+                                break;
+                        }
+                        if( i < 128 ) { /* we can print this one */
+                            if( p ) *p++ = i+128;
+                            n++;
+                        }
+                        else { /* we do not have a translation: print utf8 */
+                            if( p ) {
+                                for(i=0; i <= encidx; i++ ) { /* encidx is an index, not a count */
+                                    sprintf(p, "\\x%02x", encbuf[i] );
+                                    p += 4;
+                                }
+                            }
+                            n += (encidx+1)*4;
+                        }
+                    }
+                    else { /* native set */
+                        if( val >= 0x80 && val < 256 ) {
+                            n++;    /* we can simply print this character */
+                            if( p ) *p++ = val;
+                        }
+                        else { /* we do not have a translation: print utf8 */
+                            if( p ) {
+                                for(i=0; i <= encidx; i++ ) { /* encidx is an index, not a count */
+                                    sprintf(p, "\\x%02x", encbuf[i] );
+                                    p += 4;
+                                }
+                            }
+                            n += (encidx+1)*4;
+                        }
+                    }
-    buffer = p = m_alloc( n );
-    for( s=string; *s; ) {
-        if( !(*s & 0x80) )
-            *p++ = *s++;
-        else if( (*s & 0xe0) == 0xc0 ) {
-            u32 val;
-            if( (s[1] & 0xc0) != 0x80 )
-                ;
-            val = (*s << 6) | (s[1] & 0x3f);
+                }
+
+            }
+        }
+        if( !buffer ) { /* allocate the buffer after the first pass */
+            buffer = p = m_alloc( n + 1 );
+        }
+        else {
+            *p = 0; /* make a string */
+            return buffer;
         }
-        else if( (*s & 0xf0) == 0xe0 )
-            n += 3;
-        else if( (*s & 0xf8) == 0xf0 )
-            n += 4;
-        else
-            n++; /* invalid encoding */
     }
-  #endif
-    return m_strdup(string);
-
 }
-/****************
- * check whether string is a valid UTF8 string.
- * Returns 0 = Okay
- *         1 = Too short
- *         2 = invalid encoding
- */
-int
-check_utf8_string( const char *string )
-{
-    /*fixme */
-    return 0;
-}
-
 /*********************************************
  ********** missing string functions *********
diff --git a/util/ttyio.c b/util/ttyio.c
index 3f5eb7900..74f6ce0f6 100644
--- a/util/ttyio.c
+++ b/util/ttyio.c
@@ -235,6 +235,29 @@ tty_print_string( byte *p, size_t n )
   #endif
 }
 
+void
+tty_print_utf8_string( byte *p, size_t n )
+{
+    size_t i;
+    char *buf;
+
+    if (no_terminal)
+        return;
+
+    /* we can handle plain ascii simpler, so check for it first */
+    for(i=0; i < n; i++ ) {
+        if( p[i] & 0x80 )
+            break;
+    }
+    if( i < n ) {
+        buf = utf8_to_native( p, n );
+        tty_printf("%s", buf );
+        m_free( buf );
+    }
+    else
+        tty_print_string( p, n );
+}
+