# HG changeset patch # User Guido Berhoerster # Date 1530389266 -7200 # Node ID 683ebd334b21e83f3ac4249d7c7041146f51ae9f # Parent f0accfc74f7bf0b3a7323bf08dc987b2a8f814c3 Try to decode octal and "Meta-ASCII" escapes diff -r f0accfc74f7b -r 683ebd334b21 xwd-application.c --- a/xwd-application.c Sat Jul 28 22:02:24 2018 +0200 +++ b/xwd-application.c Sat Jun 30 22:07:46 2018 +0200 @@ -93,6 +93,68 @@ } static void +string_decode_octal(GString *string) +{ + GString *result; + gsize remaining = string->len; + gchar *p = string->str; + gchar oct[4] = { '\0' }; + guint64 num; + char *end; + + result = g_string_sized_new(string->len); + + while (remaining > 0) { + if ((remaining >= 4) && (*p == '\\')) { + /* put octal number in NUL-terminated buffer */ + memcpy(oct, p + 1, 3); + /* convert valid octal number to byte */ + num = g_ascii_strtoull(oct, &end, 8); + if ((*end == '\0') && (num <= 0xff)) { + /* conversion succeeded */ + p += 4; + remaining -= 4; + g_string_append_c(result, num); + continue; + } + } + + g_string_append_c(result, *p++); + remaining--; + } + + string_copy(string, result); + g_string_free(result, TRUE); +} + +static void +string_decode_meta_ascii(GString *string) +{ + GString *result; + gchar *p = string->str; + gsize remaining = string->len; + + result = g_string_sized_new(string->len); + + while (remaining > 0) { + if ((remaining >= 3) && (*p == 'M') && (*(p + 1) == '-') && + ((*(p + 2) & (1 << 7)) == 0)) { + /* restore 8th bit */ + g_string_append_c(result, *(p + 2) | (1 << 7)); + remaining -= 3; + p += 3; + continue; + } + + g_string_append_c(result, *p++); + remaining--; + } + + string_copy(string, result); + g_string_free(result, TRUE); +} + +static void string_to_valid_utf8(GString *string) { GString *result; @@ -243,9 +305,18 @@ * (U+FFFD) and non-printable characters are removed. Additionally, * padding typically added by wall(1) implementations is removed in * order to improve readability. 
+ * Some write(1) and wall(1) implementations encode non-ASCII + * characters, in particular UTF-8 sequences, by prefixing them with + * "M-" and clearing the 8th bit while others (e.g. util-linux) use + * octal escape sequences. These encodings are reversed before messages + * are processed further. However, some implementations such as NetBSD + * write(1) unconditionally process each byte with toascii(3) which + * makes it impossible to restore the original value. */ message = g_string_new_len(self->message_buf->str, self->message_buf->len); + string_decode_octal(message); + string_decode_meta_ascii(message); string_to_valid_utf8(message); string_filter_nonprintable(message); string_trim_lines(message);