changeset 20:683ebd334b21

Try to decode octal and "Meta-ASCII" escapes
author Guido Berhoerster <guido+xwrited@berhoerster.name>
date Sat, 30 Jun 2018 22:07:46 +0200
parents f0accfc74f7b
children 0897a8fe27af
files xwd-application.c
diffstat 1 files changed, 71 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/xwd-application.c	Sat Jul 28 22:02:24 2018 +0200
+++ b/xwd-application.c	Sat Jun 30 22:07:46 2018 +0200
@@ -93,6 +93,68 @@
 }
 
 static void
+string_decode_octal(GString *string)
+{
+	/*
+	 * Replace every "\ooo" escape (exactly three octal digits, value at
+	 * most 0377) in string with the byte it encodes and copy all other
+	 * bytes verbatim. The digits are checked by hand since
+	 * g_ascii_strtoull() also accepts leading whitespace or a sign and
+	 * would thus decode non-escapes such as "\ 12" or "\-00" as well.
+	 */
+	GString		*result = g_string_sized_new(string->len);
+	const gchar	*p = string->str;
+	gsize		remaining = string->len;
+
+	while (remaining > 0) {
+		if ((remaining >= 4) && (p[0] == '\\') &&
+		    (p[1] >= '0') && (p[1] <= '3') &&
+		    (p[2] >= '0') && (p[2] <= '7') &&
+		    (p[3] >= '0') && (p[3] <= '7')) {
+			/* first digit <= 3 keeps the value within a byte */
+			g_string_append_c(result, (gchar)(((p[1] - '0') << 6) |
+			    ((p[2] - '0') << 3) | (p[3] - '0')));
+			p += 4;
+			remaining -= 4;
+			continue;
+		}

+		g_string_append_c(result, *p++);
+		remaining--;
+	}

+	string_copy(string, result);
+	g_string_free(result, TRUE);
+}
+
+static void
+string_decode_meta_ascii(GString *string)
+{
+	/*
+	 * Turn each "M-" followed by a byte with a clear 8th bit into that
+	 * byte with the 8th bit set; all other bytes are copied unchanged.
+	 */
+	const gchar	*src = string->str;
+	gsize		len = string->len;
+	gsize		i = 0;
+	GString		*decoded = g_string_sized_new(len);

+	while (i < len) {
+		if ((len - i < 3) || (src[i] != 'M') || (src[i + 1] != '-') ||
+		    ((src[i + 2] & 0x80) != 0)) {
+			g_string_append_c(decoded, src[i]);
+			i++;
+			continue;
+		}
+		g_string_append_c(decoded, src[i + 2] | 0x80);
+		i += 3;
+	}

+	string_copy(string, decoded);
+	g_string_free(decoded, TRUE);
+}
+
+static void
 string_to_valid_utf8(GString *string)
 {
 	GString		*result;
@@ -243,9 +305,18 @@
 	 * (U+FFFD) and non-printable characters are removed. Additionally,
 	 * padding typically added by wall(1) implementations is removed in
 	 * order to improve readability.
+	 * Some write(1) and wall(1) implementations encode non-ASCII
+	 * characters, in particular UTF-8 sequences, by prefixing them with
+	 * "M-" and clearing the 8th bit while others (e.g. util-linux) use
+	 * octal escape sequences. These encodings are reversed before messages
+	 * are processed further. However some implementations such as NetBSD
+	 * write(1) unconditionally process each byte with toascii(3) which
+	 * makes it impossible to restore the original value.
 	 */
 	message = g_string_new_len(self->message_buf->str,
 	    self->message_buf->len);
+	string_decode_octal(message);
+	string_decode_meta_ascii(message);
 	string_to_valid_utf8(message);
 	string_filter_nonprintable(message);
 	string_trim_lines(message);