提交 ddbaff53 编写于 作者: H Heinrich Schuchardt

lib/charset: utf8_get() should return error

utf8_get() should return an error if hitting an illegal UTF-8 sequence and
not silently convert the input to a question mark.

Correct utf8_get() and its unit test.

console_read_unicode() now will ignore illegal UTF-8 sequences.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
上级 73bb90ca
...@@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] = ...@@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] =
* *
* @read_u8: - stream reader * @read_u8: - stream reader
* @src: - string buffer passed to stream reader, optional * @src: - string buffer passed to stream reader, optional
* Return: - Unicode code point * Return: - Unicode code point, or -1
*/ */
static int get_code(u8 (*read_u8)(void *data), void *data) static int get_code(u8 (*read_u8)(void *data), void *data)
{ {
...@@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data) ...@@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data)
} }
return ch; return ch;
error: error:
return '?'; return -1;
} }
/** /**
...@@ -120,14 +120,21 @@ static u8 read_console(void *data) ...@@ -120,14 +120,21 @@ static u8 read_console(void *data)
int console_read_unicode(s32 *code) int console_read_unicode(s32 *code)
{ {
if (!tstc()) { for (;;) {
/* No input available */ s32 c;
return 1;
}
/* Read Unicode code */ if (!tstc()) {
*code = get_code(read_console, NULL); /* No input available */
return 0; return 1;
}
/* Read Unicode code */
c = get_code(read_console, NULL);
if (c > 0) {
*code = c;
return 0;
}
}
} }
s32 utf8_get(const char **src) s32 utf8_get(const char **src)
......
...@@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, ...@@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
static const char j4[] = {0xa1, 0x00};
static int unicode_test_u16_strlen(struct unit_test_state *uts) static int unicode_test_u16_strlen(struct unit_test_state *uts)
{ {
...@@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts) ...@@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts)
ut_asserteq(0x0001048d, code); ut_asserteq(0x0001048d, code);
ut_asserteq_ptr(s, d4 + 4); ut_asserteq_ptr(s, d4 + 4);
/* Check illegal character */
s = j4;
code = utf8_get((const char **)&s);
ut_asserteq(-1, code);
ut_asserteq_ptr(j4 + 1, s);
return 0; return 0;
} }
UNICODE_TEST(unicode_test_utf8_get); UNICODE_TEST(unicode_test_utf8_get);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册