提交 3e7fc205 编写于 作者: A Andrew Gabbasov 提交者: Jan Kara

udf: Join functions for UTF8 and NLS conversions

There is not much sense in having separate functions for UTF8 and
NLS conversions, since UTF8 encoding is actually a special case
of NLS.

However, although UTF8 is also supported by the general NLS framework,
it would be good to have separate UTF8 character conversion functions
(char2uni and uni2char) locally in the UDF code, so that they can be
used even if NLS support is not enabled in the kernel configuration.
Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com>
Signed-off-by: Jan Kara <jack@suse.cz>
上级 525e2c56
...@@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) ...@@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
memcpy(dest->u_name, ptr + 1, exactsize - 1); memcpy(dest->u_name, ptr + 1, exactsize - 1);
} }
/* static int udf_uni2char_utf8(wchar_t uni,
* udf_CS0toUTF8 unsigned char *out,
* int boundlen)
* PURPOSE
* Convert OSTA Compressed Unicode to the UTF-8 equivalent.
*
* PRE-CONDITIONS
* utf Pointer to UTF-8 output buffer.
* ocu Pointer to OSTA Compressed Unicode input buffer
* of size UDF_NAME_LEN bytes.
* both of type "struct ustr *"
*
* POST-CONDITIONS
* <return> >= 0 on success.
*
* HISTORY
* November 12, 1997 - Andrew E. Mileski
* Written, tested, and released.
*/
int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
{ {
const uint8_t *ocu; int u_len = 0;
uint8_t cmp_id, ocu_len;
int i; if (boundlen <= 0)
return -ENAMETOOLONG;
ocu_len = ocu_i->u_len;
if (ocu_len == 0) { if (uni < 0x80) {
memset(utf_o, 0, sizeof(struct ustr)); out[u_len++] = (unsigned char)uni;
return 0; } else if (uni < 0x800) {
} if (boundlen < 2)
return -ENAMETOOLONG;
cmp_id = ocu_i->u_cmpID; out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
if (cmp_id != 8 && cmp_id != 16) { out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
memset(utf_o, 0, sizeof(struct ustr)); } else {
pr_err("unknown compression code (%d) stri=%s\n", if (boundlen < 3)
cmp_id, ocu_i->u_name); return -ENAMETOOLONG;
return -EINVAL; out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
} out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
ocu = ocu_i->u_name;
utf_o->u_len = 0;
for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
/* Expand OSTA compressed Unicode to Unicode */
uint32_t c = ocu[i++];
if (cmp_id == 16)
c = (c << 8) | ocu[i++];
/* Compress Unicode to UTF-8 */
if (c < 0x80U)
utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
else if (c < 0x800U) {
if (utf_o->u_len > (UDF_NAME_LEN - 4))
break;
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0xc0 | (c >> 6));
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0x80 | (c & 0x3f));
} else {
if (utf_o->u_len > (UDF_NAME_LEN - 5))
break;
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0xe0 | (c >> 12));
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0x80 |
((c >> 6) & 0x3f));
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0x80 | (c & 0x3f));
}
} }
utf_o->u_cmpID = 8; return u_len;
return utf_o->u_len;
} }
/* static int udf_char2uni_utf8(const unsigned char *in,
* int boundlen,
* udf_UTF8toCS0 wchar_t *uni)
*
* PURPOSE
* Convert UTF-8 to the OSTA Compressed Unicode equivalent.
*
* DESCRIPTION
* This routine is only called by udf_lookup().
*
* PRE-CONDITIONS
* ocu Pointer to OSTA Compressed Unicode output
* buffer of size UDF_NAME_LEN bytes.
* utf Pointer to UTF-8 input buffer.
* utf_len Length of UTF-8 input buffer in bytes.
*
* POST-CONDITIONS
* <return> Zero on success.
*
* HISTORY
* November 12, 1997 - Andrew E. Mileski
* Written, tested, and released.
*/
static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
{ {
unsigned c, i, max_val, utf_char; unsigned int utf_char;
int utf_cnt, u_len, u_ch; unsigned char c;
int utf_cnt, u_len;
memset(ocu, 0, sizeof(dstring) * length); utf_char = 0;
ocu[0] = 8; utf_cnt = 0;
max_val = 0xffU; for (u_len = 0; u_len < boundlen;) {
u_ch = 1; c = in[u_len++];
try_again:
u_len = 0U;
utf_char = 0U;
utf_cnt = 0U;
for (i = 0U; i < utf->u_len; i++) {
/* Name didn't fit? */
if (u_len + 1 + u_ch >= length)
return 0;
c = (uint8_t)utf->u_name[i];
/* Complete a multi-byte UTF-8 character */ /* Complete a multi-byte UTF-8 character */
if (utf_cnt) { if (utf_cnt) {
utf_char = (utf_char << 6) | (c & 0x3fU); utf_char = (utf_char << 6) | (c & 0x3f);
if (--utf_cnt) if (--utf_cnt)
continue; continue;
} else { } else {
/* Check for a multi-byte UTF-8 character */ /* Check for a multi-byte UTF-8 character */
if (c & 0x80U) { if (c & 0x80) {
/* Start a multi-byte UTF-8 character */ /* Start a multi-byte UTF-8 character */
if ((c & 0xe0U) == 0xc0U) { if ((c & 0xe0) == 0xc0) {
utf_char = c & 0x1fU; utf_char = c & 0x1f;
utf_cnt = 1; utf_cnt = 1;
} else if ((c & 0xf0U) == 0xe0U) { } else if ((c & 0xf0) == 0xe0) {
utf_char = c & 0x0fU; utf_char = c & 0x0f;
utf_cnt = 2; utf_cnt = 2;
} else if ((c & 0xf8U) == 0xf0U) { } else if ((c & 0xf8) == 0xf0) {
utf_char = c & 0x07U; utf_char = c & 0x07;
utf_cnt = 3; utf_cnt = 3;
} else if ((c & 0xfcU) == 0xf8U) { } else if ((c & 0xfc) == 0xf8) {
utf_char = c & 0x03U; utf_char = c & 0x03;
utf_cnt = 4; utf_cnt = 4;
} else if ((c & 0xfeU) == 0xfcU) { } else if ((c & 0xfe) == 0xfc) {
utf_char = c & 0x01U; utf_char = c & 0x01;
utf_cnt = 5; utf_cnt = 5;
} else { } else {
goto error_out; utf_cnt = -1;
break;
} }
continue; continue;
} else { } else {
...@@ -228,36 +149,19 @@ static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) ...@@ -228,36 +149,19 @@ static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
utf_char = c; utf_char = c;
} }
} }
*uni = utf_char;
/* Choose no compression if necessary */ break;
if (utf_char > max_val) {
if (max_val == 0xffU) {
max_val = 0xffffU;
ocu[0] = (uint8_t)0x10U;
u_ch = 2;
goto try_again;
}
goto error_out;
}
if (max_val == 0xffffU)
ocu[++u_len] = (uint8_t)(utf_char >> 8);
ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
} }
if (utf_cnt) { if (utf_cnt) {
error_out: *uni = '?';
ocu[++u_len] = '?'; return -EINVAL;
printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
} }
return u_len;
ocu[length - 1] = (uint8_t)u_len + 1;
return u_len + 1;
} }
static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, static int udf_name_from_CS0(struct ustr *utf_o,
const struct ustr *ocu_i) const struct ustr *ocu_i,
int (*conv_f)(wchar_t, unsigned char *, int))
{ {
const uint8_t *ocu; const uint8_t *ocu;
uint8_t cmp_id, ocu_len; uint8_t cmp_id, ocu_len;
...@@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, ...@@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
if (cmp_id == 16) if (cmp_id == 16)
c = (c << 8) | ocu[i++]; c = (c << 8) | ocu[i++];
len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], len = conv_f(c, &utf_o->u_name[utf_o->u_len],
UDF_NAME_LEN - 2 - utf_o->u_len); UDF_NAME_LEN - 2 - utf_o->u_len);
/* Valid character? */ /* Valid character? */
if (len >= 0) if (len >= 0)
utf_o->u_len += len; utf_o->u_len += len;
else if (len == -ENAMETOOLONG)
break;
else else
utf_o->u_name[utf_o->u_len++] = '?'; utf_o->u_name[utf_o->u_len++] = '?';
} }
...@@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, ...@@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
return utf_o->u_len; return utf_o->u_len;
} }
static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length,
int length) int (*conv_f)(const unsigned char *, int, wchar_t *))
{ {
int len; int i, len;
unsigned i, max_val; unsigned int max_val;
uint16_t uni_char; wchar_t uni_char;
int u_len, u_ch; int u_len, u_ch;
memset(ocu, 0, sizeof(dstring) * length); memset(ocu, 0, sizeof(dstring) * length);
ocu[0] = 8; ocu[0] = 8;
max_val = 0xffU; max_val = 0xff;
u_ch = 1; u_ch = 1;
try_again: try_again:
u_len = 0U; u_len = 0;
for (i = 0U; i < uni->u_len; i++) { for (i = 0; i < uni->u_len; i++) {
/* Name didn't fit? */ /* Name didn't fit? */
if (u_len + 1 + u_ch >= length) if (u_len + 1 + u_ch >= length)
return 0; return 0;
len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char);
if (!len) if (!len)
continue; continue;
/* Invalid character, deal with it */ /* Invalid character, deal with it */
...@@ -328,15 +234,15 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, ...@@ -328,15 +234,15 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
} }
if (uni_char > max_val) { if (uni_char > max_val) {
max_val = 0xffffU; max_val = 0xffff;
ocu[0] = (uint8_t)0x10U; ocu[0] = 0x10;
u_ch = 2; u_ch = 2;
goto try_again; goto try_again;
} }
if (max_val == 0xffffU) if (max_val == 0xffff)
ocu[++u_len] = (uint8_t)(uni_char >> 8); ocu[++u_len] = (uint8_t)(uni_char >> 8);
ocu[++u_len] = (uint8_t)(uni_char & 0xffU); ocu[++u_len] = (uint8_t)(uni_char & 0xff);
i += len - 1; i += len - 1;
} }
...@@ -344,10 +250,16 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, ...@@ -344,10 +250,16 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
return u_len + 1; return u_len + 1;
} }
/*
 * udf_CS0toUTF8
 *
 * Convert OSTA Compressed Unicode (CS0) to the UTF-8 equivalent.
 *
 * Thin wrapper: hands both ustr buffers to udf_name_from_CS0() with
 * udf_uni2char_utf8 as the per-character converter, and returns that
 * call's result unchanged.
 */
int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
{
	int ret;

	ret = udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8);
	return ret;
}
int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
uint8_t *dname, int dlen) uint8_t *dname, int dlen)
{ {
struct ustr *filename, *unifilename; struct ustr *filename, *unifilename;
int (*conv_f)(wchar_t, unsigned char *, int);
int ret; int ret;
if (!slen) if (!slen)
...@@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, ...@@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
udf_build_ustr_exact(unifilename, sname, slen); udf_build_ustr_exact(unifilename, sname, slen);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
ret = udf_CS0toUTF8(filename, unifilename); conv_f = udf_uni2char_utf8;
if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
goto out2;
}
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename, conv_f = UDF_SB(sb)->s_nls_map->uni2char;
unifilename);
if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
goto out2;
}
} else } else
BUG(); BUG();
ret = udf_name_from_CS0(filename, unifilename, conv_f);
if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
goto out2;
}
ret = udf_translate_to_linux(dname, dlen, ret = udf_translate_to_linux(dname, dlen,
filename->u_name, filename->u_len, filename->u_name, filename->u_len,
unifilename->u_name, unifilename->u_len); unifilename->u_name, unifilename->u_len);
...@@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, ...@@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
uint8_t *dname, int dlen) uint8_t *dname, int dlen)
{ {
struct ustr unifilename; struct ustr unifilename;
int namelen; int (*conv_f)(const unsigned char *, int, wchar_t *);
if (!udf_char_to_ustr(&unifilename, sname, slen)) if (!udf_char_to_ustr(&unifilename, sname, slen))
return 0; return 0;
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
namelen = udf_UTF8toCS0(dname, &unifilename, dlen); conv_f = udf_char2uni_utf8;
if (!namelen)
return 0;
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, conv_f = UDF_SB(sb)->s_nls_map->char2uni;
&unifilename, dlen);
if (!namelen)
return 0;
} else } else
return 0; BUG();
return namelen; return udf_name_to_CS0(dname, &unifilename, dlen, conv_f);
} }
#define ILLEGAL_CHAR_MARK '_' #define ILLEGAL_CHAR_MARK '_'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册