/*
 *   fs/cifs/cifs_unicode.c
 *
 *   Copyright (c) International Business Machines  Corp., 2000,2009
 *   Modified by Steve French (sfrench@us.ibm.com)
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
22
#include <linux/slab.h>
L
Linus Torvalds 已提交
23 24 25
#include "cifs_unicode.h"
#include "cifs_uniupr.h"
#include "cifspdu.h"
26
#include "cifsglob.h"
L
Linus Torvalds 已提交
27 28
#include "cifs_debug.h"

29
/*
30 31
 * cifs_utf16_bytes - how long will a string be after conversion?
 * @utf16 - pointer to input string
32 33 34
 * @maxbytes - don't go past this many bytes of input string
 * @codepage - destination codepage
 *
35
 * Walk a utf16le string and return the number of bytes that the string will
36 37 38 39
 * be after being converted to the given charset, not including any null
 * termination required. Don't walk past maxbytes in the source buffer.
 */
int
40
cifs_utf16_bytes(const __le16 *from, int maxbytes,
41 42 43 44 45 46
		const struct nls_table *codepage)
{
	int i;
	int charlen, outlen = 0;
	int maxwords = maxbytes / 2;
	char tmp[NLS_MAX_CHARSET_SIZE];
47
	__u16 ftmp;
48

49 50 51 52 53 54
	for (i = 0; i < maxwords; i++) {
		ftmp = get_unaligned_le16(&from[i]);
		if (ftmp == 0)
			break;

		charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
55 56 57 58 59 60 61 62 63
		if (charlen > 0)
			outlen += charlen;
		else
			outlen++;
	}

	return outlen;
}

64
/*
65
 * cifs_mapchar - convert a host-endian char to proper char in codepage
66
 * @target - where converted character should be copied
67
 * @src_char - 2 byte host-endian source character
68 69 70 71 72 73 74 75
 * @cp - codepage to which character should be converted
 * @mapchar - should character be mapped according to mapchars mount option?
 *
 * This function handles the conversion of a single character. It is the
 * responsibility of the caller to ensure that the target buffer is large
 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
 */
static int
76
cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
77 78 79 80 81 82 83 84 85 86 87 88
	     bool mapchar)
{
	int len = 1;

	if (!mapchar)
		goto cp_convert;

	/*
	 * BB: Cannot handle remapping UNI_SLASH until all the calls to
	 *     build_path_from_dentry are modified, as they use slash as
	 *     separator.
	 */
89
	switch (src_char) {
90 91 92
	case UNI_COLON:
		*target = ':';
		break;
93
	case UNI_ASTERISK:
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
		*target = '*';
		break;
	case UNI_QUESTION:
		*target = '?';
		break;
	case UNI_PIPE:
		*target = '|';
		break;
	case UNI_GRTRTHAN:
		*target = '>';
		break;
	case UNI_LESSTHAN:
		*target = '<';
		break;
	default:
		goto cp_convert;
	}

out:
	return len;

cp_convert:
116
	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
117 118 119 120 121 122 123 124
	if (len <= 0) {
		*target = '?';
		len = 1;
	}
	goto out;
}

/*
125
 * cifs_from_utf16 - convert utf16le string to local charset
126 127 128 129 130 131 132
 * @to - destination buffer
 * @from - source buffer
 * @tolen - destination buffer size (in bytes)
 * @fromlen - source buffer size (in bytes)
 * @codepage - codepage to which characters should be converted
 * @mapchar - should characters be remapped according to the mapchars option?
 *
133
 * Convert a little-endian utf16le string (as sent by the server) to a string
134 135 136 137 138 139 140 141
 * in the provided codepage. The tolen and fromlen parameters are to ensure
 * that the code doesn't walk off of the end of the buffer (which is always
 * a danger if the alignment of the source buffer is off). The destination
 * string is always properly null terminated and fits in the destination
 * buffer. Returns the length of the destination string in bytes (including
 * null terminator).
 *
 * Note that some windows versions actually send multiword UTF-16 characters
142
 * instead of straight UTF16-2. The linux nls routines however aren't able to
143 144 145 146
 * deal with those characters properly. In the event that we get some of
 * those characters, they won't be translated properly.
 */
int
147
cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
148 149 150 151 152 153 154
		 const struct nls_table *codepage, bool mapchar)
{
	int i, charlen, safelen;
	int outlen = 0;
	int nullsize = nls_nullsize(codepage);
	int fromwords = fromlen / 2;
	char tmp[NLS_MAX_CHARSET_SIZE];
155
	__u16 ftmp;
156 157 158 159 160 161 162 163 164

	/*
	 * because the chars can be of varying widths, we need to take care
	 * not to overflow the destination buffer when we get close to the
	 * end of it. Until we get to this offset, we don't need to check
	 * for overflow however.
	 */
	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);

165 166 167 168 169
	for (i = 0; i < fromwords; i++) {
		ftmp = get_unaligned_le16(&from[i]);
		if (ftmp == 0)
			break;

170 171 172 173 174
		/*
		 * check to see if converting this character might make the
		 * conversion bleed into the null terminator
		 */
		if (outlen >= safelen) {
175
			charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
176 177 178 179 180
			if ((outlen + charlen) > (tolen - nullsize))
				break;
		}

		/* put converted char into 'to' buffer */
181
		charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
182 183 184 185 186 187 188 189 190 191
		outlen += charlen;
	}

	/* properly null-terminate string */
	for (i = 0; i < nullsize; i++)
		to[outlen++] = 0;

	return outlen;
}

L
Linus Torvalds 已提交
192
/*
193
 * NAME:	cifs_strtoUTF16()
L
Linus Torvalds 已提交
194 195 196 197 198
 *
 * FUNCTION:	Convert character string to unicode string
 *
 */
int
199
cifs_strtoUTF16(__le16 *to, const char *from, int len,
L
Linus Torvalds 已提交
200 201 202 203
	      const struct nls_table *codepage)
{
	int charlen;
	int i;
204
	wchar_t wchar_to; /* needed to quiet sparse */
L
Linus Torvalds 已提交
205 206

	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
207
		charlen = codepage->char2uni(from, len, &wchar_to);
L
Linus Torvalds 已提交
208
		if (charlen < 1) {
209
			cERROR(1, "strtoUTF16: char2uni of 0x%x returned %d",
210
				*from, charlen);
211
			/* A question mark */
212
			wchar_to = 0x003f;
L
Linus Torvalds 已提交
213
			charlen = 1;
214 215
		}
		put_unaligned_le16(wchar_to, &to[i]);
L
Linus Torvalds 已提交
216 217
	}

218
	put_unaligned_le16(0, &to[i]);
L
Linus Torvalds 已提交
219 220 221
	return i;
}

222
/*
223 224
 * cifs_strndup_from_utf16 - copy a string from wire format to the local
 * codepage
225 226 227 228 229 230 231 232 233 234
 * @src - source string
 * @maxlen - don't walk past this many bytes in the source string
 * @is_unicode - is this a unicode string?
 * @codepage - destination codepage
 *
 * Take a string given by the server, convert it to the local codepage and
 * put it in a new buffer. Returns a pointer to the new string or NULL on
 * error.
 */
char *
235 236
cifs_strndup_from_utf16(const char *src, const int maxlen,
			const bool is_unicode, const struct nls_table *codepage)
237 238 239 240 241
{
	int len;
	char *dst;

	if (is_unicode) {
242
		len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
243 244 245 246
		len += nls_nullsize(codepage);
		dst = kmalloc(len, GFP_KERNEL);
		if (!dst)
			return NULL;
247
		cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
248 249 250 251 252 253 254 255 256 257 258 259 260
			       false);
	} else {
		len = strnlen(src, maxlen);
		len++;
		dst = kmalloc(len, GFP_KERNEL);
		if (!dst)
			return NULL;
		strlcpy(dst, src, len);
	}

	return dst;
}

261 262 263 264 265 266 267
/*
 * Convert 16 bit Unicode pathname to wire format from string in current code
 * page. Conversion may involve remapping up the six characters that are
 * only legal in POSIX-like OS (if they are present in the string). Path
 * names are little endian 16 bit Unicode on the wire
 */
int
268
cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
269 270 271 272
		 const struct nls_table *cp, int mapChars)
{
	int i, j, charlen;
	char src_char;
273 274
	__le16 dst_char;
	wchar_t tmp;
275 276

	if (!mapChars)
277
		return cifs_strtoUTF16(target, source, PATH_MAX, cp);
278

279
	for (i = 0, j = 0; i < srclen; j++) {
280
		src_char = source[i];
281
		charlen = 1;
282 283
		switch (src_char) {
		case 0:
284
			put_unaligned(0, &target[j]);
285
			goto ctoUTF16_out;
286
		case ':':
287
			dst_char = cpu_to_le16(UNI_COLON);
288 289
			break;
		case '*':
290
			dst_char = cpu_to_le16(UNI_ASTERISK);
291 292
			break;
		case '?':
293
			dst_char = cpu_to_le16(UNI_QUESTION);
294 295
			break;
		case '<':
296
			dst_char = cpu_to_le16(UNI_LESSTHAN);
297 298
			break;
		case '>':
299
			dst_char = cpu_to_le16(UNI_GRTRTHAN);
300 301
			break;
		case '|':
302
			dst_char = cpu_to_le16(UNI_PIPE);
303 304 305 306 307 308 309
			break;
		/*
		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
		 * until all the calls to build_path_from_dentry are modified,
		 * as they use backslash as separator.
		 */
		default:
310 311 312
			charlen = cp->char2uni(source + i, srclen - i, &tmp);
			dst_char = cpu_to_le16(tmp);

313 314 315 316 317
			/*
			 * if no match, use question mark, which at least in
			 * some cases serves as wild card
			 */
			if (charlen < 1) {
318
				dst_char = cpu_to_le16(0x003f);
319 320 321
				charlen = 1;
			}
		}
322 323 324 325 326
		/*
		 * character may take more than one byte in the source string,
		 * but will take exactly two bytes in the target string
		 */
		i += charlen;
327
		put_unaligned(dst_char, &target[j]);
328 329
	}

330
ctoUTF16_out:
331 332
	return i;
}