lzo1x_compress.c 6.1 KB
Newer Older
1
/*
2
 *  LZO1X Compressor from LZO
3
 *
4
 *  Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
5 6 7 8
 *
 *  The full LZO package can be found at:
 *  http://www.oberhumer.com/opensource/lzo/
 *
9
 *  Changed for Linux kernel use by:
10 11 12 13 14 15 16
 *  Nitin Gupta <nitingupta910@gmail.com>
 *  Richard Purdie <rpurdie@openedhand.com>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <asm/unaligned.h>
17
#include <linux/lzo.h>
18 19 20
#include "lzodefs.h"

static noinline size_t
21 22 23
lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
		    unsigned char *out, size_t *out_len,
		    size_t ti, void *wrkmem)
24
{
25 26
	const unsigned char *ip;
	unsigned char *op;
27
	const unsigned char * const in_end = in + in_len;
28 29 30
	const unsigned char * const ip_end = in + in_len - 20;
	const unsigned char *ii;
	lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
31

32 33 34 35
	op = out;
	ip = in;
	ii = ip;
	ip += ti < 4 ? 4 - ti : 0;
36 37

	for (;;) {
38 39 40
		const unsigned char *m_pos;
		size_t t, m_len, m_off;
		u32 dv;
41
literal:
42 43
		ip += 1 + ((ip - ii) >> 5);
next:
44 45
		if (unlikely(ip >= ip_end))
			break;
46 47 48 49 50 51
		dv = get_unaligned_le32(ip);
		t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
		m_pos = in + dict[t];
		dict[t] = (lzo_dict_t) (ip - in);
		if (unlikely(dv != get_unaligned_le32(m_pos)))
			goto literal;
52

53 54 55 56
		ii -= ti;
		ti = 0;
		t = ip - ii;
		if (t != 0) {
57 58
			if (t <= 3) {
				op[-2] |= t;
59 60 61
				COPY4(op, ii);
				op += t;
			} else if (t <= 16) {
62
				*op++ = (t - 3);
63 64 65
				COPY8(op, ii);
				COPY8(op + 8, ii + 8);
				op += t;
66
			} else {
67 68 69 70
				if (t <= 18) {
					*op++ = (t - 3);
				} else {
					size_t tt = t - 18;
71
					*op++ = 0;
72 73 74 75 76
					while (unlikely(tt > 255)) {
						tt -= 255;
						*op++ = 0;
					}
					*op++ = tt;
77
				}
78 79 80 81 82 83 84 85 86 87
				do {
					COPY8(op, ii);
					COPY8(op + 8, ii + 8);
					op += 16;
					ii += 16;
					t -= 16;
				} while (t >= 16);
				if (t > 0) do {
					*op++ = *ii++;
				} while (--t > 0);
88 89 90
			}
		}

91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
		m_len = 4;
		{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
		u64 v;
		v = get_unaligned((const u64 *) (ip + m_len)) ^
		    get_unaligned((const u64 *) (m_pos + m_len));
		if (unlikely(v == 0)) {
			do {
				m_len += 8;
				v = get_unaligned((const u64 *) (ip + m_len)) ^
				    get_unaligned((const u64 *) (m_pos + m_len));
				if (unlikely(ip + m_len >= ip_end))
					goto m_len_done;
			} while (v == 0);
		}
#  if defined(__LITTLE_ENDIAN)
		m_len += (unsigned) __builtin_ctzll(v) / 8;
#  elif defined(__BIG_ENDIAN)
		m_len += (unsigned) __builtin_clzll(v) / 8;
#  else
#    error "missing endian definition"
#  endif
#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
		u32 v;
		v = get_unaligned((const u32 *) (ip + m_len)) ^
		    get_unaligned((const u32 *) (m_pos + m_len));
		if (unlikely(v == 0)) {
			do {
				m_len += 4;
				v = get_unaligned((const u32 *) (ip + m_len)) ^
				    get_unaligned((const u32 *) (m_pos + m_len));
				if (v != 0)
					break;
				m_len += 4;
				v = get_unaligned((const u32 *) (ip + m_len)) ^
				    get_unaligned((const u32 *) (m_pos + m_len));
				if (unlikely(ip + m_len >= ip_end))
					goto m_len_done;
			} while (v == 0);
		}
#  if defined(__LITTLE_ENDIAN)
		m_len += (unsigned) __builtin_ctz(v) / 8;
#  elif defined(__BIG_ENDIAN)
		m_len += (unsigned) __builtin_clz(v) / 8;
#  else
#    error "missing endian definition"
#  endif
#else
		if (unlikely(ip[m_len] == m_pos[m_len])) {
			do {
				m_len += 1;
				if (ip[m_len] != m_pos[m_len])
					break;
				m_len += 1;
				if (ip[m_len] != m_pos[m_len])
					break;
				m_len += 1;
				if (ip[m_len] != m_pos[m_len])
					break;
				m_len += 1;
				if (ip[m_len] != m_pos[m_len])
					break;
				m_len += 1;
				if (ip[m_len] != m_pos[m_len])
					break;
				m_len += 1;
				if (ip[m_len] != m_pos[m_len])
					break;
				m_len += 1;
				if (ip[m_len] != m_pos[m_len])
					break;
				m_len += 1;
				if (unlikely(ip + m_len >= ip_end))
					goto m_len_done;
			} while (ip[m_len] == m_pos[m_len]);
		}
#endif
		}
m_len_done:
170

171 172 173 174 175 176 177 178 179 180
		m_off = ip - m_pos;
		ip += m_len;
		ii = ip;
		if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
			m_off -= 1;
			*op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
			*op++ = (m_off >> 3);
		} else if (m_off <= M3_MAX_OFFSET) {
			m_off -= 1;
			if (m_len <= M3_MAX_LEN)
181
				*op++ = (M3_MARKER | (m_len - 2));
182 183 184 185 186 187 188 189
			else {
				m_len -= M3_MAX_LEN;
				*op++ = M3_MARKER | 0;
				while (unlikely(m_len > 255)) {
					m_len -= 255;
					*op++ = 0;
				}
				*op++ = (m_len);
190
			}
191 192
			*op++ = (m_off << 2);
			*op++ = (m_off >> 6);
193
		} else {
194 195 196
			m_off -= 0x4000;
			if (m_len <= M4_MAX_LEN)
				*op++ = (M4_MARKER | ((m_off >> 11) & 8)
197
						| (m_len - 2));
198 199 200 201 202 203
			else {
				m_len -= M4_MAX_LEN;
				*op++ = (M4_MARKER | ((m_off >> 11) & 8));
				while (unlikely(m_len > 255)) {
					m_len -= 255;
					*op++ = 0;
204
				}
205
				*op++ = (m_len);
206
			}
207
			*op++ = (m_off << 2);
208 209
			*op++ = (m_off >> 6);
		}
210
		goto next;
211 212
	}
	*out_len = op - out;
213
	return in_end - (ii - ti);
214 215
}

216 217 218
int lzo1x_1_compress(const unsigned char *in, size_t in_len,
		     unsigned char *out, size_t *out_len,
		     void *wrkmem)
219
{
220
	const unsigned char *ip = in;
221
	unsigned char *op = out;
222 223
	size_t l = in_len;
	size_t t = 0;
224

225 226 227 228 229 230 231 232 233
	while (l > 20) {
		size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
		uintptr_t ll_end = (uintptr_t) ip + ll;
		if ((ll_end + ((t + ll) >> 5)) <= ll_end)
			break;
		BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
		memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
		t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
		ip += ll;
234
		op += *out_len;
235
		l  -= ll;
236
	}
237
	t += l;
238 239

	if (t > 0) {
240
		const unsigned char *ii = in + in_len - t;
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256

		if (op == out && t <= 238) {
			*op++ = (17 + t);
		} else if (t <= 3) {
			op[-2] |= t;
		} else if (t <= 18) {
			*op++ = (t - 3);
		} else {
			size_t tt = t - 18;
			*op++ = 0;
			while (tt > 255) {
				tt -= 255;
				*op++ = 0;
			}
			*op++ = tt;
		}
257 258 259 260 261 262 263 264
		if (t >= 16) do {
			COPY8(op, ii);
			COPY8(op + 8, ii + 8);
			op += 16;
			ii += 16;
			t -= 16;
		} while (t >= 16);
		if (t > 0) do {
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
			*op++ = *ii++;
		} while (--t > 0);
	}

	*op++ = M4_MARKER | 1;
	*op++ = 0;
	*op++ = 0;

	*out_len = op - out;
	return LZO_E_OK;
}
EXPORT_SYMBOL_GPL(lzo1x_1_compress);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO1X-1 Compressor");