version bump 0.5.9: sheetRows partial processing

- opts.sheetRows limits parsing; the default (0) parses all rows
- added -n mode to xlsx2csv to control the number of rows processed
- !ref is adjusted to the truncated range; !fullref holds the original full range

version bump 0.5.9: sheetRows partial processing
- opts.sheetRows limits parsing; the default (0) parses all rows
- added -n mode to xlsx2csv to control the number of rows processed
- !ref is adjusted to the truncated range; !fullref holds the original full range
5c4b5827 · SheetJS · 509f7bf9 · 5c4b5827 · 5c4b5827 · 5c4b5827
12 changed files
--- a/Makefile
+++ b/Makefile
 LIB=xlsx
 DEPS=$(wildcard bits/*.js)
 TARGET=$(LIB).js
-FMT=xlsx xlsm xlsb
+FMT=xlsx xlsm xlsb misc

 $(TARGET): $(DEPS)
 	cat $^ > $@

--- a/README.md
+++ b/README.md
@@ -79,7 +79,9 @@ The exported `read` and `readFile` functions accept an options argument:
 | cellHTML    | true    | Parse rich text and save HTML to the .h field |
 | cellNF      | false   | Save number format string to the .z field |
 | sheetStubs  | false   | Create cell objects for stub cells |
+| sheetRows   | 0       | If >0, read the first `sheetRows` rows ** |
 | bookDeps    | false   | If true, parse calculation chains |
+| bookFiles   | false   | If true, add raw files to book object ** |
 | bookProps   | false   | If true, only parse enough to get book metadata ** |
 | bookSheets  | false   | If true, only parse enough to get the sheet names |

@@ -89,13 +91,17 @@ The exported `read` and `readFile` functions accept an options argument:
 - In some cases, sheets may be parsed even if `bookSheets` is false.
 - `bookSheets` and `bookProps` combine to give both sets of information
 - `Deps` will be an empty object if `bookDeps` is falsy
+- `bookFiles` adds a `keys` array (paths in the ZIP) and a `files` hash (whose
+  keys are paths and values are objects representing the files)
+- `sheetRows-1` rows will be generated when looking at the JSON object output
+  (since the header row is counted as a row when parsing the data)

 The defaults are enumerated in bits/84_defaults.js

 ## Tested Environments

 - Node 0.8.14, 0.10.1
- - IE 6/7/8/9/10 using Base64 mode (IE10 using HTML5 mode)
+ - IE 6/7/8/9/10 using Base64 mode (IE10/11 using HTML5 mode)
 - FF 18 using Base64 or HTML5 mode
 - Chrome 24 using Base64 or HTML5 mode


--- a/bin/xlsx2csv.njs
+++ b/bin/xlsx2csv.njs
@@ -15,6 +15,7 @@ program
 	.option('-J, --raw-js', 'emit raw JS object rather than CSV (raw numbers)')
 	.option('-F, --field-sep <sep>', 'CSV field separator', ",")
 	.option('-R, --row-sep <sep>', 'CSV row separator', "\n")
+	.option('-n, --sheet-rows <num>', 'Number of rows to process (0=all rows)')
 	.option('--dev', 'development mode')
 	.option('--read', 'read but do not print out contents')
 	.option('-q, --quiet', 'quiet mode');
@@ -46,6 +47,7 @@ if(!fs.existsSync(filename)) {

 var opts = {}, wb;
 if(program.listSheets) opts.bookSheets = true;
+if(program.sheetRows) opts.sheetRows = program.sheetRows;

 if(program.dev) {
 	X.verbose = 2;

--- a/bits/31_version.js
+++ b/bits/31_version.js
-XLSX.version = '0.5.8';
+XLSX.version = '0.5.9';
--- a/bits/65_comments.js
+++ b/bits/65_comments.js
@@ -11,6 +11,8 @@ function parse_comments_xml(data, opts) {
 		if(x === "" || x.trim() === "") return;
 		var y = parsexmltag(x.match(/<comment[^>]*>/)[0]);
 		var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid };
+		var cell = decode_cell(y.ref);
+		if(opts.sheetRows && opts.sheetRows <= cell.r) return;
 		var textMatch = x.match(/<text>([^\u2603]*)<\/text>/m);
 		if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag.
 		var rt = parse_si(textMatch[1]);
@@ -26,7 +28,7 @@ function parse_comments(zip, dirComments, sheets, sheetRels, opts) {
 	for(var i = 0; i != dirComments.length; ++i) {
 		var canonicalpath=dirComments[i];
 		var comments=parse_comments_xml(getzipdata(zip, canonicalpath.replace(/^\//,''), true), opts);
-		if(!comments || !comments.length) return;
+		if(!comments || !comments.length) continue;
 		// find the sheets targeted by these comments
 		var sheetNames = Object.keys(sheets);
 		for(var j = 0; j != sheetNames.length; ++j) {

--- a/bits/72_wsxml.js
+++ b/bits/72_wsxml.js
@@ -18,9 +18,9 @@ function parse_ws_xml(data, opts) {

 		/* 18.3.1.73 row CT_Row */
 		var row = parsexmltag(x.match(/<row[^>]*>/)[0]);
+		if(opts.sheetRows && opts.sheetRows < +row.r) return;
 		if(refguess.s.r > row.r - 1) refguess.s.r = row.r - 1;
 		if(refguess.e.r < row.r - 1) refguess.e.r = row.r - 1;
-
 		/* 18.3.1.4 c CT_Cell */
 		var cells = x.substr(x.indexOf('>')+1).split(/<c /);
 		cells.forEach(function(c, idx) { if(c === "" || c.trim() === "") return;
@@ -82,6 +82,18 @@ function parse_ws_xml(data, opts) {
 		});
 	});
 	if(!s["!ref"]) s["!ref"] = encode_range(refguess);
+	if(opts.sheetRows) {
+		var tmpref = decode_range(s["!ref"]);
+		if(opts.sheetRows < +tmpref.e.r) {
+			tmpref.e.r = opts.sheetRows - 1;
+			if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
+			if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
+			if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
+			if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
+			s["!fullref"] = s["!ref"];
+			s["!ref"] = encode_range(tmpref);
+		}
+	}
 	return s;
 }

--- a/bits/73_wsbin.js
+++ b/bits/73_wsbin.js
@@ -123,13 +123,18 @@ var parse_ws_bin = function(data, opts) {
 	var s = {};

 	var ref;
+	var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };

-	var pass = false;
+	var pass = false, end = false;
 	var row, p, cf;
 	recordhopper(data, function(val, R) {
+		if(end) return;
 		switch(R.n) {
 			case 'BrtWsDim': ref = val; break;
-			case 'BrtRowHdr': row = val; break;
+			case 'BrtRowHdr':
+				row = val;
+				if(opts.sheetRows && opts.sheetRows <= row.r) end=true;
+				break;

 			case 'BrtFmlaBool':
 			case 'BrtFmlaError':
@@ -154,7 +159,11 @@ var parse_ws_bin = function(data, opts) {
 					if(opts.cellNF) p.z = SSF._table[cf.ifmt];
 				} catch(e) { if(opts.WTF) throw e; }
 				s[encode_cell({c:val[0].c,r:row.r})] = p;
-				break; // TODO
+				if(refguess.s.r > row.r) refguess.s.r = row.r;
+				if(refguess.s.c > val[0].c) refguess.s.c = val[0].c;
+				if(refguess.e.r < row.r) refguess.e.r = row.r;
+				if(refguess.e.c < val[0].c) refguess.e.c = val[0].c;
+				break;

 			case 'BrtCellBlank': break; // (blank cell)

@@ -192,6 +201,18 @@ var parse_ws_bin = function(data, opts) {
 		}
 	}, opts);
 	s["!ref"] = encode_range(ref);
+	if(opts.sheetRows) {
+		var tmpref = decode_range(s["!ref"]);
+		if(opts.sheetRows < +tmpref.e.r) {
+			tmpref.e.r = opts.sheetRows - 1;
+			if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
+			if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
+			if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
+			if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
+			s["!fullref"] = s["!ref"];
+			s["!ref"] = encode_range(tmpref);
+		}
+	}
 	return s;
 };

--- a/bits/84_defaults.js
+++ b/bits/84_defaults.js
@@ -5,7 +5,7 @@ function fixopts(opts) {
 		['cellFormula', true], /* emit formulae as .h */

 		['sheetStubs', false], /* emit empty cells */
-
+		['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */
 		['bookDeps', false], /* parse calculation chains */
 		['bookSheets', false], /* only try to get sheet names (no Sheets) */
 		['bookProps', false], /* only try to get properties (no Sheets) */
@@ -13,5 +13,8 @@ function fixopts(opts) {

 		['WTF', false] /* WTF mode (throws errors) */
 	];
-	defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });
+	defaults.forEach(function(d) {
+		if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1];
+		if(d[2] === 'n') opts[d[0]] = Number(opts[d[0]]);
+	});
 }
--- a/package.json
+++ b/package.json
 {
 	"name": "xlsx",
-	"version": "0.5.8",
+	"version": "0.5.9",
 	"author": "sheetjs",
 	"description": "XLSB / XLSX / XLSM parser",
 	"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],

--- a/test.js
+++ b/test.js
@@ -5,7 +5,6 @@ describe('source',function(){it('should load',function(){XLSX=require('./');});}

 var opts = {};
 if(process.env.WTF) opts.WTF = true;
-
 var ex = [".xlsb", ".xlsm", ".xlsx"];
 if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
 var exp = ex.map(function(x){ return x + ".pending"; });
@@ -154,6 +153,42 @@ describe('options', function() {
 			var wb = XLSX.readFile(dir+'merge_cells.xlsx', {sheetStubs:true});
 			assert(typeof wb.Sheets.Merge.A2.t !== 'undefined');
 		});
+		it('should read all cells by default', function() {
+			var wb = XLSX.readFile(dir+'formula_stress_test.xlsb');
+			assert(typeof wb.Sheets.Text.A46 !== 'undefined');
+			assert(typeof wb.Sheets.Text.B26 !== 'undefined');
+			assert(typeof wb.Sheets.Text.C16 !== 'undefined');
+			assert(typeof wb.Sheets.Text.D2 !== 'undefined');
+			wb = XLSX.readFile(dir+'formula_stress_test.xlsx');
+			assert(typeof wb.Sheets.Text.A46 !== 'undefined');
+			assert(typeof wb.Sheets.Text.B26 !== 'undefined');
+			assert(typeof wb.Sheets.Text.C16 !== 'undefined');
+			assert(typeof wb.Sheets.Text.D2 !== 'undefined');
+		});
+		it('sheetRows n=20', function() {
+			var wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:20});
+			assert(typeof wb.Sheets.Text.A46 === 'undefined');
+			assert(typeof wb.Sheets.Text.B26 === 'undefined');
+			assert(typeof wb.Sheets.Text.C16 !== 'undefined');
+			assert(typeof wb.Sheets.Text.D2 !== 'undefined');
+			wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:20});
+			assert(typeof wb.Sheets.Text.A46 === 'undefined');
+			assert(typeof wb.Sheets.Text.B26 === 'undefined');
+			assert(typeof wb.Sheets.Text.C16 !== 'undefined');
+			assert(typeof wb.Sheets.Text.D2 !== 'undefined');
+		});
+		it('sheetRows n=10', function() {
+			var wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:10});
+			assert(typeof wb.Sheets.Text.A46 === 'undefined');
+			assert(typeof wb.Sheets.Text.B26 === 'undefined');
+			assert(typeof wb.Sheets.Text.C16 === 'undefined');
+			assert(typeof wb.Sheets.Text.D2 !== 'undefined');
+			wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:10});
+			assert(typeof wb.Sheets.Text.A46 === 'undefined');
+			assert(typeof wb.Sheets.Text.B26 === 'undefined');
+			assert(typeof wb.Sheets.Text.C16 === 'undefined');
+			assert(typeof wb.Sheets.Text.D2 !== 'undefined');
+		});
 	});
 	describe('book', function() {
 		it('bookSheets should not generate sheets', function() {
@@ -224,7 +259,7 @@ describe('features', function() {
 		});
 	});

-	describe('should have core properties and custom properties parsed', function() {
+	describe('should parse core properties and custom properties', function() {
 		var wb;
 		before(function() {
 			XLSX = require('./');
@@ -242,7 +277,7 @@ describe('features', function() {
 		});
 	});

-	describe('should parse cells with date type', function() {
+	describe('should parse cells with date type (XLSX/XLSB)', function() {
 		var wb, ws;
 		before(function() {
 			XLSX = require('./');
@@ -255,4 +290,29 @@ describe('features', function() {
 			assert.equal(sheet[3]['てすと'], '2/14/14');
 		});
 	});
+
+	describe('sheetRows', function() {
+		it('should use original range if not set', function() {
+			var wb = XLSX.readFile(dir+'formula_stress_test.xlsb');
+			assert.equal(wb.Sheets.Text["!ref"],"A1:F49");
+			wb = XLSX.readFile(dir+'formula_stress_test.xlsx');
+			assert.equal(wb.Sheets.Text["!ref"],"A1:F49");
+		});
+		it('should adjust range if set', function() {
+			var wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:10});
+			assert.equal(wb.Sheets.Text["!fullref"],"A1:F49");
+			assert.equal(wb.Sheets.Text["!ref"],"A1:F10");
+			wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:10});
+			assert.equal(wb.Sheets.Text["!fullref"],"A1:F49");
+			assert.equal(wb.Sheets.Text["!ref"],"A1:F10");
+		});
+		it('should not generate comment cells', function() {
+			var wb = XLSX.readFile(dir+'comments_stress_test.xlsx', {sheetRows:10});
+			assert.equal(wb.Sheets.Sheet7["!fullref"],"A1:N34");
+			assert.equal(wb.Sheets.Sheet7["!ref"],"A1:A1");
+			wb = XLSX.readFile(dir+'comments_stress_test.xlsb', {sheetRows:10});
+			assert.equal(wb.Sheets.Sheet7["!fullref"],"A1:N34");
+			assert.equal(wb.Sheets.Sheet7["!ref"],"A1:A1");
+		});
+	});
 });
--- a/tests.lst
+++ b/tests.lst
@@ -8,6 +8,7 @@ named_ranges_2011.xlsb
 number_format.xlsb
 rich_text_stress.xlsb
 time_stress_test_1.xlsb
+xlsx-stream-d-date-cell.xlsb
 LONumbers-2010.xlsx
 LONumbers-2011.xlsx
 LONumbers.xlsx
@@ -233,6 +234,7 @@ xlrd_test_comments_excel.xlsx
 xlrd_test_comments_gdocs.xlsx
 xlrd_text_bar.xlsx
 חישוב_נקודות_זיכוי.xlsx
+xlsx-stream-d-date-cell.xlsx
 apachepoi_45431.xlsm
 apachepoi_47026.xlsm
 apachepoi_47089.xlsm

--- a/xlsx.js
+++ b/xlsx.js