提交 5c4b5827 编写于 作者: S SheetJS

version bump 0.5.9: sheetRows partial processing

- opts.sheetRows limits parsing; default (0) parses all rows
- added -n mode to xlsx2csv to control number of rows
- !ref will be adjusted; !fullref holds full range
上级 509f7bf9
LIB=xlsx
DEPS=$(wildcard bits/*.js)
TARGET=$(LIB).js
FMT=xlsx xlsm xlsb
FMT=xlsx xlsm xlsb misc
$(TARGET): $(DEPS)
cat $^ > $@
......
......@@ -79,7 +79,9 @@ The exported `read` and `readFile` functions accept an options argument:
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| sheetStubs | false | Create cell objects for stub cells |
| sheetRows | 0 | If >0, read the first `sheetRows` rows ** |
| bookDeps | false | If true, parse calculation chains |
| bookFiles | false | If true, add raw files to book object ** |
| bookProps | false | If true, only parse enough to get book metadata ** |
| bookSheets | false | If true, only parse enough to get the sheet names |
......@@ -89,13 +91,17 @@ The exported `read` and `readFile` functions accept an options argument:
- In some cases, sheets may be parsed even if `bookSheets` is false.
- `bookSheets` and `bookProps` combine to give both sets of information
- `Deps` will be an empty object if `bookDeps` is falsy
- `bookFiles` adds a `keys` array (paths in the ZIP) and a `files` hash (whose
keys are paths and values are objects representing the files)
- When `sheetRows` is set, the JSON object output contains at most `sheetRows-1`
data rows (the header row counts as one of the `sheetRows` rows that are parsed)
The defaults are enumerated in bits/84_defaults.js
## Tested Environments
- Node 0.8.14, 0.10.1
- IE 6/7/8/9/10 using Base64 mode (IE10 using HTML5 mode)
- IE 6/7/8/9/10 using Base64 mode (IE10/11 using HTML5 mode)
- FF 18 using Base64 or HTML5 mode
- Chrome 24 using Base64 or HTML5 mode
......
......@@ -15,6 +15,7 @@ program
.option('-J, --raw-js', 'emit raw JS object rather than CSV (raw numbers)')
.option('-F, --field-sep <sep>', 'CSV field separator', ",")
.option('-R, --row-sep <sep>', 'CSV row separator', "\n")
.option('-n, --sheet-rows <num>', 'Number of rows to process (0=all rows)')
.option('--dev', 'development mode')
.option('--read', 'read but do not print out contents')
.option('-q, --quiet', 'quiet mode');
......@@ -46,6 +47,7 @@ if(!fs.existsSync(filename)) {
var opts = {}, wb;
if(program.listSheets) opts.bookSheets = true;
if(program.sheetRows) opts.sheetRows = program.sheetRows;
if(program.dev) {
X.verbose = 2;
......
XLSX.version = '0.5.8';
XLSX.version = '0.5.9';
......@@ -11,6 +11,8 @@ function parse_comments_xml(data, opts) {
if(x === "" || x.trim() === "") return;
var y = parsexmltag(x.match(/<comment[^>]*>/)[0]);
var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid };
var cell = decode_cell(y.ref);
if(opts.sheetRows && opts.sheetRows <= cell.r) return;
var textMatch = x.match(/<text>([^\u2603]*)<\/text>/m);
if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag.
var rt = parse_si(textMatch[1]);
......@@ -26,7 +28,7 @@ function parse_comments(zip, dirComments, sheets, sheetRels, opts) {
for(var i = 0; i != dirComments.length; ++i) {
var canonicalpath=dirComments[i];
var comments=parse_comments_xml(getzipdata(zip, canonicalpath.replace(/^\//,''), true), opts);
if(!comments || !comments.length) return;
if(!comments || !comments.length) continue;
// find the sheets targeted by these comments
var sheetNames = Object.keys(sheets);
for(var j = 0; j != sheetNames.length; ++j) {
......
......@@ -18,9 +18,9 @@ function parse_ws_xml(data, opts) {
/* 18.3.1.73 row CT_Row */
var row = parsexmltag(x.match(/<row[^>]*>/)[0]);
if(opts.sheetRows && opts.sheetRows < +row.r) return;
if(refguess.s.r > row.r - 1) refguess.s.r = row.r - 1;
if(refguess.e.r < row.r - 1) refguess.e.r = row.r - 1;
/* 18.3.1.4 c CT_Cell */
var cells = x.substr(x.indexOf('>')+1).split(/<c /);
cells.forEach(function(c, idx) { if(c === "" || c.trim() === "") return;
......@@ -82,6 +82,18 @@ function parse_ws_xml(data, opts) {
});
});
if(!s["!ref"]) s["!ref"] = encode_range(refguess);
if(opts.sheetRows) {
var tmpref = decode_range(s["!ref"]);
if(opts.sheetRows < +tmpref.e.r) {
tmpref.e.r = opts.sheetRows - 1;
if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
s["!fullref"] = s["!ref"];
s["!ref"] = encode_range(tmpref);
}
}
return s;
}
......@@ -123,13 +123,18 @@ var parse_ws_bin = function(data, opts) {
var s = {};
var ref;
var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };
var pass = false;
var pass = false, end = false;
var row, p, cf;
recordhopper(data, function(val, R) {
if(end) return;
switch(R.n) {
case 'BrtWsDim': ref = val; break;
case 'BrtRowHdr': row = val; break;
case 'BrtRowHdr':
row = val;
if(opts.sheetRows && opts.sheetRows <= row.r) end=true;
break;
case 'BrtFmlaBool':
case 'BrtFmlaError':
......@@ -154,7 +159,11 @@ var parse_ws_bin = function(data, opts) {
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
} catch(e) { if(opts.WTF) throw e; }
s[encode_cell({c:val[0].c,r:row.r})] = p;
break; // TODO
if(refguess.s.r > row.r) refguess.s.r = row.r;
if(refguess.s.c > val[0].c) refguess.s.c = val[0].c;
if(refguess.e.r < row.r) refguess.e.r = row.r;
if(refguess.e.c < val[0].c) refguess.e.c = val[0].c;
break;
case 'BrtCellBlank': break; // (blank cell)
......@@ -192,6 +201,18 @@ var parse_ws_bin = function(data, opts) {
}
}, opts);
s["!ref"] = encode_range(ref);
if(opts.sheetRows) {
var tmpref = decode_range(s["!ref"]);
if(opts.sheetRows < +tmpref.e.r) {
tmpref.e.r = opts.sheetRows - 1;
if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
s["!fullref"] = s["!ref"];
s["!ref"] = encode_range(tmpref);
}
}
return s;
};
......@@ -5,7 +5,7 @@ function fixopts(opts) {
['cellFormula', true], /* emit formulae as .h */
['sheetStubs', false], /* emit empty cells */
['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */
['bookDeps', false], /* parse calculation chains */
['bookSheets', false], /* only try to get sheet names (no Sheets) */
['bookProps', false], /* only try to get properties (no Sheets) */
......@@ -13,5 +13,8 @@ function fixopts(opts) {
['WTF', false] /* WTF mode (throws errors) */
];
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });
defaults.forEach(function(d) {
if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1];
if(d[2] === 'n') opts[d[0]] = Number(opts[d[0]]);
});
}
{
"name": "xlsx",
"version": "0.5.8",
"version": "0.5.9",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],
......
......@@ -5,7 +5,6 @@ describe('source',function(){it('should load',function(){XLSX=require('./');});}
var opts = {};
if(process.env.WTF) opts.WTF = true;
var ex = [".xlsb", ".xlsm", ".xlsx"];
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
var exp = ex.map(function(x){ return x + ".pending"; });
......@@ -154,6 +153,42 @@ describe('options', function() {
var wb = XLSX.readFile(dir+'merge_cells.xlsx', {sheetStubs:true});
assert(typeof wb.Sheets.Merge.A2.t !== 'undefined');
});
it('should read all cells by default', function() {
	/* no sheetRows option is passed, so cells from every probed row must exist */
	var binText = XLSX.readFile(dir+'formula_stress_test.xlsb').Sheets.Text;
	assert(typeof binText.A46 !== 'undefined');
	assert(typeof binText.B26 !== 'undefined');
	assert(typeof binText.C16 !== 'undefined');
	assert(typeof binText.D2 !== 'undefined');

	/* same probes against the XLSX rendition of the workbook */
	var xmlText = XLSX.readFile(dir+'formula_stress_test.xlsx').Sheets.Text;
	assert(typeof xmlText.A46 !== 'undefined');
	assert(typeof xmlText.B26 !== 'undefined');
	assert(typeof xmlText.C16 !== 'undefined');
	assert(typeof xmlText.D2 !== 'undefined');
});
it('sheetRows n=20', function() {
	var limit = {sheetRows:20};

	/* XLSX: cells in rows 46 and 26 are expected to be absent, rows 16 and 2 present */
	var xmlText = XLSX.readFile(dir+'formula_stress_test.xlsx', limit).Sheets.Text;
	assert(typeof xmlText.A46 === 'undefined');
	assert(typeof xmlText.B26 === 'undefined');
	assert(typeof xmlText.C16 !== 'undefined');
	assert(typeof xmlText.D2 !== 'undefined');

	/* XLSB: identical expectations */
	var binText = XLSX.readFile(dir+'formula_stress_test.xlsb', limit).Sheets.Text;
	assert(typeof binText.A46 === 'undefined');
	assert(typeof binText.B26 === 'undefined');
	assert(typeof binText.C16 !== 'undefined');
	assert(typeof binText.D2 !== 'undefined');
});
it('sheetRows n=10', function() {
	var limit = {sheetRows:10};

	/* XLSB: only the row-2 probe is expected to survive; rows 16, 26 and 46 are absent */
	var binText = XLSX.readFile(dir+'formula_stress_test.xlsb', limit).Sheets.Text;
	assert(typeof binText.A46 === 'undefined');
	assert(typeof binText.B26 === 'undefined');
	assert(typeof binText.C16 === 'undefined');
	assert(typeof binText.D2 !== 'undefined');

	/* XLSX: identical expectations */
	var xmlText = XLSX.readFile(dir+'formula_stress_test.xlsx', limit).Sheets.Text;
	assert(typeof xmlText.A46 === 'undefined');
	assert(typeof xmlText.B26 === 'undefined');
	assert(typeof xmlText.C16 === 'undefined');
	assert(typeof xmlText.D2 !== 'undefined');
});
});
describe('book', function() {
it('bookSheets should not generate sheets', function() {
......@@ -224,7 +259,7 @@ describe('features', function() {
});
});
describe('should have core properties and custom properties parsed', function() {
describe('should parse core properties and custom properties', function() {
var wb;
before(function() {
XLSX = require('./');
......@@ -242,7 +277,7 @@ describe('features', function() {
});
});
describe('should parse cells with date type', function() {
describe('should parse cells with date type (XLSX/XLSB)', function() {
var wb, ws;
before(function() {
XLSX = require('./');
......@@ -255,4 +290,29 @@ describe('features', function() {
assert.equal(sheet[3]['てすと'], '2/14/14');
});
});
describe('sheetRows', function() {
	/* checks on the "!ref" / "!fullref" range strings with and without sheetRows */
	it('should use original range if not set', function() {
		var binText = XLSX.readFile(dir+'formula_stress_test.xlsb').Sheets.Text;
		assert.equal(binText["!ref"],"A1:F49");

		var xmlText = XLSX.readFile(dir+'formula_stress_test.xlsx').Sheets.Text;
		assert.equal(xmlText["!ref"],"A1:F49");
	});

	it('should adjust range if set', function() {
		var limit = {sheetRows:10};

		/* "!fullref" is expected to hold the full range, "!ref" the clipped one */
		var xmlText = XLSX.readFile(dir+'formula_stress_test.xlsx', limit).Sheets.Text;
		assert.equal(xmlText["!fullref"],"A1:F49");
		assert.equal(xmlText["!ref"],"A1:F10");

		var binText = XLSX.readFile(dir+'formula_stress_test.xlsb', limit).Sheets.Text;
		assert.equal(binText["!fullref"],"A1:F49");
		assert.equal(binText["!ref"],"A1:F10");
	});

	it('should not generate comment cells', function() {
		var limit = {sheetRows:10};

		/* Sheet7 is expected to report the full range A1:N34 but a clipped range of A1:A1 */
		var xmlSheet = XLSX.readFile(dir+'comments_stress_test.xlsx', limit).Sheets.Sheet7;
		assert.equal(xmlSheet["!fullref"],"A1:N34");
		assert.equal(xmlSheet["!ref"],"A1:A1");

		var binSheet = XLSX.readFile(dir+'comments_stress_test.xlsb', limit).Sheets.Sheet7;
		assert.equal(binSheet["!fullref"],"A1:N34");
		assert.equal(binSheet["!ref"],"A1:A1");
	});
});
});
......@@ -8,6 +8,7 @@ named_ranges_2011.xlsb
number_format.xlsb
rich_text_stress.xlsb
time_stress_test_1.xlsb
xlsx-stream-d-date-cell.xlsb
LONumbers-2010.xlsx
LONumbers-2011.xlsx
LONumbers.xlsx
......@@ -233,6 +234,7 @@ xlrd_test_comments_excel.xlsx
xlrd_test_comments_gdocs.xlsx
xlrd_text_bar.xlsx
חישוב_נקודות_זיכוי.xlsx
xlsx-stream-d-date-cell.xlsx
apachepoi_45431.xlsm
apachepoi_47026.xlsm
apachepoi_47089.xlsm
......
此差异由.gitattributes 抑制。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册