XlsSaxAnalyser.java 8.9 KB
Newer Older
1
package com.alibaba.excel.analysis.v03;
J
update  
jipengfei.jpf 已提交
2

Z
zhuangjiaju 已提交
3 4 5
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
庄家钜's avatar
庄家钜 已提交
6
import java.util.LinkedHashMap;
Z
zhuangjiaju 已提交
7
import java.util.List;
Z
zhuangjiaju 已提交
8 9
import java.util.Map;
import java.util.TreeMap;
Z
zhuangjiaju 已提交
10 11 12 13 14 15 16 17

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
18 19
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
Z
zhuangjiaju 已提交
20 21 22
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
23 24
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Z
zhuangjiaju 已提交
25

庄家钜's avatar
庄家钜 已提交
26
import com.alibaba.excel.analysis.ExcelReadExecutor;
Z
zhuangjiaju 已提交
27
import com.alibaba.excel.analysis.v03.handlers.BlankOrErrorRecordHandler;
28
import com.alibaba.excel.analysis.v03.handlers.BofRecordHandler;
Z
zhuangjiaju 已提交
29 30 31 32 33
import com.alibaba.excel.analysis.v03.handlers.FormulaRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.LabelRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.MissingCellDummyRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.NoteRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.NumberRecordHandler;
34 35
import com.alibaba.excel.analysis.v03.handlers.RkRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.SstRecordHandler;
36
import com.alibaba.excel.context.AnalysisContext;
Z
zhuangjiaju 已提交
37
import com.alibaba.excel.enums.CellDataTypeEnum;
38
import com.alibaba.excel.exception.ExcelAnalysisException;
Z
zhuangjiaju 已提交
39
import com.alibaba.excel.metadata.CellData;
40 41
import com.alibaba.excel.read.listener.event.EachRowAnalysisFinishEvent;
import com.alibaba.excel.read.metadata.ReadSheet;
Z
zhuangjiaju 已提交
42
import com.alibaba.excel.read.metadata.holder.ReadRowHolder;
clevertension's avatar
clevertension 已提交
43
import com.alibaba.excel.util.CollectionUtils;
44

J
update  
jipengfei.jpf 已提交
45
/**
 * A text extractor for Excel files.
 * <p>
 * Returns the textual content of the file, suitable for indexing by something like Lucene, but not really
 * intended for display to the user.
 * </p>
 * <p>
 * To turn an Excel file into a CSV or similar, see the XLS2CSVmra example.
 * </p>
 *
 * @see <a href=
 * "http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java">XLS2CSVmra</a>
 *
 * @author jipengfei
 */
庄家钜's avatar
庄家钜 已提交
60
public class XlsSaxAnalyser implements HSSFListener, ExcelReadExecutor {
61 62
    private static final Logger LOGGER = LoggerFactory.getLogger(XlsSaxAnalyser.class);

Z
zhuangjiaju 已提交
63
    private POIFSFileSystem poifsFileSystem;
庄家钜's avatar
庄家钜 已提交
64 65
    /**
     * Value of the {@code readAll} argument of the latest {@link #execute(List, Boolean)} call; forwarded to
     * {@code BofRecordHandler.init(readSheetList, readAll)} by {@code buildXlsRecordHandlers()}. Unset before the
     * first {@code execute} call.
     */
    private Boolean readAll;
    /**
     * Value of the {@code readSheetList} argument of the latest {@link #execute(List, Boolean)} call; forwarded to
     * {@code BofRecordHandler} together with {@link #readAll}.
     */
    private List<ReadSheet> readSheetList;
clevertension's avatar
clevertension 已提交
66 67 68 69 70 71 72
    /**
     * Row index reported by the most recent handler that returned a row greater than -1. Reset to 0 by
     * {@code init()} and used by {@code endRow()} to build the {@code ReadRowHolder}.
     */
    private int lastRowNumber;
    /**
     * Column index reported by the most recent handler that returned a column greater than -1. Reset to 0 by
     * {@code init()}, and set to -1 when a record for a different row arrives and at the end of {@code endRow()}.
     */
    private int lastColumnNumber;
    /**
     * For parsing Formulas. Created in {@code execute(...)} around {@link #formatListener} and used there to obtain
     * the stub workbook.
     * NOTE(review): {@code execute(...)} registers {@link #formatListener}, not this listener, on the
     * {@code HSSFRequest} - confirm that this is intended.
     */
    private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
    /**
     * Listener created in {@code execute(...)} around a {@code MissingRecordAwareHSSFListener} that delegates to
     * this analyser; registered for all records and handed to the formula and number record handlers.
     */
    private FormatTrackingHSSFListener formatListener;
Z
zhuangjiaju 已提交
73
    private Map<Integer, CellData> records;
74
    private List<ReadSheet> sheets;
clevertension's avatar
clevertension 已提交
75 76
    /**
     * Stub workbook taken from {@code workbookBuildingListener} in {@code execute(...)} the first time it is still
     * {@code null}; passed to {@code FormulaRecordHandler} when the handlers are built.
     */
    private HSSFWorkbook stubWorkbook;
    /**
     * Handlers tried in list order by {@code processRecord(...)}; filled and sorted by
     * {@code buildXlsRecordHandlers()} only while the list is still empty.
     */
    private List<XlsRecordHandler> recordHandlers = new ArrayList<XlsRecordHandler>();
77
    private AnalysisContext analysisContext;
Z
zhuangjiaju 已提交
78

79
    /**
     * Creates an analyser bound to the given context and POI file system.
     * <p>
     * The file system is also stored on the context's read workbook holder.
     *
     * @param context
     *            analysis context used for configuration lookups and row notifications
     * @param poifsFileSystem
     *            file system that {@code execute(...)} and {@code sheetList()} read from
     */
    public XlsSaxAnalyser(AnalysisContext context, POIFSFileSystem poifsFileSystem) {
        this.poifsFileSystem = poifsFileSystem;
        this.records = new LinkedHashMap<Integer, CellData>();
        this.analysisContext = context;
        // Publish the file system on the workbook holder as the last step.
        context.readWorkbookHolder().setPoifsFileSystem(poifsFileSystem);
    }

86
    @Override
87
    public List<ReadSheet> sheetList() {
88 89 90 91 92
        if (sheets == null) {
            LOGGER.warn("Getting the 'sheetList' before reading will cause the file to be read twice.");
            XlsListSheetListener xlsListSheetListener = new XlsListSheetListener(analysisContext, poifsFileSystem);
            sheets = xlsListSheetListener.getSheetList();
        }
J
update  
jipengfei.jpf 已提交
93 94 95 96
        return sheets;
    }

    @Override
庄家钜's avatar
庄家钜 已提交
97 98 99
    public void execute(List<ReadSheet> readSheetList, Boolean readAll) {
        this.readAll = readAll;
        this.readSheetList = readSheetList;
J
update  
jipengfei.jpf 已提交
100 101
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);
clevertension's avatar
clevertension 已提交
102 103 104 105 106
        workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
        if (workbookBuildingListener != null && stubWorkbook == null) {
            stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
        }
        init();
J
update  
jipengfei.jpf 已提交
107 108
        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
庄家钜's avatar
庄家钜 已提交
109
        request.addListenerForAllRecords(formatListener);
J
update  
jipengfei.jpf 已提交
110
        try {
Z
zhuangjiaju 已提交
111
            factory.processWorkbookEvents(request, poifsFileSystem);
J
update  
jipengfei.jpf 已提交
112 113 114
        } catch (IOException e) {
            throw new ExcelAnalysisException(e);
        }
Z
zhuangjiaju 已提交
115 116 117 118
        // Sometimes tables lack the end record of the last column
        if (!records.isEmpty()) {
            endRow();
        }
J
update  
jipengfei.jpf 已提交
119 120 121 122 123
    }

    private void init() {
        lastRowNumber = 0;
        lastColumnNumber = 0;
Z
zhuangjiaju 已提交
124
        records = new TreeMap<Integer, CellData>();
clevertension's avatar
clevertension 已提交
125
        buildXlsRecordHandlers();
J
update  
jipengfei.jpf 已提交
126 127
    }

128
    @Override
J
update  
jipengfei.jpf 已提交
129
    public void processRecord(Record record) {
130 131 132 133
        // Not data from the current sheet
        if (ignoreRecord(record)) {
            return;
        }
J
update  
jipengfei.jpf 已提交
134 135
        int thisRow = -1;
        int thisColumn = -1;
Z
zhuangjiaju 已提交
136
        CellData cellData = null;
Z
zhuangjiaju 已提交
137
        for (XlsRecordHandler handler : this.recordHandlers) {
clevertension's avatar
clevertension 已提交
138 139 140 141
            if (handler.support(record)) {
                handler.processRecord(record);
                thisRow = handler.getRow();
                thisColumn = handler.getColumn();
Z
zhuangjiaju 已提交
142 143
                cellData = handler.getCellData();
                if (cellData != null) {
144
                    cellData.checkEmpty();
庄家钜's avatar
庄家钜 已提交
145 146 147
                    if (CellDataTypeEnum.EMPTY != cellData.getType()) {
                        records.put(thisColumn, cellData);
                    }
Z
zhuangjiaju 已提交
148
                }
J
update  
jipengfei.jpf 已提交
149
                break;
clevertension's avatar
clevertension 已提交
150
            }
J
update  
jipengfei.jpf 已提交
151 152
        }
        // If we got something to print out, do so
庄家钜's avatar
庄家钜 已提交
153 154 155
        if (cellData != null && analysisContext.currentReadHolder().globalConfiguration().getAutoTrim()
            && CellDataTypeEnum.STRING == cellData.getType()) {
            cellData.setStringValue(cellData.getStringValue().trim());
J
update  
jipengfei.jpf 已提交
156 157
        }

clevertension's avatar
clevertension 已提交
158 159 160 161 162
        // Handle new row
        if (thisRow != -1 && thisRow != lastRowNumber) {
            lastColumnNumber = -1;
        }

J
update  
jipengfei.jpf 已提交
163 164 165 166 167 168 169 170
        // Update column and row count
        if (thisRow > -1) {
            lastRowNumber = thisRow;
        }
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }

clevertension's avatar
clevertension 已提交
171 172 173
        processLastCellOfRow(record);
    }

174 175 176 177 178
    /**
     * Decides whether a record should be skipped.
     *
     * @param record
     *            the record to check
     * @return {@code true} when the workbook holder's {@code ignoreRecord03} flag is set and the record is neither
     *         a {@code BoundSheetRecord} nor a {@code BOFRecord}
     */
    private boolean ignoreRecord(Record record) {
        if (!analysisContext.readWorkbookHolder().getIgnoreRecord03()) {
            return false;
        }
        int sid = record.getSid();
        return sid != BoundSheetRecord.sid && sid != BOFRecord.sid;
    }

clevertension's avatar
clevertension 已提交
179
    private void processLastCellOfRow(Record record) {
J
update  
jipengfei.jpf 已提交
180 181
        // Handle end of row
        if (record instanceof LastCellOfRowDummyRecord) {
Z
zhuangjiaju 已提交
182 183 184 185 186 187 188 189
            endRow();
        }
    }

    private void endRow() {
        if (lastColumnNumber == -1) {
            lastColumnNumber = 0;
        }
庄家钜's avatar
庄家钜 已提交
190 191 192
        analysisContext.readRowHolder(
            new ReadRowHolder(lastRowNumber, analysisContext.readSheetHolder().getGlobalConfiguration()));
        analysisContext.readSheetHolder().notifyEndOneRow(new EachRowAnalysisFinishEvent(records), analysisContext);
Z
zhuangjiaju 已提交
193 194
        records.clear();
        lastColumnNumber = -1;
J
update  
jipengfei.jpf 已提交
195
    }
clevertension's avatar
clevertension 已提交
196 197 198 199

    private void buildXlsRecordHandlers() {
        if (CollectionUtils.isEmpty(recordHandlers)) {
            recordHandlers.add(new BlankOrErrorRecordHandler());
200 201 202
            // The table has been counted and there are no duplicate statistics
            if (sheets == null) {
                sheets = new ArrayList<ReadSheet>();
庄家钜's avatar
庄家钜 已提交
203
                recordHandlers.add(new BofRecordHandler(analysisContext, sheets, false, true));
204
            } else {
庄家钜's avatar
庄家钜 已提交
205
                recordHandlers.add(new BofRecordHandler(analysisContext, sheets, true, true));
206
            }
clevertension's avatar
clevertension 已提交
207 208 209 210
            recordHandlers.add(new FormulaRecordHandler(stubWorkbook, formatListener));
            recordHandlers.add(new LabelRecordHandler());
            recordHandlers.add(new NoteRecordHandler());
            recordHandlers.add(new NumberRecordHandler(formatListener));
211 212
            recordHandlers.add(new RkRecordHandler());
            recordHandlers.add(new SstRecordHandler());
clevertension's avatar
clevertension 已提交
213 214 215 216
            recordHandlers.add(new MissingCellDummyRecordHandler());
            Collections.sort(recordHandlers);
        }

Z
zhuangjiaju 已提交
217
        for (XlsRecordHandler x : recordHandlers) {
clevertension's avatar
clevertension 已提交
218
            x.init();
庄家钜's avatar
庄家钜 已提交
219 220 221 222
            if (x instanceof BofRecordHandler) {
                BofRecordHandler bofRecordHandler = (BofRecordHandler)x;
                bofRecordHandler.init(readSheetList, readAll);
            }
clevertension's avatar
clevertension 已提交
223 224
        }
    }
J
update  
jipengfei.jpf 已提交
225
}