XlsSaxAnalyser.java 7.7 KB
Newer Older
1
package com.alibaba.excel.analysis.v03;
J
update  
jipengfei.jpf 已提交
2

Z
zhuangjiaju 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import com.alibaba.excel.analysis.ExcelExecutor;
import com.alibaba.excel.analysis.v03.handlers.BlankOrErrorRecordHandler;
21
import com.alibaba.excel.analysis.v03.handlers.BofRecordHandler;
Z
zhuangjiaju 已提交
22 23 24 25 26
import com.alibaba.excel.analysis.v03.handlers.FormulaRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.LabelRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.MissingCellDummyRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.NoteRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.NumberRecordHandler;
27 28
import com.alibaba.excel.analysis.v03.handlers.RkRecordHandler;
import com.alibaba.excel.analysis.v03.handlers.SstRecordHandler;
29
import com.alibaba.excel.context.AnalysisContext;
Z
zhuangjiaju 已提交
30
import com.alibaba.excel.enums.CellDataTypeEnum;
31
import com.alibaba.excel.exception.ExcelAnalysisException;
Z
zhuangjiaju 已提交
32
import com.alibaba.excel.metadata.CellData;
33 34
import com.alibaba.excel.read.listener.event.EachRowAnalysisFinishEvent;
import com.alibaba.excel.read.metadata.ReadSheet;
Z
zhuangjiaju 已提交
35
import com.alibaba.excel.read.metadata.holder.ReadRowHolder;
36
import com.alibaba.excel.read.metadata.holder.ReadWorkbookHolder;
clevertension's avatar
clevertension 已提交
37
import com.alibaba.excel.util.CollectionUtils;
38

J
update  
jipengfei.jpf 已提交
39
/**
Z
zhuangjiaju 已提交
40 41 42 43 44 45 46 47 48 49 50
 * A text extractor for Excel files.
 * <p>
 * Returns the textual content of the file, suitable for indexing by something like Lucene, but not really
 * intended for display to the user.
 * </p>
 * <p>
 * To turn an Excel file into a CSV or similar, see the XLS2CSVmra example.
 * </p>
 *
 * @see <a href=
 * "http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java">XLS2CSVmra</a>
J
update  
jipengfei.jpf 已提交
51 52 53
 *
 * @author jipengfei
 */
Z
zhuangjiaju 已提交
54
public class XlsSaxAnalyser implements HSSFListener, ExcelExecutor {
clevertension's avatar
clevertension 已提交
55 56 57 58
    private boolean outputFormulaValues = true;
    private POIFSFileSystem fs;
    private int lastRowNumber;
    private int lastColumnNumber;
Z
zhuangjiaju 已提交
59
    private boolean notAllEmpty = false;
clevertension's avatar
clevertension 已提交
60 61 62 63 64
    /**
     * For parsing Formulas
     */
    private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
    private FormatTrackingHSSFListener formatListener;
Z
zhuangjiaju 已提交
65
    private List<CellData> records;
66
    private List<ReadSheet> sheets = new ArrayList<ReadSheet>();
clevertension's avatar
clevertension 已提交
67 68
    private HSSFWorkbook stubWorkbook;
    private List<XlsRecordHandler> recordHandlers = new ArrayList<XlsRecordHandler>();
69
    private AnalysisContext analysisContext;
Z
zhuangjiaju 已提交
70

71
    public XlsSaxAnalyser(AnalysisContext context) throws IOException {
J
update  
jipengfei.jpf 已提交
72
        this.analysisContext = context;
Z
zhuangjiaju 已提交
73
        this.records = new ArrayList<CellData>();
74 75 76 77 78 79
        ReadWorkbookHolder readWorkbookHolder = analysisContext.readWorkbookHolder();
        if (readWorkbookHolder.getFile() != null) {
            this.fs = new POIFSFileSystem(readWorkbookHolder.getFile());
        } else {
            this.fs = new POIFSFileSystem(readWorkbookHolder.getInputStream());
        }
Z
zhuangjiaju 已提交
80

J
update  
jipengfei.jpf 已提交
81 82
    }

83
    @Override
84
    public List<ReadSheet> sheetList() {
J
update  
jipengfei.jpf 已提交
85 86 87 88 89 90 91
        return sheets;
    }

    @Override
    public void execute() {
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);
clevertension's avatar
clevertension 已提交
92 93 94 95 96 97
        workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
        if (workbookBuildingListener != null && stubWorkbook == null) {
            stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
        }

        init();
J
update  
jipengfei.jpf 已提交
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            request.addListenerForAllRecords(workbookBuildingListener);
        }

        try {
            factory.processWorkbookEvents(request, fs);
        } catch (IOException e) {
            throw new ExcelAnalysisException(e);
        }
    }

    private void init() {
        lastRowNumber = 0;
        lastColumnNumber = 0;
Z
zhuangjiaju 已提交
117
        records = new ArrayList<CellData>();
118
        sheets = new ArrayList<ReadSheet>();
clevertension's avatar
clevertension 已提交
119
        buildXlsRecordHandlers();
J
update  
jipengfei.jpf 已提交
120 121
    }

122
    @Override
J
update  
jipengfei.jpf 已提交
123 124 125
    public void processRecord(Record record) {
        int thisRow = -1;
        int thisColumn = -1;
Z
zhuangjiaju 已提交
126
        CellData cellData = null;
Z
zhuangjiaju 已提交
127
        for (XlsRecordHandler handler : this.recordHandlers) {
clevertension's avatar
clevertension 已提交
128 129 130 131
            if (handler.support(record)) {
                handler.processRecord(record);
                thisRow = handler.getRow();
                thisColumn = handler.getColumn();
Z
zhuangjiaju 已提交
132 133 134 135
                cellData = handler.getCellData();
                if (cellData != null) {
                    records.add(cellData);
                }
J
update  
jipengfei.jpf 已提交
136
                break;
clevertension's avatar
clevertension 已提交
137
            }
J
update  
jipengfei.jpf 已提交
138 139
        }
        // If we got something to print out, do so
Z
zhuangjiaju 已提交
140 141 142 143
        if (cellData != null) {
            if (analysisContext.currentReadHolder().globalConfiguration().getAutoTrim()
                && CellDataTypeEnum.STRING == cellData.getType()) {
                cellData.setStringValue(cellData.getStringValue().trim());
J
update  
jipengfei.jpf 已提交
144
            }
Z
zhuangjiaju 已提交
145
            if (CellDataTypeEnum.EMPTY != cellData.getType()) {
J
update  
jipengfei.jpf 已提交
146 147 148 149
                notAllEmpty = true;
            }
        }

clevertension's avatar
clevertension 已提交
150 151 152 153 154
        // Handle new row
        if (thisRow != -1 && thisRow != lastRowNumber) {
            lastColumnNumber = -1;
        }

J
update  
jipengfei.jpf 已提交
155 156 157 158 159 160 161 162
        // Update column and row count
        if (thisRow > -1) {
            lastRowNumber = thisRow;
        }
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }

clevertension's avatar
clevertension 已提交
163 164 165 166
        processLastCellOfRow(record);
    }

    private void processLastCellOfRow(Record record) {
J
update  
jipengfei.jpf 已提交
167 168
        // Handle end of row
        if (record instanceof LastCellOfRowDummyRecord) {
clevertension's avatar
clevertension 已提交
169
            int row = ((LastCellOfRowDummyRecord)record).getRow();
J
update  
jipengfei.jpf 已提交
170 171 172
            if (lastColumnNumber == -1) {
                lastColumnNumber = 0;
            }
clevertension's avatar
clevertension 已提交
173
            if (notAllEmpty) {
Z
zhuangjiaju 已提交
174 175 176 177
                analysisContext.readRowHolder(
                    new ReadRowHolder(lastRowNumber, analysisContext.readSheetHolder().getGlobalConfiguration()));
                analysisContext.readSheetHolder().notifyEndOneRow(new EachRowAnalysisFinishEvent(records),
                    analysisContext);
J
update  
jipengfei.jpf 已提交
178 179 180 181 182
            }
            records.clear();
            lastColumnNumber = -1;
        }
    }
clevertension's avatar
clevertension 已提交
183 184 185 186

    private void buildXlsRecordHandlers() {
        if (CollectionUtils.isEmpty(recordHandlers)) {
            recordHandlers.add(new BlankOrErrorRecordHandler());
187
            recordHandlers.add(new BofRecordHandler(workbookBuildingListener, analysisContext, sheets));
clevertension's avatar
clevertension 已提交
188 189 190 191
            recordHandlers.add(new FormulaRecordHandler(stubWorkbook, formatListener));
            recordHandlers.add(new LabelRecordHandler());
            recordHandlers.add(new NoteRecordHandler());
            recordHandlers.add(new NumberRecordHandler(formatListener));
192 193
            recordHandlers.add(new RkRecordHandler());
            recordHandlers.add(new SstRecordHandler());
clevertension's avatar
clevertension 已提交
194 195 196 197
            recordHandlers.add(new MissingCellDummyRecordHandler());
            Collections.sort(recordHandlers);
        }

Z
zhuangjiaju 已提交
198
        for (XlsRecordHandler x : recordHandlers) {
clevertension's avatar
clevertension 已提交
199 200 201
            x.init();
        }
    }
J
update  
jipengfei.jpf 已提交
202
}