XlsxSaxAnalyser.java 6.9 KB
Newer Older
1 2
package com.alibaba.excel.analysis.v07;

Z
zhuangjiaju 已提交
3
import java.io.File;
Z
zhuangjiaju 已提交
4 5 6
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
Z
zhuangjiaju 已提交
7
import java.util.HashMap;
Z
zhuangjiaju 已提交
8
import java.util.List;
Z
zhuangjiaju 已提交
9 10
import java.util.Map;
import java.util.UUID;
Z
zhuangjiaju 已提交
11 12 13 14

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

15
import org.apache.poi.openxml4j.opc.OPCPackage;
Z
zhuangjiaju 已提交
16
import org.apache.poi.openxml4j.opc.PackagePart;
17
import org.apache.poi.xssf.eventusermodel.XSSFReader;
Z
zhuangjiaju 已提交
18
import org.apache.poi.xssf.usermodel.XSSFRelation;
Z
zhuangjiaju 已提交
19 20 21
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbookPr;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
Z
zhuangjiaju 已提交
22 23
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
24 25 26 27
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

Z
zhuangjiaju 已提交
28 29
import com.alibaba.excel.analysis.ExcelExecutor;
import com.alibaba.excel.cache.Ehcache;
Z
zhuangjiaju 已提交
30
import com.alibaba.excel.cache.MapCache;
Z
zhuangjiaju 已提交
31 32
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.exception.ExcelAnalysisException;
33 34
import com.alibaba.excel.read.metadata.ReadSheet;
import com.alibaba.excel.read.metadata.holder.ReadWorkbookHolder;
Z
zhuangjiaju 已提交
35
import com.alibaba.excel.util.FileUtils;
36 37

/**
38
 *
39 40
 * @author jipengfei
 */
Z
zhuangjiaju 已提交
41
public class XlsxSaxAnalyser implements ExcelExecutor {
Z
zhuangjiaju 已提交
42 43 44 45 46
    private static final Logger LOGGER = LoggerFactory.getLogger(XlsxSaxAnalyser.class);
    /**
     * If it's less than 5M, use map cache, or use ehcache.
     */
    private static final long USE_MAP_CACHE_SIZE = 5 * 1000 * 1000L;
Z
zhuangjiaju 已提交
47
    private AnalysisContext analysisContext;
48
    private List<ReadSheet> sheetList;
Z
zhuangjiaju 已提交
49
    private Map<Integer, InputStream> sheetMap;
50

Z
zhuangjiaju 已提交
51 52
    public XlsxSaxAnalyser(AnalysisContext analysisContext) throws Exception {
        this.analysisContext = analysisContext;
53 54
        // Initialize cache
        ReadWorkbookHolder readWorkbookHolder = analysisContext.readWorkbookHolder();
Z
zhuangjiaju 已提交
55 56 57 58 59

        OPCPackage pkg = readOpcPackage(readWorkbookHolder);

        PackagePart sharedStringsTablePackagePart =
            pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType()).get(0);
60
        if (readWorkbookHolder.getReadCache() == null) {
Z
zhuangjiaju 已提交
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
            long size = sharedStringsTablePackagePart.getSize();
            if (size < 0) {
                size = sharedStringsTablePackagePart.getInputStream().available();
            }
            if (size < USE_MAP_CACHE_SIZE) {
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.info("Use map cache.size:{}", size);
                }
                readWorkbookHolder.setReadCache(new MapCache());
            } else {
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.info("Use ehcache.size:{}", size);
                }
                readWorkbookHolder.setReadCache(new Ehcache());
            }
Z
zhuangjiaju 已提交
76
        }
77
        readWorkbookHolder.getReadCache().init(analysisContext);
Z
zhuangjiaju 已提交
78

Z
zhuangjiaju 已提交
79
        // Analysis sharedStringsTable.xml
Z
zhuangjiaju 已提交
80
        analysisSharedStringsTable(sharedStringsTablePackagePart.getInputStream(), readWorkbookHolder);
81

Z
zhuangjiaju 已提交
82
        XSSFReader xssfReader = new XSSFReader(pkg);
83

84
        analysisUse1904WindowDate(xssfReader, readWorkbookHolder);
85

86
        sheetList = new ArrayList<ReadSheet>();
Z
zhuangjiaju 已提交
87 88 89
        sheetMap = new HashMap<Integer, InputStream>();
        XSSFReader.SheetIterator ite = (XSSFReader.SheetIterator)xssfReader.getSheetsData();
        int index = 0;
90 91 92
        if (!ite.hasNext()) {
            throw new ExcelAnalysisException("Can not find any sheet!");
        }
93 94
        while (ite.hasNext()) {
            InputStream inputStream = ite.next();
95
            sheetList.add(new ReadSheet(index, ite.getSheetName()));
Z
zhuangjiaju 已提交
96 97
            sheetMap.put(index, inputStream);
            index++;
98 99 100
        }
    }

101 102 103 104 105
    private void analysisUse1904WindowDate(XSSFReader xssfReader, ReadWorkbookHolder readWorkbookHolder)
        throws Exception {
        if (readWorkbookHolder.globalConfiguration().getUse1904windowing() != null) {
            return;
        }
Z
zhuangjiaju 已提交
106 107 108 109 110
        InputStream workbookXml = xssfReader.getWorkbookData();
        WorkbookDocument ctWorkbook = WorkbookDocument.Factory.parse(workbookXml);
        CTWorkbook wb = ctWorkbook.getWorkbook();
        CTWorkbookPr prefix = wb.getWorkbookPr();
        if (prefix != null && prefix.getDate1904()) {
111
            readWorkbookHolder.getGlobalConfiguration().setUse1904windowing(Boolean.TRUE);
112 113
        } else {
            readWorkbookHolder.getGlobalConfiguration().setUse1904windowing(Boolean.FALSE);
Z
zhuangjiaju 已提交
114 115 116
        }
    }

Z
zhuangjiaju 已提交
117 118
    private void analysisSharedStringsTable(InputStream sharedStringsTableInputStream,
        ReadWorkbookHolder readWorkbookHolder) throws Exception {
119
        ContentHandler handler = new SharedStringsTableHandler(readWorkbookHolder.getReadCache());
Z
zhuangjiaju 已提交
120
        parseXmlSource(sharedStringsTableInputStream, handler);
121
        readWorkbookHolder.getReadCache().putFinished();
Z
zhuangjiaju 已提交
122 123
    }

124 125 126
    private OPCPackage readOpcPackage(ReadWorkbookHolder readWorkbookHolder) throws Exception {
        if (readWorkbookHolder.getFile() != null) {
            return OPCPackage.open(readWorkbookHolder.getFile());
Z
zhuangjiaju 已提交
127
        }
128 129
        if (readWorkbookHolder.getMandatoryUseInputStream()) {
            return OPCPackage.open(readWorkbookHolder.getInputStream());
130
        }
Z
zhuangjiaju 已提交
131
        File readTempFile = FileUtils.createCacheTmpFile();
132
        readWorkbookHolder.setTempFile(readTempFile);
Z
zhuangjiaju 已提交
133
        File tempFile = new File(readTempFile.getPath(), UUID.randomUUID().toString() + ".xlsx");
Z
zhuangjiaju 已提交
134
        FileUtils.writeToFile(tempFile, readWorkbookHolder.getInputStream());
Z
zhuangjiaju 已提交
135
        return OPCPackage.open(tempFile);
136 137
    }

Z
zhuangjiaju 已提交
138
    @Override
139
    public List<ReadSheet> sheetList() {
Z
zhuangjiaju 已提交
140 141 142 143 144
        return sheetList;
    }

    private void parseXmlSource(InputStream inputStream, ContentHandler handler) {
        InputSource inputSource = new InputSource(inputStream);
145 146 147 148 149 150 151 152
        try {
            SAXParserFactory saxFactory = SAXParserFactory.newInstance();
            saxFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            saxFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            saxFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader xmlReader = saxParser.getXMLReader();
            xmlReader.setContentHandler(handler);
Z
zhuangjiaju 已提交
153
            xmlReader.parse(inputSource);
154 155 156
            inputStream.close();
        } catch (Exception e) {
            throw new ExcelAnalysisException(e);
Z
zhuangjiaju 已提交
157 158 159 160 161 162 163 164
        } finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    throw new ExcelAnalysisException("Can not close 'inputStream'!");
                }
            }
165 166 167
        }
    }

168 169 170 171 172 173
    @Override
    public void execute() {
        parseXmlSource(sheetMap.get(analysisContext.readSheetHolder().getSheetNo()),
            new XlsxRowHandler(analysisContext));
    }

174
}