XlsxSaxAnalyser.java 7.9 KB
Newer Older
1 2
package com.alibaba.excel.analysis.v07;

Z
zhuangjiaju 已提交
3
import java.io.File;
Z
zhuangjiaju 已提交
4 5 6
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
Z
zhuangjiaju 已提交
7
import java.util.HashMap;
Z
zhuangjiaju 已提交
8
import java.util.List;
Z
zhuangjiaju 已提交
9 10
import java.util.Map;
import java.util.UUID;
Z
zhuangjiaju 已提交
11 12 13 14

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

15
import org.apache.poi.openxml4j.opc.OPCPackage;
Z
zhuangjiaju 已提交
16
import org.apache.poi.openxml4j.opc.PackagePart;
17
import org.apache.poi.xssf.eventusermodel.XSSFReader;
Z
zhuangjiaju 已提交
18
import org.apache.poi.xssf.model.StylesTable;
Z
zhuangjiaju 已提交
19
import org.apache.poi.xssf.usermodel.XSSFRelation;
Z
zhuangjiaju 已提交
20 21 22
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbookPr;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
Z
zhuangjiaju 已提交
23 24
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
25 26 27 28
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

Z
zhuangjiaju 已提交
29 30
import com.alibaba.excel.analysis.ExcelExecutor;
import com.alibaba.excel.cache.Ehcache;
Z
zhuangjiaju 已提交
31
import com.alibaba.excel.cache.MapCache;
Z
zhuangjiaju 已提交
32 33
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.exception.ExcelAnalysisException;
34 35
import com.alibaba.excel.read.metadata.ReadSheet;
import com.alibaba.excel.read.metadata.holder.ReadWorkbookHolder;
Z
zhuangjiaju 已提交
36
import com.alibaba.excel.util.FileUtils;
37 38

/**
 * SAX-based {@link ExcelExecutor} for xlsx workbooks.
 *
 * @author jipengfei
 */
Z
zhuangjiaju 已提交
42
public class XlsxSaxAnalyser implements ExcelExecutor {
Z
zhuangjiaju 已提交
43 44 45 46 47
    private static final Logger LOGGER = LoggerFactory.getLogger(XlsxSaxAnalyser.class);
    /**
     * Size threshold (5 * 1000 * 1000) for the shared strings part: below it a map cache is used,
     * otherwise Ehcache.
     */
    private static final long USE_MAP_CACHE_SIZE = 5 * 1000 * 1000L;
Z
zhuangjiaju 已提交
48
    private AnalysisContext analysisContext;
49
    private List<ReadSheet> sheetList;
Z
zhuangjiaju 已提交
50
    private Map<Integer, InputStream> sheetMap;
Z
zhuangjiaju 已提交
51 52 53 54
    /**
     * Current style information
     */
    private StylesTable stylesTable;
55

Z
zhuangjiaju 已提交
56
    /**
     * Opens the workbook package, loads the shared strings into the read cache, resolves the 1904 date
     * setting and collects every sheet together with its raw XML stream.
     *
     * @param analysisContext context whose {@code readWorkbookHolder()} supplies the workbook settings
     * @param decryptedStream already decrypted workbook content, or {@code null} to read from the file or
     *        input stream held by the workbook holder
     * @throws Exception if the package cannot be opened or one of its parts cannot be read
     * @throws ExcelAnalysisException if the workbook does not contain any sheet
     */
    public XlsxSaxAnalyser(AnalysisContext analysisContext, InputStream decryptedStream) throws Exception {
        this.analysisContext = analysisContext;
        ReadWorkbookHolder readWorkbookHolder = analysisContext.readWorkbookHolder();

        OPCPackage pkg = readOpcPackage(readWorkbookHolder, decryptedStream);
        readWorkbookHolder.setOpcPackage(pkg);

        // The shared strings part is optional in an xlsx package, so the lookup may return an empty
        // list; taking element 0 unconditionally would fail with IndexOutOfBoundsException.
        List<PackagePart> sharedStringsParts =
            pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
        if (sharedStringsParts == null || sharedStringsParts.isEmpty()) {
            // Nothing to load, but still leave an initialised and finished cache on the holder.
            if (readWorkbookHolder.getReadCache() == null) {
                readWorkbookHolder.setReadCache(new MapCache());
            }
            readWorkbookHolder.getReadCache().init(analysisContext);
            readWorkbookHolder.getReadCache().putFinished();
        } else {
            PackagePart sharedStringsTablePackagePart = sharedStringsParts.get(0);

            // Specify default cache
            defaultReadCache(readWorkbookHolder, sharedStringsTablePackagePart);

            // Analysis sharedStringsTable.xml
            analysisSharedStringsTable(sharedStringsTablePackagePart.getInputStream(), readWorkbookHolder);
        }

        XSSFReader xssfReader = new XSSFReader(pkg);
        analysisUse1904WindowDate(xssfReader, readWorkbookHolder);

        stylesTable = xssfReader.getStylesTable();
        sheetList = new ArrayList<ReadSheet>();
        sheetMap = new HashMap<Integer, InputStream>();
        XSSFReader.SheetIterator ite = (XSSFReader.SheetIterator)xssfReader.getSheetsData();
        if (!ite.hasNext()) {
            throw new ExcelAnalysisException("Can not find any sheet!");
        }
        // Sheets are numbered from 0 in iteration order; the same number keys the stream map.
        int index = 0;
        while (ite.hasNext()) {
            InputStream inputStream = ite.next();
            sheetList.add(new ReadSheet(index, ite.getSheetName()));
            sheetMap.put(index, inputStream);
            index++;
        }
    }

Z
zhuangjiaju 已提交
91 92 93 94 95 96 97 98 99 100 101 102
    private void defaultReadCache(ReadWorkbookHolder readWorkbookHolder, PackagePart sharedStringsTablePackagePart)
        throws IOException {
        if (readWorkbookHolder.getReadCache() != null) {
            readWorkbookHolder.getReadCache().init(analysisContext);
            return;
        }
        long size = sharedStringsTablePackagePart.getSize();
        if (size < 0) {
            size = sharedStringsTablePackagePart.getInputStream().available();
        }
        if (size < USE_MAP_CACHE_SIZE) {
            if (LOGGER.isDebugEnabled()) {
Z
zhuangjiaju 已提交
103
                LOGGER.debug("Use map cache.size:{}", size);
Z
zhuangjiaju 已提交
104 105 106 107
            }
            readWorkbookHolder.setReadCache(new MapCache());
        } else {
            if (LOGGER.isDebugEnabled()) {
Z
zhuangjiaju 已提交
108
                LOGGER.debug("Use ehcache.size:{}", size);
Z
zhuangjiaju 已提交
109 110 111 112 113 114
            }
            readWorkbookHolder.setReadCache(new Ehcache());
        }
        readWorkbookHolder.getReadCache().init(analysisContext);
    }

115 116 117 118 119
    private void analysisUse1904WindowDate(XSSFReader xssfReader, ReadWorkbookHolder readWorkbookHolder)
        throws Exception {
        if (readWorkbookHolder.globalConfiguration().getUse1904windowing() != null) {
            return;
        }
Z
zhuangjiaju 已提交
120 121 122 123 124
        InputStream workbookXml = xssfReader.getWorkbookData();
        WorkbookDocument ctWorkbook = WorkbookDocument.Factory.parse(workbookXml);
        CTWorkbook wb = ctWorkbook.getWorkbook();
        CTWorkbookPr prefix = wb.getWorkbookPr();
        if (prefix != null && prefix.getDate1904()) {
125
            readWorkbookHolder.getGlobalConfiguration().setUse1904windowing(Boolean.TRUE);
126 127
        } else {
            readWorkbookHolder.getGlobalConfiguration().setUse1904windowing(Boolean.FALSE);
Z
zhuangjiaju 已提交
128 129 130
        }
    }

Z
zhuangjiaju 已提交
131 132
    private void analysisSharedStringsTable(InputStream sharedStringsTableInputStream,
        ReadWorkbookHolder readWorkbookHolder) throws Exception {
133
        ContentHandler handler = new SharedStringsTableHandler(readWorkbookHolder.getReadCache());
Z
zhuangjiaju 已提交
134
        parseXmlSource(sharedStringsTableInputStream, handler);
135
        readWorkbookHolder.getReadCache().putFinished();
Z
zhuangjiaju 已提交
136 137
    }

Z
zhuangjiaju 已提交
138 139 140
    private OPCPackage readOpcPackage(ReadWorkbookHolder readWorkbookHolder, InputStream decryptedStream)
        throws Exception {
        if (decryptedStream == null && readWorkbookHolder.getFile() != null) {
141
            return OPCPackage.open(readWorkbookHolder.getFile());
Z
zhuangjiaju 已提交
142
        }
143
        if (readWorkbookHolder.getMandatoryUseInputStream()) {
Z
zhuangjiaju 已提交
144 145 146 147 148
            if (decryptedStream != null) {
                return OPCPackage.open(decryptedStream);
            } else {
                return OPCPackage.open(readWorkbookHolder.getInputStream());
            }
149
        }
Z
zhuangjiaju 已提交
150
        File readTempFile = FileUtils.createCacheTmpFile();
151
        readWorkbookHolder.setTempFile(readTempFile);
Z
zhuangjiaju 已提交
152
        File tempFile = new File(readTempFile.getPath(), UUID.randomUUID().toString() + ".xlsx");
Z
zhuangjiaju 已提交
153 154 155 156 157
        if (decryptedStream != null) {
            FileUtils.writeToFile(tempFile, decryptedStream);
        } else {
            FileUtils.writeToFile(tempFile, readWorkbookHolder.getInputStream());
        }
Z
zhuangjiaju 已提交
158
        return OPCPackage.open(tempFile);
159 160
    }

Z
zhuangjiaju 已提交
161
    @Override
162
    public List<ReadSheet> sheetList() {
Z
zhuangjiaju 已提交
163 164 165 166 167
        return sheetList;
    }

    private void parseXmlSource(InputStream inputStream, ContentHandler handler) {
        InputSource inputSource = new InputSource(inputStream);
168 169 170 171 172 173 174 175
        try {
            SAXParserFactory saxFactory = SAXParserFactory.newInstance();
            saxFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            saxFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            saxFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader xmlReader = saxParser.getXMLReader();
            xmlReader.setContentHandler(handler);
Z
zhuangjiaju 已提交
176
            xmlReader.parse(inputSource);
177
            inputStream.close();
Z
zhuangjiaju 已提交
178 179
        } catch (ExcelAnalysisException e) {
            throw e;
180 181
        } catch (Exception e) {
            throw new ExcelAnalysisException(e);
Z
zhuangjiaju 已提交
182 183 184 185 186 187 188 189
        } finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    throw new ExcelAnalysisException("Can not close 'inputStream'!");
                }
            }
190 191 192
        }
    }

193 194 195
    @Override
    public void execute() {
        parseXmlSource(sheetMap.get(analysisContext.readSheetHolder().getSheetNo()),
Z
zhuangjiaju 已提交
196
            new XlsxRowHandler(analysisContext, stylesTable));
197 198
    }

199
}