使用POI的XSSF和SAX(事件API)读取Excel工作表

使用POI的XSSF和SAX(事件API)读取Excel工作表

本文介绍了使用POI的XSSF和SAX(事件API)读取Excel工作表的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用POI的XSSF和SAX(事件API)读取一个Excel工作表。该工作表包含成千上万行用户信息,例如用户名、电子邮件、地址、年龄、部门等。

我需要从Excel中读取每一行,将其转换为一个User对象,并把该User对象添加到User对象的列表中。

我可以成功读取Excel工作表,但不确定在读取过程中的哪个时间点应该创建User对象的实例,并用Excel工作表中的数据填充它。

下面是我的完整工作代码。

 进口的java.io.File;
    进口java.io.FileOutputStream中;
    进口java.io.IOException异常;
    进口的java.io.InputStream;
    进口java.io.PrintStream中;
    进口的java.util.ArrayList;
    进口的java.util.List;    进口javax.xml.parsers.ParserConfigurationException;
    进口javax.xml.parsers.SAXParser中;
    进口javax.xml.parsers.SAXParserFactory中;    进口org.apache.poi.openxml4j.exceptions.OpenXML4JException;
    进口org.apache.poi.openxml4j.opc.OPCPackage;
    进口org.apache.poi.openxml4j.opc.PackageAccess;
    进口org.apache.poi.ss.usermodel.BuiltinFormats;
    进口org.apache.poi.ss.usermodel.DataFormatter;
    进口org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
    进口org.apache.poi.xssf.eventusermodel.XSSFReader;
    进口org.apache.poi.xssf.model.StylesTable;
    进口org.apache.poi.xssf.usermodel.XSSFCellStyle;
    进口org.apache.poi.xssf.usermodel.XSSFRichTextString;
    进口org.xml.sax.Attributes;
    进口org.xml.sax.ContentHandler中;
    进口org.xml.sax.InputSource中;
    进口org.xml.sax.SAXException;
    进口org.xml.sax.XMLReader中;
    进口org.xml.sax.helpers.DefaultHandler中;    公共类ExcelSheetParser {        枚举xssfDataType {
            BOOL,ERROR,配方,INLINESTR,SSTINDEX,NUMBER
        }        INT countrows = 0;        类XSSFSheetHandler扩展的DefaultHandler {            / **
             *使用样式表
             * /
            私人StylesTable stylesTable;            / **
             *表具有独特的琴弦
             * /
            私人ReadOnlySharedStringsTable sharedStringsTable;            / **
             *目的地数据
             * /
            私人最终PrintStream的输出;            私人列表<>名单=新的ArrayList();            私有类clazz所;            / **
             *列数读先从最左边
             * /
            私人最终诠释minColumnCount;            //设置当V开始元素被认为是
            私人布尔vIsOpen;            //设置当细胞开始元素被发现;
            //当电池接近的元素被视为使用。
            私人xssfDataType nextDataType;            //用于格式化数字的单元格值。
            私人短formatIndex;
            私人字符串的formatString;
            私人最终DataFormatter格式;            私人INT thisColumn = -1;
            //最后一列打印到输出流
            私人INT lastColumnNumber = -1;            //因为他们看到的收集字符。
            私人StringBuffer的价值;            / **
             *接受在分析需要的对象。
             *
             * @参数风格
             *样式表
             * @参数字符串
             *共用字符串表
             * @参数COLS
             *列的最小数量显示
             * @参数目标
             *水槽输出
             * /
            公共XSSFSheetHandler(StylesTable风格,
                    ReadOnlySharedStringsTable字符串,INT COLS,PrintStream的目标,类clazz所){
                this.stylesTable =风格;
                this.sharedStringsTable =串;
                this.minColumnCount = COLS;
                this.output =目标;
                THIS.VALUE =新的StringBuffer();
                this.nextDataType = xssfDataType.NUMBER;
                this.formatter =新DataFormatter();
                this.clazz = clazz所;
            }            公共无效的startElement(URI字符串,字符串的localName,字符串名称,
                    属性属性)抛出的SAXException {                如果(inlineStr.equals(名称)||v的.equals(名称)){
                    vIsOpen = TRUE;
                    //清除缓存内容
                    value.setLength(0);
                }
                // C =>细胞
                否则如果(c的.equals(名称)){
                    //获取单元格引用
                    串R = attributes.getValue(R);
                    INT firstDigit = -1;
                    对(INT C = 0;℃下r.length(); ++三){
                        如果(Character.isDigit(r.charAt(三))){
                            firstDigit = C;
                            打破;
                        }
                    }
                    thisColumn = nameToColumn(r.substring(0,firstDigit));                    //设置默认值。
                    this.nextDataType = xssfDataType.NUMBER;
                    this.formatIndex = -1;
                    this.formatString = NULL;
                    字符串CELLTYPE = attributes.getValue(T);
                    串cellStyleStr = attributes.getValue(S);
                    如果(b的.equals(CELLTYPE))
                        nextDataType = xssfDataType.BOOL;
                    否则,如果(E.equals(CELLTYPE))
                        nextDataType = xssfDataType.ERROR;
                    否则,如果(inlineStr.equals(CELLTYPE))
                        nextDataType = xssfDataType.INLINESTR;
                    否则如果(S.equals(CELLTYPE))
                        nextDataType = xssfDataType.SSTINDEX;
                    否则,如果(STR.equals(CELLTYPE))
                        nextDataType = xssfDataType.FORMULA;
                    否则,如果(cellStyleStr!= NULL){
                        //这是一个数字,但几乎当然是一个
                        用特殊的风格或格式//
                        INT styleIndex =的Integer.parseInt(cellStyleStr);
                        XSSFCellStyle风格= stylesTable.getStyleAt(styleIndex);
                        this.formatIndex = style.getDataFormat();
                        this.formatString = style.getDataFormatString();
                        如果(this.formatString == NULL)
                            this.formatString = BuiltinFormats
                                    .getBuiltinFormat(this.formatIndex);
                    }
                }            }            公共无效的endElement(URI字符串,字符串的localName,字符串名称)
                    抛出的SAXException {                字符串thisStr = NULL;                // V =​​>一个细胞的内容
                如果(V.equals(名称)){
                    //过程中的价值内容要求。
                    //立即执行,如字符()可以被称为一次以上
                    开关(nextDataType){                    案例BOOL:
                        烧焦第一= value.charAt(0);
                        thisStr =第一=='0'? 假:TRUE;
                        打破;                    案例错误:
                        thisStr =\\错误:+ value.toString()+'';
                        打破;                    案例公式:
                        //公式可能会导致一个字符串值,
                        //所以总是加上双引号字符。
                        thisStr ='+ value.toString()+'';
                        打破;                    案例INLINESTR:
                        // TODO:已经看到了这样的一个例子,所以这是未经考验的。
                        XSSFRichTextString RTSI =新XSSFRichTextString(价值
                                的ToString());
                        thisStr ='+ rtsi.toString()+'';
                        打破;                    案例SSTINDEX:
                        串sstIndex = value.toString();
                        尝试{
                            INT IDX =的Integer.parseInt(sstIndex);
                            XSSFRichTextString RTSS =新XSSFRichTextString(
                                    sharedStringsTable.getEntryAt(IDX));
                            thisStr ='+ rtss.toString()+'';
                        }赶上(NumberFormatException的前){
                            output.println(无法解析SST指数'+ sstIndex
                                    +':+ ex.toString());
                        }
                        打破;                    案件编号:
                        字符串n = value.toString();
                        如果(this.formatString!= NULL)
                            thisStr = formatter.formatRawCellContents(双人间
                                    .parseDouble(n)时,this.formatIndex,
                                    this.formatString);
                        其他
                            thisStr = N;
                        打破;                    默认:
                        thisStr =(TODO:意外的类型:+ nextDataType +);
                        打破;
                    }                    //输出后我们见过的字符串内容
                    //放出任何领域逗号失踪这一一行
                    如果(lastColumnNumber == -1){
                        lastColumnNumber = 0;
                    }
                    的for(int i = lastColumnNumber; I< thisColumn ++ I)
                        output.print(,);                    //可能是空字符串。
                    output.print(thisColumn +:+ thisStr);                    //更新列
                    如果(thisColumn -1个)
                        lastColumnNumber = thisColumn;                }否则如果(行.equals(名称)){                    //如果需要打印出任何缺少逗号
                    如果(minColumns大于0){
                        //列是根据0
                        如果(lastColumnNumber == -1){
                            lastColumnNumber = 0;
                        }
                        对于(INT I = lastColumnNumber;我≤(this.minColumnCount);我++){
                            output.print(,);
                        }
                    }                    //我们到一个新行                    output.println();
                    output.println(countrows ++);
                    lastColumnNumber = -1;                }            }            / **
             *捕捉人物只有一个合适的元素是开放的。本来
             *只是V;延长inlineStr也。
             * /
            公共无效字符(字符[]通道,诠释开始,诠释长度)
                    抛出的SAXException {
                如果(vIsOpen)
                    value.append(CH,开始,长度);
            }            / **
             *转换一个Excel列名称,比如C到一个从零开始的索引。
             *
             * @参数名称
             *对应于指定名称@返回首页
             * /
            私人诠释nameToColumn(字符串名称){
                INT列= -1;
                的for(int i = 0; I< name.length(); ++ I){
                    INT C = name.charAt(I)
                    列=(柱+ 1)* 26 + C - 'A';
                }
                返回列;
            }        }        // /////////////////////////////////////        私人OPCPackage xlsxPackage;
        私人诠释minColumns;
        私人PrintStream的输出;
        私有类clazz所;        / **
         *创建一个新的XLSX - > CSV转换器
         *
         * @参数PKG
         *本XLSX包处理
         * @参数输出
         *本PrintStream的输出到CSV
         * @参数minColumns
         *输出列的最小数,或者-1没有最低
         * /
        公共ExcelSheetParser(OPCPackage PKG,为PrintStream输出,诠释minColumns,类clazz所){
            this.xlsxPackage = PKG;
            this.output =输出;
            this.minColumns = minColumns;
            this.clazz = clazz所;        }        / **
         *解析并示出了使用指定的样式的一个片材的含量和
         *共享字符串表。
         *
         * @参数风格
         * @参数字符串
         * @参数sheetInputStream
         * /
        公共无效processSheet(StylesTable风格,
                ReadOnlySharedStringsTable字符串的InputStream sheetInputStream)
                抛出IOException异常,的ParserConfigurationException,SAXException中{            InputSource的sheetSource =新的InputSource(sheetInputStream);
            的SAXParserFactory saxFactory = SAXParserFactory.newInstance();
            的SAXParser的SAXParser = saxFactory.newSAXParser();
            XMLReader的sheetParser = saxParser.getXMLReader();
            ContentHandler的处理程序=新XSSFSheetHandler(款式,字符串,
                    this.minColumns,this.output,this.clazz);
            sheetParser.setContentHandler(处理);
            sheetParser.parse(sheetSource);
        }        / **
         *启动XLS工作簿文件以CSV的处理。
         *
         *引发IOException
         * @throws OpenXML4JException
         * @throws的ParserConfigurationException
         * @throws的SAXException
         * /
        公共无效过程()抛出IOException异常,OpenXML4JException,
                的ParserConfigurationException,SAXException中{            ReadOnlySharedStringsTable串=新ReadOnlySharedStringsTable(
                    this.xlsxPackage);
            XSSFReader xssfReader =新XSSFReader(this.xlsxPackage);            StylesTable样式= xssfReader.getStylesTable();
            XSSFReader.SheetIterator ITER =(XSSFReader.SheetIterator)xssfReader
                    .getSheetsData();
            INT索引= 0;
            而(iter.hasNext()){
                InputStream的流= iter.next();
                串SHEETNAME = iter.getSheetName();
                this.output.println(SHEETNAME +[指数=+指数+]:);
                processSheet(款式,字符串流);
                stream.close();
                ++指数;
            }
        }
    }


解决方案

我可能会这样做:在行开始时着手构建User对象;遇到该行中的单元格时,用单元格内容填充User对象;行结束时校验User对象,如果没有问题再把它加入列表。由于你使用的是SAX解析,这些元素的开始和结束事件你都会收到,因此可以把逻辑挂接在这些事件上。

我建议你看一下Apache POI示例中的XLSX2CSV。它展示了如何处理不同类型的单元格内容(填充User对象时会用到)、如何在到达行尾时执行操作,以及如何处理缺失的单元格等。

I am reading an Excel sheet using POI's XSSF and SAX (Event API). The Excel sheet has thousands of rows of user information like user name, email, address, age, department etc.

I need to read each row from Excel, convert it into a User object and add this User object to a List of User objects.

I can read the Excel sheet successfully, but I am not sure at what point while reading I should create an instance of the User object and populate it with the data from the Excel sheet.

Below is my entire working code.

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.PrintStream;
    import java.util.ArrayList;
    import java.util.List;

    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.parsers.SAXParser;
    import javax.xml.parsers.SAXParserFactory;

    import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
    import org.apache.poi.openxml4j.opc.OPCPackage;
    import org.apache.poi.openxml4j.opc.PackageAccess;
    import org.apache.poi.ss.usermodel.BuiltinFormats;
    import org.apache.poi.ss.usermodel.DataFormatter;
    import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
    import org.apache.poi.xssf.eventusermodel.XSSFReader;
    import org.apache.poi.xssf.model.StylesTable;
    import org.apache.poi.xssf.usermodel.XSSFCellStyle;
    import org.apache.poi.xssf.usermodel.XSSFRichTextString;
    import org.xml.sax.Attributes;
    import org.xml.sax.ContentHandler;
    import org.xml.sax.InputSource;
    import org.xml.sax.SAXException;
    import org.xml.sax.XMLReader;
    import org.xml.sax.helpers.DefaultHandler;

    /**
     * Reads every worksheet of an XLSX package with POI's event (SAX) API and
     * prints each cell as {@code columnIndex : value} to a {@link PrintStream},
     * padding skipped columns with commas and printing a running row counter
     * after every row.
     */
    public class ExcelSheetParser {

        /**
         * Kinds of cell content the handler distinguishes. The kind is chosen
         * from the {@code t} attribute of the {@code <c>} element; cells without
         * a recognised {@code t} are treated as {@link #NUMBER}.
         */
        enum xssfDataType {
            BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER
        }

        /**
         * Number of rows completed so far. It is printed and then incremented at
         * the end of every row and is never reset, so it keeps counting across
         * all sheets handled by this parser instance.
         */
        int countrows = 0;

        /**
         * SAX handler for one worksheet part. It is an inner (non-static) class
         * because it updates {@link ExcelSheetParser#countrows}.
         */
        class XSSFSheetHandler extends DefaultHandler {

            /** Table with styles; consulted for the number format of styled numeric cells. */
            private final StylesTable stylesTable;

            /** Table with unique strings; {@code SSTINDEX} cells hold an index into it. */
            private final ReadOnlySharedStringsTable sharedStringsTable;

            /** Destination for data. */
            private final PrintStream output;

            /** Not read or written by any code in this class yet. */
            private final List<Object> list = new ArrayList<Object>();

            /** Stored from the constructor; not read by any code in this class yet. */
            private final Class<?> clazz;

            /** Minimum number of columns each row is padded to (no padding when not positive). */
            private final int minColumnCount;

            /** True while inside an element whose text is being collected into {@link #value}. */
            private boolean vIsOpen;

            /** Set when a cell start element is seen; used when its value element closes. */
            private xssfDataType nextDataType;

            // Used to format numeric cell values.
            private short formatIndex;
            private String formatString;
            private final DataFormatter formatter;

            /** Zero-based column of the cell currently being read. */
            private int thisColumn = -1;

            /** The last column printed to the output stream, or -1 at the start of a row. */
            private int lastColumnNumber = -1;

            /** Gathers characters as they are seen; cleared when a value element starts. */
            private final StringBuilder value;

            /**
             * Accepts objects needed while parsing.
             *
             * @param styles
             *            Table of styles
             * @param strings
             *            Table of shared strings
             * @param cols
             *            Minimum number of columns to show
             * @param target
             *            Sink for output
             * @param clazz
             *            Stored on the handler; no code in this class reads it yet
             */
            public XSSFSheetHandler(StylesTable styles,
                    ReadOnlySharedStringsTable strings, int cols, PrintStream target, Class<?> clazz) {
                this.stylesTable = styles;
                this.sharedStringsTable = strings;
                this.minColumnCount = cols;
                this.output = target;
                this.value = new StringBuilder();
                this.nextDataType = xssfDataType.NUMBER;
                this.formatter = new DataFormatter();
                this.clazz = clazz;
            }

            /**
             * Starts collecting text for value elements and, for cell elements,
             * records the column, the data type and (for styled numbers) the
             * number format to apply when the value closes.
             *
             * @throws SAXException
             *             if a cell has no usable {@code r} (reference) attribute
             */
            @Override
            public void startElement(String uri, String localName, String name,
                    Attributes attributes) throws SAXException {

                // NOTE(review): SpreadsheetML appears to serialise inline strings as
                // <is><t>...</t></is>, so an element literally named "inlineStr" may
                // never occur - confirm before relying on the INLINESTR path.
                if ("inlineStr".equals(name) || "v".equals(name)) {
                    vIsOpen = true;
                    // Clear contents cache
                    value.setLength(0);
                }
                // c => cell
                else if ("c".equals(name)) {
                    thisColumn = nameToColumn(columnLetters(attributes.getValue("r")));

                    // Set up defaults.
                    this.nextDataType = xssfDataType.NUMBER;
                    this.formatIndex = -1;
                    this.formatString = null;
                    String cellType = attributes.getValue("t");
                    String cellStyleStr = attributes.getValue("s");
                    if ("b".equals(cellType)) {
                        nextDataType = xssfDataType.BOOL;
                    } else if ("e".equals(cellType)) {
                        nextDataType = xssfDataType.ERROR;
                    } else if ("inlineStr".equals(cellType)) {
                        nextDataType = xssfDataType.INLINESTR;
                    } else if ("s".equals(cellType)) {
                        nextDataType = xssfDataType.SSTINDEX;
                    } else if ("str".equals(cellType)) {
                        nextDataType = xssfDataType.FORMULA;
                    } else if (cellStyleStr != null) {
                        // It's a number, but almost certainly one
                        // with a special style or format
                        int styleIndex = Integer.parseInt(cellStyleStr);
                        XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                        this.formatIndex = style.getDataFormat();
                        this.formatString = style.getDataFormatString();
                        if (this.formatString == null) {
                            this.formatString = BuiltinFormats
                                    .getBuiltinFormat(this.formatIndex);
                        }
                    }
                }

            }

            /**
             * Prints a cell when its value element closes and finishes the line
             * when a row closes.
             */
            @Override
            public void endElement(String uri, String localName, String name)
                    throws SAXException {

                if ("v".equals(name) || "inlineStr".equals(name)) {
                    // The collected element is complete: stop buffering so the text
                    // of unrelated elements is not appended to the value.
                    vIsOpen = false;
                }

                // v => contents of a cell
                if ("v".equals(name)) {
                    // Process the value contents as required.
                    // Do now, as characters() may be called more than once
                    String thisStr = formatCellValue();

                    // Output after we've seen the string contents
                    // Emit commas for any fields that were missing on this row
                    if (lastColumnNumber == -1) {
                        lastColumnNumber = 0;
                    }
                    for (int i = lastColumnNumber; i < thisColumn; ++i) {
                        output.print(',');
                    }

                    // Might be the empty string.
                    output.print(thisColumn + " : " + thisStr);

                    // Update column
                    if (thisColumn > -1) {
                        lastColumnNumber = thisColumn;
                    }

                } else if ("row".equals(name)) {

                    // Print out any missing commas if needed. The handler's own
                    // minimum is used for both the check and the loop bound.
                    if (minColumnCount > 0) {
                        // Columns are 0 based
                        if (lastColumnNumber == -1) {
                            lastColumnNumber = 0;
                        }
                        for (int i = lastColumnNumber; i < minColumnCount; i++) {
                            output.print(',');
                        }
                    }

                    // We're onto a new row
                    output.println();
                    output.println(countrows++);
                    lastColumnNumber = -1;

                }

            }

            /**
             * Captures characters only if a suitable element is open. Originally
             * was just "v"; extended for inlineStr also.
             */
            @Override
            public void characters(char[] ch, int start, int length)
                    throws SAXException {
                if (vIsOpen) {
                    value.append(ch, start, length);
                }
            }

            /**
             * Turns the text collected for the current cell into its printable
             * form according to {@link #nextDataType}.
             *
             * @return the text to print; {@code null} when a shared-string index
             *         is not a valid integer (a message is printed in that case)
             */
            private String formatCellValue() {
                switch (nextDataType) {

                case BOOL:
                    // An empty value has no first character to inspect.
                    if (value.length() == 0) {
                        return "";
                    }
                    return value.charAt(0) == '0' ? "FALSE" : "TRUE";

                case ERROR:
                    return "\"ERROR:" + value.toString() + '"';

                case FORMULA:
                    // A formula could result in a string value,
                    // so always add double-quote characters.
                    return '"' + value.toString() + '"';

                case INLINESTR:
                    // TODO: have not seen an example of this, so it's untested.
                    XSSFRichTextString rtsi = new XSSFRichTextString(value
                            .toString());
                    return '"' + rtsi.toString() + '"';

                case SSTINDEX:
                    String sstIndex = value.toString();
                    try {
                        int idx = Integer.parseInt(sstIndex);
                        XSSFRichTextString rtss = new XSSFRichTextString(
                                sharedStringsTable.getEntryAt(idx));
                        return '"' + rtss.toString() + '"';
                    } catch (NumberFormatException ex) {
                        output.println("Failed to parse SST index '" + sstIndex
                                + "': " + ex.toString());
                        return null;
                    }

                case NUMBER:
                    String n = value.toString();
                    // Only parse when there is something to parse; an empty value
                    // is passed through unformatted instead of failing.
                    if (this.formatString != null && n.length() > 0) {
                        return formatter.formatRawCellContents(Double
                                .parseDouble(n), this.formatIndex,
                                this.formatString);
                    }
                    return n;

                default:
                    return "(TODO: Unexpected type: " + nextDataType + ")";
                }
            }

            /**
             * Extracts the leading column part of a cell reference such as
             * {@code "AB12"}, i.e. everything before the first digit.
             *
             * @param cellRef
             *            value of the cell's {@code r} attribute
             * @return the characters preceding the first digit
             * @throws SAXException
             *             if the reference is missing or contains no digit
             */
            private String columnLetters(String cellRef) throws SAXException {
                if (cellRef != null) {
                    for (int pos = 0; pos < cellRef.length(); ++pos) {
                        if (Character.isDigit(cellRef.charAt(pos))) {
                            return cellRef.substring(0, pos);
                        }
                    }
                }
                throw new SAXException(
                        "Cell element has a missing or malformed reference: " + cellRef);
            }

            /**
             * Converts an Excel column name like "C" to a zero-based index.
             *
             * @param name
             *            column letters; upper-case {@code A}-{@code Z} is assumed
             * @return Index corresponding to the specified name, or -1 for an
             *         empty name
             */
            private int nameToColumn(String name) {
                int column = -1;
                for (int i = 0; i < name.length(); ++i) {
                    int c = name.charAt(i);
                    column = (column + 1) * 26 + c - 'A';
                }
                return column;
            }

        }

        // /////////////////////////////////////

        private final OPCPackage xlsxPackage;
        private final int minColumns;
        private final PrintStream output;
        private final Class<?> clazz;

        /**
         * Creates a new XLSX -> CSV converter
         *
         * @param pkg
         *            The XLSX package to process
         * @param output
         *            The PrintStream to output the CSV to
         * @param minColumns
         *            The minimum number of columns to output, or -1 for no minimum
         * @param clazz
         *            Passed on to every sheet handler; no code in this class
         *            reads it yet
         */
        public ExcelSheetParser(OPCPackage pkg, PrintStream output, int minColumns, Class<?> clazz) {
            this.xlsxPackage = pkg;
            this.output = output;
            this.minColumns = minColumns;
            this.clazz = clazz;
        }

        /**
         * Parses and shows the content of one sheet using the specified styles and
         * shared-strings tables. The stream is read but not closed here.
         *
         * @param styles
         *            Table of styles
         * @param strings
         *            Table of shared strings
         * @param sheetInputStream
         *            Raw XML of the sheet
         * @throws IOException
         *             if reading the stream fails
         * @throws ParserConfigurationException
         *             if no SAX parser can be created
         * @throws SAXException
         *             if the sheet XML cannot be parsed
         */
        public void processSheet(StylesTable styles,
                ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
                throws IOException, ParserConfigurationException, SAXException {

            InputSource sheetSource = new InputSource(sheetInputStream);
            SAXParserFactory saxFactory = SAXParserFactory.newInstance();
            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader sheetParser = saxParser.getXMLReader();
            ContentHandler handler = new XSSFSheetHandler(styles, strings,
                    this.minColumns, this.output, this.clazz);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
        }

        /**
         * Initiates the processing of the XLSX workbook file to CSV: prints a
         * header line for each sheet and then its content.
         *
         * @throws IOException
         * @throws OpenXML4JException
         * @throws ParserConfigurationException
         * @throws SAXException
         */
        public void process() throws IOException, OpenXML4JException,
                ParserConfigurationException, SAXException {

            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(
                    this.xlsxPackage);
            XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);

            StylesTable styles = xssfReader.getStylesTable();
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader
                    .getSheetsData();
            int index = 0;
            while (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    String sheetName = iter.getSheetName();
                    this.output.println(sheetName + " [index=" + index + "]:");
                    processSheet(styles, strings, stream);
                } finally {
                    // Close the sheet stream even when parsing fails.
                    stream.close();
                }
                ++index;
            }
        }
    }
解决方案

What I'd probably do is start building the User object when the row starts. As you hit the cells in the row, you populate your User object. When the row ends, validate the User object, and if it's fine add it then. Because you're doing SAX parsing, you'll get the start and end events for all of these, so you can attach your logic there.

I'd suggest you take a look at XLSX2CSV in the Apache POI Examples. It shows how to go about handling the different kinds of cell contents (which you'll need for populating your user object), how to do something when you reach the end of the row, as well as handling missing cells etc.

这篇关于使用POI的XSSF和SAX(事件API)读取Excel工作表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!

08-05 21:44