我正在将CSV转换为XML数据。通过查看各种示例,我能够编写用于解析CSV文件和获取XML文件的代码。但是,我编写的代码返回带有错误标签的XML文件。

这是转换代码:

    package com.adarsh.parse;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;



/**
 * Converts a simple comma-separated file into an XML document.
 *
 * <p>The first line of the CSV file supplies the column names. Every following
 * non-blank line becomes a {@code <row>} element under the root element, with one
 * child element per column whose text is the field value.
 *
 * <p>Limitations: fields are split on every comma, so quoted fields containing
 * commas or line breaks are not supported. The input is read with the platform
 * default charset (behaviour of {@link FileReader}).
 */
public class Converter {

    /* DOM factory and builder; created once in the constructor */
    protected DocumentBuilderFactory domFactory = null;
    protected DocumentBuilder domBuilder = null;
    /* Constant strings */
    // Default input CSV file, used when no input name is passed to convert()
    final String INPUT_FILE = "sample_data.csv";
    // Default output XML document, used when no output name is passed to convert()
    final String OUTPUT_FILE ="in.xml";
    // Root element of the XML document
    final String FIRST_ELEMENT="school";

    /**
     * Creates a converter and its DOM builder. Configuration failures are reported
     * on {@code System.err}; in that case {@link #convert(String, String)} returns -1.
     */
    public Converter(){
        try {
            domFactory = DocumentBuilderFactory.newInstance();
            /* Obtaining instance of class DocumentBuilder */
            domBuilder = domFactory.newDocumentBuilder();
        }
        catch(ParserConfigurationException exp) {
            System.err.println(exp.toString());
        }
        catch(FactoryConfigurationError exp){
            System.err.println(exp.toString());
        }
        catch(Exception exp){
            // Boundary catch kept so that construction never throws, as before.
            System.err.println(exp.toString());
        }
    }

    /**
     * Converts the given CSV file into an XML document.
     *
     * @param csvFileName path of the CSV file to read; when {@code null} or empty,
     *                    {@code INPUT_FILE} is used
     * @param xmlFileName path of the XML file to write; when {@code null} or empty,
     *                    {@code OUTPUT_FILE} is used
     * @return the number of data rows written, or -1 if the conversion failed
     */
    public  int convert(String csvFileName, String xmlFileName) {
        if (domBuilder == null) {
            System.err.println("XML document builder is not available; cannot convert");
            return -1;
        }
        // Honour the caller's file names; fall back to the defaults only when absent.
        String inputPath = (csvFileName == null || csvFileName.isEmpty()) ? INPUT_FILE : csvFileName;
        String outputPath = (xmlFileName == null || xmlFileName.isEmpty()) ? OUTPUT_FILE : xmlFileName;

        try {
            /* Initializing the XML document and its root element */
            Document newDoc = domBuilder.newDocument();
            Element rootElem = newDoc.createElement(FIRST_ELEMENT);
            newDoc.appendChild(rootElem);

            int rowCount = 0;
            // try-with-resources closes the reader even when parsing fails
            try (BufferedReader csvFileReader = new BufferedReader(new FileReader(inputPath))) {
                String headerLine = csvFileReader.readLine();
                if (headerLine == null) {
                    /* edge case: empty CSV file, only the root element is written */
                    System.out.println("Nothing to parse");
                } else {
                    String[] columnNames = toElementNames(newDoc, splitFields(headerLine));
                    String currLine;
                    while ((currLine = csvFileReader.readLine()) != null) {
                        if (currLine.trim().isEmpty()) {
                            continue; // skip blank lines
                        }
                        rootElem.appendChild(buildRow(newDoc, columnNames, splitFields(currLine)));
                        rowCount++;
                    }
                }
            }

            writeDocument(newDoc, outputPath);
            return rowCount;
        }
        catch(IOException exp) {
            System.err.println("Failed to read " + inputPath + ": " + exp);
        }
        catch(Exception exp) {
            // Boundary catch kept so that convert() reports failures via -1 instead of throwing.
            System.err.println("Failed to convert " + inputPath + " to " + outputPath + ": " + exp);
        }
        return -1;
    }

    /**
     * Splits one CSV line on commas, keeping empty fields so that values stay
     * aligned with their columns (StringTokenizer would silently drop them).
     */
    private static String[] splitFields(String line) {
        return line.split(",", -1);
    }

    /**
     * Maps header fields to usable XML element names. Whitespace is trimmed or
     * replaced by underscores; a name the DOM still rejects is replaced by
     * {@code column<N>} (1-based position).
     */
    private static String[] toElementNames(Document doc, String[] headerFields) {
        String[] names = new String[headerFields.length];
        for (int i = 0; i < headerFields.length; i++) {
            String candidate = headerFields[i].trim().replaceAll("\\s+", "_");
            try {
                // Let the DOM implementation decide whether the name is legal.
                doc.createElement(candidate);
            } catch (org.w3c.dom.DOMException invalidName) {
                candidate = "column" + (i + 1);
            }
            names[i] = candidate;
        }
        return names;
    }

    /**
     * Builds one {@code <row>} element. Values beyond the last header column are
     * ignored; columns without a value are omitted.
     */
    private static Element buildRow(Document doc, String[] columnNames, String[] values) {
        Element rowElem = doc.createElement("row");
        int fieldCount = Math.min(columnNames.length, values.length);
        for (int i = 0; i < fieldCount; i++) {
            Element fieldElem = doc.createElement(columnNames[i]);
            fieldElem.appendChild(doc.createTextNode(values[i]));
            rowElem.appendChild(fieldElem);
        }
        return rowElem;
    }

    /** Serializes the document to the given file as indented XML. */
    private static void writeDocument(Document doc, String outputPath)
            throws javax.xml.transform.TransformerException {
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();
        aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
        aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
        aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
        Source src = new DOMSource(doc);
        Result dest = new StreamResult(new File(outputPath));
        aTransformer.transform(src, dest);
    }

}


这是文件中的示例CSV数据:


教室编号,教室名称,老师_1_id,老师_1_姓,老师_1_名字,老师_2_id,老师_2_姓,老师_2_名字,学生_id,学生姓氏,学生_名字,学生等级
103,布莱恩的居室,10300000001,奥唐奈,布莱恩,,,,,,,102,
史密斯先生的PhysEd课,10200000001,史密斯,亚瑟,10200000011,
约翰·帕特森(John Patterson),10200000011,布兰顿(Brandon)麦克兰西(McCrancy),1102年
物理教育班,10200000001,史密斯,亚瑟,10200000011,帕特森,
John,10200000018,Reginald,Alexis,1102,史密斯先生的物理班,
10200000001,史密斯,亚瑟,10200000011,帕特森,约翰,10200000019,
盖尔(Gayle),马修(Matthew)1102,史密斯先生的PhysEd班级,10200000001,史密斯(
亚瑟,10200000011,帕特森,约翰,10200000010,史密斯,纳撒尼尔,1
102,史密斯先生的PhysEd课程,10200000001,史密斯,亚瑟,
10200000011,帕特森,约翰,10200000013,拉尼,埃里卡,1102,先生。
Smith的PhysEd类,10200000001,Smith,Arthur,10200000011,
约翰·帕特森,10200000014,迈克尔·弗洛雷斯,1102年,史密斯先生
物理教育班,10200000001,史密斯,亚瑟,10200000011,帕特森,
John,10200000012,Marco,Elizabeth,1102,史密斯先生的PhysEd课,
10200000001,史密斯,亚瑟,10200000011,帕特森,约翰,10200000016,
佩雷斯(布列塔尼),1 102,史密斯先生的物理教育班,10200000001,史密斯,
亚瑟,10200000011,帕特森,约翰,10200000015,希尔,茉莉,1
102,史密斯先生的PhysEd课程,10200000001,史密斯,亚瑟,
10200000011,帕特森,约翰,10200000017,希兰,威廉,1101,太太。
琼斯数学班,10100000001,琼斯,芭芭拉,,,,10100000015,
克鲁斯(Alex)克鲁斯(1101),琼斯太太的数学班,10100000001,琼斯,
芭芭拉(Barbara),“ 10100000014”,加西亚(Garcia),莉兹(Lizzie),1101,琼斯夫人的数学
班级10100000001,琼斯,芭芭拉,,,10100000013,梅尔卡多,托比,
1101,琼斯夫人的数学班,10100000001,琼斯,芭芭拉,,,,
10100000011,古铁雷斯,金伯利,2101,琼斯夫人的数学班,
10100000001,Jones,Barbara,,,,10100000010,Gil,Michael,2


我期望在XML文件中获得如下输出:





<grade id="1">
    <classroom id="101" name="Mrs. Jones' Math Class">
        <teacher id="10100000001" first_name="Barbara" last_name="Jones"/>

        <student id="10100000010" first_name="Michael" last_name="Gil"/>
        <student id="10100000011" first_name="Kimberly" last_name="Gutierrez"/>
        <student id="10100000013" first_name="Toby" last_name="Mercado"/>
        <student id="10100000014" first_name="Lizzie" last_name="Garcia"/>
        <student id="10100000015" first_name="Alex" last_name="Cruz"/>
    </classroom>


    <classroom id="102" name="Mr. Smith's PhysEd Class">
        <teacher id="10200000001" first_name="Arthur" last_name="Smith"/>
        <teacher id="10200000011" first_name="John" last_name="Patterson"/>

        <student id="10200000010" first_name="Nathaniel" last_name="Smith"/>
        <student id="10200000011" first_name="Brandon" last_name="McCrancy"/>
        <student id="10200000012" first_name="Elizabeth" last_name="Marco"/>
        <student id="10200000013" first_name="Erica" last_name="Lanni"/>
        <student id="10200000014" first_name="Michael" last_name="Flores"/>
        <student id="10200000015" first_name="Jasmin" last_name="Hill"/>
        <student id="10200000016" first_name="Brittany" last_name="Perez"/>
        <student id="10200000017" first_name="William" last_name="Hiram"/>
        <student id="10200000018" first_name="Alexis" last_name="Reginald"/>
        <student id="10200000019" first_name="Matthew" last_name="Gayle"/>
    </classroom>

    <classroom id="103" name="Brian's Homeroom">
        <teacher id="10300000001" first_name="Brian" last_name="O'Donnell"/>
    </classroom>
</grade>




这是我目前实际得到的输出:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<school>
    <row>
        <classroom_id>101</classroom_id>
    </row>
    <row>
        <classroom_id>101</classroom_id>
    </row>
    <row>
        <classroom_id>101</classroom_id>
    </row>
    <row>
        <classroom_id>101</classroom_id>
    </row>
    <row>
        <classroom_id>101</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>102</classroom_id>
    </row>
    <row>
        <classroom_id>103</classroom_id>
    </row>
</school>


所以有人可以帮我吗?我想知道我哪里做错了。谢谢

附言我已经在stackoverflow上提到了有关CSV到XML转换的其他问题。但是,我无法找到适合我特定问题的合适解决方案或解释。

P.P.S. 如果将此类CSV数据解析为XML并非必须使用XSLT,请不要建议我使用它。如果没有其他选择,我就只能去学习XSLT了,因为我对它了解很少。如果您能针对我已经编写的代码提出修改建议,我将不胜感激。

最佳答案

您的CSV内容似乎没有换行符。

09-10 03:11