有没有一种 XML 解析器封装库,允许在配置时或运行时切换实际使用的 XML 解析引擎,而不是强迫我在 libxml2、expat 和 Xalan-C++ 之间做出选择?

最佳答案

我前一段时间写了类似的东西:

// Engine tags: these types are only declared (never defined here) and are used
// purely as template arguments to select the specializations further down.
struct xerces;
struct msxml;
struct rapid;
struct tiny;
struct pugixml;

// Per-engine helper object; demo<T>() constructs one before running the parses.
// Only the explicit specializations below provide a definition.
template <typename T> struct platform_manager;
// Parses file `f` with the engine selected by T, timing the work with `qpc`,
// and returns the measured time in seconds. Defined per engine via explicit
// specialization.
template <typename T> double parse_file(std::string const& f, QueryPerfCounter& qpc);

template<class T>
void demo(std::string const& f, size_t N = 10) {
    platform_manager<T> pm;
    QueryPerfCounter qpc;
    std::vector<double> timing_data;
    timing_data.reserve(N);
    std::generate_n(std::back_inserter(timing_data), N, std::tr1::bind(&parse_file<typename T>, f, qpc));
    adobe::Statistics<double> s(timing_data.begin(), timing_data.end());
    std::cout << "Iteration count: " << s.count() << " Mean time: " << s.mean() << "s. Variance: " << s.variance() << "s.\n";
}
/***************************************************************/
// COM lifetime guard for the MSXML engine: the constructor calls CoInitialize
// and the destructor calls CoUninitialize.
template <>
struct platform_manager<msxml> {
    // Throws std::runtime_error when CoInitialize reports failure. In that case
    // the destructor does not run, so CoUninitialize is not called.
    platform_manager() {
        if (FAILED(CoInitialize(NULL)))
            throw std::runtime_error("CoInitialize failed");  // name the call that actually failed
    }

    ~platform_manager() {
        CoUninitialize();
    }
};

// Loads `f` into an MSXML 6 DOM document and returns the time spent in load(),
// in seconds. Returns 0 if the DOM document object cannot be created.
template<>
double parse_file<msxml>(std::string const& f, QueryPerfCounter& qpc) {
    CComPtr<IXMLDOMDocument> pXMLDom;
    HRESULT hr = CoCreateInstance(__uuidof(MSXML2::DOMDocument60), NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pXMLDom));
    // Without this check a failed creation leaves pXMLDom null and the load()
    // call below would dereference it.
    if (FAILED(hr) || !pXMLDom) {
        std::cout << "Failed to create the MSXML DOM document" << std::endl;
        return 0;
    }
    VARIANT_BOOL varStatus = VARIANT_FALSE;
    qpc.Start();
    hr = pXMLDom->load(CComVariant(f.c_str()), &varStatus);
    qpc.Stop();
    // load() signals an unsuccessful load with S_FALSE and varStatus == VARIANT_FALSE,
    // which FAILED() alone does not detect. Reporting happens after Stop() so the
    // console output is not part of the measurement.
    if (FAILED(hr) || varStatus != VARIANT_TRUE)
        std::cout << "Parsing failed" << std::endl;
    return qpc.Duration(QueryPerfCounter::seconds);
}

/***************************************************************/
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>

#ifdef XERCES_CPP_NAMESPACE_USE
XERCES_CPP_NAMESPACE_USE
#endif

// Xerces-C++ lifetime guard: the constructor calls XMLPlatformUtils::Initialize()
// and the destructor calls XMLPlatformUtils::Terminate().
template <>
struct platform_manager<xerces> {
    // Function-try-block: the handler logs the failure, and because this is a
    // constructor the exception is rethrown automatically when the handler ends.
    platform_manager() try {
        XMLPlatformUtils::Initialize();
    } catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        // Print the transcoded text directly; transcoding it a second time
        // produced a leaked XMLCh buffer instead of a printable string.
        std::cout << "Failed to init: " << message << std::endl;
        XMLString::release(&message);
    }

    ~platform_manager() {
        XMLPlatformUtils::Terminate();
    }
};

// Parses `f` with a validating, namespace-aware XercesDOMParser and returns the
// time spent in parse(), in seconds. Returns 0 when parsing throws; the reason
// is written to std::cout.
template<>
double parse_file<xerces>(std::string const& f, QueryPerfCounter& qpc) {
    double duration = 0;

    // The handler is declared first so it outlives the parser that points at it.
    // Neither object needs shared ownership, so both live on the stack.
    HandlerBase errHandler;
    XercesDOMParser parser;
    parser.setValidationScheme(XercesDOMParser::Val_Always);
    parser.setDoNamespaces(true);    // optional
    parser.setErrorHandler(&errHandler);

    // Transcodes a Xerces message to the local code page, prints it and frees it.
    auto report = [](const XMLCh* text) {
        char* message = XMLString::transcode(text);
        std::cout << "Exception message is: \n"
            << message << "\n";
        XMLString::release(&message);
    };

    try {
        qpc.Start();
        parser.parse(f.c_str());
        qpc.Stop();
        duration = qpc.Duration(QueryPerfCounter::seconds);
    }
    catch (const XMLException& toCatch) {
        report(toCatch.getMessage());
    }
    catch (const DOMException& toCatch) {
        report(toCatch.msg);
    }
    catch (const SAXException& toCatch) {
        // HandlerBase rethrows fatal parse errors as SAXParseException, which is
        // not an XMLException; without this handler the details were lost in
        // the catch-all below.
        report(toCatch.getMessage());
    }
    catch (...) {
        std::cout << "Unexpected Exception \n" ;
    }
    return duration;
}

/***************************************************************/
#include "rapidxml.hpp"
#include <vector>
#include <fstream>
#include <iterator>

// Empty specialization for the rapidxml tag: demo<rapid>() still constructs
// one, but it performs no setup or teardown.
template <>
struct platform_manager<rapid> {};

// Divisors used to express a byte count in bytes, kibibytes or mebibytes.
enum size_hint { B = 1, KB = 1024, MB = 1024 * 1024 };

// Returns the size of the file behind `f`, divided by `factor` (default: MB).
// The stream is left positioned at the beginning. If the size cannot be
// determined (for example, the stream is not open), returns 0 rather than the
// huge value that results from casting tellg()'s -1 to an unsigned type.
double file_size(std::ifstream& f, size_hint factor = MB) {
    f.seekg(0, std::ios::end);
    const std::streamoff length = f.tellg();
    f.seekg(0, std::ios::beg);
    if (length < 0)
        return 0.0;
    return static_cast<double>(length) / factor;
}

// Reads `f` into memory and parses it with rapidxml (non-destructive mode).
// Returns the sum of the buffer-load time and the parse time, in seconds.
// If the file cannot be read, or an exception is thrown, the problem is
// written to std::cout and the time accumulated so far (possibly 0) is returned.
template<>
double parse_file<rapid>(std::string const& f, QueryPerfCounter& qpc) {
    double duration = 0;
    rapidxml::xml_document<> doc;
    try {
        // Phase 1: load the whole file into a NUL-terminated buffer.
        qpc.Start();
        std::ifstream myfile(f.c_str());
        myfile.seekg(0, std::ios::end);
        const std::streamoff length = myfile.tellg();
        myfile.seekg(0, std::ios::beg);
        if (!myfile || length < 0) {
            // tellg() returns -1 on failure; using it as a size would request
            // an enormous allocation.
            qpc.Stop();
            std::cout << "Failed to read file: " << f << std::endl;
            return duration;
        }
        // One extra zero-initialized element is the terminator rapidxml needs,
        // and it keeps &buffer[0] valid even for an empty file.
        std::vector<char> buffer(static_cast<size_t>(length) + 1);
        myfile.read(&buffer[0], static_cast<std::streamsize>(length));
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);

        // Phase 2: parse the in-memory text.
        qpc.Start();
        doc.parse<rapidxml::parse_non_destructive>(&buffer[0]);
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);
    } catch (rapidxml::parse_error const& e) {
        std::cout << e.what() << std::endl;
    } catch (std::exception const& e) {
        std::cout << e.what() << std::endl;
    }
    return duration;
}
/***************************************************************/
// Empty specialization for the tinyxml2 tag: demo<tiny>() still constructs
// one, but it performs no setup or teardown.
template <>
struct platform_manager<tiny> {};

// Loads `f` with tinyxml2 and returns the time spent in LoadFile(), in seconds.
// A load error is printed through XMLDocument::PrintError().
template<>
double parse_file<tiny>(std::string const& f, QueryPerfCounter& qpc) {
    tinyxml2::XMLDocument doc;
    qpc.Start();
    doc.LoadFile(f.c_str());
    qpc.Stop();
    // Report only after the timer has stopped, and only when there is an error:
    // this keeps console output out of the measurement and does not depend on
    // what PrintError() prints for a successful load.
    if (doc.Error())
        doc.PrintError();
    return qpc.Duration(QueryPerfCounter::seconds);
}
/***************************************************************/
// Scope marker for one benchmark run: writes a BEGIN banner to std::cout when
// constructed and the matching END banner when destroyed. Each banner names the
// library and the input file and ends with std::endl.
struct banner_printer {
    banner_printer(std::string const& libname, std::string const& input)
        : lib(libname), in(input)
    {
        emit("/*+------------------- BEGIN test for ");
    }

    ~banner_printer()
    {
        emit("/*+------------------- END test for ");
    }

private:
    // Writes one banner line; `opening` is the part that differs between the
    // BEGIN and END banners.
    void emit(char const* opening) const
    {
        std::cout << opening << lib << " with file: " << in << " -------------------+*/" << std::endl;
    }

    std::string lib, in;
};
/***************************************************************/
#include "pugixml.hpp"

// Empty specialization for the pugixml tag: demo<pugixml>() still constructs
// one, but it performs no setup or teardown.
template <>
struct platform_manager<pugixml> {};

// Loads `f` with pugixml and returns the time spent in load_file(), in seconds.
// When the parse result reports failure, a diagnostic with the error
// description and offset is written to std::cout.
template<>
double parse_file<pugixml>(std::string const& f, QueryPerfCounter& qpc) {
    pugi::xml_document doc;

    qpc.Start();
    pugi::xml_parse_result status = doc.load_file(f.c_str());
    qpc.Stop();

    // Success path: nothing to report.
    if (status)
        return qpc.Duration(QueryPerfCounter::seconds);

    std::cout << "XML [" << f << "] parsed with errors, attr value: ["
              << doc.child("node").attribute("attr").value() << "]\n";
    std::cout << "Error description: " << status.description() << "\n";
    std::cout << "Error offset: " << status.offset
              << " (error at offset [..." << (status.offset) << "]\n\n";

    return qpc.Duration(QueryPerfCounter::seconds);
}
/***************************************************************/

// Benchmark driver: reads the list of input files from the catalog, then for
// each file prints its size and runs every engine's demo in turn, each wrapped
// in a BEGIN/END banner.
int main() {
    std::vector<std::string> inputs = parse_catalog("D:/Work/xml_parsers/perfcompare/benchmark/catalog.txt");

    for (std::vector<std::string>::const_iterator it = inputs.begin(); it != inputs.end(); ++it) {
        std::string const& path = *it;

        {
            // Scoped so the probe stream is closed before any engine opens the file.
            std::ifstream probe(path);
            std::cout << "Input file name: " << path << " size: " << file_size(probe) << "MB\n\n";
        }
        {
            banner_printer banner("xerces", path);
            demo<xerces>(path);
        }
        {
            banner_printer banner("rapid", path);
            demo<rapid>(path);
        }
        {
            banner_printer banner("tiny", path);
            demo<tiny>(path);
        }
        {
            banner_printer banner("pugi", path);
            demo<pugixml>(path);
        }
        {
            banner_printer banner("MSXML6", path);
            demo<msxml>(path);
        }
    }

    //expat_demo(argc, argv);
    return 0;
}

这段代码不一定能让您直接上手,但可以作为参考。我省略了头文件包含和其他一些琐碎的细节。我尽量让各个解析器的接口保持简单且一致,这意味着某些库需要额外的辅助函数。

09-25 21:40