有没有一种 XML 解析器包装库,可以在配置阶段或运行时切换实际使用的 XML 解析引擎,而不必强制我在 libxml2、expat 或 Xalan-C++ 之间做出选择?
最佳答案
我前一段时间写了类似的东西:
// Tag types, one per XML parser engine. They are only declared here and are
// used purely as template arguments to select a specialisation below.
struct xerces;
struct msxml;
struct rapid;
struct tiny;
struct pugixml;
// Per-engine setup/teardown object; specialised for each tag type.
template <typename T> struct platform_manager;
// Parses file `f` with the engine selected by `T`, timing the work with `qpc`.
// Every specialisation returns a duration obtained from
// qpc.Duration(QueryPerfCounter::seconds).
template <typename T> double parse_file(std::string const& f, QueryPerfCounter& qpc);
template<class T>
void demo(std::string const& f, size_t N = 10) {
platform_manager<T> pm;
QueryPerfCounter qpc;
std::vector<double> timing_data;
timing_data.reserve(N);
std::generate_n(std::back_inserter(timing_data), N, std::tr1::bind(&parse_file<typename T>, f, qpc));
adobe::Statistics<double> s(timing_data.begin(), timing_data.end());
std::cout << "Iteration count: " << s.count() << " Mean time: " << s.mean() << "s. Variance: " << s.variance() << "s.\n";
}
/***************************************************************/
/**
 * Setup/teardown object for the MSXML engine.
 *
 * The constructor calls CoInitialize and throws std::runtime_error if that
 * call fails; the destructor calls CoUninitialize.
 */
template <>
struct platform_manager<msxml> {
    platform_manager() {
        // The message now names the call that actually failed
        // (it previously blamed CoCreateInstance).
        if (FAILED(CoInitialize(NULL)))
            throw std::runtime_error("CoInitialize failed");
    }
    ~platform_manager() {
        CoUninitialize();
    }
};
/**
 * Loads file `f` through an MSXML DOM document and times the load.
 *
 * @param f    Path of the XML file to load.
 * @param qpc  Timer started immediately before and stopped immediately after
 *             the load call.
 * @return     The measured duration in seconds, or 0 when the DOM document
 *             object could not be created.
 */
template<>
double parse_file<msxml>(std::string const& f, QueryPerfCounter& qpc) {
    CComPtr<IXMLDOMDocument> pXMLDom;
    HRESULT hr = CoCreateInstance(__uuidof(MSXML2::DOMDocument60), NULL, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&pXMLDom));
    // Without this check a failed creation leaves pXMLDom null and the load
    // call below would dereference it.
    if (FAILED(hr) || !pXMLDom) {
        std::cout << "Failed to create the MSXML DOM document" << std::endl;
        return 0;
    }
    // Request a synchronous load so that load() returns only once the
    // document has been processed; otherwise the timer could stop early.
    pXMLDom->put_async(VARIANT_FALSE);
    VARIANT_BOOL varStatus = VARIANT_FALSE;
    qpc.Start();
    hr = pXMLDom->load(CComVariant(f.c_str()), &varStatus);
    qpc.Stop();
    // A document that fails to load is reported through the success flag
    // (with a non-failure HRESULT), so both must be inspected. Reporting
    // happens after Stop() to keep console I/O out of the measurement.
    if (FAILED(hr) || varStatus != VARIANT_TRUE)
        std::cout << "Parsing failed" << std::endl;
    return qpc.Duration(QueryPerfCounter::seconds);
}
/***************************************************************/
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#ifdef XERCES_CPP_NAMESPACE_USE
XERCES_CPP_NAMESPACE_USE
#endif
/**
 * Setup/teardown object for the Xerces-C++ engine.
 *
 * The constructor calls XMLPlatformUtils::Initialize(); the destructor calls
 * XMLPlatformUtils::Terminate(). If initialisation throws an XMLException the
 * message is printed and, because the handler belongs to a constructor
 * function-try-block, the exception is rethrown to the caller when the
 * handler finishes.
 */
template <>
struct platform_manager<xerces> {
    platform_manager() try {
        XMLPlatformUtils::Initialize();
    } catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        // Stream the transcoded text directly. Transcoding `message` a second
        // time allocated a string that was never released and streamed a
        // pointer rather than readable text.
        std::cout << "Failed to init: " << message << std::endl;
        XMLString::release(&message);
    }
    ~platform_manager() {
        XMLPlatformUtils::Terminate();
    }
};
/**
 * Parses file `f` with a validating, namespace-aware XercesDOMParser and
 * times the parse.
 *
 * @param f    Path of the XML file to parse.
 * @param qpc  Timer started immediately before and stopped immediately after
 *             the parse call.
 * @return     The measured duration in seconds, or 0 if the parse threw (the
 *             exception is reported on std::cout and swallowed).
 */
template<>
double parse_file<xerces>(std::string const& f, QueryPerfCounter& qpc) {
    double duration = 0;
    // Declared before the parser so the handler outlives the parser that
    // holds a raw pointer to it. Plain stack objects replace the previous
    // shared_ptr allocations; neither object is shared.
    HandlerBase errHandler;
    XercesDOMParser parser;
    parser.setValidationScheme(XercesDOMParser::Val_Always);
    parser.setDoNamespaces(true); // optional
    parser.setErrorHandler(&errHandler);
    try {
        qpc.Start();
        parser.parse(f.c_str());
        qpc.Stop();
        duration = qpc.Duration(QueryPerfCounter::seconds);
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Exception message is: \n"
                  << message << "\n";
        XMLString::release(&message);
    }
    catch (const DOMException& toCatch) {
        char* message = XMLString::transcode(toCatch.msg);
        std::cout << "Exception message is: \n"
                  << message << "\n";
        XMLString::release(&message);
    }
    catch (const SAXException& toCatch) {
        // HandlerBase reports fatal parse errors by throwing a
        // SAXParseException (a SAXException); without this handler they were
        // reported only as "Unexpected Exception" with no detail.
        char* message = XMLString::transcode(toCatch.getMessage());
        std::cout << "Exception message is: \n"
                  << message << "\n";
        XMLString::release(&message);
    }
    catch (...) {
        std::cout << "Unexpected Exception \n" ;
    }
    return duration;
}
/***************************************************************/
#include "rapidxml.hpp"
#include <vector>
#include <fstream>
#include <iterator>
// Setup/teardown object for the RapidXML engine; intentionally empty, so
// constructing it performs no initialisation.
template <>
struct platform_manager<rapid> {};
// Divisors used to express a byte count in bytes, kilobytes or megabytes.
enum size_hint { B = 1, KB = 1024, MB = 1024 * 1024 };

/**
 * Reports the size of an open file stream.
 *
 * Seeks to the end to read the size and then seeks back to the beginning, so
 * the read position is at the start of the file afterwards.
 *
 * @param f       Stream to measure.
 * @param factor  Unit divisor (B, KB or MB); defaults to MB.
 * @return        The size divided by `factor`, or 0 when the position cannot
 *                be determined (for example the stream failed to open).
 */
double file_size(std::ifstream& f, size_hint factor = MB) {
    f.seekg(0, std::ios::end);
    // tellg() yields -1 on failure; keep it signed so that the failure is
    // detectable instead of wrapping around to a huge unsigned size.
    const std::streamoff length = f.tellg();
    f.seekg(0, std::ios::beg);
    if (length < 0)
        return 0.0;
    return static_cast<double>(length) / factor;
}
/**
 * Reads file `f` into memory and parses it with RapidXML, timing both steps.
 *
 * @param f    Path of the XML file to parse.
 * @param qpc  Timer used twice: once around the file read and once around the
 *             parse; the two durations are summed.
 * @return     Read time plus parse time in seconds. Steps that did not
 *             complete (file could not be opened, or an exception was thrown)
 *             contribute nothing to the total.
 */
template<>
double parse_file<rapid>(std::string const& f, QueryPerfCounter& qpc) {
    double duration = 0;
    rapidxml::xml_document<> doc;
    try {
        qpc.Start();
        // Binary mode keeps the byte count from tellg() consistent with the
        // number of bytes read() delivers.
        std::ifstream myfile(f.c_str(), std::ios::in | std::ios::binary);
        if (!myfile) {
            std::cout << "Failed to open file: " << f << std::endl;
            return duration;
        }
        myfile.seekg(0, std::ios::end);
        const std::streamoff end_pos = myfile.tellg();
        myfile.seekg(0, std::ios::beg);
        if (end_pos < 0) {
            std::cout << "Failed to determine size of file: " << f << std::endl;
            return duration;
        }
        const size_t length = static_cast<size_t>(end_pos);
        // One extra value-initialised byte is the terminating '\0' the parser
        // needs. Allocating it up front avoids a push_back reallocation that
        // would copy the whole buffer inside the timed region, and keeps
        // &buffer[0] valid even for an empty file.
        std::vector<char> buffer(length + 1);
        myfile.read(&buffer[0], static_cast<std::streamsize>(length));
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);
        qpc.Start();
        doc.parse<rapidxml::parse_non_destructive>(&buffer[0]);
        qpc.Stop();
        duration += qpc.Duration(QueryPerfCounter::seconds);
    } catch (rapidxml::parse_error const& e) {
        std::cout << e.what() << std::endl;
    } catch (std::exception const& e) {
        std::cout << e.what() << std::endl;
    }
    return duration;
}
/***************************************************************/
// Setup/teardown object for the TinyXML-2 engine; intentionally empty, so
// constructing it performs no initialisation.
template <>
struct platform_manager<tiny> {};
/**
 * Loads file `f` into a tinyxml2::XMLDocument and times the load.
 *
 * @param f    Path of the XML file to load.
 * @param qpc  Timer wrapped around the load and the error report.
 * @return     The measured duration in seconds.
 */
template<>
double parse_file<tiny>(std::string const& f, QueryPerfCounter& qpc) {
    tinyxml2::XMLDocument document;
    char const* const path = f.c_str();

    qpc.Start();
    document.LoadFile(path);
    // Per the original author's note this prints nothing when loading
    // succeeded; it is deliberately kept inside the timed region.
    document.PrintError();
    qpc.Stop();

    double const elapsed = qpc.Duration(QueryPerfCounter::seconds);
    return elapsed;
}
/***************************************************************/
/**
 * Scope guard that brackets a benchmark run with banner lines on std::cout:
 * a BEGIN line when constructed and a matching END line when destroyed.
 * Both lines name the library and the input file and are flushed.
 */
struct banner_printer {
    banner_printer(std::string const& libname, std::string const& input)
        : library_(libname), source_(input) {
        emit("/*+------------------- BEGIN test for ");
    }
    ~banner_printer() {
        emit("/*+------------------- END test for ");
    }
private:
    // Writes one banner line that starts with `opening`.
    void emit(char const* opening) const {
        std::cout << opening << library_ << " with file: " << source_ << " -------------------+*/" << std::endl;
    }
    std::string library_, source_;
};
/***************************************************************/
#include "pugixml.hpp"
// Setup/teardown object for the pugixml engine; intentionally empty, so
// constructing it performs no initialisation.
template <>
struct platform_manager<pugixml> {};
/**
 * Loads file `f` into a pugi::xml_document and times the load.
 *
 * @param f    Path of the XML file to load.
 * @param qpc  Timer started immediately before and stopped immediately after
 *             the load call; error reporting happens outside the timed region.
 * @return     The measured duration in seconds (also returned when the load
 *             reported errors).
 */
template<>
double parse_file<pugixml>(std::string const& f, QueryPerfCounter& qpc) {
    pugi::xml_document doc;
    qpc.Start();
    pugi::xml_parse_result result = doc.load_file(f.c_str());
    qpc.Stop();
    if (!result) {
        // The report previously queried a hard-coded "node"/"attr" pair that
        // has no relation to the benchmark inputs and printed the offset
        // twice inside an unbalanced bracket; it now states only what the
        // parse result provides.
        std::cout << "XML [" << f << "] parsed with errors\n";
        std::cout << "Error description: " << result.description() << "\n";
        std::cout << "Error offset: " << result.offset << "\n\n";
    }
    return qpc.Duration(QueryPerfCounter::seconds);
}
/***************************************************************/
int main() {
std::vector<std::string> v = parse_catalog("D:/Work/xml_parsers/perfcompare/benchmark/catalog.txt");
std::for_each(v.begin(), v.end(), [](std::string const& s) {
{
std::ifstream f(s);
std::cout << "Input file name: " << s << " size: " << file_size(f) << "MB\n\n";
}
{
banner_printer b("xerces", s);
demo<xerces>(s);
}
{
banner_printer b("rapid", s);
demo<rapid>(s);
}
{
banner_printer b("tiny", s);
demo<tiny>(s);
}
{
banner_printer b("pugi", s);
demo<pugixml>(s);
}
{
banner_printer b("MSXML6", s);
demo<msxml>(s);
}
}
);
//expat_demo(argc, argv);
return 0;
}
这段代码或许能帮助您入门,也可能帮不上忙。我省略了头文件包含和其他一些琐碎细节。我尽量让各个解析器的接口保持简单且一致,这意味着某些库需要额外的辅助函数。