376 lines
11 KiB
C++
376 lines
11 KiB
C++
/*
|
|
*
|
|
* Copyright (c) 1998-2002
|
|
* John Maddock
|
|
*
|
|
* Use, modification and distribution are subject to the
|
|
* Boost Software License, Version 1.0. (See accompanying file
|
|
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
*
|
|
*/
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(disable: 4996 4127)
|
|
#endif
|
|
|
|
#include <boost/config.hpp>
|
|
#include <boost/regex.hpp>
|
|
#include <boost/cregex.hpp>
|
|
#include <boost/timer.hpp>
|
|
#include <boost/smart_ptr.hpp>
|
|
|
|
#include <string>
|
|
#include <algorithm>
|
|
#include <deque>
|
|
#include <iterator>
|
|
|
|
#ifdef BOOST_RE_OLD_IOSTREAM
|
|
#include <iostream.h>
|
|
#include <fstream.h>
|
|
#else
|
|
#include <iostream>
|
|
#include <fstream>
|
|
using std::cout;
|
|
using std::cin;
|
|
using std::cerr;
|
|
using std::istream;
|
|
using std::ostream;
|
|
using std::endl;
|
|
using std::ifstream;
|
|
using std::streambuf;
|
|
using std::getline;
|
|
#endif
|
|
|
|
#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi)
|
|
// maybe no Koenig lookup, use a using directive instead:
|
|
using namespace boost;
|
|
#endif
|
|
|
|
#ifndef BOOST_NO_WREGEX
|
|
//
// Stream a wide string to a narrow stream.
// Each wide character is truncated to char with static_cast and written
// with put(), so anything outside the narrow range is not preserved.
// Returns the stream that was passed in.
//
ostream& operator << (ostream& os, const std::wstring& s)
{
   for(std::wstring::const_iterator pos = s.begin(), last = s.end(); pos != last; ++pos)
   {
      const char narrow = static_cast<char>(*pos);
      os.put(narrow);
   }
   return os;
}
|
|
#endif
|
|
|
|
//
// Output iterator that appends to a string-like container.
//
// S must provide a nested value_type and an append(count, value) member.
// Every value assigned through the iterator is appended to the container
// given at construction; dereference and both increments are no-ops that
// return the iterator itself, so it can be handed to algorithms such as
// std::copy.  Only a pointer to the container is stored, so the container
// must outlive the iterator.
//
template <class S>
class string_out_iterator
{
   // destination container, never null:
   S* m_dest;

public:
   typedef std::output_iterator_tag iterator_category;
   typedef void value_type;
   typedef void difference_type;
   typedef void pointer;
   typedef void reference;

   string_out_iterator(S& s) : m_dest(&s) {}

   // assignment is where the work happens: append one element.
   string_out_iterator& operator=(typename S::value_type v)
   {
      m_dest->append(1, v);
      return *this;
   }

   // the remaining iterator operations do nothing:
   string_out_iterator& operator*() { return *this; }
   string_out_iterator& operator++() { return *this; }
   string_out_iterator& operator++(int) { return *this; }
};
|
|
|
|
namespace boost{
//
// boost::getline: read one line from `is` into `s`, dropping the line
// terminator.  A trailing '\r' (left over from a "\r\n" terminated line) is
// removed as well, so both implementations below hand back the same text.
// Returns `is`; callers detect end of input by testing the stream state
// (fail() is set once a call could not extract anything).
//
#if defined(BOOST_MSVC) || (defined(BOOST_BORLANDC) && (BOOST_BORLANDC == 0x550)) || defined(__SGI_STL_PORT)
//
// problem with std::getline under MSVC6sp3
// and C++ Builder 5.5, so the line is read one character at a time here.
//
istream& getline(istream& is, std::string& s)
{
   s.erase();
   int c = is.get();
   // stop on the newline, or as soon as the stream stops delivering
   // characters (end of input or error) rather than looping forever:
   while(is.good() && (c != '\n'))
   {
      s.append(1, static_cast<char>(c));
      c = is.get();
   }
   // a final line with no terminating newline is still a successful read,
   // as it is with std::getline: leave eofbit set but drop the failbit
   // that the last get() raised.
   if(!s.empty() && is.eof())
      is.clear(is.rdstate() & ~istream::failbit);
   if(!s.empty() && (s[s.size() - 1] == '\r'))
      s.erase(s.size() - 1);
   return is;
}
#else
istream& getline(istream& is, std::string& s)
{
   std::getline(is, s);
   if(!s.empty() && (s[s.size() - 1] == '\r'))
      s.erase(s.size() - 1);
   return is;
}
#endif
}
|
|
|
|
|
|
int main(int argc, char**argv)
|
|
{
|
|
ifstream ifs;
|
|
std::istream* p_in = &std::cin;
|
|
if(argc == 2)
|
|
{
|
|
ifs.open(argv[1]);
|
|
ifs.peek();
|
|
if(!ifs.good())
|
|
{
|
|
cout << "Bad filename: " << argv[1] << endl;
|
|
return -1;
|
|
}
|
|
p_in = &ifs;
|
|
}
|
|
|
|
boost::regex ex;
|
|
boost::match_results<std::string::const_iterator> sm;
|
|
#ifndef BOOST_NO_WREGEX
|
|
std::wstring ws1, ws2;
|
|
boost::wregex wex;
|
|
boost::match_results<std::wstring::const_iterator> wsm;
|
|
#endif
|
|
boost::match_results<std::deque<char>::iterator> dm;
|
|
std::string s1, s2, ts;
|
|
std::deque<char> ds;
|
|
boost::regex_tA r;
|
|
boost::scoped_array<boost::regmatch_t> matches;
|
|
std::size_t nsubs;
|
|
boost::timer t;
|
|
double tim;
|
|
int result = 0;
|
|
unsigned iters = 100;
|
|
double wait_time = (std::min)(t.elapsed_min() * 1000, 0.5);
|
|
|
|
while(true)
|
|
{
|
|
cout << "Enter expression (or \"quit\" to exit): ";
|
|
boost::getline(*p_in, s1);
|
|
if(argc == 2)
|
|
cout << endl << s1 << endl;
|
|
if(s1 == "quit")
|
|
break;
|
|
#ifndef BOOST_NO_WREGEX
|
|
ws1.erase();
|
|
std::copy(s1.begin(), s1.end(), string_out_iterator<std::wstring>(ws1));
|
|
#endif
|
|
try{
|
|
ex.assign(s1);
|
|
#ifndef BOOST_NO_WREGEX
|
|
wex.assign(ws1);
|
|
#endif
|
|
}
|
|
catch(std::exception& e)
|
|
{
|
|
cout << "Error in expression: \"" << e.what() << "\"" << endl;
|
|
continue;
|
|
}
|
|
int code = regcompA(&r, s1.c_str(), boost::REG_PERL);
|
|
if(code != 0)
|
|
{
|
|
char buf[256];
|
|
regerrorA(code, &r, buf, 256);
|
|
cout << "regcompA error: \"" << buf << "\"" << endl;
|
|
continue;
|
|
}
|
|
nsubs = r.re_nsub + 1;
|
|
matches.reset(new boost::regmatch_t[nsubs]);
|
|
|
|
while(true)
|
|
{
|
|
cout << "Enter string to search (or \"quit\" to exit): ";
|
|
boost::getline(*p_in, s2);
|
|
if(argc == 2)
|
|
cout << endl << s2 << endl;
|
|
if(s2 == "quit")
|
|
break;
|
|
|
|
#ifndef BOOST_NO_WREGEX
|
|
ws2.erase();
|
|
std::copy(s2.begin(), s2.end(), string_out_iterator<std::wstring>(ws2));
|
|
#endif
|
|
ds.erase(ds.begin(), ds.end());
|
|
std::copy(s2.begin(), s2.end(), std::back_inserter(ds));
|
|
|
|
unsigned i;
|
|
iters = 10;
|
|
tim = 1.1;
|
|
|
|
#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
|
|
MSG msg;
|
|
PeekMessage(&msg, 0, 0, 0, 0);
|
|
Sleep(0);
|
|
#endif
|
|
|
|
// cache load:
|
|
regex_search(s2, sm, ex);
|
|
|
|
// measure time interval for basic_regex<char>
|
|
do{
|
|
iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
|
|
t.restart();
|
|
for(i =0; i < iters; ++i)
|
|
{
|
|
result = regex_search(s2, sm, ex);
|
|
}
|
|
tim = t.elapsed();
|
|
}while(tim < wait_time);
|
|
|
|
cout << "regex time: " << (tim * 1000000 / iters) << "us" << endl;
|
|
if(result)
|
|
{
|
|
for(i = 0; i < sm.size(); ++i)
|
|
{
|
|
ts = sm[i];
|
|
cout << "\tmatch " << i << ": \"";
|
|
cout << ts;
|
|
cout << "\" (matched=" << sm[i].matched << ")" << endl;
|
|
}
|
|
cout << "\tmatch $`: \"";
|
|
cout << std::string(sm[-1]);
|
|
cout << "\" (matched=" << sm[-1].matched << ")" << endl;
|
|
cout << "\tmatch $': \"";
|
|
cout << std::string(sm[-2]);
|
|
cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
|
|
}
|
|
|
|
#ifndef BOOST_NO_WREGEX
|
|
// measure time interval for boost::wregex
|
|
iters = 10;
|
|
tim = 1.1;
|
|
// cache load:
|
|
regex_search(ws2, wsm, wex);
|
|
do{
|
|
iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
|
|
t.restart();
|
|
for(i = 0; i < iters; ++i)
|
|
{
|
|
result = regex_search(ws2, wsm, wex);
|
|
}
|
|
tim = t.elapsed();
|
|
}while(tim < wait_time);
|
|
cout << "wregex time: " << (tim * 1000000 / iters) << "us" << endl;
|
|
if(result)
|
|
{
|
|
std::wstring tw;
|
|
for(i = 0; i < wsm.size(); ++i)
|
|
{
|
|
tw.erase();
|
|
std::copy(wsm[i].first, wsm[i].second, string_out_iterator<std::wstring>(tw));
|
|
cout << "\tmatch " << i << ": \"" << tw;
|
|
cout << "\" (matched=" << sm[i].matched << ")" << endl;
|
|
}
|
|
cout << "\tmatch $`: \"";
|
|
tw.erase();
|
|
std::copy(wsm[-1].first, wsm[-1].second, string_out_iterator<std::wstring>(tw));
|
|
cout << tw;
|
|
cout << "\" (matched=" << sm[-1].matched << ")" << endl;
|
|
cout << "\tmatch $': \"";
|
|
tw.erase();
|
|
std::copy(wsm[-2].first, wsm[-2].second, string_out_iterator<std::wstring>(tw));
|
|
cout << tw;
|
|
cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
|
|
}
|
|
#endif
|
|
|
|
// measure time interval for basic_regex<char> using a deque
|
|
iters = 10;
|
|
tim = 1.1;
|
|
// cache load:
|
|
regex_search(ds.begin(), ds.end(), dm, ex);
|
|
do{
|
|
iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
|
|
t.restart();
|
|
for(i = 0; i < iters; ++i)
|
|
{
|
|
result = regex_search(ds.begin(), ds.end(), dm, ex);
|
|
}
|
|
tim = t.elapsed();
|
|
}while(tim < wait_time);
|
|
cout << "regex time (search over std::deque<char>): " << (tim * 1000000 / iters) << "us" << endl;
|
|
|
|
if(result)
|
|
{
|
|
for(i = 0; i < dm.size(); ++i)
|
|
{
|
|
ts.erase();
|
|
std::copy(dm[i].first, dm[i].second, string_out_iterator<std::string>(ts));
|
|
cout << "\tmatch " << i << ": \"" << ts;
|
|
cout << "\" (matched=" << sm[i].matched << ")" << endl;
|
|
}
|
|
cout << "\tmatch $`: \"";
|
|
ts.erase();
|
|
std::copy(dm[-1].first, dm[-1].second, string_out_iterator<std::string>(ts));
|
|
cout << ts;
|
|
cout << "\" (matched=" << sm[-1].matched << ")" << endl;
|
|
cout << "\tmatch $': \"";
|
|
ts.erase();
|
|
std::copy(dm[-2].first, dm[-2].second, string_out_iterator<std::string>(ts));
|
|
cout << ts;
|
|
cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
|
|
}
|
|
|
|
// measure time interval for POSIX matcher:
|
|
iters = 10;
|
|
tim = 1.1;
|
|
// cache load:
|
|
regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
|
|
do{
|
|
iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
|
|
t.restart();
|
|
for(i = 0; i < iters; ++i)
|
|
{
|
|
result = regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
|
|
}
|
|
tim = t.elapsed();
|
|
}while(tim < wait_time);
|
|
cout << "POSIX regexecA time: " << (tim * 1000000 / iters) << "us" << endl;
|
|
|
|
if(result == 0)
|
|
{
|
|
for(i = 0; i < nsubs; ++i)
|
|
{
|
|
if(matches[i].rm_so >= 0)
|
|
{
|
|
ts.assign(s2.begin() + matches[i].rm_so, s2.begin() + matches[i].rm_eo);
|
|
cout << "\tmatch " << i << ": \"" << ts << "\" (matched=" << (matches[i].rm_so != -1) << ")"<< endl;
|
|
}
|
|
else
|
|
cout << "\tmatch " << i << ": \"\" (matched=" << (matches[i].rm_so != -1) << ")" << endl; // no match
|
|
}
|
|
cout << "\tmatch $`: \"";
|
|
ts.erase();
|
|
ts.assign(s2.begin(), s2.begin() + matches[0].rm_so);
|
|
cout << ts;
|
|
cout << "\" (matched=" << (matches[0].rm_so != 0) << ")" << endl;
|
|
cout << "\tmatch $': \"";
|
|
ts.erase();
|
|
ts.assign(s2.begin() + matches[0].rm_eo, s2.end());
|
|
cout << ts;
|
|
cout << "\" (matched=" << (matches[0].rm_eo != (int)s2.size()) << ")" << endl << endl;
|
|
}
|
|
}
|
|
regfreeA(&r);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(UNDER_CE)
|
|
#if !defined(BOOST_EMBTC)
|
|
#pragma comment(lib, "user32.lib")
|
|
#else
|
|
#pragma comment(lib, "user32.a")
|
|
#endif
|
|
#endif
|