StringTokenizer trailing empty tokens consistency

Modified StringTokenizer to either include or exclude all empty tokens
(including trailing ones), based on the TOK_IGNORE_EMPTY option.
This commit is contained in:
aleks-f
2012-11-07 00:30:01 -06:00
parent 15d27daea6
commit 67a27ac2fa
4 changed files with 266 additions and 243 deletions

View File

@@ -70,10 +70,6 @@ public:
/// Additionally, options can be specified:
/// * TOK_IGNORE_EMPTY: empty tokens are ignored
/// * TOK_TRIM: trailing and leading whitespace is removed from tokens.
/// An empty token at the end of str is always ignored. For example,
/// a StringTokenizer with the following arguments:
/// StringTokenizer(",ab,cd,", ",");
/// will produce three tokens, "", "ab" and "cd".
~StringTokenizer();
/// Destroys the tokenizer.
@@ -112,6 +108,8 @@ private:
StringTokenizer(const StringTokenizer&);
StringTokenizer& operator = (const StringTokenizer&);
void trim (std::string& token);
TokenVec _tokens;
};

View File

@@ -44,38 +44,35 @@ namespace Poco {
StringTokenizer::StringTokenizer(const std::string& str, const std::string& separators, int options)
{
std::string::const_iterator it1 = str.begin();
std::string::const_iterator it2;
std::string::const_iterator it3;
std::string::const_iterator it = str.begin();
std::string::const_iterator end = str.end();
std::string token;
bool doTrim = ((options & TOK_TRIM) != 0);
bool ignoreEmpty = ((options & TOK_IGNORE_EMPTY) != 0);
bool lastToken = false;
while (it1 != end)
for (;it != end; ++it)
{
if (options & TOK_TRIM)
if (separators.find(*it) != std::string::npos)
{
while (it1 != end && Ascii::isSpace(*it1)) ++it1;
}
it2 = it1;
while (it2 != end && separators.find(*it2) == std::string::npos) ++it2;
it3 = it2;
if (it3 != it1 && (options & TOK_TRIM))
{
--it3;
while (it3 != it1 && Ascii::isSpace(*it3)) --it3;
if (!Ascii::isSpace(*it3)) ++it3;
}
if (options & TOK_IGNORE_EMPTY)
{
if (it3 != it1)
_tokens.push_back(std::string(it1, it3));
if (doTrim) trim(token);
if (!token.empty() || !ignoreEmpty)_tokens.push_back(token);
if (!ignoreEmpty) lastToken = true;
token = "";
}
else
{
_tokens.push_back(std::string(it1, it3));
token += *it;
lastToken = false;
}
it1 = it2;
if (it1 != end) ++it1;
}
if (!token.empty())
{
if (doTrim) trim(token);
if (!token.empty()) _tokens.push_back(token);
}
else if (lastToken) _tokens.push_back("");
}
@@ -84,6 +81,28 @@ StringTokenizer::~StringTokenizer()
}
// Removes leading and trailing whitespace (as classified by Ascii::isSpace)
// from token, modifying it in place. A token that consists only of
// whitespace (or is already empty) ends up empty.
void StringTokenizer::trim (std::string& token)
{
	// Use the string's own unsigned size type for the counters: storing
	// token.length() in an int is a narrowing conversion and mixes signed
	// and unsigned arithmetic in the substr() call below.
	std::string::size_type front = 0;
	std::string::size_type back = 0;
	const std::string::size_type length = token.length();

	// Count the leading whitespace characters.
	std::string::const_iterator tIt = token.begin();
	std::string::const_iterator tEnd = token.end();
	for (; tIt != tEnd; ++tIt, ++front)
	{
		if (!Ascii::isSpace(*tIt)) break;
	}

	// Scan from the back only when a non-whitespace character was found.
	// Otherwise the same characters would be counted twice and
	// front + back would exceed length.
	if (tIt != tEnd)
	{
		std::string::const_reverse_iterator tRit = token.rbegin();
		std::string::const_reverse_iterator tRend = token.rend();
		for (; tRit != tRend; ++tRit, ++back)
		{
			if (!Ascii::isSpace(*tRit)) break;
		}
	}

	// For an all-whitespace token front == length and back == 0, so the
	// result is the empty string.
	token = token.substr(front, length - back - front);
}
std::size_t StringTokenizer::count(const std::string& token) const
{
std::size_t result = 0;

View File

@@ -133,6 +133,8 @@ void StringTokenizerTest::testStringTokenizer()
assert (*it++ == "a");
assert (it != st.end());
assert (*it++ == "");
assert (it != st.end());
assert (*it++ == "");
assert (it == st.end());
}
{
@@ -144,6 +146,8 @@ void StringTokenizerTest::testStringTokenizer()
assert (*it++ == "a");
assert (it != st.end());
assert (*it++ == "");
assert (it != st.end());
assert (*it++ == "");
assert (it == st.end());
}
{
@@ -306,6 +310,8 @@ void StringTokenizerTest::testStringTokenizer()
assert (*it++ == "ab");
assert (it != st.end());
assert (*it++ == "cd");
assert (it != st.end());
assert (*it++ == "");
assert (it == st.end());
}
{

View File

@@ -2,7 +2,7 @@ Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JSON", "JSON_vs100.vcxproj", "{A63135B8-0DFC-428C-AF90-BC020BE83D0C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestSuite", "testsuite\TestSuite_vs100.vcxproj", "{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestSuite", "testsuite\TestSuite_vs100.vcxproj", "{96CF3103-E49E-3F5E-A11D-6DBCDA043053}"
ProjectSection(ProjectDependencies) = postProject
{A63135B8-0DFC-428C-AF90-BC020BE83D0C} = {A63135B8-0DFC-428C-AF90-BC020BE83D0C}
EndProjectSection
@@ -35,24 +35,24 @@ Global
{A63135B8-0DFC-428C-AF90-BC020BE83D0C}.release_static_mt|Win32.ActiveCfg = release_static_mt|Win32
{A63135B8-0DFC-428C-AF90-BC020BE83D0C}.release_static_mt|Win32.Build.0 = release_static_mt|Win32
{A63135B8-0DFC-428C-AF90-BC020BE83D0C}.release_static_mt|Win32.Deploy.0 = release_static_mt|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_shared|Win32.ActiveCfg = debug_shared|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_shared|Win32.Build.0 = debug_shared|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_shared|Win32.Deploy.0 = debug_shared|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_static_md|Win32.ActiveCfg = debug_static_md|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_static_md|Win32.Build.0 = debug_static_md|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_static_md|Win32.Deploy.0 = debug_static_md|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_static_mt|Win32.ActiveCfg = debug_static_mt|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_static_mt|Win32.Build.0 = debug_static_mt|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.debug_static_mt|Win32.Deploy.0 = debug_static_mt|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_shared|Win32.ActiveCfg = release_shared|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_shared|Win32.Build.0 = release_shared|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_shared|Win32.Deploy.0 = release_shared|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_static_md|Win32.ActiveCfg = release_static_md|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_static_md|Win32.Build.0 = release_static_md|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_static_md|Win32.Deploy.0 = release_static_md|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_static_mt|Win32.ActiveCfg = release_static_mt|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_static_mt|Win32.Build.0 = release_static_mt|Win32
{8DDEF92F-2475-410B-8292-1D67B4C6E4A6}.release_static_mt|Win32.Deploy.0 = release_static_mt|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_shared|Win32.ActiveCfg = debug_shared|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_shared|Win32.Build.0 = debug_shared|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_shared|Win32.Deploy.0 = debug_shared|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_static_md|Win32.ActiveCfg = debug_static_md|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_static_md|Win32.Build.0 = debug_static_md|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_static_md|Win32.Deploy.0 = debug_static_md|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_static_mt|Win32.ActiveCfg = debug_static_mt|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_static_mt|Win32.Build.0 = debug_static_mt|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.debug_static_mt|Win32.Deploy.0 = debug_static_mt|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_shared|Win32.ActiveCfg = release_shared|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_shared|Win32.Build.0 = release_shared|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_shared|Win32.Deploy.0 = release_shared|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_static_md|Win32.ActiveCfg = release_static_md|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_static_md|Win32.Build.0 = release_static_md|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_static_md|Win32.Deploy.0 = release_static_md|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_static_mt|Win32.ActiveCfg = release_static_mt|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_static_mt|Win32.Build.0 = release_static_mt|Win32
{96CF3103-E49E-3F5E-A11D-6DBCDA043053}.release_static_mt|Win32.Deploy.0 = release_static_mt|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE