StringTokenizer trailing empty tokens consistency

Modified StringTokenizer to either include or exclude all empty tokens (including trailing ones), based on the TOK_IGNORE_EMPTY option.
2025-11-26 00:40:05 +01:00 · 2012-11-07 00:30:01 -06:00
parent 15d27daea6
commit 67a27ac2fa
4 changed files with 266 additions and 243 deletions
--- a/Foundation/src/StringTokenizer.cpp
+++ b/Foundation/src/StringTokenizer.cpp
@@ -44,38 +44,35 @@ namespace Poco {

 StringTokenizer::StringTokenizer(const std::string& str, const std::string& separators, int options)
 {
-	std::string::const_iterator it1 = str.begin();
-	std::string::const_iterator it2;
-	std::string::const_iterator it3;
+	std::string::const_iterator it = str.begin();
 	std::string::const_iterator end = str.end();
-	
-	while (it1 != end)
+	std::string token;
+	bool doTrim = ((options & TOK_TRIM) != 0);
+	bool ignoreEmpty = ((options & TOK_IGNORE_EMPTY) != 0);
+	bool lastToken = false;
+
+	for (;it != end; ++it)
 	{
-		if (options & TOK_TRIM)
+		if (separators.find(*it) != std::string::npos) 
 		{
-			while (it1 != end && Ascii::isSpace(*it1)) ++it1;
-		}
-		it2 = it1;
-		while (it2 != end && separators.find(*it2) == std::string::npos) ++it2;
-		it3 = it2;
-		if (it3 != it1 && (options & TOK_TRIM))
-		{
-			--it3;
-			while (it3 != it1 && Ascii::isSpace(*it3)) --it3;
-			if (!Ascii::isSpace(*it3)) ++it3;
-		}
-		if (options & TOK_IGNORE_EMPTY)
-		{
-			if (it3 != it1)
-				_tokens.push_back(std::string(it1, it3));
+			if (doTrim) trim(token);
+			if (!token.empty() || !ignoreEmpty)_tokens.push_back(token);
+			if (!ignoreEmpty) lastToken = true;
+			token = "";
 		}
 		else
 		{
-			_tokens.push_back(std::string(it1, it3));
+			token += *it;
+			lastToken = false;
 		}
-		it1 = it2;
-		if (it1 != end) ++it1;
 	}
+
+	if (!token.empty())
+	{
+		if (doTrim) trim(token);
+		if (!token.empty()) _tokens.push_back(token);
+	}
+	else if (lastToken) _tokens.push_back("");
 }


@@ -84,6 +81,28 @@ StringTokenizer::~StringTokenizer()
 }


+void StringTokenizer::trim (std::string& token)
+{
+	int front = 0, back = 0, length = token.length();
+	std::string::const_iterator tIt = token.begin();
+	std::string::const_iterator tEnd = token.end();
+	for (; tIt != tEnd; ++tIt, ++front)
+	{
+		if (!Ascii::isSpace(*tIt)) break;
+	}
+	if (tIt != tEnd)
+	{
+		std::string::const_reverse_iterator tRit = token.rbegin();
+		std::string::const_reverse_iterator tRend = token.rend();
+		for (; tRit != tRend; ++tRit, ++back)
+		{
+			if (!Ascii::isSpace(*tRit)) break;
+		}
+	}
+	token = token.substr(front, length - back - front);
+}
+
+
 std::size_t StringTokenizer::count(const std::string& token) const
 {
 	std::size_t result = 0;