mirror of
				https://github.com/pocoproject/poco.git
				synced 2025-10-26 10:32:56 +01:00 
			
		
		
		
	Glob now uses UTF-8 strings and supports caseless comparison
This commit is contained in:
		| @@ -1,7 +1,7 @@ | |||||||
| // | // | ||||||
| // Glob.h | // Glob.h | ||||||
| // | // | ||||||
| // $Id: //poco/svn/Foundation/include/Poco/Glob.h#2 $ | // $Id: //poco/1.3/Foundation/include/Poco/Glob.h#3 $ | ||||||
| // | // | ||||||
| // Library: Foundation | // Library: Foundation | ||||||
| // Package: Filesystem | // Package: Filesystem | ||||||
| @@ -9,7 +9,7 @@ | |||||||
| // | // | ||||||
| // Definition of the Glob class. | // Definition of the Glob class. | ||||||
| // | // | ||||||
| // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. | // Copyright (c) 2004-2009, Applied Informatics Software Engineering GmbH. | ||||||
| // and Contributors. | // and Contributors. | ||||||
| // | // | ||||||
| // Permission is hereby granted, free of charge, to any person or organization | // Permission is hereby granted, free of charge, to any person or organization | ||||||
| @@ -41,6 +41,7 @@ | |||||||
|  |  | ||||||
|  |  | ||||||
| #include "Poco/Foundation.h" | #include "Poco/Foundation.h" | ||||||
|  | #include "Poco/TextIterator.h" | ||||||
| #include <set> | #include <set> | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -67,7 +68,7 @@ class Foundation_API Glob | |||||||
| 	/// To suppress the special syntactic significance of any of '[]*?!-\', | 	/// To suppress the special syntactic significance of any of '[]*?!-\', | ||||||
| 	/// and match the character exactly, precede it with a backslash. | 	/// and match the character exactly, precede it with a backslash. | ||||||
| 	/// | 	/// | ||||||
| 	/// UTF-8 encoded strings are not supported. | 	/// All strings are assumed to be UTF-8 encoded. | ||||||
| { | { | ||||||
| public: | public: | ||||||
| 	enum Options | 	enum Options | ||||||
| @@ -76,6 +77,7 @@ public: | |||||||
| 		GLOB_DEFAULT         = 0x00, /// default behavior | 		GLOB_DEFAULT         = 0x00, /// default behavior | ||||||
| 		GLOB_DOT_SPECIAL     = 0x01, /// '*' and '?' do not match '.' at beginning of subject | 		GLOB_DOT_SPECIAL     = 0x01, /// '*' and '?' do not match '.' at beginning of subject | ||||||
| 		GLOB_FOLLOW_SYMLINKS = 0x02, /// follow symbolic links | 		GLOB_FOLLOW_SYMLINKS = 0x02, /// follow symbolic links | ||||||
|  | 		GLOB_CASELESS        = 0x04, /// ignore case when comparing characters | ||||||
| 		GLOB_DIRS_ONLY       = 0x80  /// only glob for directories (for internal use only) | 		GLOB_DIRS_ONLY       = 0x80  /// only glob for directories (for internal use only) | ||||||
| 	}; | 	}; | ||||||
| 	 | 	 | ||||||
| @@ -138,9 +140,9 @@ public: | |||||||
| 		/// ignored. | 		/// ignored. | ||||||
|  |  | ||||||
| protected: | protected: | ||||||
| 	bool match(std::string::const_iterator& itp, const std::string::const_iterator& endp, std::string::const_iterator& its, const std::string::const_iterator& ends); | 	bool match(TextIterator& itp, const TextIterator& endp, TextIterator& its, const TextIterator& ends); | ||||||
| 	bool matchAfterAsterisk(std::string::const_iterator itp, const std::string::const_iterator& endp, std::string::const_iterator its, const std::string::const_iterator& ends); | 	bool matchAfterAsterisk(TextIterator itp, const TextIterator& endp, TextIterator its, const TextIterator& ends); | ||||||
| 	bool matchSet(std::string::const_iterator& itp, const std::string::const_iterator& endp, char c); | 	bool matchSet(TextIterator& itp, const TextIterator& endp, int c); | ||||||
| 	static void collect(const Path& pathPattern, const Path& base, const Path& current, const std::string& pattern, std::set<std::string>& files, int options); | 	static void collect(const Path& pathPattern, const Path& base, const Path& current, const std::string& pattern, std::set<std::string>& files, int options); | ||||||
| 	static bool isDirectory(const Path& path, bool followSymlink); | 	static bool isDirectory(const Path& path, bool followSymlink); | ||||||
| 	 | 	 | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| // | // | ||||||
| // Glob.cpp | // Glob.cpp | ||||||
| // | // | ||||||
| // $Id: //poco/svn/Foundation/src/Glob.cpp#2 $ | // $Id: //poco/1.3/Foundation/src/Glob.cpp#3 $ | ||||||
| // | // | ||||||
| // Library: Foundation | // Library: Foundation | ||||||
| // Package: Filesystem | // Package: Filesystem | ||||||
| @@ -39,6 +39,8 @@ | |||||||
| #include "Poco/Exception.h" | #include "Poco/Exception.h" | ||||||
| #include "Poco/DirectoryIterator.h" | #include "Poco/DirectoryIterator.h" | ||||||
| #include "Poco/File.h" | #include "Poco/File.h" | ||||||
|  | #include "Poco/UTF8Encoding.h" | ||||||
|  | #include "Poco/Unicode.h" | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Poco { | namespace Poco { | ||||||
| @@ -59,10 +61,11 @@ Glob::~Glob() | |||||||
|  |  | ||||||
| bool Glob::match(const std::string& subject) | bool Glob::match(const std::string& subject) | ||||||
| { | { | ||||||
| 	std::string::const_iterator itp  = _pattern.begin(); | 	UTF8Encoding utf8; | ||||||
| 	std::string::const_iterator endp = _pattern.end(); | 	TextIterator itp(_pattern, utf8); | ||||||
| 	std::string::const_iterator its  = subject.begin(); | 	TextIterator endp(_pattern); | ||||||
| 	std::string::const_iterator ends = subject.end(); | 	TextIterator its(subject, utf8); | ||||||
|  | 	TextIterator ends(subject); | ||||||
| 	 | 	 | ||||||
| 	if ((_options & GLOB_DOT_SPECIAL) && its != ends && *its == '.' && (*itp == '?' || *itp == '*')) | 	if ((_options & GLOB_DOT_SPECIAL) && its != ends && *its == '.' && (*itp == '?' || *itp == '*')) | ||||||
| 		return false; | 		return false; | ||||||
| @@ -100,7 +103,7 @@ void Glob::glob(const Path& pathPattern, std::set<std::string>& files, int optio | |||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool Glob::match(std::string::const_iterator& itp, const std::string::const_iterator& endp, std::string::const_iterator& its, const std::string::const_iterator& ends) | bool Glob::match(TextIterator& itp, const TextIterator& endp, TextIterator& its, const TextIterator& ends) | ||||||
| { | { | ||||||
| 	while (itp != endp) | 	while (itp != endp) | ||||||
| 	{ | 	{ | ||||||
| @@ -138,7 +141,14 @@ bool Glob::match(std::string::const_iterator& itp, const std::string::const_iter | |||||||
| 			if (++itp == endp) throw SyntaxException("backslash must be followed by character in glob pattern"); | 			if (++itp == endp) throw SyntaxException("backslash must be followed by character in glob pattern"); | ||||||
| 			// fallthrough | 			// fallthrough | ||||||
| 		default: | 		default: | ||||||
| 			if (*itp != *its) return false; | 			if (_options & GLOB_CASELESS) | ||||||
|  | 			{ | ||||||
|  | 				if (Unicode::toLower(*itp) != Unicode::toLower(*its)) return false; | ||||||
|  | 			} | ||||||
|  | 			else | ||||||
|  | 			{ | ||||||
|  | 				if (*itp != *its) return false; | ||||||
|  | 			} | ||||||
| 			++itp; ++its; | 			++itp; ++its; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @@ -146,14 +156,17 @@ bool Glob::match(std::string::const_iterator& itp, const std::string::const_iter | |||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool Glob::matchAfterAsterisk(std::string::const_iterator itp, const std::string::const_iterator& endp, std::string::const_iterator its, const std::string::const_iterator& ends) | bool Glob::matchAfterAsterisk(TextIterator itp, const TextIterator& endp, TextIterator its, const TextIterator& ends) | ||||||
| { | { | ||||||
| 	return match(itp, endp, its, ends); | 	return match(itp, endp, its, ends); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool Glob::matchSet(std::string::const_iterator& itp, const std::string::const_iterator& endp, char c) | bool Glob::matchSet(TextIterator& itp, const TextIterator& endp, int c) | ||||||
| { | { | ||||||
|  | 	if (_options & GLOB_CASELESS) | ||||||
|  | 		c = Unicode::toLower(c); | ||||||
|  |  | ||||||
| 	while (itp != endp) | 	while (itp != endp) | ||||||
| 	{ | 	{ | ||||||
| 		switch (*itp) | 		switch (*itp) | ||||||
| @@ -164,8 +177,8 @@ bool Glob::matchSet(std::string::const_iterator& itp, const std::string::const_i | |||||||
| 		case '\\': | 		case '\\': | ||||||
| 			if (++itp == endp) throw SyntaxException("backslash must be followed by character in glob pattern"); | 			if (++itp == endp) throw SyntaxException("backslash must be followed by character in glob pattern"); | ||||||
| 		} | 		} | ||||||
| 		char first = *itp; | 		int first = *itp; | ||||||
| 		char last  = first; | 		int last  = first; | ||||||
| 		if (++itp != endp && *itp == '-') | 		if (++itp != endp && *itp == '-') | ||||||
| 		{ | 		{ | ||||||
| 			if (++itp != endp) | 			if (++itp != endp) | ||||||
| @@ -173,6 +186,11 @@ bool Glob::matchSet(std::string::const_iterator& itp, const std::string::const_i | |||||||
| 			else | 			else | ||||||
| 				throw SyntaxException("bad range syntax in glob pattern"); | 				throw SyntaxException("bad range syntax in glob pattern"); | ||||||
| 		} | 		} | ||||||
|  | 		if (_options & GLOB_CASELESS) | ||||||
|  | 		{ | ||||||
|  | 			first = Unicode::toLower(first); | ||||||
|  | 			last  = Unicode::toLower(last); | ||||||
|  | 		} | ||||||
| 		if (first <= c && c <= last) | 		if (first <= c && c <= last) | ||||||
| 		{ | 		{ | ||||||
| 			while (itp != endp) | 			while (itp != endp) | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| // | // | ||||||
| // GlobTest.cpp | // GlobTest.cpp | ||||||
| // | // | ||||||
| // $Id: //poco/svn/Foundation/testsuite/src/GlobTest.cpp#2 $ | // $Id: //poco/1.3/Foundation/testsuite/src/GlobTest.cpp#2 $ | ||||||
| // | // | ||||||
| // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. | // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. | ||||||
| // and Contributors. | // and Contributors. | ||||||
| @@ -331,6 +331,72 @@ void GlobTest::testMisc() | |||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | void GlobTest::testCaseless() | ||||||
|  | { | ||||||
|  | 	Glob g1("*.cpp", Glob::GLOB_CASELESS); | ||||||
|  | 	assert (g1.match("Glob.cpp")); | ||||||
|  | 	assert (!g1.match("Glob.h")); | ||||||
|  | 	assert (g1.match("Glob.CPP")); | ||||||
|  | 	assert (!g1.match("Glob.H")); | ||||||
|  | 	 | ||||||
|  | 	Glob g2("*.[hc]", Glob::GLOB_CASELESS); | ||||||
|  | 	assert (g2.match("foo.c")); | ||||||
|  | 	assert (g2.match("foo.h")); | ||||||
|  | 	assert (!g2.match("foo.i")); | ||||||
|  | 	assert (g2.match("foo.C")); | ||||||
|  | 	assert (g2.match("foo.H")); | ||||||
|  | 	assert (!g2.match("foo.I")); | ||||||
|  | 		 | ||||||
|  | 	Glob g4("File*.?pp", Glob::GLOB_CASELESS); | ||||||
|  | 	assert (g4.match("file.hpp")); | ||||||
|  | 	assert (g4.match("FILE.CPP")); | ||||||
|  | 	assert (g4.match("filesystem.hpp")); | ||||||
|  | 	assert (g4.match("FILESYSTEM.HPP")); | ||||||
|  | 	assert (!g4.match("FILE.H")); | ||||||
|  | 	assert (!g4.match("file.h")); | ||||||
|  | 	 | ||||||
|  | 	Glob g5("File*.[ch]*", Glob::GLOB_CASELESS); | ||||||
|  | 	assert (g5.match("file.hpp")); | ||||||
|  | 	assert (g5.match("FILE.HPP")); | ||||||
|  | 	assert (g5.match("file.cpp")); | ||||||
|  | 	assert (g5.match("FILE.CPP")); | ||||||
|  | 	assert (g5.match("filesystem.hpp")); | ||||||
|  | 	assert (g5.match("FILESYSTEM.HPP")); | ||||||
|  | 	assert (g5.match("file.h")); | ||||||
|  | 	assert (g5.match("FILE.H")); | ||||||
|  | 	assert (g5.match("filesystem.cp")); | ||||||
|  | 	assert (g5.match("FILESYSTEM.CP")); | ||||||
|  |  | ||||||
|  | 	Glob g6("[abc]", Glob::GLOB_CASELESS); | ||||||
|  | 	assert (g6.match("a")); | ||||||
|  | 	assert (g6.match("b")); | ||||||
|  | 	assert (g6.match("c")); | ||||||
|  | 	assert (g6.match("A")); | ||||||
|  | 	assert (g6.match("B")); | ||||||
|  | 	assert (g6.match("C")); | ||||||
|  |  | ||||||
|  | 	Glob g7("[a-f]", Glob::GLOB_CASELESS); | ||||||
|  | 	assert (g7.match("a")); | ||||||
|  | 	assert (g7.match("b")); | ||||||
|  | 	assert (g7.match("f")); | ||||||
|  | 	assert (!g7.match("g")); | ||||||
|  | 	assert (g7.match("A")); | ||||||
|  | 	assert (g7.match("B")); | ||||||
|  | 	assert (g7.match("F")); | ||||||
|  | 	assert (!g7.match("G")); | ||||||
|  |  | ||||||
|  | 	Glob g8("[A-F]", Glob::GLOB_CASELESS); | ||||||
|  | 	assert (g8.match("a")); | ||||||
|  | 	assert (g8.match("b")); | ||||||
|  | 	assert (g8.match("f")); | ||||||
|  | 	assert (!g8.match("g")); | ||||||
|  | 	assert (g8.match("A")); | ||||||
|  | 	assert (g8.match("B")); | ||||||
|  | 	assert (g8.match("F")); | ||||||
|  | 	assert (!g8.match("G")); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| void GlobTest::testGlob() | void GlobTest::testGlob() | ||||||
| { | { | ||||||
| 	createFile("globtest/Makefile"); | 	createFile("globtest/Makefile"); | ||||||
| @@ -353,6 +419,16 @@ void GlobTest::testGlob() | |||||||
| 	assert (files.find("globtest/include/") != files.end()); | 	assert (files.find("globtest/include/") != files.end()); | ||||||
| 	assert (files.find("globtest/src/") != files.end()); | 	assert (files.find("globtest/src/") != files.end()); | ||||||
| 	assert (files.find("globtest/testsuite/") != files.end()); | 	assert (files.find("globtest/testsuite/") != files.end()); | ||||||
|  |  | ||||||
|  | 	files.clear(); | ||||||
|  | 	Glob::glob("GlobTest/*", files, Glob::GLOB_CASELESS); | ||||||
|  | 	translatePaths(files); | ||||||
|  | 	assert (files.size() == 5); | ||||||
|  | 	assert (files.find("globtest/Makefile") != files.end()); | ||||||
|  | 	assert (files.find("globtest/.hidden") != files.end()); | ||||||
|  | 	assert (files.find("globtest/include/") != files.end()); | ||||||
|  | 	assert (files.find("globtest/src/") != files.end()); | ||||||
|  | 	assert (files.find("globtest/testsuite/") != files.end()); | ||||||
| 	 | 	 | ||||||
| 	files.clear(); | 	files.clear(); | ||||||
| 	Glob::glob("globtest/*/*.[hc]", files); | 	Glob::glob("globtest/*/*.[hc]", files); | ||||||
| @@ -370,6 +446,13 @@ void GlobTest::testGlob() | |||||||
| 	assert (files.size() == 2); | 	assert (files.size() == 2); | ||||||
| 	assert (files.find("globtest/testsuite/src/test.c") != files.end()); | 	assert (files.find("globtest/testsuite/src/test.c") != files.end()); | ||||||
| 	assert (files.find("globtest/testsuite/src/main.c") != files.end()); | 	assert (files.find("globtest/testsuite/src/main.c") != files.end()); | ||||||
|  |  | ||||||
|  | 	files.clear(); | ||||||
|  | 	Glob::glob("Gl?bT?st/*/*/*.C", files, Glob::GLOB_CASELESS); | ||||||
|  | 	translatePaths(files); | ||||||
|  | 	assert (files.size() == 2); | ||||||
|  | 	assert (files.find("globtest/testsuite/src/test.c") != files.end()); | ||||||
|  | 	assert (files.find("globtest/testsuite/src/main.c") != files.end()); | ||||||
| 	 | 	 | ||||||
| 	files.clear(); | 	files.clear(); | ||||||
| 	Glob::glob("globtest/*/src/*", files); | 	Glob::glob("globtest/*/src/*", files); | ||||||
| @@ -439,6 +522,7 @@ CppUnit::Test* GlobTest::suite() | |||||||
| 	CppUnit_addTest(pSuite, GlobTest, testMatchAsterisk); | 	CppUnit_addTest(pSuite, GlobTest, testMatchAsterisk); | ||||||
| 	CppUnit_addTest(pSuite, GlobTest, testMatchRange); | 	CppUnit_addTest(pSuite, GlobTest, testMatchRange); | ||||||
| 	CppUnit_addTest(pSuite, GlobTest, testMisc); | 	CppUnit_addTest(pSuite, GlobTest, testMisc); | ||||||
|  | 	CppUnit_addTest(pSuite, GlobTest, testCaseless); | ||||||
| 	CppUnit_addTest(pSuite, GlobTest, testGlob); | 	CppUnit_addTest(pSuite, GlobTest, testGlob); | ||||||
|  |  | ||||||
| 	return pSuite; | 	return pSuite; | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| // | // | ||||||
| // GlobTest.h | // GlobTest.h | ||||||
| // | // | ||||||
| // $Id: //poco/svn/Foundation/testsuite/src/GlobTest.h#2 $ | // $Id: //poco/1.3/Foundation/testsuite/src/GlobTest.h#2 $ | ||||||
| // | // | ||||||
| // Definition of the GlobTest class. | // Definition of the GlobTest class. | ||||||
| // | // | ||||||
| @@ -53,6 +53,7 @@ public: | |||||||
| 	void testMatchRange(); | 	void testMatchRange(); | ||||||
| 	void testMisc(); | 	void testMisc(); | ||||||
| 	void testGlob(); | 	void testGlob(); | ||||||
|  | 	void testCaseless(); | ||||||
|  |  | ||||||
| 	void setUp(); | 	void setUp(); | ||||||
| 	void tearDown(); | 	void tearDown(); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Guenter Obiltschnig
					Guenter Obiltschnig