Glob now uses UTF-8 strings and supports caseless comparison

This commit is contained in:
Guenter Obiltschnig 2009-03-25 13:10:40 +00:00
parent d991074a5c
commit 21ca665427
4 changed files with 124 additions and 19 deletions

View File

@ -1,7 +1,7 @@
//
// Glob.h
//
// $Id: //poco/svn/Foundation/include/Poco/Glob.h#2 $
// $Id: //poco/1.3/Foundation/include/Poco/Glob.h#3 $
//
// Library: Foundation
// Package: Filesystem
@ -9,7 +9,7 @@
//
// Definition of the Glob class.
//
// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
// Copyright (c) 2004-2009, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// Permission is hereby granted, free of charge, to any person or organization
@ -41,6 +41,7 @@
#include "Poco/Foundation.h"
#include "Poco/TextIterator.h"
#include <set>
@ -67,7 +68,7 @@ class Foundation_API Glob
/// To suppress the special syntactic significance of any of '[]*?!-\',
/// and match the character exactly, precede it with a backslash.
///
/// UTF-8 encoded strings are not supported.
/// All strings are assumed to be UTF-8 encoded.
{
public:
enum Options
@ -76,6 +77,7 @@ public:
GLOB_DEFAULT = 0x00, /// default behavior
GLOB_DOT_SPECIAL = 0x01, /// '*' and '?' do not match '.' at beginning of subject
GLOB_FOLLOW_SYMLINKS = 0x02, /// follow symbolic links
GLOB_CASELESS = 0x04, /// ignore case when comparing characters
GLOB_DIRS_ONLY = 0x80 /// only glob for directories (for internal use only)
};
@ -138,9 +140,9 @@ public:
/// ignored.
protected:
bool match(std::string::const_iterator& itp, const std::string::const_iterator& endp, std::string::const_iterator& its, const std::string::const_iterator& ends);
bool matchAfterAsterisk(std::string::const_iterator itp, const std::string::const_iterator& endp, std::string::const_iterator its, const std::string::const_iterator& ends);
bool matchSet(std::string::const_iterator& itp, const std::string::const_iterator& endp, char c);
bool match(TextIterator& itp, const TextIterator& endp, TextIterator& its, const TextIterator& ends);
bool matchAfterAsterisk(TextIterator itp, const TextIterator& endp, TextIterator its, const TextIterator& ends);
bool matchSet(TextIterator& itp, const TextIterator& endp, int c);
static void collect(const Path& pathPattern, const Path& base, const Path& current, const std::string& pattern, std::set<std::string>& files, int options);
static bool isDirectory(const Path& path, bool followSymlink);

View File

@ -1,7 +1,7 @@
//
// Glob.cpp
//
// $Id: //poco/svn/Foundation/src/Glob.cpp#2 $
// $Id: //poco/1.3/Foundation/src/Glob.cpp#3 $
//
// Library: Foundation
// Package: Filesystem
@ -39,6 +39,8 @@
#include "Poco/Exception.h"
#include "Poco/DirectoryIterator.h"
#include "Poco/File.h"
#include "Poco/UTF8Encoding.h"
#include "Poco/Unicode.h"
namespace Poco {
@ -59,10 +61,11 @@ Glob::~Glob()
bool Glob::match(const std::string& subject)
{
std::string::const_iterator itp = _pattern.begin();
std::string::const_iterator endp = _pattern.end();
std::string::const_iterator its = subject.begin();
std::string::const_iterator ends = subject.end();
UTF8Encoding utf8;
TextIterator itp(_pattern, utf8);
TextIterator endp(_pattern);
TextIterator its(subject, utf8);
TextIterator ends(subject);
if ((_options & GLOB_DOT_SPECIAL) && its != ends && *its == '.' && (*itp == '?' || *itp == '*'))
return false;
@ -100,7 +103,7 @@ void Glob::glob(const Path& pathPattern, std::set<std::string>& files, int optio
}
bool Glob::match(std::string::const_iterator& itp, const std::string::const_iterator& endp, std::string::const_iterator& its, const std::string::const_iterator& ends)
bool Glob::match(TextIterator& itp, const TextIterator& endp, TextIterator& its, const TextIterator& ends)
{
while (itp != endp)
{
@ -138,7 +141,14 @@ bool Glob::match(std::string::const_iterator& itp, const std::string::const_iter
if (++itp == endp) throw SyntaxException("backslash must be followed by character in glob pattern");
// fallthrough
default:
if (*itp != *its) return false;
if (_options & GLOB_CASELESS)
{
if (Unicode::toLower(*itp) != Unicode::toLower(*its)) return false;
}
else
{
if (*itp != *its) return false;
}
++itp; ++its;
}
}
@ -146,14 +156,17 @@ bool Glob::match(std::string::const_iterator& itp, const std::string::const_iter
}
bool Glob::matchAfterAsterisk(std::string::const_iterator itp, const std::string::const_iterator& endp, std::string::const_iterator its, const std::string::const_iterator& ends)
bool Glob::matchAfterAsterisk(TextIterator itp, const TextIterator& endp, TextIterator its, const TextIterator& ends)
{
	// itp and its are received by value, while match() takes them by
	// reference and advances them. Matching therefore moves only these
	// local copies; the caller's iterator positions stay where they were.
	const bool matched = match(itp, endp, its, ends);
	return matched;
}
bool Glob::matchSet(std::string::const_iterator& itp, const std::string::const_iterator& endp, char c)
bool Glob::matchSet(TextIterator& itp, const TextIterator& endp, int c)
{
if (_options & GLOB_CASELESS)
c = Unicode::toLower(c);
while (itp != endp)
{
switch (*itp)
@ -164,8 +177,8 @@ bool Glob::matchSet(std::string::const_iterator& itp, const std::string::const_i
case '\\':
if (++itp == endp) throw SyntaxException("backslash must be followed by character in glob pattern");
}
char first = *itp;
char last = first;
int first = *itp;
int last = first;
if (++itp != endp && *itp == '-')
{
if (++itp != endp)
@ -173,6 +186,11 @@ bool Glob::matchSet(std::string::const_iterator& itp, const std::string::const_i
else
throw SyntaxException("bad range syntax in glob pattern");
}
if (_options & GLOB_CASELESS)
{
first = Unicode::toLower(first);
last = Unicode::toLower(last);
}
if (first <= c && c <= last)
{
while (itp != endp)

View File

@ -1,7 +1,7 @@
//
// GlobTest.cpp
//
// $Id: //poco/svn/Foundation/testsuite/src/GlobTest.cpp#2 $
// $Id: //poco/1.3/Foundation/testsuite/src/GlobTest.cpp#2 $
//
// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
@ -331,6 +331,72 @@ void GlobTest::testMisc()
}
void GlobTest::testCaseless()
{
Glob g1("*.cpp", Glob::GLOB_CASELESS);
assert (g1.match("Glob.cpp"));
assert (!g1.match("Glob.h"));
assert (g1.match("Glob.CPP"));
assert (!g1.match("Glob.H"));
Glob g2("*.[hc]", Glob::GLOB_CASELESS);
assert (g2.match("foo.c"));
assert (g2.match("foo.h"));
assert (!g2.match("foo.i"));
assert (g2.match("foo.C"));
assert (g2.match("foo.H"));
assert (!g2.match("foo.I"));
Glob g4("File*.?pp", Glob::GLOB_CASELESS);
assert (g4.match("file.hpp"));
assert (g4.match("FILE.CPP"));
assert (g4.match("filesystem.hpp"));
assert (g4.match("FILESYSTEM.HPP"));
assert (!g4.match("FILE.H"));
assert (!g4.match("file.h"));
Glob g5("File*.[ch]*", Glob::GLOB_CASELESS);
assert (g5.match("file.hpp"));
assert (g5.match("FILE.HPP"));
assert (g5.match("file.cpp"));
assert (g5.match("FILE.CPP"));
assert (g5.match("filesystem.hpp"));
assert (g5.match("FILESYSTEM.HPP"));
assert (g5.match("file.h"));
assert (g5.match("FILE.H"));
assert (g5.match("filesystem.cp"));
assert (g5.match("FILESYSTEM.CP"));
Glob g6("[abc]", Glob::GLOB_CASELESS);
assert (g6.match("a"));
assert (g6.match("b"));
assert (g6.match("c"));
assert (g6.match("A"));
assert (g6.match("B"));
assert (g6.match("C"));
Glob g7("[a-f]", Glob::GLOB_CASELESS);
assert (g7.match("a"));
assert (g7.match("b"));
assert (g7.match("f"));
assert (!g7.match("g"));
assert (g7.match("A"));
assert (g7.match("B"));
assert (g7.match("F"));
assert (!g7.match("G"));
Glob g8("[A-F]", Glob::GLOB_CASELESS);
assert (g8.match("a"));
assert (g8.match("b"));
assert (g8.match("f"));
assert (!g8.match("g"));
assert (g8.match("A"));
assert (g8.match("B"));
assert (g8.match("F"));
assert (!g8.match("G"));
}
void GlobTest::testGlob()
{
createFile("globtest/Makefile");
@ -353,6 +419,16 @@ void GlobTest::testGlob()
assert (files.find("globtest/include/") != files.end());
assert (files.find("globtest/src/") != files.end());
assert (files.find("globtest/testsuite/") != files.end());
files.clear();
Glob::glob("GlobTest/*", files, Glob::GLOB_CASELESS);
translatePaths(files);
assert (files.size() == 5);
assert (files.find("globtest/Makefile") != files.end());
assert (files.find("globtest/.hidden") != files.end());
assert (files.find("globtest/include/") != files.end());
assert (files.find("globtest/src/") != files.end());
assert (files.find("globtest/testsuite/") != files.end());
files.clear();
Glob::glob("globtest/*/*.[hc]", files);
@ -370,6 +446,13 @@ void GlobTest::testGlob()
assert (files.size() == 2);
assert (files.find("globtest/testsuite/src/test.c") != files.end());
assert (files.find("globtest/testsuite/src/main.c") != files.end());
files.clear();
Glob::glob("Gl?bT?st/*/*/*.C", files, Glob::GLOB_CASELESS);
translatePaths(files);
assert (files.size() == 2);
assert (files.find("globtest/testsuite/src/test.c") != files.end());
assert (files.find("globtest/testsuite/src/main.c") != files.end());
files.clear();
Glob::glob("globtest/*/src/*", files);
@ -439,6 +522,7 @@ CppUnit::Test* GlobTest::suite()
CppUnit_addTest(pSuite, GlobTest, testMatchAsterisk);
CppUnit_addTest(pSuite, GlobTest, testMatchRange);
CppUnit_addTest(pSuite, GlobTest, testMisc);
CppUnit_addTest(pSuite, GlobTest, testCaseless);
CppUnit_addTest(pSuite, GlobTest, testGlob);
return pSuite;

View File

@ -1,7 +1,7 @@
//
// GlobTest.h
//
// $Id: //poco/svn/Foundation/testsuite/src/GlobTest.h#2 $
// $Id: //poco/1.3/Foundation/testsuite/src/GlobTest.h#2 $
//
// Definition of the GlobTest class.
//
@ -53,6 +53,7 @@ public:
void testMatchRange();
void testMisc();
void testGlob();
void testCaseless();
void setUp();
void tearDown();