[DEBUG] Add a maximizing option to the regexp search

This commit is contained in:
Edouard DUPIN 2014-08-27 21:25:46 +02:00
parent bee6e06720
commit ce3243a5f4
2 changed files with 61 additions and 10 deletions

View File

@ -20,7 +20,8 @@
//#define TK_REG_DEBUG TK_VERBOSE //#define TK_REG_DEBUG TK_VERBOSE
//#define TK_REG_DEBUG TK_DEBUG //#define TK_REG_DEBUG TK_DEBUG
#define TK_REG_DEBUG_3 TK_VERBOSE #define TK_REG_DEBUG_3 TK_HIDDEN
//#define TK_REG_DEBUG_3 TK_VERBOSE
//#define TK_REG_DEBUG_3 TK_DEBUG //#define TK_REG_DEBUG_3 TK_DEBUG
#define TK_REG_DEBUG_2 TK_HIDDEN #define TK_REG_DEBUG_2 TK_HIDDEN
@ -1044,7 +1045,11 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
break; break;
} }
} }
_property.setPositionStop( _property.m_subProperty.back().getPositionStop() ); if (_property.m_subProperty.size()>0) {
_property.setPositionStop(_property.m_subProperty.back().getPositionStop() );
} else {
TK_WARNING("RegExp ERROR");
}
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) return=" << _property); TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem) return=" << _property);
} }
@ -1186,6 +1191,9 @@ template<class CLASS_TYPE> class NodePThese : public Node<CLASS_TYPE> {
tmpFind = false; tmpFind = false;
if (tmpCurrentPos+offset>=_lenMax) { if (tmpCurrentPos+offset>=_lenMax) {
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... ---/" << m_subNode.size() << ") ==> out of range : " << tmpCurrentPos << "+" << offset << " >= " << _lenMax); TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... ---/" << m_subNode.size() << ") ==> out of range : " << tmpCurrentPos << "+" << offset << " >= " << _lenMax);
prop.setStatus(parseStatusFull);
_property.m_subProperty.push_back(prop);
break;
} }
for (size_t iii=iiiStartPos; iii<m_subNode.size() && tmpCurrentPos+offset<_lenMax; ++iii) { for (size_t iii=iiiStartPos; iii<m_subNode.size() && tmpCurrentPos+offset<_lenMax; ++iii) {
TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... " << iii << "/" << m_subNode.size() << ")"); TK_REG_DEBUG(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (... " << iii << "/" << m_subNode.size() << ")");
@ -1315,6 +1323,7 @@ template<class CLASS_TYPE> class RegExp {
bool m_isOk; //!< Known if we can process with this regExp bool m_isOk; //!< Known if we can process with this regExp
bool m_notBeginWithChar; //!< The regular expression must not have previously a char [a-zA-Z0-9_] bool m_notBeginWithChar; //!< The regular expression must not have previously a char [a-zA-Z0-9_]
bool m_notEndWithChar; //!< The regular expression must not have after the end a char [a-zA-Z0-9_] bool m_notEndWithChar; //!< The regular expression must not have after the end a char [a-zA-Z0-9_]
bool m_maximize; //!< Maximize the match when true; by default (false) the regexp finds the minimum size of a match.
public: public:
// create the regular expression // create the regular expression
@ -1326,7 +1335,8 @@ template<class CLASS_TYPE> class RegExp {
m_expressionRequested(U""), m_expressionRequested(U""),
m_isOk(false), m_isOk(false),
m_notBeginWithChar(false), m_notBeginWithChar(false),
m_notEndWithChar(false) { m_notEndWithChar(false),
m_maximize(false) {
m_areaFind.start=0; m_areaFind.start=0;
m_areaFind.stop=0; m_areaFind.stop=0;
if (_exp.size() != 0) { if (_exp.size() != 0) {
@ -1340,7 +1350,8 @@ template<class CLASS_TYPE> class RegExp {
m_expressionRequested(U""), m_expressionRequested(U""),
m_isOk(false), m_isOk(false),
m_notBeginWithChar(false), m_notBeginWithChar(false),
m_notEndWithChar(false) { m_notEndWithChar(false),
m_maximize(false) {
m_areaFind.start=0; m_areaFind.start=0;
m_areaFind.stop=0; m_areaFind.stop=0;
if (_exp.size() != 0) { if (_exp.size() != 0) {
@ -1354,7 +1365,13 @@ template<class CLASS_TYPE> class RegExp {
~RegExp() { ~RegExp() {
m_isOk = false; m_isOk = false;
}; };
/**
* @brief Enable or disable maximizing of the regexp match.
* @note The flag is stored in m_maximize, which the constructors initialize to false.
* @param[in] _value true to maximize the regexp match, false to keep the default (non-maximizing) search.
*/
void setMaximize(bool _value) {
m_maximize = _value;
}
/** /**
* @brief Set a new regular expression matching * @brief Set a new regular expression matching
* @param[in] _exp the new expression to search * @param[in] _exp the new expression to search
@ -1568,16 +1585,25 @@ template<class CLASS_TYPE> class RegExp {
regexp::FindProperty prop; regexp::FindProperty prop;
prop.setPositionStart(iii); prop.setPositionStart(iii);
bool needOneMoreCycle = true; bool needOneMoreCycle = true;
bool oneCycleDone = false;
while (needOneMoreCycle == true) { while (needOneMoreCycle == true) {
needOneMoreCycle = false; needOneMoreCycle = false;
m_exprRootNode.parse(_SearchIn, iii, _endPos, prop); m_exprRootNode.parse(_SearchIn, iii, _endPos, prop);
TK_REG_DEBUG("res=" << prop.getStatus());
if ( prop.getStatus() == regexp::parseStatusNone
&& m_maximize == true
&& oneCycleDone == false) {
// TODO : do it better. Patch for the case of a ".*" search with maximizing
oneCycleDone = true;
needOneMoreCycle = true;
}
if ( prop.getStatus() == regexp::parseStatusFull if ( prop.getStatus() == regexp::parseStatusFull
|| prop.getStatus() == regexp::parseStatusPartial ) { || prop.getStatus() == regexp::parseStatusPartial ) {
findLen = prop.getFindLen(); findLen = prop.getFindLen();
TK_REG_DEBUG_3("main search find : " << findLen << " elements data=" << std::string(_SearchIn, prop.getPositionStart(), prop.getFindLen())); TK_REG_DEBUG_3("main search find : " << findLen << " elements data=" << std::string(_SearchIn, prop.getPositionStart(), prop.getFindLen()));
// Check end : // Check end :
if (m_notEndWithChar == true) { if (m_notEndWithChar == true) {
TK_DEBUG("Check end is not a char: '" << (char)_SearchIn[iii+findLen] << "'"); TK_REG_DEBUG("Check end is not a char: '" << (char)_SearchIn[iii+findLen] << "'");
if (_startPos+findLen < (int64_t)_SearchIn.size() ) { if (_startPos+findLen < (int64_t)_SearchIn.size() ) {
char32_t tmpVal = _SearchIn[iii+findLen]; char32_t tmpVal = _SearchIn[iii+findLen];
if( ( tmpVal >= 'a' if( ( tmpVal >= 'a'
@ -1588,11 +1614,15 @@ template<class CLASS_TYPE> class RegExp {
&& tmpVal <= '9' ) && tmpVal <= '9' )
|| ( tmpVal == '_' ) ) { || ( tmpVal == '_' ) ) {
// go on the next char ... // go on the next char ...
TK_DEBUG("Need one more cycle ..."); TK_REG_DEBUG("Need one more cycle ...");
needOneMoreCycle = true; needOneMoreCycle = true;
} }
} }
} }
if ( m_maximize == true
&& prop.getStatus() == regexp::parseStatusPartial) {
needOneMoreCycle = true;
}
if (needOneMoreCycle == false) { if (needOneMoreCycle == false) {
m_areaFind.start = iii; m_areaFind.start = iii;
m_areaFind.stop = iii + findLen; m_areaFind.stop = iii + findLen;
@ -1665,6 +1695,10 @@ template<class CLASS_TYPE> class RegExp {
} }
} }
} }
if ( m_maximize == true
&& prop.getStatus() == regexp::parseStatusPartial) {
needOneMoreCycle = true;
}
if (needOneMoreCycle == false) { if (needOneMoreCycle == false) {
m_areaFind.start = _startPos; m_areaFind.start = _startPos;
m_areaFind.stop = _startPos + findLen; m_areaFind.stop = _startPos + findLen;

View File

@ -136,7 +136,7 @@ void testDimension() {
void testColor() { void testColor() {
TK_INFO("==> test of COLOR (START)"); TK_INFO("==> test of COLOR (START)");
/*
etk::Color<uint8_t, 4> colorRGBA8(0x52,0x0F, 0x65, 0x44); etk::Color<uint8_t, 4> colorRGBA8(0x52,0x0F, 0x65, 0x44);
etk::Color<uint16_t, 4> colorRGBA16(0x52,0x0F, 0x65, 0x44); etk::Color<uint16_t, 4> colorRGBA16(0x52,0x0F, 0x65, 0x44);
etk::Color<uint32_t, 4> colorRGBA32(0x52,0x0F, 0x65, 0x44); etk::Color<uint32_t, 4> colorRGBA32(0x52,0x0F, 0x65, 0x44);
@ -150,6 +150,7 @@ void testColor() {
etk::Color<uint32_t, 1> colorMono32(0x52); etk::Color<uint32_t, 1> colorMono32(0x52);
etk::Color<float, 1> colorMonoF(5200.22); etk::Color<float, 1> colorMonoF(5200.22);
etk::Color<double, 1> colorMonoD(520000.22); etk::Color<double, 1> colorMonoD(520000.22);
*/
/* /*
etk::Color<uint8_t, 4> colorRGBA8__("#520F6544"); etk::Color<uint8_t, 4> colorRGBA8__("#520F6544");
etk::Color<uint16_t, 4> colorRGBA16__("rgba(0x52, 0x0F, 0x65, 0x44)"); etk::Color<uint16_t, 4> colorRGBA16__("rgba(0x52, 0x0F, 0x65, 0x44)");
@ -165,6 +166,7 @@ void testColor() {
etk::Color<float, 1> colorMonoF__("mono(5200.22)"); etk::Color<float, 1> colorMonoF__("mono(5200.22)");
etk::Color<double, 1> colorMonoD__("mono(520000.22)"); etk::Color<double, 1> colorMonoD__("mono(520000.22)");
*/ */
/*
etk::Color<float, 4> colorRGBAf__(colorRGBA8); etk::Color<float, 4> colorRGBAf__(colorRGBA8);
etk::Color<uint32_t, 2> colorXX332__(colorRGBA8); etk::Color<uint32_t, 2> colorXX332__(colorRGBA8);
@ -183,7 +185,7 @@ void testColor() {
TK_INFO("Create a color : MONO 32 : " << colorMono32); TK_INFO("Create a color : MONO 32 : " << colorMono32);
TK_INFO("Create a color : MONO float : " << colorMonoF); TK_INFO("Create a color : MONO float : " << colorMonoF);
TK_INFO("Create a color : MONO double : " << colorMonoD); TK_INFO("Create a color : MONO double : " << colorMonoD);
*/
TK_INFO("==> test of Color (STOP)"); TK_INFO("==> test of Color (STOP)");
exit(0); exit(0);
} }
@ -256,7 +258,22 @@ void testRegExp() {
data = " 'dfgd\\'fg' \n" data = " 'dfgd\\'fg' \n"
" vec2 m_offset; \n"; " vec2 m_offset; \n";
testRegExpSingle("'((\\\\[\\\\'])|.)*'", data); //testRegExpSingle("'((\\\\[\\\\'])|.)*'", data);
data = "ddfgdfgh";
etk::RegExp<std::string> reg(".*");
reg.setMaximize(true);
TK_INFO("Parse RegEx : '" << reg.getRegExDecorated() << "'");
if (reg.parse(data, 0, data.size()) == true) {
//if (reg.processOneElement(data, 0, data.size()) == true) {
TK_INFO(" match [" << reg.start() << ".." << reg.stop() << "] ");
TK_INFO(" ==> '" << std::string(data, reg.start(), reg.stop()-reg.start()) << "'");
}
//TODO : good : "(\\+|[0-9])*" ==> really bad : "(+|[0-9])*"
} }