[DEV] really better RegEx parser ==> nearly ended
This commit is contained in:
parent
27d1f599e6
commit
89784df428
80
etk/RegExp.h
80
etk/RegExp.h
@ -17,8 +17,8 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#define TK_REG_EXP_DBG_MODE2 TK_HIDDEN
|
#define TK_REG_EXP_DBG_MODE2 TK_HIDDEN
|
||||||
#define TK_REG_EXP_DBG_MODE TK_HIDDEN
|
//#define TK_REG_EXP_DBG_MODE TK_HIDDEN
|
||||||
//#define TK_REG_EXP_DBG_MODE TK_VERBOSE
|
#define TK_REG_EXP_DBG_MODE TK_VERBOSE
|
||||||
//#define TK_REG_EXP_DBG_MODE TK_DEBUG
|
//#define TK_REG_EXP_DBG_MODE TK_DEBUG
|
||||||
|
|
||||||
//regular colors
|
//regular colors
|
||||||
@ -96,17 +96,18 @@ normal mode :
|
|||||||
[anjdi] or [a-gt-j] range
|
[anjdi] or [a-gt-j] range
|
||||||
. dot [^\x00-\x08\x0A-\x1F\x7F]
|
. dot [^\x00-\x08\x0A-\x1F\x7F]
|
||||||
$ End / Start of line ==> this will be an additional element, like \@
|
$ End / Start of line ==> this will be an additional element, like \@
|
||||||
|
@ Previous
|
||||||
==> TODO :
|
==> TODO :
|
||||||
^ in the [] inversion of the range element
|
^ in the [] inversion of the range element
|
||||||
Start of line
|
Start of line
|
||||||
force regexp to be the shortest.
|
force regexp to be the shortest.
|
||||||
|
|
||||||
multiplicity :
|
multiplicity :
|
||||||
* ==> {0, 2147483647}
|
* ==> {0, 2147483647} (try to have the minimum size)
|
||||||
? ==> {0, 1}
|
? ==> {0, 1}
|
||||||
+ ==> {1, 2147483647}
|
+ ==> {1, 2147483647} (try to have the minimum size)
|
||||||
{x} ==> {x, x}
|
{x} ==> {x, x} (try to have the minimum size)
|
||||||
{x,y} ==> {x, y}
|
{x,y} ==> {x, y} (try to have the minimum size)
|
||||||
*/
|
*/
|
||||||
/**
|
/**
|
||||||
* @brief conversion table of every element in a regular expression.
|
* @brief conversion table of every element in a regular expression.
|
||||||
@ -367,6 +368,13 @@ template<class CLASS_TYPE> class NodeValue : public Node<CLASS_TYPE> {
|
|||||||
_property.setStatus(parseStatusNone);
|
_property.setStatus(parseStatusNone);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if ( _property.getPositionStop() < 0
|
||||||
|
&& Node<CLASS_TYPE>::m_multipleMin == 0
|
||||||
|
&& _property.getMultiplicity() == 0) {
|
||||||
|
_property.setPositionStop(_property.getPositionStart());
|
||||||
|
_property.setStatus(parseStatusPartial);
|
||||||
|
return;
|
||||||
|
}
|
||||||
bool tmpFind = true;
|
bool tmpFind = true;
|
||||||
int32_t findLen = 0;
|
int32_t findLen = 0;
|
||||||
while( _property.getMultiplicity() < Node<CLASS_TYPE>::m_multipleMax
|
while( _property.getMultiplicity() < Node<CLASS_TYPE>::m_multipleMax
|
||||||
@ -520,7 +528,12 @@ template<class CLASS_TYPE> class NodeRangeValue : public Node<CLASS_TYPE> {
|
|||||||
}
|
}
|
||||||
}else {
|
}else {
|
||||||
if (_property.getPositionStop() != -1) {
|
if (_property.getPositionStop() != -1) {
|
||||||
|
if (_property.getMultiplicity() == 0) {
|
||||||
|
// simple optimisation ==> allows skipping one parsing cycle
|
||||||
|
_property.setStatus(parseStatusNone);
|
||||||
|
} else {
|
||||||
_property.setStatus(parseStatusFull);
|
_property.setStatus(parseStatusFull);
|
||||||
|
}
|
||||||
} else if (_property.getMultiplicity() == Node<CLASS_TYPE>::m_multipleMin) {
|
} else if (_property.getMultiplicity() == Node<CLASS_TYPE>::m_multipleMin) {
|
||||||
_property.setPositionStop(_property.getPositionStart());
|
_property.setPositionStop(_property.getPositionStart());
|
||||||
_property.setStatus(parseStatusFull);
|
_property.setStatus(parseStatusFull);
|
||||||
@ -1024,7 +1037,7 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
|
|||||||
findPartialNode = true;
|
findPartialNode = true;
|
||||||
prop = _property.m_subProperty[jjj];
|
prop = _property.m_subProperty[jjj];
|
||||||
tmpCurrentPos = prop.getPositionStop();
|
tmpCurrentPos = prop.getPositionStop();
|
||||||
_property.m_subProperty.erase(_property.m_subProperty.begin()+iii-1, _property.m_subProperty.end());
|
_property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end());
|
||||||
iii = jjj;
|
iii = jjj;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1040,7 +1053,7 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
|
|||||||
TK_REG_EXP_DBG_MODE2(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") data='" << autoStr(std::string(_data, tmpCurrentPos, _lenMax-tmpCurrentPos)) << "'");
|
TK_REG_EXP_DBG_MODE2(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") data='" << autoStr(std::string(_data, tmpCurrentPos, _lenMax-tmpCurrentPos)) << "'");
|
||||||
m_subNode[iii]->parse(_data, tmpCurrentPos, _lenMax, prop);
|
m_subNode[iii]->parse(_data, tmpCurrentPos, _lenMax, prop);
|
||||||
if (prop.getStatus() == parseStatusNone) {
|
if (prop.getStatus() == parseStatusNone) {
|
||||||
TK_REG_EXP_DBG_MODE(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") ===None===");
|
TK_REG_EXP_DBG_MODE(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=" << iii << "/" << m_subNode.size() << ") ===None=== : " << prop);
|
||||||
// rewind the list:
|
// rewind the list:
|
||||||
bool findPartialNode = false;
|
bool findPartialNode = false;
|
||||||
for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) {
|
for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) {
|
||||||
@ -1048,7 +1061,7 @@ template<class CLASS_TYPE> class NodePTheseElem : public Node<CLASS_TYPE> {
|
|||||||
findPartialNode = true;
|
findPartialNode = true;
|
||||||
prop = _property.m_subProperty[jjj];
|
prop = _property.m_subProperty[jjj];
|
||||||
tmpCurrentPos = prop.getPositionStop();
|
tmpCurrentPos = prop.getPositionStop();
|
||||||
_property.m_subProperty.erase(_property.m_subProperty.begin()+iii-1, _property.m_subProperty.end());
|
_property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end());
|
||||||
iii = jjj;
|
iii = jjj;
|
||||||
TK_REG_EXP_DBG_MODE(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=?/" << m_subNode.size() << ") == rewind at " << iii << "");
|
TK_REG_EXP_DBG_MODE(" " << levelSpace(Node<CLASS_TYPE>::m_nodeLevel) << " (elem=?/" << m_subNode.size() << ") == rewind at " << iii << "");
|
||||||
break;
|
break;
|
||||||
@ -1175,10 +1188,42 @@ template<class CLASS_TYPE> class NodePThese : public Node<CLASS_TYPE> {
|
|||||||
_property.setStatus(parseStatusNone);
|
_property.setStatus(parseStatusNone);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (_property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMax) {
|
bool haveSubPartial = false;
|
||||||
|
for (int64_t iii=_property.m_subProperty.size()-1; iii>=0; --iii) {
|
||||||
|
if (_property.m_subProperty[iii].getStatus() == parseStatusPartial) {
|
||||||
|
haveSubPartial = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( haveSubPartial == false
|
||||||
|
&& _property.getMultiplicity() >= Node<CLASS_TYPE>::m_multipleMax) {
|
||||||
_property.setStatus(parseStatusFull);
|
_property.setStatus(parseStatusFull);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (haveSubPartial == true) {
|
||||||
|
TK_CRITICAL(" TODO ...");
|
||||||
|
// TODO : Really hard element ==> the current node might register the previous tree before rejecting parse ...
|
||||||
|
/*
|
||||||
|
for (int64_t jjj=_property.m_subProperty.size()-1; jjj>=0; --jjj) {
|
||||||
|
if (_property.m_subProperty[jjj].getStatus() == parseStatusPartial) {
|
||||||
|
findPartialNode = true;
|
||||||
|
prop = _property.m_subProperty[jjj];
|
||||||
|
tmpCurrentPos = prop.getPositionStop();
|
||||||
|
_property.m_subProperty.erase(_property.m_subProperty.begin()+jjj, _property.m_subProperty.end());
|
||||||
|
iii = jjj;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
} else {
|
||||||
|
if ( _property.getPositionStop() < 0
|
||||||
|
&& Node<CLASS_TYPE>::m_multipleMin == 0
|
||||||
|
&& _property.getMultiplicity() == 0) {
|
||||||
|
_property.setPositionStop(_property.getPositionStart());
|
||||||
|
_property.setStatus(parseStatusPartial);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
_property.setStatus(parseStatusFull);
|
_property.setStatus(parseStatusFull);
|
||||||
bool tmpFind = true;
|
bool tmpFind = true;
|
||||||
while ( _property.getMultiplicity() <= Node<CLASS_TYPE>::m_multipleMax
|
while ( _property.getMultiplicity() <= Node<CLASS_TYPE>::m_multipleMax
|
||||||
@ -1628,17 +1673,13 @@ template<class CLASS_TYPE> class RegExp {
|
|||||||
}
|
}
|
||||||
regexp::FindProperty prop;
|
regexp::FindProperty prop;
|
||||||
prop.setPositionStart(_startPos);
|
prop.setPositionStart(_startPos);
|
||||||
|
bool needOneMoreCycle = true;
|
||||||
|
while (needOneMoreCycle == true) {
|
||||||
|
needOneMoreCycle = false;
|
||||||
m_exprRootNode.parse(_SearchIn, _startPos, maxlen, prop);
|
m_exprRootNode.parse(_SearchIn, _startPos, maxlen, prop);
|
||||||
if ( prop.getStatus() == regexp::parseStatusFull
|
if ( prop.getStatus() == regexp::parseStatusFull
|
||||||
|| prop.getStatus() == regexp::parseStatusPartial ) {
|
|| prop.getStatus() == regexp::parseStatusPartial ) {
|
||||||
findLen = prop.getFindLen();
|
findLen = prop.getFindLen();
|
||||||
if ( _escapeChar != 0
|
|
||||||
&& _startPos>0) {
|
|
||||||
if (_escapeChar == (char32_t)_SearchIn[_startPos-1]) {
|
|
||||||
//==> detected escape char ==> try find again ...
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Check end :
|
// Check end :
|
||||||
if (m_notEndWithChar == true) {
|
if (m_notEndWithChar == true) {
|
||||||
if (_startPos+findLen < (int64_t)_SearchIn.size() ) {
|
if (_startPos+findLen < (int64_t)_SearchIn.size() ) {
|
||||||
@ -1651,14 +1692,17 @@ template<class CLASS_TYPE> class RegExp {
|
|||||||
&& tmpVal <= '9' )
|
&& tmpVal <= '9' )
|
||||||
|| ( tmpVal == '_' ) ) {
|
|| ( tmpVal == '_' ) ) {
|
||||||
// go on the next char ...
|
// go on the next char ...
|
||||||
return false;
|
needOneMoreCycle = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (needOneMoreCycle == false) {
|
||||||
m_areaFind.start = _startPos;
|
m_areaFind.start = _startPos;
|
||||||
m_areaFind.stop = _startPos + findLen;
|
m_areaFind.stop = _startPos + findLen;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user