Browse Source

Code for comparing strings with templates.

template_comparator
Michael Uleysky 8 years ago
parent
commit
de1eb76336
  1. 130
      modules/gmt/cmpstr.c++
  2. 258
      modules/gmt/cmpstr_alt.c++

130
modules/gmt/cmpstr.c++

@ -0,0 +1,130 @@
#include <iostream>
#include <set>
#include <string>
#include <vector>
// Compares the string str, symbol by symbol, with the template expr.
//
// The template is cut into segments at every '[' and ']'. A segment that is
// terminated by ']' is skippable (optional); every other segment is mandatory.
// Empty segments are dropped. The brackets themselves never take part in the
// comparison.
//
// The matcher keeps a set of positions inside the template that the next
// symbol of str is allowed to hit. The function returns true when every symbol
// of str hit at least one such position; it does not require the template to
// be consumed to its end. If either argument is empty the result is false.
bool CmpStrEx(const std::string& expr, const std::string& str)
{
    if(str.empty() || expr.empty()) return false;

    struct Matcher
    {
        // A run of template symbols between two brackets.
        struct Segment
        {
            size_t start;     // index of the first symbol in the template
            size_t size;      // number of symbols, always > 0
            bool skippable;   // true if the run was closed by ']'
        };

        // A place in the template: segment number and shift inside it.
        struct Position
        {
            size_t segment;
            size_t shift;
            bool operator <(const Position& other) const
            {
                return (segment!=other.segment) ? (segment<other.segment) : (shift<other.shift);
            }
        };
        using Positions=std::set<Position>;

        const std::string& pattern;
        std::vector<Segment> segments;
        Positions active;   // positions the next symbol may match

        // Adds to out the first symbol of segment number from and, while the
        // segments passed so far are skippable, of the segments after it.
        void Enter(const size_t from, Positions& out) const
        {
            for(size_t idx=from; idx<segments.size(); ++idx)
            {
                out.insert(Position{idx,0});
                if(!segments[idx].skippable) break;
            }
        }

        explicit Matcher(const std::string& tmpl):pattern(tmpl)
        {
            // Split the template at brackets
            size_t start=0;
            for(size_t pos=0; pos<pattern.length(); ++pos)
            {
                const char ch=pattern[pos];
                if(ch!='[' && ch!=']') continue;
                if(pos>start) segments.push_back(Segment{start,pos-start,ch==']'});
                start=pos+1;
            }
            // Tail after the last bracket is always mandatory
            if(start<pattern.length()) segments.push_back(Segment{start,pattern.length()-start,false});
            // Positions for the very first symbol
            Enter(0,active);
        }

        // Feeds one symbol. Returns true if at least one active position
        // matched it, then moves the surviving positions one symbol forward.
        bool Step(const char ch)
        {
            // Keep only the positions that match the symbol
            Positions kept;
            for(const Position& pos: active)
            {
                if(pattern[segments[pos.segment].start+pos.shift]==ch) kept.insert(pos);
            }
            const bool matched=!kept.empty();

            // Advance the survivors
            Positions following;
            for(const Position& pos: kept)
            {
                const Segment& seg=segments[pos.segment];
                if(pos.shift+1>=seg.size)
                {
                    // Segment is exhausted, continue with the next one(s)
                    Enter(pos.segment+1,following);
                    continue;
                }
                following.insert(Position{pos.segment,pos.shift+1});
                // A skippable segment may be abandoned after any of its symbols
                if(seg.skippable) Enter(pos.segment+1,following);
            }
            active=following;
            return matched;
        }
    };

    Matcher matcher(expr);
    for(const char ch: str)
    {
        if(!matcher.Step(ch)) return false;
    }
    return true;
}
// Command-line driver: the first argument is the string, the second one is
// the template. Prints the verdict of CmpStrEx to standard output.
// Returns 1 when the number of arguments is wrong, 0 otherwise.
int main(int argc, char** argv)
{
    if(3!=argc) return 1;

    const char* const subject=argv[1];
    const char* const pattern=argv[2];
    const bool matched=CmpStrEx(pattern,subject);

    std::cout<<"Compare "<<subject<<" with template "<<pattern<<": "<<(matched?"match":"not match")<<std::endl;
    return 0;
}

258
modules/gmt/cmpstr_alt.c++

@ -0,0 +1,258 @@
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>
// Compares the string str, symbol by symbol, with the template expr.
//
// Template syntax handled by the parser below:
//   [ ... ]        optional group,
//   ( ... | ... )  group of alternatives separated by '|',
//   anything else  literal text.
// Groups may be nested. An optional group may be skipped entirely and may also
// be abandoned after any matched symbol inside it.
//
// The template is parsed into a tree of blocks (next = following sibling,
// child = first block inside a group, parent = enclosing group). Matching keeps
// a set of cursors, i.e. places in text blocks that the next symbol of str is
// allowed to hit.
//
// Returns true when every symbol of str hit at least one cursor; the template
// does not have to be consumed to its end. Returns false if either argument is
// empty.
//
// Side effects: a Graphviz description of the parsed tree is written to
// std::cerr and a per-symbol trace is written to std::cout.
bool CmpStrEx(const std::string& expr, const std::string& str)
{
    if(expr.empty() || str.empty()) return false;
    struct State
    {
        // Node of the template tree.
        struct Block
        {
            using pBlock=std::unique_ptr<struct Block>;
            enum Type {NOTDEF,TEXT,OPTIONAL,VARIANTS,DELIM};
            size_t b,e;                  // [b,e) range in the template, meaningful for TEXT only
            Type type;
            const struct Block* parent;  // enclosing group, nullptr on the top level
            pBlock next,child;           // following sibling and first block inside the group
            Block() = delete;
            Block(const struct Block&) = delete;
            Block(struct Block&&) = delete;
            bool isText() const {return type==TEXT;}
            Block(Type t, const struct Block* p=nullptr):b(0),e(0),type(t),parent(p),next(nullptr),child(nullptr) {}
            Block(size_t bb, size_t ee, const struct Block* p=nullptr):b(bb),e(ee),type(TEXT),parent(p),next(nullptr),child(nullptr) {}
        };

        // Builds the block tree for a non-empty template and returns its root.
        // While parsing, blk is the most recently created or closed block:
        //   next==false  blk is a group that has just been opened, a new block becomes its child,
        //   next==true   a new block becomes the following sibling of blk,
        //   init==true   the next literal symbol starts a new text block instead of extending blk.
        Block::pBlock Parse(const std::string& str)
        {
            // First symbol creates the root; the root is owned from the very beginning
            const char first=str[0];
            Block::pBlock root;
            if('['==first || '('==first) root.reset(new Block(('['==first)?Block::OPTIONAL:Block::VARIANTS));
            else root.reset(new Block(0,1));
            Block* blk=root.get();
            bool init=!root->isText();
            bool next=!init;
            for(size_t e=1; e<str.length(); ++e)
            {
                const char c=str[e];
                switch(c)
                {
                    case '[':
                    case '(':
                    {
                        Block::pBlock& slot=next?blk->next:blk->child;
                        slot.reset(new Block(('['==c)?Block::OPTIONAL:Block::VARIANTS,(next?blk->parent:blk)));
                        blk=slot.get();
                        init=true;
                        next=false;
                        break;
                    }
                    case ']':
                    case ')':
                    {
                        // With next==false the group in blk was opened by the previous symbol and is
                        // empty, so blk itself is the group being closed. A closer without an
                        // enclosing group is ignored instead of leaving blk null.
                        if(next && blk->parent!=nullptr) blk=const_cast<struct Block*>(blk->parent);
                        init=true;
                        next=true;
                        break;
                    }
                    case '|':
                    {
                        // Directly after '(' the delimiter is the first block inside the group
                        Block::pBlock& slot=next?blk->next:blk->child;
                        slot.reset(new Block(Block::DELIM,(next?blk->parent:blk)));
                        blk=slot.get();
                        init=next=true;
                        break;
                    }
                    default:
                    {
                        if(init)
                        {
                            Block::pBlock& slot=next?blk->next:blk->child;
                            slot.reset(new Block(e,e+1,(next?blk->parent:blk)));
                            blk=slot.get();
                        }
                        else blk->e=e+1; // Extend current text block
                        init=false;
                        next=true;
                    }
                }
            }
            return root;
        }

        // A place inside a text block that the next symbol may match.
        struct Cursor
        {
            const struct Block* block;
            size_t offset;
            // Order by block first, then by offset, so that two cursors standing at different
            // offsets of the same block can live in the set together.
            bool operator <(const struct Cursor& c) const
            {
                if(block!=c.block) return block<c.block;
                return offset<c.offset;
            }
            bool compare(const std::string& str, const char c) const {return c==str[block->b+offset];}
            Cursor(const struct Block* b, size_t o):block(b),offset(o) {}
        };
        using Cursors=std::set<struct Cursor>;
        using pCursor=std::set<struct Cursor>::const_iterator;

        Block::pBlock root;
        Cursors cursors;
        const std::string& s;

        // Adds cursors for everything that may come directly after the block blk is finished
        // or skipped: the following sibling or, at the end of a chain or of an alternative,
        // whatever follows the nearest enclosing group that has a continuation.
        void InitFollowing(const struct Block* blk)
        {
            for(; blk!=nullptr; blk=blk->parent)
            {
                const struct Block* nxt=blk->next.get();
                if(nxt==nullptr || nxt->type==Block::DELIM) continue; // End of chain, must go up
                InitCursors(nxt);
                return; // Only the nearest continuation is reachable
            }
        }

        // Adds cursors for every place where matching may enter the block blk.
        void InitCursors(const struct Block* blk)
        {
            if(blk==nullptr) return; // Empty group
            switch(blk->type)
            {
                case Block::TEXT: {cursors.insert(Cursor(blk,0)); break;}
                case Block::OPTIONAL:
                {
                    InitCursors(blk->child.get()); // Enter the group...
                    InitFollowing(blk);            // ...or skip it
                    break;
                }
                case Block::VARIANTS:
                {
                    if(!blk->child) {InitFollowing(blk); break;} // Nothing inside, nothing to match
                    const struct Block* b=blk->child.get();
                    while(b!=nullptr)
                    {
                        InitCursors(b); // First block of the alternative
                        while(b!=nullptr && b->type!=Block::DELIM) b=b->next.get(); // Find the delimiter
                        if(b!=nullptr) b=b->next.get(); // Step over it to the next alternative
                    }
                    break;
                }
                default: {}
            }
        }

        State(const std::string& str):root(Parse(str)),s(str) {InitCursors(root.get());}

        // Feeds one symbol. Returns true if at least one cursor matched it, then replaces the
        // cursor set with the places reachable by the next symbol.
        bool CmpSmb(const char c)
        {
            pCursor p=cursors.cbegin();
            bool res=false;
            // Compare symbol with all cursors
            std::cout<<"--------------\n";
            std::cout<<"Symbol: "<<c<<"\n";
            while(p!=cursors.end())
            {
                std::cout<<p->block->b+p->offset<<" "<<s[p->block->b+p->offset]<<"\n";
                if(p->compare(s,c)) {res=true; ++p;}
                else p=cursors.erase(p);
            }
            // Take the survivors out; swap leaves cursors empty for the new generation
            Cursors old;
            old.swap(cursors);
            for(const auto& cur:old)
            {
                const struct Block* blk=cur.block;
                // Increment cursors
                if(blk->b+cur.offset+1<blk->e) cursors.insert(Cursor(blk,cur.offset+1)); // Advance in current block
                else InitFollowing(blk); // Move to next block
                // Try to leave optional blocks
                for(const struct Block* up=blk->parent; up!=nullptr; up=up->parent)
                {
                    if(up->type==Block::OPTIONAL) InitFollowing(up);
                }
            }
            return res;
        }
    };
    // Main function
    struct State st(expr);
    // Dump code: writes the block tree in Graphviz format to std::cerr
    {
        std::cerr<<"digraph main\n{\nrankdir=TB pack=\"true\" packmode=\"node\";\n";
        std::cerr<<"node [fontsize=30, shape=rectangle, style=\"filled,rounded\", fillcolor=azure2, fixedsize=\"false\", margin=0.2, width=1, penwidth=3, fontname=\"Times New Roman\"];\n";
        std::cerr<<"edge [arrowsize=1.5, penwidth=3];\n";
        const struct State::Block* r=st.root.get();
        std::set<const struct State::Block*> visited;
        visited.insert(nullptr); // Missing next/child count as already visited
        while(true)
        {
            if(visited.count(r)==0)
            {
                std::string label;
                if(r->type==State::Block::OPTIONAL) label="[]";
                if(r->type==State::Block::VARIANTS) label="()";
                if(r->type==State::Block::DELIM) label="|";
                if(r->type==State::Block::TEXT) label=expr.substr(r->b,r->e-r->b);
                std::cerr<<"\""<<r<<"\" [label=\""<<label<<"\""<<((r->type==State::Block::TEXT)?",fontcolor=\"red\"":"")<<"];\n";
                if(r->child) std::cerr<<"\""<<r<<"\" -> \""<<r->child.get()<<"\";\n";
                if(r->next)
                {
                    std::cerr<<"\""<<r<<"\" -> \""<<r->next.get()<<"\" [color=\"blue\"];\n";
                    std::cerr<<"{rank=same; "<<"\""<<r<<"\"; \""<<r->next.get()<<"\";}\n";
                }
                if(r->parent!=nullptr) std::cerr<<"\""<<r<<"\" -> \""<<r->parent<<"\" [color=\"green\", penwidth=1];\n";
                visited.insert(r);
            }
            // Go down first, then to the sibling, otherwise up to the nearest ancestor with a sibling
            if(r->child && visited.count(r->child.get())==0) r=r->child.get();
            else if(r->next) r=r->next.get();
            else
            {
                while(r->parent!=nullptr)
                {
                    r=r->parent;
                    if(r->next) {r=r->next.get(); break;}
                }
            }
            // Stop on a visited top-level block with nothing unvisited around it
            if(visited.count(r)!=0 && r->parent==nullptr && visited.count(r->next.get())!=0 && visited.count(r->child.get())!=0) break;
        }
        std::cerr<<"}\n";
    }
    for(size_t pos=0; pos<str.length(); ++pos)
    {
        if(!st.CmpSmb(str[pos])) return false;
    }
    return true;
}
// Command-line driver: the first argument is the string, the second one is
// the template. Prints the verdict of CmpStrEx to standard output.
// Returns 1 when the number of arguments is wrong, 0 otherwise.
int main(int argc, char** argv)
{
    if(3!=argc) return 1;

    const char* const subject=argv[1];
    const char* const pattern=argv[2];

    // CmpStrEx writes its own trace lines to std::cout, so the call stays inside
    // the insertion chain: moving it elsewhere would reorder the output.
    std::cout<<"Compare "<<subject<<" with template "<<pattern<<": "
             <<(CmpStrEx(pattern,subject)?"match":"not match")<<std::endl;
    return 0;
}
Loading…
Cancel
Save