From de1eb76336836641808fe6d4374d1dc4d28d2b3c Mon Sep 17 00:00:00 2001 From: Michael Uleysky Date: Tue, 30 Aug 2016 15:17:19 +1000 Subject: [PATCH] Code for compare strings with templates. --- modules/gmt/cmpstr.c++ | 130 +++++++++++++++++++ modules/gmt/cmpstr_alt.c++ | 258 +++++++++++++++++++++++++++++++++++++ 2 files changed, 388 insertions(+) create mode 100644 modules/gmt/cmpstr.c++ create mode 100644 modules/gmt/cmpstr_alt.c++ diff --git a/modules/gmt/cmpstr.c++ b/modules/gmt/cmpstr.c++ new file mode 100644 index 0000000..1d180b1 --- /dev/null +++ b/modules/gmt/cmpstr.c++ @@ -0,0 +1,130 @@ +#include +#include +#include +#include + +bool CmpStrEx(const std::string& expr, const std::string& str) +{ + if(expr.empty() || str.empty()) return false; + + struct State + { + struct Block + { + const size_t b,len; + const bool optional; + Block(const size_t bb,const size_t ee,const bool o):b(bb),len(ee-bb),optional(o) {} + }; + + struct Cursor + { + size_t block,offset; + bool operator <(const struct Cursor& c) const + { + if(blockc.block) return false; + if(offset Cursors; + typedef std::set::iterator pCursor; + + std::vector blockchain; + Cursors cursors; + const std::string& s; + + Cursors InitCursors(size_t block) const + { + Cursors cs; + for(size_t i=block; ibpos) blockchain.push_back(Block(bpos,cur,(']'==s[cur]))); + cur++; + bpos=cur; + continue; + } + cur++; + } + // Add last block + if(bposblock]; + if(c==s[bl.b+p->offset]) {res=true; ++p;} + else p=cursors.erase(p); + } + + // Increment cursors + Cursors upd; // New cursors + p=cursors.begin(); + while(p!=cursors.end()) + { + // Increment cursor on one position + if(p->offset+1>=blockchain[p->block].len) // Go to next block + { + Cursors cs=InitCursors(p->block+1); // Get cursors for next block + p=cursors.erase(p); // Erase current cursor + for(const auto& cur: cs) upd.insert(cur); // Copy cursors to new set + continue; + } + else upd.insert({p->block,p->offset+1}); + if(blockchain[p->block].optional) // If current block is optional next symbol may be from next block + { + Cursors cs=InitCursors(p->block+1); // Get cursors for next blockchain + for(const auto& cur: cs) upd.insert(cur); // Copy cursors to new set + } + ++p; + } + + cursors=upd; + return res; + } + + }; + + struct State st(expr); + for(size_t pos=0; pos +#include +#include +#include +#include + +bool CmpStrEx(const std::string& expr, const std::string& str) +{ + if(expr.empty() || str.empty()) return false; + + struct State + { + struct Block + { + using pBlock=std::unique_ptr; + enum Type {NOTDEF,TEXT,OPTIONAL,VARIANTS,DELIM}; + size_t b,e; + Type type; + const struct Block* parent; + pBlock next,child; + + Block() = delete; + Block(const struct Block&) = delete; + Block(struct Block&&) = delete; + + bool isText() const {return type==TEXT;} + + Block(Type t, const struct Block* p=nullptr):b(0),e(0),type(t),parent(p),next(nullptr),child(nullptr) {} + Block(size_t bb, size_t ee, const struct Block* p=nullptr):b(bb),e(ee),type(TEXT),parent(p),next(nullptr),child(nullptr) {} + }; + + Block::pBlock Parse(const std::string& str) + { + size_t e=0; + struct Block* blk; + struct Block* root; + auto c=str[e]; + bool init,next; + + // First symbol + if('['==c || '('==c) blk=new Block(('['==c)?Block::OPTIONAL:Block::VARIANTS); + else blk=new Block(e,e+1); + root=blk; + init=!root->isText(); + next=!init; + + e++; + while(enext:blk->child).reset(new Block(('['==c)?Block::OPTIONAL:Block::VARIANTS,(next?blk->parent:blk))); + blk=(next?blk->next:blk->child).get(); + init=true; + next=false; + break; + } + case ']': + case ')': + { + blk=const_cast(blk->parent); + init=true; + next=true; + break; + } + case '|': {blk->next.reset(new Block(Block::DELIM,blk->parent)); blk=blk->next.get(); init=next=true; break;} + default: + { + if(init) + { + (next?blk->next:blk->child).reset(new Block(e,e+1,(next?blk->parent:blk))); + blk=(next?blk->next:blk->child).get(); + } + else blk->e=e+1; + init=false; + next=true; + } + } + e++; + } + return Block::pBlock(root); + } + + struct Cursor + { + const struct Block* block; + size_t offset; + bool operator <(const struct Cursor& c) const {return blockb+offset];} + Cursor(const struct Block* b, size_t o):block(b),offset(o) {} + }; + + using Cursors=std::set; + using pCursor=std::set::const_iterator; + + Block::pBlock root; + Cursors cursors; + const std::string& s; + + void InitCursors(const struct Block* blk) + { + switch(blk->type) + { + case Block::TEXT: {cursors.insert({blk,0}); break;} + case Block::OPTIONAL: + { + auto b=blk; + InitCursors(b->child.get()); + while(true) + { + if(!b->next) break; + b=b->next.get(); + InitCursors(b); + if(b->type!=Block::OPTIONAL) break; + } + break; + } + case Block::VARIANTS: + { + auto b=blk->child.get(); + while(true) + { + InitCursors(b); + while(b->type!=Block::DELIM) + { + if(!b->next) break; + b=b->next.get(); + } + if(!b->next) break; + b=b->next.get(); + } + } + default: {} + } + } + + + State(const std::string& str):root(Parse(str)),s(str) {InitCursors(root.get());} + + + bool CmpSmb(const char c) + { + pCursor p=cursors.cbegin(); + bool res=false; + + // Compare symbol with all cursors + + std::cout<<"--------------\n"; + std::cout<<"Symbol: "<block->b+p->offset<<" "<block->b+p->offset]<<"\n"; + if(p->compare(s,c)) {res=true; p++;} + else p=cursors.erase(p); + } + + Cursors old(std::move(cursors)); + for(const auto& p:old) + { + auto blk=p.block; + // Increment cursors + if(blk->b+p.offset+1e) cursors.insert({blk,p.offset+1}); // Advance in current block + else // Move to next block + { + if(!blk->next || blk->next->type==Block::DELIM) // End of chain, must go up + { + while(blk->parent!=nullptr) + { + blk=blk->parent; + if(!blk->next || blk->next->type==Block::DELIM) continue; + InitCursors(blk->next.get()); + } + } + else InitCursors(blk->next.get()); // Next block in chain + } + + // Try to leave optional blocks + blk=p.block; + while(blk->parent!=nullptr) + { + blk=blk->parent; + if(blk->type!=Block::OPTIONAL || !blk->next || blk->next->type==Block::DELIM) continue; + InitCursors(blk->next.get()); + } + } + + return res; + } + }; + + +// Main function + struct State st(expr); + + // Dump code + { + std::cerr<<"digraph main\n{\nrankdir=TB pack=\"true\" packmode=\"node\";\n"; + std::cerr<<"node [fontsize=30, shape=rectangle, style=\"filled,rounded\", fillcolor=azure2, fixedsize=\"false\", margin=0.2, width=1, penwidth=3, fontname=\"Times New Roman\"];\n"; + std::cerr<<"edge [arrowsize=1.5, penwidth=3];\n"; + + const struct State::Block* r=st.root.get(); + std::set visited; + visited.insert(nullptr); + while(true) + { + if(visited.count(r)==0) + { + std::string label; + if(r->type==State::Block::OPTIONAL) label="[]"; + if(r->type==State::Block::VARIANTS) label="()"; + if(r->type==State::Block::DELIM) label="|"; + if(r->type==State::Block::TEXT) label=expr.substr(r->b,r->e-r->b); + std::cerr<<"\""<type==State::Block::TEXT)?",fontcolor=\"red\"":"")<<"];\n"; + if(r->child) std::cerr<<"\""< \""<child.get()<<"\";\n"; + if(r->next) + { + std::cerr<<"\""< \""<next.get()<<"\" [color=\"blue\"];\n"; + std::cerr<<"{rank=same; "<<"\""<next.get()<<"\";}\n"; + } + if(r->parent!=nullptr) std::cerr<<"\""< \""<parent<<"\" [color=\"green\", penwidth=1];\n"; + visited.insert(r); + } + if(r->child && visited.count(r->child.get())==0) r=r->child.get(); + else if(r->next) r=r->next.get(); + else + { + while(r->parent!=nullptr) + { + r=r->parent; + if(r->next) {r=r->next.get(); break;} + } + } + if(visited.count(r)!=0 && r->parent==nullptr && visited.count(r->next.get())!=0 && visited.count(r->child.get())!=0) break; + } + std::cerr<<"}\n"; + } + + for(size_t pos=0; pos