diff --git a/README.md b/README.md index 9502cf5..476dfac 100644 --- a/README.md +++ b/README.md @@ -12,5 +12,8 @@ Origami uses random walks over the graph partial order to mine a representative Data Mining. October 2007. ## How to - -cd test; type make +```sh +cd test && mkdir build && cd build +cmake .. +make +``` diff --git a/src/StringTokenizer/CMakeLists.txt b/src/StringTokenizer/CMakeLists.txt new file mode 100644 index 0000000..18b0fbb --- /dev/null +++ b/src/StringTokenizer/CMakeLists.txt @@ -0,0 +1,31 @@ + +cmake_minimum_required(VERSION 3.10) + +# Project name +project(StringTokenizer) + +# Set the C++ standard +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED True) + +# Enable the generation of compile_commands.json +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +# Compiler options +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -ansi -Wall") + +# Source files +set(SOURCE_FILES StringTokenizer.cpp strtoktest.cpp) + +# Include directories +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +# Add library for StringTokenizer +add_library(StringTokenizer STATIC StringTokenizer.cpp) + +# Add executable for StrTokTest +add_executable(strtoktest strtoktest.cpp) + +# Link the StringTokenizer library to StrTokTest executable +target_link_libraries(strtoktest StringTokenizer) + diff --git a/src/StringTokenizer/StringTokenizer.cpp b/src/StringTokenizer/StringTokenizer.cpp index f67c5ed..aa7eaac 100755 --- a/src/StringTokenizer/StringTokenizer.cpp +++ b/src/StringTokenizer/StringTokenizer.cpp @@ -1,166 +1,126 @@ #include "StringTokenizer.h" -StringTokenizer::StringTokenizer(const std::string& _str, const std::string& _delim) -{ +StringTokenizer::StringTokenizer(const std::string &_str, + const std::string &_delim) { - if ((_str.length() == 0) || (_delim.length() == 0)) return; + if ((_str.length() == 0) || (_delim.length() == 0)) + return; - token_str = _str; - delim = _delim; + token_str = _str; + delim = _delim; /* Remove sequential delimiter */ - unsigned int curr_pos = 0; - - while(true) - { - if ((curr_pos = token_str.find(delim,curr_pos)) != std::string::npos) - { - curr_pos += delim.length(); - - while(token_str.find(delim,curr_pos) == curr_pos) - { - token_str.erase(curr_pos,delim.length()); - } - } - else - break; - } - - /* - Trim leading delimiter - */ - if (token_str.find(delim,0) == 0) - { - token_str.erase(0,delim.length()); - } - - /* - Trim ending delimiter - */ - curr_pos = 0; - if ((curr_pos = token_str.rfind(delim)) != std::string::npos) - { - if (curr_pos != (token_str.length() - delim.length())) return; - token_str.erase(token_str.length() - delim.length(),delim.length()); - } - -} - - -int StringTokenizer::countTokens() -{ + unsigned int curr_pos = 0; - unsigned int prev_pos = 0; - int num_tokens = 0; + while (true) { + if ((curr_pos = token_str.find(delim, curr_pos)) != std::string::npos) { + curr_pos += delim.length(); - if (token_str.length() > 0) - { - num_tokens = 0; - - unsigned int curr_pos = 0; - while(true) - { - if ((curr_pos = token_str.find(delim,curr_pos)) != std::string::npos) - { - num_tokens++; - prev_pos = curr_pos; - curr_pos += delim.length(); - } - else - break; + while (token_str.find(delim, curr_pos) == curr_pos) { + token_str.erase(curr_pos, delim.length()); } - return ++num_tokens; - } - else - { - return 0; - } - -} + } else + break; + } + /* + Trim leading delimiter + */ + if (token_str.find(delim, 0) == 0) { + token_str.erase(0, delim.length()); + } -bool StringTokenizer::hasMoreTokens() -{ - return (token_str.length() > 0); + /* + Trim ending delimiter + */ + curr_pos = 0; + if ((curr_pos = token_str.rfind(delim)) != std::string::npos) { + if (curr_pos != (token_str.length() - delim.length())) + return; + token_str.erase(token_str.length() - delim.length(), delim.length()); + } } +int StringTokenizer::countTokens() { + + unsigned int prev_pos = 0; + int num_tokens = 0; + + if (token_str.length() > 0) { + num_tokens = 0; + + unsigned int curr_pos = 0; + while (true) { + if ((curr_pos = token_str.find(delim, curr_pos)) != std::string::npos) { + num_tokens++; + prev_pos = curr_pos; + curr_pos += delim.length(); + } else + break; + } + return ++num_tokens; + } else { + return 0; + } +} -std::string StringTokenizer::nextToken() -{ - - if (token_str.length() == 0) - return ""; - - std::string tmp_str = ""; - unsigned int pos = token_str.find(delim,0); - - if (pos != std::string::npos) - { - tmp_str = token_str.substr(0,pos); - token_str = token_str.substr(pos+delim.length(),token_str.length()-pos); - } - else - { - tmp_str = token_str.substr(0,token_str.length()); - token_str = ""; - } +bool StringTokenizer::hasMoreTokens() { return (token_str.length() > 0); } - return tmp_str; -} +std::string StringTokenizer::nextToken() { + if (token_str.length() == 0) + return ""; -int StringTokenizer::nextIntToken() -{ - return atoi(nextToken().c_str()); -} + std::string tmp_str = ""; + unsigned int pos = token_str.find(delim, 0); + if (pos != std::string::npos) { + tmp_str = token_str.substr(0, pos); + token_str = + token_str.substr(pos + delim.length(), token_str.length() - pos); + } else { + tmp_str = token_str.substr(0, token_str.length()); + token_str = ""; + } -double StringTokenizer::nextFloatToken() -{ - return atof(nextToken().c_str()); + return tmp_str; } +int StringTokenizer::nextIntToken() { return atoi(nextToken().c_str()); } -std::string StringTokenizer::nextToken(const std::string& delimiter) -{ - if (token_str.length() == 0) - return ""; +double StringTokenizer::nextFloatToken() { return atof(nextToken().c_str()); } - std::string tmp_str = ""; - unsigned int pos = token_str.find(delimiter,0); +std::string StringTokenizer::nextToken(const std::string &delimiter) { + if (token_str.length() == 0) + return ""; - if (pos != std::string::npos) - { - tmp_str = token_str.substr(0,pos); - token_str = token_str.substr(pos + delimiter.length(),token_str.length() - pos); - } - else - { - tmp_str = token_str.substr(0,token_str.length()); - token_str = ""; - } - - return tmp_str; -} + std::string tmp_str = ""; + unsigned int pos = token_str.find(delimiter, 0); + if (pos != std::string::npos) { + tmp_str = token_str.substr(0, pos); + token_str = + token_str.substr(pos + delimiter.length(), token_str.length() - pos); + } else { + tmp_str = token_str.substr(0, token_str.length()); + token_str = ""; + } -std::string StringTokenizer::remainingString() -{ - return token_str; + return tmp_str; } +std::string StringTokenizer::remainingString() { return token_str; } -std::string StringTokenizer::filterNextToken(const std::string& filterStr) -{ - std::string tmp_str = nextToken(); - unsigned int currentPos = 0; +std::string StringTokenizer::filterNextToken(const std::string &filterStr) { + std::string tmp_str = nextToken(); + unsigned int currentPos = 0; - while((currentPos = tmp_str.find(filterStr,currentPos)) != std::string::npos) - { - tmp_str.erase(currentPos,filterStr.length()); - } + while ((currentPos = tmp_str.find(filterStr, currentPos)) != + std::string::npos) { + tmp_str.erase(currentPos, filterStr.length()); + } - return tmp_str; + return tmp_str; } diff --git a/src/StringTokenizer/StringTokenizer.h b/src/StringTokenizer/StringTokenizer.h index 602f864..a8c5b8d 100755 --- a/src/StringTokenizer/StringTokenizer.h +++ b/src/StringTokenizer/StringTokenizer.h @@ -11,42 +11,34 @@ * http://www.opensource.org/licenses/cpl.php * * * *********************************************************************** -*/ - - + */ #ifndef INCLUDE_STRINGTOKENIZER_H #define INCLUDE_STRINGTOKENIZER_H - +#include #include #include -#include #include +class StringTokenizer { -class StringTokenizer -{ - - public: - - StringTokenizer(const std::string& _str, const std::string& _delim); - ~StringTokenizer(){}; - - int countTokens(); - bool hasMoreTokens(); - std::string nextToken(); - int nextIntToken(); - double nextFloatToken(); - std::string nextToken(const std::string& delim); - std::string remainingString(); - std::string filterNextToken(const std::string& filterStr); - - private: +public: + StringTokenizer(const std::string &_str, const std::string &_delim); + ~StringTokenizer() {}; - std::string token_str; - std::string delim; + int countTokens(); + bool hasMoreTokens(); + std::string nextToken(); + int nextIntToken(); + double nextFloatToken(); + std::string nextToken(const std::string &delim); + std::string remainingString(); + std::string filterNextToken(const std::string &filterStr); +private: + std::string token_str; + std::string delim; }; #endif diff --git a/src/StringTokenizer/strtoktest.cpp b/src/StringTokenizer/strtoktest.cpp index cc5c460..4e270fb 100755 --- a/src/StringTokenizer/strtoktest.cpp +++ b/src/StringTokenizer/strtoktest.cpp @@ -11,44 +11,40 @@ * http://www.opensource.org/licenses/cpl.php * * * *********************************************************************** -*/ - + */ +#include "StringTokenizer.h" +#include #include #include -#include #include -#include "StringTokenizer.h" - - -int main() -{ - - //string tempStr = "|x|x|x~|x|x|x~aa~ |x ~b~b |x ~c~c~ |x|x|x|x ~d~d~|xw|xs|xd|x3|x4|xd|xf|x1|x222|xwwww|xgg|xjj|xkk|xvv|x|x22|x#3"; - //StringTokenizer strtok = StringTokenizer(tempStr,"|x"); - - std::string tempStr = "01|02|03|04|05|06|07|08|09|10|11|12"; - StringTokenizer strtok = StringTokenizer(tempStr,"|"); +int main() { + // string tempStr = "|x|x|x~|x|x|x~aa~ |x ~b~b |x ~c~c~ |x|x|x|x + // ~d~d~|xw|xs|xd|x3|x4|xd|xf|x1|x222|xwwww|xgg|xjj|xkk|xvv|x|x22|x#3"; + // StringTokenizer strtok = StringTokenizer(tempStr,"|x"); - std::cout << "Number Of Tokens: " << strtok.countTokens() << std::endl; - std::cout << "String: " << strtok.remainingString() << std::endl; + std::string tempStr = "01|02|03|04|05|06|07|08|09|10|11|12"; - int cnt = strtok.countTokens(); - std::string finalString = ""; + StringTokenizer strtok = StringTokenizer(tempStr, "|"); - for(int i = 0; i < cnt; i++) - { - std::string tempStr =""; - std::cout << "Token[" << i << "] ------> [" << (tempStr=strtok./*filterN*/nextToken(/*" "*/)) << "] "; - std::cout << "Token Count" << strtok.countTokens() << std::endl; - finalString += tempStr; + std::cout << "Number Of Tokens: " << strtok.countTokens() << std::endl; + std::cout << "String: " << strtok.remainingString() << std::endl; - } + int cnt = strtok.countTokens(); + std::string finalString = ""; - std::cout << std::endl << "Final String: " << finalString << std::endl; + for (int i = 0; i < cnt; i++) { + std::string tempStr = ""; + std::cout << "Token[" << i << "] ------> [" + << (tempStr = strtok./*filterN*/ nextToken(/*" "*/)) + << "] "; + std::cout << "Token Count" << strtok.countTokens() << std::endl; + finalString += tempStr; + } - return 1; + std::cout << std::endl << "Final String: " << finalString << std::endl; + return 1; } diff --git a/src/common/adj_list.h b/src/common/adj_list.h index 69e4de4..e3f8fc1 100644 --- a/src/common/adj_list.h +++ b/src/common/adj_list.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki , Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki , Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,143 +21,142 @@ #ifndef _ADJ_LIST #define _ADJ_LIST -#include #include +#include #include #include -#include +#include +#include // For std::pair using namespace std; +template using ALLOC = std::allocator; +template struct vertex_info; -template class ALLOC> struct vertex_info; -template class ALLOC> -ostream& operator<< (ostream&, const vertex_info&); +template +ostream &operator<<(ostream &, const vertex_info &); /// class to store all info associated with a vertex. -template class ALLOC=std::allocator> -struct vertex_info -{ +template struct vertex_info { - typedef pair EDGE_P; // - typedef vector > EDGES; // vector of edges + typedef pair EDGE_P; // + typedef vector EDGES; // vector of edges typedef typename EDGES::iterator EIT; typedef typename EDGES::const_iterator CONST_EIT; - + // constructor - vertex_info(const VERTEX_T& vert, const int& idval): v(vert), id(idval) {} + vertex_info(const VERTEX_T &vert, const int &idval) : v(vert), id(idval) {} vertex_info() {} /// Returns an iterator pointing to the begining of the list of out_edges. - EIT out_begin() {return out_edges.begin();} - /// Returns a const_iterator pointing to the begining of the list of out_edges. - CONST_EIT out_begin() const {return out_edges.begin();} + EIT out_begin() { return out_edges.begin(); } + /// Returns a const_iterator pointing to the begining of the list of + /// out_edges. + CONST_EIT out_begin() const { return out_edges.begin(); } /// Returns an iterator pointing to the end of the list of out_edges. - EIT out_end() {return out_edges.end();} + EIT out_end() { return out_edges.end(); } /// Returns a const_iterator pointing to the end of the list of out_edges. - CONST_EIT out_end() const {return out_edges.end();} - + CONST_EIT out_end() const { return out_edges.end(); } + /// Returns an iterator pointing to the begining of the list of in_edges. - EIT in_begin() {return in_edges.begin();} + EIT in_begin() { return in_edges.begin(); } /// Returns a cont_iterator pointing to the begining of the list of in_edges. - CONST_EIT in_begin() const {return in_edges.begin();} + CONST_EIT in_begin() const { return in_edges.begin(); } /// Returns an iterator pointing to the end of the list of in_edges. - EIT in_end() {return in_edges.end();} + EIT in_end() { return in_edges.end(); } /// Returns a const_iterator pointing to the end of the list of in_edges. - CONST_EIT in_end() const {return in_edges.end();} + CONST_EIT in_end() const { return in_edges.end(); } /// Adds an out_edge to the list of the out_edges. - void add_out_edge(const int& dest, const EDGE_T& e) - { - //out_edges.insert(make_pair(dest, e)); + void add_out_edge(const int &dest, const EDGE_T &e) { + // out_edges.insert(make_pair(dest, e)); out_edges.push_back(make_pair(dest, e)); } /// Adds an in_edge to the list of the in_edges. - void add_in_edge(const int& src, const EDGE_T& e) - { - //in_edges.insert(make_pair(src, e)); + void add_in_edge(const int &src, const EDGE_T &e) { + // in_edges.insert(make_pair(src, e)); in_edges.push_back(make_pair(src, e)); } - /** + /** * Returns true if there exists an out-edge from this vertex to dest - * and populates edge label in e + * and populates edge label in e */ - bool out_edge(const int& dest, EDGE_T& e) const { + bool out_edge(const int &dest, EDGE_T &e) const { CONST_EIT it; - for(it=out_begin(); it!=out_end(); it++) - if(it->first==dest) { - e=it->second; + for (it = out_begin(); it != out_end(); it++) + if (it->first == dest) { + e = it->second; return true; - } + } return false; - }//out_edge() - + } // out_edge() + /** Returns true if there exists an in-edge from src to this vertex and populates edge label in e */ - bool in_edge(const int& src, EDGE_T& e) const { + bool in_edge(const int &src, EDGE_T &e) const { CONST_EIT it; - for(it=in_begin(); it!=in_end(); it++) - if(it->first==src) { - e=it->second; + for (it = in_begin(); it != in_end(); it++) + if (it->first == src) { + e = it->second; return true; } return false; - }//in_edge() + } // in_edge() - ///Returns true if this vertex is less than vertex2 - bool operator< (const vertex_info& vertex2) const { + /// Returns true if this vertex is less than vertex2 + bool operator<(const vertex_info &vertex2) const { - if(v < vertex2.v) - return true; - else - return false; + if (v < vertex2.v) + return true; + else + return false; } - /// Outputs a vertex_info object to the stream. This is a global function, not a member function. - friend ostream& operator<< <>(ostream&, const vertex_info&); + /// Outputs a vertex_info object to the stream. This is a global function, + /// not a member function. + friend ostream &operator<< <>(ostream &, + const vertex_info &); /// public data members /// - VERTEX_T v; //vertex object - int id; //id of this vertex - EDGES out_edges; //stores all edges for an undirected graph - EDGES in_edges; //calls to this member should be made only for digraphs + VERTEX_T v; // vertex object + int id; // id of this vertex + EDGES out_edges; // stores all edges for an undirected graph + EDGES in_edges; // calls to this member should be made only for digraphs -}; //end struct vertex_info +}; // end struct vertex_info // overloaded extraction over a pair - used by following hash_map extraction -template -ostream& operator<< (ostream& ostr, const std::pair& p) { - ostr<<"("< +ostream &operator<<(ostream &ostr, const std::pair &p) { + ostr << "(" << p.first << " " << p.second << ")"; return ostr; } // overloaded extraction over the edgelist map -template -ostream& operator<< (ostream& ostr, const vector >& hm) { - typename vector >::const_iterator it; - for(it=hm.begin(); it!=hm.end(); it++) - std::cout<<*it<<" "; +template +ostream &operator<<(ostream &ostr, const vector> &hm) { + typename vector>::const_iterator it; + for (it = hm.begin(); it != hm.end(); it++) + std::cout << *it << " "; return ostr; } - -//friend extraction over output streams -template -ostream& operator<< (ostream& ostr, const vertex_info& vi) { - ostr<<"["< +ostream &operator<<(ostream &ostr, const vertex_info &vi) { + ostr << "[" << vi.id << "|" << vi.v << "] OUT: "; typename vertex_info::CONST_EIT it; - ostr< class ALLOC > -class adj_list; - -template class ALLOC > -ostream& operator<< (ostream&, const adj_list&); +template class adj_list; + +template +ostream &operator<<(ostream &, const adj_list &); /** * \brief core adjacency list class to store the pattern. @@ -165,18 +164,18 @@ ostream& operator<< (ostream&, const adj_list&); * the template arguments are vertex_type and edge_type. */ -template class ALLOC=std::allocator> -class adj_list -{ +template class adj_list { - public: +public: typedef V_T VERTEX_T; typedef E_T EDGE_T; - typedef vertex_info VERTEX_INFO; - typedef adj_list ADJ_L; + typedef vertex_info VERTEX_INFO; + typedef adj_list ADJ_L; - template - class VERTEX_LIST: public std::vector > {};//each vertex and its info is stored as a vector, for fast lookup since we'll know its unique id + template + class VERTEX_LIST : public std::vector { + }; // each vertex and its info is stored as a vector, for fast lookup since + // we'll know its unique id typedef VERTEX_LIST ADJ_LIST; @@ -187,256 +186,272 @@ class adj_list typedef std::pair EIT_PAIR; typedef std::pair CONST_EIT_PAIR; - void* operator new(size_t size) { + void *operator new(size_t size) { ALLOC aa; return aa.allocate(size); } - void operator delete(void *p, size_t size) { + void operator delete(void *p, size_t size) { if (p) { ALLOC aa; - aa.deallocate(static_cast (p), size); + aa.deallocate(static_cast(p), size); } } - - //default constructor + + // default constructor adj_list() {} - - IT begin() {return _alist.begin();} - CONST_IT begin() const {return _alist.begin();} - IT end() {return _alist.end();} - CONST_IT end() const {return _alist.end();} - inline - int size() const {return _alist.size();} /**< Returns number of vertices */ - void clear() {_alist.clear();} - void push_back(const VERTEX_INFO& vi) {_alist.push_back(vi);} + IT begin() { return _alist.begin(); } + CONST_IT begin() const { return _alist.begin(); } + IT end() { return _alist.end(); } + CONST_IT end() const { return _alist.end(); } + + inline int size() const { + return _alist.size(); + } /**< Returns number of vertices */ + void clear() { _alist.clear(); } + void push_back(const VERTEX_INFO &vi) { _alist.push_back(vi); } /** Returns the info associated with this vertex id */ - IT vertex_vals(const int&); + IT vertex_vals(const int &); - CONST_IT vertex_vals(const int& idval) const { - CONST_IT it=_alist.begin(); - if(idval>size()-1) { - std::cerr<<"adj_list.vertex_vals: out of range vertex id, "< size() - 1) { + std::cerr << "adj_list.vertex_vals: out of range vertex id, " << idval + << endl; exit(0); } - it+=idval; + it += idval; return it; - }// end vertex_vals() const + } // end vertex_vals() const - /** Returns a pair of iterators, the first of the pair points to the first + /** Returns a pair of iterators, the first of the pair points to the first entity in the set of out-edges of idval, the second to the end of edges*/ - std::pair out_edges(const int& idval) { - IT it=vertex_vals(idval); + std::pair out_edges(const int &idval) { + IT it = vertex_vals(idval); return make_pair(it->out_begin(), it->out_end()); - }//end out_edges() - - std::pair out_edges(const int& idval) const { - CONST_IT it=vertex_vals(idval); + } // end out_edges() + + std::pair out_edges(const int &idval) const { + CONST_IT it = vertex_vals(idval); return make_pair(it->out_begin(), it->out_end()); - }//end out_edges() const + } // end out_edges() const - /** Returns a pair of iterators, the first of the pair points to the first + /** Returns a pair of iterators, the first of the pair points to the first entity in the set of in-edges of idval, the second to the end of edges*/ - std::pair in_edges(const int& idval) { - IT it=vertex_vals(idval); + std::pair in_edges(const int &idval) { + IT it = vertex_vals(idval); return make_pair(it->in_begin(), it->in_end()); - }//end in_edges() - - std::pair in_edges(const int& idval) const { - CONST_IT it=vertex_vals(idval); + } // end in_edges() + + std::pair in_edges(const int &idval) const { + CONST_IT it = vertex_vals(idval); return make_pair(it->in_begin(), it->in_end()); - }//end in_edges() const + } // end in_edges() const /** Returns size of out-neighbors of vid */ - int out_nbr_size(const int& vid) const { - pair out_pit=out_edges(vid); - return out_pit.second-out_pit.first; + int out_nbr_size(const int &vid) const { + pair out_pit = out_edges(vid); + return out_pit.second - out_pit.first; } /** Returns size of in-neighbors of vid */ - int in_nbr_size(const int& vid) const { - pair in_pit=in_edges(vid); - return in_pit.second-in_pit.first; + int in_nbr_size(const int &vid) const { + pair in_pit = in_edges(vid); + return in_pit.second - in_pit.first; } /** Adds given vertex object and returns its id As is evident, these ids are generated in increasing order */ - int add_vertex(const VERTEX_T& v) { + int add_vertex(const VERTEX_T &v) { _alist.push_back(VERTEX_INFO(v, size())); - return size()-1; + return size() - 1; } // end add_vertex() - int add_vertex(int v_id, const VERTEX_T& v) { - if(v_id >= _alist.size()) - _alist.resize(v_id+1); - + int add_vertex(int v_id, const VERTEX_T &v) { + if (v_id >= _alist.size()) + _alist.resize(v_id + 1); + _alist[v_id] = VERTEX_INFO(v, v_id); - // for(int i=0; i < v_id; i++) + // for(int i=0; i < v_id; i++) // cout << _alist[i]; // cout << endl; - return size()-1; + return size() - 1; } // end add_vertex() - - + /** Delete all vertices with a given id */ - void delete_vertex_by_id(const int& vid) { - for (IT it= begin(); it < end(); ) { - if (it->id == vid) { // this is the corresponding vertex_info that needs to be deleted + void delete_vertex_by_id(const int &vid) { + for (IT it = begin(); it < end();) { + if (it->id == vid) { // this is the corresponding vertex_info that needs + // to be deleted _alist.erase(it); - } - else { - if (it->id > vid) it->id--; // If this vertex id is higher than that of deleting, it's id reduce by 1 + } else { + if (it->id > vid) + it->id--; // If this vertex id is higher than that of deleting, it's + // id reduce by 1 std::pair out_e = out_edges(it->id); - while (out_e.first != out_e.second ) { - if (out_e.first.first == vid) // if this edge's other end is a deleted vertex, delete this edge also - it->out_edges.erase(out_e.first); - else if (out_e.first.first > vid) { // if ther other end is a vertex with higher id, id value decrease by 1 - out_e.first.first--; - out_e.first++; // advancing the edge iterator - } - } - std::pair in_e = in_edges(it->id); - while (in_e.first != in_e.second ) { - if (in_e.first.first == vid) // if this edge's other end is a deleted vertex, delete this edge also - it->in_edges.erase(in_e.first); - else if (in_e.first.first > vid) { // if ther other end is a vertex with higher id, id value decrease by 1 - in_e.first.first--; - in_e.first++; // advancing the edge iterator - } - } - it++; // advancing the vertex iterator + while (out_e.first != out_e.second) { + if (out_e.first.first == vid) // if this edge's other end is a deleted + // vertex, delete this edge also + it->out_edges.erase(out_e.first); + else if (out_e.first.first > + vid) { // if ther other end is a vertex with higher id, id + // value decrease by 1 + out_e.first.first--; + out_e.first++; // advancing the edge iterator + } + } + std::pair in_e = in_edges(it->id); + while (in_e.first != in_e.second) { + if (in_e.first.first == vid) // if this edge's other end is a deleted + // vertex, delete this edge also + it->in_edges.erase(in_e.first); + else if (in_e.first.first > + vid) { // if ther other end is a vertex with higher id, id + // value decrease by 1 + in_e.first.first--; + in_e.first++; // advancing the edge iterator + } + } + it++; // advancing the vertex iterator } } } - void delete_vertex_by_label(VERTEX_T& v) { - for (IT it= begin(); it < end(); ) { - if (it->v == v) // this is the corresponding vertex_info that needs to be deleted - delete_vertex_by_id(it->id); + void delete_vertex_by_label(VERTEX_T &v) { + for (IT it = begin(); it < end();) { + if (it->v == + v) // this is the corresponding vertex_info that needs to be deleted + delete_vertex_by_id(it->id); else - it++; + it++; } - } + } // defining equality function object that compare a pair // only based on the first element - template - class equality_for_pair : public binary_function, pair, bool> { - public: - equality_for_pair() { } - bool operator()(const pair& x, const pair& y) { - return x.first < y.first; - } + template class equality_for_pair { + public: + equality_for_pair() {} + bool operator()(const pair &x, const pair &y) { + return x.first < y.first; + } }; equality_for_pair delete_condition; - /** delete one-edge, for what the source and destination vertex id matches with the given*/ - void delete_one_out_edge(const int& src, const int& dest) { - bool src_deleted = false, dest_deleted = false, dangling_src_vertex = false, dangling_dest_vertex = false; + /** delete one-edge, for what the source and destination vertex id matches + * with the given*/ + void delete_one_out_edge(const int &src, const int &dest) { + bool src_deleted = false, dest_deleted = false, dangling_src_vertex = false, + dangling_dest_vertex = false; EDGE_T e; - for (IT it= begin(); it < end(); ) { + for (IT it = begin(); it < end();) { if (it->id == src) { - it->out_edges(remove_if(it->out_edges.begin(), it->out_edges.end(), bind2nd(delete_condition, make_pair(dest,e))), - it->out_edges.end()); - if (it->out_edges.size() == 0 && it->in_edges.size() == 0) { + it->out_edges(remove_if(it->out_edges.begin(), it->out_edges.end(), + bind2nd(delete_condition, make_pair(dest, e))), + it->out_edges.end()); + if (it->out_edges.size() == 0 && it->in_edges.size() == 0) { dangling_src_vertex = true; - } - if (dest_deleted == true) break; - src_deleted = true; + } + if (dest_deleted == true) + break; + src_deleted = true; } if (it->id == dest) { - it->out_edges(remove_if(it->out_edges.begin(), it->out_edges.end(), bind2nd(delete_condition, make_pair(src,e))), - it->out_edges.end()); - if (it->out_edges.size() == 0 && it->in_edges.size() == 0) { + it->out_edges(remove_if(it->out_edges.begin(), it->out_edges.end(), + bind2nd(delete_condition, make_pair(src, e))), + it->out_edges.end()); + if (it->out_edges.size() == 0 && it->in_edges.size() == 0) { dangling_dest_vertex = true; - } - if (src_deleted == true) break; - dest_deleted = true; + } + if (src_deleted == true) + break; + dest_deleted = true; } it++; } if (dangling_src_vertex == true) { - delete_vertex_by_id(src); + delete_vertex_by_id(src); } if (dangling_dest_vertex == true) { - delete_vertex_by_id(dest); + delete_vertex_by_id(dest); } } /** Adds edge FROM src TO dest */ - void add_out_edge(const int& src, const int& dest, const EDGE_T& e) { - if((src>size()-1) || (dest>size()-1)) { - std::cout<<"adj_list::add_out_edge:out of bound vertex IDs, src="< class ALLOC > -typename adj_list::IT adj_list::vertex_vals(const int& idval) { - typename adj_list::IT it=_alist.begin(); - if(idval>size()-1) { - std::cerr<<"adj_list.vertex_vals: out of range vertex id, "< +typename adj_list::IT +adj_list::vertex_vals(const int &idval) { + typename adj_list::IT it = _alist.begin(); + if (idval > size() - 1) { + std::cerr << "adj_list.vertex_vals: out of range vertex id, " << idval + << endl; exit(0); } - it+=idval; + it += idval; return it; -}// end vertex_vals() - +} // end vertex_vals() #endif diff --git a/src/common/count_support.h b/src/common/count_support.h index d8adaf9..b762a20 100644 --- a/src/common/count_support.h +++ b/src/common/count_support.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,110 +20,103 @@ #ifndef _COUNT_SUPPORT_H_ #define _COUNT_SUPPORT_H_ -#include "pattern.h" #include "adj_list.h" -#include "mem_storage_manager.h" #include "generic_classes.h" +#include "mem_storage_manager.h" +#include "pattern.h" +#include +template using ALLOC = std::allocator; +/** + * \brief count_support class partially specialized for vertical mining. + * + */ +template class CC, class SM_TYPE> +class count_support>, ST, CC, + SM_TYPE> { -/** -* \brief count_support class partially specialized for vertical mining. -* -*/ -template class > class CC, -template class ALLOC, class SM_TYPE > -class count_support >, ST, CC, ALLOC, SM_TYPE > -{ - public: - typedef proplist > MINING_PROPS; + typedef proplist> MINING_PROPS; typedef ST PAT_ST_TYPE; typedef PP PATTERN_PROPS; // typedef SM_TYPE STORAGE_MANAGER_TYPE; - typedef pattern PATTERN; - typedef vat VAT; + typedef pattern PATTERN; + typedef vat VAT; typedef pattern_support PAT_SUP; - - count_support(storage_manager const& sm) : _strg_mgr(sm) {} - + + count_support(storage_manager const &sm) + : _strg_mgr(sm) {} + // function to count support of candidate patterns // cand_supports is populated, num is # of candidates generated - void count(PATTERN* const& p1, PATTERN* const& p2, PATTERN** const& cand_pats, - const int& minsup, const int& num, const bool& isfwd, const pair& ids) { - + void count(PATTERN *const &p1, PATTERN *const &p2, PATTERN **const &cand_pats, + const int &minsup, const int &num, const bool &isfwd, + const pair &ids) { + // invoke storage_mgr's intersect to get VATs and support for candidates - VAT** cand_vats; // pointer to VAT ptrs for candidates - PAT_SUP** cand_sups=new PAT_SUP*[num]; - + VAT **cand_vats; // pointer to VAT ptrs for candidates + PAT_SUP **cand_sups = new PAT_SUP *[num]; + int i; - for(i=0; isize()==1); - - // intersect() is expected to populate the support member + + // intersect() is expected to populate the support member // of each cand_pat - cand_vats= _strg_mgr.intersect(p1, p2, cand_sups, cand_pats, isfwd, ids, minsup); - + cand_vats = + _strg_mgr.intersect(p1, p2, cand_sups, cand_pats, isfwd, ids, minsup); + // check which candidates were frequent // and add their VATs to strg_mgr - for(i=0;iis_valid(minsup)) { + + if (cand_sups[i]->is_valid(minsup)) { cand_pats[i]->set_support(cand_sups[i]); _strg_mgr.add_vat(cand_pats[i], cand_vats[i]); // Delete the VAT for the first pattern.. we dont need it anymore. - if(p1->size() > 2) // Cannot delete single edges. + if (p1->size() > 2) // Cannot delete single edges. _strg_mgr.delete_vat(p1); - } - else { - //reclaim memory - if(cand_vats != NULL) + } else { + // reclaim memory + if (cand_vats != NULL) delete cand_vats[i]; } delete cand_sups[i]; - }//end for + } // end for delete[] cand_sups; - if(cand_vats != NULL) + if (cand_vats != NULL) delete[] cand_vats; - - }//end count() - - void delete_vat(PATTERN* const& p) { - _strg_mgr.delete_vat(p); - } - VAT* get_vat(PATTERN* const& p) { - return _strg_mgr.get_vat(p); - } + } // end count() + + void delete_vat(PATTERN *const &p) { _strg_mgr.delete_vat(p); } + + VAT *get_vat(PATTERN *const &p) { return _strg_mgr.get_vat(p); } // Prints the tids in which this pattern occurs. - void print_tids(PATTERN* const& p) { - _strg_mgr.print_tids(p); - } + void print_tids(PATTERN *const &p) { _strg_mgr.print_tids(p); } // get the tids in which this pattern occurs. - void get_tids(PATTERN* const& p, vector & tids) { + void get_tids(PATTERN *const &p, vector &tids) { _strg_mgr.get_tids(p, tids); } - unsigned int size() const { - return _strg_mgr.size(); - } - - storage_manager & get_sm_ref() { - return _strg_mgr; - } - + unsigned int size() const { return _strg_mgr.size(); } + + storage_manager &get_sm_ref() { return _strg_mgr; } + private: - storage_manager _strg_mgr; - -}; //end class count_support() + storage_manager _strg_mgr; + +}; // end class count_support() #endif diff --git a/src/common/db_reader.h b/src/common/db_reader.h index 4cec9dc..bf248da 100644 --- a/src/common/db_reader.h +++ b/src/common/db_reader.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -17,152 +17,163 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ -/** \file db_reader.h - user defined class to parse input database and retrieve level-1 patterns */ +/** \file db_reader.h - user defined class to parse input database and retrieve + * level-1 patterns */ #ifndef _DB_READER_H #define _DB_READER_H -#include #include -#include #include +#include +#include +#include // using namespace std; // adding all files manually for now, TODO: improve this include system -#include "helper_funs.h" #include "generic_classes.h" -#include "pat_fam.h" // added later to make .cpp files +#include "helper_funs.h" +#include "pat_fam.h" // added later to make .cpp files /** * \brief Database Reader class, to read the input file. * - * This class reads the database, using the tokenizer class and populate the level-1 VAT. + * This class reads the database, using the tokenizer class and populate the + * level-1 VAT. */ -template class ALLOC > -class db_reader -{ - public: - - typedef vat VAT; /// vat typedef - typedef tokenizer TKNZ; /// tokenizer class for this pattern-type, its method parse_next_line() is invoked by db_reader - typedef map, typename PATTERN::EDGE_T >, int> FREQ_MAP; +template class db_reader { +public: + typedef vat + VAT; /// vat typedef + typedef tokenizer + TKNZ; /// tokenizer class for this pattern-type, its method + /// parse_next_line() is invoked by db_reader + typedef map, + typename PATTERN::EDGE_T>, + int> + FREQ_MAP; /** \fn db_reader(const char* infile_name) * \brief Constructor * \param infile_name Name of the input database (flat) file */ - db_reader(const char* infile_name): _in_db(infile_name) {} + db_reader(const char *infile_name) + : _in_db(infile_name), filename(std::string(infile_name)) {} /** \fn db_reader(const char* infile_name, int mem_size) * \brief Constructor_for_gigabase * \param infile_name Name of the input database (flat) file. * \param mem_size Maximum size of memory vat for gigabase backend. */ - db_reader(const char* infile_name, int mem_size): _in_db(infile_name) {filename=strdup(infile_name);_max_mem=mem_size;} + db_reader(const char *infile_name, int mem_size) : _in_db(infile_name) { + filename = std::string(infile_name); + std::cout << "Filename: " << filename << std::endl; + _max_mem = mem_size; + } /** \fn ~db_reader() * \brief destructor */ - ~db_reader() - { close();} + ~db_reader() { close(); } /** \fn void open(const char* infile_name) - * \brief Opens the specified input file. This is an alternative to + * \brief Opens the specified input file. This is an alternative to * the parameterized constructor * \param infile_name Name of the input file */ - void open(const char* infile_name) { - if(is_open()) + void open(const char *infile_name) { + if (is_open()) _in_db.close(); - _in_db.open(infile_name); - } + _in_db.open(infile_name); + } /** void close() * \brief Closes the file associated with this class */ - void close() { - _in_db.close(); - } + void close() { _in_db.close(); } /** bool is_open * \brief returns whether file associated with this object is open */ - bool is_open() { - return _in_db.is_open(); - } - - /** void get_length_one(pat_fam& freq_pats, vat_db& vat_hmap, int minsup) - * \brief obtain length one frequent patterns in sorted order, and populate vat_db with their vats - * \param freq_pats Pattern Family which is populated with the frequent patterns - * \param vat_hmap The hashmap used to store pattern-to-VAT mappings - * \param minsup Minimum support threshold + bool is_open() { return _in_db.is_open(); } + + /** void get_length_one(pat_fam& freq_pats, vat_db& + * vat_hmap, int minsup) \brief obtain length one frequent patterns in sorted + * order, and populate vat_db with their vats \param freq_pats Pattern Family + * which is populated with the frequent patterns \param vat_hmap The hashmap + * used to store pattern-to-VAT mappings \param minsup Minimum support + * threshold */ - template - void get_length_one(pat_fam& freq_pats, storage_manager& vat_hmap, const int& minsup, FREQ_MAP& fm) { + template + void get_length_one(pat_fam &freq_pats, + storage_manager &vat_hmap, + const int &minsup, FREQ_MAP &fm) { int tid; - VAT* ivat; + VAT *ivat; typename pat_fam::IT pf_it; - if(!is_open()) { + if (!is_open()) { // stream not open - std::cerr<<"db_reader: file stream not open in get_length_one()"<size())>=minsup) + // cout << "LEVEL 1 " << *ivat << endl; + + if ((ivat->size()) >= minsup) (*pf_it)->set_sup(make_pair(ivat->size(), 0)); else { // Delete the pattern and the vat. vat_hmap.delete_vat(*pf_it); delete (*pf_it); - freq_pats.erase(pf_it); + freq_pats.erase(pf_it); pf_it--; } - } //end for + } // end for // sort level-1 patterns - //typename pat_fam::IT b=freq_pats.begin(), e=freq_pats.end(); - //sort(b, e, less_than()); + // typename pat_fam::IT b=freq_pats.begin(), e=freq_pats.end(); + // sort(b, e, less_than()); - } //end get_length_one() - static void print_edge_freq_map(const FREQ_MAP& fm) { + } // end get_length_one() + static void print_edge_freq_map(const FREQ_MAP &fm) { cout << "Inside print_edge_freq_map." << endl; typename FREQ_MAP::const_iterator cit = fm.begin(); - for (;cit != fm.end(); cit++) + for (; cit != fm.end(); cit++) cout << "(" << cit->first.first.first << ", " << cit->first.second << ", " - << cit->first.first.second << ") -->" << cit->second << endl; + << cit->first.first.second << ") -->" << cit->second << endl; } - unsigned int get_transaction_count() const {return _trans_cnt;} - + unsigned int get_transaction_count() const { return _trans_cnt; } - private: +private: std::ifstream _in_db; - char* filename; // Holds the file name of the dataset - unsigned long _max_mem; + std::string filename; // Holds the file name of the dataset + unsigned long _max_mem; TKNZ tknz; // An object of Tokenizer class unsigned int _trans_cnt; -}; //end class db_reader +}; // end class db_reader #endif diff --git a/src/common/element_parser.h b/src/common/element_parser.h index 6c3135e..572b9a4 100644 --- a/src/common/element_parser.h +++ b/src/common/element_parser.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -17,13 +17,15 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ -/** \file element_parser.h - class to parse a set of characters into the +/** \file element_parser.h - class to parse a set of characters into the * specialized element type */ #ifndef _ELEMENT_PARSER #define _ELEMENT_PARSER -#include #include "helper_funs.h" +#include +#include +#include /** \brief Class represents a generic element_parser * @@ -31,81 +33,64 @@ * It has to be partially specialized for each new element-type require */ -template -class element_parser {}; - -/** - * \brief Element parser class for parsing an integer, an specialization of element_parser class. +template class element_parser {}; +template struct COMP_FUNC {}; +/** + * \brief Element parser class for parsing an integer, an specialization of + * element_parser class. */ -template<> -class element_parser -{ - - public: +template <> class element_parser { - typedef int OBJ_T; /**< element type */ +public: + typedef int OBJ_T; /**< element type */ typedef int HASH_TYPE; // Input type for the hash function. - typedef eqint COMP_FUNC; // Comparison function. /** \fn OBJ_T parse_element(char* word) * \brief parse characters in word to type OBJ_T * \param word input set of characters * return value is parsed element */ - static inline OBJ_T parse_element(char* word) { - return atoi(word); - } + static inline OBJ_T parse_element(char *word) { return atoi(word); } - static const OBJ_T& convert(const int& i) { - - return i; - } + static const OBJ_T &convert(const int &i) { return i; } - static bool notEq(const int& i1, const int& i2) { - return !(i1 == i2); - } - - static const HASH_TYPE& conv_hash_type(const OBJ_T& s) { - return s; - } + static bool notEq(const int &i1, const int &i2) { return !(i1 == i2); } + static const HASH_TYPE &conv_hash_type(const OBJ_T &s) { return s; } -};//end clas element_parser +}; // end clas element_parser + // +template <> struct COMP_FUNC { + bool operator()(const int &lhs, const int &rhs) { return lhs == rhs; } +}; +typedef struct COMP_FUNC COMP_FUNC_INT; -/** - * \brief Element parser class for parsing a string, an specialization of element_parser class. +/** + * \brief Element parser class for parsing a string, an specialization of + * element_parser class. */ -template<> -class element_parser -{ - - public: - - typedef std::string OBJ_T; /**< element type */ - typedef const char* HASH_TYPE; // Input type for the hash function. - typedef eqstr COMP_FUNC; // Comparison function. +template <> class element_parser { +public: + typedef std::string OBJ_T; /**< element type */ + typedef const char *HASH_TYPE; // Input type for the hash function. /** \fn OBJ_T parse_element(char* word) * \brief parse characters in word to type OBJ_T * \param word input set of characters * return value is parsed element */ - static inline OBJ_T parse_element(const char* word) { + static inline OBJ_T parse_element(const char *word) { return std::string(word); } - static inline OBJ_T parse_element(std::string word) { - return word; - } - static OBJ_T convert(const char* s) { - return parse_element(s); - } + static inline OBJ_T parse_element(std::string word) { return word; } + static OBJ_T convert(const char *s) { return parse_element(s); } static OBJ_T convert(const int i) { std::ostringstream t_ss; t_ss << i; - + return t_ss.str(); } @@ -113,10 +98,19 @@ class element_parser return !(s1 == s2); } - static HASH_TYPE conv_hash_type(const OBJ_T& s) { - return s.c_str(); + static HASH_TYPE conv_hash_type(const OBJ_T &s) { + // malloc, memcpy then return the pointer + char *ret = (char *)malloc(s.size() + 1); + memcpy(ret, s.c_str(), s.size() + 1); + return ret; } -};//end clas element_parser +}; // end clas element_parser + +template <> struct COMP_FUNC { + bool operator()(const char *lhs, const char *rhs) const { + return !strcmp(lhs, rhs); + } +}; #endif diff --git a/src/common/generic_classes.h b/src/common/generic_classes.h index daa32f6..0d813f8 100644 --- a/src/common/generic_classes.h +++ b/src/common/generic_classes.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -31,71 +31,66 @@ * the second is the vertex_tpe and the third is the edge_type */ -template class ALLOC > +template class canonical_code {}; -/** +/** * \class vat iset_vat.h "src/itemset/iset_vat.h" * \brief Class represents a generic VAT(Vertical Attribute Table) * * It needs three template arguments; - * PP: Pattern Properties, + * PP: Pattern Properties, * MP: Mining properties, * ST: Storage_type, default is std::vector, P is the * underlying element type of ST storage type. */ -template class ALLOC=std::allocator, template class ST=std::vector > - class vat {}; +template class ST = std::vector> +class vat {}; /** * \brief Class represent a generic tokenizer. * * This class defines key functions used by db_parser in order to parse input db */ -template class A> -class tokenizer {}; +template class tokenizer {}; /** * \brief A generic count support class. * */ -template class > class cc, - template class alloc, class sm_type > -class count_support { -}; +template class CC, class sm_type> +class count_support {}; /** * \brief A generic Storage type class for back-end Storage. * - * A generic storage type for back-end. If you want to add new storage type for back-end, extend this - * class, as it is done for memory_storage class. + * A generic storage type for back-end. If you want to add new storage type for + * back-end, extend this class, as it is done for memory_storage class. */ - class storage_type { - }; +class storage_type {}; /** * \brief Memory based Storage Manager class, extended from base storage type. * */ -class memory_storage: public storage_type{ -}; +class memory_storage : public storage_type {}; /** * \brief File based Storage Manager class, extended from base storage type. * */ -class file_storage: public storage_type{ -}; +class file_storage : public storage_type {}; /** * \brief A Generic Storage Manager class. * * Storage_Manager class manages the VAT storage. It has 3 template arguments. - * PAT: The stored pattern type, VAT: The VAT type, ST_TYPE: The storage type(memory, file etc.) + * PAT: The stored pattern type, VAT: The VAT type, ST_TYPE: The storage + * type(memory, file etc.) */ -template class ALLOC, typename ST_TYPE> -class storage_manager { -}; - +template +class storage_manager {}; #endif diff --git a/src/common/hash_utils.hpp b/src/common/hash_utils.hpp new file mode 100644 index 0000000..d43fea4 --- /dev/null +++ b/src/common/hash_utils.hpp @@ -0,0 +1,87 @@ +#ifndef _HASH_UTIL_HPP_ +#define _HASH_UTIL_HPP_ +#include +#include +#include +#include + +template +class basic_fnv1a final { + + static_assert(std::is_unsigned::value, "need unsigned integer"); + +public: + using result_type = ResultT; + +private: + result_type state_{}; + +public: + constexpr basic_fnv1a() noexcept : state_{OffsetBasis} {} + + constexpr void update(const void *const data, + const std::size_t size) noexcept { + const auto cdata = static_cast(data); + auto acc = this->state_; + for (auto i = std::size_t{}; i < size; ++i) { + const auto next = std::size_t{cdata[i]}; + acc = (acc ^ next) * Prime; + } + this->state_ = acc; + } + + constexpr result_type digest() const noexcept { return this->state_; } +}; + +using fnv1a_32 = + basic_fnv1a; + +using fnv1a_64 = basic_fnv1a; + +template struct fnv1a; + +template <> struct fnv1a<32> { + using type = fnv1a_32; +}; + +template <> struct fnv1a<64> { + using type = fnv1a_64; +}; + +template using fnv1a_t = typename fnv1a::type; + +constexpr std::size_t hash_bytes(const void *const data, + const std::size_t size) noexcept { + auto hashfn = fnv1a_t{}; + hashfn.update(data, size); + return hashfn.digest(); +} + +template struct myhash { + std::size_t operator()(const T &obj) const noexcept { + // Fallback implementation. + auto hashfn = std::hash{}; + return hashfn(obj); + } +}; + +template <> struct myhash { + std::size_t operator()(const std::string &s) const noexcept { + // use default std::hash + return std::hash{}(s); + } +}; + +template <> struct myhash { + std::size_t operator()(const char *const s) const noexcept { + return hash_bytes(s, std::strlen(s)); + } +}; + +template <> struct myhash { + std::size_t operator()(const int i) const noexcept { + return hash_bytes(&i, sizeof(i)); + } +}; +#endif diff --git a/src/common/helper_funs.h b/src/common/helper_funs.h index 05b5c79..28e92d4 100644 --- a/src/common/helper_funs.h +++ b/src/common/helper_funs.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,58 +21,52 @@ #define _HELPER_FUNS_H_ #include +#include +#include +#include -#define HASHNS __gnu_cxx - -/** +/** * \struct eqstr * \brief function object which defines operator= for const char* */ -struct eqstr -{ - /** +struct eqstr { + /** * \fn bool operator() (const char* s1, const char* s2) const * \brief returns true if s1 and s2 are the same sequence of characters */ - bool operator()(const char* s1, const char* s2) const { + bool operator()(const char *s1, const char *s2) const { return strcmp(s1, s2) == 0; } -}; //end struct eqstr +}; // end struct eqstr -/** +/** * \struct eqint * \brief function object which defines operator= for integer */ -struct eqint -{ - /** +struct eqint { + /** * \fn bool operator() (int i1, int s2) const * \brief returns true if i1 and i2 are the same integer */ - bool operator()(int i1, int i2) const { - return i1 == i2; - } -}; //end struct eqint + bool operator()(int i1, int i2) const { return i1 == i2; } +}; // end struct eqint /** * \struct less_than * \brief function object for comparing two patterns for less-than */ -template -struct less_than -{ - bool operator() (const PAT* p1, const PAT* p2) const { - return (*p1 < *p2); - } +template struct less_than { + bool operator()(const PAT *p1, const PAT *p2) const { return (*p1 < *p2); } }; /** * \struct less_than for pairs. */ -struct ltpair -{ - bool operator()(const pair p1, const pair p2) const { - if((p1.first < p2.first) || ((p1.first == p2.first) && p1.second < p2.second)) +struct ltpair { + bool operator()(const std::pair p1, + const std::pair p2) const { + if ((p1.first < p2.first) || + ((p1.first == p2.first) && p1.second < p2.second)) return true; else return false; @@ -82,10 +76,10 @@ struct ltpair /** * \struct equal for pairs. */ -struct eqpair -{ - bool operator()(const pair p1, const pair p2) const { - if((p1.first == p2.first) && (p1.first == p2.first)) +struct eqpair { + bool operator()(const std::pair p1, + const std::pair p2) const { + if ((p1.first == p2.first) && (p1.first == p2.first)) return true; else return false; @@ -95,15 +89,6 @@ struct eqpair /** * \struct hash_func */ -template -struct hash_func -{ }; - -template <> -struct hash_func:HASHNS::hash { -size_t operator () (const string& x) const { -return this->HASHNS::hash::operator () (x.c_str()); -} -}; +template struct hash_func {}; #endif diff --git a/src/common/mem_storage_manager.h b/src/common/mem_storage_manager.h index b572618..da31c31 100644 --- a/src/common/mem_storage_manager.h +++ b/src/common/mem_storage_manager.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,124 +20,121 @@ #ifndef MEM_STORAGE_MANAGER_H_ #define MEM_STORAGE_MANAGER_H_ -#include -#include -#include "pat_fam.h" -#include "pat_support.h" #include "generic_classes.h" +#include "hash_utils.hpp" #include "helper_funs.h" +#include "pat_fam.h" +#include "pat_support.h" #include "time_tracker.h" +#include +#include +#include using namespace std; // time_tracker tt_tot_inter; /** -* \brief Storage Manager class partially specialized to Memory-based Storage manager to - * stores VAT in Memory. + * \brief Storage Manager class partially specialized to Memory-based Storage + * manager to stores VAT in Memory. * - * An object store. Specifically we want to store VATs and their associated patterns. - * It provides routines to find/access/store VATs with a pattern key. + * An object store. Specifically we want to store VATs and their associated + * patterns. It provides routines to find/access/store VATs with a pattern key. */ -template class ALLOC, class VAT > -class storage_manager -{ - +template +class storage_manager { + typedef typename PAT::CC_STORAGE_TYPE C_ST; typedef typename PAT::CC_COMPARISON_FUNC C_CF; - // typedef typename VAT::VAT_ALLOC ALLOC; - + public: - - typedef HASHNS::hash_map, C_CF, ALLOC > > CC_ST_TO_VATPTR; + typedef std::unordered_map, C_CF> CC_ST_TO_VATPTR; typedef typename CC_ST_TO_VATPTR::const_iterator CONST_IT; typedef typename CC_ST_TO_VATPTR::iterator IT; typedef pattern_support PAT_SUP; - + storage_manager() {} - - storage_manager(pat_fam& freq_one_pats, vector& freq_one_vats) { - + + storage_manager(pat_fam &freq_one_pats, vector &freq_one_vats) { + typename pat_fam::CONST_IT cit = freq_one_pats.begin(); int idx = 0; - - while(cit != freq_one_pats.end()) { - PAT* p = *cit; - + + while (cit != freq_one_pats.end()) { + PAT *p = *cit; + _pat_to_vat.insert(make_pair(p->pat_id(), freq_one_vats[idx++])); cit++; } } - + /** - * Return true if the pattern is found in the storage manager. + * Return true if the pattern is found in the storage manager. */ - bool find(PAT* const& p) const { + bool find(PAT *const &p) const { CONST_IT hmap_it; - hmap_it=_pat_to_vat.find(p->pat_id()); - - return !(hmap_it==_pat_to_vat.end()); - }//end find() - - /** - * Return the vat corresponding to the pattern. - */ - VAT* get_vat(PAT* const& p) const { - + hmap_it = _pat_to_vat.find(p->pat_id()); + + return !(hmap_it == _pat_to_vat.end()); + } // end find() + + /** + * Return the vat corresponding to the pattern. + */ + VAT *get_vat(PAT *const &p) const { + CONST_IT hmap_it; - hmap_it=_pat_to_vat.find(p->pat_id()); - if(hmap_it!=_pat_to_vat.end()) { - //vat found + hmap_it = _pat_to_vat.find(p->pat_id()); + if (hmap_it != _pat_to_vat.end()) { + // vat found return hmap_it->second; - } - else { + } else { return 0; } } - + /** - * Delete the vat. + * Delete the vat. */ - void delete_vat(PAT* const& p) { - + void delete_vat(PAT *const &p) { + IT hmap_it; - hmap_it=_pat_to_vat.find(p->pat_id()); - if(hmap_it!=_pat_to_vat.end()) { - VAT* vat=hmap_it->second; - if(vat) - delete vat; //reclaim memory for vat + hmap_it = _pat_to_vat.find(p->pat_id()); + if (hmap_it != _pat_to_vat.end()) { + VAT *vat = hmap_it->second; + if (vat) + delete vat; // reclaim memory for vat else - std::cout<<"storage_manager.delete_vat: vat is null"<pat_id()<pat_id() << endl; } } - - /** - * Map the pattern to the VAT. - */ - bool add_vat(PAT* const& p, VAT* v) { - bool ret=(_pat_to_vat.insert(make_pair(p->pat_id(), v))).second; - + + /** + * Map the pattern to the VAT. + */ + bool add_vat(PAT *const &p, VAT *v) { + bool ret = (_pat_to_vat.insert(make_pair(p->pat_id(), v))).second; + return ret; } - + /** * */ - void print_tids(PAT* const& p) { + void print_tids(PAT *const &p) { CONST_IT hmap_it; - hmap_it=_pat_to_vat.find(p->pat_id()); + hmap_it = _pat_to_vat.find(p->pat_id()); - if(hmap_it!=_pat_to_vat.end()) { - //vat found + if (hmap_it != _pat_to_vat.end()) { + // vat found (hmap_it->second)->print_tids(); - } - else { + } else { cout << "print_tids(): vat not found" << endl; } } @@ -145,74 +142,73 @@ class storage_manager /** * */ - void get_tids(PAT* const&p, vector& tids) { + void get_tids(PAT *const &p, vector &tids) { CONST_IT hmap_it; - hmap_it=_pat_to_vat.find(p->pat_id()); + hmap_it = _pat_to_vat.find(p->pat_id()); - if(hmap_it!=_pat_to_vat.end()) { - //vat found + if (hmap_it != _pat_to_vat.end()) { + // vat found (hmap_it->second)->get_tids(tids); - } - else { + } else { cout << "print_tids(): vat not found" << endl; } - } /** - * Generate candidate VATs for the next level, from the provided patterns. + * Generate candidate VATs for the next level, from the provided patterns. */ - VAT** intersect(PAT* const& p1, PAT* const& p2, PAT_SUP** cand_sups, PAT** cand_pats, - const bool& isfwd, const pair& ids, const int& minsup) { - + VAT **intersect(PAT *const &p1, PAT *const &p2, PAT_SUP **cand_sups, + PAT **cand_pats, const bool &isfwd, const pair &ids, + const int &minsup) { + // Get the vats. - VAT* v1, *v2; - + VAT *v1, *v2; + CONST_IT hmap_it; hmap_it = _pat_to_vat.find(p1->pat_id()); - if(hmap_it!=_pat_to_vat.end()) { - //vat found + if (hmap_it != _pat_to_vat.end()) { + // vat found v1 = hmap_it->second; - } - else { - cout<< "storage_manager: vat not found for pattern = " << p1->pat_id() << endl; + } else { + cout << "storage_manager: vat not found for pattern = " << p1->pat_id() + << endl; return 0; } - + hmap_it = _pat_to_vat.find(p2->pat_id()); - if(hmap_it!=_pat_to_vat.end()) { - //vat found + if (hmap_it != _pat_to_vat.end()) { + // vat found v2 = hmap_it->second; - } - else { - cout<< "storage_manager: vat not found for pattern = " << p2->pat_id() << endl; + } else { + cout << "storage_manager: vat not found for pattern = " << p2->pat_id() + << endl; return 0; } - + // cout << "Intersecting patterns. p1 = " << p1 << ". p2 = " << p2 << endl; - - VAT** ret = VAT::intersection(v1, v2, cand_sups, cand_pats, isfwd, ids, minsup); + + VAT **ret = + VAT::intersection(v1, v2, cand_sups, cand_pats, isfwd, ids, minsup); return ret; } - + void print() const { CONST_IT hmap_it; - for(hmap_it=_pat_to_vat.begin();hmap_it!=_pat_to_vat.end();hmap_it++) - cout<first<<"->"<second<first << "->" << hmap_it->second << endl; + } // end print() void print_pat_ids() const { CONST_IT hmap_it; - for(hmap_it=_pat_to_vat.begin();hmap_it!=_pat_to_vat.end();hmap_it++) - cout<first<<" "; + for (hmap_it = _pat_to_vat.begin(); hmap_it != _pat_to_vat.end(); hmap_it++) + cout << hmap_it->first << " "; cout << endl; } - unsigned int size() const { return _pat_to_vat.size();} - + unsigned int size() const { return _pat_to_vat.size(); } + private: - CC_ST_TO_VATPTR _pat_to_vat; // A pattern to vat map }; diff --git a/src/common/pat_fam.h b/src/common/pat_fam.h index 0971e3a..c34f534 100644 --- a/src/common/pat_fam.h +++ b/src/common/pat_fam.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -27,45 +27,49 @@ // this container shall be eventually buffered // -template > class ST> +template > class ST> class pat_fam; -template > class ST> -std::ostream& operator<< (std::ostream&, const pat_fam&); +template > class ST> +std::ostream &operator<<(std::ostream &, const pat_fam &); /** \class pat_fam - * \brief Pattern Family class, denotes a collection of patterns. No constraint is imposed on the comprising patterns + * \brief Pattern Family class, denotes a collection of patterns. No constraint + * is imposed on the comprising patterns * * P is the pattern class, ST is the storage container type * e.g. P=pattern >, ST=vector */ -template > class ST=std::vector> -class pat_fam: public ST -{ - public: - typedef typename ST::iterator IT; /**< iterator over the pattern family */ - typedef typename ST::const_iterator CONST_IT; /**< constant iterator over the pattern family */ +template > + class ST = std::vector> +class pat_fam : public ST

{ +public: + typedef + typename ST

::iterator IT; /**< iterator over the pattern family */ + typedef typename ST

::const_iterator + CONST_IT; /**< constant iterator over the pattern family */ - /** \fn friend ostream& operator<< <>(ostream& ostr, const pat_fam& pats) - * \brief friend function defining the extraction operator - * \param ostr Ostream object - * \param pats pat_fam object + /** \fn friend ostream& operator<< <>(ostream& ostr, const pat_fam& + * pats) \brief friend function defining the extraction operator \param ostr + * Ostream object \param pats pat_fam object */ - friend std::ostream& operator<< <>(std::ostream&, const pat_fam&); + friend std::ostream &operator<< <>(std::ostream &, const pat_fam &); void print_pids() { typename pat_fam::CONST_IT it; - for(it=this->begin(); it!=this->end(); it++) + for (it = this->begin(); it != this->end(); it++) cout << (*it)->pat_id() << " "; - cout << endl; + cout << endl; } }; -template -std::ostream& operator<< (std::ostream& ostr, const pat_fam& pats) -{ +template +std::ostream &operator<<(std::ostream &ostr, + const pat_fam &pats) { typename pat_fam::CONST_IT it; - for(it=pats.begin(); it!=pats.end(); it++) - ostr<<*it; + for (it = pats.begin(); it != pats.end(); it++) + ostr << *it; return ostr; } diff --git a/src/common/pat_fam.h.bak b/src/common/pat_fam.h.bak index 8107d2e..6398c00 100644 --- a/src/common/pat_fam.h.bak +++ b/src/common/pat_fam.h.bak @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -27,45 +27,45 @@ // this container shall be eventually buffered // -template class ST> -class pat_fam; -template class ST> -std::ostream& operator<< (std::ostream&, const pat_fam&); +template class ST> class pat_fam; +template class ST> +std::ostream &operator<<(std::ostream &, const pat_fam &); /** \class pat_fam - * \brief Pattern Family class, denotes a collection of patterns. No constraint is imposed on the comprising patterns + * \brief Pattern Family class, denotes a collection of patterns. No constraint + * is imposed on the comprising patterns * * P is the pattern class, ST is the storage container type * e.g. P=pattern >, ST=vector */ -template class ST=std::vector> -class pat_fam: public ST -{ - public: - typedef typename ST::iterator IT; /**< iterator over the pattern family */ - typedef typename ST::const_iterator CONST_IT; /**< constant iterator over the pattern family */ +template class ST = std::vector> +class pat_fam : public ST

{ +public: + typedef + typename ST

::iterator IT; /**< iterator over the pattern family */ + typedef typename ST

::const_iterator + CONST_IT; /**< constant iterator over the pattern family */ - /** \fn friend ostream& operator<< <>(ostream& ostr, const pat_fam& pats) - * \brief friend function defining the extraction operator - * \param ostr Ostream object - * \param pats pat_fam object + /** \fn friend ostream& operator<< <>(ostream& ostr, const pat_fam& + * pats) \brief friend function defining the extraction operator \param ostr + * Ostream object \param pats pat_fam object */ - friend std::ostream& operator<< <>(std::ostream&, const pat_fam&); + friend std::ostream &operator<< <>(std::ostream &, const pat_fam &); void print_pids() { typename pat_fam::CONST_IT it; - for(it=this->begin(); it!=this->end(); it++) + for (it = this->begin(); it != this->end(); it++) cout << (*it)->pat_id() << " "; - cout << endl; + cout << endl; } }; -template -std::ostream& operator<< (std::ostream& ostr, const pat_fam& pats) -{ +template +std::ostream &operator<<(std::ostream &ostr, + const pat_fam &pats) { typename pat_fam::CONST_IT it; - for(it=pats.begin(); it!=pats.end(); it++) - ostr<<*it; + for (it = pats.begin(); it != pats.end(); it++) + ostr << *it; return ostr; } diff --git a/src/common/pat_support.h b/src/common/pat_support.h index 8916c0d..273e236 100644 --- a/src/common/pat_support.h +++ b/src/common/pat_support.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,9 +18,9 @@ * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -39,128 +39,135 @@ #ifndef _PATTERN_SUPPORT_H_ #define _PATTERN_SUPPORT_H_ -#include #include "properties.h" +#include -// frequency is an indicator of whether the pattern is frequent +// frequency is an indicator of whether the pattern is frequent // for its definition -// validity of a pattern is an indicator of whether it should be stored for -// sake of completion of candidate generation. All frequent patterns shall be +// validity of a pattern is an indicator of whether it should be stored for +// sake of completion of candidate generation. All frequent patterns shall be // valid but vice versa does not hold true for Fk_Fk induced/unordered mining -template -class pattern_support; - -template -std::ostream& operator<< (std::ostream&, const pattern_support&); +template class pattern_support; +template +std::ostream &operator<<(std::ostream &, const pattern_support &); /** - * \brief Generic Pattern Support Class that takes care of support counting of a pattern. + * \brief Generic Pattern Support Class that takes care of support counting of a + * pattern. * - * Templated with Mining property, since support counting depends on different mining properties. + * Templated with Mining property, since support counting depends on different + * mining properties. */ -template -class pattern_support -{ +template class pattern_support { public: - //constructor - inline - pattern_support(const int& s=0, bool v=0): _sup(s), _valid(v) {} - - inline - bool is_valid(const int& minsup) const {return _sup>=minsup;} - - inline - bool is_freq(const int& ms) const {return _sup>=ms;} - - pattern_support& operator= (const pattern_support& rhs) { + // constructor + inline pattern_support(const int &s = 0, bool v = 0) : _sup(s), _valid(v) {} + + inline bool is_valid(const int &minsup) const { return _sup >= minsup; } + + inline bool is_freq(const int &ms) const { return _sup >= ms; } + + pattern_support &operator=(const pattern_support &rhs) { // pattern_support lhs(rhs._sup, rhs._valid); - if (this == &rhs) return *this; + if (this == &rhs) + return *this; _sup = rhs._sup; _valid = rhs._valid; return *this; } // parimi: commented out b'cos it is not clear what it should do - int get_sup() const { return _sup;} + int get_sup() const { return _sup; } - void set_vals(const pattern_support* s) {_sup=s->_sup; _valid=s->_valid;} + void set_vals(const pattern_support *s) { + _sup = s->_sup; + _valid = s->_valid; + } - void set_sup(const std::pair& s) { - // Ignore the second element in the pair - used for induced mining. - _sup=s.first; + void set_sup(const std::pair &s) { + // Ignore the second element in the pair - used for induced mining. + _sup = s.first; } // friend extraction - friend std::ostream& operator<< <>(std::ostream&, const pattern_support&); + friend std::ostream &operator<< <>(std::ostream &, + const pattern_support &); // NOTE: validity and frequency are synonymous for generic patterns - private: +private: int _sup; bool _valid; -};//end class pattern_support - +}; // end class pattern_support -template -std::ostream& operator<< (std::ostream& ostr, const pattern_support& ps) { - ostr<<"Support: "< +std::ostream &operator<<(std::ostream &ostr, const pattern_support &ps) { + ostr << "Support: " << ps._sup << std::endl; return ostr; } -template -std::ostream& operator<< (std::ostream& ostr, const pattern_support > > >& ps) { +template +std::ostream &operator<<( + std::ostream &ostr, + const pattern_support< + proplist>>> &ps) { - //ostr<<"Support: "< -class pattern_support > > > -{ +template +class pattern_support< + proplist>>> { public: - //constructor - pattern_support(const int& is=0, const int& s=0, bool v=0): _isup(is), _esup(s) {} + // constructor + pattern_support(const int &is = 0, const int &s = 0, bool v = 0) + : _isup(is), _esup(s) {} - bool is_valid(const int& minsup) const { - return (_isup>=minsup || (_esup>=minsup)); + bool is_valid(const int &minsup) const { + return (_isup >= minsup || (_esup >= minsup)); } - bool is_freq(const int& minsup) const { - return _isup >= minsup; - } + bool is_freq(const int &minsup) const { return _isup >= minsup; } - int get_sup() const { return _isup;} + int get_sup() const { return _isup; } - void incr_isup() { _isup++;} + void incr_isup() { _isup++; } - void incr_esup() { _esup++;} + void incr_esup() { _esup++; } - void set_vals(const pattern_support > > >* const& s) { - _esup=s->_esup; _isup=s->_isup; + void set_vals(const pattern_support< + proplist>>> + *const &s) { + _esup = s->_esup; + _isup = s->_isup; } - void set_sup(const std::pair& s) { - _isup=s.first; + void set_sup(const std::pair &s) { + _isup = s.first; _esup = s.second; } // friend extraction - friend std::ostream& operator<< <>(std::ostream&, const pattern_support > > >&); - - private: - int _isup; //induced support - int _esup; //ONLY embedded support -}; //end pattern_support for induced + friend std::ostream &operator<< <>( + std::ostream &, + const pattern_support< + proplist>>> &); + +private: + int _isup; // induced support + int _esup; // ONLY embedded support +}; // end pattern_support for induced #endif diff --git a/src/common/pattern.h b/src/common/pattern.h index e679964..cde84b9 100644 --- a/src/common/pattern.h +++ b/src/common/pattern.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,328 +20,326 @@ #ifndef _PATTERN_H #define _PATTERN_H -// NOTE: only pointers to pattern objects should be maintained, copying +// NOTE: only pointers to pattern objects should be maintained, copying // whole patterns each time shall be expensive in the current setup -#include -#include #include "properties.h" +#include "typedefs.h" +#include +#include using namespace std; - -template class > class CC, - template class ALLOC, class SM_TYPE> +template class CC, class SM_TYPE> class count_support; -template class > class CC, template class ALLOC > +template class CC> class pattern; -template class > class CC, template class ALLOC > -void update_rmost_path(pattern*const&); - -template class > class CC, template class ALLOC > -ostream& operator<<(ostream&, const pattern*); - -template class > class CC, template class ALLOC > -bool check_isomorphism(pattern* const&); +template class CC> +void update_rmost_path(pattern *const &); + +template class CC> +ostream &operator<<(ostream &, const pattern *); + +template class CC> +bool check_isomorphism(pattern *const &); +template class CC> +CC +check_isomorphism(GRAPH_PATTERN *const &cand_pat); +template class CC> +ostream &operator<<(ostream &ostr, const GRAPH_PATTERN *p); #include "adj_list.h" #include "pat_support.h" +template using ALLOC = std::allocator; /** * \brief The Pattern Class * - * Pattern Class takes 4 template arguments. PATTERN_PROPS is the pattern property, - * MINING_PROPS is the mining property, ST is the pattern storage data structure, - * CC is the canonical code class. + * Pattern Class takes 4 template arguments. PATTERN_PROPS is the pattern + * property, MINING_PROPS is the mining property, ST is the pattern storage + * data structure, CC is the canonical code class. */ -template class> class CC, - template class ALLOC=std::allocator > -class pattern -{ - - public: - typedef typename ST::VERTEX_T VERTEX_T; - typedef PATTERN_PROPS PAT_PROPS; - typedef MINING_PROPS MINE_PROPS; - typedef typename ST::EDGE_T EDGE_T; - typedef pattern PATTERN; - typedef typename ST::IT IT; - typedef typename ST::CONST_IT CONST_IT; - typedef typename ST::EIT EIT; - typedef typename ST::CONST_EIT CONST_EIT; - typedef typename ST::EIT_PAIR EIT_PAIR; - typedef typename ST::CONST_EIT_PAIR CONST_EIT_PAIR; - typedef CC CAN_CODE; - typedef std::vector > RMP_T; - typedef typename CAN_CODE::STORAGE_TYPE CC_STORAGE_TYPE; - typedef typename CAN_CODE::INIT_TYPE CC_INIT_TYPE; - typedef typename CAN_CODE::COMPARISON_FUNC CC_COMPARISON_FUNC; - - - void* operator new(size_t size) { +template class CC> +class pattern { + +public: + typedef typename ST::VERTEX_T VERTEX_T; + typedef PATTERN_PROPS PAT_PROPS; + typedef MINING_PROPS MINE_PROPS; + typedef typename ST::EDGE_T EDGE_T; + typedef pattern PATTERN; + typedef typename ST::IT IT; + typedef typename ST::CONST_IT CONST_IT; + typedef typename ST::EIT EIT; + typedef typename ST::CONST_EIT CONST_EIT; + typedef typename ST::EIT_PAIR EIT_PAIR; + typedef typename ST::CONST_EIT_PAIR CONST_EIT_PAIR; + typedef CC CAN_CODE; + typedef std::vector RMP_T; + typedef typename CAN_CODE::STORAGE_TYPE CC_STORAGE_TYPE; + typedef typename CAN_CODE::INIT_TYPE CC_INIT_TYPE; + typedef typename CAN_CODE::COMPARISON_FUNC CC_COMPARISON_FUNC; + + void *operator new(size_t size) { + ALLOC pa; + return pa.allocate(size); + } + + void operator delete(void *p, size_t size) { + if (p) { ALLOC pa; - return pa.allocate(size); + pa.deallocate(static_cast(p), size); } - - void operator delete(void *p, size_t size) { - if (p) { - ALLOC pa; - pa.deallocate(static_cast (p), size); - } - } - - // pattern constructor, mostly does nothing - pattern(): _rmost_vid(-1), _is_canonical(false), _edge_cnt(0) {} - - IT begin() {return _graph.begin();} - CONST_IT begin() const {return _graph.begin();} - IT end() {return _graph.end();} - CONST_IT end() const {return _graph.end();} - - unsigned int size() const { - return _graph.size(); + } + + // pattern constructor, mostly does nothing + pattern() : _rmost_vid(-1), _is_canonical(false), _edge_cnt(0) {} + + IT begin() { return _graph.begin(); } + CONST_IT begin() const { return _graph.begin(); } + IT end() { return _graph.end(); } + CONST_IT end() const { return _graph.end(); } + + unsigned int size() const { return _graph.size(); } + + int rmp_size() const { return _rmost_path.size(); } + + /** Creates a deep copy of this object into rhs */ + pattern *exact_clone() const { + pattern *clone = + new pattern(); + + CONST_IT it; + for (it = this->begin(); it != this->end(); it++) + clone->_graph.push_back(*it); + + clone->_rmost_vid = _rmost_vid; + clone->_rmost_path = _rmost_path; + + clone->_canonical_code = _canonical_code; + clone->_edge_cnt = _edge_cnt; + + clone->_pat_sup = _pat_sup; + + return clone; + } // end clone() + + /** Creates a deep copy of this object into rhs and update the id*/ + pattern *clone() const { + pattern *clone = + new pattern(); + + CONST_IT it; + for (it = this->begin(); it != this->end(); it++) + clone->_graph.push_back(*it); + + clone->_rmost_vid = _rmost_vid; + clone->_rmost_path = _rmost_path; + + clone->_canonical_code = _canonical_code; + clone->_edge_cnt = _edge_cnt; + clone->_canonical_code.update_code(); + + return clone; + } // end clone() + + int rmost_vid() const { return _rmost_vid; }; + void set_rmost_vid(const int &rvid) { _rmost_vid = rvid; } + + bool is_canonical() const { return _is_canonical; } + + const VERTEX_T &rmost_vertex() const { + int rvid = rmost_vid(); + CONST_IT it = _graph.vertex_vals(rvid); + return it->v; + } + + /** Returns vertex object associated with given vid */ + const VERTEX_T &label(const int &vid) const { + CONST_IT it = _graph.vertex_vals(vid); + return it->v; + } // end label() + + /** Adds vertex and makes it the right most vertex, the new + * vertex will get an id in the increment order of the current + * ids, say, it the current max_id is 3, new_id = 4 + */ + int add_vertex(const VERTEX_T &v) { + int new_vertex_id = _graph.add_vertex(v); + set_rmost_vid(new_vertex_id); // new vertex is always right-most-vertex + return rmost_vid(); + } // end add_vertex() + + /** + * Add vertex for a given id. + */ + int add_vertex(int v_id, const VERTEX_T &v) { + int new_vertex_id = _graph.add_vertex(v_id, v); + set_rmost_vid(new_vertex_id); // new vertex is always right-most-vertex + return rmost_vid(); + } // end add_vertex() + + /** Adds an edge to out-edge list of src +Both vertices are required to be already part of the graph */ + void add_out_edge(const int &src, const int &dest, const EDGE_T &e) { + _graph.add_out_edge(src, dest, e); + if (src < dest) { // to make sure that an edge is not double-counted + increase_edge_count(); + // cout << "No-of-edge:" << _edge_cnt << endl; } + } + + /** Adds edge to in-edge list of dest +Both vertices are required to be already part of the graph +This function should be invoked for digraphs ONLY */ + void add_in_edge(const int &dest, const int &src, const EDGE_T &e) { + _graph.add_in_edge(dest, src, e); + if (src < dest) // to make sure that an edge is not double-counted + increase_edge_count(); + } + + /** Returns a pair of iterators, the first of the pair points to the first + entity in the set of out-edges of idval, the second to the end of edges*/ + EIT_PAIR out_edges(const int &idval) { return _graph.out_edges(idval); } - int rmp_size() const { return _rmost_path.size();} - - /** Creates a deep copy of this object into rhs */ - pattern* exact_clone() const { - pattern* clone=new pattern(); - - CONST_IT it; - for(it=this->begin(); it!=this->end(); it++) - clone->_graph.push_back(*it); - - clone->_rmost_vid=_rmost_vid; - clone->_rmost_path=_rmost_path; - - clone->_canonical_code = _canonical_code; - clone->_edge_cnt = _edge_cnt; - - clone->_pat_sup = _pat_sup; - - return clone; - }//end clone() - - - /** Creates a deep copy of this object into rhs and update the id*/ - pattern* clone() const { - pattern* clone=new pattern(); - - CONST_IT it; - for(it=this->begin(); it!=this->end(); it++) - clone->_graph.push_back(*it); - - clone->_rmost_vid=_rmost_vid; - clone->_rmost_path=_rmost_path; - - clone->_canonical_code = _canonical_code; - clone->_edge_cnt = _edge_cnt; - clone->_canonical_code.update_code(); - - - return clone; - }//end clone() - - int rmost_vid() const {return _rmost_vid;}; - void set_rmost_vid(const int& rvid) {_rmost_vid=rvid;} - - bool is_canonical() const {return _is_canonical;} - - const VERTEX_T& rmost_vertex() const { - int rvid=rmost_vid(); - CONST_IT it=_graph.vertex_vals(rvid); - return it->v; - } + CONST_EIT_PAIR out_edges(const int &idval) const { + // cout <<_graph; + return _graph.out_edges(idval); + } - /** Returns vertex object associated with given vid */ - const VERTEX_T& label(const int& vid) const { - CONST_IT it=_graph.vertex_vals(vid); - return it->v; - }//end label() - - /** Adds vertex and makes it the right most vertex, the new - * vertex will get an id in the increment order of the current - * ids, say, it the current max_id is 3, new_id = 4 - */ - int add_vertex(const VERTEX_T& v) { - int new_vertex_id = _graph.add_vertex(v); - set_rmost_vid(new_vertex_id); // new vertex is always right-most-vertex - return rmost_vid(); - }//end add_vertex() - - /** - * Add vertex for a given id. - */ - int add_vertex(int v_id, const VERTEX_T& v) { - int new_vertex_id = _graph.add_vertex(v_id, v); - set_rmost_vid(new_vertex_id); // new vertex is always right-most-vertex - return rmost_vid(); - }//end add_vertex() - - - - - /** Adds an edge to out-edge list of src - Both vertices are required to be already part of the graph */ - void add_out_edge(const int& src, const int& dest, const EDGE_T& e) { - _graph.add_out_edge(src, dest, e); - if (src rhs) const; - bool get_in_edge(const int& src, const int& dest, const EDGE_T& e) const { - return _graph.get_in_edge(src, dest, e); - } + friend ostream & + operator<< <>(ostream &, + const pattern *); - // Unique int identifier for a pattern. - CC_STORAGE_TYPE - pat_id() const { return _canonical_code.getCode(); } + friend CAN_CODE check_isomorphism<>(PATTERN *const &pat); - bool operator< (const pattern rhs) const; + // friend function - this shall be specialized on pattern-props + friend void + update_rmost_path<>(pattern *const &); - friend ostream& operator<< <>(ostream&, const pattern*); + void set_support(const pattern_support *const &pat_sup) { + _pat_sup.set_vals(pat_sup); + } - friend CAN_CODE check_isomorphism <>(PATTERN* const& pat); + /** void set_sup + * \brief Set the support for the pattern. + * Note: Should be only used for level-1 patterns. + */ + void set_sup(const pair &s) { + _pat_sup.set_sup(s); + _is_canonical = true; + } - // friend function - this shall be specialized on pattern-props - friend void update_rmost_path <>(pattern*const&); + bool is_freq(int min_sup) { return _pat_sup.is_freq(min_sup); } - void set_support(const pattern_support* const& pat_sup) { - _pat_sup.set_vals(pat_sup); - } + bool is_valid(const int &ms) const { return (_pat_sup.is_valid(ms)); } - /** void set_sup - * \brief Set the support for the pattern. - * Note: Should be only used for level-1 patterns. - */ - void set_sup(const pair& s) { - _pat_sup.set_sup(s); - _is_canonical=true; - } + /** + * Initializes the canonical code for the pattern. + */ + void init_canonical_code(const CC_INIT_TYPE &cc) { + _canonical_code.init(cc, this); + } - bool is_freq(int min_sup) { - return _pat_sup.is_freq(min_sup); - } - - bool is_valid(const int& ms) const { - return (_pat_sup.is_valid(ms)); - } - - /** - * Initializes the canonical code for the pattern. - */ - void init_canonical_code(const CC_INIT_TYPE& cc) { - _canonical_code.init(cc, this); - } - - const RMP_T& rmost_path() const { return _rmost_path;} + const RMP_T &rmost_path() const { return _rmost_path; } - void update_rmpath(int val) { - _rmost_path.push_back(val); - } + void update_rmpath(int val) { _rmost_path.push_back(val); } - // check whether this pattern has any vertex with a given vertex-label - vector * get_vids_for_this_label(VERTEX_T v_label) const { - vector * ret_val = new vector(); - for (unsigned int i = 0; i < size(); i++) { - CONST_IT it=_graph.vertex_vals(i); - if (it->v == v_label) { - ret_val->push_back(i); - } + // check whether this pattern has any vertex with a given vertex-label + vector *get_vids_for_this_label(VERTEX_T v_label) const { + vector *ret_val = new vector(); + for (unsigned int i = 0; i < size(); i++) { + CONST_IT it = _graph.vertex_vals(i); + if (it->v == v_label) { + ret_val->push_back(i); } - return ret_val; } - int get_edge_freq(VERTEX_T src_l, VERTEX_T dest_l, EDGE_T e_l) { - vector* s_ids; - vector* d_ids; - vector::const_iterator sit, dit; - EDGE_T e; - int count = 0; - - s_ids = get_vids_for_this_label(src_l); - if (s_ids == 0) return 0; // this label don't present in the graph - if (src_l == dest_l) { - if (s_ids->size() == 1) return 0; - for (sit = s_ids->begin(); sit < s_ids->end()-1; sit++) - for (dit = s_ids->begin() + 1; dit < s_ids->end(); dit++) - if (get_out_edge(*sit, *dit, e)) - if (e == e_l) count++; - } - else { - d_ids = get_vids_for_this_label(dest_l); - if (s_ids == 0) return 0; // this label don't present in the graph - for (sit = s_ids->begin(); sit < s_ids->end(); sit++) - for (dit = d_ids->begin(); dit < s_ids->end(); dit++) - if (get_out_edge(*sit, *dit, e)) - if (e == e_l) count++; - } - return count; + return ret_val; + } + int get_edge_freq(VERTEX_T src_l, VERTEX_T dest_l, EDGE_T e_l) { + vector *s_ids; + vector *d_ids; + vector::const_iterator sit, dit; + EDGE_T e; + int count = 0; + + s_ids = get_vids_for_this_label(src_l); + if (s_ids == 0) + return 0; // this label don't present in the graph + if (src_l == dest_l) { + if (s_ids->size() == 1) + return 0; + for (sit = s_ids->begin(); sit < s_ids->end() - 1; sit++) + for (dit = s_ids->begin() + 1; dit < s_ids->end(); dit++) + if (get_out_edge(*sit, *dit, e)) + if (e == e_l) + count++; + } else { + d_ids = get_vids_for_this_label(dest_l); + if (s_ids == 0) + return 0; // this label don't present in the graph + for (sit = s_ids->begin(); sit < s_ids->end(); sit++) + for (dit = d_ids->begin(); dit < s_ids->end(); dit++) + if (get_out_edge(*sit, *dit, e)) + if (e == e_l) + count++; } - + return count; + } - void increase_edge_count() {_edge_cnt++;} + void increase_edge_count() { _edge_cnt++; } - unsigned int get_edge_count() const { return _edge_cnt;} + unsigned int get_edge_count() const { return _edge_cnt; } - const CAN_CODE& canonical_code() const { return _canonical_code;} + const CAN_CODE &canonical_code() const { return _canonical_code; } - CAN_CODE& canonical_code() { return _canonical_code;} - - pattern_support _pat_sup; + CAN_CODE &canonical_code() { return _canonical_code; } - private: + pattern_support _pat_sup; - ST _graph; - CAN_CODE _canonical_code; - int _rmost_vid; //id of right-most vertex of this pattern - bool _is_canonical; - unsigned int _edge_cnt; //no of edge in this pattern - RMP_T _rmost_path; //ids of vertices on right most path +private: + ST _graph; + CAN_CODE _canonical_code; + int _rmost_vid; // id of right-most vertex of this pattern + bool _is_canonical; + unsigned int _edge_cnt; // no of edge in this pattern + RMP_T _rmost_path; // ids of vertices on right most path -}; //end class pattern +}; // end class pattern #endif diff --git a/src/common/properties.h b/src/common/properties.h index cbf09fe..206a4e3 100644 --- a/src/common/properties.h +++ b/src/common/properties.h @@ -1,9 +1,8 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu - * Modifications: - * Added tokenizer properties -- Zaki, 5/8/06 + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu Modifications: Added tokenizer + * properties -- Zaki, 5/8/06 * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,9 +19,9 @@ * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -41,51 +40,59 @@ #ifndef _PROPERTIES_H_ #define _PROPERTIES_H_ +class property {}; /**< Root property class */ -class property {}; /**< Root property class */ - -class pat_property {}; /**< Root Pattern property class */ -class directed: public property {}; /**< Dummy property class, directed */ -class undirected: public property {}; /**< Dummy property class, undirected */ -class no_edges: public property {}; /**< Dummy property class, no_edges */ -class uniq_label: public property {}; /**< Dummy property class, uniq_label */ -class cyclic: public property {}; /**< Dummy property class, cyclic */ -class acyclic: public property {}; /**< Dummy property class, acyclic */ -class indegree_lte_one: public property {}; /**< Dummy property class, indegree_lte_one */ -class outdegree_lte_one: public property {}; /**< Dummy property class, outdegree_lte_one */ -class ordered: public property {}; /**< Dummy property class, ordered */ -class unordered: public property {}; /**< Dummy property class, unordered */ -class null_prop: public property {}; /**< Null property class, used to terminate the proplist */ +class pat_property {}; /**< Root Pattern property class */ +class directed : public property {}; /**< Dummy property class, directed */ +class undirected : public property {}; /**< Dummy property class, undirected */ +class no_edges : public property {}; /**< Dummy property class, no_edges */ +class uniq_label : public property {}; /**< Dummy property class, uniq_label */ +class cyclic : public property {}; /**< Dummy property class, cyclic */ +class acyclic : public property {}; /**< Dummy property class, acyclic */ +class indegree_lte_one : public property { +}; /**< Dummy property class, indegree_lte_one */ +class outdegree_lte_one : public property { +}; /**< Dummy property class, outdegree_lte_one */ +class ordered : public property {}; /**< Dummy property class, ordered */ +class unordered : public property {}; /**< Dummy property class, unordered */ +class null_prop : public property { +}; /**< Null property class, used to terminate the proplist */ // end pattern property definitions // - // mining property class definitions // -class mining_prop: public property {}; /**< Root Mining property class */ -class embedded: public mining_prop {}; /**< Dummy mining property class for embedded mining */ -class induced: public mining_prop {}; /**< Dummy mining property class for induced mining */ -class Fk_Fk: public mining_prop {}; /**< Dummy mining property class for Fk_Fk join */ -class Fk_F1: public mining_prop {}; /**< Dummy mining property class for Fk X F1 join */ -class vert_mine: public mining_prop {}; /**< Dummy mining property class for vertical mining */ -class horiz_mine: public mining_prop {}; /**< Dummy mining property class for horizontal mining */ +class mining_prop : public property {}; /**< Root Mining property class */ +class embedded : public mining_prop { +}; /**< Dummy mining property class for embedded mining */ +class induced : public mining_prop { +}; /**< Dummy mining property class for induced mining */ +class Fk_Fk : public mining_prop { +}; /**< Dummy mining property class for Fk_Fk join */ +class Fk_F1 : public mining_prop { +}; /**< Dummy mining property class for Fk X F1 join */ +class vert_mine : public mining_prop { +}; /**< Dummy mining property class for vertical mining */ +class horiz_mine : public mining_prop { +}; /**< Dummy mining property class for horizontal mining */ // end mining pros // -//tokenizer property class definitions// -class tokenizer_prop: public property {}; /**< Root Tokenizer property class */ -class dmtl_format: public tokenizer_prop {}; /**< Dummy tokenizer class for default DMTL formatted input files */ -class fasta_format: public tokenizer_prop {}; /**< Dummy tokenizer class for FASTA formatted sequence files */ - +// tokenizer property class definitions// +class tokenizer_prop : public property {}; /**< Root Tokenizer property class */ +class dmtl_format : public tokenizer_prop { +}; /**< Dummy tokenizer class for default DMTL formatted input files */ +class fasta_format : public tokenizer_prop { +}; /**< Dummy tokenizer class for FASTA formatted sequence files */ /** -* \brief Property list Class to list the properties for generic pattern mining algorithm + * \brief Property list Class to list the properties for generic pattern mining + * algorithm * - * Property list class takes two template arguments, the first one is a property class and the second one is - * the next property class and so on. To terminate the list, null_prop class is used. + * Property list class takes two template arguments, the first one is a property + * class and the second one is the next property class and so on. To terminate + * the list, null_prop class is used. */ -template -class proplist -{ +template class proplist { public: typedef prop FIRST; typedef next_property SECOND; diff --git a/src/common/time_tracker.h b/src/common/time_tracker.h index 7005137..e3256d4 100644 --- a/src/common/time_tracker.h +++ b/src/common/time_tracker.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -30,38 +30,39 @@ */ class time_tracker { - private: +private: struct timeval start_time; struct timeval stop_time; - bool running; + bool running; double curr_time; - public: + +public: time_tracker() { - running=false; - curr_time=0; + running = false; + curr_time = 0; } /** Start the timer */ void start() { gettimeofday(&start_time, (struct timezone *)0); - running=true; + running = true; } /** Stop the timer and return the total time taken */ void stop() { double st, en; - if (!running) return; + if (!running) + return; else { gettimeofday(&stop_time, (struct timezone *)0); - st = start_time.tv_sec + (start_time.tv_usec/microsec); - en = stop_time.tv_sec + (stop_time.tv_usec/microsec); - running=false; - curr_time+=en-st; + st = start_time.tv_sec + (start_time.tv_usec / microsec); + en = stop_time.tv_sec + (stop_time.tv_usec / microsec); + running = false; + curr_time += en - st; } } - double print() const - { return curr_time; } + double print() const { return curr_time; } }; #endif diff --git a/src/common/tokenizer_utils.h b/src/common/tokenizer_utils.h index 421ba3f..c119021 100644 --- a/src/common/tokenizer_utils.h +++ b/src/common/tokenizer_utils.h @@ -1,9 +1,8 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu - * Modifications: - * Added LINE_SZ -- zaki 5/15/06 + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu Modifications: Added LINE_SZ -- zaki + * 5/15/06 * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -22,30 +21,29 @@ #ifndef _TOKENIZER_UTILS_H_ #define _TOKENIZER_UTILS_H_ -#define LINE_SZ 10000 //number of characters per line in database file +#define LINE_SZ 10000 // number of characters per line in database file -struct parse_word -{ +struct parse_word { /** - * Reads in a delim-delimited set of characsters from line into word, - * and returns pointer to next char that needs to be read - */ - char* operator() (char* line, char* word, char delim=' ') const { + * Reads in a delim-delimited set of characsters from line into word, + * and returns pointer to next char that needs to be read + */ + char *operator()(char *line, char *word, char delim = ' ') const { - while(*line && *line!=delim) { - *word=*line; + while (*line && *line != delim) { + *word = *line; word++; line++; } - *word='\0'; + *word = '\0'; - if(*line==delim) { + if (*line == delim) { line++; } return line; - } //end parse_word() + } // end parse_word() }; #endif diff --git a/src/common/typedefs.h b/src/common/typedefs.h index c3a66c3..7528867 100644 --- a/src/common/typedefs.h +++ b/src/common/typedefs.h @@ -1,9 +1,8 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu - * Modifications: - * added IBM and FASTA tokenizers -- Zaki, 5/8/06 + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu Modifications: added IBM and FASTA + * tokenizers -- Zaki, 5/8/06 * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -22,49 +21,68 @@ #ifndef _TYPEDEFS_H_ #define _TYPEDEFS_H_ - #include "properties.h" // Mining properties #defines -#define ISET_PROP proplist > // PP stands for Pattern Property +#define ISET_PROP \ + proplist> // PP stands for Pattern Property -#define MSET_PROP proplist // PP stands for Pattern Property +#define MSET_PROP proplist // PP stands for Pattern Property -#define SEQ_PROP proplist > > > +#define SEQ_PROP \ + proplist>>> -#define TREE_PROP proplist > > +#define TREE_PROP \ + proplist>> -#define ORD_TREE_PROP proplist > > > +#define ORD_TREE_PROP \ + proplist< \ + directed, \ + proplist>>> -#define UNORD_TREE_PROP proplist > > > +#define UNORD_TREE_PROP \ + proplist< \ + directed, \ + proplist>>> -#define GRAPH_PROP proplist +#define GRAPH_PROP proplist -// Mining properties #defines -#define V_Fkk_MINE_PROP proplist > // MP stands for Mining property +// Mining properties #defines +#define V_Fkk_MINE_PROP \ + proplist> // MP stands for Mining property/ -#define V_Fkk_EMB_MINE_PROP proplist > > +#define V_Fkk_EMB_MINE_PROP \ + proplist>> -#define V_Fkk_IND_MINE_PROP proplist > > +#define V_Fkk_IND_MINE_PROP \ + proplist>> -#define V_Fk1_MINE_PROP proplist > +#define V_Fk1_MINE_PROP proplist> // Tokenizer properties #defines -#define DMTL_TKNZ_PROP proplist //TP stands for tokenizer property -#define FASTA_TKNZ_PROP proplist //TP stands for tokenizer property +#define DMTL_TKNZ_PROP \ + proplist // TP stands for tokenizer property +#define FASTA_TKNZ_PROP \ + proplist // TP stands for tokenizer property // Itemset Mining #defines -#define ISET_PATTERN pattern // PAT_ST is pattern storage type, CC is for Canonical Code +#define ISET_PATTERN \ + pattern // PAT_ST is pattern storage type, CC is for Canonical Code // Multiset Mining #defines -#define MSET_PATTERN pattern // PAT_ST is pattern storage type, CC is for Canonical Code +#define MSET_PATTERN \ + pattern // PAT_ST is pattern storage type, CC is for Canonical Code // Sequence Mining #defines -#define SEQ_PATTERN pattern +#define SEQ_PATTERN pattern // Tree Mining #defines -#define TREE_PATTERN pattern +#define TREE_PATTERN pattern // Graph Mining #defines -#define GRAPH_PATTERN pattern +#define GRAPH_PATTERN pattern #endif diff --git a/src/graph/compare_result.cpp b/src/graph/compare_result.cpp index d30c969..cdff107 100644 --- a/src/graph/compare_result.cpp +++ b/src/graph/compare_result.cpp @@ -1,16 +1,14 @@ -#include +#include "StringTokenizer.h" +#include #include +#include +#include #include #include -#include -#include -#include "StringTokenizer.h" using namespace std; -template -struct edge_counter -{ +template struct edge_counter { typedef pair, E_T> EDGE_T; typedef typename map::iterator IT; typedef typename map::const_iterator CIT; @@ -24,8 +22,10 @@ struct edge_counter else e = make_pair(make_pair(dest_l, src_l), edge_l); CIT cit = _counter.find(e); - if (cit != _counter.end()) return cit->second; - else return 0; + if (cit != _counter.end()) + return cit->second; + else + return 0; } void insert(V_T src_l, V_T dest_l, E_T edge_l) { EDGE_T e; @@ -43,147 +43,160 @@ struct edge_counter CIT cit = _counter.begin(); for (; cit != _counter.end(); cit++) - cout << "(" << cit->first.first.first << " " << cit->first.second << " " << cit->first.first.second << "):" << cit->second << endl; + cout << "(" << cit->first.first.first << " " << cit->first.second << " " + << cit->first.first.second << "):" << cit->second << endl; } - double operator-(const edge_counter& other) const { - const map& other_cnt = other._counter; + double operator-(const edge_counter &other) const { + const map &other_cnt = other._counter; CIT cit; - } - bool operator<(const edge_counter& other) const { - const map& other_cnt = other._counter; + bool operator<(const edge_counter &other) const { + const map &other_cnt = other._counter; return (_counter < other_cnt); } - bool operator==(const edge_counter& other) const { - const map& other_cnt = other._counter; + bool operator==(const edge_counter &other) const { + const map &other_cnt = other._counter; return !(_counter < other_cnt || other_cnt < _counter); } }; -int load_spin_result(const char* node_file, const char* edge_file, vector *>& all_graphs) { - - ifstream nodefile(node_file, ios::in); - ifstream edgefile(edge_file, ios::in); - - vector *> vertex_label; - string one_line; - int linesize, last_graph_id = -1; - vector * a_graph = 0; - - if (!nodefile) { - cout << "Input file:"<< node_file << " could not open to read from!" << endl; - exit(1); - } - - if (!edgefile) { - cout << "Input file:"<< edge_file << " could not open to read from!" << endl; - exit(1); - } - -//////////// ///////////// Reading Node file //////////////////////////////////// - while (!nodefile.eof()) { - std::getline(nodefile, one_line); - linesize = one_line.length(); - if (linesize < 5) break; - StringTokenizer strtok = StringTokenizer(one_line, " "); - int cnt = strtok.countTokens(); - assert(cnt == 4); - strtok.nextToken(); //skiping the word "node" - int graph_id = strtok.nextIntToken(); - if (graph_id != last_graph_id) { - if (a_graph) vertex_label.push_back(a_graph); - a_graph = new vector(); - last_graph_id = graph_id; - } - strtok.nextToken(); // skipping the vertex id, as it is always ascending - int v_label = strtok.nextIntToken(); - a_graph->push_back(v_label); - } - vertex_label.push_back(a_graph); - nodefile.close(); - -//////////// ///////////// Reading Edge file //////////////////////////////////// -last_graph_id = -1; -edge_counter* one_edc = 0; - -while (!edgefile.eof()) { - std::getline(edgefile, one_line); - linesize = one_line.length(); - if (linesize < 5) break; - StringTokenizer strtok = StringTokenizer(one_line, " "); - int cnt = strtok.countTokens(); - assert(cnt == 5); - strtok.nextToken(); //skiping the word "edge" - int graph_id = strtok.nextIntToken(); - if (graph_id != last_graph_id) { - if (one_edc) all_graphs.push_back(one_edc); - one_edc = new edge_counter(); - last_graph_id = graph_id; - } - int v1 = strtok.nextIntToken(); - int v2 = strtok.nextIntToken(); - int e_label = strtok.nextIntToken(); - vector* &v_vector = vertex_label[graph_id]; - int v1_label = (*v_vector)[v1]; - int v2_label = (*v_vector)[v2]; - one_edc->insert(v1_label, v2_label, e_label); - } - all_graphs.push_back(one_edc); - edgefile.close(); - - ////////////////////// Freeing memory ////////////////////////////////////////////// - for (int i = 0; i < vertex_label.size(); i++) - delete vertex_label[i]; +int load_spin_result(const char *node_file, const char *edge_file, + vector *> &all_graphs) { + + ifstream nodefile(node_file, ios::in); + ifstream edgefile(edge_file, ios::in); + + vector *> vertex_label; + string one_line; + int linesize, last_graph_id = -1; + vector *a_graph = 0; + + if (!nodefile) { + cout << "Input file:" << node_file << " could not open to read from!" + << endl; + exit(1); + } + + if (!edgefile) { + cout << "Input file:" << edge_file << " could not open to read from!" + << endl; + exit(1); + } + + //////////// ///////////// Reading Node file + /////////////////////////////////////// + while (!nodefile.eof()) { + std::getline(nodefile, one_line); + linesize = one_line.length(); + if (linesize < 5) + break; + StringTokenizer strtok = StringTokenizer(one_line, " "); + int cnt = strtok.countTokens(); + assert(cnt == 4); + strtok.nextToken(); // skiping the word "node" + int graph_id = strtok.nextIntToken(); + if (graph_id != last_graph_id) { + if (a_graph) + vertex_label.push_back(a_graph); + a_graph = new vector(); + last_graph_id = graph_id; + } + strtok.nextToken(); // skipping the vertex id, as it is always ascending + int v_label = strtok.nextIntToken(); + a_graph->push_back(v_label); + } + vertex_label.push_back(a_graph); + nodefile.close(); + + //////////// ///////////// Reading Edge file + /////////////////////////////////////// + last_graph_id = -1; + edge_counter *one_edc = 0; + + while (!edgefile.eof()) { + std::getline(edgefile, one_line); + linesize = one_line.length(); + if (linesize < 5) + break; + StringTokenizer strtok = StringTokenizer(one_line, " "); + int cnt = strtok.countTokens(); + assert(cnt == 5); + strtok.nextToken(); // skiping the word "edge" + int graph_id = strtok.nextIntToken(); + if (graph_id != last_graph_id) { + if (one_edc) + all_graphs.push_back(one_edc); + one_edc = new edge_counter(); + last_graph_id = graph_id; + } + int v1 = strtok.nextIntToken(); + int v2 = strtok.nextIntToken(); + int e_label = strtok.nextIntToken(); + vector *&v_vector = vertex_label[graph_id]; + int v1_label = (*v_vector)[v1]; + int v2_label = (*v_vector)[v2]; + one_edc->insert(v1_label, v2_label, e_label); + } + all_graphs.push_back(one_edc); + edgefile.close(); + ////////////////////// Freeing memory + ///////////////////////////////////////////////// + for (int i = 0; i < vertex_label.size(); i++) + delete vertex_label[i]; } -int load_our_result(const char* our_file, vector *>& all_graphs) { - - ifstream ourfile(our_file, ios::in); - - string one_line; - int linesize; - - edge_counter* one_edc = 0; - - if (!ourfile) { - cout << "Input file:"<< our_file << " could not open to read from!" << endl; - exit(1); - } - - while (!ourfile.eof()) { - std::getline(ourfile, one_line); - linesize = one_line.length(); - if (linesize < 5) break; - StringTokenizer strtok = StringTokenizer(one_line, " "); - int cnt = strtok.countTokens(); - assert(cnt == 5); - int vid1 = strtok.nextIntToken(); // finding the vid of edge-1 - int vid2 = strtok.nextIntToken(); // finding the other vid of edge-1 - if (vid1 == 0 && vid2 == 1) { // new graph starts - if (one_edc) all_graphs.push_back(one_edc); - one_edc = new edge_counter(); - } - int v1_label = strtok.nextIntToken(); - int e_label = strtok.nextIntToken(); - int v2_label = strtok.nextIntToken(); - one_edc->insert(v1_label, v2_label, e_label); - } - all_graphs.push_back(one_edc); - ourfile.close(); +int load_our_result(const char *our_file, + vector *> &all_graphs) { + + ifstream ourfile(our_file, ios::in); + + string one_line; + int linesize; + + edge_counter *one_edc = 0; + + if (!ourfile) { + cout << "Input file:" << our_file << " could not open to read from!" + << endl; + exit(1); + } + + while (!ourfile.eof()) { + std::getline(ourfile, one_line); + linesize = one_line.length(); + if (linesize < 5) + break; + StringTokenizer strtok = StringTokenizer(one_line, " "); + int cnt = strtok.countTokens(); + assert(cnt == 5); + int vid1 = strtok.nextIntToken(); // finding the vid of edge-1 + int vid2 = strtok.nextIntToken(); // finding the other vid of edge-1 + if (vid1 == 0 && vid2 == 1) { // new graph starts + if (one_edc) + all_graphs.push_back(one_edc); + one_edc = new edge_counter(); + } + int v1_label = strtok.nextIntToken(); + int e_label = strtok.nextIntToken(); + int v2_label = strtok.nextIntToken(); + one_edc->insert(v1_label, v2_label, e_label); + } + all_graphs.push_back(one_edc); + ourfile.close(); } -int main(int argc, char *argv[]){ +int main(int argc, char *argv[]) { if (argc < 3) { cout << "usage:"; - cout << "\t " <* > all_graphs; - vector* > all_graphs2; + } else { + vector *> all_graphs; + vector *> all_graphs2; load_spin_result(argv[1], argv[2], all_graphs); load_our_result(argv[3], all_graphs2); for (int i = 0; i < all_graphs.size(); i++) { @@ -198,16 +211,16 @@ int main(int argc, char *argv[]){ ////////// Now comparing the two ////////////////////// int common = 0; for (int i = 0; i < all_graphs.size(); i++) { - for (int j = 0; j < all_graphs2.size(); j++) { - edge_counter& p = *all_graphs[i]; - edge_counter& q = *all_graphs2[j]; - if ( p < q ) { - common++; - break; - } + for (int j = 0; j < all_graphs2.size(); j++) { + edge_counter &p = *all_graphs[i]; + edge_counter &q = *all_graphs2[j]; + if (p < q) { + common++; + break; + } } } - double coverage = (double) common/ all_graphs.size(); + double coverage = (double)common / all_graphs.size(); cout << setprecision(4) << "Coverage:" << coverage << endl; } -} //main +} // main diff --git a/src/graph/graph_can_code.h b/src/graph/graph_can_code.h index 0aed2e6..d42ae9c 100644 --- a/src/graph/graph_can_code.h +++ b/src/graph/graph_can_code.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -22,23 +22,22 @@ using namespace std; -#include +#include "generic_classes.h" +#include #include +#include +#include +#include #include -#include -#include -#include "generic_classes.h" - time_tracker tt_iostream; -template -struct five_tuple; +template struct five_tuple; -template -ostream& operator<< (ostream&, const five_tuple&); +template +ostream &operator<<(ostream &, const five_tuple &); -// NOTE: should the labels (_li, _lij, _lj) in thus struct be stored as +// NOTE: should the labels (_li, _lij, _lj) in thus struct be stored as // references (to avoid copying them) ?? Will that work ?? /** @@ -47,81 +46,87 @@ ostream& operator<< (ostream&, const five_tuple&); * are the 5-tuple. * It is used as part of the canonical code of a graph. */ -template -struct five_tuple -{ +template struct five_tuple { five_tuple() {} - five_tuple(const int& id1, const int& id2, const V_T& li, const E_T& lij, const V_T& lj): _i(id1), _j(id2), _li(li), _lj(lj), _lij(lij) {} + five_tuple(const int &id1, const int &id2, const V_T &li, const E_T &lij, + const V_T &lj) + : _i(id1), _j(id2), _li(li), _lj(lj), _lij(lij) {} - bool operator== (const five_tuple& rhs) const { + bool operator==(const five_tuple &rhs) const { - if((_i == rhs._i) && (_j == rhs._j) && (_li == rhs._li) && - (_lj == rhs._lj) && (_lij == rhs._lij)) + if ((_i == rhs._i) && (_j == rhs._j) && (_li == rhs._li) && + (_lj == rhs._lj) && (_lij == rhs._lij)) return true; // When the dest id is negative and all other things are the same // even then the tuples are the same. - if((_i == rhs._i) && (_j < 0) && (rhs._j < 0) && (_li == rhs._li) && - (_lj == rhs._lj) && (_lij == rhs._lij)) + if ((_i == rhs._i) && (_j < 0) && (rhs._j < 0) && (_li == rhs._li) && + (_lj == rhs._lj) && (_lij == rhs._lij)) return true; - - return false; + return false; } - bool operator< (const five_tuple& rhs) const { + bool operator<(const five_tuple &rhs) const { // follows ordering given on pg 10 of gSpan TR - bool is_fwd=(_i<_j); - bool rhs_is_fwd=(rhs._irhs._i) { // if both forward edge, _i > rhs._i + if (is_fwd && rhs_is_fwd && + _i > rhs._i) { // if both forward edge, _i > rhs._i return true; } - if(is_fwd && rhs_is_fwd && _i==rhs._i && _li(ostream&, const five_tuple&); + friend ostream &operator<< <>(ostream &, const five_tuple &); int _i; int _j; @@ -129,80 +134,81 @@ struct five_tuple V_T _lj; E_T _lij; -};//end struct five_tuple - +}; // end struct five_tuple -template -ostream& operator<< (ostream& ostr, const five_tuple& tuple) { - ostr< +ostream &operator<<(ostream &ostr, const five_tuple &tuple) { + ostr << tuple._i << " " << tuple._j << " " << tuple._li << " " << tuple._lij + << " " << tuple._lj; return ostr; } /** * \struct less_than for edge sets, represented as a 5-tuple. - * Used to order a list of edges. This function looks only + * Used to order a list of edges. This function looks only * at the vertex labels and the edge label. * This function is not used for ordering the edges in canonical * order. */ -template -struct lt_five_tuple -{ +template struct lt_five_tuple { -/*** - bool operator()(const five_tuple t1, const five_tuple t2) const { + /*** + bool operator()(const five_tuple t1, const five_tuple + t2) const { - // return ((t1._li < t2._li) || ((t1._li == t2._li) && (t1._lij < t2._lij)) || - // ((t1._li == t2._li) && (t1._lij == t2._lij) && (t1._lj < t2._lj))); + // return ((t1._li < t2._li) || ((t1._li == t2._li) && (t1._lij < + t2._lij)) || + // ((t1._li == t2._li) && (t1._lij == t2._lij) && (t1._lj < + t2._lj))); - // First come the back edges and then come the forward edges. + // First come the back edges and then come the forward edges. - if(t1._li < t2._li) { - return true; - - } else if((t1._li == t2._li) && (t1._lij < t2._lij)) { - - if((t1._lj >= 0 && t2._lj >= 0) && (t1._lij < t2._lij)) || - ((t1._li == t2._li) && (t1._lij == t2._lij) && (t1._lj < t2._lj))); + if(t1._li < t2._li) { + return true; + } else if((t1._li == t2._li) && (t1._lij < t2._lij)) { - return false; - } -***/ + if((t1._lj >= 0 && t2._lj >= 0) && (t1._lij < t2._lij)) || + ((t1._li == t2._li) && (t1._lij == t2._lij) && (t1._lj < + t2._lj))); + return false; + } + ***/ /** * Returns true if t1 < t2 */ - bool operator()(const five_tuple t1, const five_tuple t2) const { + bool operator()(const five_tuple t1, + const five_tuple t2) const { - if(t1._li < t2._li) { + if (t1._li < t2._li) { return true; - } else if(t1._li == t2._li) { // Source edge is the same. + } else if (t1._li == t2._li) { // Source edge is the same. - if(t1._j < 0 && t2._j >= 0) // t1 is fwd, t2 is back. + if (t1._j < 0 && t2._j >= 0) // t1 is fwd, t2 is back. return false; - if(t1._j >= 0 && t2._j < 0) // t1 is back, t2 is fwd. + if (t1._j >= 0 && t2._j < 0) // t1 is back, t2 is fwd. return true; - if(t1._j >= 0 && t2._j >= 0) { // Both back edges. - if(t1._j > t2._j) // Back edges to lower numbered edges come first. + if (t1._j >= 0 && t2._j >= 0) { // Both back edges. + if (t1._j > t2._j) // Back edges to lower numbered edges come first. return true; else return false; } // Reach here only if both forward edges. - if(t1._lij < t2._lij) // edge label of t1 < t2 edge label. + if (t1._lij < t2._lij) // edge label of t1 < t2 edge label. return true; - else if(t1._lij > t2._lij) + else if (t1._lij > t2._lij) return false; - // Both edge labels are same, then the last criterion - if(t1._lj < t2._lj) + // Both edge labels are same, then the last criterion + if (t1._lj < t2._lj) return true; } @@ -210,54 +216,51 @@ struct lt_five_tuple } }; - /** * \struct less_than for candidate edge sets, represented as a 5-tuple. * Used to order a list of edges by the canonical ordering. * The first one will be used to extend the current pattern. * - * This code assumes that the first node in both the edges is + * This code assumes that the first node in both the edges is * the same. */ -template -struct lt_five_tuple_can_order -{ +template struct lt_five_tuple_can_order { // // Returns true if t1 < t2 // - bool operator()(const five_tuple t1, const five_tuple t2) const { + bool operator()(const five_tuple t1, + const five_tuple t2) const { bool is_t1_back = true, is_t2_back = true; - if(t1._j == -1) // Is t1 back_edge? + if (t1._j == -1) // Is t1 back_edge? is_t1_back = false; - if(t2._j == -1) // Is t2 back_edge? + if (t2._j == -1) // Is t2 back_edge? is_t2_back = false; - if((is_t1_back && is_t2_back) || (!is_t1_back && !is_t2_back)) { // Both back edges or both fwd. + if ((is_t1_back && is_t2_back) || + (!is_t1_back && !is_t2_back)) { // Both back edges or both fwd. - if(t1._j < t2._j) + if (t1._j < t2._j) return true; else return false; - } else if(is_t1_back && !is_t2_back) { // t1 back, t2 forward. + } else if (is_t1_back && !is_t2_back) { // t1 back, t2 forward. return true; - } else if(!is_t1_back && is_t2_back) { // t2 back, t1 forward. + } else if (!is_t1_back && is_t2_back) { // t2 back, t1 forward. return false; } } }; +template +class canonical_code; -template class ALLOC > -class canonical_code; - -template class ALLOC > -ostream& operator<< (ostream&, const canonical_code&); - +template +ostream &operator<<(ostream &, const canonical_code &); /** * \brief Graph canonical Code class by partial specialization of @@ -265,48 +268,50 @@ ostream& operator<< (ostream&, const canonical_code class ALLOC> -class canonical_code -{ - public: - +template +class canonical_code { +public: typedef int STORAGE_TYPE; typedef five_tuple FIVE_TUPLE; typedef FIVE_TUPLE INIT_TYPE; typedef eqint COMPARISON_FUNC; - typedef vector > TUPLES; + typedef vector TUPLES; typedef typename TUPLES::const_iterator CONST_IT; typedef typename TUPLES::iterator IT; - typedef canonical_code CAN_CODE; // this class type - typedef HASHNS::hash_map, std::equal_to, ALLOC > VID_HMAP; // hash an int-->int + typedef canonical_code CAN_CODE; + typedef std::unordered_map VID_HMAP; typedef typename VID_HMAP::const_iterator VM_CONST_IT; - typedef vector > RMP_T; + typedef vector RMP_T; canonical_code() : _can_code(id_generator++) {} // defunct default constructor /** Parameterized constructor that inserts ft as first tuple into DFS code, it also takes two vertex-id and store them in hashmap */ - canonical_code(const FIVE_TUPLE& ft, const int&gi, const int& gj) { + canonical_code(const FIVE_TUPLE &ft, const int &gi, const int &gj) { append(ft, gi, gj); } - //dfs code is just a vector of five_tuple, this begin() returns the five-tuple of 1st edge - IT begin() { return _dfs_code.begin();} - CONST_IT begin() const { return _dfs_code.begin();} - IT end() { return _dfs_code.end();} - CONST_IT end() const { return _dfs_code.end();} + // dfs code is just a vector of five_tuple, this begin() returns the + // five-tuple of 1st edge + IT begin() { return _dfs_code.begin(); } + CONST_IT begin() const { return _dfs_code.begin(); } + IT end() { return _dfs_code.end(); } + CONST_IT end() const { return _dfs_code.end(); } - bool is_present(const FIVE_TUPLE& ft) { + bool is_present(const FIVE_TUPLE &ft) { FIVE_TUPLE other(ft._j, ft._i, ft._lj, ft._lij, ft._li); - if((_dfs_code.find(ft) == _dfs_code.end()) && (_dfs_code.find(other) == _dfs_code.end())) + if ((_dfs_code.find(ft) == _dfs_code.end()) && + (_dfs_code.find(other) == _dfs_code.end())) return false; - else + else return true; } - int size() const { return _dfs_code.size();} // how many edges are there in the code? + int size() const { + return _dfs_code.size(); + } // how many edges are there in the code? void clear() { _dfs_code.clear(); @@ -315,13 +320,15 @@ class canonical_code _rmp.clear(); } - const FIVE_TUPLE& operator[](const int& index) const { return _dfs_code[index];} + const FIVE_TUPLE &operator[](const int &index) const { + return _dfs_code[index]; + } - // initializing rmp, rmp is a vector of integer, it always inilializes as (0,1) - // since, in our graph dataset, any graph's vertex id are integer and id starts - // with 0. + // initializing rmp, rmp is a vector of integer, it always inilializes as + // (0,1) since, in our graph dataset, any graph's vertex id are integer and id + // starts with 0. void init_rmp() { - if(!_rmp.empty()) + if (!_rmp.empty()) _rmp.clear(); _rmp.push_back(0); _rmp.push_back(1); @@ -332,43 +339,43 @@ class canonical_code // a new candidate by adding an edge to a pattern. // The parameter passed is the five-tuple corresponding to the new edge // THIS ROUTINE IS CALLED IN update_rmpath() in graph_iso_check.h - void update_rmp(const FIVE_TUPLE& tuple) { + void update_rmp(const FIVE_TUPLE &tuple) { // if the right most path is empty, it is always // a forward edge and added by putting the two // id's of the graph - if(_rmp.empty()) { + if (_rmp.empty()) { _rmp.push_back(tuple._i); _rmp.push_back(tuple._j); return; } // no changes to rmp if it's a back-edge - if(tuple._i>tuple._j) + if (tuple._i > tuple._j) return; // Here is an example how rmp can change: // consider a graph's rmp is like, 1---4-----3-----2 // at this point, an edge (4---5) is added with the vertex 4 - // like below: + // like below: // ---------5 // | // 1---4----3------2 // new rightmost path is: 1---4-----5 - typename RMP_T::iterator rmp_it=_rmp.end()-1; - while(rmp_it>=_rmp.begin()) { - if(*rmp_it==tuple._i) // finding whith vertex the forward edge connect's to + typename RMP_T::iterator rmp_it = _rmp.end() - 1; + while (rmp_it >= _rmp.begin()) { + if (*rmp_it == + tuple._i) // finding whith vertex the forward edge connect's to break; - rmp_it=_rmp.erase(rmp_it); // deleting the vertices that is not part of rmp - rmp_it--; // checking the previous vertex + rmp_it = + _rmp.erase(rmp_it); // deleting the vertices that is not part of rmp + rmp_it--; // checking the previous vertex } - _rmp.push_back(tuple._j); // adding the new edge's other vertex in the rmp + _rmp.push_back(tuple._j); // adding the new edge's other vertex in the rmp - }//end update_rmp() + } // end update_rmp() - - template - void init(const INIT_TYPE& tuple, PAT* pattern) { + template void init(const INIT_TYPE &tuple, PAT *pattern) { tt_iostream.start(); clear(); _dfs_code.push_back(tuple); @@ -376,37 +383,36 @@ class canonical_code ostringstream t_ss; t_ss << tuple; string t_str = t_ss.str(); - //char* p = new char[t_str.length()+1]; - //t_str.copy(p, string::npos); - //p[t_str.length()] = 0; - //HASHNS::hash_map, eqstr>::iterator itr = level_one_hash.find(p); - HASHNS::hash_map, equal_to >::iterator itr = level_one_hash.find(t_str); - if(itr != level_one_hash.end()) { - _can_code = itr->second; - //delete [] p; + // char* p = new char[t_str.length()+1]; + // t_str.copy(p, string::npos); + // p[t_str.length()] = 0; + // HASHNS::hash_map, + // eqstr>::iterator itr = level_one_hash.find(p); + std::unordered_map::iterator itr = level_one_hash.find(t_str); + if (itr != level_one_hash.end()) { + _can_code = itr->second; + // delete [] p; } else { - //level_one_hash.insert(make_pair(p, _can_code)); + // level_one_hash.insert(make_pair(p, _can_code)); level_one_hash.insert(make_pair(t_str, _can_code)); } tt_iostream.stop(); - + pattern->update_rmpath(0); pattern->update_rmpath(1); } - void push_back(const FIVE_TUPLE& tuple) { + void push_back(const FIVE_TUPLE &tuple) { _dfs_code.push_back(tuple); tt_iostream.start(); tt_iostream.stop(); } // append a dfs code, just by inserting this tuple at the end - void append(const FIVE_TUPLE& tuple) { - push_back(tuple); - } + void append(const FIVE_TUPLE &tuple) { push_back(tuple); } - void append(const FIVE_TUPLE& tuple, const int& gi, const int& gj) { + void append(const FIVE_TUPLE &tuple, const int &gi, const int &gj) { push_back(tuple); _cid_to_gid.insert(make_pair(tuple._i, gi)); _cid_to_gid.insert(make_pair(tuple._j, gj)); @@ -414,158 +420,155 @@ class canonical_code _gid_to_cid.insert(make_pair(gj, tuple._j)); } - void update_code() { - _can_code = id_generator++; - } - + void update_code() { _can_code = id_generator++; } - STORAGE_TYPE getCode() const { - return _can_code; - } + STORAGE_TYPE getCode() const { return _can_code; } // canonical dfs code test, test for every edges lexicographically - bool operator< (const CAN_CODE& rhs) const { - unsigned int i=0, j=0; - while(i<_dfs_code.size() && j=rhs._dfs_code.size(); } - int cid(const int& gi) const { - VM_CONST_IT it=_gid_to_cid.find(gi); - if(it==_gid_to_cid.end()) { + int cid(const int &gi) const { + VM_CONST_IT it = _gid_to_cid.find(gi); + if (it == _gid_to_cid.end()) { return -1; } return it->second; } - int gid(const int& ci) const { - VM_CONST_IT it=_cid_to_gid.find(ci); - if(it==_cid_to_gid.end()) { + int gid(const int &ci) const { + VM_CONST_IT it = _cid_to_gid.find(ci); + if (it == _cid_to_gid.end()) { return -1; } return it->second; } - RMP_T& rmost_path() { return _rmp;} + RMP_T &rmost_path() { return _rmp; } - void append_rmp(const int& id) { - _rmp.push_back(id); - } + void append_rmp(const int &id) { _rmp.push_back(id); } - typedef pair > EDGE_T; + typedef pair> EDGE_T; - struct ltedge { - bool operator()(const EDGE_T& e1, const EDGE_T& e2) const { + struct ltedge { + bool operator()(const EDGE_T &e1, const EDGE_T &e2) const { return ((e1.first < e2.first) || - (e1.first == e2.first && e1.second.first < e2.second.first) || - (e1.first == e2.first && e1.second.first == e2.second.first && - e1.second.second < e2.second.second)); - } + (e1.first == e2.first && e1.second.first < e2.second.first) || + (e1.first == e2.first && e1.second.first == e2.second.first && + e1.second.second < e2.second.second)); + } }; /** * Converts the canonical code to a string. */ std::string to_string() const { - + ostringstream t_ss; - for(unsigned int i=0; i < _dfs_code.size(); i++) { - if(i == 0) + for (unsigned int i = 0; i < _dfs_code.size(); i++) { + if (i == 0) t_ss << _dfs_code[i]; else t_ss << ":" << _dfs_code[i]; } - + string t_str = t_ss.str(); return t_str; } - static double graph_distance(const CAN_CODE& c1, const CAN_CODE& c2) { + static double graph_distance(const CAN_CODE &c1, const CAN_CODE &c2) { multiset set1, set2; vector result; CONST_IT cit; EDGE_T an_edge; - for (cit = c1.begin(); cit < c1.end(); cit++){ + for (cit = c1.begin(); cit < c1.end(); cit++) { if (cit->_li < cit->_lj) an_edge = make_pair(cit->_li, make_pair(cit->_lij, cit->_lj)); - else + else an_edge = make_pair(cit->_lj, make_pair(cit->_lij, cit->_li)); set1.insert(an_edge); } - for (cit = c2.begin(); cit < c2.end(); cit++){ + for (cit = c2.begin(); cit < c2.end(); cit++) { if (cit->_li < cit->_lj) an_edge = make_pair(cit->_li, make_pair(cit->_lij, cit->_lj)); - else + else an_edge = make_pair(cit->_lj, make_pair(cit->_lij, cit->_li)); set2.insert(an_edge); } - set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), back_inserter(result)); - return 1 - (double)result.size()/max(set1.size(), set2.size()); + set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), + back_inserter(result)); + return 1 - (double)result.size() / max(set1.size(), set2.size()); } -/* - //// Destructor ///////////// - ~canonical_code() { - // cout << "Destructor called\n"; - // freeing all dynamically allocated memory here - HASHNS::hash_map, eqstr>::iterator itr = level_one_hash.begin(); - cout << "Size of map:" << level_one_hash.size() << endl; - for (; itr != level_one_hash.end(); itr++) { - const char* t = itr->first; - cout << "freeing "<< strlen(t) << " bytes\n"; - if (t && strlen(t) > 0) - delete[] t; - t = 0; + /* + //// Destructor ///////////// + ~canonical_code() { + // cout << "Destructor called\n"; + // freeing all dynamically allocated memory here + HASHNS::hash_map, + eqstr>::iterator itr = level_one_hash.begin(); cout << "Size of map:" << + level_one_hash.size() << endl; for (; itr != level_one_hash.end(); itr++) { + const char* t = itr->first; + cout << "freeing "<< strlen(t) << " bytes\n"; + if (t && strlen(t) > 0) + delete[] t; + t = 0; + } + cout << "done freeing\n"; } - cout << "done freeing\n"; - } -*/ - friend ostream& operator<< <>(ostream&, const canonical_code&); + */ + friend ostream &operator<< <>(ostream &, + const canonical_code &); - private: +private: STORAGE_TYPE _can_code; - TUPLES _dfs_code; - // the following two maps are very important. They maps vertex_id_in_code <---> vertex_id_in_graph - // while we are making minimal code, we reassign vertex id according to minimal code, say in a graph - // if we have edges like, D----C----D----B---A, there id's are like 0---1---2----3----4. - // in min_can_code, A should have id-0, so in _cid_to_gid{0} = 4, _gid_to_cid{4} = 0 - VID_HMAP _cid_to_gid; // code -> graph cand - VID_HMAP _gid_to_cid; // cand graph -> code + TUPLES _dfs_code; + // the following two maps are very important. They maps vertex_id_in_code + // <---> vertex_id_in_graph while we are making minimal code, we reassign + // vertex id according to minimal code, say in a graph if we have edges like, + // D----C----D----B---A, there id's are like 0---1---2----3----4. in + // min_can_code, A should have id-0, so in _cid_to_gid{0} = 4, _gid_to_cid{4} + // = 0 + VID_HMAP _cid_to_gid; // code -> graph cand + VID_HMAP _gid_to_cid; // cand graph -> code RMP_T _rmp; static int id_generator; - //static HASHNS::hash_map, eqstr> level_one_hash; - static HASHNS::hash_map, equal_to > level_one_hash; + static std::unordered_map level_one_hash; -};//end class canonical_code for graph +}; // end class canonical_code for graph -template class ALLOC> -ostream& operator<< (ostream& ostr, const canonical_code& cc) { - typename canonical_code::TUPLES::const_iterator it; - for(it=cc._dfs_code.begin(); it!=cc._dfs_code.end(); it++) - ostr<<*it< +ostream &operator<<(ostream &ostr, + const canonical_code &cc) { + typename canonical_code::TUPLES::const_iterator it; + for (it = cc._dfs_code.begin(); it != cc._dfs_code.end(); it++) + ostr << *it << endl; return ostr; } -template class ALLOC > -int canonical_code::id_generator = 1; +template +int canonical_code::id_generator = 1; -template class ALLOC > -HASHNS::hash_map, equal_to > -canonical_code::level_one_hash; +template +std::unordered_map + canonical_code::level_one_hash; /* template class ALLOC > -HASHNS::hash_map, eqstr> +HASHNS::hash_map, eqstr> canonical_code::level_one_hash; */ #endif diff --git a/src/graph/graph_evat.h b/src/graph/graph_evat.h index ed687cf..d959232 100644 --- a/src/graph/graph_evat.h +++ b/src/graph/graph_evat.h @@ -1,7 +1,7 @@ /* - * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic Institute - * Written by parimi@cs.rpi.edu - * Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu + * Copyright (C) 2005 M.J. Zaki Rensselaer Polytechnic + * Institute Written by parimi@cs.rpi.edu Updated by chaojv@cs.rpi.edu, + * alhasan@cs.rpi.edu, salems@cs.rpi.edu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -22,406 +22,403 @@ using namespace std; -template