Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
02aaf1b
Format code
datvo06 Jul 30, 2024
f1fc205
Update formatting
datvo06 Jul 31, 2024
348c511
Adding CMakeLists
datvo06 Jul 31, 2024
243288b
Adding declarations of check isomorphism and operator<< to match pattern
datvo06 Jul 31, 2024
4cbc592
CMakeLists for String Tokenizers
datvo06 Jul 31, 2024
aeee89e
Update to C++17, simplify code with default allocator
datvo06 Jul 31, 2024
4174a18
FIX: Adding infile name as filename in constructor
datvo06 Jul 31, 2024
815d88b
FIX: Using split instead of StringTokenizer, also updated correct tra…
datvo06 Jul 31, 2024
e7e100f
FIX: Using split instead of StringTokenizer, also updated correct tra…
datvo06 Jul 31, 2024
32cae38
Minor editing
datvo06 Jul 31, 2024
ae472bc
Simplifying func name
datvo06 Aug 1, 2024
aa3dee1
Minor editing
datvo06 Aug 1, 2024
cb5b539
Adding hash_utils to fix for incorrect hashing
datvo06 Aug 1, 2024
dec60d7
Fix the number of tot_max_pat insteads of using magic number 120400; …
datvo06 Aug 3, 2024
a5f9aa4
Update README.md to use CMake
datvo06 Aug 5, 2024
82fd406
Adding condition to stop when the discovering process takes too long
datvo06 Aug 10, 2024
1b47111
Merge branch 'master' of https://github.com/zakimjz/Origami
datvo06 Aug 10, 2024
69b05a6
Merge https://github.com/datvo06/Origami
datvo06 Aug 10, 2024
8039b38
Remove printing of each iterations
datvo06 Aug 10, 2024
d0faaeb
Remove printing of each iterations
datvo06 Aug 10, 2024
0419b2a
Adding cstring for strcmp
datvo06 Aug 11, 2024
0783cfe
Adding inclusion of bits/stdc++
datvo06 Aug 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,8 @@ Origami uses random walks over the graph partial order to mine a representative
Data Mining. October 2007.

## How to

cd test; type make
```sh
cd test && mkdir build && cd build
cmake ..
make
```
31 changes: 31 additions & 0 deletions src/StringTokenizer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

cmake_minimum_required(VERSION 3.10)

# Project name
project(StringTokenizer)

# Set the C++ standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Enable the generation of compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Compiler options
# -ansi is intentionally not passed: for C++ GCC treats it as -std=c++98,
# which contradicts the C++17 standard requested above. The remaining flags
# use GNU-style spelling, so they are skipped for MSVC-style compilers.
if(NOT MSVC)
  add_compile_options(-pedantic -Wall)
endif()

# Add library for StringTokenizer
add_library(StringTokenizer STATIC StringTokenizer.cpp)

# Include directories
# PUBLIC so that targets linking the library also find StringTokenizer.h
target_include_directories(StringTokenizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

# Add executable for StrTokTest
add_executable(strtoktest strtoktest.cpp)

# Link the StringTokenizer library to StrTokTest executable
target_link_libraries(strtoktest PRIVATE StringTokenizer)

224 changes: 92 additions & 132 deletions src/StringTokenizer/StringTokenizer.cpp
Original file line number Diff line number Diff line change
@@ -1,166 +1,126 @@
#include "StringTokenizer.h"

StringTokenizer::StringTokenizer(const std::string& _str, const std::string& _delim)
{
StringTokenizer::StringTokenizer(const std::string &_str,
const std::string &_delim) {

if ((_str.length() == 0) || (_delim.length() == 0)) return;
if ((_str.length() == 0) || (_delim.length() == 0))
return;

token_str = _str;
delim = _delim;
token_str = _str;
delim = _delim;

/*
Remove sequential delimiter
*/
unsigned int curr_pos = 0;

while(true)
{
if ((curr_pos = token_str.find(delim,curr_pos)) != std::string::npos)
{
curr_pos += delim.length();

while(token_str.find(delim,curr_pos) == curr_pos)
{
token_str.erase(curr_pos,delim.length());
}
}
else
break;
}

/*
Trim leading delimiter
*/
if (token_str.find(delim,0) == 0)
{
token_str.erase(0,delim.length());
}

/*
Trim ending delimiter
*/
curr_pos = 0;
if ((curr_pos = token_str.rfind(delim)) != std::string::npos)
{
if (curr_pos != (token_str.length() - delim.length())) return;
token_str.erase(token_str.length() - delim.length(),delim.length());
}

}


int StringTokenizer::countTokens()
{
unsigned int curr_pos = 0;

unsigned int prev_pos = 0;
int num_tokens = 0;
while (true) {
if ((curr_pos = token_str.find(delim, curr_pos)) != std::string::npos) {
curr_pos += delim.length();

if (token_str.length() > 0)
{
num_tokens = 0;

unsigned int curr_pos = 0;
while(true)
{
if ((curr_pos = token_str.find(delim,curr_pos)) != std::string::npos)
{
num_tokens++;
prev_pos = curr_pos;
curr_pos += delim.length();
}
else
break;
while (token_str.find(delim, curr_pos) == curr_pos) {
token_str.erase(curr_pos, delim.length());
}
return ++num_tokens;
}
else
{
return 0;
}

}
} else
break;
}

/*
Trim leading delimiter
*/
if (token_str.find(delim, 0) == 0) {
token_str.erase(0, delim.length());
}

bool StringTokenizer::hasMoreTokens()
{
return (token_str.length() > 0);
/*
Trim ending delimiter
*/
curr_pos = 0;
if ((curr_pos = token_str.rfind(delim)) != std::string::npos) {
if (curr_pos != (token_str.length() - delim.length()))
return;
token_str.erase(token_str.length() - delim.length(), delim.length());
}
}

/**
 * Counts the tokens left in the buffer without consuming any of them.
 *
 * @return 0 when the buffer is empty; otherwise the number of delimiter
 *         occurrences found in the buffer plus one.
 */
int StringTokenizer::countTokens() {
  if (token_str.empty())
    return 0;

  // A non-empty buffer always holds at least one token.
  int num_tokens = 1;

  // The cursor must be std::string::size_type: storing find()'s result in an
  // unsigned int truncates npos where size_t is wider, so the comparison
  // against npos would never succeed and the scan would not terminate.
  // The constructor stores nothing when the delimiter is empty, so each match
  // advances the cursor by at least one character.
  for (std::string::size_type pos = token_str.find(delim);
       pos != std::string::npos;
       pos = token_str.find(delim, pos + delim.length())) {
    ++num_tokens;
  }

  return num_tokens;
}

std::string StringTokenizer::nextToken()
{

if (token_str.length() == 0)
return "";

std::string tmp_str = "";
unsigned int pos = token_str.find(delim,0);

if (pos != std::string::npos)
{
tmp_str = token_str.substr(0,pos);
token_str = token_str.substr(pos+delim.length(),token_str.length()-pos);
}
else
{
tmp_str = token_str.substr(0,token_str.length());
token_str = "";
}
/**
 * Reports whether any unconsumed text is left in the tokenizer.
 *
 * @return true while the internal buffer is non-empty, false otherwise.
 */
bool StringTokenizer::hasMoreTokens() {
  return !token_str.empty();
}

return tmp_str;
}
std::string StringTokenizer::nextToken() {

if (token_str.length() == 0)
return "";

int StringTokenizer::nextIntToken()
{
return atoi(nextToken().c_str());
}
std::string tmp_str = "";
unsigned int pos = token_str.find(delim, 0);

if (pos != std::string::npos) {
tmp_str = token_str.substr(0, pos);
token_str =
token_str.substr(pos + delim.length(), token_str.length() - pos);
} else {
tmp_str = token_str.substr(0, token_str.length());
token_str = "";
}

double StringTokenizer::nextFloatToken()
{
return atof(nextToken().c_str());
return tmp_str;
}

/**
 * Consumes the next token and converts its text with atoi().
 *
 * @return the value atoi() produces for the token text.
 */
int StringTokenizer::nextIntToken() {
  const std::string token = nextToken();
  return atoi(token.c_str());
}

std::string StringTokenizer::nextToken(const std::string& delimiter)
{
if (token_str.length() == 0)
return "";
double StringTokenizer::nextFloatToken() { return atof(nextToken().c_str()); }

std::string tmp_str = "";
unsigned int pos = token_str.find(delimiter,0);
std::string StringTokenizer::nextToken(const std::string &delimiter) {
if (token_str.length() == 0)
return "";

if (pos != std::string::npos)
{
tmp_str = token_str.substr(0,pos);
token_str = token_str.substr(pos + delimiter.length(),token_str.length() - pos);
}
else
{
tmp_str = token_str.substr(0,token_str.length());
token_str = "";
}

return tmp_str;
}
std::string tmp_str = "";
unsigned int pos = token_str.find(delimiter, 0);

if (pos != std::string::npos) {
tmp_str = token_str.substr(0, pos);
token_str =
token_str.substr(pos + delimiter.length(), token_str.length() - pos);
} else {
tmp_str = token_str.substr(0, token_str.length());
token_str = "";
}

std::string StringTokenizer::remainingString()
{
return token_str;
return tmp_str;
}

/**
 * Returns the text that has not been consumed yet.
 *
 * @return a copy of the internal buffer; the tokenizer itself is unchanged.
 */
std::string StringTokenizer::remainingString() {
  std::string rest(token_str);
  return rest;
}

std::string StringTokenizer::filterNextToken(const std::string& filterStr)
{
std::string tmp_str = nextToken();
unsigned int currentPos = 0;
std::string StringTokenizer::filterNextToken(const std::string &filterStr) {
std::string tmp_str = nextToken();
unsigned int currentPos = 0;

while((currentPos = tmp_str.find(filterStr,currentPos)) != std::string::npos)
{
tmp_str.erase(currentPos,filterStr.length());
}
while ((currentPos = tmp_str.find(filterStr, currentPos)) !=
std::string::npos) {
tmp_str.erase(currentPos, filterStr.length());
}

return tmp_str;
return tmp_str;
}
42 changes: 17 additions & 25 deletions src/StringTokenizer/StringTokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,42 +11,34 @@
* http://www.opensource.org/licenses/cpl.php *
* *
***********************************************************************
*/


*/

#ifndef INCLUDE_STRINGTOKENIZER_H
#define INCLUDE_STRINGTOKENIZER_H


#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <string>

class StringTokenizer {

class StringTokenizer
{

public:

StringTokenizer(const std::string& _str, const std::string& _delim);
~StringTokenizer(){};

int countTokens();
bool hasMoreTokens();
std::string nextToken();
int nextIntToken();
double nextFloatToken();
std::string nextToken(const std::string& delim);
std::string remainingString();
std::string filterNextToken(const std::string& filterStr);

private:
public:
StringTokenizer(const std::string &_str, const std::string &_delim);
~StringTokenizer() {};

std::string token_str;
std::string delim;
int countTokens();
bool hasMoreTokens();
std::string nextToken();
int nextIntToken();
double nextFloatToken();
std::string nextToken(const std::string &delim);
std::string remainingString();
std::string filterNextToken(const std::string &filterStr);

private:
std::string token_str;
std::string delim;
};

#endif
Loading