Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions PolyEngine/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ file(GLOB_RECURSE POLYCORE_SRCS RELATIVE ${CMAKE_CURRENT_LIST_DIR}
${POLYCORE_INCLUDE}/*.h)
GenerateSourceGoups("${POLYCORE_SRCS}")

find_package(ICU 65.1 COMPONENTS uc i18n REQUIRED)

add_library(${CORE_TARGET} SHARED ${POLYCORE_SRCS})
target_link_libraries(${CORE_TARGET} PUBLIC ICU::uc ICU::i18n)
target_compile_options(${CORE_TARGET} PRIVATE $<$<BOOL:${SIMD}>:${SIMD_FLAGS}>)
target_compile_definitions(${CORE_TARGET} PRIVATE _CORE DISABLE_SIMD=$<NOT:$<BOOL:${SIMD}>>)
target_include_directories(${CORE_TARGET} PUBLIC ${POLYCORE_INCLUDE} PRIVATE ${RapidJSON_INCLUDE_DIRS})
Expand Down
11 changes: 10 additions & 1 deletion PolyEngine/Core/Src/pe/core/CorePCH.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,13 @@ SILENCE_GCC_WARNING(-Wclass-memaccess, "Rapidjson has no release containing fix
#include <rapidjson/stringbuffer.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/document.h>
UNSILENCE_GCC_WARNING()
UNSILENCE_GCC_WARNING()

// ICU
//#include <unicode/unorm2.h>
#include <unicode/normalizer2.h>
#include <unicode/translit.h>
//#include <unicode/ustring.h>
//#include <unicode/utrans.h>
#include <unicode/ucnv.h>
#include <unicode/coll.h>
117 changes: 94 additions & 23 deletions PolyEngine/Core/Src/pe/core/storage/String.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,50 @@ static const std::vector<char> WHITESPACES { ' ', '\t', '\r', '\n', '\0' };
namespace pe::core::storage
{

size_t StrLen(const char* str) {
/// <summary>Counts the bytes that precede the first NUL terminator of a C string</summary>
/// <param name="str">NUL-terminated character sequence</param>
/// <returns>Number of bytes before the terminator</returns>
size_t StrLen(const char* str)
{
	const char* cursor = str;
	// Walk forward until the terminator is reached.
	while (*cursor != '\0')
		++cursor;
	return static_cast<size_t>(cursor - str);
}

/// <summary>Checks that every byte of a NUL-terminated string lies in the 7-bit ASCII range</summary>
/// <param name="str">NUL-terminated character sequence to inspect</param>
/// <returns>False as soon as a byte above 0x7f is found, true otherwise (including for an empty string)</returns>
bool isValidASCIIString(const char* str)
{
	for (const char* it = str; *it != '\0'; ++it)
	{
		// Compare as unsigned so bytes >= 0x80 are not seen as negative values.
		const unsigned char byte = static_cast<unsigned char>(*it);
		if (byte > 0x7f)
			return false;
	}
	return true;
}

}

/// <summary>Creates a String from a C string that is expected to contain only 7-bit ASCII bytes</summary>
/// <param name="data">NUL-terminated character sequence</param>
/// <returns>String constructed directly from the given bytes</returns>
// The ASCII check is only an assertion; the bytes are passed to the constructor unchanged either way.
String String::fromASCII(const char* data) // can still be invalid but better than nothing
{
ASSERTE(isValidASCIIString(data), "Passed string is not valid ASCII, please use fromUTF8 factory method instead!");
return String(data);
}

/// <summary>Creates a String from UTF-8 encoded data, normalized to Unicode NFC</summary>
/// <param name="data">NUL-terminated UTF-8 byte sequence</param>
/// <returns>String holding the NFC-normalized UTF-8 bytes. If ICU normalization is unavailable
/// or fails, the input bytes are stored without normalization.</returns>
String String::fromUTF8(const char* data)
{
	UErrorCode status = U_ZERO_ERROR;
	const icu::Normalizer2* normalizer = icu::Normalizer2::getNFCInstance(status);
	if (U_FAILURE(status) || normalizer == nullptr)
	{
		ASSERTE(false, "Failed to obtain ICU NFC normalizer");
		return String(data);
	}

	// Decode explicitly as UTF-8; the (const char*, length) constructor would use the
	// platform default codepage instead.
	const icu::UnicodeString src = icu::UnicodeString::fromUTF8(data);
	icu::UnicodeString dst;
	normalizer->normalize(src, dst, status);
	if (U_FAILURE(status))
	{
		ASSERTE(false, "ICU NFC normalization failed");
		return String(data);
	}

	// Re-encode to UTF-8. The byte count generally differs from dst.length(), which is
	// measured in UTF-16 code units, so let ICU grow the buffer itself.
	std::string utf8;
	dst.toUTF8String(utf8);

	// The private raw-data constructor appends the NUL terminator.
	return String(std::vector<char>(utf8.begin(), utf8.end()));
}

/// <summary>Placeholder factory: currently ignores its argument and returns a default-constructed String</summary>
/// <param name="data">Unused at the moment</param>
/// <returns>Default-constructed String</returns>
String String::fromCodePoint(const char* data)
{
return String();
}

String::String(const char* data) {
Expand Down Expand Up @@ -86,6 +123,19 @@ String String::ToUpper() const
return ret;
}

/// <summary>Transliterates the String to ASCII using the ICU "Any-Latin; Latin-ASCII" transform</summary>
/// <returns>Transliterated String, or an empty String if the ICU transliterator could not be created</returns>
String String::toASCII() const
{
	UErrorCode status = U_ZERO_ERROR;
	UParseError parseError;
	// createInstance() hands ownership to the caller; LocalPointer deletes it on scope exit.
	const icu::LocalPointer<icu::Transliterator> trans(
		icu::Transliterator::createInstance("Any-Latin; Latin-ASCII", UTRANS_FORWARD, parseError, status));
	if (U_FAILURE(status) || trans.isNull())
	{
		ASSERTE(false, "Failed to create ICU transliterator");
		return String();
	}

	// Decode the stored bytes as UTF-8 up to (not including) the NUL terminator.
	icu::UnicodeString text = icu::UnicodeString::fromUTF8(GetCStr());
	trans->transliterate(text);

	// Transliteration may change the length, so let ICU size the output buffer.
	std::string ascii;
	text.toUTF8String(ascii);

	// The private raw-data constructor appends the NUL terminator.
	return String(std::vector<char>(ascii.begin(), ascii.end()));
}

bool String::IsEmpty() const {
return GetLength() == 0;
}
Expand All @@ -100,8 +150,8 @@ String String::Replace(char what, char with) const
return ret;
}

String String::Replace(const String& what, const String& with) const {
/// <summary>Builds a new String by splitting on one String and joining the pieces with another</summary>
/// <param name="what">Delimiter passed to Split</param>
/// <param name="with">Separator passed to Join</param>
/// <returns>Result of joining the split pieces</returns>
String String::Replace(const String& what, const String& with) const
{
	const std::vector<String> pieces = Split(what);
	const String* const first = pieces.data();
	const size_t count = pieces.size();
	return Join(first, count, with);
}
Expand All @@ -122,28 +172,28 @@ std::vector<String> String::Split(const String& delimiter) const {
return elements;
}

String String::Join(const String* vars, size_t size, const String& separator) {
//TODO replace using stringbuilder
String s = String("");
for (size_t i = 0; i < size; i++) {
s = s + vars[i];
if (i != size - 1) {
s = s + separator;
}
String String::Join(const String* vars, size_t size, const String& separator)
{
StringBuilder sb;
for (size_t i = 0; i < size; ++i)
{
sb.Append(vars[i]);
if (i != size - 1)
sb.Append(separator);
}
return s;
return sb.StealString();
}

String String::Join(const String* vars, size_t size, char separator) {
//TODO replace using stringbuilder
String s = String("");
for (size_t i = 0; i < size; i++) {
s = s + vars[i];
if (i != size - 1) {
s = s + separator;
}
String String::Join(const String* vars, size_t size, char separator)
{
StringBuilder sb;
for (size_t i = 0; i < size; ++i)
{
sb.Append(vars[i]);
if (i != size - 1)
sb.Append(separator);
}
return s;
return sb.StealString();
}

bool String::StartsWith(char var) const {
Expand Down Expand Up @@ -202,7 +252,8 @@ String& String::operator=(String&& rhs) {
return *this;
}

bool String::operator==(const char* str) const {
bool String::CmpBytes(const char* str) const
{
if (GetLength() != StrLen(str))
return false;
for (size_t k = 0; k < GetLength(); ++k)
Expand All @@ -211,10 +262,25 @@ bool String::operator==(const char* str) const {
return true;
}

bool String::operator==(const String& str) const {
/// <summary>Compares the underlying storage of two Strings for exact equality</summary>
/// <param name="str">String to compare with</param>
/// <returns>True when both Data containers compare equal</returns>
bool String::CmpBytes(const String& str) const
{
	const auto& ownBytes = Data;
	const auto& otherBytes = str.Data;
	return ownBytes == otherBytes;
}

/// <summary>Compares String with Cstring using an ICU collator for the default locale</summary>
/// <param name="str">NUL-terminated UTF-8 string to be compared with</param>
/// <returns>True when the collator reports both strings as equal. Falls back to a byte-wise
/// comparison if the collator cannot be created or the comparison fails.</returns>
bool String::operator==(const char* str) const
{
	UErrorCode status = U_ZERO_ERROR;
	// createInstance() returns an owned object; LocalPointer releases it on every exit path.
	const icu::LocalPointer<icu::Collator> coll(icu::Collator::createInstance(status));
	if (U_FAILURE(status) || coll.isNull())
		return CmpBytes(str);

	const UCollationResult result = coll->compareUTF8(Data.data(), str, status);
	if (U_FAILURE(status))
		return CmpBytes(str);
	return result == UCOL_EQUAL;
}

/// <summary>Compares two Strings using an ICU collator for the default locale</summary>
/// <param name="str">String to be compared with</param>
/// <returns>True when the collator reports both strings as equal. Falls back to a byte-wise
/// comparison if the collator cannot be created or the comparison fails.</returns>
bool String::operator==(const String& str) const
{
	UErrorCode status = U_ZERO_ERROR;
	// createInstance() returns an owned object; LocalPointer releases it on every exit path.
	const icu::LocalPointer<icu::Collator> coll(icu::Collator::createInstance(status));
	if (U_FAILURE(status) || coll.isNull())
		return CmpBytes(str);

	const UCollationResult result = coll->compareUTF8(Data.data(), str.Data.data(), status);
	if (U_FAILURE(status))
		return CmpBytes(str);
	return result == UCOL_EQUAL;
}

bool String::operator<(const String& rhs) const {
if (GetLength() < rhs.GetLength())
return true;
Expand Down Expand Up @@ -261,6 +327,11 @@ size_t String::GetLength() const
return Data.size() - 1;
}

/// <summary>Placeholder: not implemented yet, always returns 0</summary>
/// <returns>0</returns>
size_t String::GetLogicalLength() const
{
return 0; // TODO: count it on demand?
}

size_t String::FindSubstrFromPoint(size_t startPoint, const String& str) const
{
for (size_t idx = startPoint; idx < GetLength(); ++idx)
Expand Down
90 changes: 88 additions & 2 deletions PolyEngine/Core/Src/pe/core/storage/String.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <pe/Defines.hpp>
#include <pe/core/utils/Range.hpp>

namespace pe::core::storage
{
Expand All @@ -12,6 +13,12 @@ namespace pe::core::storage
public:
static const String EMPTY;

/// <summary>Creates String from Cstring that is asserted to contain only ASCII bytes</summary>
/// <param name="data">Null-terminated character sequence</param>
static String fromASCII(const char* data);

/// <summary>Creates String from UTF-8 data, passing it through the ICU NFC normalizer</summary>
/// <param name="data">Null-terminated UTF-8 character sequence</param>
static String fromUTF8(const char* data);

/// <summary>Placeholder factory; the current implementation returns an empty String</summary>
/// <param name="data">Currently unused</param>
static String fromCodePoint(const char* data);

/// <summary>Basic String constructor that creates empty String</summary>
String() : String("") {};

Expand All @@ -27,7 +34,6 @@ namespace pe::core::storage
/// <param name="rhs">Reference to String instance which state should be moved</param>
String(String&& rhs);


/// <summary>Casts int to String</summary>
/// <param name="var">Integer value which should be used to make String instance</param>
/// <returns>String containing integer value</returns>
Expand Down Expand Up @@ -72,7 +78,6 @@ namespace pe::core::storage
/// <returns>String containing given std::string</returns>
static String From(const std::string& var);


/// <summary>Checks if String instance contains another String instance</summary>
/// <param name="var">String reference which should be contained in another String instance</param>
bool Contains(const String& var) const;
Expand All @@ -89,6 +94,8 @@ namespace pe::core::storage
/// <returns>Upper-case String instance</returns>
String ToUpper() const;

/// <summary>Transliterates String to ASCII using the ICU "Any-Latin; Latin-ASCII" transform</summary>
/// <returns>Transliterated String instance</returns>
String toASCII() const;

/// <summary>Checks if String is empty</summary>
bool IsEmpty() const;

Expand Down Expand Up @@ -164,6 +171,10 @@ namespace pe::core::storage
/// <returns>Moved String reference</returns>
String& operator=(String&& rhs);

/// <summary>Compares String with Cstring byte by byte, without any Unicode-aware collation</summary>
/// <param name="str">Cstring to be compared with</param>
bool CmpBytes(const char* str) const;

/// <summary>Compares the stored data of two Strings for exact equality</summary>
/// <param name="str">String to be compared with</param>
bool CmpBytes(const String& str) const;

/// <summary>Compares String with Cstring</summary>
/// <param name="str">Cstring to be compared with</param>
bool operator==(const char* str) const;
Expand Down Expand Up @@ -193,10 +204,85 @@ namespace pe::core::storage
char operator[](size_t idx) const;

size_t GetLength() const;

/// <summary>Placeholder; the current implementation always returns 0</summary>
size_t GetLogicalLength() const;

const char* GetCStr() const { return Data.data(); }

friend std::ostream& operator<< (std::ostream& stream, const String& rhs) { return stream << rhs.GetCStr(); }

/// <summary>Bidirectional, read-only iterator over the individual bytes stored in a String</summary>
/// Iterators are compared by index only; comparing iterators of different Strings is not detected.
class StringIteratorMemory final : public BaseObjectLiteralType<>
{
	friend class String;
public:
	using iterator_category = std::bidirectional_iterator_tag;
	using value_type = char;
	using difference_type = std::ptrdiff_t;
	// Dereferencing yields const access only, so the traits must advertise const types.
	using pointer = const char*;
	using reference = const char&;

	bool operator==(const StringIteratorMemory& rhs) const { return idx == rhs.idx; }
	bool operator!=(const StringIteratorMemory& rhs) const { return idx != rhs.idx; }

	/// <summary>Bounds-checked access to the byte at the current position</summary>
	reference operator*() const { return s->Data.at(idx); }
	//const char* operator->() const { return s->Data.data() + idx * sizeof(char); } //are they even useful?

	StringIteratorMemory& operator++() { ++idx; return *this; }
	StringIteratorMemory operator++(int) { StringIteratorMemory ret(s, idx); ++idx; return ret; }
	StringIteratorMemory& operator--() { ASSERTE(idx > 0, "Index cannot be negative"); --idx; return *this; }
	StringIteratorMemory operator--(int) { ASSERTE(idx > 0, "Index cannot be negative"); StringIteratorMemory ret(s, idx); --idx; return ret; }
private:
	// Only String may create iterators (see friend declaration).
	StringIteratorMemory(const String* string, size_t index) : s(string), idx(index) {}
	const String* s;
	size_t idx;
};

/// <summary>Bidirectional, read-only iterator intended for glyph-wise traversal of a String</summary>
/// NOTE: glyph-aware stepping is not implemented yet (add implementation from numeria);
/// for now every increment/decrement moves by exactly one byte.
/// Iterators are compared by index only; comparing iterators of different Strings is not detected.
class StringIteratorGlyph final : public BaseObjectLiteralType<>
{
	friend class String;
public:
	using iterator_category = std::bidirectional_iterator_tag;
	using value_type = char;
	using difference_type = std::ptrdiff_t;
	// Dereferencing yields const access only, so the traits must advertise const types.
	using pointer = const char*;
	using reference = const char&;

	bool operator==(const StringIteratorGlyph& rhs) const { return idx == rhs.idx; }
	bool operator!=(const StringIteratorGlyph& rhs) const { return idx != rhs.idx; }

	/// <summary>Bounds-checked access to the byte at the current position</summary>
	reference operator*() const { return s->Data.at(idx); }
	//const char* operator->() const { return s->Data.data() + idx * sizeof(char); }

	StringIteratorGlyph& operator++() { ++idx; return *this; }
	StringIteratorGlyph operator++(int) { StringIteratorGlyph ret(s, idx); ++idx; return ret; }
	StringIteratorGlyph& operator--() { ASSERTE(idx > 0, "Index cannot be negative"); --idx; return *this; }
	StringIteratorGlyph operator--(int) { ASSERTE(idx > 0, "Index cannot be negative"); StringIteratorGlyph ret(s, idx); --idx; return ret; }
private:
	// Only String may create iterators (see friend declaration).
	StringIteratorGlyph(const String* string, size_t index) : s(string), idx(index) {}
	const String* s;
	size_t idx;
};

// FIXME: default begin and end are memory for now, please scrutinize this, const added as well, they are const anyway, should user only use const ones? disallow mutating?
// The end iterators stop before the trailing NUL terminator kept in Data (GetLength() is
// Data.size() - 1), so iteration yields only the characters of the string. The empty() guard
// avoids unsigned underflow should Data ever hold no elements at all.
StringIteratorMemory begin() { return StringIteratorMemory(this, 0); }
StringIteratorMemory end() { return StringIteratorMemory(this, Data.empty() ? 0 : Data.size() - 1); }
StringIteratorMemory cbegin() const { return StringIteratorMemory(this, 0); }
StringIteratorMemory cend() const { return StringIteratorMemory(this, Data.empty() ? 0 : Data.size() - 1); }
StringIteratorGlyph beginGlyph() { return StringIteratorGlyph(this, 0); }
StringIteratorGlyph endGlyph() { return StringIteratorGlyph(this, Data.empty() ? 0 : Data.size() - 1); }

/// <summary>Returns a Range spanning begin() to end() for byte-wise traversal</summary>
::pe::core::utils::Range<StringIteratorMemory> IterateMemory()
{
	using MemoryRange = ::pe::core::utils::Range<StringIteratorMemory>;
	return MemoryRange(begin(), end());
}

/// <summary>Returns a Range spanning beginGlyph() to endGlyph()</summary>
::pe::core::utils::Range<StringIteratorGlyph> IterateGlyphs()
{
	using GlyphRange = ::pe::core::utils::Range<StringIteratorGlyph>;
	return GlyphRange(beginGlyph(), endGlyph());
}

private:

/// <summary>Takes ownership of raw character data and appends the NUL terminator</summary>
/// <param name="rawData">Characters to store; the terminator is added by this constructor</param>
String(std::vector<char> rawData) : Data(std::move(rawData)) { Data.push_back('\0'); }
Expand Down
Loading