Skip to content

Commit 73ce811

Browse files
committed
Fix: Lifetime of temp strings in ranges
Closes #268
1 parent a4582ce commit 73ce811

File tree

5 files changed

+125
-50
lines changed

5 files changed

+125
-50
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ build_release/
55
build_relwithdebinfo/
66
build_go/
77
build_golang/
8+
build_test/
89
build_artifacts*
910

1011
# Yes, everyone loves keeping this file in the history.

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,10 +1089,10 @@ Similar to GCC, StringZilla is 32 bytes in size, and similar to Clang it can fit
10891089
Our layout might be preferable if you want to avoid branches.
10901090
If you use a different compiler, you may want to check its SSO buffer size with a [simple Gist](https://gist.github.com/ashvardanian/c197f15732d9855c4e070797adf17b21).
10911091

1092-
| | `libstdc++` in GCC 13 | `libc++` in Clang 17 | StringZilla |
1093-
| :-------------------- | ---------------------: | -------------------: | ----------: |
1094-
| `sizeof(std::string)` | 32 | 24 | 32 |
1095-
| Small String Capacity | 15 | __22__ | __22__ |
1092+
| | `libstdc++` in GCC 13 | `libc++` in Clang 17 | StringZilla |
1093+
| :-------------- | ---------------------: | -------------------: | ----------: |
1094+
| String `sizeof` | 32 | 24 | 32 |
1095+
| Inner Capacity | 15 | __22__ | __22__ |
10961096

10971097
This design has since been ported to many high-level programming languages.
10981098
Swift, for example, [can store 15 bytes](https://developer.apple.com/documentation/swift/substring/withutf8(_:)#discussion) in the `String` instance itself.

include/stringzilla/stringzilla.hpp

Lines changed: 92 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -485,8 +485,33 @@ struct matcher_find_last_not_of {
485485
size_type operator()(haystack_type haystack) const noexcept { return haystack.find_last_not_of(needles_); }
486486
};
487487

488+
/**
489+
* @brief Helper to extract the appropriate view type for a string-like type.
490+
* For StringZilla types, uses the nested ::string_view typedef.
491+
* For any type without a nested ::string_view class (like std::string_view), uses the type itself.
492+
*/
493+
template <typename string_type_, typename = void>
494+
struct string_view_for {
495+
// Default: use the type itself (for STL types)
496+
using type = string_type_;
497+
};
498+
499+
template <typename string_type_>
500+
struct string_view_for<string_type_,
501+
typename std::enable_if<std::is_class<typename string_type_::string_view>::value>::type> {
502+
// For StringZilla types with nested ::string_view
503+
using type = typename string_type_::string_view;
504+
};
505+
488506
/**
489507
* @brief A range of string slices representing the matches of a substring search.
508+
*
509+
* @note Lifetime semantics: Stores forwarded objects (including owning strings) to maintain lifetime.
510+
* Iterators receive lightweight views only, ensuring safe iteration without ownership concerns.
511+
* @note For-loop optimized: Iterators are lightweight views with minimal register pressure, ideal for
512+
* high-performance applications where cache efficiency and register allocation matter.
513+
* @note Sentinel support: Supports sentinel-based iteration via `operator==(end_sentinel_type)` for
514+
* efficient termination without constructing full end iterators.
490515
* @note Compatible with C++23 ranges, C++11 string views, and of course, StringZilla.
491516
* @see Similar to a pair of `boost::algorithm::find_iterator`.
492517
*/
@@ -495,6 +520,7 @@ class range_matches {
495520
public:
496521
using string_type = string_type_;
497522
using matcher_type = matcher_type_;
523+
using string_view_type = typename string_view_for<string_type>::type;
498524

499525
private:
500526
matcher_type matcher_;
@@ -503,24 +529,24 @@ class range_matches {
503529
public:
504530
using size_type = std::size_t;
505531
using difference_type = std::ptrdiff_t;
506-
using value_type = string_type;
507-
using pointer = string_type; // Needed for compatibility with STL container constructors.
508-
using reference = string_type; // Needed for compatibility with STL container constructors.
532+
using value_type = string_view_type;
533+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
534+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
509535

510536
range_matches(string_type haystack, matcher_type needle) noexcept : matcher_(needle), haystack_(haystack) {}
511537

512538
class iterator {
513539
matcher_type matcher_;
514-
string_type remaining_;
540+
string_view_type remaining_;
515541

516542
public:
517543
using iterator_category = std::forward_iterator_tag;
518544
using difference_type = std::ptrdiff_t;
519-
using value_type = string_type;
520-
using pointer = string_type; // Needed for compatibility with STL container constructors.
521-
using reference = string_type; // Needed for compatibility with STL container constructors.
545+
using value_type = string_view_type;
546+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
547+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
522548

523-
iterator(string_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
549+
iterator(string_view_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
524550
auto position = matcher_(remaining_);
525551
remaining_.remove_prefix(position != string_type::npos ? position : remaining_.size());
526552
}
@@ -548,8 +574,8 @@ class range_matches {
548574
bool operator==(end_sentinel_type) const noexcept { return remaining_.empty(); }
549575
};
550576

551-
iterator begin() const noexcept { return {haystack_, matcher_}; }
552-
iterator end() const noexcept { return {string_type {haystack_.data() + haystack_.size(), 0ull}, matcher_}; }
577+
iterator begin() const noexcept { return {string_view_type(haystack_), matcher_}; }
578+
iterator end() const noexcept { return {string_view_type(haystack_.data() + haystack_.size(), 0ull), matcher_}; }
553579
size_type size() const noexcept { return static_cast<size_type>(ssize()); }
554580
difference_type ssize() const noexcept { return std::distance(begin(), end()); }
555581
bool empty() const noexcept { return begin() == end_sentinel_type {}; }
@@ -570,6 +596,13 @@ class range_matches {
570596

571597
/**
572598
* @brief A range of string slices representing the matches of a @b reverse-order substring search.
599+
*
600+
* @note Lifetime semantics: Stores forwarded objects (including owning strings) to maintain lifetime.
601+
* Iterators receive lightweight views only, ensuring safe iteration without ownership concerns.
602+
* @note For-loop optimized: Iterators are lightweight views with minimal register pressure, ideal for
603+
* high-performance applications where cache efficiency and register allocation matter.
604+
* @note Sentinel support: Supports sentinel-based iteration via `operator==(end_sentinel_type)` for
605+
* efficient termination without constructing full end iterators.
573606
* @note Compatible with C++23 ranges, C++11 string views, and of course, StringZilla.
574607
* @see Similar to a pair of `boost::algorithm::find_iterator`.
575608
*/
@@ -578,12 +611,13 @@ class range_rmatches {
578611
public:
579612
using string_type = string_type_;
580613
using matcher_type = matcher_type_;
614+
using string_view_type = typename string_view_for<string_type>::type;
581615

582616
using size_type = std::size_t;
583617
using difference_type = std::ptrdiff_t;
584-
using value_type = string_type;
585-
using pointer = string_type; // Needed for compatibility with STL container constructors.
586-
using reference = string_type; // Needed for compatibility with STL container constructors.
618+
using value_type = string_view_type;
619+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
620+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
587621

588622
private:
589623
matcher_type matcher_;
@@ -594,16 +628,16 @@ class range_rmatches {
594628

595629
class iterator {
596630
matcher_type matcher_;
597-
string_type remaining_;
631+
string_view_type remaining_;
598632

599633
public:
600634
using iterator_category = std::forward_iterator_tag;
601635
using difference_type = std::ptrdiff_t;
602-
using value_type = string_type;
603-
using pointer = string_type; // Needed for compatibility with STL container constructors.
604-
using reference = string_type; // Needed for compatibility with STL container constructors.
636+
using value_type = string_view_type;
637+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
638+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
605639

606-
iterator(string_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
640+
iterator(string_view_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
607641
auto position = matcher_(remaining_);
608642
remaining_.remove_suffix( //
609643
position != string_type::npos //
@@ -644,8 +678,8 @@ class range_rmatches {
644678
bool operator==(end_sentinel_type) const noexcept { return remaining_.empty(); }
645679
};
646680

647-
iterator begin() const noexcept { return {haystack_, matcher_}; }
648-
iterator end() const noexcept { return {string_type {haystack_.data(), 0ull}, matcher_}; }
681+
iterator begin() const noexcept { return {string_view_type(haystack_), matcher_}; }
682+
iterator end() const noexcept { return {string_view_type(haystack_.data(), 0ull), matcher_}; }
649683
size_type size() const noexcept { return static_cast<size_type>(ssize()); }
650684
difference_type ssize() const noexcept { return std::distance(begin(), end()); }
651685
bool empty() const noexcept { return begin() == end_sentinel_type {}; }
@@ -666,6 +700,13 @@ class range_rmatches {
666700

667701
/**
668702
* @brief A range of string slices for different splits of the data.
703+
*
704+
* @note Lifetime semantics: Stores forwarded objects (including owning strings) to maintain lifetime.
705+
* Iterators receive lightweight views only, ensuring safe iteration without ownership concerns.
706+
* @note For-loop optimized: Iterators are lightweight views with minimal register pressure, ideal for
707+
* high-performance applications where cache efficiency and register allocation matter.
708+
* @note Sentinel support: Supports sentinel-based iteration via `operator==(end_sentinel_type)` for
709+
* efficient termination without constructing full end iterators.
669710
* @note Compatible with C++23 ranges, C++11 string views, and of course, StringZilla.
670711
* @see Similar to a pair of `boost::algorithm::split_iterator`.
671712
*
@@ -678,12 +719,13 @@ class range_splits {
678719
public:
679720
using string_type = string_type_;
680721
using matcher_type = matcher_type_;
722+
using string_view_type = typename string_view_for<string_type>::type;
681723

682724
using size_type = std::size_t;
683725
using difference_type = std::ptrdiff_t;
684-
using value_type = string_type;
685-
using pointer = string_type; // Needed for compatibility with STL container constructors.
686-
using reference = string_type; // Needed for compatibility with STL container constructors.
726+
using value_type = string_view_type;
727+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
728+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
687729

688730
private:
689731
matcher_type matcher_;
@@ -694,24 +736,24 @@ class range_splits {
694736

695737
class iterator {
696738
matcher_type matcher_;
697-
string_type remaining_;
739+
string_view_type remaining_;
698740
std::size_t length_within_remaining_;
699741
bool reached_tail_;
700742

701743
public:
702744
using iterator_category = std::forward_iterator_tag;
703745
using difference_type = std::ptrdiff_t;
704-
using value_type = string_type;
705-
using pointer = string_type; // Needed for compatibility with STL container constructors.
706-
using reference = string_type; // Needed for compatibility with STL container constructors.
746+
using value_type = string_view_type;
747+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
748+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
707749

708-
iterator(string_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
750+
iterator(string_view_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
709751
auto position = matcher_(remaining_);
710752
length_within_remaining_ = position != string_type::npos ? position : remaining_.size();
711753
reached_tail_ = false;
712754
}
713755

714-
iterator(string_type haystack, matcher_type matcher, end_sentinel_type) noexcept
756+
iterator(string_view_type haystack, matcher_type matcher, end_sentinel_type) noexcept
715757
: matcher_(matcher), remaining_(haystack), length_within_remaining_(0), reached_tail_(true) {}
716758

717759
pointer operator->() const noexcept = delete;
@@ -743,8 +785,8 @@ class range_splits {
743785
bool is_last() const noexcept { return remaining_.size() == length_within_remaining_; }
744786
};
745787

746-
iterator begin() const noexcept { return {haystack_, matcher_}; }
747-
iterator end() const noexcept { return {string_type {haystack_.end(), 0}, matcher_, end_sentinel_type {}}; }
788+
iterator begin() const noexcept { return {string_view_type(haystack_), matcher_}; }
789+
iterator end() const noexcept { return {string_view_type(haystack_.end(), 0), matcher_, end_sentinel_type {}}; }
748790
size_type size() const noexcept { return static_cast<size_type>(ssize()); }
749791
difference_type ssize() const noexcept { return std::distance(begin(), end()); }
750792
constexpr bool empty() const noexcept { return false; }
@@ -765,6 +807,13 @@ class range_splits {
765807

766808
/**
767809
* @brief A range of string slices for different splits of the data in @b reverse-order.
810+
*
811+
* @note Lifetime semantics: Stores forwarded objects (including owning strings) to maintain lifetime.
812+
* Iterators receive lightweight views only, ensuring safe iteration without ownership concerns.
813+
* @note For-loop optimized: Iterators are lightweight views with minimal register pressure, ideal for
814+
* high-performance applications where cache efficiency and register allocation matter.
815+
* @note Sentinel support: Supports sentinel-based iteration via `operator==(end_sentinel_type)` for
816+
* efficient termination without constructing full end iterators.
768817
* @note Compatible with C++23 ranges, C++11 string views, and of course, StringZilla.
769818
* @see Similar to a pair of `boost::algorithm::split_iterator`.
770819
*
@@ -777,12 +826,13 @@ class range_rsplits {
777826
public:
778827
using string_type = string_type_;
779828
using matcher_type = matcher_type_;
829+
using string_view_type = typename string_view_for<string_type>::type;
780830

781831
using size_type = std::size_t;
782832
using difference_type = std::ptrdiff_t;
783-
using value_type = string_type;
784-
using pointer = string_type; // Needed for compatibility with STL container constructors.
785-
using reference = string_type; // Needed for compatibility with STL container constructors.
833+
using value_type = string_view_type;
834+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
835+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
786836

787837
private:
788838
matcher_type matcher_;
@@ -793,26 +843,26 @@ class range_rsplits {
793843

794844
class iterator {
795845
matcher_type matcher_;
796-
string_type remaining_;
846+
string_view_type remaining_;
797847
std::size_t length_within_remaining_;
798848
bool reached_tail_;
799849

800850
public:
801851
using iterator_category = std::forward_iterator_tag;
802852
using difference_type = std::ptrdiff_t;
803-
using value_type = string_type;
804-
using pointer = string_type; // Needed for compatibility with STL container constructors.
805-
using reference = string_type; // Needed for compatibility with STL container constructors.
853+
using value_type = string_view_type;
854+
using pointer = string_view_type; // Needed for compatibility with STL container constructors.
855+
using reference = string_view_type; // Needed for compatibility with STL container constructors.
806856

807-
iterator(string_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
857+
iterator(string_view_type haystack, matcher_type matcher) noexcept : matcher_(matcher), remaining_(haystack) {
808858
auto position = matcher_(remaining_);
809859
length_within_remaining_ = position != string_type::npos
810860
? remaining_.size() - position - matcher_.needle_length()
811861
: remaining_.size();
812862
reached_tail_ = false;
813863
}
814864

815-
iterator(string_type haystack, matcher_type matcher, end_sentinel_type) noexcept
865+
iterator(string_view_type haystack, matcher_type matcher, end_sentinel_type) noexcept
816866
: matcher_(matcher), remaining_(haystack), length_within_remaining_(0), reached_tail_(true) {}
817867

818868
pointer operator->() const noexcept = delete;
@@ -848,8 +898,8 @@ class range_rsplits {
848898
bool is_last() const noexcept { return remaining_.size() == length_within_remaining_; }
849899
};
850900

851-
iterator begin() const noexcept { return {haystack_, matcher_}; }
852-
iterator end() const noexcept { return {{haystack_.data(), 0ull}, matcher_, end_sentinel_type {}}; }
901+
iterator begin() const noexcept { return {string_view_type(haystack_), matcher_}; }
902+
iterator end() const noexcept { return {string_view_type(haystack_.data(), 0ull), matcher_, end_sentinel_type {}}; }
853903
size_type size() const noexcept { return static_cast<size_type>(ssize()); }
854904
difference_type ssize() const noexcept { return std::distance(begin(), end()); }
855905
constexpr bool empty() const noexcept { return false; }

include/stringzilla/types.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,8 +541,7 @@ typedef enum sz_status_t {
541541
sz_invalid_utf8_k = -12,
542542
/** For algorithms that take collections of unique elements, this status indicates presence of duplicates. */
543543
sz_contains_duplicates_k = -13,
544-
/** For algorithms dealing with large inputs, this error reports the need to upcast the logic to larger types.
545-
*/
544+
/** For algorithms dealing with large inputs, this error reports the need to upcast the logic to larger types. */
546545
sz_overflow_risk_k = -14,
547546
/** For algorithms with multi-stage pipelines indicates input/output size mismatch. */
548547
sz_unexpected_dimensions_k = -15,

0 commit comments

Comments
 (0)