-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[libc] Wchar Stringconverter #146388
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[libc] Wchar Stringconverter #146388
Changes from all commits
c1e1650
de9385e
f18221f
41e7e31
e6453a2
86fb76f
5e4b5e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
//===-- Definition of a class for mbstate_t and conversion -----*-- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIBC_SRC___SUPPORT_STRING_CONVERTER_H | ||
#define LLVM_LIBC_SRC___SUPPORT_STRING_CONVERTER_H | ||
|
||
#include "hdr/types/char32_t.h" | ||
#include "hdr/types/char8_t.h" | ||
#include "hdr/types/size_t.h" | ||
#include "src/__support/common.h" | ||
#include "src/__support/error_or.h" | ||
#include "src/__support/wchar/character_converter.h" | ||
#include "src/__support/wchar/mbstate.h" | ||
|
||
namespace LIBC_NAMESPACE_DECL { | ||
namespace internal { | ||
|
||
template <typename T> class StringConverter { | ||
private: | ||
CharacterConverter cr; | ||
const T *src; | ||
size_t src_len; | ||
size_t src_idx; | ||
|
||
// # of src elements pushed to cr needed to represent the current character | ||
size_t num_pushed; | ||
Review comment: `num_pushed` is effectively a second return value from `pushFullCharacter()`.
Reply: We do still need to store `num_pushed`, as the call to `popUTFx()` that actually pushes the full character might not be the same call that updates `src_idx`. Example for a 4-byte character:
Review comment: Why is it necessary to update the src index in a different call to pop than the one that pushes? To me it makes more sense to increment the src index to represent that a character has been pushed and the rest of it is in the mbstate. |
||
|
||
// # of pops we are allowed to perform (essentially size of the dest buffer) | ||
size_t num_to_write; | ||
|
||
int pushFullCharacter() { | ||
for (num_pushed = 0; !cr.isFull() && src_idx + num_pushed < src_len; | ||
++num_pushed) { | ||
int err = cr.push(src[src_idx + num_pushed]); | ||
if (err != 0) | ||
return err; | ||
} | ||
|
||
// if we aren't able to read a full character from the source string | ||
if (src_idx + num_pushed == src_len && !cr.isFull()) { | ||
src_idx += num_pushed; | ||
return -1; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
public: | ||
StringConverter(const T *s, size_t srclen, size_t dstlen, mbstate *ps) | ||
: cr(ps), src(s), src_len(srclen), src_idx(0), num_pushed(0), | ||
num_to_write(dstlen) { | ||
pushFullCharacter(); | ||
sribee8 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
StringConverter(const T *s, size_t dstlen, mbstate *ps) | ||
: StringConverter(s, SIZE_MAX, dstlen, ps) {} | ||
Comment on lines
+54
to
+61
Review comment: Would it be better to move `srclen` to the end and make it a parameter with a default value instead of this? |
||
|
||
// TODO: following functions are almost identical | ||
// look into templating CharacterConverter pop functions | ||
// Pops the next UTF-32 element of the converted string.
//
// Returns the popped element, or an Error when:
//   * no destination capacity remains (num_to_write == 0),
//   * pushFullCharacter() fails (its code is forwarded), or
//   * the freshly loaded character needs more elements than remain in the
//     destination (the converter is cleared and -1 is returned).
// Popping a null character sets src_len to src_idx, so no further source
// elements are read afterwards.
ErrorOr<char32_t> popUTF32() {
  // Refuse up front when the destination is full. The size check below only
  // runs when the converter is empty, so without this guard a converter that
  // already holds a character would still pop and wrap the unsigned
  // num_to_write around to SIZE_MAX.
  if (num_to_write == 0)
    return Error(-1);

  if (cr.isEmpty()) {
    int err = pushFullCharacter();
    if (err != 0)
      return Error(err);

    // Only start emitting a character if all of it fits in what is left.
    if (cr.sizeAsUTF32() > num_to_write) {
      cr.clear();
      return Error(-1);
    }
  }

  auto out = cr.pop_utf32();
  // Once the converter has been drained, the source elements that produced
  // this character count as consumed.
  if (cr.isEmpty())
    src_idx += num_pushed;

  // A null terminator ends the string: clamp the source length to it.
  if (out.has_value() && out.value() == L'\0')
    src_len = src_idx;

  num_to_write--;

  return out;
}
|
||
// Pops the next UTF-8 element of the converted string.
//
// Returns the popped element, or an Error when:
//   * no destination capacity remains (num_to_write == 0),
//   * pushFullCharacter() fails (its code is forwarded), or
//   * the freshly loaded character needs more elements than remain in the
//     destination (the converter is cleared and -1 is returned).
// Popping a null character sets src_len to src_idx, so no further source
// elements are read afterwards.
ErrorOr<char8_t> popUTF8() {
  // Refuse up front when the destination is full. The size check below only
  // runs when the converter is empty, so without this guard a converter that
  // already holds a character would still pop and wrap the unsigned
  // num_to_write around to SIZE_MAX.
  if (num_to_write == 0)
    return Error(-1);

  if (cr.isEmpty()) {
    int err = pushFullCharacter();
    if (err != 0)
      return Error(err);

    // Only start emitting a character if all of it fits in what is left.
    if (cr.sizeAsUTF8() > num_to_write) {
      cr.clear();
      return Error(-1);
    }
  }

  auto out = cr.pop_utf8();
  // Once the converter has been drained, the source elements that produced
  // this character count as consumed.
  if (cr.isEmpty())
    src_idx += num_pushed;

  // A null terminator ends the string: clamp the source length to it.
  if (out.has_value() && out.value() == '\0')
    src_len = src_idx;

  num_to_write--;

  return out;
}
|
||
// Returns the current source index (`src_idx`): the offset into `src` from
// which pushFullCharacter() reads next.
size_t getSourceIndex() {
  return src_idx;
}
}; | ||
|
||
} // namespace internal | ||
} // namespace LIBC_NAMESPACE_DECL | ||
|
||
#endif // LLVM_LIBC_SRC___SUPPORT_STRING_CONVERTER_H |
Uh oh!
There was an error while loading. Please reload this page.