Skip to content

Commit 3c71259

Browse files
committed
a
1 parent a3895b6 commit 3c71259

File tree

4 files changed

+48
-14
lines changed

4 files changed

+48
-14
lines changed

gcc/rust/lex/rust-codepoint.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include "rust-system.h"
2323

2424
namespace Rust {
25+
26+
// FIXME: move this to rust-unicode.h?
2527
struct Codepoint
2628
{
2729
uint32_t value;

gcc/rust/lex/rust-lex.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,14 @@ class Lexer
334334
return c;
335335
}
336336
}
337+
338+
// Returns the contents of `chars` wrapped in an optional when is_valid ()
// reports true, and tl::nullopt otherwise.
// The result is returned by value, so callers get their own vector.
// NOTE(review): `chars` and is_valid () are declared outside this hunk;
// presumably `chars` holds the decoded input and is_valid () reflects
// whether decoding succeeded -- confirm against the enclosing class.
tl::optional<std::vector<Codepoint>> get_chars ()
339+
{
340+
if (is_valid ())
341+
return {chars};
342+
else
343+
return tl::nullopt;
344+
}
337345
};
338346

339347
class FileInputSource : public InputSource

gcc/rust/rust-session-manager.cc

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "rust-early-name-resolver.h"
4343
#include "rust-cfg-strip.h"
4444
#include "rust-expand-visitor.h"
45+
#include "rust-unicode.h"
4546

4647
#include "diagnostic.h"
4748
#include "input.h"
@@ -113,29 +114,32 @@ infer_crate_name (const std::string &filename)
113114
static bool
114115
validate_crate_name (const std::string &crate_name, Error &error)
115116
{
116-
if (crate_name.empty ())
117+
Utf8String utf8_name = {crate_name};
118+
auto uchars = utf8_name.get_chars ();
119+
120+
if (!uchars.has_value ())
121+
{
122+
error = Error (UNDEF_LOCATION, "crate name is not a valid UTF-8 string");
123+
return false;
124+
}
125+
if (uchars.value ().empty ())
117126
{
118127
error = Error (UNDEF_LOCATION, "crate name cannot be empty");
119128
return false;
120129
}
121-
if (crate_name.length () > kMaxNameLength)
130+
if (uchars.value ().size () > kMaxNameLength)
122131
{
123132
error = Error (UNDEF_LOCATION, "crate name cannot exceed %lu characters",
124133
(unsigned long) kMaxNameLength);
125134
return false;
126135
}
127-
for (auto &c : crate_name)
136+
for (Codepoint &c : uchars.value ())
128137
{
129-
std::cout << "check for " << c.as_string () << std::endl;
130-
std::cout << is_alphabetic (c.value);
131-
std::cout << "is_al_ok" << std::endl;
132-
std::cout << is_numeric (c.value);
133-
std::cout << "is_num_ok" << std::endl;
134138
if (!(is_alphabetic (c.value) || is_numeric (c.value) || c.value == '_'))
135139
{
136140
error = Error (UNDEF_LOCATION,
137-
"invalid character %<%c%> in crate name: %<%s%>", c,
138-
crate_name.c_str ());
141+
"invalid character %<%s%> in crate name: %<%s%>",
142+
c.as_string ().c_str (), crate_name.c_str ());
139143
return false;
140144
}
141145
}
@@ -1231,19 +1235,20 @@ namespace selftest {
12311235
void
12321236
rust_crate_name_validation_test (void)
12331237
{
1234-
std::cout << "1 is numeric?" << Rust::is_numeric ('1') << std::endl;
1235-
12361238
auto error = Rust::Error (UNDEF_LOCATION, std::string ());
12371239
ASSERT_TRUE (Rust::validate_crate_name ("example", error));
12381240
ASSERT_TRUE (Rust::validate_crate_name ("abcdefg_1234", error));
12391241
ASSERT_TRUE (Rust::validate_crate_name ("1", error));
1240-
// FIXME: The next test does not pass as of current implementation
1241-
// ASSERT_TRUE (Rust::CompileOptions::validate_crate_name ("惊吓"));
1242+
ASSERT_TRUE (Rust::validate_crate_name ("クレート", error));
1243+
ASSERT_TRUE (Rust::validate_crate_name ("Sōkrátēs", error));
1244+
12421245
// NOTE: '-' (hyphen) is not allowed in the crate name ...
12431246

12441247
ASSERT_FALSE (Rust::validate_crate_name ("abcdefg-1234", error));
12451248
ASSERT_FALSE (Rust::validate_crate_name ("a+b", error));
12461249
ASSERT_FALSE (Rust::validate_crate_name ("/a+b/", error));
1250+
ASSERT_FALSE (Rust::validate_crate_name ("😸++", error));
1251+
ASSERT_FALSE (Rust::validate_crate_name ("", error));
12471252

12481253
/* Tests for crate name inference */
12491254
ASSERT_EQ (Rust::infer_crate_name ("c.rs"), "c");

gcc/rust/util/rust-unicode.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,29 @@
1919
#ifndef RUST_UNICODE_H
2020
#define RUST_UNICODE_H
2121

22+
#include "optional.h"
2223
#include "rust-system.h"
24+
#include "rust-lex.h"
2325

2426
namespace Rust {
2527

28+
// Wraps a std::string that may or may not be valid UTF-8 and exposes it as
// a sequence of Codepoint values. Decoding is done once, in the
// constructor, by a Lexer::BufferInputSource; the result is cached.
class Utf8String
29+
{
30+
private:
// Result of decoding the input string: the code points, or nullopt
// (see get_chars below).
31+
tl::optional<std::vector<Codepoint>> chars;
32+
33+
public:
// Decodes MAYBE_UTF8 and stores whatever the input source's get_chars ()
// returns.
// NOTE(review): the second initializer (0) is presumably a starting
// offset into the buffer -- confirm against BufferInputSource.
34+
Utf8String (const std::string &maybe_utf8)
35+
{
36+
Lexer::BufferInputSource input_source = {maybe_utf8, 0};
37+
chars = input_source.get_chars ();
38+
}
39+
40+
// Returns the decoded code points when the string is valid UTF-8, and
41+
// nullopt otherwise. The cached value is returned by copy.
42+
tl::optional<std::vector<Codepoint>> get_chars () const { return chars; }
43+
};
44+
2645
// TODO: add function nfc_normalize
2746

2847
bool

0 commit comments

Comments
 (0)