Skip to content

Commit bbbac30

Browse files
committed
gccrs: Add punycode encoding to v0 mangling
gcc/rust/ChangeLog:

	* backend/rust-mangle.cc (v0_add_identifier): Added punycode encoding
	(v0_mangle_item): Likewise.
	* lex/rust-lex.cc (assert_source_content): Change param type to const
	(test_buffer_input_source): Change param type to const
	(test_file_input_source): Change param type to const
	* resolve/rust-ast-resolve-toplevel.h: fix typo
	* rust-session-manager.cc (Session::load_extern_crate): fix typo
	* util/rust-canonical-path.h: fix typo
	* util/rust-hir-map.cc (NodeMapping::get_error): fix typo
	(Mappings::Mappings): fix typo
	* util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo
	(UNKNOWN_CRATENUM): Change 0 to UINT32_MAX

Signed-off-by: Raiki Tamura <[email protected]>
1 parent e55113e commit bbbac30

7 files changed

+53
-26
lines changed

gcc/rust/backend/rust-mangle.cc

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#include "rust-mangle.h"
22
#include "fnv-hash.h"
3+
#include "optional.h"
34
#include "rust-base62.h"
5+
#include "rust-diagnostics.h"
6+
#include "rust-unicode.h"
7+
#include "rust-punycode.h"
48

59
// FIXME: Rename those to legacy_*
610
static const std::string kMangledSymbolPrefix = "_ZN";
@@ -236,22 +240,43 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis)
236240
static void
237241
v0_add_identifier (std::string &mangled, const std::string &identifier)
238242
{
239-
// FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
240-
// create mangling for unicode values for now. However, this is handled
241-
// by the v0 mangling scheme. The grammar for unicode identifier is
242-
// contained in <undisambiguated-identifier>, right under the <identifier>
243-
// one. If the identifier contains unicode values, then an extra "u" needs
244-
// to be added to the mangling string and `punycode` must be used to encode
245-
// the characters.
246-
247-
mangled += std::to_string (identifier.size ());
248-
243+
// The grammar for unicode identifier is contained in
244+
// <undisambiguated-identifier>, right under the <identifier> one. If the
245+
// identifier contains unicode values, then an extra "u" needs to be added to
246+
// the mangling string and `punycode` must be used to encode the characters.
247+
tl::optional<Utf8String> uident_opt
248+
= Utf8String::make_utf8_string (identifier);
249+
if (uident_opt == tl::nullopt)
250+
rust_unreachable ();
251+
tl::optional<std::string> punycode_opt
252+
= encode_punycode (uident_opt.value ());
253+
if (punycode_opt == tl::nullopt)
254+
rust_unreachable ();
255+
256+
bool is_ascii_ident = true;
257+
for (auto c : uident_opt.value ().get_chars ())
258+
if (c.value > 127)
259+
{
260+
is_ascii_ident = false;
261+
break;
262+
}
263+
264+
std::string punycode = punycode_opt.value ();
265+
// remove trailing hyphen
266+
if (punycode.back () == '-')
267+
punycode.pop_back ();
268+
// replace hyphens in punycode with underscores
269+
std::replace (punycode.begin (), punycode.end (), '-', '_');
270+
271+
if (!is_ascii_ident)
272+
mangled.append ("u");
273+
274+
mangled += std::to_string (punycode.size ());
249275
// If the first character of the identifier is a digit or an underscore, we
250276
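// add an extra underscore
// NOTE(review): the check below only tests for '_'; a leading decimal digit
// (possible in punycode output) is not handled here -- confirm against the
// v0 <undisambiguated-identifier> grammar.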
251-
if (identifier[0] == '_')
252-
mangled.append ("_");
253-
254-
mangled.append (identifier);
277+
if (punycode[0] == '_')
278+
mangled += "_";
279+
mangled += punycode;
255280
}
256281

257282
static std::string
@@ -287,9 +312,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
287312

288313
std::string mangled;
289314
// FIXME: Add real algorithm once all pieces are implemented
290-
auto ty_prefix = v0_type_prefix (ty);
291315
v0_add_identifier (mangled, crate_name);
292316
v0_add_disambiguator (mangled, 62);
317+
auto ty_prefix = v0_type_prefix (ty);
293318

294319
rust_unreachable ();
295320
}

gcc/rust/lex/rust-lex.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2534,7 +2534,8 @@ namespace selftest {
25342534

25352535
// Checks if `src` has the same contents as the given characters
25362536
void
2537-
assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
2537+
assert_source_content (Rust::InputSource &src,
2538+
const std::vector<uint32_t> &expected)
25382539
{
25392540
Rust::Codepoint src_char = src.next ();
25402541
for (auto expected_char : expected)
@@ -2550,14 +2551,15 @@ assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
25502551
}
25512552

25522553
void
2553-
test_buffer_input_source (std::string str, std::vector<uint32_t> expected)
2554+
test_buffer_input_source (std::string str,
2555+
const std::vector<uint32_t> &expected)
25542556
{
25552557
Rust::BufferInputSource source (str, 0);
25562558
assert_source_content (source, expected);
25572559
}
25582560

25592561
void
2560-
test_file_input_source (std::string str, std::vector<uint32_t> expected)
2562+
test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
25612563
{
25622564
FILE *tmpf = tmpfile ();
25632565
// Moves to the first character

gcc/rust/resolve/rust-ast-resolve-toplevel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ class ResolveTopLevel : public ResolverBase
430430
}
431431
else
432432
{
433-
CrateNum found_crate_num = UNKNOWN_CREATENUM;
433+
CrateNum found_crate_num = UNKNOWN_CRATENUM;
434434
bool found
435435
= mappings->lookup_crate_name (extern_crate.get_referenced_crate (),
436436
found_crate_num);

gcc/rust/rust-session-manager.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ NodeId
979979
Session::load_extern_crate (const std::string &crate_name, location_t locus)
980980
{
981981
// has it already been loaded?
982-
CrateNum found_crate_num = UNKNOWN_CREATENUM;
982+
CrateNum found_crate_num = UNKNOWN_CRATENUM;
983983
bool found = mappings->lookup_crate_name (crate_name, found_crate_num);
984984
if (found)
985985
{

gcc/rust/util/rust-canonical-path.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class CanonicalPath
5858
{
5959
rust_assert (!path.empty ());
6060
return CanonicalPath ({std::pair<NodeId, std::string> (id, path)},
61-
UNKNOWN_CREATENUM);
61+
UNKNOWN_CRATENUM);
6262
}
6363

6464
static CanonicalPath
@@ -88,7 +88,7 @@ class CanonicalPath
8888

8989
static CanonicalPath create_empty ()
9090
{
91-
return CanonicalPath ({}, UNKNOWN_CREATENUM);
91+
return CanonicalPath ({}, UNKNOWN_CRATENUM);
9292
}
9393

9494
bool is_empty () const { return segs.size () == 0; }
@@ -171,7 +171,7 @@ class CanonicalPath
171171

172172
CrateNum get_crate_num () const
173173
{
174-
rust_assert (crate_num != UNKNOWN_CREATENUM);
174+
rust_assert (crate_num != UNKNOWN_CRATENUM);
175175
return crate_num;
176176
}
177177

gcc/rust/util/rust-hir-map.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ namespace Analysis {
2929
NodeMapping
3030
NodeMapping::get_error ()
3131
{
32-
return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
32+
return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
3333
UNKNOWN_LOCAL_DEFID);
3434
}
3535

@@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1;
9494
static const HirId kDefaultCrateNumBegin = 0;
9595

9696
Mappings::Mappings ()
97-
: crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM),
97+
: crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM),
9898
hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin)
9999
{
100100
Analysis::NodeMapping node (0, 0, 0, 0);

gcc/rust/util/rust-mapping-common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ struct DefId
6161
}
6262
};
6363

64-
#define UNKNOWN_CREATENUM ((uint32_t) (0))
64+
#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX))
6565
#define UNKNOWN_NODEID ((uint32_t) (0))
6666
#define UNKNOWN_HIRID ((uint32_t) (0))
6767
#define UNKNOWN_LOCAL_DEFID ((uint32_t) (0))

0 commit comments

Comments
 (0)