Skip to content

Commit 649e3e0

Browse files
Merge #747
747: Base v0 mangling grammar r=philberty a=CohenArthur This PR adds base functions to deal with the v0 mangling grammar, [found here](https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#syntax-of-mangled-names). I have a few questions regarding this implementation: 1/ Is there any existing implementation for the base62 algorithm used here? This is directly adapted from [rustc's base_n module](https://github.com/rust-lang/rust/blob/6f53ddfa74ac3c10ceb63ad4a7a9c95e55853c87/compiler/rustc_data_structures/src/base_n.rs#L16) which I'm assuming is relatively standard and might already exist in the compiler. I haven't been able to find it however. 2/ gccrs cannot yet deal with Unicode identifiers, as pointed out by `@bjorn3` in #418. This means that a big chunk of the `v0_add_identifier` implementation is missing. Should it be added in this PR too? 3/ As mentioned in Zulip, it would be great to be able to create unit tests for this piece of code. It would be quite easy to generate a bunch of base62 strings and ensure that the algorithm here matches them. Co-authored-by: CohenArthur <[email protected]>
2 parents a1a4506 + fd9d37c commit 649e3e0

File tree

4 files changed

+148
-0
lines changed

4 files changed

+148
-0
lines changed

gcc/rust/Make-lang.in

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ GRS_OBJS = \
8888
rust/rust-lint-marklive.o \
8989
rust/rust-hir-type-check-path.o \
9090
rust/rust-compile-intrinsic.o \
91+
rust/rust-base62.o \
9192
$(END)
9293
# removed object files from here
9394

gcc/rust/backend/rust-mangle.cc

+65
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "rust-mangle.h"
22
#include "fnv-hash.h"
3+
#include "rust-base62.h"
4+
#include <algorithm>
35

46
// FIXME: Rename those to legacy_*
57
static const std::string kMangledSymbolPrefix = "_ZN";
@@ -154,6 +156,63 @@ v0_simple_type_prefix (const TyTy::BaseType *ty)
154156
gcc_unreachable ();
155157
}
156158

159+
// Add an underscore-terminated base62 integer to the mangling string.
160+
// This corresponds to the `<base-62-number>` grammar in the v0 mangling RFC:
161+
// - 0 is encoded as "_"
162+
// - any other value is encoded as itself minus one in base 62, followed by "_"
163+
static void
164+
v0_add_integer_62 (std::string &mangled, uint64_t x)
165+
{
166+
if (x > 0)
167+
mangled.append (base62_integer (x - 1));
168+
169+
mangled.append ("_");
170+
}
171+
172+
// Add a tag-prefixed base62 integer to the mangling string when the
173+
// integer is greater than 0:
174+
// - 0 is encoded as "" (nothing)
175+
// - any other value is encoded as <tag> + v0_add_integer_62(itself), that is
176+
// <tag> + base62(itself - 1) + '_'
177+
static void
178+
v0_add_opt_integer_62 (std::string &mangled, std::string tag, uint64_t x)
179+
{
180+
if (x > 0)
181+
{
182+
mangled.append (tag);
183+
v0_add_integer_62 (mangled, x);
184+
}
185+
}
186+
187+
static void
188+
v0_add_disambiguator (std::string &mangled, uint64_t dis)
189+
{
190+
v0_add_opt_integer_62 (mangled, "s", dis);
191+
}
192+
193+
// Add an identifier to the mangled string. This corresponds to the
// `<identifier>` grammar in the v0 mangling RFC.
//
// The output is the decimal length of the identifier, an optional "_"
// separator, and then the identifier bytes themselves. An empty identifier
// is encoded as "0".
static void
v0_add_identifier (std::string &mangled, const std::string &identifier)
{
  // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
  // create mangling for unicode values for now. However, this is handled
  // by the v0 mangling scheme. The grammar for unicode identifier is contained
  // in <undisambiguated-identifier>, right under the <identifier> one. If the
  // identifier contains unicode values, then an extra "u" needs to be added
  // to the mangling string and `punycode` must be used to encode the
  // characters.

  mangled += std::to_string (identifier.size ());

  // If the first character of the identifier is a digit or an underscore, we
  // add an extra underscore. Without it a leading digit would run into the
  // length prefix (e.g. "1abc" would become the ambiguous "41abc"), and a
  // leading underscore could be mistaken for the separator itself.
  if (!identifier.empty ())
    {
      const char first = identifier.front ();
      const bool starts_with_digit = first >= '0' && first <= '9';

      if (first == '_' || starts_with_digit)
        mangled.append ("_");
    }

  mangled.append (identifier);
}
215+
157216
static std::string
158217
v0_type_prefix (const TyTy::BaseType *ty)
159218
{
@@ -194,7 +253,13 @@ static std::string
194253
v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path,
195254
const std::string &crate_name)
196255
{
256+
std::string mangled;
257+
258+
// FIXME: Add real algorithm once all pieces are implemented
197259
auto ty_prefix = v0_type_prefix (ty);
260+
v0_add_identifier (mangled, crate_name);
261+
v0_add_disambiguator (mangled, 62);
262+
198263
gcc_unreachable ();
199264
}
200265

gcc/rust/util/rust-base62.cc

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright (C) 2020 Free Software Foundation, Inc.
2+
3+
// This file is part of GCC.
4+
5+
// GCC is free software; you can redistribute it and/or modify it under
6+
// the terms of the GNU General Public License as published by the Free
7+
// Software Foundation; either version 3, or (at your option) any later
8+
// version.
9+
10+
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11+
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
// for more details.
14+
15+
// You should have received a copy of the GNU General Public License
16+
// along with GCC; see the file COPYING3. If not see
17+
// <http://www.gnu.org/licenses/>.
18+
19+
#include "rust-base62.h"
20+
21+
#include <algorithm>
22+
23+
namespace Rust {
24+
25+
// Convert `value` to its base62 textual representation, most significant
// digit first. Digit values 0-9 map to '0'-'9', 10-35 to 'a'-'z' and 36-61
// to 'A'-'Z'. Zero is rendered as "0"; the result is never empty.
std::string
base62_integer (uint64_t value)
{
  static const char digits[]
    = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  constexpr uint64_t base = 62;

  // 62^10 < 2^64 <= 62^11, so a 64-bit value never needs more than 11 digits.
  constexpr size_t max_digits = 11;
  char buffer[max_digits];

  // Digits come out least significant first, so fill the buffer from the
  // back; this avoids both a heap-allocated scratch string and a reversal.
  size_t start = max_digits;
  do
    {
      --start;
      buffer[start] = digits[value % base];
      value /= base;
    }
  while (value != 0);

  return std::string (buffer + start, max_digits - start);
}
45+
46+
} // namespace Rust
47+
48+
// FIXME: Add unit testing using the selftest framework

gcc/rust/util/rust-base62.h

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright (C) 2020 Free Software Foundation, Inc.
2+
3+
// This file is part of GCC.
4+
5+
// GCC is free software; you can redistribute it and/or modify it under
6+
// the terms of the GNU General Public License as published by the Free
7+
// Software Foundation; either version 3, or (at your option) any later
8+
// version.
9+
10+
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11+
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
// for more details.
14+
15+
// You should have received a copy of the GNU General Public License
16+
// along with GCC; see the file COPYING3. If not see
17+
// <http://www.gnu.org/licenses/>.
18+
19+
#ifndef RUST_BASE62_H
20+
#define RUST_BASE62_H
21+
22+
// <cstdint> is included explicitly: uint64_t is used below and <string> is
// not guaranteed to provide it.
#include <cstdint>
#include <string>

namespace Rust {

/**
 * Get the Base62 representation of an integer.
 *
 * @param value the unsigned 64-bit integer to convert
 * @return the digits of `value` in base 62, most significant digit first
 */
std::string
base62_integer (uint64_t value);

} // namespace Rust
33+
34+
#endif /* !RUST_BASE62_H */

0 commit comments

Comments
 (0)