diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in index 0e181a673d36..57e82995d9ba 100644 --- a/gcc/rust/Make-lang.in +++ b/gcc/rust/Make-lang.in @@ -88,6 +88,7 @@ GRS_OBJS = \ rust/rust-lint-marklive.o \ rust/rust-hir-type-check-path.o \ rust/rust-compile-intrinsic.o \ + rust/rust-base62.o \ $(END) # removed object files from here diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc index 0e6643c64aeb..15ac3b1db5fd 100644 --- a/gcc/rust/backend/rust-mangle.cc +++ b/gcc/rust/backend/rust-mangle.cc @@ -1,5 +1,7 @@ #include "rust-mangle.h" #include "fnv-hash.h" +#include "rust-base62.h" +#include // FIXME: Rename those to legacy_* static const std::string kMangledSymbolPrefix = "_ZN"; @@ -154,6 +156,63 @@ v0_simple_type_prefix (const TyTy::BaseType *ty) gcc_unreachable (); } +// Add an underscore-terminated base62 integer to the mangling string. +// This corresponds to the `` grammar in the v0 mangling RFC: +// - 0 is encoded as "_" +// - any other value is encoded as itself minus one in base 62, followed by "_" +static void +v0_add_integer_62 (std::string &mangled, uint64_t x) +{ + if (x > 0) + mangled.append (base62_integer (x - 1)); + + mangled.append ("_"); +} + +// Add a tag-prefixed base62 integer to the mangling string when the +// integer is greater than 0: +// - 0 is encoded as "" (nothing) +// - any other value is encoded as + v0_add_integer_62(itself), that is +// + base62(itself - 1) + '_' +static void +v0_add_opt_integer_62 (std::string &mangled, std::string tag, uint64_t x) +{ + if (x > 0) + { + mangled.append (tag); + v0_add_integer_62 (mangled, x); + } +} + +static void +v0_add_disambiguator (std::string &mangled, uint64_t dis) +{ + v0_add_opt_integer_62 (mangled, "s", dis); +} + +// Add an identifier to the mangled string. This corresponds to the +// `` grammar in the v0 mangling RFC. +static void +v0_add_identifier (std::string &mangled, const std::string &identifier) +{ + // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to + // create mangling for unicode values for now. However, this is handled + // by the v0 mangling scheme. The grammar for unicode identifier is contained + // in , right under the one. If the + // identifier contains unicode values, then an extra "u" needs to be added + // to the mangling string and `punycode` must be used to encode the + // characters. + + mangled += std::to_string (identifier.size ()); + + // If the first character of the identifier is a digit or an underscore, we + // add an extra underscore + if (identifier[0] == '_') + mangled.append ("_"); + + mangled.append (identifier); +} + static std::string v0_type_prefix (const TyTy::BaseType *ty) { @@ -194,7 +253,13 @@ static std::string v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path, const std::string &crate_name) { + std::string mangled; + + // FIXME: Add real algorithm once all pieces are implemented auto ty_prefix = v0_type_prefix (ty); + v0_add_identifier (mangled, crate_name); + v0_add_disambiguator (mangled, 62); + gcc_unreachable (); } diff --git a/gcc/rust/util/rust-base62.cc b/gcc/rust/util/rust-base62.cc new file mode 100644 index 000000000000..f1e3202182a4 --- /dev/null +++ b/gcc/rust/util/rust-base62.cc @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +#include "rust-base62.h" + +#include + +namespace Rust { + +std::string +base62_integer (uint64_t value) +{ + const static std::string base_64 + = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; + std::string buffer (128, '\0'); + size_t idx = 0; + size_t base = 62; + + do + { + buffer[idx] = base_64[(value % base)]; + idx++; + value = value / base; + } + while (value != 0); + + std::reverse (buffer.begin (), buffer.begin () + idx); + return buffer.substr (0, idx); +} + +} // namespace Rust + +// FIXME: Add unit testing using the selftest framework diff --git a/gcc/rust/util/rust-base62.h b/gcc/rust/util/rust-base62.h new file mode 100644 index 000000000000..7a6e3cf2c926 --- /dev/null +++ b/gcc/rust/util/rust-base62.h @@ -0,0 +1,34 @@ +// Copyright (C) 2020 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +#ifndef RUST_BASE62_H +#define RUST_BASE62_H + +#include + +namespace Rust { + +/** + * Get the Base62 representation of an integer + */ +std::string +base62_integer (uint64_t value); + +} // namespace Rust + +#endif /* !RUST_BASE62_H */