Skip to content

Commit 629b5e7

Browse files
committed
New fmt::Arguments implementation.
1 parent 6d32b29 commit 629b5e7

File tree

2 files changed

+156
-98
lines changed

2 files changed

+156
-98
lines changed

library/core/src/fmt/mod.rs

Lines changed: 153 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ use crate::char::EscapeDebugExtArgs;
77
use crate::iter;
88
use crate::marker::PhantomData;
99
use crate::mem;
10-
use crate::num::fmt as numfmt;
10+
use crate::num::{fmt as numfmt, NonZeroUsize};
1111
use crate::ops::Deref;
12+
use crate::ptr;
1213
use crate::result;
14+
use crate::slice;
1315
use crate::str;
1416

1517
mod builders;
@@ -277,15 +279,43 @@ impl<'a> Formatter<'a> {
277279
#[stable(feature = "rust1", since = "1.0.0")]
278280
#[derive(Copy, Clone)]
279281
pub struct Arguments<'a> {
280-
// Format string pieces to print.
281-
pieces: &'a [&'static str],
282+
/// The number of string pieces and placeholders combined.
283+
///
284+
/// For example:
285+
/// - 1 for `format_args!("abc")`
286+
/// - 2 for `format_args!("abc{}")`
287+
/// - 3 for `format_args!("abc{}xyz")`
288+
/// - 4 for `format_args!("abc{}xyz{}")`
289+
/// - 5 for `format_args!("abc{}xyz{}123")`
290+
///
291+
/// The first part is always a string piece, but it may be an empty string.
292+
/// E.g. `format_args!("{}")` has two parts: one empty string piece and one placeholder.
293+
///
294+
/// The number of placeholders is `num_parts / 2`.
295+
/// The number of string pieces is `(num_parts + 1) / 2`.
296+
num_parts: NonZeroUsize,
282297

283-
// Placeholder specs, or `None` if all specs are default (as in "{}{}").
284-
fmt: Option<&'a [rt::Placeholder]>,
298+
/// The string pieces and the placeholders.
299+
///
300+
/// If `num_parts` is one, this stores the &'static str directly.
301+
/// Otherwise, it stores pointers to both the slice of string pieces and the slice of placeholders.
302+
///
303+
/// The lengths of those slices are determined by the `num_parts` field above.
304+
parts: Parts,
285305

286-
// Dynamic arguments for interpolation, to be interleaved with string
287-
// pieces. (Every argument is preceded by a string piece.)
288-
args: &'a [rt::Argument<'a>],
306+
/// Pointer to the start of the array of arguments.
307+
args: *const rt::Argument<'a>,
308+
}
309+
310+
#[derive(Copy, Clone)]
311+
union Parts {
312+
/// Used if `num_parts == 1`.
313+
string: &'static str,
314+
/// Used if `num_parts > 1`.
315+
///
316+
/// The placeholders pointer can be null for default placeholders:
317+
/// a placeholder for each argument once, in order, with default formatting options.
318+
strings_and_placeholders: (*const &'static str, *const rt::Placeholder),
289319
}
290320

291321
/// Used by the format_args!() macro to create a fmt::Arguments object.
@@ -294,38 +324,60 @@ pub struct Arguments<'a> {
294324
impl<'a> Arguments<'a> {
295325
#[inline]
296326
#[rustc_const_unstable(feature = "const_fmt_arguments_new", issue = "none")]
297-
pub const fn new_const(pieces: &'a [&'static str]) -> Self {
298-
if pieces.len() > 1 {
299-
panic!("invalid args");
327+
#[cfg(bootstrap)]
328+
pub const fn new_const(strings: &'a [&'static str]) -> Self {
329+
match strings {
330+
[] => Self::new_str(""),
331+
[s] => Self::new_str(s),
332+
_ => panic!("invalid args"),
300333
}
301-
Arguments { pieces, fmt: None, args: &[] }
334+
}
335+
336+
#[inline]
337+
#[rustc_const_unstable(feature = "const_fmt_arguments_new", issue = "none")]
338+
pub const fn new_str(s: &'static str) -> Self {
339+
Self { num_parts: NonZeroUsize::MIN, parts: Parts { string: s }, args: ptr::null() }
302340
}
303341

304342
/// When using the format_args!() macro, this function is used to generate the
305343
/// Arguments structure.
306344
#[inline]
307-
pub fn new_v1(pieces: &'a [&'static str], args: &'a [rt::Argument<'a>]) -> Arguments<'a> {
308-
if pieces.len() < args.len() || pieces.len() > args.len() + 1 {
309-
panic!("invalid args");
345+
pub fn new_v1(strings: &'a [&'static str], args: &'a [rt::Argument<'a>]) -> Self {
346+
// The number of strings and args should be the same,
347+
// except there may be one additional string after the last arg.
348+
assert!(strings.len() == args.len() || strings.len() == args.len() + 1, "invalid args");
349+
match NonZeroUsize::new(strings.len() + args.len()) {
350+
None => Self::new_str(""),
351+
Some(NonZeroUsize::MIN) => Self::new_str(strings[0]),
352+
Some(num_parts) => Self {
353+
num_parts,
354+
parts: Parts { strings_and_placeholders: (strings.as_ptr(), ptr::null()) },
355+
args: args.as_ptr(),
356+
},
310357
}
311-
Arguments { pieces, fmt: None, args }
312358
}
313359

314360
/// This function is used to specify nonstandard formatting parameters.
315361
///
316362
/// An `rt::UnsafeArg` is required because the following invariants must be held
317363
/// in order for this function to be safe:
318-
/// 1. The `pieces` slice must be at least as long as `fmt`.
319-
/// 2. Every `rt::Placeholder::position` value within `fmt` must be a valid index of `args`.
320-
/// 3. Every `rt::Count::Param` within `fmt` must contain a valid index of `args`.
364+
/// 1. `placeholders` must be nonempty.
365+
/// 2. The `strings` slice must be at least as long as `placeholders`.
366+
/// 3. Every `rt::Placeholder::position` value within `placeholders` must be a valid index of `args`.
367+
/// 4. Every `rt::Count::Param` within `placeholders` must contain a valid index of `args`.
321368
#[inline]
322369
pub fn new_v1_formatted(
323-
pieces: &'a [&'static str],
370+
strings: &'a [&'static str],
324371
args: &'a [rt::Argument<'a>],
325-
fmt: &'a [rt::Placeholder],
372+
placeholders: &'a [rt::Placeholder],
326373
_unsafe_arg: rt::UnsafeArg,
327-
) -> Arguments<'a> {
328-
Arguments { pieces, fmt: Some(fmt), args }
374+
) -> Self {
375+
Self {
376+
// SAFETY: The caller must guarantee `placeholders` is nonempty.
377+
num_parts: unsafe { NonZeroUsize::new_unchecked(strings.len() + placeholders.len()) },
378+
parts: Parts { strings_and_placeholders: (strings.as_ptr(), placeholders.as_ptr()) },
379+
args: args.as_ptr(),
380+
}
329381
}
330382

331383
/// Estimates the length of the formatted text.
@@ -334,22 +386,37 @@ impl<'a> Arguments<'a> {
334386
/// when using `format!`. Note: this is neither the lower nor upper bound.
335387
#[inline]
336388
pub fn estimated_capacity(&self) -> usize {
337-
let pieces_length: usize = self.pieces.iter().map(|x| x.len()).sum();
338-
339-
if self.args.is_empty() {
340-
pieces_length
341-
} else if !self.pieces.is_empty() && self.pieces[0].is_empty() && pieces_length < 16 {
342-
// If the format string starts with an argument,
343-
// don't preallocate anything, unless length
344-
// of pieces is significant.
345-
0
389+
let num_parts = self.num_parts.get();
390+
391+
if num_parts == 1 {
392+
// SAFETY: With num_parts == 1, the `parts` field stores just the string.
393+
unsafe { self.parts.string }.len()
346394
} else {
347-
// There are some arguments, so any additional push
348-
// will reallocate the string. To avoid that,
349-
// we're "pre-doubling" the capacity here.
350-
pieces_length.checked_mul(2).unwrap_or(0)
395+
// SAFETY: With num_parts > 1, the `parts` field stores the pointers to the strings and
396+
// placeholder slices.
397+
let strings = unsafe {
398+
slice::from_raw_parts(self.parts.strings_and_placeholders.0, (num_parts + 1) / 2)
399+
};
400+
let strings_length: usize = strings.iter().map(|s| s.len()).sum();
401+
if strings[0].is_empty() && strings_length < 16 {
402+
// If the format string starts with an argument,
403+
// don't preallocate anything, unless length
404+
// of strings is significant.
405+
0
406+
} else {
407+
// There are some arguments, so any additional push
408+
// will reallocate the string. To avoid that,
409+
// we're "pre-doubling" the capacity here.
410+
strings_length.checked_mul(2).unwrap_or(0)
411+
}
351412
}
352413
}
414+
415+
#[inline(always)]
416+
unsafe fn arg(&self, n: usize) -> &rt::Argument<'a> {
417+
// SAFETY: Caller needs to provide a valid index.
418+
unsafe { &*self.args.add(n) }
419+
}
353420
}
354421

355422
impl<'a> Arguments<'a> {
@@ -400,10 +467,11 @@ impl<'a> Arguments<'a> {
400467
#[must_use]
401468
#[inline]
402469
pub const fn as_str(&self) -> Option<&'static str> {
403-
match (self.pieces, self.args) {
404-
([], []) => Some(""),
405-
([s], []) => Some(s),
406-
_ => None,
470+
if self.num_parts.get() == 1 {
471+
// SAFETY: With num_parts == 1, the `parts` field stores just the string.
472+
Some(unsafe { self.parts.string })
473+
} else {
474+
None
407475
}
408476
}
409477
}
@@ -1077,80 +1145,70 @@ pub trait UpperExp {
10771145
///
10781146
/// [`write!`]: crate::write!
10791147
#[stable(feature = "rust1", since = "1.0.0")]
1080-
pub fn write(output: &mut dyn Write, args: Arguments<'_>) -> Result {
1148+
pub fn write(output: &mut dyn Write, fmt: Arguments<'_>) -> Result {
10811149
let mut formatter = Formatter::new(output);
1082-
let mut idx = 0;
1083-
1084-
match args.fmt {
1085-
None => {
1086-
// We can use default formatting parameters for all arguments.
1087-
for (i, arg) in args.args.iter().enumerate() {
1088-
// SAFETY: args.args and args.pieces come from the same Arguments,
1089-
// which guarantees the indexes are always within bounds.
1090-
let piece = unsafe { args.pieces.get_unchecked(i) };
1091-
if !piece.is_empty() {
1092-
formatter.buf.write_str(*piece)?;
1093-
}
1094-
arg.fmt(&mut formatter)?;
1095-
idx += 1;
1150+
1151+
if let Some(s) = fmt.as_str() {
1152+
return formatter.buf.write_str(s);
1153+
}
1154+
1155+
// SAFETY: Since as_str() returned None, we know that `fmt.parts` contains the
1156+
// strings and placeholders pointers.
1157+
let (strings, placeholders) = unsafe { fmt.parts.strings_and_placeholders };
1158+
1159+
// Iterate over all parts (string, placeholder, string, ...).
1160+
// Even numbered parts are strings, odd numbered parts are placeholders.
1161+
for i in 0..fmt.num_parts.get() {
1162+
if i % 2 == 0 {
1163+
// SAFETY: The Arguments type guarantees the indexes are always within bounds.
1164+
let string = unsafe { &*strings.add(i / 2) };
1165+
if !string.is_empty() {
1166+
formatter.buf.write_str(string)?;
10961167
}
1097-
}
1098-
Some(fmt) => {
1099-
// Every spec has a corresponding argument that is preceded by
1100-
// a string piece.
1101-
for (i, arg) in fmt.iter().enumerate() {
1102-
// SAFETY: fmt and args.pieces come from the same Arguments,
1103-
// which guarantees the indexes are always within bounds.
1104-
let piece = unsafe { args.pieces.get_unchecked(i) };
1105-
if !piece.is_empty() {
1106-
formatter.buf.write_str(*piece)?;
1107-
}
1108-
// SAFETY: arg and args.args come from the same Arguments,
1109-
// which guarantees the indexes are always within bounds.
1110-
unsafe { run(&mut formatter, arg, args.args) }?;
1111-
idx += 1;
1168+
} else {
1169+
if placeholders.is_null() {
1170+
// Use default placeholders: each argument once, in order, with default formatting.
1171+
// SAFETY: The Arguments type guarantees the indexes are always within bounds.
1172+
unsafe { fmt.arg(i / 2) }.fmt(&mut formatter)?;
1173+
} else {
1174+
// SAFETY: The Arguments type guarantees the indexes are always within bounds.
1175+
unsafe { run(&mut formatter, &fmt, &*placeholders.add(i / 2)) }?;
11121176
}
11131177
}
11141178
}
11151179

1116-
// There can be only one trailing string piece left.
1117-
if let Some(piece) = args.pieces.get(idx) {
1118-
formatter.buf.write_str(*piece)?;
1119-
}
1120-
11211180
Ok(())
11221181
}
11231182

1124-
unsafe fn run(fmt: &mut Formatter<'_>, arg: &rt::Placeholder, args: &[rt::Argument<'_>]) -> Result {
1125-
fmt.fill = arg.fill;
1126-
fmt.align = arg.align;
1127-
fmt.flags = arg.flags;
1128-
// SAFETY: arg and args come from the same Arguments,
1129-
// which guarantees the indexes are always within bounds.
1183+
unsafe fn run(
1184+
out: &mut Formatter<'_>,
1185+
fmt: &Arguments<'_>,
1186+
placeholder: &rt::Placeholder,
1187+
) -> Result {
1188+
out.fill = placeholder.fill;
1189+
out.align = placeholder.align;
1190+
out.flags = placeholder.flags;
1191+
1192+
// SAFETY: The Arguments type guarantees the indexes are always within bounds.
11301193
unsafe {
1131-
fmt.width = getcount(args, &arg.width);
1132-
fmt.precision = getcount(args, &arg.precision);
1194+
out.width = getcount(fmt, &placeholder.width);
1195+
out.precision = getcount(fmt, &placeholder.precision);
11331196
}
11341197

1135-
// Extract the correct argument
1136-
debug_assert!(arg.position < args.len());
1137-
// SAFETY: arg and args come from the same Arguments,
1138-
// which guarantees its index is always within bounds.
1139-
let value = unsafe { args.get_unchecked(arg.position) };
1198+
// SAFETY: The Arguments type guarantees the indexes are always within bounds.
1199+
let arg = unsafe { fmt.arg(placeholder.position) };
11401200

1141-
// Then actually do some printing
1142-
value.fmt(fmt)
1201+
arg.fmt(out)
11431202
}
11441203

1145-
unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<usize> {
1204+
unsafe fn getcount(fmt: &Arguments<'_>, cnt: &rt::Count) -> Option<usize> {
11461205
match *cnt {
11471206
rt::Count::Is(n) => Some(n),
11481207
rt::Count::Implied => None,
11491208
rt::Count::Param(i) => {
1150-
debug_assert!(i < args.len());
1151-
// SAFETY: cnt and args come from the same Arguments,
1152-
// which guarantees this index is always within bounds.
1153-
unsafe { args.get_unchecked(i).as_usize() }
1209+
// SAFETY: The Arguments type guarantees the indexes are always within bounds,
1210+
// and the caller must give a `Count` from this same `Arguments` object.
1211+
unsafe { fmt.arg(i).as_usize() }
11541212
}
11551213
}
11561214
}

library/core/src/panicking.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ pub const fn panic(expr: &'static str) -> ! {
124124
// truncation and padding (even though none is used here). Using
125125
// Arguments::new_v1 may allow the compiler to omit Formatter::pad from the
126126
// output binary, saving up to a few kilobytes.
127-
panic_fmt(fmt::Arguments::new_const(&[expr]));
127+
panic_fmt(fmt::Arguments::new_str(expr));
128128
}
129129

130130
/// Like `panic`, but without unwinding and track_caller to reduce the impact on codesize.
@@ -133,15 +133,15 @@ pub const fn panic(expr: &'static str) -> ! {
133133
#[lang = "panic_nounwind"] // needed by codegen for non-unwinding panics
134134
#[rustc_nounwind]
135135
pub fn panic_nounwind(expr: &'static str) -> ! {
136-
panic_nounwind_fmt(fmt::Arguments::new_const(&[expr]), /* force_no_backtrace */ false);
136+
panic_nounwind_fmt(fmt::Arguments::new_str(expr), /* force_no_backtrace */ false);
137137
}
138138

139139
/// Like `panic_nounwind`, but also inhibits showing a backtrace.
140140
#[cfg_attr(not(feature = "panic_immediate_abort"), inline(never), cold)]
141141
#[cfg_attr(feature = "panic_immediate_abort", inline)]
142142
#[rustc_nounwind]
143143
pub fn panic_nounwind_nobacktrace(expr: &'static str) -> ! {
144-
panic_nounwind_fmt(fmt::Arguments::new_const(&[expr]), /* force_no_backtrace */ true);
144+
panic_nounwind_fmt(fmt::Arguments::new_str(expr), /* force_no_backtrace */ true);
145145
}
146146

147147
#[inline]

0 commit comments

Comments
 (0)