Skip to content

Commit 5505565

Browse files
authored
Add AsciiSet::EMPTY and boolean operators (#969)
* Add AsciiSet::EMPTY and impl ops::Add for AsciiSet

  In RFCs, the sets of characters to percent-encode are often defined as the union of multiple sets. This change adds an `EMPTY` constant to `AsciiSet` and implements the `Add` trait for `AsciiSet` so that sets can be combined with the `+` operator.

  AsciiSet now derives `Debug`, `PartialEq`, and `Eq` so that it can be used in tests.

* Implement ops::Not for AsciiSet

* Add const functions for negation / union of AsciiSet
1 parent 9404ff5 commit 5505565

File tree

1 file changed

+83
-1
lines changed

1 file changed

+83
-1
lines changed

percent_encoding/src/lib.rs

+83-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ use alloc::{
5151
string::String,
5252
vec::Vec,
5353
};
54-
use core::{fmt, mem, slice, str};
54+
use core::{fmt, mem, ops, slice, str};
5555

5656
/// Represents a set of characters or bytes in the ASCII range.
5757
///
@@ -66,6 +66,7 @@ use core::{fmt, mem, slice, str};
6666
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
6767
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
6868
/// ```
69+
#[derive(Debug, PartialEq, Eq)]
6970
pub struct AsciiSet {
7071
mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
7172
}
@@ -77,6 +78,11 @@ const ASCII_RANGE_LEN: usize = 0x80;
7778
const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>();
7879

7980
impl AsciiSet {
81+
/// An empty set.
82+
pub const EMPTY: AsciiSet = AsciiSet {
83+
mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK],
84+
};
85+
8086
/// Called with UTF-8 bytes rather than code points.
8187
/// Not used for non-ASCII bytes.
8288
const fn contains(&self, byte: u8) -> bool {
@@ -100,6 +106,39 @@ impl AsciiSet {
100106
mask[byte as usize / BITS_PER_CHUNK] &= !(1 << (byte as usize % BITS_PER_CHUNK));
101107
AsciiSet { mask }
102108
}
109+
110+
/// Return the union of two sets.
111+
pub const fn union(&self, other: Self) -> Self {
112+
let mask = [
113+
self.mask[0] | other.mask[0],
114+
self.mask[1] | other.mask[1],
115+
self.mask[2] | other.mask[2],
116+
self.mask[3] | other.mask[3],
117+
];
118+
AsciiSet { mask }
119+
}
120+
121+
/// Return the negation of the set.
122+
pub const fn complement(&self) -> Self {
123+
let mask = [!self.mask[0], !self.mask[1], !self.mask[2], !self.mask[3]];
124+
AsciiSet { mask }
125+
}
126+
}
127+
128+
impl ops::Add for AsciiSet {
129+
type Output = Self;
130+
131+
fn add(self, other: Self) -> Self {
132+
self.union(other)
133+
}
134+
}
135+
136+
impl ops::Not for AsciiSet {
137+
type Output = Self;
138+
139+
fn not(self) -> Self {
140+
self.complement()
141+
}
103142
}
104143

105144
/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
@@ -478,3 +517,46 @@ fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
478517
}
479518
}
480519
}
520+
521+
#[cfg(test)]
mod tests {
    use super::*;

    /// The `+` operator yields the union of its operands.
    #[test]
    fn add_op() {
        let left = AsciiSet::EMPTY.add(b'A');
        let right = AsciiSet::EMPTY.add(b'B');
        let expected = AsciiSet::EMPTY.add(b'A').add(b'B');
        assert_eq!(left + right, expected);
    }

    /// The `!` operator removes every member and admits every non-member.
    #[test]
    fn not_op() {
        let set = AsciiSet::EMPTY.add(b'A').add(b'B');
        let not_set = !set;
        assert!(!not_set.contains(b'A'));
        // Check both members of the original set, not just the first one.
        assert!(!not_set.contains(b'B'));
        assert!(not_set.contains(b'C'));
    }

    /// This test ensures that we can get the union of two sets as a constant value, which is
    /// useful for defining sets in a modular way.
    #[test]
    fn union() {
        const A: AsciiSet = AsciiSet::EMPTY.add(b'A');
        const B: AsciiSet = AsciiSet::EMPTY.add(b'B');
        const UNION: AsciiSet = A.union(B);
        const EXPECTED: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
        assert_eq!(UNION, EXPECTED);
    }

    /// `EMPTY` contributes nothing to a union, whichever side it is on.
    #[test]
    fn union_with_empty_is_identity() {
        const A: AsciiSet = AsciiSet::EMPTY.add(b'A');
        assert_eq!(A.union(AsciiSet::EMPTY), A);
        assert_eq!(AsciiSet::EMPTY.union(A), A);
    }

    /// This test ensures that we can get the complement of a set as a constant value, which is
    /// useful for defining sets in a modular way.
    #[test]
    fn complement() {
        const BOTH: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
        const COMPLEMENT: AsciiSet = BOTH.complement();
        assert!(!COMPLEMENT.contains(b'A'));
        assert!(!COMPLEMENT.contains(b'B'));
        assert!(COMPLEMENT.contains(b'C'));
    }

    /// Complementing twice gives back the original set.
    #[test]
    fn double_complement_is_identity() {
        const SET: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
        assert_eq!(SET.complement().complement(), SET);
    }
}

0 commit comments

Comments
 (0)