@@ -593,16 +593,7 @@ impl char {
593
593
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
594
594
#[ inline]
595
595
pub fn len_utf8 ( self ) -> usize {
596
- let code = self as u32 ;
597
- if code < MAX_ONE_B {
598
- 1
599
- } else if code < MAX_TWO_B {
600
- 2
601
- } else if code < MAX_THREE_B {
602
- 3
603
- } else {
604
- 4
605
- }
596
+ len_utf8 ( self as u32 )
606
597
}
607
598
608
599
/// Returns the number of 16-bit code units this `char` would need if
@@ -670,36 +661,7 @@ impl char {
670
661
#[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
671
662
#[ inline]
672
663
pub fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
673
- let code = self as u32 ;
674
- let len = self . len_utf8 ( ) ;
675
- match ( len, & mut dst[ ..] ) {
676
- ( 1 , [ a, ..] ) => {
677
- * a = code as u8 ;
678
- }
679
- ( 2 , [ a, b, ..] ) => {
680
- * a = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
681
- * b = ( code & 0x3F ) as u8 | TAG_CONT ;
682
- }
683
- ( 3 , [ a, b, c, ..] ) => {
684
- * a = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
685
- * b = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
686
- * c = ( code & 0x3F ) as u8 | TAG_CONT ;
687
- }
688
- ( 4 , [ a, b, c, d, ..] ) => {
689
- * a = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
690
- * b = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
691
- * c = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
692
- * d = ( code & 0x3F ) as u8 | TAG_CONT ;
693
- }
694
- _ => panic ! (
695
- "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
696
- len,
697
- code,
698
- dst. len( ) ,
699
- ) ,
700
- } ;
701
- // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
702
- unsafe { from_utf8_unchecked_mut ( & mut dst[ ..len] ) }
664
+ encode_utf8_raw ( self as u32 , dst)
703
665
}
704
666
705
667
/// Encodes this character as UTF-16 into the provided `u16` buffer,
@@ -1673,3 +1635,60 @@ impl char {
1673
1635
}
1674
1636
}
1675
1637
}
1638
+
1639
+ #[ inline]
1640
+ fn len_utf8 ( code : u32 ) -> usize {
1641
+ if code < MAX_ONE_B {
1642
+ 1
1643
+ } else if code < MAX_TWO_B {
1644
+ 2
1645
+ } else if code < MAX_THREE_B {
1646
+ 3
1647
+ } else {
1648
+ 4
1649
+ }
1650
+ }
1651
+
1652
+ /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1653
+ /// and then returns the subslice of the buffer that contains the encoded character.
1654
+ ///
1655
+ /// Unlike `char::encode_utf8`, this method can be called on codepoints in the surrogate range.
1656
+ ///
1657
+ /// # Panics
1658
+ ///
1659
+ /// Panics if the buffer is not large enough.
1660
+ /// A buffer of length four is large enough to encode any `char`.
1661
+ #[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1662
+ #[ doc( hidden) ]
1663
+ #[ inline]
1664
+ pub fn encode_utf8_raw ( code : u32 , dst : & mut [ u8 ] ) -> & mut str {
1665
+ let len = len_utf8 ( code) ;
1666
+ match ( len, & mut dst[ ..] ) {
1667
+ ( 1 , [ a, ..] ) => {
1668
+ * a = code as u8 ;
1669
+ }
1670
+ ( 2 , [ a, b, ..] ) => {
1671
+ * a = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
1672
+ * b = ( code & 0x3F ) as u8 | TAG_CONT ;
1673
+ }
1674
+ ( 3 , [ a, b, c, ..] ) => {
1675
+ * a = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
1676
+ * b = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1677
+ * c = ( code & 0x3F ) as u8 | TAG_CONT ;
1678
+ }
1679
+ ( 4 , [ a, b, c, d, ..] ) => {
1680
+ * a = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
1681
+ * b = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
1682
+ * c = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
1683
+ * d = ( code & 0x3F ) as u8 | TAG_CONT ;
1684
+ }
1685
+ _ => panic ! (
1686
+ "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
1687
+ len,
1688
+ code,
1689
+ dst. len( ) ,
1690
+ ) ,
1691
+ } ;
1692
+ // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
1693
+ unsafe { from_utf8_unchecked_mut ( & mut dst[ ..len] ) }
1694
+ }
0 commit comments