diff --git a/index.bs b/index.bs index 8689e85..ef6a58d 100644 --- a/index.bs +++ b/index.bs @@ -28,6 +28,27 @@ spec:webidl; type:dfn; text:resolve "href": "https://www.ietf.org/archive/id/draft-ietf-sframe-enc-00.html", "title": "Secure Frame (SFrame)" + }, + "VP9": { + "href": + "https://storage.googleapis.com/downloads.webmproject.org/docs/vp9/vp9-bitstream-specification-v0.6-20160331-draft.pdf", + "title": "VP9 Bitstream & Decoding Process Specification", + "publisher": "The WebM Project" + }, + "ITU-T-REC-H.264": { + "href": "https://www.itu.int/rec/T-REC-H.264", + "title": "H.264 : Advanced video coding for generic audiovisual services", + "publisher": "ITU" + }, + "ITU-G.711": { + "href": "https://www.itu.int/rec/T-REC-G.711/", + "title": "G.711 : Pulse code modulation (PCM) of voice frequencies", + "publisher": "ITU" + }, + "ITU-G.722": { + "href": "https://www.itu.int/rec/T-REC-G.722/", + "title": "G.722 : 7 kHz audio-coding within 64 kbit/s", + "publisher": "ITU" } } @@ -134,6 +155,7 @@ The writeEncodedData algorithm is given a |rtcObject| as On sender side, as part of [$readEncodedData$], frames produced by |rtcObject|'s encoder MUST be enqueued in |rtcObject|.`[[readable]]` in the encoder's output order. As [$writeEncodedData$] ensures that the transform cannot reorder frames, the encoder's output order is also the order followed by packetizers to generate RTP packets and assign RTP packet sequence numbers. +The packetizer may expect the transformed data to still conform to the original format, e.g. a series of NAL units separated by Annex B start codes. On receiver side, as part of [$readEncodedData$], frames produced by |rtcObject|'s packetizer MUST be enqueued in |rtcObject|.`[[readable]]` in the same encoder's output order. To ensure the order is respected, the depacketizer will typically use RTP packet sequence numbers to reorder RTP packets as needed before enqueuing frames in |rtcObject|.`[[readable]]`. 
@@ -435,8 +457,70 @@ interface RTCEncodedVideoFrame {

- The encoded frame data. + The encoded frame data. The format of the data depends on the video codec that is + used to encode/decode the frame, which can be determined by looking at the + {{RTCEncodedVideoFrameMetadata/mimeType}}. + For SVC, each spatial layer + is transformed separately. +

+ Since packetizers may drop certain elements, e.g. AV1 temporal delimiter OBUs, + the input to a receive-side transform may be different from the output of + a send-side transform. +

+ The following table gives a number of examples:

+ + + + + + + + + + + + + + + + + + + + + + + + +
mimeTypeData format
+ video/VP8 + + The data starts with the "uncompressed data chunk" defined in + + section 9.1 of [[RFC6386]] and is followed by the rest of the + frame data. The + VP8 payload descriptor is not accessible. +
+ video/VP9 + + The data is a frame as described in Section 6 of [[VP9]]. + The + VP9 payload descriptor is not accessible. +
+ video/H264 + + The data is a series of NAL units in Annex B format, + as defined in [[ITU-T-REC-H.264]] Annex B. +
+ video/AV1 + + The data is a series of OBUs compliant with the + + low-overhead bitstream format as described in Section 5 of [[AV1]]. + The + AV1 aggregation header is not accessible. +
@@ -563,8 +647,76 @@ interface RTCEncodedAudioFrame {

- The encoded frame data. + The encoded frame data. The format of the data depends on the audio codec that is + used to encode/decode the frame, which can be determined by looking at the + {{RTCEncodedAudioFrameMetadata/mimeType}}. + The following table gives a number of examples:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
mimeTypeData format
+ audio/opus + + The data is Opus packets, as described in + + section 3 of [[RFC6716]]. +
+ audio/PCMU + + The data is a sequence of bytes of arbitrary length, where each byte is a u-law + encoded PCM sample as defined by Tables 2a and 2b in [[ITU-G.711]]. +
+ audio/PCMA + + The data is a sequence of bytes of arbitrary length, where each byte is + an A-law encoded PCM sample as defined by Tables 1a and 1b in [[ITU-G.711]]. +
+ audio/G722 + + The data is G.722 audio as described in [[ITU-G.722]]. +
+ audio/RED + + The data is Redundant Audio Data as described in + + section 3 of [[RFC2198]]. +
+ audio/CN + + The data is Comfort Noise as described in + + section 3 of [[RFC3389]]. +