Skip to content

Commit

Permalink
Implement voice gateway v8 (#146)
Browse files Browse the repository at this point in the history
* Implement voice gateway v8

* Ignore binary messages for now

Binary messages are used for DAVE, which we can't deal with yet.
  • Loading branch information
longnguyen2004 authored Jan 30, 2025
1 parent 1e5b918 commit 7dde773
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 29 deletions.
54 changes: 39 additions & 15 deletions src/client/voice/BaseMediaConnection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ import {
type TransportEncryptor
} from "../encryptor/TransportEncryptor.js";
import { STREAMS_SIMULCAST, SupportedEncryptionModes, type SupportedVideoCodec } from "../../utils.js";
import type { ReadyMessage, SelectProtocolAck } from "./VoiceMessageTypes.js";
import WebSocket from 'ws';
import EventEmitter from "node:events";
import type { Message, GatewayRequest, GatewayResponse } from "./VoiceMessageTypes.js";

type VoiceConnectionStatus =
{
Expand All @@ -27,6 +27,11 @@ type WebRtcParameters = {
supportedEncryptionModes: SupportedEncryptionModes[]
}

/**
 * Extracts the member type of a collection type.
 *
 * - Array or tuple (mutable or `readonly`): resolves to the element type.
 * - Object with string keys: resolves to the union of its value types.
 * - Anything else: resolves to `never`.
 *
 * The array branch matches `readonly (infer U)[]` so that `as const` tuples
 * and `ReadonlyArray<T>` are handled too; mutable arrays are assignable to
 * their readonly counterpart, so they still match.
 */
type ValueOf<T> =
    T extends readonly (infer U)[] ? U :
    T extends Record<string, infer U> ? U :
    never

export const CodecPayloadType = {
"opus": {
name: "opus", type: "audio", priority: 1000, payload_type: 120
Expand All @@ -46,7 +51,7 @@ export const CodecPayloadType = {
"AV1": {
name: "AV1", type: "video", priority: 1000, payload_type: 109, rtx_payload_type: 110, encode: true, decode: true
}
}
} as const;

export interface StreamOptions {
/**
Expand Down Expand Up @@ -126,6 +131,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
public webRtcParams: WebRtcParameters | null = null;
private _streamOptions: StreamOptions;
private _transportEncryptor?: TransportEncryptor;
private _sequenceNumber = -1;

constructor(guildId: string, botId: string, channelId: string, options: Partial<StreamOptions>, callback: (udp: MediaUdp) => void) {
super();
Expand Down Expand Up @@ -193,7 +199,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
return
this.status.started = true;

this.ws = new WebSocket(`wss://${this.server}/?v=7`, {
this.ws = new WebSocket(`wss://${this.server}/?v=8`, {
followRedirects: true
});
this.ws.on("open", () => {
Expand Down Expand Up @@ -224,7 +230,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
}
}

handleReady(d: ReadyMessage): void {
handleReady(d: Message.Ready): void {
// we hardcoded the STREAMS_SIMULCAST, which will always be array of 1
const stream = d.streams[0];
this.webRtcParams = {
Expand All @@ -238,7 +244,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
this.udp.updatePacketizer();
}

handleProtocolAck(d: SelectProtocolAck): void {
handleProtocolAck(d: Message.SelectProtocolAck): void {
const secretKey = Buffer.from(d.secret_key);
switch (d.mode)
{
Expand All @@ -253,9 +259,12 @@ export abstract class BaseMediaConnection extends EventEmitter {
}

setupEvents(): void {
this.ws?.on('message', (data: string) => {
// Maybe map out all the types here to avoid any?
const { op, d } = JSON.parse(data);
this.ws?.on('message', (data, isBinary) => {
if (isBinary)
return;
const { op, d, seq } = JSON.parse(data.toString()) as GatewayResponse;
if (seq)
this._sequenceNumber = seq;

if (op === VoiceOpCodes.READY) { // ready
this.handleReady(d);
Expand Down Expand Up @@ -292,11 +301,14 @@ export abstract class BaseMediaConnection extends EventEmitter {
clearInterval(this.interval);
}
this.interval = setInterval(() => {
this.sendOpcode(VoiceOpCodes.HEARTBEAT, 42069);
this.sendOpcode(VoiceOpCodes.HEARTBEAT, {
t: Date.now(),
seq_ack: this._sequenceNumber
});
}, interval);
}

sendOpcode(code:number, data: unknown): void {
sendOpcode<T extends GatewayRequest>(code: T["op"], data: T["d"]): void {
this.ws?.send(JSON.stringify({
op: code,
d: data
Expand All @@ -307,6 +319,12 @@ export abstract class BaseMediaConnection extends EventEmitter {
** identifies with media server with credentials
*/
identify(): void {
if (!this.serverId)
throw new Error("Server ID is null or empty");
if (!this.session_id)
throw new Error("Session ID is null or empty");
if (!this.token)
throw new Error("Token is null or empty");
this.sendOpcode(VoiceOpCodes.IDENTIFY, {
server_id: this.serverId,
user_id: this.botId,
Expand All @@ -318,10 +336,17 @@ export abstract class BaseMediaConnection extends EventEmitter {
}

/**
 * Sends a RESUME opcode carrying the stored server ID, session ID and
 * token, together with the current value of `_sequenceNumber` as `seq_ack`.
 *
 * @throws Error if the server ID, session ID or token is missing or empty.
 */
resume(): void {
    // Validate each credential in turn so the error names the first missing one.
    const serverId = this.serverId;
    if (!serverId) {
        throw new Error("Server ID is null or empty");
    }

    const sessionId = this.session_id;
    if (!sessionId) {
        throw new Error("Session ID is null or empty");
    }

    const token = this.token;
    if (!token) {
        throw new Error("Token is null or empty");
    }

    this.sendOpcode(VoiceOpCodes.RESUME, {
        server_id: serverId,
        session_id: sessionId,
        token: token,
        seq_ack: this._sequenceNumber
    });
}

Expand All @@ -332,6 +357,8 @@ export abstract class BaseMediaConnection extends EventEmitter {
*/
setProtocols(): Promise<void> {
const { ip, port } = this.udp;
if (!ip || !port)
throw new Error("IP or port is undefined (this shouldn't happen!!!)");
// select encryption mode
// From Discord docs:
// You must support aead_xchacha20_poly1305_rtpsize. You should prefer to use aead_aes256_gcm_rtpsize when it is available.
Expand All @@ -349,15 +376,12 @@ export abstract class BaseMediaConnection extends EventEmitter {
return new Promise((resolve) => {
this.sendOpcode(VoiceOpCodes.SELECT_PROTOCOL, {
protocol: "udp",
codecs: Object.values(CodecPayloadType),
codecs: Object.values(CodecPayloadType) as ValueOf<typeof CodecPayloadType>[],
data: {
address: ip,
port: port,
mode: encryptionMode
},
address: ip,
port: port,
mode: encryptionMode
}
});
this.once("select_protocol_ack", () => resolve());
})
Expand Down
163 changes: 149 additions & 14 deletions src/client/voice/VoiceMessageTypes.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
import type { VoiceOpCodes } from "./VoiceOpCodes.js"
import type { SupportedEncryptionModes } from "../../utils.js"

export type ReadyMessage = {
ssrc: number,
ip: string,
port: number,
modes: SupportedEncryptionModes[],
experiments: string[],
streams: StreamInfo[]
}

type StreamInfo = {
active: boolean,
quality: number,
Expand All @@ -21,9 +13,152 @@ type StreamInfo = {
type: string
}

export type SelectProtocolAck = {
secret_key: number[],
audio_codec: string,
video_codec: string,
mode: string,
/**
 * One entry of the `streams` list carried by an Identify payload.
 */
type SimulcastInfo = {
    /** Stream type label. */
    type: string;
    /** Stream `rid` value (presumably the RTP stream identifier; confirm against the gateway docs). */
    rid: string;
    /** Quality value associated with this stream. */
    quality: number;
};

/**
 * Codec descriptor as listed in the `codecs` array of a Select Protocol payload.
 *
 * Discriminated on `type`: audio codecs carry only the base fields, while
 * video codecs additionally carry the RTX payload type and encode/decode flags.
 */
type CodecPayloadType =
    | {
        name: string;
        type: "audio";
        priority: number;
        payload_type: number;
    }
    | {
        name: string;
        type: "video";
        priority: number;
        payload_type: number;
        rtx_payload_type: number;
        encode: boolean;
        decode: boolean;
    };

/**
 * Payload (`d`) shapes of voice gateway messages, grouped into requests sent
 * by the client and responses received from the gateway. The matching
 * opcode/payload pairings live in `GatewayRequest` and `GatewayResponse`.
 */
export namespace Message {
    // Request messages

    /** Payload of the IDENTIFY request. */
    export type Identify = {
        server_id: string,
        user_id: string,
        session_id: string,
        token: string,
        video: boolean,
        streams: SimulcastInfo[]
    }

    /** Payload of the RESUME request. */
    export type Resume = {
        server_id: string,
        session_id: string,
        token: string,
        /** Last sequence number the client has recorded. */
        seq_ack: number
    }

    /** Payload of the HEARTBEAT request. */
    export type Heartbeat = {
        /** Client-chosen value; the connection sends `Date.now()`. */
        t: number,
        /** Last sequence number the client has recorded, if any. */
        seq_ack?: number
    }

    /** Payload of the SELECT_PROTOCOL request. */
    export type SelectProtocol = {
        protocol: string,
        codecs: CodecPayloadType[],
        data: {
            address: string,
            port: number,
            mode: SupportedEncryptionModes
        }
    }

    /** Payload of the VIDEO request. */
    export type Video = {
        audio_ssrc: number,
        video_ssrc: number,
        rtx_ssrc: number,
        streams: {
            type: "video",
            rid: string,
            ssrc: number,
            active: boolean,
            quality: number,
            rtx_ssrc: number,
            max_bitrate: number,
            max_framerate: number,
            max_resolution: {
                type: "fixed",
                width: number,
                height: number
            }
        }[]
    }

    // Response messages

    /** Payload of the HELLO response. */
    export type Hello = {
        heartbeat_interval: number
    }

    /** Payload of the READY response. */
    export type Ready = {
        ssrc: number,
        ip: string,
        port: number,
        modes: SupportedEncryptionModes[],
        experiments: string[],
        streams: StreamInfo[]
    }

    /**
     * Payload of the SPEAKING message (used both as a request and a response).
     */
    export type Speaking = {
        /**
         * Speaking state as a bit field. Discord documents the flags as
         * microphone (1), soundshare (2) and priority (4), which may be
         * combined, so this is typed as `number` rather than a closed set of
         * single-flag literals.
         */
        speaking: number,
        delay: number,
        ssrc: number
    }

    /** Payload of the SELECT_PROTOCOL_ACK response. */
    export type SelectProtocolAck = {
        /** Secret key as a list of numeric values; the connection wraps it in a `Buffer`. */
        secret_key: number[],
        audio_codec: string,
        video_codec: string,
        /** Encryption mode name selected for the session. */
        mode: string,
    }

    /** Payload of the HEARTBEAT_ACK response. */
    export type HeartbeatAck = {
        t: number
    }
}

/**
 * Typed envelopes for messages received from the voice gateway: each alias
 * pairs one opcode with the payload type carried in `d`.
 */
export namespace GatewayResponse {
    /**
     * Common envelope shape: the opcode, its payload, and an optional
     * sequence number.
     */
    type Envelope<TOp extends VoiceOpCodes, TData extends Record<string, unknown> | null> = {
        op: TOp;
        d: TData;
        seq?: number;
    };

    export type Hello = Envelope<VoiceOpCodes.HELLO, Message.Hello>;
    export type Ready = Envelope<VoiceOpCodes.READY, Message.Ready>;
    /** RESUMED carries no payload. */
    export type Resumed = Envelope<VoiceOpCodes.RESUMED, null>;
    export type Speaking = Envelope<VoiceOpCodes.SPEAKING, Message.Speaking>;
    export type SelectProtocolAck = Envelope<VoiceOpCodes.SELECT_PROTOCOL_ACK, Message.SelectProtocolAck>;
    export type HeartbeatAck = Envelope<VoiceOpCodes.HEARTBEAT_ACK, Message.HeartbeatAck>;
}

/**
 * Union of every typed response envelope; discriminate on `op` to narrow `d`.
 */
export type GatewayResponse =
    | GatewayResponse.Hello
    | GatewayResponse.Ready
    | GatewayResponse.Resumed
    | GatewayResponse.Speaking
    | GatewayResponse.SelectProtocolAck
    | GatewayResponse.HeartbeatAck;

/**
 * Typed envelopes for messages sent to the voice gateway: each alias pairs
 * one opcode with the payload type carried in `d`.
 */
export namespace GatewayRequest {
    /** Common envelope shape: the opcode and its payload. */
    type Envelope<TOp extends VoiceOpCodes, TData extends Record<string, unknown> | null> = {
        op: TOp;
        d: TData;
    };

    export type Identify = Envelope<VoiceOpCodes.IDENTIFY, Message.Identify>;
    export type Resume = Envelope<VoiceOpCodes.RESUME, Message.Resume>;
    export type Heartbeat = Envelope<VoiceOpCodes.HEARTBEAT, Message.Heartbeat>;
    export type SelectProtocol = Envelope<VoiceOpCodes.SELECT_PROTOCOL, Message.SelectProtocol>;
    export type Video = Envelope<VoiceOpCodes.VIDEO, Message.Video>;
    export type Speaking = Envelope<VoiceOpCodes.SPEAKING, Message.Speaking>;
}

/**
 * Union of every typed request envelope; discriminate on `op` to narrow `d`.
 */
export type GatewayRequest =
    | GatewayRequest.Identify
    | GatewayRequest.Resume
    | GatewayRequest.Heartbeat
    | GatewayRequest.SelectProtocol
    | GatewayRequest.Video
    | GatewayRequest.Speaking;

0 comments on commit 7dde773

Please sign in to comment.