From 1c74bb5a11bbc0db4dac878cb18bf2bc137a2b34 Mon Sep 17 00:00:00 2001 From: David Braden Date: Wed, 4 Dec 2024 09:46:59 -0700 Subject: [PATCH] Kitsune2 Top-Level P2P Protocol (#38) * protocol * test * fix comment * review comment * protoc-prebuilt * review comments * protoc_prebuilt optional * proto build at dev time instead of compile time --- Cargo.lock | 174 ++++++++++++++++++++++++++ Cargo.toml | 10 ++ crates/api/Cargo.toml | 1 + crates/api/proto/gen/kitsune2.wire.rs | 108 ++++++++++++++++ crates/api/proto/wire.proto | 73 +++++++++++ crates/api/src/lib.rs | 2 + crates/api/src/protocol.rs | 43 +++++++ crates/tool_proto_build/Cargo.toml | 7 ++ crates/tool_proto_build/src/main.rs | 7 ++ 9 files changed, 425 insertions(+) create mode 100644 crates/api/proto/gen/kitsune2.wire.rs create mode 100644 crates/api/proto/wire.proto create mode 100644 crates/api/src/protocol.rs create mode 100644 crates/tool_proto_build/Cargo.toml create mode 100644 crates/tool_proto_build/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 6e6a92af..d22702d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.18" @@ -66,6 +75,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "anyhow" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" + [[package]] name = "ascii" version = "1.1.0" @@ -329,6 +344,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "either" +version = "1.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.10" @@ -351,6 +378,12 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "flate2" version = "1.0.35" @@ -486,6 +519,12 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + [[package]] name = "heck" version = "0.5.0" @@ -643,12 +682,31 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] 
name = "itoa" version = "1.0.14" @@ -662,6 +720,7 @@ dependencies = [ "base64", "bytes", "futures", + "prost", "serde", "serde_json", "thiserror", @@ -779,6 +838,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "multimap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" + [[package]] name = "nix" version = "0.29.0" @@ -845,6 +910,16 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "pin-project-lite" version = "0.2.15" @@ -867,6 +942,16 @@ dependencies = [ "spki", ] +[[package]] +name = "prettyplease" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.92" @@ -876,6 +961,59 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn", + "tempfile", +] + +[[package]] 
+name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + [[package]] name = "quote" version = "1.0.37" @@ -903,6 +1041,35 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "ring" version = "0.17.8" @@ -1238,6 +1405,13 @@ dependencies = [ "syn", ] +[[package]] +name = "tool_proto_build" +version = "0.0.0+nopublish" +dependencies = [ + "prost-build", +] + [[package]] name = "typenum" version = "1.17.0" diff --git a/Cargo.toml b/Cargo.toml index b7c19924..278150ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "crates/core", "crates/dht", "crates/memory", + "crates/tool_proto_build", ] resolver = "2" @@ -28,6 +29,8 @@ ctrlc = { version = "3.4.5", features = ["termination"] } ed25519-dalek = "2.1.1" # bootstrap_srv uses this to determine worker thread 
count. num_cpus = "1.16.0" +# api uses this for the kitsune2 wire protocol. +prost = "0.13.3" # kitsune types need to be serializable for network transmission. serde = { version = "1.0.215", features = ["derive"] } # kitsune2 agent info is serialized as json to improve debugability of @@ -41,6 +44,13 @@ tempfile = "3.14.0" thiserror = "2.0.3" # this is used by bootstrap_srv as the http server implementation. tiny_http = "0.12.0" +# --- tool-dependencies --- +# The following workspace dependencies are thus-far only used in unpublished +# tools and so are not needed in any true dependency trees. +# Please be careful to only include them in build dependencies or move them +# above this section. +# --- tool-dependencies --- +prost-build = "0.13.3" # --- dev-dependencies --- # The following workspace dependencies are used in crate dev-dependencies. # Please be careful to only include them in dev dependencies or move them diff --git a/crates/api/Cargo.toml b/crates/api/Cargo.toml index 56d0298c..46737fa9 100644 --- a/crates/api/Cargo.toml +++ b/crates/api/Cargo.toml @@ -13,6 +13,7 @@ edition = "2021" [dependencies] base64 = { workspace = true } bytes = { workspace = true } +prost = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/crates/api/proto/gen/kitsune2.wire.rs b/crates/api/proto/gen/kitsune2.wire.rs new file mode 100644 index 00000000..c4a2d281 --- /dev/null +++ b/crates/api/proto/gen/kitsune2.wire.rs @@ -0,0 +1,108 @@ +// This file is @generated by prost-build. +/// A Kitsune2 wire protocol message. +/// +/// This is the top-level encoding +/// that will be transferred between Kitsune2 peers. Most communications +/// between peers to make Kitsune2 actually function will be encoded +/// separately inside the payload of TY_MODULE type messages. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct K2Proto { + /// The type of this message. 
+ #[prost(enumeration = "k2_proto::Ty", tag = "1")] + pub ty: i32, + /// The payload or content of this message. + #[prost(bytes = "bytes", tag = "2")] + pub data: ::prost::bytes::Bytes, + /// If the Ty requires that a space be specified, this is it. + /// + /// Kitsune is divided up into multiple dht "spaces" and peers + /// join and communicate over these spaces. These bytes identify + /// the space within which this message is communicating. + #[prost(bytes = "bytes", optional, tag = "3")] + pub space: ::core::option::Option<::prost::bytes::Bytes>, + /// If the Ty requires a module impl be specified, this is it. + /// + /// Modules include specific sub-protocols to communicate with each other, + /// so those messages must be routed to the correct module. + /// + /// This string is the module routing info. E.g. "gossip", "fetch", "sharding". + #[prost(string, optional, tag = "4")] + pub module: ::core::option::Option<::prost::alloc::string::String>, +} +/// Nested message and enum types in `K2Proto`. +pub mod k2_proto { + /// Enumeration of the types of messages that can be sent between peers. + /// + /// We are using this enum field to distinguish between top-level messages, + /// rather than protobuf's oneof because of the downsides of upgrading oneofs. + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration + )] + #[repr(i32)] + pub enum Ty { + /// The "UNSPECIFIED" type for future message types. + /// + /// In general, peers should ignore unspecified messages, but + /// should still count them toward any ratelimiting metrics. + Unspecified = 0, + /// This message is preflight data. + /// + /// The implementor is responsible for encoding any module-specific + /// requirements within the data payload of this message type. + /// For example, if peers are required to include the "dht_v1" module + /// to communicate with each other, they should reject preflight to + /// peers that do not include that module. 
+ Preflight = 1, + /// This is a notification or fire-and-forget message from a peer. + /// + /// This type requires that a "space" be specified. + Notify = 2, + /// This is a module communication. + /// + /// Most of Kitsune2's communications will likely proceed between + /// Kitsune2's modules. + /// + /// This type requires that a "space" be specified. + /// This type requires that a "module" be specified. + Module = 3, + /// This message indicates a general disconnect, with the reason + /// or context specified in the data payload. + /// + /// We may add additional specific disconnect codes in the future. + Disconnect = 15, + } + impl Ty { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "TY_UNSPECIFIED", + Self::Preflight => "TY_PREFLIGHT", + Self::Notify => "TY_NOTIFY", + Self::Module => "TY_MODULE", + Self::Disconnect => "TY_DISCONNECT", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option<Self> { + match value { + "TY_UNSPECIFIED" => Some(Self::Unspecified), + "TY_PREFLIGHT" => Some(Self::Preflight), + "TY_NOTIFY" => Some(Self::Notify), + "TY_MODULE" => Some(Self::Module), + "TY_DISCONNECT" => Some(Self::Disconnect), + _ => None, + } + } + } +} diff --git a/crates/api/proto/wire.proto b/crates/api/proto/wire.proto new file mode 100644 index 00000000..29e6c62c --- /dev/null +++ b/crates/api/proto/wire.proto @@ -0,0 +1,73 @@ +syntax = "proto3"; + +package kitsune2.wire; + +// A Kitsune2 wire protocol message. +// +// This is the top-level encoding +// that will be transferred between Kitsune2 peers. 
Most communications +// between peers to make Kitsune2 actually function will be encoded +// separately inside the payload of TY_MODULE type messages. +message K2Proto { + // Enumeration of the types of messages that can be sent between peers. + // + // We are using this enum field to distinguish between top-level messages, + // rather than protobuf's oneof because of the downsides of upgrading oneofs. + enum Ty { + // The "UNSPECIFIED" type for future message types. + // + // In general, peers should ignore unspecified messages, but + // should still count them toward any ratelimiting metrics. + TY_UNSPECIFIED = 0; + + // This message is preflight data. + // + // The implementor is responsible for encoding any module-specific + // requirements within the data payload of this message type. + // For example, if peers are required to include the "dht_v1" module + // to communicate with each other, they should reject preflight to + // peers that do not include that module. + TY_PREFLIGHT = 1; + + // This is a notification or fire-and-forget message from a peer. + // + // This type requires that a "space" be specified. + TY_NOTIFY = 2; + + // This is a module communication. + // + // Most of Kitsune2's communications will likely proceed between + // Kitsune2's modules. + // + // This type requires that a "space" be specified. + // This type requires that a "module" be specified. + TY_MODULE = 3; + + // This message indicates a general disconnect, with the reason + // or context specified in the data payload. + // + // We may add additional specific disconnect codes in the future. + TY_DISCONNECT = 15; + } + + // The type of this message. + Ty ty = 1; + + // The payload or content of this message. + bytes data = 2; + + // If the Ty requires that a space be specified, this is it. + // + // Kitsune is divided up into multiple dht "spaces" and peers + // join and communicate over these spaces. These bytes identify + // the space within which this message is communicating. 
+ optional bytes space = 3; + + // If the Ty requires a module impl be specified, this is it. + // + // Modules include specific sub-protocols to communicate with each other, + // so those messages must be routed to the correct module. + // + // This string is the module routing info. E.g. "gossip", "fetch", "sharding". + optional string module = 4; +} diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 70fb423a..c72985aa 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -52,3 +52,5 @@ pub use timestamp::*; pub mod op_store; pub use op_store::*; + +pub mod protocol; diff --git a/crates/api/src/protocol.rs b/crates/api/src/protocol.rs new file mode 100644 index 00000000..04512046 --- /dev/null +++ b/crates/api/src/protocol.rs @@ -0,0 +1,43 @@ +//! Kitsune2 wire protocol types. + +include!("../proto/gen/kitsune2.wire.rs"); + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn happy_encode_decode() { + use prost::Message; + + let m = K2Proto { + ty: k2_proto::Ty::Module as i32, + data: bytes::Bytes::from_static(b"a"), + space: Some(bytes::Bytes::from_static(b"b")), + module: Some("c".into()), + }; + + let m_enc = m.encode_to_vec(); + + let d = K2Proto { + ty: k2_proto::Ty::Disconnect as i32, + data: bytes::Bytes::from_static(b"d"), + space: None, + module: None, + }; + + let d_enc = d.encode_to_vec(); + + // the disconnect message doesn't have a space or module, + // the encoded message should be smaller. 
+ assert!(d_enc.len() < m_enc.len()); + + let m_dec = K2Proto::decode(std::io::Cursor::new(m_enc)).unwrap(); + + assert_eq!(m, m_dec); + + let d_dec = K2Proto::decode(std::io::Cursor::new(d_enc)).unwrap(); + + assert_eq!(d, d_dec); + } +} diff --git a/crates/tool_proto_build/Cargo.toml b/crates/tool_proto_build/Cargo.toml new file mode 100644 index 00000000..9f918d07 --- /dev/null +++ b/crates/tool_proto_build/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "tool_proto_build" +version = "0.0.0+nopublish" +edition = "2021" + +[dependencies] +prost-build = { workspace = true } diff --git a/crates/tool_proto_build/src/main.rs b/crates/tool_proto_build/src/main.rs new file mode 100644 index 00000000..04c30eea --- /dev/null +++ b/crates/tool_proto_build/src/main.rs @@ -0,0 +1,7 @@ +fn main() { + std::env::set_var("OUT_DIR", "../api/proto/gen"); + prost_build::Config::new() + .bytes(["."]) + .compile_protos(&["../api/proto/wire.proto"], &["../api/proto/"]) + .expect("Failed to compile protobuf protocol files"); +}