every.channel: sanitized baseline

This commit is contained in:
every.channel 2026-02-15 16:17:27 -05:00
commit 897e556bea
No known key found for this signature in database
258 changed files with 74298 additions and 0 deletions

View file

@ -0,0 +1,535 @@
//! Public API for using iroh-gossip
//!
//! The API is usable both locally and over RPC.
use std::{
collections::{BTreeSet, HashSet},
pin::Pin,
task::{Context, Poll},
};
use bytes::Bytes;
use iroh_base::EndpointId;
use irpc::{channel::mpsc, rpc_requests, Client};
use n0_error::{e, stack_error};
use n0_future::{Stream, StreamExt, TryStreamExt};
use serde::{Deserialize, Serialize};
use crate::proto::{DeliveryScope, TopicId};
/// Default channel capacity for topic subscription channels (one per topic).
///
/// Used as the default `subscription_capacity` in [`JoinOptions::with_bootstrap`].
const TOPIC_EVENTS_DEFAULT_CAP: usize = 2048;
/// Channel capacity for topic command send channels (one per topic subscription).
const TOPIC_COMMANDS_CAP: usize = 64;
/// Input messages for the gossip actor.
///
/// Currently the only request is `Join`, a bidirectional stream: the server
/// sends [`Event`]s to the client and receives [`Command`]s in return.
#[rpc_requests(message = RpcMessage, rpc_feature = "rpc")]
#[derive(Debug, Serialize, Deserialize)]
pub(crate) enum Request {
    /// Join a topic: events flow out on `tx`, commands flow in on `rx`.
    #[rpc(tx=mpsc::Sender<Event>, rx=mpsc::Receiver<Command>)]
    Join(JoinRequest),
}
/// Payload of a [`Request::Join`]: which topic to join and which peers to bootstrap from.
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct JoinRequest {
    /// The topic to subscribe to.
    pub topic_id: TopicId,
    /// Initial set of peers to connect to for this topic.
    pub bootstrap: BTreeSet<EndpointId>,
}
/// Errors returned from the gossip API.
#[allow(missing_docs)]
#[stack_error(derive, add_meta, from_sources)]
#[non_exhaustive]
pub enum ApiError {
    /// An error in the underlying RPC transport or channel.
    #[error(transparent)]
    Rpc { source: irpc::Error },
    /// The gossip topic was closed.
    #[error("topic closed")]
    Closed,
}
impl From<irpc::channel::SendError> for ApiError {
fn from(value: irpc::channel::SendError) -> Self {
irpc::Error::from(value).into()
}
}
impl From<irpc::channel::mpsc::RecvError> for ApiError {
fn from(value: irpc::channel::mpsc::RecvError) -> Self {
irpc::Error::from(value).into()
}
}
impl From<irpc::channel::oneshot::RecvError> for ApiError {
fn from(value: irpc::channel::oneshot::RecvError) -> Self {
irpc::Error::from(value).into()
}
}
/// API to control a [`Gossip`] instance.
///
/// This has methods to subscribe and join gossip topics, which return handles to publish
/// and receive messages on topics.
///
/// [`Gossip`] derefs to [`GossipApi`], so all functions on [`GossipApi`] are directly callable
/// from [`Gossip`].
///
/// Additionally, a [`GossipApi`] can be created by connecting to an RPC server. See [`Gossip::listen`]
/// and [`GossipApi::connect`] (*requires the `rpc` feature).
///
/// [`Gossip`]: crate::net::Gossip
/// [`Gossip::listen`]: crate::net::Gossip::listen
#[derive(Debug, Clone)]
pub struct GossipApi {
    /// Typed irpc client; created either from an in-process channel ([`Self::local`])
    /// or a QUIC connection ([`Self::connect`]).
    client: Client<Request>,
}
impl GossipApi {
    /// Creates a [`GossipApi`] that talks to an in-process gossip actor over `tx`.
    #[cfg(feature = "net")]
    pub(crate) fn local(tx: tokio::sync::mpsc::Sender<RpcMessage>) -> Self {
        let local = irpc::LocalSender::<Request>::from(tx);
        Self {
            client: local.into(),
        }
    }

    /// Connect to a remote as a RPC client.
    #[cfg(feature = "rpc")]
    pub fn connect(endpoint: quinn::Endpoint, addr: std::net::SocketAddr) -> Self {
        let inner = irpc::Client::quinn(endpoint, addr);
        Self { client: inner }
    }

    /// Listen on a quinn endpoint for incoming RPC connections.
    ///
    /// Panics if `self` was created with [`GossipApi::connect`] rather than locally.
    #[cfg(all(feature = "rpc", feature = "net"))]
    pub(crate) async fn listen(&self, endpoint: quinn::Endpoint) {
        use irpc::rpc::{listen, RemoteService};
        // Only a local (in-process) client can serve requests; a remote client
        // has no actor behind it, hence the expect below.
        let local = self
            .client
            .as_local()
            .expect("cannot listen on remote client");
        let handler = Request::remote_handler(local);
        listen::<Request>(endpoint, handler).await
    }

    /// Join a gossip topic with options.
    ///
    /// Returns a [`GossipTopic`] instantly. To wait for at least one connection to be established,
    /// you can await [`GossipTopic::joined`].
    ///
    /// Messages will be queued until a first connection is available. If the internal channel becomes full,
    /// the oldest messages will be dropped from the channel.
    pub async fn subscribe_with_opts(
        &self,
        topic_id: TopicId,
        opts: JoinOptions,
    ) -> Result<GossipTopic, ApiError> {
        let req = JoinRequest {
            topic_id,
            bootstrap: opts.bootstrap,
        };
        // Commands flow to the actor (fixed capacity); events flow back with
        // the capacity chosen by the caller via `opts.subscription_capacity`.
        let (tx, rx) = self
            .client
            .bidi_streaming(req, TOPIC_COMMANDS_CAP, opts.subscription_capacity)
            .await?;
        Ok(GossipTopic::new(tx, rx))
    }

    /// Join a gossip topic with the default options and wait for at least one active connection.
    pub async fn subscribe_and_join(
        &self,
        topic_id: TopicId,
        bootstrap: Vec<EndpointId>,
    ) -> Result<GossipTopic, ApiError> {
        let mut sub = self
            .subscribe_with_opts(topic_id, JoinOptions::with_bootstrap(bootstrap))
            .await?;
        sub.joined().await?;
        Ok(sub)
    }

    /// Join a gossip topic with the default options.
    ///
    /// Note that this will not wait for any bootstrap endpoint to be available.
    /// To ensure the topic is connected to at least one endpoint, use [`GossipTopic::joined`]
    /// or [`Self::subscribe_and_join`]
    pub async fn subscribe(
        &self,
        topic_id: TopicId,
        bootstrap: Vec<EndpointId>,
    ) -> Result<GossipTopic, ApiError> {
        let sub = self
            .subscribe_with_opts(topic_id, JoinOptions::with_bootstrap(bootstrap))
            .await?;
        Ok(sub)
    }
}
/// Sender for a gossip topic.
///
/// Wraps the command channel into the gossip actor; can be cloned to get
/// multiple senders for the same topic.
#[derive(Debug, Clone)]
pub struct GossipSender(mpsc::Sender<Command>);
impl GossipSender {
pub(crate) fn new(sender: mpsc::Sender<Command>) -> Self {
Self(sender)
}
/// Broadcasts a message to all endpoints.
pub async fn broadcast(&self, message: Bytes) -> Result<(), ApiError> {
self.send(Command::Broadcast(message)).await?;
Ok(())
}
/// Broadcasts a message to our direct neighbors.
pub async fn broadcast_neighbors(&self, message: Bytes) -> Result<(), ApiError> {
self.send(Command::BroadcastNeighbors(message)).await?;
Ok(())
}
/// Joins a set of peers.
pub async fn join_peers(&self, peers: Vec<EndpointId>) -> Result<(), ApiError> {
self.send(Command::JoinPeers(peers)).await?;
Ok(())
}
async fn send(&self, command: Command) -> Result<(), irpc::channel::SendError> {
self.0.send(command).await?;
Ok(())
}
}
/// Subscribed gossip topic.
///
/// This handle is a [`Stream`] of [`Event`]s from the topic, and can be used to send messages.
///
/// Once the [`GossipTopic`] is dropped, the network actor will leave the gossip topic.
///
/// It may be split into sender and receiver parts with [`Self::split`]. In this case, the topic will
/// be left once both the [`GossipSender`] and [`GossipReceiver`] halves are dropped.
#[derive(Debug)]
pub struct GossipTopic {
    /// Publishing half (command channel into the actor).
    sender: GossipSender,
    /// Receiving half (event stream out of the actor).
    receiver: GossipReceiver,
}
impl GossipTopic {
pub(crate) fn new(sender: mpsc::Sender<Command>, receiver: mpsc::Receiver<Event>) -> Self {
let sender = GossipSender::new(sender);
Self {
sender,
receiver: GossipReceiver::new(receiver),
}
}
/// Splits `self` into [`GossipSender`] and [`GossipReceiver`] parts.
pub fn split(self) -> (GossipSender, GossipReceiver) {
(self.sender, self.receiver)
}
/// Sends a message to all peers.
pub async fn broadcast(&mut self, message: Bytes) -> Result<(), ApiError> {
self.sender.broadcast(message).await
}
/// Sends a message to our direct neighbors in the swarm.
pub async fn broadcast_neighbors(&mut self, message: Bytes) -> Result<(), ApiError> {
self.sender.broadcast_neighbors(message).await
}
/// Lists our current direct neighbors.
pub fn neighbors(&self) -> impl Iterator<Item = EndpointId> + '_ {
self.receiver.neighbors()
}
/// Waits until we are connected to at least one endpoint.
///
/// See [`GossipReceiver::joined`] for details.
pub async fn joined(&mut self) -> Result<(), ApiError> {
self.receiver.joined().await
}
/// Returns `true` if we are connected to at least one endpoint.
pub fn is_joined(&self) -> bool {
self.receiver.is_joined()
}
}
impl Stream for GossipTopic {
    type Item = Result<Event, ApiError>;

    /// Delegates polling to the receiving half.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        let receiver = &mut self.get_mut().receiver;
        Pin::new(receiver).poll_next(cx)
    }
}
/// Receiver for gossip events on a topic.
///
/// This is a [`Stream`] of [`Event`]s emitted from the topic.
#[derive(derive_more::Debug)]
pub struct GossipReceiver {
    /// The underlying event stream, with channel errors mapped into [`ApiError`].
    #[debug("BoxStream")]
    stream: Pin<Box<dyn Stream<Item = Result<Event, ApiError>> + Send + Sync + 'static>>,
    /// Current direct neighbors, maintained from `NeighborUp`/`NeighborDown`
    /// events as they pass through the stream.
    neighbors: HashSet<EndpointId>,
}
impl GossipReceiver {
pub(crate) fn new(events_rx: mpsc::Receiver<Event>) -> Self {
let stream = events_rx.into_stream().map_err(ApiError::from);
let stream = Box::pin(stream);
Self {
stream,
neighbors: Default::default(),
}
}
/// Lists our current direct neighbors.
pub fn neighbors(&self) -> impl Iterator<Item = EndpointId> + '_ {
self.neighbors.iter().copied()
}
/// Waits until we are connected to at least one endpoint.
///
/// Progresses the event stream to the first [`Event::NeighborUp`] event.
///
/// Note that this consumes this initial `NeighborUp` event. If you want to track
/// neighbors, use [`Self::neighbors`] after awaiting [`Self::joined`], and then
/// continue to track `NeighborUp` events on the event stream.
pub async fn joined(&mut self) -> Result<(), ApiError> {
while !self.is_joined() {
let _event = self.next().await.ok_or(e!(ApiError::Closed))??;
}
Ok(())
}
/// Returns `true` if we are connected to at least one endpoint.
pub fn is_joined(&self) -> bool {
!self.neighbors.is_empty()
}
}
impl Stream for GossipReceiver {
    type Item = Result<Event, ApiError>;

    /// Polls the inner stream, keeping the neighbor set in sync with
    /// `NeighborUp` / `NeighborDown` events as they pass through.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        let this = self.get_mut();
        let next = std::task::ready!(this.stream.as_mut().poll_next(cx));
        if let Some(Ok(event)) = next.as_ref() {
            match event {
                Event::NeighborUp(endpoint_id) => {
                    this.neighbors.insert(*endpoint_id);
                }
                Event::NeighborDown(endpoint_id) => {
                    this.neighbors.remove(endpoint_id);
                }
                _ => {}
            }
        }
        Poll::Ready(next)
    }
}
/// Events emitted from a gossip topic.
///
/// These are the events emitted from a [`GossipReceiver`].
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Serialize, Deserialize)]
pub enum Event {
    /// We have a new, direct neighbor in the swarm membership layer for this topic.
    NeighborUp(EndpointId),
    /// We dropped a direct neighbor in the swarm membership layer for this topic.
    NeighborDown(EndpointId),
    /// We received a gossip message for this topic.
    Received(Message),
    /// We missed some messages because our [`GossipReceiver`] was not progressing fast enough.
    Lagged,
}
impl From<crate::proto::Event<EndpointId>> for Event {
fn from(event: crate::proto::Event<EndpointId>) -> Self {
match event {
crate::proto::Event::NeighborUp(endpoint_id) => Self::NeighborUp(endpoint_id),
crate::proto::Event::NeighborDown(endpoint_id) => Self::NeighborDown(endpoint_id),
crate::proto::Event::Received(message) => Self::Received(Message {
content: message.content,
scope: message.scope,
delivered_from: message.delivered_from,
}),
}
}
}
/// A gossip message received on a topic.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, derive_more::Debug, Serialize, Deserialize)]
pub struct Message {
    /// The content of the message
    #[debug("Bytes({})", self.content.len())]
    pub content: Bytes,
    /// The scope of the message.
    /// This tells us if the message is from a direct neighbor or actual gossip.
    pub scope: DeliveryScope,
    /// The endpoint that delivered the message. This is not the same as the original author.
    pub delivered_from: EndpointId,
}
/// Command for a gossip topic.
///
/// Sent from a [`GossipSender`] to the gossip actor.
#[derive(Serialize, Deserialize, derive_more::Debug, Clone)]
pub enum Command {
    /// Broadcasts a message to all endpoints in the swarm.
    Broadcast(#[debug("Bytes({})", _0.len())] Bytes),
    /// Broadcasts a message to all direct neighbors.
    BroadcastNeighbors(#[debug("Bytes({})", _0.len())] Bytes),
    /// Connects to a set of peers.
    JoinPeers(Vec<EndpointId>),
}
/// Options for joining a gossip topic.
#[derive(Serialize, Deserialize, Debug)]
pub struct JoinOptions {
    /// The initial bootstrap endpoints.
    pub bootstrap: BTreeSet<EndpointId>,
    /// The maximum number of messages that can be buffered in a subscription.
    ///
    /// If this limit is reached, the subscriber will receive a `Lagged` response,
    /// the message will be dropped, and the subscriber will be closed.
    ///
    /// This is to prevent a single slow subscriber from blocking the dispatch loop.
    /// If a subscriber is lagging, it should be closed and re-opened.
    pub subscription_capacity: usize,
}
impl JoinOptions {
/// Creates [`JoinOptions`] with the provided bootstrap endpoints and the default subscription
/// capacity.
pub fn with_bootstrap(endpoints: impl IntoIterator<Item = EndpointId>) -> Self {
Self {
bootstrap: endpoints.into_iter().collect(),
subscription_capacity: TOPIC_EVENTS_DEFAULT_CAP,
}
}
}
#[cfg(test)]
mod tests {
    use crate::api::GossipTopic;

    /// End-to-end test: two gossip endpoints, one of them exposed over RPC,
    /// exchange a single broadcast message on a shared topic.
    #[cfg(all(feature = "rpc", feature = "net"))]
    #[tokio::test]
    #[n0_tracing_test::traced_test]
    async fn test_rpc() -> n0_error::Result<()> {
        use iroh::{address_lookup::memory::MemoryLookup, protocol::Router, RelayMap};
        use n0_error::{AnyError, Result, StackResultExt, StdResultExt};
        use n0_future::{time::Duration, StreamExt};
        use rand_chacha::rand_core::SeedableRng;

        use crate::{
            api::{Event, GossipApi},
            net::{test::create_endpoint, Gossip},
            proto::TopicId,
            ALPN,
        };

        // Seeded RNG so endpoint keys are reproducible across runs.
        let mut rng = rand_chacha::ChaCha12Rng::seed_from_u64(1);
        let (relay_map, _relay_url, _guard) = iroh::test_utils::run_relay_server().await.unwrap();

        // Helper: endpoint + gossip instance + router accepting the gossip ALPN.
        async fn create_gossip_endpoint(
            rng: &mut rand_chacha::ChaCha12Rng,
            relay_map: RelayMap,
        ) -> Result<(Router, Gossip)> {
            let endpoint = create_endpoint(rng, relay_map, None).await?;
            let gossip = Gossip::builder().spawn(endpoint.clone());
            let router = Router::builder(endpoint)
                .accept(ALPN, gossip.clone())
                .spawn();
            Ok((router, gossip))
        }

        let topic_id = TopicId::from_bytes([0u8; 32]);

        // create our gossip endpoint
        let (router, gossip) = create_gossip_endpoint(&mut rng, relay_map.clone()).await?;

        // create a second endpoint so that we can test actually joining
        let (endpoint2_id, endpoint2_addr, endpoint2_task) = {
            let (router, gossip) = create_gossip_endpoint(&mut rng, relay_map.clone()).await?;
            let endpoint_addr = router.endpoint().addr();
            let endpoint_id = router.endpoint().id();
            // The second endpoint joins with no bootstrap peers and broadcasts once.
            let task = tokio::task::spawn(async move {
                let mut topic = gossip.subscribe_and_join(topic_id, vec![]).await?;
                topic.broadcast(b"hello".to_vec().into()).await?;
                Ok::<_, AnyError>(router)
            });
            (endpoint_id, endpoint_addr, task)
        };

        // create a memory lookup service to add endpoint addr manually
        let memory_lookup = MemoryLookup::new();
        memory_lookup.add_endpoint_info(endpoint2_addr);
        router.endpoint().address_lookup().add(memory_lookup);

        // expose the gossip endpoint over RPC
        let (rpc_server_endpoint, rpc_server_cert) =
            irpc::util::make_server_endpoint("127.0.0.1:0".parse().unwrap())
                .context("make server endpoint")?;
        let rpc_server_addr = rpc_server_endpoint
            .local_addr()
            .std_context("resolve server addr")?;
        let rpc_server_task = tokio::task::spawn(async move {
            gossip.listen(rpc_server_endpoint).await;
        });

        // connect to the RPC endpoint with a new client
        let rpc_client_endpoint =
            irpc::util::make_client_endpoint("127.0.0.1:0".parse().unwrap(), &[&rpc_server_cert])
                .context("make client endpoint")?;
        let rpc_client = GossipApi::connect(rpc_client_endpoint, rpc_server_addr);

        // join via RPC
        let recv = async move {
            let mut topic = rpc_client
                .subscribe_and_join(topic_id, vec![endpoint2_id])
                .await?;
            // wait for a message
            while let Some(event) = topic.try_next().await? {
                match event {
                    Event::Received(message) => {
                        assert_eq!(&message.content[..], b"hello");
                        break;
                    }
                    Event::Lagged => panic!("unexpected lagged event"),
                    _ => {}
                }
            }
            Ok::<_, AnyError>(())
        };

        // timeout to not hang in case of failure
        tokio::time::timeout(Duration::from_secs(10), recv)
            .await
            .std_context("rpc recv timeout")??;

        // shutdown
        rpc_server_task.abort();
        router.shutdown().await.std_context("shutdown router")?;
        let router2 = endpoint2_task.await.std_context("join endpoint task")??;
        router2
            .shutdown()
            .await
            .std_context("shutdown second router")?;
        Ok(())
    }

    /// Compile-time check that [`GossipTopic`] is `Sync`; the helper functions
    /// are never called, they only need to type-check.
    #[test]
    fn ensure_gossip_topic_is_sync() {
        #[allow(unused)]
        fn get() -> GossipTopic {
            unimplemented!()
        }
        #[allow(unused)]
        fn check(_t: impl Sync) {}
        #[allow(unused)]
        fn foo() {
            check(get());
        }
    }
}

View file

@ -0,0 +1,418 @@
use std::{
collections::HashMap,
path::{Path, PathBuf},
};
use clap::Parser;
use comfy_table::{presets::NOTHING, Cell, CellAlignment, Table};
use iroh_gossip::proto::sim::{
BootstrapMode, NetworkConfig, RoundStats, RoundStatsAvg, RoundStatsDiff, Simulator,
SimulatorConfig,
};
use n0_error::{Result, StackResultExt, StdResultExt};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use serde::{Deserialize, Serialize};
use tracing::{error_span, info, warn};
/// Which gossip traffic pattern a scenario exercises.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[allow(clippy::enum_variant_names)]
enum Simulation {
    /// A single sender broadcasts a single message per round.
    GossipSingle,
    /// Each round a different sender is chosen at random, and broadcasts a single message
    GossipMulti,
    /// Each round, all peers broadcast a single message simultaneously.
    GossipAll,
}
/// One scenario from the config file: a traffic pattern plus network size and shape.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ScenarioDescription {
    /// The traffic pattern to run.
    sim: Simulation,
    /// Number of simulated peers.
    nodes: u32,
    /// How peers initially join the swarm.
    #[serde(default)]
    bootstrap: BootstrapMode,
    /// Number of gossip rounds to run (defaults to 30).
    #[serde(default = "defaults::rounds")]
    rounds: u32,
    /// Per-scenario network config; `main` fills in the file-level default when absent.
    config: Option<NetworkConfig>,
}
impl ScenarioDescription {
pub fn label(&self) -> String {
let &ScenarioDescription {
sim,
nodes,
rounds,
config: _,
bootstrap: _,
} = &self;
format!("{sim:?}-n{nodes}-r{rounds}")
}
}
/// Serde default values for scenario fields.
mod defaults {
    /// Default number of gossip rounds per simulation.
    pub fn rounds() -> u32 {
        30
    }
}
/// Top-level TOML config: seeds, an optional default network config, and the scenarios.
#[derive(Debug, Serialize, Deserialize)]
struct SimConfig {
    /// RNG seeds; every scenario is run once per seed.
    seeds: Vec<u64>,
    /// Default network config applied to scenarios that don't set their own.
    config: Option<NetworkConfig>,
    /// The scenarios to run (TOML `[[scenario]]` tables).
    scenario: Vec<ScenarioDescription>,
}
// Command-line entry point; dispatches to the Run or Compare subcommand.
// (Plain comments are used here on purpose: doc comments on clap items
// would change the generated --help text.)
#[derive(Debug, Parser)]
struct Cli {
    #[clap(subcommand)]
    command: Command,
}
// CLI subcommands. Field-level notes use plain comments so the clap-generated
// --help output is unchanged.
#[derive(Debug, Parser)]
enum Command {
    /// Run simulations
    Run {
        // Path to the TOML simulation config (see `SimConfig`).
        #[clap(short, long)]
        config_path: PathBuf,
        // Directory for per-scenario config and results files.
        #[clap(short, long)]
        out_dir: Option<PathBuf>,
        // Baseline results directory to compare against after running.
        #[clap(short, long)]
        baseline: Option<PathBuf>,
        // Run scenarios sequentially instead of in parallel via rayon.
        #[clap(short, long)]
        single_threaded: bool,
        // Only run scenarios whose label matches one of these values.
        #[clap(short, long)]
        filter: Vec<String>,
    },
    /// Compare simulation runs
    Compare {
        // Directory with baseline `*.results.json` files.
        baseline: PathBuf,
        // Directory with current `*.results.json` files.
        current: PathBuf,
        // Only compare scenarios whose label matches one of these values.
        #[clap(short, long)]
        filter: Vec<String>,
    },
}
/// Entry point: parses the CLI and runs or compares simulations.
fn main() -> Result<()> {
    tracing_subscriber::fmt::init();
    let args: Cli = Cli::parse();
    match args.command {
        Command::Run {
            config_path,
            out_dir,
            baseline,
            single_threaded,
            filter,
        } => {
            // Load the TOML config and fill in per-scenario network configs
            // from the file-level default where absent.
            let config_text = std::fs::read_to_string(&config_path)
                .with_std_context(|_| format!("read config {}", config_path.display()))?;
            let config: SimConfig = toml::from_str(&config_text).std_context("parse config")?;
            let base_config = config.config.unwrap_or_default();
            info!("base config: {base_config:?}");
            let seeds = config.seeds;
            let mut scenarios = config.scenario;
            for scenario in scenarios.iter_mut() {
                scenario.config.get_or_insert_with(|| base_config.clone());
            }
            if let Some(out_dir) = out_dir.as_ref() {
                std::fs::create_dir_all(out_dir)
                    .with_std_context(|_| format!("create output dir {}", out_dir.display()))?;
            }
            // An empty filter list means "run everything".
            let filter_fn = |s: &ScenarioDescription| {
                let label = s.label();
                if filter.is_empty() {
                    true
                } else {
                    filter.iter().any(|x| x == &label)
                }
            };
            // Scenarios run in parallel via rayon unless --single-threaded.
            let results: Result<Vec<_>> = if !single_threaded {
                scenarios
                    .into_par_iter()
                    .filter(filter_fn)
                    .map(|scenario| run_and_save_simulation(scenario, &seeds, out_dir.as_ref()))
                    .collect()
            } else {
                scenarios
                    .into_iter()
                    .filter(filter_fn)
                    .map(|scenario| run_and_save_simulation(scenario, &seeds, out_dir.as_ref()))
                    .collect()
            };
            let mut results = results?;
            // Sort for stable output order regardless of parallel completion order.
            results.sort_by_key(|a| a.scenario.label());
            for result in results {
                print_result(&result);
            }
            // Optionally diff this run against a stored baseline.
            if let (Some(baseline), Some(out_dir)) = (baseline, out_dir) {
                compare_dirs(baseline, out_dir, filter)?;
            }
        }
        Command::Compare {
            baseline,
            current,
            filter,
        } => {
            compare_dirs(baseline, current, filter)?;
        }
    }
    Ok(())
}
/// Runs one scenario across all seeds, persisting the scenario config and the
/// results to `out_dir` when given.
fn run_and_save_simulation(
    scenario: ScenarioDescription,
    seeds: &[u64],
    out_dir: Option<impl AsRef<Path>>,
) -> Result<SimulationResults> {
    let label = scenario.label();
    // Persist the effective scenario config before running, so an aborted run
    // still leaves a record of what was attempted.
    if let Some(dir) = out_dir.as_ref() {
        let path = dir.as_ref().join(format!("{label}.config.toml"));
        let toml_text = toml::to_string(&scenario).std_context("encode scenario")?;
        std::fs::write(&path, toml_text)
            .with_std_context(|_| format!("write scenario {}", &path.display()))?;
    }
    let results = run_simulation(seeds, scenario);
    if let Some(dir) = out_dir.as_ref() {
        let path = dir.as_ref().join(format!("{label}.results.json"));
        let json_text = serde_json::to_string(&results).std_context("encode results")?;
        std::fs::write(&path, json_text)
            .with_std_context(|_| format!("write results {}", path.display()))?;
    }
    Ok(results)
}
/// Aggregated output of running one scenario across all seeds.
#[derive(Debug, Serialize, Deserialize, Clone)]
struct SimulationResults {
    /// The scenario that was run.
    scenario: ScenarioDescription,
    /// Maps seeds to results
    results: HashMap<u64, RoundStatsAvg>,
    /// Average over all seeds; `None` when no seed produced results.
    average: Option<RoundStatsAvg>,
}
impl SimulationResults {
    /// Reads and deserializes a `*.results.json` file.
    fn load_from_file(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref();
        let text = std::fs::read_to_string(path)
            .with_std_context(|_| format!("read results {}", path.display()))?;
        serde_json::from_str(&text).std_context("decode results")
    }
}
/// Runs `scenario` once per seed and aggregates the per-seed round statistics.
fn run_simulation(seeds: &[u64], scenario: ScenarioDescription) -> SimulationResults {
    let mut results = HashMap::new();
    let network_config = scenario.config.clone().unwrap_or_default();
    for &seed in seeds {
        // Scope tracing output to this (scenario, seed) pair.
        let span = error_span!("sim", name=%scenario.label(), %seed);
        let _guard = span.enter();
        let sim_config = SimulatorConfig {
            rng_seed: seed,
            peers: scenario.nodes as usize,
            ..Default::default()
        };
        let bootstrap = scenario.bootstrap.clone();
        let mut simulator = Simulator::new(sim_config, network_config.clone());
        info!("start");
        let outcome = simulator.bootstrap(bootstrap);
        // A partially-connected swarm is reported but not fatal.
        if outcome.has_peers_with_no_neighbors() {
            warn!("not all nodes active after bootstrap: {outcome:?}");
        } else {
            info!("bootstrapped, all nodes active");
        }
        let result = match scenario.sim {
            Simulation::GossipSingle => BigSingle.run(simulator, scenario.rounds as usize),
            Simulation::GossipMulti => BigMulti.run(simulator, scenario.rounds as usize),
            Simulation::GossipAll => BigAll.run(simulator, scenario.rounds as usize),
        };
        info!("done");
        results.insert(seed, result);
    }
    // Average across seeds; `None` when no seeds were supplied.
    let stats: Vec<_> = results.values().cloned().collect();
    let average = if !stats.is_empty() {
        let avg = RoundStatsAvg::avg(&stats);
        Some(avg)
    } else {
        None
    };
    SimulationResults {
        average,
        results,
        scenario,
    }
}
/// Prints one scenario's averaged stats as a right-aligned table.
fn print_result(r: &SimulationResults) {
    let seeds = r.results.len();
    println!("{} with {seeds} seeds", r.scenario.label());
    let avg = match r.average.as_ref() {
        Some(avg) => avg,
        None => {
            println!("no results, simulation did not complete");
            return;
        }
    };
    let header = ["", "RMR", "LDH", "missed", "duration"]
        .into_iter()
        .map(|s| Cell::new(s).set_alignment(CellAlignment::Right));
    let mut table = Table::new();
    table.load_preset(NOTHING).set_header(header);
    table.add_row(fmt_round("mean", &avg.mean));
    table.add_row(fmt_round("max", &avg.max));
    table.add_row(fmt_round("min", &avg.min));
    println!("{table}");
    if avg.max.missed > 0.0 {
        println!("WARN: Messages were missed!")
    }
    println!();
}
/// A runnable traffic pattern: drives `rounds` gossip rounds on a bootstrapped
/// simulator and returns the averaged round statistics.
trait Scenario {
    fn run(self, sim: Simulator, rounds: usize) -> RoundStatsAvg;
}
/// Scenario: one fixed peer broadcasts a single message each round.
struct BigSingle;

impl Scenario for BigSingle {
    fn run(self, mut simulator: Simulator, rounds: usize) -> RoundStatsAvg {
        // Pick the sender once; every round uses the same peer.
        let sender = simulator.random_peer();
        for round in 0..rounds {
            let payload = format!("m{round}").into_bytes().into();
            simulator.gossip_round(vec![(sender, payload)]);
        }
        simulator.round_stats_average()
    }
}
/// Scenario: a fresh randomly-chosen peer broadcasts a single message each round.
struct BigMulti;

impl Scenario for BigMulti {
    fn run(self, mut simulator: Simulator, rounds: usize) -> RoundStatsAvg {
        for round in 0..rounds {
            // Unlike `BigSingle`, the sender is re-drawn every round.
            let sender = simulator.random_peer();
            let payload = format!("m{round}").into_bytes().into();
            simulator.gossip_round(vec![(sender, payload)]);
        }
        simulator.round_stats_average()
    }
}
/// Scenario: every peer broadcasts simultaneously each round.
struct BigAll;

impl Scenario for BigAll {
    fn run(self, mut simulator: Simulator, rounds: usize) -> RoundStatsAvg {
        let messages_per_peer = 1;
        for round in 0..rounds {
            // Build the full batch for this round, then gossip it at once.
            let mut batch = Vec::new();
            for peer in simulator.network.peer_ids() {
                for msg_idx in 0..messages_per_peer {
                    let payload: bytes::Bytes =
                        format!("{}:{}.{}", round, msg_idx, peer).into_bytes().into();
                    batch.push((peer, payload));
                }
            }
            simulator.gossip_round(batch);
        }
        simulator.round_stats_average()
    }
}
/// Compares every `*.results.json` in `current_path` against the same-named
/// file in `baseline_dir`, honoring the label `filter` (empty = all).
fn compare_dirs(baseline_dir: PathBuf, current_path: PathBuf, filter: Vec<String>) -> Result<()> {
    let mut paths = vec![];
    for entry in std::fs::read_dir(&current_path)
        .with_std_context(|_| format!("read directory {}", current_path.display()))?
        .filter_map(Result::ok)
        .filter(|x| x.path().is_file())
    {
        let current_file = entry.path().to_owned();
        let Some(filename) = current_file.file_name().and_then(|s| s.to_str()) else {
            continue;
        };
        let Some(basename) = filename.strip_suffix(".results.json") else {
            continue;
        };
        if !filter.is_empty() && !filter.iter().any(|x| x == basename) {
            continue;
        }
        let baseline_file = baseline_dir.join(filename);
        if !baseline_file.exists() {
            // BUG FIX: previously only printed a note (without the scenario
            // name) but still pushed the pair, producing a second confusing
            // "skip" when compare_files failed to load the missing baseline.
            println!("skip {basename} (not in baseline)");
            continue;
        }
        paths.push((basename.to_string(), baseline_file, current_file));
    }
    // Sort by basename for stable output order.
    paths.sort();
    for (basename, baseline_file, current_file) in paths {
        println!("comparing {basename}");
        if let Err(err) = compare_files(&baseline_file, &current_file) {
            // BUG FIX: the closing parenthesis was missing from this message.
            println!("  skip (reason: {err:#})");
        }
    }
    Ok(())
}
/// Loads both results files and prints a comparison table.
fn compare_files(baseline: impl AsRef<Path>, current: impl AsRef<Path>) -> Result<()> {
    let baseline_results =
        SimulationResults::load_from_file(baseline.as_ref()).context("failed to load baseline")?;
    let current_results =
        SimulationResults::load_from_file(current.as_ref()).context("failed to load current")?;
    compare_results(baseline_results, current_results);
    Ok(())
}
/// Prints a percentage-diff table between two runs, or a note when either
/// run produced no averages.
fn compare_results(baseline: SimulationResults, current: SimulationResults) {
    match (baseline.average, current.average) {
        (None, None) => println!("both runs did not finish"),
        (None, Some(_)) => println!("baseline run did not finish"),
        (Some(_), None) => println!("current run did not finish"),
        (Some(baseline_avg), Some(current_avg)) => {
            let diff = baseline_avg.diff(&current_avg);
            let header = ["", "RMR", "LDH", "missed", "duration"]
                .into_iter()
                .map(|s| Cell::new(s).set_alignment(CellAlignment::Right));
            let mut table = Table::new();
            table.load_preset(NOTHING).set_header(header);
            table.add_row(fmt_diff_round("mean", &diff.mean));
            table.add_row(fmt_diff_round("max", &diff.max));
            table.add_row(fmt_diff_round("min", &diff.min));
            println!("{table}");
        }
    }
}
/// Formats one row of a stats table: label, RMR, LDH, missed, duration.
fn fmt_round(label: &str, round: &RoundStats) -> Vec<Cell> {
    let columns = vec![
        label.to_string(),
        format!("{:.2}", round.rmr),
        format!("{:.2}", round.ldh),
        format!("{:.2}", round.missed),
        format!("{}ms", round.duration.as_millis()),
    ];
    columns
        .into_iter()
        .map(|text| Cell::new(text).set_alignment(CellAlignment::Right))
        .collect()
}
fn fmt_diff_round(label: &str, round: &RoundStatsDiff) -> Vec<String> {
vec![
label.to_string(),
fmt_percent(round.rmr),
fmt_percent(round.ldh),
fmt_percent(round.missed),
fmt_percent(round.duration),
]
}
/// Formats a fractional diff as a signed, fixed-width percentage,
/// e.g. `0.5` becomes `    +50.00%`.
fn fmt_percent(diff: f32) -> String {
    let percent = diff * 100.0;
    format!("{percent:>+10.2}%")
}

View file

@ -0,0 +1,25 @@
#![cfg_attr(feature = "net", doc = include_str!("../README.md"))]
//! Broadcast messages to peers subscribed to a topic
//!
//! The crate is designed to be used from the [iroh] crate, which provides a
//! [high level interface](https://docs.rs/iroh/latest/iroh/client/gossip/index.html),
//! but can also be used standalone.
//!
//! [iroh]: https://docs.rs/iroh
#![deny(missing_docs, rustdoc::broken_intra_doc_links)]
#![cfg_attr(iroh_docsrs, feature(doc_cfg))]
#[cfg(feature = "net")]
pub use net::Gossip;
#[cfg(feature = "net")]
#[doc(inline)]
pub use net::GOSSIP_ALPN as ALPN;
#[cfg(any(feature = "net", feature = "rpc"))]
pub mod api;
pub mod metrics;
#[cfg(feature = "net")]
pub mod net;
pub mod proto;
pub use proto::TopicId;

View file

@ -0,0 +1,45 @@
//! Metrics for iroh-gossip
use iroh_metrics::{Counter, MetricsGroup};
/// Metrics group for the gossip module, registered under the name `gossip`.
#[derive(Debug, Default, MetricsGroup)]
#[metrics(name = "gossip")]
pub struct Metrics {
    /// Number of control messages sent
    pub msgs_ctrl_sent: Counter,
    /// Number of control messages received
    pub msgs_ctrl_recv: Counter,
    /// Number of data messages sent
    pub msgs_data_sent: Counter,
    /// Number of data messages received
    pub msgs_data_recv: Counter,
    /// Total size of all data messages sent
    pub msgs_data_sent_size: Counter,
    /// Total size of all data messages received
    pub msgs_data_recv_size: Counter,
    /// Total size of all control messages sent
    pub msgs_ctrl_sent_size: Counter,
    /// Total size of all control messages received
    pub msgs_ctrl_recv_size: Counter,
    /// Number of times we connected to a peer
    pub neighbor_up: Counter,
    /// Number of times we disconnected from a peer
    pub neighbor_down: Counter,
    /// Number of times the main actor loop ticked
    pub actor_tick_main: Counter,
    /// Number of times the actor ticked for a message received
    pub actor_tick_rx: Counter,
    /// Number of times the actor ticked for an endpoint event
    pub actor_tick_endpoint: Counter,
    /// Number of times the actor ticked for a dialer event
    pub actor_tick_dialer: Counter,
    /// Number of times the actor ticked for a successful dialer event
    pub actor_tick_dialer_success: Counter,
    /// Number of times the actor ticked for a failed dialer event
    pub actor_tick_dialer_failure: Counter,
    /// Number of times the actor ticked for an incoming event
    pub actor_tick_in_event_rx: Counter,
    /// Number of times the actor ticked for a timer event
    pub actor_tick_timers: Counter,
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,175 @@
//! An address lookup service to gather addressing info collected from gossip Join and ForwardJoin messages.
use std::{
collections::{btree_map::Entry, BTreeMap},
sync::{Arc, RwLock},
time::Duration,
};
use iroh::address_lookup::{self, AddressLookup, EndpointData, EndpointInfo};
use iroh_base::EndpointId;
use n0_future::{
boxed::BoxStream,
stream::{self, StreamExt},
task::AbortOnDropHandle,
time::SystemTime,
};
/// Retention policy for gossip-learned endpoint info.
pub(crate) struct RetentionOpts {
    /// How long to keep received endpoint info records alive before pruning them
    retention: Duration,
    /// How often to check for expired entries
    evict_interval: Duration,
}

impl Default for RetentionOpts {
    fn default() -> Self {
        // Keep entries for five minutes, checking every thirty seconds.
        RetentionOpts {
            retention: Duration::from_secs(300),
            evict_interval: Duration::from_secs(30),
        }
    }
}
/// An address lookup service that expires endpoints after some time.
///
/// It is added to the endpoint when constructing a gossip instance, and the gossip actor
/// then adds endpoint addresses as received with Join or ForwardJoin messages.
#[derive(Debug, Clone)]
pub(crate) struct GossipAddressLookup {
    /// Shared map of endpoint info; also pruned by the background eviction task.
    endpoints: NodeMap,
    /// Keeps the eviction task alive; the task is aborted when the last clone drops.
    _task_handle: Arc<AbortOnDropHandle<()>>,
}

/// Shared, locked map from endpoint id to its stored addressing info.
type NodeMap = Arc<RwLock<BTreeMap<EndpointId, StoredEndpointInfo>>>;

/// Endpoint addressing data plus the time it was last refreshed (used for eviction).
#[derive(Debug)]
struct StoredEndpointInfo {
    data: EndpointData,
    last_updated: SystemTime,
}
impl Default for GossipAddressLookup {
fn default() -> Self {
Self::new()
}
}
impl GossipAddressLookup {
    /// Provenance string attached to resolved items, identifying gossip as the source.
    const PROVENANCE: &'static str = "gossip";

    /// Creates a new gossip address lookup instance.
    pub(crate) fn new() -> Self {
        Self::with_opts(Default::default())
    }

    /// Creates a new instance with the given retention options and spawns the
    /// background eviction task.
    pub(crate) fn with_opts(opts: RetentionOpts) -> Self {
        let endpoints: NodeMap = Default::default();
        let task = {
            // Hold only a weak reference so the task does not keep the map
            // alive; the loop exits once all strong references are dropped.
            let endpoints = Arc::downgrade(&endpoints);
            n0_future::task::spawn(async move {
                let mut interval = n0_future::time::interval(opts.evict_interval);
                loop {
                    interval.tick().await;
                    let Some(endpoints) = endpoints.upgrade() else {
                        break;
                    };
                    let now = SystemTime::now();
                    // Drop entries older than the retention window.
                    // NOTE(review): if the system clock moved backwards,
                    // duration_since errors and the fallback of Duration::MAX
                    // evicts the entry immediately — confirm this is intended.
                    endpoints.write().expect("poisoned").retain(|_k, v| {
                        let age = now.duration_since(v.last_updated).unwrap_or(Duration::MAX);
                        age <= opts.retention
                    });
                }
            })
        };
        Self {
            endpoints,
            _task_handle: Arc::new(AbortOnDropHandle::new(task)),
        }
    }

    /// Augments endpoint addressing information for the given endpoint ID.
    ///
    /// The provided addressing information is combined with the existing info in the in-memory
    /// lookup. Any new direct addresses are added to those already present while the
    /// relay URL is overwritten.
    pub(crate) fn add(&self, endpoint_info: impl Into<EndpointInfo>) {
        let last_updated = SystemTime::now();
        let EndpointInfo { endpoint_id, data } = endpoint_info.into();
        let mut guard = self.endpoints.write().expect("poisoned");
        match guard.entry(endpoint_id) {
            Entry::Occupied(mut entry) => {
                // Merge into the existing record and refresh its timestamp.
                let existing = entry.get_mut();
                existing.data.add_addrs(data.addrs().cloned());
                existing.data.set_user_data(data.user_data().cloned());
                existing.last_updated = last_updated;
            }
            Entry::Vacant(entry) => {
                entry.insert(StoredEndpointInfo { data, last_updated });
            }
        }
    }
}
impl AddressLookup for GossipAddressLookup {
    /// Resolves an endpoint from the in-memory map, if present.
    ///
    /// Returns a single-item stream carrying the stored info, stamped with the
    /// gossip provenance and the record's last-update time in microseconds
    /// since the unix epoch.
    fn resolve(
        &self,
        endpoint_id: EndpointId,
    ) -> Option<BoxStream<Result<address_lookup::Item, address_lookup::Error>>> {
        let guard = self.endpoints.read().expect("poisoned");
        let info = guard.get(&endpoint_id)?;
        // Panics if last_updated predates the unix epoch (badly skewed clock).
        let last_updated = info
            .last_updated
            .duration_since(SystemTime::UNIX_EPOCH)
            .expect("time drift")
            .as_micros() as u64;
        let item = address_lookup::Item::new(
            EndpointInfo::from_parts(endpoint_id, info.data.clone()),
            Self::PROVENANCE,
            Some(last_updated),
        );
        Some(stream::iter(Some(Ok(item))).boxed())
    }
}
#[cfg(test)]
mod tests {
    use std::time::Duration;
    use iroh::{address_lookup::AddressLookup, EndpointAddr, SecretKey};
    use n0_future::StreamExt;
    use rand::SeedableRng;
    use super::{GossipAddressLookup, RetentionOpts};
    /// Entries must stay resolvable while fresh and disappear once older than the
    /// configured retention duration.
    #[tokio::test]
    async fn test_retention() {
        let opts = RetentionOpts {
            evict_interval: Duration::from_millis(100),
            retention: Duration::from_millis(500),
        };
        let disco = GossipAddressLookup::with_opts(opts);
        let rng = &mut rand_chacha::ChaCha12Rng::seed_from_u64(1);
        let k1 = SecretKey::generate(rng);
        let a1 = EndpointAddr::new(k1.public());
        disco.add(a1);
        // Resolvable immediately after insertion.
        assert!(matches!(
            disco.resolve(k1.public()).unwrap().next().await,
            Some(Ok(_))
        ));
        tokio::time::sleep(Duration::from_millis(200)).await;
        // Still resolvable within the retention window.
        assert!(matches!(
            disco.resolve(k1.public()).unwrap().next().await,
            Some(Ok(_))
        ));
        tokio::time::sleep(Duration::from_millis(700)).await;
        // Evicted after the retention window elapsed.
        assert!(disco.resolve(k1.public()).is_none());
    }
}

View file

@ -0,0 +1,435 @@
//! Utilities for iroh-gossip networking
use std::{
collections::{hash_map, HashMap},
io,
time::Duration,
};
use bytes::{Bytes, BytesMut};
use iroh::{
endpoint::{Connection, RecvStream, SendStream},
EndpointId,
};
use n0_error::{e, stack_error};
use n0_future::{
time::{sleep_until, Instant},
FuturesUnordered, StreamExt,
};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
sync::mpsc,
task::JoinSet,
};
use tracing::{debug, trace, Instrument};
use super::{InEvent, ProtoMessage};
use crate::proto::{util::TimerMap, TopicId};
/// Errors related to message writing
#[allow(missing_docs)]
#[stack_error(derive, add_meta, from_sources)]
#[non_exhaustive]
pub(crate) enum WriteError {
    /// Connection error
    #[error("Connection error")]
    Connection {
        #[error(std_err)]
        source: iroh::endpoint::ConnectionError,
    },
    /// Serialization failed
    #[error("Serialization failed")]
    Ser {
        #[error(std_err)]
        source: postcard::Error,
    },
    /// IO error
    #[error("IO error")]
    Io {
        #[error(std_err)]
        source: std::io::Error,
    },
    /// Message was larger than the configured maximum message size
    #[error("message too large")]
    TooLarge {},
}
/// Header written as the first frame on every unidirectional stream.
///
/// It identifies the topic that all subsequent message frames on the stream
/// belong to.
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct StreamHeader {
    /// The topic all messages on this stream are scoped to.
    pub(crate) topic_id: TopicId,
}
impl StreamHeader {
pub(crate) async fn read(
stream: &mut RecvStream,
buffer: &mut BytesMut,
max_message_size: usize,
) -> Result<Self, ReadError> {
let header: Self = read_frame(stream, buffer, max_message_size)
.await?
.ok_or_else(|| {
ReadError::from(io::Error::new(
io::ErrorKind::UnexpectedEof,
"stream ended before header",
))
})?;
Ok(header)
}
pub(crate) async fn write(
self,
stream: &mut SendStream,
buffer: &mut Vec<u8>,
max_message_size: usize,
) -> Result<(), WriteError> {
write_frame(stream, &self, buffer, max_message_size).await?;
Ok(())
}
}
/// Receive loop for a single gossip connection.
///
/// Accepts incoming unidirectional streams on the connection and forwards every
/// decoded message to the gossip actor via `in_event_tx`.
pub(crate) struct RecvLoop {
    /// The endpoint on the other side of `conn`; attached to all forwarded messages.
    remote_endpoint_id: EndpointId,
    conn: Connection,
    max_message_size: usize,
    /// Channel into the gossip actor.
    in_event_tx: mpsc::Sender<InEvent>,
}
impl RecvLoop {
    /// Creates a new receive loop (does not start it; call [`Self::run`]).
    pub(crate) fn new(
        remote_endpoint_id: EndpointId,
        conn: Connection,
        in_event_tx: mpsc::Sender<InEvent>,
        max_message_size: usize,
    ) -> Self {
        Self {
            remote_endpoint_id,
            conn,
            max_message_size,
            in_event_tx,
        }
    }
    /// Runs until the connection is closed and all accepted streams are drained,
    /// or until the actor side of `in_event_tx` is dropped.
    pub(crate) async fn run(&mut self) -> Result<(), ReadError> {
        // One pending `RecvStreamState::next()` future per currently-open stream.
        let mut read_futures = FuturesUnordered::new();
        let mut conn_is_closed = false;
        let closed = self.conn.closed();
        tokio::pin!(closed);
        // Keep going while the connection is alive or streams still have data.
        while !conn_is_closed || !read_futures.is_empty() {
            tokio::select! {
                _ = &mut closed, if !conn_is_closed => {
                    conn_is_closed = true;
                }
                stream = self.conn.accept_uni(), if !conn_is_closed => {
                    let stream = match stream {
                        Ok(stream) => stream,
                        Err(_) => {
                            // Accept errors mean the connection is gone; keep
                            // draining the already-open streams.
                            conn_is_closed = true;
                            continue;
                        }
                    };
                    let state = RecvStreamState::new(stream, self.max_message_size).await?;
                    debug!(topic=%state.header.topic_id.fmt_short(), "stream opened");
                    read_futures.push(state.next());
                }
                Some(res) = read_futures.next(), if !read_futures.is_empty() => {
                    let (state, msg) = match res {
                        Ok((state, msg)) => (state, msg),
                        Err(err) => {
                            // A broken stream terminates only that stream, not the loop.
                            debug!("recv stream closed with error: {err:#}");
                            continue;
                        }
                    };
                    match msg {
                        None => debug!(topic=%state.header.topic_id.fmt_short(), "stream closed"),
                        Some(msg) => {
                            if self.in_event_tx.send(InEvent::RecvMessage(self.remote_endpoint_id, msg)).await.is_err() {
                                debug!("stop recv loop: actor closed");
                                break;
                            }
                            // Re-arm the read future for this stream.
                            read_futures.push(state.next());
                        }
                    }
                }
            }
        }
        debug!("recv loop closed");
        Ok(())
    }
}
/// State for reading message frames from a single incoming stream.
///
/// Created after the [`StreamHeader`] was read; all subsequent frames on the
/// stream are attributed to the header's topic.
#[derive(Debug)]
struct RecvStreamState {
    stream: RecvStream,
    /// The header read at the start of the stream; supplies the topic id.
    header: StreamHeader,
    /// Scratch buffer reused across frame reads.
    buffer: BytesMut,
    max_message_size: usize,
}
impl RecvStreamState {
    /// Reads the stream header and returns the initialized stream state.
    async fn new(mut stream: RecvStream, max_message_size: usize) -> Result<Self, ReadError> {
        let mut buffer = BytesMut::new();
        let header = StreamHeader::read(&mut stream, &mut buffer, max_message_size).await?;
        // Keep the buffer that was grown while reading the header instead of
        // discarding it for a fresh, empty `BytesMut` — the allocation can be
        // reused for the message frames.
        Ok(Self {
            buffer,
            max_message_size,
            stream,
            header,
        })
    }
    /// Reads the next message from the stream.
    ///
    /// Returns `self` and the next message, or `None` if the stream ended gracefully.
    ///
    /// ## Cancellation safety
    ///
    /// This function is not cancellation-safe.
    async fn next(mut self) -> Result<(Self, Option<ProtoMessage>), ReadError> {
        let msg = read_frame(&mut self.stream, &mut self.buffer, self.max_message_size).await?;
        // Tag the raw payload with the topic announced in the stream header.
        let msg = msg.map(|msg| ProtoMessage {
            topic: self.header.topic_id,
            message: msg,
        });
        Ok((self, msg))
    }
}
/// Send loop for a single gossip connection.
///
/// Receives outgoing messages from the gossip actor via `send_rx` and writes each
/// one onto a per-topic unidirectional stream on `conn`.
pub(crate) struct SendLoop {
    conn: Connection,
    /// One open send stream per topic, opened lazily on the first message.
    streams: HashMap<TopicId, SendStream>,
    /// Scratch buffer reused for encoding frames.
    buffer: Vec<u8>,
    max_message_size: usize,
    /// Tasks waiting for finished streams to be acknowledged by the remote.
    finishing: JoinSet<()>,
    /// Channel of messages to send, fed by the gossip actor.
    send_rx: mpsc::Receiver<ProtoMessage>,
}
impl SendLoop {
    /// Creates a new send loop (does not start it; call [`Self::run`]).
    pub(crate) fn new(
        conn: Connection,
        send_rx: mpsc::Receiver<ProtoMessage>,
        max_message_size: usize,
    ) -> Self {
        Self {
            conn,
            max_message_size,
            buffer: Default::default(),
            streams: Default::default(),
            finishing: Default::default(),
            send_rx,
        }
    }
    /// Sends the pre-queued messages, then forwards messages from `send_rx` until
    /// the connection closes or the channel's sender side is dropped.
    pub(crate) async fn run(&mut self, queue: Vec<ProtoMessage>) -> Result<(), WriteError> {
        // Flush messages that were queued before the connection was established.
        for msg in queue {
            self.write_message(&msg).await?;
        }
        let conn_clone = self.conn.clone();
        let closed = conn_clone.closed();
        tokio::pin!(closed);
        loop {
            tokio::select! {
                biased;
                _ = &mut closed => break,
                Some(msg) = self.send_rx.recv() => self.write_message(&msg).await?,
                // Reap completed stream-finish tasks as they resolve.
                _ = self.finishing.join_next(), if !self.finishing.is_empty() => {}
                else => break,
            }
        }
        // Close remaining streams.
        for (topic_id, mut stream) in self.streams.drain() {
            stream.finish().ok();
            self.finishing.spawn(
                async move {
                    stream.stopped().await.ok();
                    debug!(topic=%topic_id.fmt_short(), "stream closed");
                }
                .instrument(tracing::Span::current()),
            );
        }
        if !self.finishing.is_empty() {
            trace!(
                "send loop closing, waiting for {} send streams to finish",
                self.finishing.len()
            );
            // Wait for the remote to acknowledge all streams are finished.
            if let Err(_elapsed) = n0_future::time::timeout(Duration::from_secs(5), async move {
                while self.finishing.join_next().await.is_some() {}
            })
            .await
            {
                debug!("not all send streams finished within timeout, abort")
            }
        }
        debug!("send loop closed");
        Ok(())
    }
    /// Write a [`ProtoMessage`] as a length-prefixed, postcard-encoded message on its stream.
    ///
    /// If no stream is opened yet, this opens a new stream for the topic and writes the topic header.
    ///
    /// A disconnect message is the last message for its topic: after writing it, the
    /// topic's stream is finished and its acknowledgement awaited in the background.
    ///
    /// This function is not cancellation-safe.
    pub async fn write_message(&mut self, message: &ProtoMessage) -> Result<(), WriteError> {
        let ProtoMessage { topic, message } = message;
        let topic_id = *topic;
        let is_last = message.is_disconnect();
        let mut entry = match self.streams.entry(topic_id) {
            hash_map::Entry::Occupied(entry) => entry,
            hash_map::Entry::Vacant(entry) => {
                // First message for this topic: open a stream and send the header.
                let mut stream = self.conn.open_uni().await?;
                let header = StreamHeader { topic_id };
                header
                    .write(&mut stream, &mut self.buffer, self.max_message_size)
                    .await?;
                debug!(topic=%topic_id.fmt_short(), "stream opened");
                entry.insert_entry(stream)
            }
        };
        let stream = entry.get_mut();
        write_frame(stream, message, &mut self.buffer, self.max_message_size).await?;
        if is_last {
            trace!(topic=%topic_id.fmt_short(), "stream closing");
            let mut stream = entry.remove();
            if stream.finish().is_ok() {
                self.finishing.spawn(
                    async move {
                        stream.stopped().await.ok();
                        debug!(topic=%topic_id.fmt_short(), "stream closed");
                    }
                    .instrument(tracing::Span::current()),
                );
            }
        }
        Ok(())
    }
}
/// Errors related to message reading
#[allow(missing_docs)]
#[stack_error(derive, add_meta, from_sources)]
#[non_exhaustive]
pub(crate) enum ReadError {
    /// Deserialization failed
    #[error("Deserialization failed")]
    De {
        #[error(std_err)]
        source: postcard::Error,
    },
    /// IO error
    #[error("IO error")]
    Io {
        #[error(std_err)]
        source: std::io::Error,
    },
    /// Message was larger than the configured maximum message size
    #[error("message too large")]
    TooLarge {},
}
/// Read a length-prefixed frame and decode with postcard.
///
/// Returns `Ok(None)` if the stream ended before a frame length prefix was read.
pub async fn read_frame<T: DeserializeOwned>(
    reader: &mut RecvStream,
    buffer: &mut BytesMut,
    max_message_size: usize,
) -> Result<Option<T>, ReadError> {
    let frame = read_lp(reader, buffer, max_message_size).await?;
    let message = frame.map(|data| postcard::from_bytes(&data)).transpose()?;
    Ok(message)
}
/// Reads a length prefixed buffer.
///
/// Returns the frame as raw bytes. If the end of the stream is reached before
/// the frame length starts, `None` is returned.
///
/// The length prefix is a big-endian `u32` (tokio's `read_u32`), matching
/// [`write_frame`].
pub async fn read_lp(
    reader: &mut RecvStream,
    buffer: &mut BytesMut,
    max_message_size: usize,
) -> Result<Option<Bytes>, ReadError> {
    // NOTE(review): `read_u32` also reports `UnexpectedEof` when the stream ends
    // after a *partial* prefix (1-3 bytes), so such a truncation is treated as a
    // clean end-of-stream here — confirm this is intended.
    let size = match reader.read_u32().await {
        Ok(size) => size,
        Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
        Err(err) => return Err(err.into()),
    };
    let size = usize::try_from(size).map_err(|_| e!(ReadError::TooLarge))?;
    if size > max_message_size {
        return Err(e!(ReadError::TooLarge));
    }
    // Make room, read exactly one frame, then hand out the filled prefix as `Bytes`.
    buffer.resize(size, 0u8);
    reader
        .read_exact(&mut buffer[..])
        .await
        .map_err(io::Error::other)?;
    Ok(Some(buffer.split_to(size).freeze()))
}
/// Writes a length-prefixed frame.
///
/// Serializes `message` with postcard and writes a big-endian `u32` length prefix
/// followed by the encoded bytes, matching [`read_lp`] on the receiving side.
///
/// NOTE(review): this rejects frames with `len == max_message_size` (`>=`), while
/// `read_lp` accepts sizes up to and including the maximum (`>`); the writer is
/// strictly stricter than the reader — confirm this asymmetry is intended.
pub async fn write_frame<T: Serialize>(
    stream: &mut SendStream,
    message: &T,
    buffer: &mut Vec<u8>,
    max_message_size: usize,
) -> Result<(), WriteError> {
    let len = postcard::experimental::serialized_size(&message)?;
    if len >= max_message_size {
        return Err(e!(WriteError::TooLarge));
    }
    // Size the scratch buffer to the exact serialized length before encoding.
    buffer.clear();
    buffer.resize(len, 0u8);
    let slice = postcard::to_slice(&message, buffer)?;
    // Cast assumes `max_message_size` (and therefore `len`) fits in u32 —
    // TODO confirm configurations never exceed that.
    stream.write_u32(len as u32).await?;
    stream.write_all(slice).await.map_err(io::Error::other)?;
    Ok(())
}
/// A [`TimerMap`] with an async method to wait for the next timer expiration.
#[derive(Debug)]
pub struct Timers<T> {
    /// The underlying map from expiry instant to timer payload.
    map: TimerMap<T>,
}
impl<T> Default for Timers<T> {
    /// An empty timer map.
    fn default() -> Self {
        Self {
            map: TimerMap::default(),
        }
    }
}
impl<T> Timers<T> {
    /// Creates a new, empty timer map.
    pub fn new() -> Self {
        Self::default()
    }
    /// Inserts a new entry at the specified instant
    pub fn insert(&mut self, instant: Instant, item: T) {
        self.map.insert(instant, item);
    }
    /// Sleeps until the earliest scheduled timer elapses and returns its instant.
    ///
    /// Pends forever when no timer is scheduled.
    pub async fn wait_next(&mut self) -> Instant {
        if let Some(instant) = self.map.first().copied() {
            sleep_until(instant).await;
            instant
        } else {
            std::future::pending::<Instant>().await
        }
    }
    /// Pops the earliest timer that expires at or before `now`.
    pub fn pop_before(&mut self, now: Instant) -> Option<(Instant, T)> {
        self.map.pop_before(now)
    }
}

View file

@ -0,0 +1,344 @@
//! Implementation of the iroh-gossip protocol, as an IO-less state machine
//!
//! This module implements the iroh-gossip protocol. The entry point is [`State`], which contains
//! the protocol state for a node.
//!
//! The iroh-gossip protocol is made up from two parts: A swarm membership protocol, based on
//! [HyParView][hyparview], and a gossip broadcasting protocol, based on [PlumTree][plumtree].
//!
//! For a full explanation it is recommended to read the two papers. What follows is a brief
//! outline of the protocols.
//!
//! All protocol messages are namespaced by a [`TopicId`], a 32 byte identifier. Topics are
//! separate swarms and broadcast scopes. The HyParView and PlumTree algorithms both work in the
//! scope of a single topic. Thus, joining multiple topics increases the number of open connections
//! to peers and the size of the local routing table.
//!
//! The **membership protocol** ([HyParView][hyparview]) is a cluster protocol where each peer
//! maintains a partial view of all nodes in the swarm.
//! A peer joins the swarm for a topic by connecting to any known peer that is a member of this
//! topic's swarm. Obtaining this initial contact info happens out of band. The peer then sends
//! a `Join` message to that initial peer. All peers maintain a list of
//! `active` and `passive` peers. Active peers are those that you maintain active connections to.
//! Passive peers is an addressbook of additional peers. If one of your active peers goes offline,
//! its slot is filled with a random peer from the passive set. In the default configuration, the
//! active view has a size of 5 and the passive view a size of 30.
//! The HyParView protocol ensures that active connections are always bidirectional, and regularly
//! exchanges nodes for the passive view in a `Shuffle` operation.
//! Thus, this protocol exposes a high degree of reliability and auto-recovery in the case of node
//! failures.
//!
//! The **gossip protocol** ([PlumTree][plumtree]) builds upon the membership protocol. It exposes
//! a method to broadcast messages to all peers in the swarm. On each node, it maintains two sets
//! of peers: An `eager` set and a `lazy` set. Both are subsets of the `active` view from the
//! membership protocol. When broadcasting a message from the local node, or upon receiving a
//! broadcast message, the message is pushed to all peers in the eager set. Additionally, the hash
//! of the message (which uniquely identifies it), but not the message content, is lazily pushed
//! to all peers in the `lazy` set. When receiving such lazy pushes (called `Ihaves`), those peers
//! may request the message content after a timeout if they didn't receive the message by one of
//! their eager peers before. When requesting a message from a currently-lazy peer, this peer is
//! also upgraded to be an eager peer from that moment on. This strategy self-optimizes the
//! messaging graph by latency. Note however that this optimization will work best if the messaging
//! paths are stable, i.e. if it's always the same peer that broadcasts. If not, the relative
//! message redundancy will grow and the ideal messaging graph might change frequently.
//!
//! [hyparview]: https://asc.di.fct.unl.pt/~jleitao/pdf/dsn07-leitao.pdf
//! [plumtree]: https://asc.di.fct.unl.pt/~jleitao/pdf/srds07-leitao.pdf
use std::{fmt, hash::Hash};
use bytes::Bytes;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
mod hyparview;
mod plumtree;
pub mod state;
pub mod topic;
pub mod util;
#[cfg(any(test, feature = "test-utils"))]
pub mod sim;
pub use hyparview::Config as HyparviewConfig;
pub use plumtree::{Config as PlumtreeConfig, DeliveryScope, Scope};
pub use state::{InEvent, Message, OutEvent, State, Timer, TopicId};
pub use topic::{Command, Config, Event, IO};
/// The default maximum size in bytes for a gossip message.
///
/// This is a sane but arbitrary default and can be changed in the [`Config`].
/// Messages exceeding the configured maximum are rejected as too large.
pub const DEFAULT_MAX_MESSAGE_SIZE: usize = 4096;
/// The minimum allowed value for [`Config::max_message_size`].
pub const MIN_MAX_MESSAGE_SIZE: usize = 512;
/// The identifier for a peer.
///
/// The protocol implementation is generic over this trait. When implementing the protocol,
/// a concrete type must be chosen that will then be used throughout the implementation to identify
/// and index individual peers.
///
/// Note that the concrete type will be used in protocol messages. Therefore, implementations of
/// the protocol are only compatible if the same concrete type is supplied for this trait.
///
/// TODO: Rename to `PeerId`? It does not necessarily refer to a peer's address, as long as the
/// networking layer can translate the value of its concrete type into an address.
pub trait PeerIdentity: Hash + Eq + Ord + Copy + fmt::Debug + Serialize + DeserializeOwned {}
// Blanket implementation: any type satisfying the bounds is usable as a peer identity.
impl<T> PeerIdentity for T where
    T: Hash + Eq + Ord + Copy + fmt::Debug + Serialize + DeserializeOwned
{
}
/// Opaque binary data that is transmitted on messages that introduce new peers.
///
/// Implementations may use these bytes to supply addresses or other information needed to connect
/// to a peer that is not included in the peer's [`PeerIdentity`].
///
/// The derived [`Default`] is an empty byte string.
#[derive(derive_more::Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
#[debug("PeerData({}b)", self.0.len())]
pub struct PeerData(Bytes);
impl PeerData {
/// Create a new [`PeerData`] from a byte buffer.
pub fn new(data: impl Into<Bytes>) -> Self {
Self(data.into())
}
/// Get a reference to the contained [`bytes::Bytes`].
pub fn inner(&self) -> &bytes::Bytes {
&self.0
}
/// Get the peer data as a byte slice.
pub fn as_bytes(&self) -> &[u8] {
&self.0
}
}
/// PeerInfo contains a peer's identifier and the opaque peer data as provided by the implementer.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
struct PeerInfo<PI> {
    /// The peer's identifier.
    pub id: PI,
    /// Opaque, implementer-supplied data for the peer (e.g. addressing info), if any.
    pub data: Option<PeerData>,
}
impl<PI> From<(PI, Option<PeerData>)> for PeerInfo<PI> {
fn from((id, data): (PI, Option<PeerData>)) -> Self {
Self { id, data }
}
}
#[cfg(test)]
mod test {
    use std::{collections::HashSet, env, fmt, str::FromStr};
    use n0_tracing_test::traced_test;
    use rand::SeedableRng;
    use rand_chacha::ChaCha12Rng;
    use super::{Command, Config, Event};
    use crate::proto::{
        sim::{LatencyConfig, Network, NetworkConfig},
        Scope, TopicId,
    };
    /// Membership smoke test: joins form the expected active connections, and a
    /// join into a full active view evicts one existing neighbor.
    #[test]
    #[traced_test]
    fn hyparview_smoke() {
        // Create a network with 4 nodes and active_view_capacity 2
        let rng = ChaCha12Rng::seed_from_u64(read_var("SEED", 0));
        let mut config = Config::default();
        config.membership.active_view_capacity = 2;
        let network_config = NetworkConfig {
            proto: config,
            latency: LatencyConfig::default_static(),
        };
        let mut network = Network::new(network_config, rng);
        for i in 0..4 {
            network.insert(i);
        }
        let t: TopicId = [0u8; 32].into();
        // Do some joins between nodes 0,1,2
        network.command(0, t, Command::Join(vec![1, 2]));
        network.command(1, t, Command::Join(vec![2]));
        network.command(2, t, Command::Join(vec![]));
        network.run_trips(3);
        // Confirm emitted events
        let actual = network.events_sorted();
        let expected = sort(vec![
            (0, t, Event::NeighborUp(1)),
            (0, t, Event::NeighborUp(2)),
            (1, t, Event::NeighborUp(2)),
            (1, t, Event::NeighborUp(0)),
            (2, t, Event::NeighborUp(0)),
            (2, t, Event::NeighborUp(1)),
        ]);
        assert_eq!(actual, expected);
        // Confirm active connections
        assert_eq!(network.conns(), vec![(0, 1), (0, 2), (1, 2)]);
        // Now let node 3 join node 0.
        // Node 0 is full, so it will disconnect from either node 1 or node 2.
        network.command(3, t, Command::Join(vec![0]));
        network.run_trips(2);
        // Confirm emitted events. There's two options because whether node 0 disconnects from
        // node 1 or node 2 is random.
        let actual = network.events_sorted();
        eprintln!("actual {actual:#?}");
        let expected1 = sort(vec![
            (3, t, Event::NeighborUp(0)),
            (0, t, Event::NeighborUp(3)),
            (0, t, Event::NeighborDown(1)),
            (1, t, Event::NeighborDown(0)),
        ]);
        let expected2 = sort(vec![
            (3, t, Event::NeighborUp(0)),
            (0, t, Event::NeighborUp(3)),
            (0, t, Event::NeighborDown(2)),
            (2, t, Event::NeighborDown(0)),
        ]);
        assert!((actual == expected1) || (actual == expected2));
        // Confirm active connections.
        if actual == expected1 {
            assert_eq!(network.conns(), vec![(0, 2), (0, 3), (1, 2)]);
        } else {
            assert_eq!(network.conns(), vec![(0, 1), (0, 3), (1, 2)]);
        }
        assert!(network.check_synchronicity());
    }
    /// Broadcast smoke test: messages reach only the connected partition until the
    /// two swarm sections are bridged, after which they reach every node.
    #[test]
    #[traced_test]
    fn plumtree_smoke() {
        let rng = ChaCha12Rng::seed_from_u64(read_var("SEED", 0));
        let network_config = NetworkConfig {
            proto: Config::default(),
            latency: LatencyConfig::default_static(),
        };
        let mut network = Network::new(network_config, rng);
        // build a network with 6 nodes
        for i in 0..6 {
            network.insert(i);
        }
        let t = [0u8; 32].into();
        // let node 0 join the topic but do not connect to any peers
        network.command(0, t, Command::Join(vec![]));
        // connect nodes 1 and 2 to node 0
        (1..3).for_each(|i| network.command(i, t, Command::Join(vec![0])));
        // connect nodes 4 and 5 to node 3
        network.command(3, t, Command::Join(vec![]));
        (4..6).for_each(|i| network.command(i, t, Command::Join(vec![3])));
        // run ticks and drain events
        network.run_trips(4);
        let _ = network.events();
        assert!(network.check_synchronicity());
        // now broadcast a first message
        network.command(
            1,
            t,
            Command::Broadcast(b"hi1".to_vec().into(), Scope::Swarm),
        );
        network.run_trips(4);
        let events = network.events();
        let received = events.filter(|x| matches!(x, (_, _, Event::Received(_))));
        // message should be received by two other nodes
        assert_eq!(received.count(), 2);
        assert!(network.check_synchronicity());
        // now connect the two sections of the swarm
        network.command(2, t, Command::Join(vec![5]));
        network.run_trips(3);
        let _ = network.events();
        println!("{}", network.report());
        // now broadcast again
        network.command(
            1,
            t,
            Command::Broadcast(b"hi2".to_vec().into(), Scope::Swarm),
        );
        network.run_trips(5);
        let events = network.events();
        let received = events.filter(|x| matches!(x, (_, _, Event::Received(_))));
        // message should be received by all 5 other nodes
        assert_eq!(received.count(), 5);
        assert!(network.check_synchronicity());
        println!("{}", network.report());
    }
    /// A node that quits disappears from its topic state and from all other
    /// nodes' neighbor sets.
    #[test]
    #[traced_test]
    fn quit() {
        // Create a network with 4 nodes and active_view_capacity 2
        let rng = ChaCha12Rng::seed_from_u64(read_var("SEED", 0));
        let mut config = Config::default();
        config.membership.active_view_capacity = 2;
        let mut network = Network::new(config.into(), rng);
        let num = 4;
        for i in 0..num {
            network.insert(i);
        }
        let t: TopicId = [0u8; 32].into();
        // join all nodes
        network.command(0, t, Command::Join(vec![]));
        network.command(1, t, Command::Join(vec![0]));
        network.command(2, t, Command::Join(vec![1]));
        network.command(3, t, Command::Join(vec![2]));
        network.run_trips(2);
        // assert all peers appear in the connections
        let all_conns: HashSet<u64> = HashSet::from_iter((0u64..4).flat_map(|p| {
            network
                .neighbors(&p, &t)
                .into_iter()
                .flat_map(|x| x.into_iter())
        }));
        assert_eq!(all_conns, HashSet::from_iter([0, 1, 2, 3]));
        assert!(network.check_synchronicity());
        // let node 3 leave the swarm
        network.command(3, t, Command::Quit);
        network.run_trips(4);
        assert!(network.peer(&3).unwrap().state(&t).is_none());
        // assert all peers without peer 3 appear in the connections
        let all_conns: HashSet<u64> = HashSet::from_iter((0..num).flat_map(|p| {
            network
                .neighbors(&p, &t)
                .into_iter()
                .flat_map(|x| x.into_iter())
        }));
        assert_eq!(all_conns, HashSet::from_iter([0, 1, 2]));
        assert!(network.check_synchronicity());
    }
    /// Reads a parseable value from the environment, falling back to `default`
    /// if the variable is unset. Panics if the variable is set but unparseable.
    fn read_var<T: FromStr<Err: fmt::Display + fmt::Debug>>(name: &str, default: T) -> T {
        env::var(name)
            .map(|x| {
                x.parse()
                    .unwrap_or_else(|_| panic!("Failed to parse environment variable {name}"))
            })
            .unwrap_or(default)
    }
    /// Returns a sorted copy of `items` (used to compare event sets order-independently).
    fn sort<T: Ord + Clone>(items: Vec<T>) -> Vec<T> {
        let mut sorted = items;
        sorted.sort();
        sorted
    }
}

View file

@ -0,0 +1,764 @@
//! Implementation of the HyParView membership protocol
//!
//! The implementation is based on [this paper][paper] by Joao Leitao, Jose Pereira, Luıs Rodrigues
//! and the [example implementation][impl] by Bartosz Sypytkowski
//!
//! [paper]: https://asc.di.fct.unl.pt/~jleitao/pdf/dsn07-leitao.pdf
//! [impl]: https://gist.github.com/Horusiath/84fac596101b197da0546d1697580d99
use std::collections::{HashMap, HashSet};
use derive_more::{From, Sub};
use n0_future::time::Duration;
use rand::{rngs::ThreadRng, Rng};
use serde::{Deserialize, Serialize};
use tracing::debug;
use super::{util::IndexSet, PeerData, PeerIdentity, PeerInfo, IO};
/// Input event for HyParView
#[derive(Debug)]
pub enum InEvent<PI> {
    /// A [`Message`] was received from a peer.
    RecvMessage(PI, Message<PI>),
    /// A timer has expired.
    TimerExpired(Timer<PI>),
    /// A peer was disconnected on the IO layer.
    PeerDisconnected(PI),
    /// Send a join request to a peer.
    RequestJoin(PI),
    /// Update the peer data that is transmitted on join requests.
    UpdatePeerData(PeerData),
    /// Quit the swarm, informing peers about us leaving.
    Quit,
}
/// Output event for HyParView
#[derive(Debug)]
pub enum OutEvent<PI> {
    /// Ask the IO layer to send a [`Message`] to peer `PI`.
    SendMessage(PI, Message<PI>),
    /// Schedule a [`Timer`].
    ScheduleTimer(Duration, Timer<PI>),
    /// Ask the IO layer to close the connection to peer `PI`.
    DisconnectPeer(PI),
    /// Emit an [`Event`] to the application.
    EmitEvent(Event<PI>),
    /// New [`PeerData`] was received for peer `PI`.
    PeerData(PI, PeerData),
}
/// Event emitted by the [`State`] to the application.
#[derive(Clone, Debug)]
pub enum Event<PI> {
    /// A peer was added to our set of active connections.
    NeighborUp(PI),
    /// A peer was removed from our set of active connections.
    NeighborDown(PI),
}
/// Kinds of timers HyParView needs to schedule.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Timer<PI> {
    /// Time to perform the periodic shuffle.
    DoShuffle,
    /// Deadline for an outstanding neighbor request to peer `PI`.
    PendingNeighborRequest(PI),
}
/// Messages that we can send and receive from peers within the topic.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Message<PI> {
    /// Sent to a peer if you want to join the swarm
    Join(Option<PeerData>),
    /// When receiving Join, ForwardJoin is forwarded to the peer's ActiveView to introduce the
    /// new member.
    ForwardJoin(ForwardJoin<PI>),
    /// A shuffle request is sent occasionally to re-shuffle the PassiveView with contacts from
    /// other peers.
    Shuffle(Shuffle<PI>),
    /// Peers reply to [`Message::Shuffle`] requests with a random peers from their active and
    /// passive views.
    ShuffleReply(ShuffleReply<PI>),
    /// Request to add sender to an active view of recipient. If [`Neighbor::priority`] is
    /// [`Priority::High`], the request cannot be denied.
    Neighbor(Neighbor),
    /// Request to disconnect from a peer.
    /// If [`Disconnect::alive`] is true, the other peer is not shutting down, so it should be
    /// added to the passive set.
    Disconnect(Disconnect),
}
/// The time-to-live for this message.
///
/// Each time a message is forwarded, the `Ttl` is decreased by 1. If the `Ttl` reaches 0, it
/// should not be forwarded further.
#[derive(From, Sub, Eq, PartialEq, Clone, Debug, Copy, Serialize, Deserialize)]
pub struct Ttl(pub u16);
impl Ttl {
    /// Whether this TTL has reached zero (the message must not be forwarded further).
    pub fn expired(&self) -> bool {
        self.0 == 0
    }
    /// The TTL for the next hop: one less than this one, saturating at zero.
    pub fn next(&self) -> Ttl {
        match self.0.checked_sub(1) {
            Some(remaining) => Ttl(remaining),
            None => Ttl(0),
        }
    }
}
/// A message informing other peers that a new peer joined the swarm for this topic.
///
/// Will be forwarded in a random walk until `ttl` reaches 0.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct ForwardJoin<PI> {
    /// The peer that newly joined the swarm
    peer: PeerInfo<PI>,
    /// The time-to-live for this message
    ttl: Ttl,
}
/// Shuffle messages are sent occasionally to shuffle our passive view with peers from other peer's
/// active and passive views.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Shuffle<PI> {
    /// The peer that initiated the shuffle request.
    origin: PI,
    /// A random subset of the active and passive peers of the `origin` peer.
    nodes: Vec<PeerInfo<PI>>,
    /// The time-to-live for this message.
    ttl: Ttl,
}
/// Once a shuffle messages reaches a [`Ttl`] of 0, a peer replies with a `ShuffleReply`.
///
/// The reply is sent to the peer that initiated the shuffle and contains a subset of the active
/// and passive views of the peer at the end of the random walk.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct ShuffleReply<PI> {
    /// A random subset of the active and passive peers of the peer sending the `ShuffleReply`.
    nodes: Vec<PeerInfo<PI>>,
}
/// The priority of a `Join` message
///
/// This is `High` if the sender does not have any active peers, and `Low` otherwise.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Priority {
    /// High priority join that may not be denied.
    ///
    /// A peer may only send high priority joins if it doesn't have any active peers at the moment.
    High,
    /// Low priority join that can be denied.
    Low,
}
/// A neighbor message is sent after adding a peer to our active view to inform them that we are
/// now neighbors.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Neighbor {
    /// The priority of the `Join` or `ForwardJoin` message that triggered this neighbor request.
    priority: Priority,
    /// The user data of the peer sending this message.
    data: Option<PeerData>,
}
/// Message sent when leaving the swarm or closing down to inform peers about us being gone.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Disconnect {
    /// Whether we are actually shutting down or closing the connection only because our limits are
    /// reached.
    alive: bool,
    /// Obsolete field (kept in the struct to maintain wire compatibility).
    ///
    /// The value is never read; it only occupies its slot in the serialized form.
    _respond: bool,
}
/// Configuration for the swarm membership layer
///
/// All fields have defaults (see [`Config::default`]); `#[serde(default)]` allows
/// deserializing partial configurations.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
    /// Number of peers to which active connections are maintained
    pub active_view_capacity: usize,
    /// Number of peers for which contact information is remembered,
    /// but to which we are not actively connected to.
    pub passive_view_capacity: usize,
    /// Number of hops a `ForwardJoin` message is propagated until the new peer's info
    /// is added to a peer's active view.
    pub active_random_walk_length: Ttl,
    /// Number of hops a `ForwardJoin` message is propagated until the new peer's info
    /// is added to a peer's passive view.
    pub passive_random_walk_length: Ttl,
    /// Number of hops a `Shuffle` message is propagated until a peer replies to it.
    pub shuffle_random_walk_length: Ttl,
    /// Number of active peers to be included in a `Shuffle` request.
    pub shuffle_active_view_count: usize,
    /// Number of passive peers to be included in a `Shuffle` request.
    pub shuffle_passive_view_count: usize,
    /// Interval duration for shuffle requests
    pub shuffle_interval: Duration,
    /// Timeout after which a `Neighbor` request is considered failed
    pub neighbor_request_timeout: Duration,
}
impl Default for Config {
    /// Default values for the HyParView layer
    ///
    /// View capacities and random-walk lengths follow the recommendations of the
    /// HyParView paper (p9); the interval and timeout durations are wild guesses.
    fn default() -> Self {
        Self {
            // From the paper (p9)
            active_view_capacity: 5,
            // From the paper (p9)
            passive_view_capacity: 30,
            // From the paper (p9)
            active_random_walk_length: Ttl(6),
            // From the paper (p9)
            passive_random_walk_length: Ttl(3),
            // From the paper (p9)
            shuffle_random_walk_length: Ttl(6),
            // From the paper (p9)
            shuffle_active_view_count: 3,
            // From the paper (p9)
            shuffle_passive_view_count: 4,
            // Wild guess
            shuffle_interval: Duration::from_secs(60),
            // Wild guess
            neighbor_request_timeout: Duration::from_millis(500),
        }
    }
}
/// Counters collected by the membership layer.
#[derive(Default, Debug, Clone)]
pub struct Stats {
    /// Incremented for each message (other than a disconnect) received from a peer
    /// that is not currently in the active view.
    total_connections: usize,
}
/// The state of the HyParView protocol
#[derive(Debug)]
pub struct State<PI, RG = ThreadRng> {
    /// Our peer identity
    me: PI,
    /// Our opaque user data to transmit to peers on join messages
    me_data: Option<PeerData>,
    /// The active view, i.e. peers we are connected to
    pub(crate) active_view: IndexSet<PI>,
    /// The passive view, i.e. peers we know about but are not connected to at the moment
    pub(crate) passive_view: IndexSet<PI>,
    /// Protocol configuration (cannot change at runtime)
    config: Config,
    /// Whether a shuffle timer is currently scheduled
    shuffle_scheduled: bool,
    /// Random number generator
    rng: RG,
    /// Statistics
    pub(crate) stats: Stats,
    /// The set of neighbor requests we sent out but did not yet receive a reply for
    pending_neighbor_requests: HashSet<PI>,
    /// The opaque user peer data we received for other peers
    peer_data: HashMap<PI, PeerData>,
    /// List of peers that are disconnecting, but which we want to keep in the passive set once the connection closes
    alive_disconnect_peers: HashSet<PI>,
}
impl<PI, RG> State<PI, RG>
where
PI: PeerIdentity,
RG: Rng,
{
pub fn new(me: PI, me_data: Option<PeerData>, config: Config, rng: RG) -> Self {
Self {
me,
me_data,
active_view: IndexSet::new(),
passive_view: IndexSet::new(),
config,
shuffle_scheduled: false,
rng,
stats: Stats::default(),
pending_neighbor_requests: Default::default(),
peer_data: Default::default(),
alive_disconnect_peers: Default::default(),
}
}
/// Handle an [`InEvent`], pushing any resulting [`OutEvent`]s to `io`.
///
/// This is the single entry point for driving the membership state machine.
pub fn handle(&mut self, event: InEvent<PI>, io: &mut impl IO<PI>) {
    match event {
        InEvent::RecvMessage(from, message) => self.handle_message(from, message, io),
        InEvent::TimerExpired(timer) => match timer {
            Timer::DoShuffle => self.handle_shuffle_timer(io),
            Timer::PendingNeighborRequest(peer) => self.handle_pending_neighbor_timer(peer, io),
        },
        InEvent::PeerDisconnected(peer) => self.handle_connection_closed(peer, io),
        InEvent::RequestJoin(peer) => self.handle_join(peer, io),
        InEvent::UpdatePeerData(data) => {
            self.me_data = Some(data);
        }
        InEvent::Quit => self.handle_quit(io),
    }
    // this will only happen on the first call
    // (afterwards `shuffle_scheduled` stays true; each expired shuffle timer
    // re-arms itself in `handle_shuffle_timer`).
    if !self.shuffle_scheduled {
        io.push(OutEvent::ScheduleTimer(
            self.config.shuffle_interval,
            Timer::DoShuffle,
        ));
        self.shuffle_scheduled = true;
    }
}
/// Dispatch a received [`Message`] from `from` to the per-message handlers.
fn handle_message(&mut self, from: PI, message: Message<PI>, io: &mut impl IO<PI>) {
    let is_disconnect = matches!(message, Message::Disconnect(Disconnect { .. }));
    // Count an inbound connection: any non-disconnect message from a peer
    // that is not (yet) in our active view.
    if !is_disconnect && !self.active_view.contains(&from) {
        self.stats.total_connections += 1;
    }
    match message {
        Message::Join(data) => self.on_join(from, data, io),
        Message::ForwardJoin(details) => self.on_forward_join(from, details, io),
        Message::Shuffle(details) => self.on_shuffle(from, details, io),
        Message::ShuffleReply(details) => self.on_shuffle_reply(details, io),
        Message::Neighbor(details) => self.on_neighbor(from, details, io),
        Message::Disconnect(details) => self.on_disconnect(from, details, io),
    }
    // Disconnect from passive nodes right after receiving a message.
    // If the handler above promoted `from` into the active view, the
    // connection is kept.
    // TODO(frando): I'm not sure anymore that this is correct. Maybe remove?
    if !is_disconnect && !self.active_view.contains(&from) {
        io.push(OutEvent::DisconnectPeer(from));
    }
}
/// Handle an application request to join the swarm via `peer`.
///
/// Sends a `Join` message carrying our own user data to that peer.
fn handle_join(&mut self, peer: PI, io: &mut impl IO<PI>) {
    let join = Message::Join(self.me_data.clone());
    io.push(OutEvent::SendMessage(peer, join));
}
/// We received a disconnect message.
///
/// An active neighbor is removed from the active view (and, depending on
/// `details.alive`, kept as a passive peer). A passive peer that claims to
/// still be alive is remembered in `alive_disconnect_peers` so the upcoming
/// connection close does not evict it from the passive view.
fn on_disconnect(&mut self, peer: PI, details: Disconnect, io: &mut impl IO<PI>) {
    self.pending_neighbor_requests.remove(&peer);
    if self.active_view.contains(&peer) {
        self.remove_active(
            &peer,
            RemovalReason::DisconnectReceived {
                is_alive: details.alive,
            },
            io,
        );
    } else if details.alive && self.passive_view.contains(&peer) {
        self.alive_disconnect_peers.insert(peer);
    }
}
/// A connection was closed by the peer.
///
/// Active peers are removed from the active view. A passive peer is fully
/// forgotten unless it announced a graceful, alive disconnect beforehand
/// (see [`Self::on_disconnect`]).
fn handle_connection_closed(&mut self, peer: PI, io: &mut impl IO<PI>) {
    self.pending_neighbor_requests.remove(&peer);
    if self.active_view.contains(&peer) {
        self.remove_active(&peer, RemovalReason::ConnectionClosed, io);
    } else if !self.alive_disconnect_peers.remove(&peer) {
        self.passive_view.remove(&peer);
        self.peer_data.remove(&peer);
    }
}
/// Shut down: disconnect gracefully from all active peers.
fn handle_quit(&mut self, io: &mut impl IO<PI>) {
    // Iterate over a clone and remove peers one by one: `send_disconnect`
    // samples the (shrinking) active view for its shuffle replies, so each
    // peer must be removed before its own disconnect is sent.
    for peer in self.active_view.clone().into_iter() {
        self.active_view.remove(&peer);
        self.send_disconnect(peer, false, io);
    }
}
/// Gracefully disconnect from `peer`.
///
/// Before disconnecting, a `ShuffleReply` with a selection of our peers is
/// sent so the other node does not run out of contacts — this matters most
/// for nodes that just joined the swarm. Then the `Disconnect` message is
/// sent and the IO layer is asked to close the connection.
fn send_disconnect(&mut self, peer: PI, alive: bool, io: &mut impl IO<PI>) {
    let reply_len =
        self.config.shuffle_active_view_count + self.config.shuffle_passive_view_count;
    self.send_shuffle_reply(peer, reply_len, io);
    io.push(OutEvent::SendMessage(
        peer,
        Message::Disconnect(Disconnect {
            alive,
            _respond: false,
        }),
    ));
    io.push(OutEvent::DisconnectPeer(peer));
}
/// Handle a [`Message::Join`]: accept the peer into our active view and fan
/// out `ForwardJoin` messages to all other active neighbors.
fn on_join(&mut self, peer: PI, data: Option<PeerData>, io: &mut impl IO<PI>) {
    // "A node that receives a join request will start by adding the new
    // node to its active view, even if it has to drop a random node from it. (6)"
    self.add_active(peer, data.clone(), Priority::High, true, io);
    // "The contact node c will then send to all other nodes in its active view a ForwardJoin
    // request containing the new node identifier. Associated to the join procedure,
    // there are two configuration parameters, named Active Random Walk Length (ARWL),
    // that specifies the maximum number of hops a ForwardJoin request is propagated,
    // and Passive Random Walk Length (PRWL), that specifies at which point in the walk the node
    // is inserted in a passive view. To use these parameters, the ForwardJoin request carries
    // a “time to live” field that is initially set to ARWL and decreased at every hop. (7)"
    let ttl = self.config.active_random_walk_length;
    let peer_info = PeerInfo { id: peer, data };
    for node in self.active_view.iter_without(&peer) {
        let message = Message::ForwardJoin(ForwardJoin {
            peer: peer_info.clone(),
            ttl,
        });
        io.push(OutEvent::SendMessage(*node, message));
    }
}
/// Handle a [`Message::ForwardJoin`]: either accept the joining peer as a
/// neighbor, or forward the request further along the random walk.
fn on_forward_join(&mut self, sender: PI, message: ForwardJoin<PI>, io: &mut impl IO<PI>) {
    let peer_id = message.peer.id;
    // If the peer is already in our active view, we renew our neighbor relationship.
    if self.active_view.contains(&peer_id) {
        self.insert_peer_info(message.peer, io);
        self.send_neighbor(peer_id, Priority::High, io);
    }
    // "i) If the time to live is equal to zero or if the number of nodes in p's active view is equal to one,
    // it will add the new node to its active view (7)"
    else if message.ttl.expired() || self.active_view.len() <= 1 {
        self.insert_peer_info(message.peer, io);
        // Modification from paper: Instead of adding the peer directly to our active view,
        // we only send the Neighbor message. We will add the peer to our active view once we receive a
        // reply from our neighbor.
        // This prevents us adding unreachable peers to our active view.
        self.send_neighbor(peer_id, Priority::High, io);
    } else {
        // "ii) If the time to live is equal to PRWL, p will insert the new node into its passive view"
        if message.ttl == self.config.passive_random_walk_length {
            self.add_passive(peer_id, message.peer.data.clone(), io);
        }
        // "iii) The time to live field is decremented."
        // "iv) If, at this point, n has not been inserted
        // in p's active view, p will forward the request to a random node in its active view
        // (different from the one from which the request was received)."
        if !self.active_view.contains(&peer_id)
            && !self.pending_neighbor_requests.contains(&peer_id)
        {
            match self
                .active_view
                .pick_random_without(&[&sender], &mut self.rng)
            {
                None => {
                    // This branch requires active_view.len() > 1, so at least
                    // one candidate besides `sender` always exists.
                    unreachable!("if the peer was not added, there are at least two peers in our active view.");
                }
                Some(next) => {
                    let message = Message::ForwardJoin(ForwardJoin {
                        peer: message.peer,
                        ttl: message.ttl.next(),
                    });
                    io.push(OutEvent::SendMessage(*next, message));
                }
            }
        }
    }
}
/// Handle a [`Message::Neighbor`] request or reply.
///
/// If we have a pending neighbor request for `from`, this message is the
/// reply to it and we must not reply again; otherwise it is an incoming
/// request that expects a reply.
fn on_neighbor(&mut self, from: PI, details: Neighbor, io: &mut impl IO<PI>) {
    let is_reply = self.pending_neighbor_requests.remove(&from);
    let do_reply = !is_reply;
    // "A node q that receives a high priority neighbor request will always accept the request, even
    // if it has to drop a random member from its active view (again, the member that is dropped will
    // receive a Disconnect notification). If a node q receives a low priority Neighbor request, it will
    // only accept the request if it has a free slot in its active view, otherwise it will refuse the request."
    if !self.add_active(from, details.data, details.priority, do_reply, io) {
        // Refused (low priority and our active view is full): disconnect
        // politely, keeping the peer as a passive candidate.
        self.send_disconnect(from, true, io);
    }
}
/// Assemble the [`PeerInfo`] (identity plus cached user data) for a peer.
fn peer_info(&self, id: &PI) -> PeerInfo<PI> {
    PeerInfo {
        id: *id,
        data: self.peer_data.get(id).cloned(),
    }
}
/// Store the user data carried in `peer_info`, if any.
///
/// Emits [`OutEvent::PeerData`] when the data is non-empty and differs from
/// what we had cached for this peer before.
fn insert_peer_info(&mut self, peer_info: PeerInfo<PI>, io: &mut impl IO<PI>) {
    if let Some(data) = peer_info.data {
        // Remove-then-compare so we can reinsert the (possibly new) value
        // below without cloning it for the comparison.
        let old = self.peer_data.remove(&peer_info.id);
        let same = matches!(old, Some(old) if old == data);
        if !same && !data.0.is_empty() {
            io.push(OutEvent::PeerData(peer_info.id, data.clone()));
        }
        self.peer_data.insert(peer_info.id, data);
    }
}
/// Handle a [`Message::Shuffle`]
///
/// > A node q that receives a Shuffle request will first decrease its time to live. If the time
/// > to live of the message is greater than zero and the number of nodes in q's active view is
/// > greater than 1, the node will select a random node from its active view, different from the
/// > one he received this shuffle message from, and simply forwards the Shuffle request.
/// > Otherwise, node q accepts the Shuffle request and send back (p.8)
fn on_shuffle(&mut self, from: PI, shuffle: Shuffle<PI>, io: &mut impl IO<PI>) {
    if shuffle.ttl.expired() || self.active_view.len() <= 1 {
        // Accept: absorb the offered peers into our passive view and reply
        // to the shuffle origin with an equally sized sample of our own.
        let len = shuffle.nodes.len();
        for node in shuffle.nodes {
            self.add_passive(node.id, node.data, io);
        }
        self.send_shuffle_reply(shuffle.origin, len, io);
    } else if let Some(node) = self
        .active_view
        .pick_random_without(&[&shuffle.origin, &from], &mut self.rng)
    {
        // Forward with stepped ttl to a random active peer that is neither
        // the origin nor the peer we received the message from.
        let message = Message::Shuffle(Shuffle {
            origin: shuffle.origin,
            nodes: shuffle.nodes,
            ttl: shuffle.ttl.next(),
        });
        io.push(OutEvent::SendMessage(*node, message));
    }
}
/// Send a `ShuffleReply` containing up to `len` peers to `to`.
///
/// The sample is drawn from the passive view first; if that does not yield
/// enough entries, the remainder is filled from the active view.
fn send_shuffle_reply(&mut self, to: PI, len: usize, io: &mut impl IO<PI>) {
    let mut selected = self.passive_view.shuffled_and_capped(len, &mut self.rng);
    if selected.len() < len {
        let missing = len - selected.len();
        selected.extend(self.active_view.shuffled_and_capped(missing, &mut self.rng));
    }
    let nodes: Vec<_> = selected.into_iter().map(|id| self.peer_info(&id)).collect();
    io.push(OutEvent::SendMessage(
        to,
        Message::ShuffleReply(ShuffleReply { nodes }),
    ));
}
fn on_shuffle_reply(&mut self, message: ShuffleReply<PI>, io: &mut impl IO<PI>) {
for node in message.nodes {
self.add_passive(node.id, node.data, io);
}
self.refill_active_from_passive(&[], io);
}
/// Periodic shuffle: send a random sample of our known peers to one random
/// active neighbor, then re-arm the shuffle timer.
fn handle_shuffle_timer(&mut self, io: &mut impl IO<PI>) {
    if let Some(node) = self.active_view.pick_random(&mut self.rng) {
        // Sample from both views, excluding the shuffle target itself.
        let active = self.active_view.shuffled_without_and_capped(
            &[node],
            self.config.shuffle_active_view_count,
            &mut self.rng,
        );
        let passive = self.passive_view.shuffled_without_and_capped(
            &[node],
            self.config.shuffle_passive_view_count,
            &mut self.rng,
        );
        let nodes = active
            .iter()
            .chain(passive.iter())
            .map(|id| self.peer_info(id));
        // Always include ourselves so the receiver learns about us.
        let me = PeerInfo {
            id: self.me,
            data: self.me_data.clone(),
        };
        let nodes = nodes.chain([me]);
        let message = Shuffle {
            origin: self.me,
            nodes: nodes.collect(),
            ttl: self.config.shuffle_random_walk_length,
        };
        io.push(OutEvent::SendMessage(*node, Message::Shuffle(message)));
    }
    // Re-arm the periodic shuffle timer.
    io.push(OutEvent::ScheduleTimer(
        self.config.shuffle_interval,
        Timer::DoShuffle,
    ));
}
/// Whether the passive view is at (or beyond) its configured capacity.
fn passive_is_full(&self) -> bool {
    let cap = self.config.passive_view_capacity;
    self.passive_view.len() >= cap
}
/// Whether the active view is at (or beyond) its configured capacity.
fn active_is_full(&self) -> bool {
    let cap = self.config.active_view_capacity;
    self.active_view.len() >= cap
}
/// Add a peer to the passive view.
///
/// If the passive view is full, a random peer is evicted first to make room.
/// Ourselves and peers already present in either view are never added; the
/// peer's user data is recorded in any case.
fn add_passive(&mut self, peer: PI, data: Option<PeerData>, io: &mut impl IO<PI>) {
    self.insert_peer_info((peer, data).into(), io);
    let already_known = peer == self.me
        || self.active_view.contains(&peer)
        || self.passive_view.contains(&peer);
    if already_known {
        return;
    }
    if self.passive_is_full() {
        self.passive_view.remove_random(&mut self.rng);
    }
    self.passive_view.insert(peer);
}
/// Remove a peer from the active view.
///
/// If `reason` is [`RemovalReason::Random`], a [`Disconnect`] message will be sent to the peer.
/// Afterwards an attempt is made to refill the freed slot from the passive
/// view (skipping the peer that was just removed).
fn remove_active(&mut self, peer: &PI, reason: RemovalReason, io: &mut impl IO<PI>) {
    if let Some(idx) = self.active_view.get_index_of(peer) {
        // unwrap is safe: `idx` was just obtained from the set.
        let removed_peer = self.remove_active_by_index(idx, reason, io).unwrap();
        self.refill_active_from_passive(&[&removed_peer], io);
    }
}
/// Try to promote a random passive peer into the active view by sending it
/// a `Neighbor` request.
///
/// No-op while the active view plus in-flight neighbor requests are at
/// capacity. Peers in `skip_peers` and peers with a pending request are
/// never selected.
fn refill_active_from_passive(&mut self, skip_peers: &[&PI], io: &mut impl IO<PI>) {
    if self.active_view.len() + self.pending_neighbor_requests.len()
        >= self.config.active_view_capacity
    {
        return;
    }
    // "When a node p suspects that one of the nodes present in its active view has failed
    // (by either disconnecting or blocking), it selects a random node q from its passive view and
    // attempts to establish a TCP connection with q. If the connection fails to establish,
    // node q is considered failed and removed from p's passive view; another node q is selected
    // at random and a new attempt is made. The procedure is repeated until a connection is established
    // with success." (p7)
    let mut skip_peers = skip_peers.to_vec();
    skip_peers.extend(self.pending_neighbor_requests.iter());
    if let Some(node) = self
        .passive_view
        .pick_random_without(&skip_peers, &mut self.rng)
        .copied()
    {
        // High priority when we have no neighbors at all: the receiver then
        // has to accept us even if its own active view is full.
        let priority = match self.active_view.is_empty() {
            true => Priority::High,
            false => Priority::Low,
        };
        self.send_neighbor(node, priority, io);
        // schedule a timer that checks if the node replied with a neighbor message,
        // otherwise try again with another passive node.
        io.push(OutEvent::ScheduleTimer(
            self.config.neighbor_request_timeout,
            Timer::PendingNeighborRequest(node),
        ));
    };
}
/// A neighbor request to `peer` timed out without a reply.
///
/// The peer is considered dead: drop it from the passive view and retry
/// with another passive peer. A no-op if the reply arrived in the meantime.
fn handle_pending_neighbor_timer(&mut self, peer: PI, io: &mut impl IO<PI>) {
    let was_pending = self.pending_neighbor_requests.remove(&peer);
    if was_pending {
        self.passive_view.remove(&peer);
        self.refill_active_from_passive(&[], io);
    }
}
/// Remove the peer at `peer_index` from the active view.
///
/// Emits [`Event::NeighborDown`], sends/closes the connection according to
/// `reason`, and keeps the peer in the passive view when it is believed to
/// still be alive. Returns the removed peer, or `None` if the index was
/// invalid.
fn remove_active_by_index(
    &mut self,
    peer_index: usize,
    reason: RemovalReason,
    io: &mut impl IO<PI>,
) -> Option<PI> {
    if let Some(peer) = self.active_view.remove_index(peer_index) {
        io.push(OutEvent::EmitEvent(Event::NeighborDown(peer)));
        match reason {
            // send a disconnect message, then close connection.
            RemovalReason::Random => self.send_disconnect(peer, true, io),
            // close connection without sending anything further.
            RemovalReason::DisconnectReceived { is_alive: _ } => {
                io.push(OutEvent::DisconnectPeer(peer))
            }
            RemovalReason::ConnectionClosed => io.push(OutEvent::DisconnectPeer(peer)),
        }
        let keep_as_passive = match reason {
            // keep alive if previously marked as alive.
            RemovalReason::ConnectionClosed => self.alive_disconnect_peers.remove(&peer),
            // keep alive if other peer said to be still alive.
            RemovalReason::DisconnectReceived { is_alive } => is_alive,
            // keep alive (only we are removing for now)
            RemovalReason::Random => true,
        };
        if keep_as_passive {
            let data = self.peer_data.remove(&peer);
            self.add_passive(peer, data, io);
            // mark peer as alive, so it doesn't get removed from the passive view if the conn closes.
            if !matches!(reason, RemovalReason::ConnectionClosed) {
                self.alive_disconnect_peers.insert(peer);
            }
        }
        debug!(other = ?peer, "removed from active view, reason: {reason:?}");
        Some(peer)
    } else {
        None
    }
}
/// Evict one randomly chosen peer from the active view to free a slot.
fn free_random_slot_in_active_view(&mut self, io: &mut impl IO<PI>) {
    let Some(index) = self.active_view.pick_random_index(&mut self.rng) else {
        return;
    };
    self.remove_active_by_index(index, RemovalReason::Random, io);
}
/// Add a peer to the active view.
///
/// If the active view is currently full, a random peer will be removed first.
/// Sends a Neighbor message to the peer when `reply` is true. If `priority`
/// is [`Priority::High`], the peer may not deny the Neighbor request.
///
/// Returns `false` if the peer was not added: it is ourselves, or the
/// request is low priority while the active view is full.
fn add_active(
    &mut self,
    peer: PI,
    data: Option<PeerData>,
    priority: Priority,
    reply: bool,
    io: &mut impl IO<PI>,
) -> bool {
    if peer == self.me {
        return false;
    }
    self.insert_peer_info((peer, data).into(), io);
    if self.active_view.contains(&peer) {
        // Already a neighbor — optionally renew the relationship.
        if reply {
            self.send_neighbor(peer, priority, io);
        }
        return true;
    }
    match (priority, self.active_is_full()) {
        (Priority::High, is_full) => {
            if is_full {
                self.free_random_slot_in_active_view(io);
            }
            self.add_active_unchecked(peer, Priority::High, reply, io);
            true
        }
        (Priority::Low, false) => {
            self.add_active_unchecked(peer, Priority::Low, reply, io);
            true
        }
        (Priority::Low, true) => false,
    }
}
/// Insert `peer` into the active view without any capacity check.
///
/// Removes it from the passive view and, if it was not an active neighbor
/// already, emits [`Event::NeighborUp`] and optionally replies with a
/// `Neighbor` message.
fn add_active_unchecked(
    &mut self,
    peer: PI,
    priority: Priority,
    reply: bool,
    io: &mut impl IO<PI>,
) {
    self.passive_view.remove(&peer);
    let newly_added = self.active_view.insert(peer);
    if !newly_added {
        return;
    }
    debug!(other = ?peer, "add to active view");
    io.push(OutEvent::EmitEvent(Event::NeighborUp(peer)));
    if reply {
        self.send_neighbor(peer, priority, io);
    }
}
/// Send a `Neighbor` request to `peer`, unless one is already in flight.
///
/// The peer is tracked in `pending_neighbor_requests` until a reply arrives
/// or the request times out.
fn send_neighbor(&mut self, peer: PI, priority: Priority, io: &mut impl IO<PI>) {
    let is_new_request = self.pending_neighbor_requests.insert(peer);
    if is_new_request {
        io.push(OutEvent::SendMessage(
            peer,
            Message::Neighbor(Neighbor {
                priority,
                data: self.me_data.clone(),
            }),
        ));
    }
}
}
/// Reason why a peer is being removed from the active view.
#[derive(Debug)]
enum RemovalReason {
    /// A peer is removed because the connection was closed ungracefully.
    ConnectionClosed,
    /// A peer is removed because we received a disconnect message.
    ///
    /// `is_alive` carries the peer's own claim of whether it is still alive.
    DisconnectReceived { is_alive: bool },
    /// A peer is removed after random selection to make room for a newly joined peer.
    Random,
}

View file

@ -0,0 +1,909 @@
//! Implementation of the Plumtree epidemic broadcast tree protocol
//!
//! The implementation is based on [this paper][paper] by João Leitão, José Pereira, Luís Rodrigues
//! and the [example implementation][impl] by Bartosz Sypytkowski
//!
//! [paper]: https://asc.di.fct.unl.pt/~jleitao/pdf/srds07-leitao.pdf
//! [impl]: https://gist.github.com/Horusiath/84fac596101b197da0546d1697580d99
use std::{
collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
hash::Hash,
};
use bytes::Bytes;
use derive_more::{Add, From, Sub};
use n0_future::time::{Duration, Instant};
use postcard::experimental::max_size::MaxSize;
use serde::{Deserialize, Serialize};
use tracing::{debug, warn};
use super::{
util::{idbytes_impls, TimeBoundCache},
PeerIdentity, IO,
};
/// A message identifier, which is the message content's blake3 hash.
#[derive(Serialize, Deserialize, Clone, Hash, Copy, PartialEq, Eq, MaxSize)]
pub struct MessageId([u8; 32]);
// Shared impls for 32-byte id newtypes; defined in `super::util`.
idbytes_impls!(MessageId, "MessageId");
impl MessageId {
    /// Create a [`MessageId`] by hashing the message content.
    ///
    /// This hashes the input with [`blake3::hash`].
    pub fn from_content(message: &[u8]) -> Self {
        blake3::hash(message).into()
    }
}
/// Events Plumtree is informed of from the peer sampling service and IO layer.
#[derive(Debug)]
pub enum InEvent<PI> {
    /// A [`Message`] was received from the peer.
    RecvMessage(PI, Message),
    /// Broadcast the contained payload to the given scope.
    ///
    /// Issued by the local application, not by remote peers.
    Broadcast(Bytes, Scope),
    /// A timer has expired.
    TimerExpired(Timer),
    /// New member `PI` has joined the topic.
    NeighborUp(PI),
    /// Peer `PI` has disconnected from the topic.
    NeighborDown(PI),
}
/// Events Plumtree emits for the IO layer and the application.
#[derive(Debug, PartialEq, Eq)]
pub enum OutEvent<PI> {
    /// Ask the IO layer to send a [`Message`] to peer `PI`.
    SendMessage(PI, Message),
    /// Schedule a [`Timer`] to be delivered back as [`InEvent::TimerExpired`].
    ScheduleTimer(Duration, Timer),
    /// Emit an [`Event`] to the application.
    EmitEvent(Event<PI>),
}
/// Kinds of timers Plumtree needs to schedule.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Timer {
    /// Request the content for [`MessageId`] by sending [`Message::Graft`].
    ///
    /// The message will be sent to a peer that sent us an [`Message::IHave`] for this [`MessageId`],
    /// which will send us the message content in reply and also move the peer into the eager set.
    /// Will be a no-op if the message for [`MessageId`] was already received from another peer by now.
    SendGraft(MessageId),
    /// Dispatch the queued [`Message::IHave`] batches in our lazy push queue.
    DispatchLazyPush,
    /// Evict expired entries from the message caches.
    EvictCache,
}
/// Event emitted by the [`State`] to the application.
///
/// Delivered wrapped in [`OutEvent::EmitEvent`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Event<PI> {
    /// A new gossip message was received.
    Received(GossipEvent<PI>),
}
/// A received gossip message as delivered to the application.
#[derive(Clone, derive_more::Debug, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct GossipEvent<PI> {
    /// The content of the gossip message.
    #[debug("<{}b>", content.len())]
    pub content: Bytes,
    /// The peer that we received the gossip message from. Note that this is not the peer that
    /// originally broadcasted the message, but the peer before us in the gossiping path.
    pub delivered_from: PI,
    /// The broadcast scope of the message.
    pub scope: DeliveryScope,
}
impl<PI> GossipEvent<PI> {
fn from_message(message: &Gossip, from: PI) -> Self {
Self {
content: message.content.clone(),
scope: message.scope,
delivered_from: from,
}
}
}
/// Number of delivery hops a message has taken.
///
/// Newtype over a `u16` hop counter; advanced with [`Round::next`].
#[derive(
    From,
    Add,
    Sub,
    Serialize,
    Deserialize,
    Eq,
    PartialEq,
    PartialOrd,
    Ord,
    Clone,
    Copy,
    Debug,
    Hash,
    MaxSize,
)]
pub struct Round(u16);
impl Round {
pub fn next(&self) -> Round {
Round(self.0 + 1)
}
}
/// Messages that we can send and receive from peers within the topic.
//
// NOTE: variant order is part of the serialized wire format (serde uses the
// variant index) — do not reorder.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Message {
    /// When receiving Gossip, emit as event and forward full message to eager peer and (after a
    /// delay) message IDs to lazy peers.
    Gossip(Gossip),
    /// When receiving Prune, move the peer from the eager to the lazy set.
    Prune,
    /// When receiving Graft, move the peer to the eager set and send the full content for the
    /// included message ID.
    Graft(Graft),
    /// When receiving IHave, do nothing initially, and request the messages for the included
    /// message IDs after some time if they aren't pushed eagerly to us.
    IHave(Vec<IHave>),
}
/// Payload messages transmitted by the protocol.
///
/// `id` must be the blake3 hash of `content`; see [`Gossip::validate`].
#[derive(Serialize, Deserialize, Clone, derive_more::Debug, PartialEq, Eq)]
pub struct Gossip {
    /// Id of the message.
    id: MessageId,
    /// Message contents.
    #[debug("<{}b>", content.len())]
    content: Bytes,
    /// Scope to broadcast to.
    scope: DeliveryScope,
}
impl Gossip {
    /// The delivery round of this message, or `None` for a neighbors-only
    /// broadcast (which carries no hop counter).
    fn round(&self) -> Option<Round> {
        if let DeliveryScope::Swarm(round) = self.scope {
            Some(round)
        } else {
            None
        }
    }
}
/// The scope to deliver the message to.
///
/// This is the receiver-side counterpart of [`Scope`].
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Copy)]
pub enum DeliveryScope {
    /// This message was received from the swarm, with a distance (in hops) travelled from the
    /// original broadcaster.
    Swarm(Round),
    /// This message was received from a direct neighbor that broadcasted the message to neighbors
    /// only.
    Neighbors,
}
impl DeliveryScope {
/// Whether this message was directly received from its publisher.
pub fn is_direct(&self) -> bool {
matches!(self, Self::Neighbors | Self::Swarm(Round(0)))
}
}
/// The broadcast scope of a gossip message.
///
/// Chosen by the sender in [`InEvent::Broadcast`].
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Copy)]
pub enum Scope {
    /// The message is broadcast to all peers in the swarm.
    Swarm,
    /// The message is broadcast only to the immediate neighbors of a peer.
    Neighbors,
}
impl Gossip {
    /// Get a clone of this `Gossip` message and increase the delivery round by 1.
    ///
    /// Returns `None` for neighbors-scoped messages, which are never
    /// forwarded further.
    pub fn next_round(&self) -> Option<Gossip> {
        let DeliveryScope::Swarm(round) = self.scope else {
            return None;
        };
        Some(Gossip {
            id: self.id,
            content: self.content.clone(),
            scope: DeliveryScope::Swarm(round.next()),
        })
    }
    /// Validate that the message id is the blake3 hash of the message content.
    pub fn validate(&self) -> bool {
        self.id == MessageId::from_content(&self.content)
    }
}
/// Control message to inform peers we have a message without transmitting the whole payload.
///
/// Sent in batches inside [`Message::IHave`].
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, MaxSize)]
pub struct IHave {
    /// Id of the message.
    pub(crate) id: MessageId,
    /// Delivery round of the message.
    pub(crate) round: Round,
}
/// Control message to signal a peer that they have been moved to the eager set, and to ask the
/// peer to do the same with this node.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct Graft {
    /// Message id that triggers the graft, if any.
    /// On receiving a graft, the payload message must be sent in reply if a message id is set.
    id: Option<MessageId>,
    /// Delivery round of the [`Message::IHave`] that triggered this Graft message.
    round: Round,
}
/// Configuration for the gossip broadcast layer.
///
/// Currently, the expectation is that the configuration is the same for all peers in the
/// network (as recommended in the paper).
///
/// Due to `#[serde(default)]`, each field individually falls back to its
/// default value when missing during deserialization.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
    /// When receiving an `IHave` message, this timeout is registered. If the message for the
    /// `IHave` was not received once the timeout is expired, a `Graft` message is sent to the
    /// peer that sent us the `IHave` to request the message payload.
    ///
    /// The plumtree paper notes:
    /// > The timeout value is a protocol parameter that should be configured considering the
    /// > diameter of the overlay and a target maximum recovery latency, defined by the application
    /// > requirements. (p.8)
    pub graft_timeout_1: Duration,
    /// This timeout is registered when sending a `Graft` message. If a reply has not been
    /// received once the timeout expires, we send another `Graft` message to the next peer that
    /// sent us an `IHave` for this message.
    ///
    /// The plumtree paper notes:
    /// > This second timeout value should be smaller that the first, in the order of an average
    /// > round trip time to a neighbor.
    pub graft_timeout_2: Duration,
    /// Timeout after which `IHave` messages are pushed to peers.
    pub dispatch_timeout: Duration,
    /// The protocol performs a tree optimization, which promotes lazy peers to eager peers if the
    /// `Message::IHave` messages received from them have a lower number of hops from the
    /// message's origin as the `InEvent::Broadcast` messages received from our eager peers. This
    /// parameter is the number of hops that the lazy peers must be closer to the origin than our
    /// eager peers to be promoted to become an eager peer.
    pub optimization_threshold: Round,
    /// Duration for which to keep gossip messages in the internal message cache.
    ///
    /// Messages broadcast from this node or received from other nodes are kept in an internal
    /// cache for this duration before being evicted. If this is too low, other nodes will not be
    /// able to retrieve messages once they need them. If this is high, the cache will grow.
    ///
    /// Should be at least around several round trip times to peers.
    pub message_cache_retention: Duration,
    /// Duration for which to keep the `MessageId`s for received messages.
    ///
    /// Should be at least as long as [`Self::message_cache_retention`], usually will be longer to
    /// not accidentally receive messages multiple times.
    pub message_id_retention: Duration,
    /// How often the internal caches will be checked for expired items.
    pub cache_evict_interval: Duration,
}
impl Default for Config {
    /// Sensible defaults for the plumtree configuration
    //
    // TODO: Find out what good defaults are for the three timeouts here. Current numbers are
    // guesses that need validation. The paper does not have concrete recommendations for these
    // numbers.
    fn default() -> Self {
        Self {
            // Paper: "The timeout value is a protocol parameter that should be configured considering
            // the diameter of the overlay and a target maximum recovery latency, defined by the
            // application requirements. This is a parameter that should be statically configured
            // at deployment time." (p. 8)
            //
            // Earthstar has 5ms it seems, see https://github.com/earthstar-project/earthstar/blob/1523c640fedf106f598bf79b184fb0ada64b1cc0/src/syncer/plum_tree.ts#L75
            // However in the paper it is more like a few roundtrips if I read things correctly.
            graft_timeout_1: Duration::from_millis(80),
            // Paper: "This second timeout value should be smaller that the first, in the order of an
            // average round trip time to a neighbor." (p. 9)
            //
            // Earthstar doesn't have this step from my reading.
            graft_timeout_2: Duration::from_millis(40),
            // Again, paper does not tell a recommended number here. Likely should be quite small,
            // as to not delay messages without need. This would also be the time frame in which
            // `IHave`s are aggregated to save on packets.
            //
            // Earthstar dispatches immediately from my reading.
            dispatch_timeout: Duration::from_millis(5),
            // This number comes from experiment settings the plumtree paper (p. 12)
            optimization_threshold: Round(7),
            // This is a certainly-high-enough value for usual operation.
            message_cache_retention: Duration::from_secs(30),
            message_id_retention: Duration::from_secs(90),
            cache_evict_interval: Duration::from_secs(1),
        }
    }
}
/// Stats about this topic's plumtree.
#[derive(Debug, Default, Clone)]
pub struct Stats {
    /// Number of payload messages received so far.
    ///
    /// See [`Message::Gossip`].
    pub payload_messages_received: u64,
    /// Number of control messages received so far.
    ///
    /// See [`Message::Prune`], [`Message::Graft`], [`Message::IHave`].
    pub control_messages_received: u64,
    /// Max round seen so far.
    // NOTE(review): presumably the highest delivery round observed on received
    // gossip messages — the code updating this field is outside this view; confirm.
    pub max_last_delivery_hop: u16,
}
/// State of the plumtree.
#[derive(Debug)]
pub struct State<PI> {
    /// Our address.
    me: PI,
    /// Configuration for this plumtree.
    config: Config,
    /// Set of peers used for payload exchange.
    pub(crate) eager_push_peers: BTreeSet<PI>,
    /// Set of peers used for control message exchange.
    pub(crate) lazy_push_peers: BTreeSet<PI>,
    /// Queued [`IHave`]s per peer, flushed on [`Timer::DispatchLazyPush`].
    lazy_push_queue: BTreeMap<PI, Vec<IHave>>,
    /// Messages for which a [`MessageId`] has been seen via a [`Message::IHave`] but we have not
    /// yet received the full payload. For each, we store the peers that have claimed to have this
    /// message.
    missing_messages: HashMap<MessageId, VecDeque<(PI, Round)>>,
    /// Messages for which the full payload has been seen.
    received_messages: TimeBoundCache<MessageId, ()>,
    /// Payloads of received messages.
    cache: TimeBoundCache<MessageId, Gossip>,
    /// Message ids for which a [`Timer::SendGraft`] has been scheduled.
    graft_timer_scheduled: HashSet<MessageId>,
    /// Whether a [`Timer::DispatchLazyPush`] has been scheduled.
    dispatch_timer_scheduled: bool,
    /// Set to true once the first event was handled. Used for initial timer scheduling.
    init: bool,
    /// [`Stats`] of this plumtree.
    pub(crate) stats: Stats,
    /// Maximum size in bytes of a single serialized message; outgoing
    /// [`Message::IHave`] batches are chunked to fit this limit.
    max_message_size: usize,
}
impl<PI: PeerIdentity> State<PI> {
/// Initialize the [`State`] of a plumtree.
pub fn new(me: PI, config: Config, max_message_size: usize) -> Self {
Self {
me,
eager_push_peers: Default::default(),
lazy_push_peers: Default::default(),
lazy_push_queue: Default::default(),
config,
missing_messages: Default::default(),
received_messages: Default::default(),
graft_timer_scheduled: Default::default(),
dispatch_timer_scheduled: false,
cache: Default::default(),
init: false,
stats: Default::default(),
max_message_size,
}
}
/// Handle an [`InEvent`].
///
/// On the very first call, the cache eviction handler is run once before the
/// event is processed (see `init`).
pub fn handle(&mut self, event: InEvent<PI>, now: Instant, io: &mut impl IO<PI>) {
    if !self.init {
        self.init = true;
        // NOTE(review): `on_evict_cache_timer` is defined outside this view;
        // this call appears to bootstrap the periodic `Timer::EvictCache` —
        // confirm in its body.
        self.on_evict_cache_timer(now, io)
    }
    match event {
        InEvent::RecvMessage(from, message) => self.handle_message(from, message, now, io),
        InEvent::Broadcast(data, scope) => self.broadcast(data, scope, now, io),
        InEvent::NeighborUp(peer) => self.on_neighbor_up(peer),
        InEvent::NeighborDown(peer) => self.on_neighbor_down(peer),
        InEvent::TimerExpired(timer) => match timer {
            Timer::DispatchLazyPush => self.on_dispatch_timer(io),
            Timer::SendGraft(id) => {
                self.on_send_graft_timer(id, io);
            }
            Timer::EvictCache => self.on_evict_cache_timer(now, io),
        },
    }
}
/// Get access to the [`Stats`] of the plumtree.
///
/// The message counters are updated as messages are received; see [`Stats`]
/// for the individual fields.
pub fn stats(&self) -> &Stats {
    &self.stats
}
/// Handle receiving a [`Message`].
///
/// Updates the payload/control message counters, then dispatches to the
/// per-message handlers.
fn handle_message(&mut self, sender: PI, message: Message, now: Instant, io: &mut impl IO<PI>) {
    match &message {
        Message::Gossip(_) => self.stats.payload_messages_received += 1,
        _ => self.stats.control_messages_received += 1,
    }
    match message {
        Message::Gossip(details) => self.on_gossip(sender, details, now, io),
        Message::Prune => self.on_prune(sender),
        Message::IHave(details) => self.on_ihave(sender, details, io),
        Message::Graft(details) => self.on_graft(sender, details, io),
    }
}
/// Dispatches messages from lazy queue over to lazy peers.
///
/// Queued [`IHave`]s are flushed per peer, chunked so each serialized
/// [`Message::IHave`] stays within `max_message_size`.
fn on_dispatch_timer(&mut self, io: &mut impl IO<PI>) {
    // Per-message byte budget: total size minus framing overhead.
    let chunk_size = self.max_message_size
        // Space for discriminator
        - 1
        // Space for length prefix
        - 2;
    // Maximum number of IHave entries that fit into one message.
    let chunk_len = chunk_size / IHave::POSTCARD_MAX_SIZE;
    while let Some((peer, list)) = self.lazy_push_queue.pop_first() {
        for chunk in list.chunks(chunk_len) {
            io.push(OutEvent::SendMessage(peer, Message::IHave(chunk.to_vec())));
        }
    }
    self.dispatch_timer_scheduled = false;
}
/// Send a gossip message.
///
/// Will be pushed in full to eager peers.
/// Pushing the message id to the lazy peers is delayed by a timer.
fn broadcast(&mut self, content: Bytes, scope: Scope, now: Instant, io: &mut impl IO<PI>) {
    let id = MessageId::from_content(&content);
    let delivery = match scope {
        Scope::Neighbors => DeliveryScope::Neighbors,
        Scope::Swarm => DeliveryScope::Swarm(Round(0)),
    };
    let message = Gossip {
        id,
        content,
        scope: delivery,
    };
    let me = self.me;
    // Only swarm-scoped messages are deduplicated, cached for Graft replies
    // and announced lazily; neighbor-scoped messages go out once, eagerly.
    if let DeliveryScope::Swarm(_) = delivery {
        self.received_messages
            .insert(id, (), now + self.config.message_id_retention);
        self.cache.insert(
            id,
            message.clone(),
            now + self.config.message_cache_retention,
        );
        self.lazy_push(message.clone(), &me, io);
    }
    self.eager_push(message.clone(), &me, io);
}
/// Handle receiving a [`Message::Gossip`].
///
/// Validates the message id, deduplicates, delivers new messages to the
/// application, caches them for [`Message::Graft`] replies, and forwards them
/// into the broadcast tree (eagerly in full, lazily as IHave announcements).
fn on_gossip(&mut self, sender: PI, message: Gossip, now: Instant, io: &mut impl IO<PI>) {
    // Validate that the message id is the blake3 hash of the message content.
    if !message.validate() {
        // TODO: Do we want to take any measures against the sender if we received a message
        // with a spoofed message id?
        warn!(
            peer = ?sender,
            "Received a message with spoofed message id ({})", message.id
        );
        return;
    }
    // if we already received this message: move peer to lazy set
    // and notify peer about this.
    if self.received_messages.contains_key(&message.id) {
        self.add_lazy(sender);
        io.push(OutEvent::SendMessage(sender, Message::Prune));
        // otherwise store the message, emit to application and forward to peers
    } else {
        if let DeliveryScope::Swarm(prev_round) = message.scope {
            // insert the message in the list of received messages
            self.received_messages.insert(
                message.id,
                (),
                now + self.config.message_id_retention,
            );
            // increase the round for forwarding the message, and add to cache
            // to reply to Graft messages later.
            // NOTE: this shadowed `message` (with the incremented round) is only
            // used for forwarding/caching inside this branch; the event emitted
            // below uses the outer `message`, i.e. the original round.
            // TODO: add callback/event to application to get missing messages that were received before?
            let message = message.next_round().expect("just checked");
            self.cache.insert(
                message.id,
                message.clone(),
                now + self.config.message_cache_retention,
            );
            // push the message to our peers
            self.eager_push(message.clone(), &sender, io);
            self.lazy_push(message.clone(), &sender, io);
            // cleanup places where we track missing messages
            self.graft_timer_scheduled.remove(&message.id);
            let previous_ihaves = self.missing_messages.remove(&message.id);
            // do the optimization step from the paper
            if let Some(previous_ihaves) = previous_ihaves {
                self.optimize_tree(&sender, &message, previous_ihaves, io);
            }
            self.stats.max_last_delivery_hop =
                self.stats.max_last_delivery_hop.max(prev_round.0);
        }
        // emit event to application
        io.push(OutEvent::EmitEvent(Event::Received(
            GossipEvent::from_message(&message, sender),
        )));
    }
}
/// Optimize the tree by pruning the `sender` of a [`Message::Gossip`] if we previously
/// received a [`Message::IHave`] for the same message with a much lower number of delivery
/// hops from the original broadcaster of the message.
///
/// See [Config::optimization_threshold].
fn optimize_tree(
    &mut self,
    gossip_sender: &PI,
    message: &Gossip,
    previous_ihaves: VecDeque<(PI, Round)>,
    io: &mut impl IO<PI>,
) {
    let round = message.round().expect("only called for swarm messages");
    // Pick the announcement with the fewest delivery hops.
    let Some((ihave_peer, ihave_round)) = previous_ihaves
        .iter()
        .min_by(|(_a_peer, a_round), (_b_peer, b_round)| a_round.cmp(b_round))
        .copied()
    else {
        return;
    };
    // Only optimize if the announced route is strictly shorter by at least the
    // configured threshold. (The `>=` check also guards the subtraction.)
    if ihave_round >= round || (round - ihave_round) < self.config.optimization_threshold {
        return;
    }
    // Graft the sender of the IHave, but only if it's not already eager.
    if !self.eager_push_peers.contains(&ihave_peer) {
        self.add_eager(ihave_peer);
        io.push(OutEvent::SendMessage(
            ihave_peer,
            Message::Graft(Graft {
                id: None,
                round: ihave_round,
            }),
        ));
    }
    // Prune the sender of the Gossip.
    self.add_lazy(*gossip_sender);
    io.push(OutEvent::SendMessage(*gossip_sender, Message::Prune));
}
/// Handle receiving a [`Message::Prune`].
///
/// The sender no longer wants full payload pushes from us, so demote it to the
/// lazy set (it will keep receiving IHave announcements instead).
fn on_prune(&mut self, sender: PI) {
    self.add_lazy(sender);
}
/// Handle receiving a [`Message::IHave`].
///
/// > When a node receives a IHAVE message, it simply marks the corresponding message as
/// > missing It then starts a timer, with a predefined timeout value, and waits for the missing
/// > message to be received via eager push before the timer expires. The timeout value is a
/// > protocol parameter that should be configured considering the diameter of the overlay and a
/// > target maximum recovery latency, defined by the application requirements. This is a
/// > parameter that should be statically configured at deployment time. (p8)
fn on_ihave(&mut self, sender: PI, ihaves: Vec<IHave>, io: &mut impl IO<PI>) {
    for ihave in ihaves {
        // Announcements for messages we already have are irrelevant.
        if self.received_messages.contains_key(&ihave.id) {
            continue;
        }
        // Remember who can supply this message, and at which hop distance.
        self.missing_messages
            .entry(ihave.id)
            .or_default()
            .push_back((sender, ihave.round));
        // Arm a graft timer unless one is already pending for this id;
        // `HashSet::insert` returns true only for newly inserted ids.
        if self.graft_timer_scheduled.insert(ihave.id) {
            io.push(OutEvent::ScheduleTimer(
                self.config.graft_timeout_1,
                Timer::SendGraft(ihave.id),
            ));
        }
    }
}
/// A scheduled [`Timer::SendGraft`] has reached its deadline.
///
/// If the message is still missing, requests it from the next peer that
/// announced it and re-arms a (shorter) retry timer.
fn on_send_graft_timer(&mut self, id: MessageId, io: &mut impl IO<PI>) {
    self.graft_timer_scheduled.remove(&id);
    // The message arrived while the timer was pending: nothing to request.
    if self.received_messages.contains_key(&id) {
        return;
    }
    // Pop the first remaining peer that advertised this message, if any.
    let Some((peer, round)) = self
        .missing_messages
        .get_mut(&id)
        .and_then(|entries| entries.pop_front())
    else {
        return;
    };
    self.add_eager(peer);
    io.push(OutEvent::SendMessage(
        peer,
        Message::Graft(Graft {
            id: Some(id),
            round,
        }),
    ));
    // "when a GRAFT message is sent, another timer is started to expire after a certain timeout,
    // to ensure that the message will be requested to another neighbor if it is not received
    // meanwhile. This second timeout value should be smaller that the first, in the order of
    // an average round trip time to a neighbor." (p9)
    io.push(OutEvent::ScheduleTimer(
        self.config.graft_timeout_2,
        Timer::SendGraft(id),
    ));
}
/// Handle receiving a [`Message::Graft`].
///
/// The sender wants eager payload pushes from us again; if a message id is
/// attached, reply with the cached message (when still cached).
fn on_graft(&mut self, sender: PI, details: Graft, io: &mut impl IO<PI>) {
    self.add_eager(sender);
    let Some(id) = details.id else {
        return;
    };
    match self.cache.get(&id) {
        Some(message) => io.push(OutEvent::SendMessage(
            sender,
            Message::Gossip(message.clone()),
        )),
        None => debug!(?id, peer=?sender, "on_graft failed to graft: message not in cache"),
    }
}
/// Handle a [`InEvent::NeighborUp`] when a peer joins the topic.
///
/// New neighbors start in the eager set; the tree is later thinned via
/// [`Message::Prune`] when duplicate deliveries are observed.
fn on_neighbor_up(&mut self, peer: PI) {
    self.add_eager(peer);
}
/// Handle a [`InEvent::NeighborDown`] when a peer leaves the topic.
/// > When a neighbor is detected to leave the overlay, it is simple removed from the
/// > membership. Furthermore, the record of IHAVE messages sent from failed members is deleted
/// > from the missing history. (p9)
fn on_neighbor_down(&mut self, peer: PI) {
    // Forget the peer's IHave records; drop message ids that no other peer
    // can supply anymore.
    self.missing_messages.retain(|_id, sources| {
        sources.retain(|(source, _round)| *source != peer);
        !sources.is_empty()
    });
    // Remove the peer from both membership sets.
    self.eager_push_peers.remove(&peer);
    self.lazy_push_peers.remove(&peer);
}
/// Evict due entries from the message cache and re-arm the eviction timer.
///
/// Also invoked once on the first handled event (see `init` in
/// [`Self::handle`]) to bootstrap the periodic timer loop.
fn on_evict_cache_timer(&mut self, now: Instant, io: &mut impl IO<PI>) {
    // Drop cache entries whose retention deadline has been reached.
    self.cache.expire_until(now);
    io.push(OutEvent::ScheduleTimer(
        self.config.cache_evict_interval,
        Timer::EvictCache,
    ));
}
/// Moves peer into eager set.
///
/// The two sets are kept disjoint: the peer is removed from the lazy set first.
fn add_eager(&mut self, peer: PI) {
    self.lazy_push_peers.remove(&peer);
    self.eager_push_peers.insert(peer);
}
/// Moves peer into lazy set.
///
/// The two sets are kept disjoint: the peer is removed from the eager set first.
fn add_lazy(&mut self, peer: PI) {
    self.eager_push_peers.remove(&peer);
    self.lazy_push_peers.insert(peer);
}
/// Immediately sends message to eager peers.
///
/// The local node and the peer the message came from are skipped.
fn eager_push(&mut self, gossip: Gossip, sender: &PI, io: &mut impl IO<PI>) {
    for peer in self.eager_push_peers.iter() {
        // Never echo a message back to ourselves or to its sender.
        if *peer == self.me || peer == sender {
            continue;
        }
        io.push(OutEvent::SendMessage(
            *peer,
            Message::Gossip(gossip.clone()),
        ));
    }
}
/// Queue lazy message announcements into the queue that will be sent out as batched
/// [`Message::IHave`] messages once the [`Timer::DispatchLazyPush`] timer is triggered.
fn lazy_push(&mut self, gossip: Gossip, sender: &PI, io: &mut impl IO<PI>) {
    // Neighbor-scoped messages carry no round and are never announced lazily.
    let Some(round) = gossip.round() else {
        return;
    };
    for target in self.lazy_push_peers.iter().filter(|p| *p != sender) {
        self.lazy_push_queue.entry(*target).or_default().push(IHave {
            id: gossip.id,
            round,
        });
    }
    // Arm the (debouncing) dispatch timer if it is not already running.
    if !self.dispatch_timer_scheduled {
        self.dispatch_timer_scheduled = true;
        io.push(OutEvent::ScheduleTimer(
            self.config.dispatch_timeout,
            Timer::DispatchLazyPush,
        ));
    }
}
}
#[cfg(test)]
mod test {
    use super::*;
    // Exercises `optimize_tree`: a Gossip arriving over a path much longer
    // than a previously announced IHave path promotes the IHave sender to
    // eager (Graft) and demotes the Gossip sender to lazy (Prune).
    #[test]
    fn optimize_tree() {
        let mut io = VecDeque::new();
        let config: Config = Default::default();
        let mut state = State::new(1, config.clone(), 1024);
        let now = Instant::now();
        // we receive an IHave message from peer 2
        // it has `round: 2` which means that the the peer that sent us the IHave was
        // two hops away from the original sender of the message
        let content: Bytes = b"hi".to_vec().into();
        let id = MessageId::from_content(&content);
        let event = InEvent::RecvMessage(
            2u32,
            Message::IHave(vec![IHave {
                id,
                round: Round(2),
            }]),
        );
        state.handle(event, now, &mut io);
        io.clear();
        // we then receive a `Gossip` message with the same `MessageId` from peer 3
        // the message has `round: 6`, which means it travelled 6 hops until it reached us
        // this is less hops than to peer 2, but not enough to trigger the optimization
        // because we use the default config which has `optimization_threshold: 7`
        let event = InEvent::RecvMessage(
            3,
            Message::Gossip(Gossip {
                id,
                content: content.clone(),
                scope: DeliveryScope::Swarm(Round(6)),
            }),
        );
        state.handle(event, now, &mut io);
        let expected = {
            // we expect a dispatch timer schedule and receive event, but no Graft or Prune
            // messages
            let mut io = VecDeque::new();
            io.push(OutEvent::ScheduleTimer(
                config.dispatch_timeout,
                Timer::DispatchLazyPush,
            ));
            io.push(OutEvent::EmitEvent(Event::Received(GossipEvent {
                content,
                delivered_from: 3,
                scope: DeliveryScope::Swarm(Round(6)),
            })));
            io
        };
        assert_eq!(io, expected);
        io.clear();
        // now we run the same flow again but this time peer 3 is 9 hops away from the message's
        // sender. message's sender. this will trigger the optimization:
        // peer 2 will be promoted to eager and peer 4 demoted to lazy
        let content: Bytes = b"hi2".to_vec().into();
        let id = MessageId::from_content(&content);
        let event = InEvent::RecvMessage(
            2u32,
            Message::IHave(vec![IHave {
                id,
                round: Round(2),
            }]),
        );
        state.handle(event, now, &mut io);
        io.clear();
        let event = InEvent::RecvMessage(
            3,
            Message::Gossip(Gossip {
                id,
                content: content.clone(),
                scope: DeliveryScope::Swarm(Round(9)),
            }),
        );
        state.handle(event, now, &mut io);
        let expected = {
            // this time we expect the Graft and Prune messages to be sent, performing the
            // optimization step
            let mut io = VecDeque::new();
            io.push(OutEvent::SendMessage(
                2,
                Message::Graft(Graft {
                    id: None,
                    round: Round(2),
                }),
            ));
            io.push(OutEvent::SendMessage(3, Message::Prune));
            io.push(OutEvent::EmitEvent(Event::Received(GossipEvent {
                content,
                delivered_from: 3,
                scope: DeliveryScope::Swarm(Round(9)),
            })));
            io
        };
        assert_eq!(io, expected);
    }
    // A Gossip whose id is not the hash of its content must be dropped
    // without emitting any events (see `Gossip::validate` in `on_gossip`).
    #[test]
    fn spoofed_messages_are_ignored() {
        let config: Config = Default::default();
        let mut state = State::new(1, config.clone(), 1024);
        let now = Instant::now();
        // we recv a correct gossip message and expect the Received event to be emitted
        let content: Bytes = b"hello1".to_vec().into();
        let message = Message::Gossip(Gossip {
            content: content.clone(),
            id: MessageId::from_content(&content),
            scope: DeliveryScope::Swarm(Round(1)),
        });
        let mut io = VecDeque::new();
        state.handle(InEvent::RecvMessage(2, message), now, &mut io);
        let expected = {
            let mut io = VecDeque::new();
            // the first handled event also bootstraps the cache eviction timer
            io.push(OutEvent::ScheduleTimer(
                config.cache_evict_interval,
                Timer::EvictCache,
            ));
            io.push(OutEvent::ScheduleTimer(
                config.dispatch_timeout,
                Timer::DispatchLazyPush,
            ));
            io.push(OutEvent::EmitEvent(Event::Received(GossipEvent {
                content,
                delivered_from: 2,
                scope: DeliveryScope::Swarm(Round(1)),
            })));
            io
        };
        assert_eq!(io, expected);
        // now we recv with a spoofed id and expect no event to be emitted
        let content: Bytes = b"hello2".to_vec().into();
        let message = Message::Gossip(Gossip {
            content,
            id: MessageId::from_content(b"foo"),
            scope: DeliveryScope::Swarm(Round(1)),
        });
        let mut io = VecDeque::new();
        state.handle(InEvent::RecvMessage(2, message), now, &mut io);
        let expected = VecDeque::new();
        assert_eq!(io, expected);
    }
    // Cached messages survive EvictCache timers until `message_cache_retention`
    // has elapsed, then get dropped by `on_evict_cache_timer`.
    #[test]
    fn cache_is_evicted() {
        let config: Config = Default::default();
        let mut state = State::new(1, config.clone(), 1024);
        let now = Instant::now();
        let content: Bytes = b"hello1".to_vec().into();
        let message = Message::Gossip(Gossip {
            content: content.clone(),
            id: MessageId::from_content(&content),
            scope: DeliveryScope::Swarm(Round(1)),
        });
        let mut io = VecDeque::new();
        state.handle(InEvent::RecvMessage(2, message), now, &mut io);
        assert_eq!(state.cache.len(), 1);
        let now = now + Duration::from_secs(1);
        state.handle(InEvent::TimerExpired(Timer::EvictCache), now, &mut io);
        assert_eq!(state.cache.len(), 1);
        let now = now + config.message_cache_retention;
        state.handle(InEvent::TimerExpired(Timer::EvictCache), now, &mut io);
        assert_eq!(state.cache.len(), 0);
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,381 @@
//! The protocol state of the `iroh-gossip` protocol.
use std::collections::{hash_map, HashMap, HashSet};
use n0_future::time::{Duration, Instant};
use rand::Rng;
use serde::{Deserialize, Serialize};
use tracing::trace;
use crate::{
metrics::Metrics,
proto::{
topic::{self, Command},
util::idbytes_impls,
Config, PeerData, PeerIdentity, MIN_MAX_MESSAGE_SIZE,
},
};
/// The identifier for a topic
// An opaque 32-byte value; byte/display conversions come from `idbytes_impls!`.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Serialize, Ord, PartialOrd, Deserialize)]
pub struct TopicId([u8; 32]);
idbytes_impls!(TopicId, "TopicId");
impl TopicId {
    /// Convert to a hex string limited to the first 5 bytes for a friendly string
    /// representation of the key.
    pub fn fmt_short(&self) -> String {
        data_encoding::HEXLOWER.encode(&self.as_bytes()[..5])
    }
}
/// Protocol wire message
///
/// This is the wire frame of the `iroh-gossip` protocol.
// Envelope pairing a topic id with the topic-level payload; this is what goes
// on the wire between peers.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Message<PI> {
    pub(crate) topic: TopicId,
    pub(crate) message: topic::Message<PI>,
}
impl<PI> Message<PI> {
    /// Get the kind of this message
    ///
    /// Delegates to the inner topic-level message's classification.
    pub fn kind(&self) -> MessageKind {
        self.message.kind()
    }
}
impl<PI: Serialize> Message<PI> {
    /// Encoded size of the envelope overhead (topic id + discriminators)
    /// that this wrapper adds around a gossip payload.
    pub(crate) fn postcard_header_size() -> usize {
        // We create a message that has no payload (gossip::Message::Prune), calculate the encoded size,
        // and subtract 1 for the discriminator of the inner gossip::Message enum.
        let m = Self {
            topic: TopicId(Default::default()),
            message: topic::Message::<PI>::Gossip(super::plumtree::Message::Prune),
        };
        postcard::experimental::serialized_size(&m).unwrap() - 1
    }
}
/// Whether this is a control or data message
#[derive(Debug)]
pub enum MessageKind {
    /// A data message.
    Data,
    /// A control message.
    Control,
}
// NOTE(review): this is a second `impl<PI: Serialize> Message<PI>` block
// (see `postcard_header_size` above); the two could be merged.
impl<PI: Serialize> Message<PI> {
    /// Get the encoded size of this message
    ///
    /// Used for metrics accounting of bytes sent/received.
    pub fn size(&self) -> postcard::Result<usize> {
        postcard::experimental::serialized_size(&self)
    }
}
/// A timer to be registered into the runtime
///
/// As the implementation of the protocol is an IO-less state machine, registering timers does not
/// happen within the protocol implementation. Instead, these `Timer` structs are emitted as
/// [`OutEvent`]s. The implementer must register the timer in its runtime to be emitted on the specified [`Instant`],
/// and once triggered inject an [`InEvent::TimerExpired`] into the protocol state.
#[derive(Clone, Debug)]
pub struct Timer<PI> {
    // Topic the timer belongs to; used to route expiry back to the topic state.
    topic: TopicId,
    // The topic-level timer payload (opaque to the runtime).
    timer: topic::Timer<PI>,
}
/// Input event to the protocol state.
#[derive(Clone, Debug)]
pub enum InEvent<PI> {
    /// Message received from the network.
    RecvMessage(PI, Message<PI>),
    /// Execute a command from the application.
    Command(TopicId, Command<PI>),
    /// Trigger a previously scheduled timer.
    TimerExpired(Timer<PI>),
    /// Peer disconnected on the network level.
    PeerDisconnected(PI),
    /// Update the opaque peer data about yourself.
    UpdatePeerData(PeerData),
}
/// Output event from the protocol state.
#[derive(Debug, Clone)]
pub enum OutEvent<PI> {
    /// Send a message on the network
    SendMessage(PI, Message<PI>),
    /// Emit an event to the application.
    EmitEvent(TopicId, topic::Event<PI>),
    /// Schedule a timer. The runtime is responsible for sending an [InEvent::TimerExpired]
    /// after the duration.
    ScheduleTimer(Duration, Timer<PI>),
    /// Close the connection to a peer on the network level.
    DisconnectPeer(PI),
    /// Updated peer data
    PeerData(PI, PeerData),
}
// Per-peer set of topics that may still be using the connection to that peer.
type ConnsMap<PI> = HashMap<PI, HashSet<TopicId>>;
// Buffer of output events accumulated during a `State::handle` call.
type Outbox<PI> = Vec<OutEvent<PI>>;
// An `InEvent` classified by routing target: all topic states, or one topic.
enum InEventMapped<PI> {
    All(topic::InEvent<PI>),
    TopicEvent(TopicId, topic::InEvent<PI>),
}
impl<PI> From<InEvent<PI>> for InEventMapped<PI> {
fn from(event: InEvent<PI>) -> InEventMapped<PI> {
match event {
InEvent::RecvMessage(from, Message { topic, message }) => {
Self::TopicEvent(topic, topic::InEvent::RecvMessage(from, message))
}
InEvent::Command(topic, command) => {
Self::TopicEvent(topic, topic::InEvent::Command(command))
}
InEvent::TimerExpired(Timer { topic, timer }) => {
Self::TopicEvent(topic, topic::InEvent::TimerExpired(timer))
}
InEvent::PeerDisconnected(peer) => Self::All(topic::InEvent::PeerDisconnected(peer)),
InEvent::UpdatePeerData(data) => Self::All(topic::InEvent::UpdatePeerData(data)),
}
}
}
/// The state of the `iroh-gossip` protocol.
///
/// The implementation works as an IO-less state machine. The implementer injects events through
/// [`Self::handle`], which returns an iterator of [`OutEvent`]s to be processed.
///
/// This struct contains a map of [`topic::State`] for each topic that was joined. It mostly acts as
/// a forwarder of [`InEvent`]s to matching topic state. Each topic's state is completely
/// independent; thus the actual protocol logic lives with [`topic::State`].
#[derive(Debug)]
pub struct State<PI, R> {
    // The local peer identity.
    me: PI,
    // Opaque data about the local peer, handed to newly created topic states.
    me_data: PeerData,
    // Protocol configuration, cloned into every topic state.
    config: Config,
    // Random number generator, cloned into every topic state.
    rng: R,
    // Per-topic protocol state; entries are created lazily on `Command::Join`
    // and removed on `Command::Quit`.
    states: HashMap<TopicId, topic::State<PI, R>>,
    // Output buffer drained at the end of every `handle` call.
    outbox: Outbox<PI>,
    // Which topics (may) use the connection to each peer; drives the decision
    // when to emit `OutEvent::DisconnectPeer`.
    peer_topics: ConnsMap<PI>,
}
impl<PI: PeerIdentity, R: Rng + Clone> State<PI, R> {
/// Create a new protocol state instance.
///
/// `me` is the [`PeerIdentity`] of the local node, `peer_data` is the initial [`PeerData`]
/// (which can be updated over time).
/// For the protocol to perform as recommended in the papers, the [`Config`] should be
/// identical for all nodes in the network.
///
/// ## Panics
///
/// Panics if [`Config::max_message_size`] is below [`MIN_MAX_MESSAGE_SIZE`].
pub fn new(me: PI, me_data: PeerData, config: Config, rng: R) -> Self {
assert!(
config.max_message_size >= MIN_MAX_MESSAGE_SIZE,
"max_message_size must be at least {MIN_MAX_MESSAGE_SIZE}"
);
Self {
me,
me_data,
config,
rng,
states: Default::default(),
outbox: Default::default(),
peer_topics: Default::default(),
}
}
/// Get a reference to the node's [`PeerIdentity`]
pub fn me(&self) -> &PI {
    &self.me
}
/// Get a reference to the protocol state for a topic.
///
/// Returns `None` if the topic was never joined (or was quit).
pub fn state(&self, topic: &TopicId) -> Option<&topic::State<PI, R>> {
    self.states.get(topic)
}
/// Resets the tracked stats for a topic.
///
/// A no-op for unknown topics.
pub fn reset_stats(&mut self, topic: &TopicId) {
    if let Some(state) = self.states.get_mut(topic) {
        state.reset_stats();
    }
}
/// Get an iterator of all joined topics.
pub fn topics(&self) -> impl Iterator<Item = &TopicId> {
    self.states.keys()
}
/// Get an iterator for the states of all joined topics.
pub fn states(&self) -> impl Iterator<Item = (&TopicId, &topic::State<PI, R>)> {
    self.states.iter()
}
/// Check if a topic has any active (connected) peers.
///
/// Returns `false` for topics that were never joined.
pub fn has_active_peers(&self, topic: &TopicId) -> bool {
    self.state(topic)
        .map(|s| s.has_active_peers())
        .unwrap_or(false)
}
/// Returns the maximum message size configured in the gossip protocol.
pub fn max_message_size(&self) -> usize {
    self.config.max_message_size
}
/// Handle an [`InEvent`]
///
/// This returns an iterator of [`OutEvent`]s that must be processed.
///
/// Topic-scoped events are forwarded to the matching [`topic::State`]
/// (created on `Join`, dropped on `Quit`); global events are forwarded to
/// every topic state. Metrics are tracked when `metrics` is provided.
pub fn handle(
    &mut self,
    event: InEvent<PI>,
    now: Instant,
    metrics: Option<&Metrics>,
) -> impl Iterator<Item = OutEvent<PI>> + '_ + use<'_, PI, R> {
    trace!("in : {event:?}");
    if let Some(metrics) = &metrics {
        track_in_event(&event, metrics);
    }
    let event: InEventMapped<PI> = event.into();
    match event {
        InEventMapped::TopicEvent(topic, event) => {
            // when receiving a join command, initialize state if it doesn't exist
            if matches!(&event, topic::InEvent::Command(Command::Join(_peers))) {
                if let hash_map::Entry::Vacant(e) = self.states.entry(topic) {
                    e.insert(topic::State::with_rng(
                        self.me,
                        Some(self.me_data.clone()),
                        self.config.clone(),
                        self.rng.clone(),
                    ));
                }
            }
            // when receiving a quit command, note this and drop the topic state after
            // processing this last event
            let quit = matches!(event, topic::InEvent::Command(Command::Quit));
            // pass the event to the state handler
            if let Some(state) = self.states.get_mut(&topic) {
                // when receiving messages, update our conn map to take note that this topic state may want
                // to keep this connection
                if let topic::InEvent::RecvMessage(from, _message) = &event {
                    self.peer_topics.entry(*from).or_default().insert(topic);
                }
                let out = state.handle(event, now);
                for event in out {
                    handle_out_event(topic, event, &mut self.peer_topics, &mut self.outbox);
                }
            }
            if quit {
                self.states.remove(&topic);
            }
        }
        // when a peer disconnected on the network level, forward event to all states
        InEventMapped::All(event) => {
            // keep our own copy of the peer data current so that newly joined
            // topics are initialized with the latest data
            if let topic::InEvent::UpdatePeerData(data) = &event {
                self.me_data = data.clone();
            }
            for (topic, state) in self.states.iter_mut() {
                let out = state.handle(event.clone(), now);
                for event in out {
                    handle_out_event(*topic, event, &mut self.peer_topics, &mut self.outbox);
                }
            }
        }
    }
    // track metrics
    if let Some(metrics) = &metrics {
        track_out_events(&self.outbox, metrics);
    }
    self.outbox.drain(..)
}
}
/// Maps a single topic-level [`topic::OutEvent`] into the global outbox,
/// wrapping messages and timers with their topic id.
///
/// `conns` tracks, per peer, the set of topics that may be using the
/// connection to that peer; a network-level [`OutEvent::DisconnectPeer`] is
/// only emitted once the last topic has released the peer.
fn handle_out_event<PI: PeerIdentity>(
    topic: TopicId,
    event: topic::OutEvent<PI>,
    conns: &mut ConnsMap<PI>,
    outbox: &mut Outbox<PI>,
) {
    trace!("out: {event:?}");
    match event {
        topic::OutEvent::SendMessage(to, message) => {
            outbox.push(OutEvent::SendMessage(to, Message { topic, message }))
        }
        topic::OutEvent::EmitEvent(event) => outbox.push(OutEvent::EmitEvent(topic, event)),
        topic::OutEvent::ScheduleTimer(delay, timer) => {
            outbox.push(OutEvent::ScheduleTimer(delay, Timer { topic, timer }))
        }
        topic::OutEvent::DisconnectPeer(peer) => {
            // Release this topic's claim on the connection, and only disconnect
            // on the network level if no other topic still uses this peer.
            // BUGFIX: this must be `&&`, not `||` — with `||` the short-circuit
            // after a successful `remove` would disconnect a peer that other
            // topics are still using.
            let empty = conns
                .get_mut(&peer)
                .map(|list| list.remove(&topic) && list.is_empty())
                .unwrap_or(false);
            if empty {
                conns.remove(&peer);
                outbox.push(OutEvent::DisconnectPeer(peer));
            }
        }
        topic::OutEvent::PeerData(peer, data) => outbox.push(OutEvent::PeerData(peer, data)),
    }
}
/// Updates metrics counters for all events about to leave the state machine.
fn track_out_events<PI: Serialize>(events: &[OutEvent<PI>], metrics: &Metrics) {
    for event in events {
        match event {
            OutEvent::SendMessage(_to, message) => {
                // Size accounting is best-effort: serialization errors count as 0.
                let size = message.size().unwrap_or(0) as u64;
                match message.kind() {
                    MessageKind::Data => {
                        metrics.msgs_data_sent.inc();
                        metrics.msgs_data_sent_size.inc_by(size);
                    }
                    MessageKind::Control => {
                        metrics.msgs_ctrl_sent.inc();
                        metrics.msgs_ctrl_sent_size.inc_by(size);
                    }
                }
            }
            // Only neighbor changes are counted; other emitted events are not metered.
            OutEvent::EmitEvent(_topic, super::Event::NeighborUp(_peer)) => {
                metrics.neighbor_up.inc();
            }
            OutEvent::EmitEvent(_topic, super::Event::NeighborDown(_peer)) => {
                metrics.neighbor_down.inc();
            }
            _ => {}
        }
    }
}
/// Updates metrics counters for a single event entering the state machine.
fn track_in_event<PI: Serialize>(event: &InEvent<PI>, metrics: &Metrics) {
    // Only network messages are metered; commands and timers are not.
    let InEvent::RecvMessage(_from, message) = event else {
        return;
    };
    // Size accounting is best-effort: serialization errors count as 0.
    let size = message.size().unwrap_or(0) as u64;
    match message.kind() {
        MessageKind::Data => {
            metrics.msgs_data_recv.inc();
            metrics.msgs_data_recv_size.inc_by(size);
        }
        MessageKind::Control => {
            metrics.msgs_ctrl_recv.inc();
            metrics.msgs_ctrl_recv_size.inc_by(size);
        }
    }
}

View file

@ -0,0 +1,363 @@
//! This module contains the implementation of the gossiping protocol for an individual topic
use std::collections::VecDeque;
use bytes::Bytes;
use derive_more::From;
use n0_future::time::{Duration, Instant};
use rand::Rng;
use serde::{Deserialize, Serialize};
use super::{
hyparview::{self, InEvent as SwarmIn},
plumtree::{self, GossipEvent, InEvent as GossipIn, Scope},
state::MessageKind,
PeerData, PeerIdentity, DEFAULT_MAX_MESSAGE_SIZE,
};
use crate::proto::MIN_MAX_MESSAGE_SIZE;
/// Input event to the topic state handler.
#[derive(Clone, Debug)]
pub enum InEvent<PI> {
    /// Message received from the network.
    RecvMessage(PI, Message<PI>),
    /// Execute a command from the application.
    Command(Command<PI>),
    /// Trigger a previously scheduled timer.
    TimerExpired(Timer<PI>),
    /// Peer disconnected on the network level.
    PeerDisconnected(PI),
    /// Update the opaque peer data about yourself.
    UpdatePeerData(PeerData),
}
/// An output event from the state handler.
#[derive(Debug, PartialEq, Eq)]
pub enum OutEvent<PI> {
    /// Send a message on the network
    SendMessage(PI, Message<PI>),
    /// Emit an event to the application.
    EmitEvent(Event<PI>),
    /// Schedule a timer. The runtime is responsible for sending an [InEvent::TimerExpired]
    /// after the duration.
    ScheduleTimer(Duration, Timer<PI>),
    /// Close the connection to a peer on the network level.
    DisconnectPeer(PI),
    /// Emitted when new [`PeerData`] was received for a peer.
    PeerData(PI, PeerData),
}
impl<PI> From<hyparview::OutEvent<PI>> for OutEvent<PI> {
fn from(event: hyparview::OutEvent<PI>) -> Self {
use hyparview::OutEvent::*;
match event {
SendMessage(to, message) => Self::SendMessage(to, message.into()),
ScheduleTimer(delay, timer) => Self::ScheduleTimer(delay, timer.into()),
DisconnectPeer(peer) => Self::DisconnectPeer(peer),
EmitEvent(event) => Self::EmitEvent(event.into()),
PeerData(peer, data) => Self::PeerData(peer, data),
}
}
}
impl<PI> From<plumtree::OutEvent<PI>> for OutEvent<PI> {
fn from(event: plumtree::OutEvent<PI>) -> Self {
use plumtree::OutEvent::*;
match event {
SendMessage(to, message) => Self::SendMessage(to, message.into()),
ScheduleTimer(delay, timer) => Self::ScheduleTimer(delay, timer.into()),
EmitEvent(event) => Self::EmitEvent(event.into()),
}
}
}
/// A trait for a concrete type to push `OutEvent`s to.
///
/// The implementation is generic over this trait, which allows the upper layer to supply a
/// container of their choice for `OutEvent`s emitted from the protocol state.
pub trait IO<PI: Clone> {
    /// Push an event in the IO container
    fn push(&mut self, event: impl Into<OutEvent<PI>>);
    /// Push all events from an iterator into the IO container
    ///
    /// Default implementation simply calls [`Self::push`] for each item.
    fn push_from_iter(&mut self, iter: impl IntoIterator<Item = impl Into<OutEvent<PI>>>) {
        for event in iter.into_iter() {
            self.push(event);
        }
    }
}
/// A protocol message for a particular topic
// Union of the two sub-protocol wire messages; the discriminator tells the
// receiver which layer the message belongs to.
#[derive(From, Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Message<PI> {
    /// A message of the swarm membership layer
    Swarm(hyparview::Message<PI>),
    /// A message of the gossip broadcast layer
    Gossip(plumtree::Message),
}
impl<PI> Message<PI> {
    /// Get the kind of this message
    ///
    /// Only full gossip payloads count as data; every swarm-layer message and
    /// the remaining gossip-layer messages (IHave/Graft/Prune) are control.
    pub fn kind(&self) -> MessageKind {
        if matches!(self, Message::Gossip(plumtree::Message::Gossip(_))) {
            MessageKind::Data
        } else {
            MessageKind::Control
        }
    }
    /// Returns `true` if this is a disconnect message (which is the last message sent to a peer per topic).
    pub fn is_disconnect(&self) -> bool {
        match self {
            Message::Swarm(hyparview::Message::Disconnect(_)) => true,
            _ => false,
        }
    }
}
/// An event to be emitted to the application for a particular topic.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Serialize, Deserialize)]
pub enum Event<PI> {
    /// We have a new, direct neighbor in the swarm membership layer for this topic
    NeighborUp(PI),
    /// We dropped direct neighbor in the swarm membership layer for this topic
    NeighborDown(PI),
    /// A gossip message was received for this topic
    Received(GossipEvent<PI>),
}
impl<PI> From<hyparview::Event<PI>> for Event<PI> {
fn from(value: hyparview::Event<PI>) -> Self {
match value {
hyparview::Event::NeighborUp(peer) => Self::NeighborUp(peer),
hyparview::Event::NeighborDown(peer) => Self::NeighborDown(peer),
}
}
}
impl<PI> From<plumtree::Event<PI>> for Event<PI> {
fn from(value: plumtree::Event<PI>) -> Self {
match value {
plumtree::Event::Received(event) => Self::Received(event),
}
}
}
/// A timer to be registered for a particular topic.
///
/// This should be treated as an opaque value by the implementer and, once emitted, simply returned
/// to the protocol through [`InEvent::TimerExpired`].
#[derive(Clone, From, Debug, PartialEq, Eq)]
pub enum Timer<PI> {
    /// A timer for the swarm layer
    Swarm(hyparview::Timer<PI>),
    /// A timer for the gossip layer
    Gossip(plumtree::Timer),
}
/// A command to the protocol state for a particular topic.
#[derive(Clone, derive_more::Debug)]
pub enum Command<PI> {
    /// Join this topic and connect to peers.
    ///
    /// If the list of peers is empty, will prepare the state and accept incoming join requests,
    /// but only become operational after the first join request by another peer.
    Join(Vec<PI>),
    /// Broadcast a message for this topic.
    Broadcast(#[debug("<{}b>", _0.len())] Bytes, Scope),
    /// Leave this topic and drop all state.
    Quit,
}
// Convenience `IO` sink: events are appended to the back of a `VecDeque`.
impl<PI: Clone> IO<PI> for VecDeque<OutEvent<PI>> {
    fn push(&mut self, event: impl Into<OutEvent<PI>>) {
        self.push_back(event.into())
    }
}
/// Protocol configuration
// `serde(default)` lets partial configs deserialize, filling gaps from `Default`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
    /// Configuration for the swarm membership layer
    pub membership: hyparview::Config,
    /// Configuration for the gossip broadcast layer
    pub broadcast: plumtree::Config,
    /// Max message size in bytes.
    ///
    /// This size should be the same across a network to ensure all nodes can transmit and read large messages.
    ///
    /// At minimum, this size should be large enough to send gossip control messages. This can vary, depending on the size of the [`PeerIdentity`] you use and the size of the [`PeerData`] you transmit in your messages.
    ///
    /// The default is [`DEFAULT_MAX_MESSAGE_SIZE`].
    pub max_message_size: usize,
}
impl Default for Config {
fn default() -> Self {
Self {
membership: Default::default(),
broadcast: Default::default(),
max_message_size: DEFAULT_MAX_MESSAGE_SIZE,
}
}
}
/// The topic state maintains the swarm membership and broadcast tree for a particular topic.
#[derive(Debug)]
pub struct State<PI, R> {
    // The local peer identity.
    me: PI,
    // Swarm membership layer (HyParView) state.
    pub(crate) swarm: hyparview::State<PI, R>,
    // Broadcast tree layer (Plumtree) state.
    pub(crate) gossip: plumtree::State<PI>,
    // Output buffer drained by `handle`.
    outbox: VecDeque<OutEvent<PI>>,
    // Counters for messages sent/received on this topic.
    stats: Stats,
}
impl<PI: PeerIdentity> State<PI, rand::rngs::ThreadRng> {
    /// Initialize the local state with the default random number generator.
    ///
    /// ## Panics
    ///
    /// Panics if [`Config::max_message_size`] is below [`MIN_MAX_MESSAGE_SIZE`].
    pub fn new(me: PI, me_data: Option<PeerData>, config: Config) -> Self {
        Self::with_rng(me, me_data, config, rand::rng())
    }
}
impl<PI, R> State<PI, R> {
    /// The address of your local endpoint.
    pub fn endpoint(&self) -> &PI {
        &self.me
    }
}
impl<PI: PeerIdentity, R: Rng> State<PI, R> {
/// Initialize the local state with a custom random number generator.
///
/// ## Panics
///
/// Panics if [`Config::max_message_size`] is below [`MIN_MAX_MESSAGE_SIZE`].
pub fn with_rng(me: PI, me_data: Option<PeerData>, config: Config, rng: R) -> Self {
    assert!(
        config.max_message_size >= MIN_MAX_MESSAGE_SIZE,
        "max_message_size must be at least {MIN_MAX_MESSAGE_SIZE}"
    );
    // The gossip layer only sees the frame size minus the envelope overhead
    // added by the topic wrapper.
    let max_payload_size =
        config.max_message_size - super::Message::<PI>::postcard_header_size();
    let swarm = hyparview::State::new(me, me_data, config.membership, rng);
    let gossip = plumtree::State::new(me, config.broadcast, max_payload_size);
    Self {
        me,
        swarm,
        gossip,
        outbox: VecDeque::new(),
        stats: Stats::default(),
    }
}
/// Handle an incoming event.
///
/// Returns an iterator of outgoing events that must be processed by the application.
pub fn handle(
&mut self,
event: InEvent<PI>,
now: Instant,
) -> impl Iterator<Item = OutEvent<PI>> + '_ {
let io = &mut self.outbox;
// Process the event, store out events in outbox.
match event {
InEvent::Command(command) => match command {
Command::Join(peers) => {
for peer in peers {
self.swarm.handle(SwarmIn::RequestJoin(peer), io);
}
}
Command::Broadcast(data, scope) => {
self.gossip
.handle(GossipIn::Broadcast(data, scope), now, io)
}
Command::Quit => self.swarm.handle(SwarmIn::Quit, io),
},
InEvent::RecvMessage(from, message) => {
self.stats.messages_received += 1;
match message {
Message::Swarm(message) => {
self.swarm.handle(SwarmIn::RecvMessage(from, message), io)
}
Message::Gossip(message) => {
self.gossip
.handle(GossipIn::RecvMessage(from, message), now, io)
}
}
}
InEvent::TimerExpired(timer) => match timer {
Timer::Swarm(timer) => self.swarm.handle(SwarmIn::TimerExpired(timer), io),
Timer::Gossip(timer) => self.gossip.handle(GossipIn::TimerExpired(timer), now, io),
},
InEvent::PeerDisconnected(peer) => {
self.swarm.handle(SwarmIn::PeerDisconnected(peer), io);
self.gossip.handle(GossipIn::NeighborDown(peer), now, io);
}
InEvent::UpdatePeerData(data) => self.swarm.handle(SwarmIn::UpdatePeerData(data), io),
}
// Forward NeighborUp and NeighborDown events from hyparview to plumtree
let mut io = VecDeque::new();
for event in self.outbox.iter() {
match event {
OutEvent::EmitEvent(Event::NeighborUp(peer)) => {
self.gossip
.handle(GossipIn::NeighborUp(*peer), now, &mut io)
}
OutEvent::EmitEvent(Event::NeighborDown(peer)) => {
self.gossip
.handle(GossipIn::NeighborDown(*peer), now, &mut io)
}
_ => {}
}
}
// Note that this is a no-op because plumtree::handle(NeighborUp | NeighborDown)
// above does not emit any OutEvents.
self.outbox.extend(io.drain(..));
// Update sent message counter
self.stats.messages_sent += self
.outbox
.iter()
.filter(|event| matches!(event, OutEvent::SendMessage(_, _)))
.count();
self.outbox.drain(..)
}
/// Get stats on how many messages were sent and received.
// TODO: Remove/replace with metrics?
pub fn stats(&self) -> &Stats {
&self.stats
}
/// Reset all statistics.
pub fn reset_stats(&mut self) {
self.gossip.stats = Default::default();
self.swarm.stats = Default::default();
self.stats = Default::default();
}
/// Get statistics for the gossip broadcast state
///
/// TODO: Remove/replace with metrics?
pub fn gossip_stats(&self) -> &plumtree::Stats {
self.gossip.stats()
}
/// Check if this topic has any active (connected) peers.
pub fn has_active_peers(&self) -> bool {
!self.swarm.active_view.is_empty()
}
}
/// Statistics for the protocol state of a topic
///
/// Counters are cumulative; they can be cleared with [`State::reset_stats`].
#[derive(Clone, Debug, Default)]
pub struct Stats {
    /// Number of messages sent
    pub messages_sent: usize,
    /// Number of messages received
    pub messages_received: usize,
}

// ---------------------------------------------------------------------------
// File boundary: the content below originates from a separate source file
// (utilities used in the protocol implementation).
// ---------------------------------------------------------------------------
//! Utilities used in the protocol implementation
use std::{
collections::{hash_map, BinaryHeap, HashMap},
hash::Hash,
};
use n0_future::time::Instant;
use rand::{
seq::{IteratorRandom, SliceRandom},
Rng,
};
/// Implement methods, display, debug and conversion traits for 32 byte identifiers.
///
/// `$ty` must be a tuple struct wrapping a `[u8; 32]` (the expansion accesses `.0`),
/// and `$name` is the type name printed by the generated `Debug` impl.
macro_rules! idbytes_impls {
    ($ty:ty, $name:expr) => {
        impl $ty {
            /// Create from a byte array.
            pub const fn from_bytes(bytes: [u8; 32]) -> Self {
                Self(bytes)
            }
            /// Get as byte slice.
            pub fn as_bytes(&self) -> &[u8; 32] {
                &self.0
            }
        }
        // Conversion from any type that can be turned into the raw 32 bytes.
        impl<T: ::std::convert::Into<[u8; 32]>> ::std::convert::From<T> for $ty {
            fn from(value: T) -> Self {
                Self::from_bytes(value.into())
            }
        }
        // `Display` prints the identifier as hex.
        impl ::std::fmt::Display for $ty {
            fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
                write!(f, "{}", ::hex::encode(&self.0))
            }
        }
        // `Debug` prints `$name(<hex>)`.
        impl ::std::fmt::Debug for $ty {
            fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
                write!(f, "{}({})", $name, ::hex::encode(&self.0))
            }
        }
        // Parse from a hex string; fails if the input does not decode to exactly 32 bytes.
        impl ::std::str::FromStr for $ty {
            type Err = ::hex::FromHexError;
            fn from_str(s: &str) -> ::std::result::Result<Self, Self::Err> {
                let mut bytes = [0u8; 32];
                ::hex::decode_to_slice(s, &mut bytes)?;
                Ok(Self::from_bytes(bytes))
            }
        }
        impl ::std::convert::AsRef<[u8]> for $ty {
            fn as_ref(&self) -> &[u8] {
                &self.0
            }
        }
        impl ::std::convert::AsRef<[u8; 32]> for $ty {
            fn as_ref(&self) -> &[u8; 32] {
                &self.0
            }
        }
    };
}
// Make the macro available to sibling modules within the crate.
pub(crate) use idbytes_impls;
/// A hash set where the iteration order of the values is independent of their
/// hash values.
///
/// This is a wrapper around [indexmap::IndexSet] with a couple of utility methods
/// to randomly select elements from the set.
#[derive(Default, Debug, Clone, derive_more::Deref)]
pub(crate) struct IndexSet<T> {
    // All read-only set operations are exposed from this inner set via `Deref`.
    inner: indexmap::IndexSet<T>,
}
// Equality delegates to the inner [`indexmap::IndexSet`].
impl<T: Hash + Eq> PartialEq for IndexSet<T> {
    fn eq(&self, other: &Self) -> bool {
        self.inner == other.inner
    }
}
impl<T: Hash + Eq + PartialEq> IndexSet<T> {
    /// Create a new, empty set.
    pub fn new() -> Self {
        Self {
            inner: indexmap::IndexSet::new(),
        }
    }
    /// Insert a value into the set (delegates to the inner set).
    pub fn insert(&mut self, value: T) -> bool {
        self.inner.insert(value)
    }
    /// Remove a random element from the set.
    pub fn remove_random<R: Rng + ?Sized>(&mut self, rng: &mut R) -> Option<T> {
        // `shift_remove_index` keeps the remaining elements in their original order.
        self.pick_random_index(rng)
            .and_then(|idx| self.inner.shift_remove_index(idx))
    }
    /// Pick a random element from the set.
    pub fn pick_random<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<&T> {
        self.pick_random_index(rng)
            .and_then(|idx| self.inner.get_index(idx))
    }
    /// Pick a random element from the set, but not any of the elements in `without`.
    pub fn pick_random_without<R: Rng + ?Sized>(&self, without: &[&T], rng: &mut R) -> Option<&T> {
        self.iter().filter(|x| !without.contains(x)).choose(rng)
    }
    /// Pick a random index for an element in the set.
    ///
    /// Returns `None` if the set is empty.
    pub fn pick_random_index<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<usize> {
        if self.is_empty() {
            None
        } else {
            Some(rng.random_range(0..self.inner.len()))
        }
    }
    /// Remove an element from the set.
    ///
    /// NOTE: the value is removed by swapping it with the last element of the set and popping it off.
    /// **This modifies the order of element by moving the last element**
    pub fn remove(&mut self, value: &T) -> Option<T> {
        self.inner.swap_remove_full(value).map(|(_i, v)| v)
    }
    /// Remove an element from the set by its index.
    ///
    /// NOTE: the value is removed by swapping it with the last element of the set and popping it off.
    /// **This modifies the order of element by moving the last element**
    pub fn remove_index(&mut self, index: usize) -> Option<T> {
        self.inner.swap_remove_index(index)
    }
    /// Create an iterator over the set in the order of insertion, while skipping the element in
    /// `without`.
    pub fn iter_without<'a>(&'a self, value: &'a T) -> impl Iterator<Item = &'a T> {
        self.iter().filter(move |x| *x != value)
    }
}
impl<T> IndexSet<T>
where
    T: Hash + Eq + Clone,
{
    /// Create a vector of all elements in the set in random order.
    ///
    /// Clones every element; the set itself is left untouched.
    pub fn shuffled<R: Rng + ?Sized>(&self, rng: &mut R) -> Vec<T> {
        let mut items: Vec<_> = self.inner.iter().cloned().collect();
        items.shuffle(rng);
        items
    }
    /// Create a vector of all elements in the set in random order, and shorten to
    /// the first `len` elements after shuffling.
    pub fn shuffled_and_capped<R: Rng + ?Sized>(&self, len: usize, rng: &mut R) -> Vec<T> {
        let mut items = self.shuffled(rng);
        items.truncate(len);
        items
    }
    /// Create a vector of the elements in the set in random order while omitting
    /// the elements in `without`.
    pub fn shuffled_without<R: Rng + ?Sized>(&self, without: &[&T], rng: &mut R) -> Vec<T> {
        // Filter before cloning so excluded elements are never copied.
        let mut items = self
            .inner
            .iter()
            .filter(|x| !without.contains(x))
            .cloned()
            .collect::<Vec<_>>();
        items.shuffle(rng);
        items
    }
    /// Create a vector of the elements in the set in random order while omitting
    /// the elements in `without`, and shorten to the first `len` elements.
    pub fn shuffled_without_and_capped<R: Rng + ?Sized>(
        &self,
        without: &[&T],
        len: usize,
        rng: &mut R,
    ) -> Vec<T> {
        let mut items = self.shuffled_without(without, rng);
        items.truncate(len);
        items
    }
}
impl<T> IntoIterator for IndexSet<T> {
    type Item = T;
    type IntoIter = <indexmap::IndexSet<T> as IntoIterator>::IntoIter;
    /// Consume the set, yielding its elements via the inner set's iterator.
    fn into_iter(self) -> Self::IntoIter {
        self.inner.into_iter()
    }
}
impl<T> FromIterator<T> for IndexSet<T>
where
    T: Hash + Eq,
{
    /// Build an [`IndexSet`] by collecting the items into the inner set.
    fn from_iter<I: IntoIterator<Item = T>>(iterable: I) -> Self {
        Self {
            inner: iterable.into_iter().collect(),
        }
    }
}
/// A [`BinaryHeap`] with entries sorted by [`Instant`]. Allows to process expired items.
#[derive(Debug)]
pub struct TimerMap<T> {
    // Behaves as a min-heap: `TimerMapEntry`'s `Ord` impl reverses the
    // comparison so the max-heap `BinaryHeap` pops the earliest entry first.
    heap: BinaryHeap<TimerMapEntry<T>>,
    // Monotonically increasing counter used to keep insertion order among
    // entries inserted at the same instant.
    seq: u64,
}
// Can't derive default because we don't want a `T: Default` bound.
impl<T> Default for TimerMap<T> {
fn default() -> Self {
Self {
heap: Default::default(),
seq: 0,
}
}
}
impl<T> TimerMap<T> {
    /// Create a new, empty TimerMap.
    pub fn new() -> Self {
        Default::default()
    }
    /// Insert a new entry at the specified instant.
    ///
    /// Entries at the same instant keep their insertion order, enforced by the
    /// monotonically increasing sequence number.
    pub fn insert(&mut self, instant: Instant, item: T) {
        self.heap.push(TimerMapEntry {
            time: instant,
            seq: self.seq,
            item,
        });
        self.seq += 1;
    }
    /// Remove and return all entries before and equal to `from`.
    pub fn drain_until(
        &mut self,
        from: &Instant,
    ) -> impl Iterator<Item = (Instant, T)> + '_ + use<'_, T> {
        let limit = *from;
        // Lazily pop expired entries; stops at the first non-expired one.
        std::iter::from_fn(move || self.pop_before(limit))
    }
    /// Pop the first entry, if equal or before `limit`.
    pub fn pop_before(&mut self, limit: Instant) -> Option<(Instant, T)> {
        // Only pop when the earliest entry has expired; otherwise leave the heap untouched.
        let expired = matches!(self.heap.peek(), Some(entry) if entry.time <= limit);
        if expired {
            self.heap.pop().map(|entry| (entry.time, entry.item))
        } else {
            None
        }
    }
    /// Get a reference to the earliest entry in the `TimerMap`.
    pub fn first(&self) -> Option<&Instant> {
        self.heap.peek().map(|entry| &entry.time)
    }
    #[cfg(test)]
    fn to_vec(&self) -> Vec<(Instant, T)>
    where
        T: Clone,
    {
        // `into_sorted_vec` yields ascending `Ord` order, which is *reversed*
        // time order here (see `TimerMapEntry::cmp`); `rev` restores time order.
        self.heap
            .clone()
            .into_sorted_vec()
            .into_iter()
            .rev()
            .map(|entry| (entry.time, entry.item))
            .collect()
    }
}
/// A single entry in a [`TimerMap`].
///
/// Carries the expiration `time`, an insertion sequence number `seq` used to
/// break ties between entries with the same time, and the payload `item`.
#[derive(Debug, Clone)]
struct TimerMapEntry<T> {
    time: Instant,
    seq: u64,
    item: T,
}
// Equality considers only the ordering key (time + sequence number), never the
// payload, so entries can be compared without a `T: PartialEq` bound.
impl<T> PartialEq for TimerMapEntry<T> {
    fn eq(&self, other: &Self) -> bool {
        let same_time = self.time == other.time;
        let same_seq = self.seq == other.seq;
        same_time && same_seq
    }
}
impl<T> Eq for TimerMapEntry<T> {}
impl<T> PartialOrd for TimerMapEntry<T> {
    // Delegate to the total order defined by `Ord`, as the canonical
    // `PartialOrd`/`Ord` pairing requires.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl<T> Ord for TimerMapEntry<T> {
    /// Reversed (earliest-first) ordering so that [`BinaryHeap`], a max-heap,
    /// pops the entry with the smallest time — and, among equal times, the
    /// smallest sequence number (insertion order) — first.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Swapping `self` and `other` is equivalent to `.reverse()` on a total order.
        other
            .time
            .cmp(&self.time)
            .then_with(|| other.seq.cmp(&self.seq))
    }
}
/// A hash map where entries expire after a time
#[derive(Debug)]
pub struct TimeBoundCache<K, V> {
    // Value storage; each value is paired with its expiration instant.
    map: HashMap<K, (Instant, V)>,
    // Expiry schedule. May contain stale entries for keys that were
    // re-inserted with a later expiration; `expire_until` skips those.
    expiry: TimerMap<K>,
}
impl<K, V> Default for TimeBoundCache<K, V> {
fn default() -> Self {
Self {
map: Default::default(),
expiry: Default::default(),
}
}
}
impl<K: Hash + Eq + Clone, V> TimeBoundCache<K, V> {
    /// Insert an item into the cache, marked with an expiration time.
    ///
    /// Re-inserting an existing key replaces its value and expiration time;
    /// the old expiry entry stays in the schedule but is skipped on expiry.
    pub fn insert(&mut self, key: K, value: V, expires: Instant) {
        self.expiry.insert(expires, key.clone());
        self.map.insert(key, (expires, value));
    }
    /// Returns `true` if the map contains a value for the specified key.
    pub fn contains_key(&self, key: &K) -> bool {
        self.map.contains_key(key)
    }
    /// Get the number of entries in the cache.
    pub fn len(&self) -> usize {
        self.map.len()
    }
    /// Returns `true` if the map contains no elements.
    pub fn is_empty(&self) -> bool {
        self.map.is_empty()
    }
    /// Get an item from the cache.
    pub fn get(&self, key: &K) -> Option<&V> {
        let (_expires, value) = self.map.get(key)?;
        Some(value)
    }
    /// Get the expiration time for an item.
    pub fn expires(&self, key: &K) -> Option<&Instant> {
        let (expires, _value) = self.map.get(key)?;
        Some(expires)
    }
    /// Iterate over all items in the cache.
    pub fn iter(&self) -> impl Iterator<Item = (&K, &V, &Instant)> {
        self.map
            .iter()
            .map(|(key, (expires, value))| (key, value, expires))
    }
    /// Remove all entries with an expiry instant lower or equal to `instant`.
    ///
    /// Returns the number of items that were removed.
    pub fn expire_until(&mut self, instant: Instant) -> usize {
        let mut removed = 0;
        for (time, key) in self.expiry.drain_until(&instant) {
            // Only drop the entry if its stored expiration matches the drained
            // timer. A mismatch means the key was re-inserted with a later
            // expiration; a missing entry means this timer was already stale.
            if let hash_map::Entry::Occupied(slot) = self.map.entry(key) {
                if slot.get().0 == time {
                    slot.remove();
                    removed += 1;
                }
            }
        }
        removed
    }
}
#[cfg(test)]
mod test {
    use std::str::FromStr;
    use n0_future::time::{Duration, Instant};
    use rand::SeedableRng;
    use super::{IndexSet, TimeBoundCache, TimerMap};
    // Seeded RNG so the shuffle/pick assertions below are deterministic.
    fn test_rng() -> rand_chacha::ChaCha12Rng {
        rand_chacha::ChaCha12Rng::seed_from_u64(42)
    }
    #[test]
    fn indexset() {
        let elems = [1, 2, 3, 4];
        let set = IndexSet::from_iter(elems);
        let x = set.shuffled(&mut test_rng());
        assert_eq!(x, vec![2, 1, 4, 3]);
        let x = set.shuffled_and_capped(2, &mut test_rng());
        assert_eq!(x, vec![2, 1]);
        let x = set.shuffled_without(&[&1], &mut test_rng());
        assert_eq!(x, vec![3, 2, 4]);
        let x = set.shuffled_without_and_capped(&[&1], 2, &mut test_rng());
        assert_eq!(x, vec![3, 2]);
        // recreate the rng - otherwise we get failures on some architectures when cross-compiling,
        // likely due to usize differences pulling different amounts of randomness.
        let x = set.pick_random(&mut test_rng());
        assert_eq!(x, Some(&1));
        let x = set.pick_random_without(&[&3], &mut test_rng());
        assert_eq!(x, Some(&4));
        let mut set = set;
        set.remove_random(&mut test_rng());
        assert_eq!(set, IndexSet::from_iter([2, 3, 4]));
    }
    #[test]
    fn timer_map() {
        let mut map = TimerMap::new();
        let now = Instant::now();
        let times = [
            now - Duration::from_secs(1),
            now,
            now + Duration::from_secs(1),
            now + Duration::from_secs(2),
        ];
        map.insert(times[0], -1);
        map.insert(times[0], -2);
        map.insert(times[1], 0);
        map.insert(times[2], 1);
        map.insert(times[3], 2);
        map.insert(times[3], 3);
        // Entries at the same instant must come out in insertion order (seq tie-break).
        assert_eq!(
            map.to_vec(),
            vec![
                (times[0], -1),
                (times[0], -2),
                (times[1], 0),
                (times[2], 1),
                (times[3], 2),
                (times[3], 3)
            ]
        );
        assert_eq!(map.first(), Some(&times[0]));
        let drain = map.drain_until(&now);
        assert_eq!(
            drain.collect::<Vec<_>>(),
            vec![(times[0], -1), (times[0], -2), (times[1], 0),]
        );
        assert_eq!(
            map.to_vec(),
            vec![(times[2], 1), (times[3], 2), (times[3], 3)]
        );
        // Draining again at the same instant must yield nothing.
        let drain = map.drain_until(&now);
        assert_eq!(drain.collect::<Vec<_>>(), vec![]);
        let drain = map.drain_until(&(now + Duration::from_secs(10)));
        assert_eq!(
            drain.collect::<Vec<_>>(),
            vec![(times[2], 1), (times[3], 2), (times[3], 3)]
        );
    }
    #[test]
    fn hex() {
        #[derive(Eq, PartialEq)]
        struct Id([u8; 32]);
        idbytes_impls!(Id, "Id");
        let id: Id = [1u8; 32].into();
        // Display and FromStr must round-trip.
        assert_eq!(id, Id::from_str(&format!("{id}")).unwrap());
        assert_eq!(
            &format!("{id}"),
            "0101010101010101010101010101010101010101010101010101010101010101"
        );
        assert_eq!(
            &format!("{id:?}"),
            "Id(0101010101010101010101010101010101010101010101010101010101010101)"
        );
        assert_eq!(id.as_bytes(), &[1u8; 32]);
    }
    #[test]
    fn time_bound_cache() {
        let mut cache = TimeBoundCache::default();
        let t0 = Instant::now();
        let t1 = t0 + Duration::from_secs(1);
        let t2 = t0 + Duration::from_secs(2);
        cache.insert(1, 10, t0);
        cache.insert(2, 20, t1);
        cache.insert(3, 30, t1);
        cache.insert(4, 40, t2);
        assert_eq!(cache.get(&2), Some(&20));
        assert_eq!(cache.len(), 4);
        let removed = cache.expire_until(t1);
        assert_eq!(removed, 3);
        assert_eq!(cache.len(), 1);
        assert_eq!(cache.get(&2), None);
        assert_eq!(cache.get(&4), Some(&40));
        // Re-inserting key 5 with a later expiry must survive expiring the old time.
        let t3 = t2 + Duration::from_secs(1);
        cache.insert(5, 50, t2);
        assert_eq!(cache.expires(&5), Some(&t2));
        cache.insert(5, 50, t3);
        assert_eq!(cache.expires(&5), Some(&t3));
        cache.expire_until(t2);
        assert_eq!(cache.get(&4), None);
        assert_eq!(cache.get(&5), Some(&50));
    }
}