nat4-nat4 punch (#388)

This patch optimizes the UDP hole-punching logic:

1. Allow hole punching to start before the STUN test completes.
2. Add a lock to symmetric punching to avoid conflicts between concurrent hole-punching tasks.
3. Support hole punching for predictable NAT4-NAT4.
4. Make the retry backoff reasonable.
This commit is contained in:
Sijie.Sun
2024-10-06 22:49:18 +08:00
committed by GitHub
parent ba3da97ad4
commit 37ceb77bf6
24 changed files with 2748 additions and 1310 deletions

View File

@@ -15,6 +15,8 @@ pub mod foreign_network_manager;
pub mod encrypt;
pub mod peer_task;
#[cfg(test)]
pub mod tests;

View File

@@ -1058,7 +1058,7 @@ mod tests {
let ret = stub
.say_hello(
RpcController {},
RpcController::default(),
SayHelloRequest {
name: "abc".to_string(),
},

View File

@@ -539,7 +539,7 @@ impl RouteTable {
fn get_nat_type(&self, peer_id: PeerId) -> Option<NatType> {
self.peer_infos
.get(&peer_id)
.map(|x| NatType::try_from(x.udp_stun_info as i32).unwrap())
.map(|x| NatType::try_from(x.udp_stun_info as i32).unwrap_or_default())
}
fn build_peer_graph_from_synced_info<T: RouteCostCalculatorInterface>(
@@ -1322,7 +1322,7 @@ impl PeerRouteServiceImpl {
self.global_ctx.get_network_name(),
);
let mut ctrl = BaseController {};
let mut ctrl = BaseController::default();
ctrl.set_timeout_ms(3000);
let ret = rpc_stub
.sync_route_info(

View File

@@ -224,7 +224,10 @@ pub mod tests {
let msg = random_string(8192);
let ret = stub
.say_hello(RpcController {}, SayHelloRequest { name: msg.clone() })
.say_hello(
RpcController::default(),
SayHelloRequest { name: msg.clone() },
)
.await
.unwrap();
@@ -233,7 +236,10 @@ pub mod tests {
let msg = random_string(10);
let ret = stub
.say_hello(RpcController {}, SayHelloRequest { name: msg.clone() })
.say_hello(
RpcController::default(),
SayHelloRequest { name: msg.clone() },
)
.await
.unwrap();
@@ -281,7 +287,10 @@ pub mod tests {
);
let ret = stub
.say_hello(RpcController {}, SayHelloRequest { name: msg.clone() })
.say_hello(
RpcController::default(),
SayHelloRequest { name: msg.clone() },
)
.await
.unwrap();
assert_eq!(ret.greeting, format!("Hello {}!", msg));
@@ -289,14 +298,20 @@ pub mod tests {
// call again
let msg = random_string(16 * 1024);
let ret = stub
.say_hello(RpcController {}, SayHelloRequest { name: msg.clone() })
.say_hello(
RpcController::default(),
SayHelloRequest { name: msg.clone() },
)
.await
.unwrap();
assert_eq!(ret.greeting, format!("Hello {}!", msg));
let msg = random_string(16 * 1024);
let ret = stub
.say_hello(RpcController {}, SayHelloRequest { name: msg.clone() })
.say_hello(
RpcController::default(),
SayHelloRequest { name: msg.clone() },
)
.await
.unwrap();
assert_eq!(ret.greeting, format!("Hello {}!", msg));
@@ -340,13 +355,19 @@ pub mod tests {
let msg = random_string(16 * 1024);
let ret = stub1
.say_hello(RpcController {}, SayHelloRequest { name: msg.clone() })
.say_hello(
RpcController::default(),
SayHelloRequest { name: msg.clone() },
)
.await
.unwrap();
assert_eq!(ret.greeting, format!("Hello {}!", msg));
let ret = stub2
.say_hello(RpcController {}, SayHelloRequest { name: msg.clone() })
.say_hello(
RpcController::default(),
SayHelloRequest { name: msg.clone() },
)
.await;
assert!(ret.is_err() && ret.unwrap_err().to_string().contains("Timeout"));
}

View File

@@ -0,0 +1,138 @@
use std::result::Result;
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use dashmap::DashMap;
use tokio::select;
use tokio::sync::Notify;
use tokio::task::JoinHandle;
use crate::common::scoped_task::ScopedTask;
use anyhow::Error;
use super::peer_manager::PeerManager;
/// Strategy plugged into [`PeerTaskManager`]: it decides which items need a
/// background task and knows how to start one.
///
/// The manager clones the launcher into its main loop, which is why the trait
/// requires `Clone + Send + Sync + 'static`.
#[async_trait]
pub trait PeerTaskLauncher: Send + Sync + Clone + 'static {
    /// Shared state created once by [`PeerTaskLauncher::new_data`] and handed
    /// back to every other hook.
    type Data;
    /// Key identifying one unit of work; the manager tracks at most one
    /// running task per item.
    type CollectPeerItem;
    /// Success value produced by a task.
    type TaskRet;

    /// Builds the launcher's shared state from the peer manager.
    fn new_data(&self, peer_mgr: Arc<PeerManager>) -> Self::Data;

    /// Returns the items that currently need a task.
    ///
    /// The manager calls this once per loop pass.
    async fn collect_peers_need_task(&self, data: &Self::Data) -> Vec<Self::CollectPeerItem>;

    /// Starts the task for `item` and returns its join handle.
    async fn launch_task(
        &self,
        data: &Self::Data,
        item: Self::CollectPeerItem,
    ) -> JoinHandle<Result<Self::TaskRet, Error>>;

    /// Called by the manager when a pass finds no items needing a task and no
    /// tracked task is left. The default implementation does nothing.
    async fn all_task_done(&self, _data: &Self::Data) {}

    /// Time in milliseconds the manager sleeps between passes unless it is
    /// woken earlier. Defaults to five seconds.
    fn loop_interval_ms(&self) -> u64 {
        5_000
    }
}
/// Drives a [`PeerTaskLauncher`]: periodically asks it which items need a task
/// and keeps one task per item running in the background.
pub struct PeerTaskManager<Launcher>
where
    Launcher: PeerTaskLauncher,
{
    /// Strategy object; cloned into the main loop when it is started.
    launcher: Launcher,
    /// Peer manager this instance was created with.
    peer_mgr: Arc<PeerManager>,
    /// Handle of the spawned main loop, `None` until `start` is called.
    main_loop_task: Mutex<Option<ScopedTask<()>>>,
    /// Wakes the main loop ahead of its regular interval.
    run_signal: Arc<Notify>,
    /// Launcher state created once in `new` and cloned out on request.
    data: Launcher::Data,
}
impl<D, C, T, L> PeerTaskManager<L>
where
D: Send + Sync + Clone + 'static,
C: std::fmt::Debug + Send + Sync + Clone + core::hash::Hash + Eq + 'static,
T: Send + 'static,
L: PeerTaskLauncher<Data = D, CollectPeerItem = C, TaskRet = T> + 'static,
{
pub fn new(launcher: L, peer_mgr: Arc<PeerManager>) -> Self {
let data = launcher.new_data(peer_mgr.clone());
Self {
launcher,
peer_mgr,
main_loop_task: Mutex::new(None),
run_signal: Arc::new(Notify::new()),
data,
}
}
pub fn start(&self) {
let task = tokio::spawn(Self::main_loop(
self.launcher.clone(),
self.data.clone(),
self.run_signal.clone(),
))
.into();
self.main_loop_task.lock().unwrap().replace(task);
}
async fn main_loop(launcher: L, data: D, signal: Arc<Notify>) {
let peer_task_map = Arc::new(DashMap::<C, ScopedTask<Result<T, Error>>>::new());
loop {
let peers_to_connect = launcher.collect_peers_need_task(&data).await;
// remove task not in peers_to_connect
let mut to_remove = vec![];
for item in peer_task_map.iter() {
if !peers_to_connect.contains(item.key()) || item.value().is_finished() {
to_remove.push(item.key().clone());
}
}
tracing::debug!(
?peers_to_connect,
?to_remove,
"got peers to connect and remove"
);
for key in to_remove {
if let Some((_, task)) = peer_task_map.remove(&key) {
task.abort();
match task.await {
Ok(Ok(_)) => {}
Ok(Err(task_ret)) => {
tracing::error!(?task_ret, "hole punching task failed");
}
Err(e) => {
tracing::error!(?e, "hole punching task aborted");
}
}
}
}
if !peers_to_connect.is_empty() {
for item in peers_to_connect {
if peer_task_map.contains_key(&item) {
continue;
}
tracing::debug!(?item, "launch hole punching task");
peer_task_map
.insert(item.clone(), launcher.launch_task(&data, item).await.into());
}
} else if peer_task_map.is_empty() {
tracing::debug!("all task done");
launcher.all_task_done(&data).await;
}
select! {
_ = tokio::time::sleep(std::time::Duration::from_millis(
launcher.loop_interval_ms(),
)) => {},
_ = signal.notified() => {}
}
}
}
pub async fn run_immediately(&self) {
self.run_signal.notify_one();
}
pub fn data(&self) -> D {
self.data.clone()
}
}