mirror of
https://mirror.suhoan.cn/https://github.com/EasyTier/EasyTier.git
synced 2025-12-15 22:27:26 +08:00
optimize memory issues (#767)
* optimize memory issues: 1. introduce jemalloc support, which can dump the current memory usage; 2. reduce the memory usage of the GlobalEvent broadcaster; 3. reduce the memory usage of the TCP & UDP tunnels. TODO: if a peer conn tunnel hangs, the unbounded channel of the peer RPC may consume a lot of memory; this should be improved. * select a port from 15888+ when the port is 0
This commit is contained in:
@@ -118,9 +118,21 @@ impl Peer {
|
||||
}
|
||||
|
||||
pub async fn add_peer_conn(&self, mut conn: PeerConn) {
|
||||
conn.set_close_event_sender(self.close_event_sender.clone());
|
||||
let close_event_sender = self.close_event_sender.clone();
|
||||
let close_notifier = conn.get_close_notifier();
|
||||
tokio::spawn(async move {
|
||||
let conn_id = close_notifier.get_conn_id();
|
||||
if let Some(mut waiter) = close_notifier.get_waiter().await {
|
||||
let _ = waiter.recv().await;
|
||||
}
|
||||
if let Err(e) = close_event_sender.send(conn_id).await {
|
||||
tracing::warn!(?conn_id, "failed to send close event: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
conn.start_recv_loop(self.packet_recv_chan.clone()).await;
|
||||
conn.start_pingpong();
|
||||
|
||||
self.global_ctx
|
||||
.issue_event(GlobalCtxEvent::PeerConnAdded(conn.get_conn_info()));
|
||||
self.conns.insert(conn.get_conn_id(), Arc::new(conn));
|
||||
|
||||
@@ -13,7 +13,7 @@ use futures::{StreamExt, TryFutureExt};
|
||||
use prost::Message;
|
||||
|
||||
use tokio::{
|
||||
sync::{broadcast, mpsc, Mutex},
|
||||
sync::{broadcast, Mutex},
|
||||
task::JoinSet,
|
||||
time::{timeout, Duration},
|
||||
};
|
||||
@@ -50,6 +50,41 @@ pub type PeerConnId = uuid::Uuid;
|
||||
const MAGIC: u32 = 0xd1e1a5e1;
|
||||
const VERSION: u32 = 1;
|
||||
|
||||
pub struct PeerConnCloseNotify {
|
||||
conn_id: PeerConnId,
|
||||
sender: Arc<std::sync::Mutex<Option<broadcast::Sender<()>>>>,
|
||||
}
|
||||
|
||||
impl PeerConnCloseNotify {
|
||||
fn new(conn_id: PeerConnId) -> Self {
|
||||
let (sender, _) = broadcast::channel(1);
|
||||
Self {
|
||||
conn_id,
|
||||
sender: Arc::new(std::sync::Mutex::new(Some(sender))),
|
||||
}
|
||||
}
|
||||
|
||||
fn notify_close(&self) {
|
||||
self.sender.lock().unwrap().take();
|
||||
}
|
||||
|
||||
pub async fn get_waiter(&self) -> Option<broadcast::Receiver<()>> {
|
||||
if let Some(sender) = self.sender.lock().unwrap().as_mut() {
|
||||
let receiver = sender.subscribe();
|
||||
return Some(receiver);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn get_conn_id(&self) -> PeerConnId {
|
||||
self.conn_id
|
||||
}
|
||||
|
||||
pub fn is_closed(&self) -> bool {
|
||||
self.sender.lock().unwrap().is_none()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PeerConn {
|
||||
conn_id: PeerConnId,
|
||||
|
||||
@@ -66,7 +101,7 @@ pub struct PeerConn {
|
||||
info: Option<HandshakeRequest>,
|
||||
is_client: Option<bool>,
|
||||
|
||||
close_event_sender: Option<mpsc::Sender<PeerConnId>>,
|
||||
close_event_notifier: Arc<PeerConnCloseNotify>,
|
||||
|
||||
ctrl_resp_sender: broadcast::Sender<ZCPacket>,
|
||||
|
||||
@@ -88,7 +123,7 @@ impl Debug for PeerConn {
|
||||
impl PeerConn {
|
||||
pub fn new(my_peer_id: PeerId, global_ctx: ArcGlobalCtx, tunnel: Box<dyn Tunnel>) -> Self {
|
||||
let tunnel_info = tunnel.info();
|
||||
let (ctrl_sender, _ctrl_receiver) = broadcast::channel(100);
|
||||
let (ctrl_sender, _ctrl_receiver) = broadcast::channel(8);
|
||||
|
||||
let peer_conn_tunnel_filter = StatsRecorderTunnelFilter::new();
|
||||
let throughput = peer_conn_tunnel_filter.filter_output();
|
||||
@@ -97,8 +132,10 @@ impl PeerConn {
|
||||
|
||||
let (recv, sink) = (mpsc_tunnel.get_stream(), mpsc_tunnel.get_sink());
|
||||
|
||||
let conn_id = PeerConnId::new_v4();
|
||||
|
||||
PeerConn {
|
||||
conn_id: PeerConnId::new_v4(),
|
||||
conn_id: conn_id.clone(),
|
||||
|
||||
my_peer_id,
|
||||
global_ctx,
|
||||
@@ -114,7 +151,8 @@ impl PeerConn {
|
||||
|
||||
info: None,
|
||||
is_client: None,
|
||||
close_event_sender: None,
|
||||
|
||||
close_event_notifier: Arc::new(PeerConnCloseNotify::new(conn_id)),
|
||||
|
||||
ctrl_resp_sender: ctrl_sender,
|
||||
|
||||
@@ -267,10 +305,8 @@ impl PeerConn {
|
||||
let mut stream = self.recv.lock().await.take().unwrap();
|
||||
let sink = self.sink.clone();
|
||||
let sender = packet_recv_chan.clone();
|
||||
let close_event_sender = self.close_event_sender.clone().unwrap();
|
||||
let conn_id = self.conn_id;
|
||||
let close_event_notifier = self.close_event_notifier.clone();
|
||||
let ctrl_sender = self.ctrl_resp_sender.clone();
|
||||
let _conn_info = self.get_conn_info();
|
||||
let conn_info_for_instrument = self.get_conn_info();
|
||||
|
||||
self.tasks.spawn(
|
||||
@@ -312,9 +348,7 @@ impl PeerConn {
|
||||
tracing::info!("end recving peer conn packet");
|
||||
|
||||
drop(sink);
|
||||
if let Err(e) = close_event_sender.send(conn_id).await {
|
||||
tracing::error!(error = ?e, "peer conn close event send error");
|
||||
}
|
||||
close_event_notifier.notify_close();
|
||||
|
||||
task_ret
|
||||
}
|
||||
@@ -335,17 +369,14 @@ impl PeerConn {
|
||||
self.throughput.clone(),
|
||||
);
|
||||
|
||||
let close_event_sender = self.close_event_sender.clone().unwrap();
|
||||
let conn_id = self.conn_id;
|
||||
let close_event_notifier = self.close_event_notifier.clone();
|
||||
|
||||
self.tasks.spawn(async move {
|
||||
pingpong.pingpong().await;
|
||||
|
||||
tracing::warn!(?pingpong, "pingpong task exit");
|
||||
|
||||
if let Err(e) = close_event_sender.send(conn_id).await {
|
||||
tracing::warn!("close event sender error: {:?}", e);
|
||||
}
|
||||
close_event_notifier.notify_close();
|
||||
|
||||
Ok(())
|
||||
});
|
||||
@@ -373,8 +404,8 @@ impl PeerConn {
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn set_close_event_sender(&mut self, sender: mpsc::Sender<PeerConnId>) {
|
||||
self.close_event_sender = Some(sender);
|
||||
pub fn get_close_notifier(&self) -> Arc<PeerConnCloseNotify> {
|
||||
self.close_event_notifier.clone()
|
||||
}
|
||||
|
||||
pub fn get_stats(&self) -> PeerConnStats {
|
||||
@@ -405,6 +436,13 @@ impl PeerConn {
|
||||
}
|
||||
}
|
||||
|
||||
// Dropping a `PeerConn` calls `notify_close` on its close notifier.
impl Drop for PeerConn {
|
||||
fn drop(&mut self) {
|
||||
// If someone drops a conn manually, the notifier would otherwise never be called.
|
||||
self.close_event_notifier.notify_close();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
@@ -496,15 +534,13 @@ mod tests {
|
||||
s_peer.do_handshake_as_server()
|
||||
);
|
||||
|
||||
s_peer.set_close_event_sender(tokio::sync::mpsc::channel(1).0);
|
||||
s_peer.start_recv_loop(create_packet_recv_chan().0).await;
|
||||
// do not start ping for s; s only responds to pings from c
|
||||
|
||||
assert!(c_ret.is_ok());
|
||||
assert!(s_ret.is_ok());
|
||||
|
||||
let (close_send, mut close_recv) = tokio::sync::mpsc::channel(1);
|
||||
c_peer.set_close_event_sender(close_send);
|
||||
let close_notifier = c_peer.get_close_notifier();
|
||||
c_peer.start_pingpong();
|
||||
c_peer.start_recv_loop(create_packet_recv_chan().0).await;
|
||||
|
||||
@@ -520,9 +556,9 @@ mod tests {
|
||||
tokio::time::sleep(Duration::from_secs(15)).await;
|
||||
|
||||
if conn_closed {
|
||||
assert!(close_recv.try_recv().is_ok());
|
||||
assert!(close_notifier.is_closed());
|
||||
} else {
|
||||
assert!(close_recv.try_recv().is_err());
|
||||
assert!(!close_notifier.is_closed());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -347,21 +347,43 @@ impl PeerManager {
|
||||
async fn start_peer_conn_close_event_handler(&self) {
|
||||
let dmap = self.directly_connected_conn_map.clone();
|
||||
let mut event_recv = self.global_ctx.subscribe();
|
||||
let peer_map = self.peers.clone();
|
||||
use tokio::sync::broadcast::error::RecvError;
|
||||
self.tasks.lock().await.spawn(async move {
|
||||
while let Ok(event) = event_recv.recv().await {
|
||||
match event {
|
||||
GlobalCtxEvent::PeerConnRemoved(info) => {
|
||||
if let Some(set) = dmap.get_mut(&info.peer_id) {
|
||||
let conn_id = info.conn_id.parse().unwrap();
|
||||
let old = set.remove(&conn_id);
|
||||
tracing::info!(
|
||||
?old,
|
||||
?info,
|
||||
"try remove conn id from directly connected map"
|
||||
);
|
||||
loop {
|
||||
match event_recv.recv().await {
|
||||
Err(RecvError::Closed) => {
|
||||
tracing::error!("peer conn close event handler exit");
|
||||
break;
|
||||
}
|
||||
Err(RecvError::Lagged(_)) => {
|
||||
tracing::warn!("peer conn close event handler lagged");
|
||||
event_recv = event_recv.resubscribe();
|
||||
let alive_conns = peer_map.get_alive_conns();
|
||||
for p in dmap.iter_mut() {
|
||||
p.retain(|x| alive_conns.contains_key(&(*p.key(), *x)));
|
||||
}
|
||||
dmap.retain(|_, v| !v.is_empty());
|
||||
}
|
||||
Ok(event) => {
|
||||
if let GlobalCtxEvent::PeerConnRemoved(info) = event {
|
||||
let mut need_remove = false;
|
||||
if let Some(set) = dmap.get_mut(&info.peer_id) {
|
||||
let conn_id = info.conn_id.parse().unwrap();
|
||||
let old = set.remove(&conn_id);
|
||||
tracing::info!(
|
||||
?old,
|
||||
?info,
|
||||
"try remove conn id from directly connected map"
|
||||
);
|
||||
need_remove = set.is_empty();
|
||||
}
|
||||
|
||||
if need_remove {
|
||||
dmap.remove(&info.peer_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -27,6 +27,7 @@ pub struct PeerMap {
|
||||
peer_map: DashMap<PeerId, Arc<Peer>>,
|
||||
packet_send: PacketRecvChan,
|
||||
routes: RwLock<Vec<ArcRoute>>,
|
||||
alive_conns: Arc<DashMap<(PeerId, PeerConnId), PeerConnInfo>>,
|
||||
}
|
||||
|
||||
impl PeerMap {
|
||||
@@ -37,6 +38,7 @@ impl PeerMap {
|
||||
peer_map: DashMap::new(),
|
||||
packet_send,
|
||||
routes: RwLock::new(Vec::new()),
|
||||
alive_conns: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,6 +50,7 @@ impl PeerMap {
|
||||
}
|
||||
|
||||
pub async fn add_new_peer_conn(&self, peer_conn: PeerConn) {
|
||||
self.maintain_alive_conns(&peer_conn);
|
||||
let peer_id = peer_conn.get_peer_id();
|
||||
let no_entry = self.peer_map.get(&peer_id).is_none();
|
||||
if no_entry {
|
||||
@@ -60,6 +63,30 @@ impl PeerMap {
|
||||
}
|
||||
}
|
||||
|
||||
fn maintain_alive_conns(&self, peer_conn: &PeerConn) {
|
||||
let close_notifier = peer_conn.get_close_notifier();
|
||||
let alive_conns_weak = Arc::downgrade(&self.alive_conns);
|
||||
let conn_id = close_notifier.get_conn_id();
|
||||
let conn_info = peer_conn.get_conn_info();
|
||||
self.alive_conns
|
||||
.insert((conn_info.peer_id, conn_id.clone()), conn_info.clone());
|
||||
tokio::spawn(async move {
|
||||
if let Some(mut waiter) = close_notifier.get_waiter().await {
|
||||
let _ = waiter.recv().await;
|
||||
}
|
||||
let mut alive_conn_count = 0;
|
||||
if let Some(alive_conns) = alive_conns_weak.upgrade() {
|
||||
alive_conns.remove(&(conn_info.peer_id, conn_id)).unwrap();
|
||||
alive_conn_count = alive_conns.len();
|
||||
}
|
||||
tracing::debug!(
|
||||
?conn_id,
|
||||
"peer conn is closed, current alive conns: {}",
|
||||
alive_conn_count
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/// Looks up `peer_id` in `peer_map` and returns a cloned `Arc` handle to the
/// peer, or `None` when no entry exists.
fn get_peer_by_id(&self, peer_id: PeerId) -> Option<Arc<Peer>> {
    let entry = self.peer_map.get(&peer_id)?;
    Some(Arc::clone(entry.value()))
}
|
||||
@@ -284,6 +311,13 @@ impl PeerMap {
|
||||
|
||||
Ok(!self.has_peer(gateway_id))
|
||||
}
|
||||
|
||||
pub fn get_alive_conns(&self) -> DashMap<(PeerId, PeerConnId), PeerConnInfo> {
|
||||
self.alive_conns
|
||||
.iter()
|
||||
.map(|v| (v.key().clone(), v.value().clone()))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PeerMap {
|
||||
|
||||
Reference in New Issue
Block a user