feat(core): Refactor IDN and URL handling logic (#1533)

* feat(core): Refactor IDN and URL handling logic

* feat(tests): add dual_convert option for URL serialization in IDN tests
This commit is contained in:
Mg Pig
2025-11-03 22:15:40 +08:00
committed by GitHub
parent 8ab98bba8f
commit 6bb2fd9a15
5 changed files with 80 additions and 222 deletions

View File

@@ -68,7 +68,7 @@ pub async fn socket_addrs(
url: &url::Url,
default_port_number: impl Fn() -> Option<u16>,
) -> Result<Vec<SocketAddr>, Error> {
let host = url.host_str().ok_or(Error::InvalidUrl(url.to_string()))?;
let host = url.host().ok_or(Error::InvalidUrl(url.to_string()))?;
let port = url
.port()
.or_else(default_port_number)
@@ -84,9 +84,12 @@ pub async fn socket_addrs(
};
// if host is an ip address, return it directly
if let Ok(ip) = host.parse::<std::net::IpAddr>() {
return Ok(vec![SocketAddr::new(ip, port)]);
match host {
url::Host::Ipv4(ip) => return Ok(vec![SocketAddr::new(std::net::IpAddr::V4(ip), port)]),
url::Host::Ipv6(ip) => return Ok(vec![SocketAddr::new(std::net::IpAddr::V6(ip), port)]),
_ => {}
}
let host = host.to_string();
if ALLOW_USE_SYSTEM_DNS_RESOLVER.load(std::sync::atomic::Ordering::Relaxed) {
let socket_addr = format!("{}:{}", host, port);
@@ -103,7 +106,7 @@ pub async fn socket_addrs(
}
// use hickory_resolver
let ret = RESOLVER.lookup_ip(host).await.with_context(|| {
let ret = RESOLVER.lookup_ip(&host).await.with_context(|| {
format!(
"hickory dns lookup_ip failed, host: {}, port: {}",
host, port

View File

@@ -1,210 +1,70 @@
use idna::domain_to_ascii;
pub fn convert_idn_to_ascii(url_str: &str) -> Result<String, String> {
if !url_str.is_ascii() {
let mut url_parts = url_str.splitn(2, "://");
let scheme = url_parts.next().unwrap_or("");
let rest = url_parts.next().unwrap_or(url_str);
let (host_part, port_part, path_part) = {
let mut path_and_rest = rest.splitn(2, '/');
let host_port_part = path_and_rest.next().unwrap_or("");
let path_part = path_and_rest
.next()
.map(|s| format!("/{}", s))
.unwrap_or_default();
if host_port_part.starts_with('[') {
if let Some(end_bracket_pos) = host_port_part.find(']') {
let host_part = &host_port_part[..end_bracket_pos + 1];
let remaining = &host_port_part[end_bracket_pos + 1..];
if remaining.starts_with(':') {
if let Some(port_str) = remaining.strip_prefix(':') {
if port_str.chars().all(|c| c.is_ascii_digit()) {
(host_part, format!(":{}", port_str), path_part)
} else {
(host_part, String::new(), path_part)
}
} else {
(host_part, String::new(), path_part)
}
} else {
(host_part, String::new(), path_part)
}
} else {
(host_port_part, String::new(), path_part)
}
} else {
let (host_part, port_part) = if let Some(pos) = host_port_part.rfind(':') {
let port_str = &host_port_part[pos + 1..];
if port_str.chars().all(|c| c.is_ascii_digit()) {
(&host_port_part[..pos], format!(":{}", port_str))
} else {
(host_port_part, String::new())
}
} else {
(host_port_part, String::new())
};
(host_part, port_part, path_part)
}
};
use percent_encoding::percent_decode_str;
if !host_part.is_ascii() {
let ascii_host = domain_to_ascii(host_part)
.map_err(|e| format!("Failed to convert IDN to ASCII: {}", e))?;
let result = format!("{}://{}{}{}", scheme, ascii_host, port_part, path_part);
Ok(result)
} else {
Ok(url_str.to_string())
}
} else {
Ok(url_str.to_string())
pub fn convert_idn_to_ascii(mut url: url::Url) -> anyhow::Result<url::Url> {
if url.is_special() {
return Ok(url);
}
}
pub fn safe_convert_idn_to_ascii(url_str: &str) -> String {
convert_idn_to_ascii(url_str).unwrap_or_else(|_| url_str.to_string())
if let Some(domain) = url.domain() {
let domain = percent_decode_str(domain).decode_utf8()?;
let domain = domain_to_ascii(&domain)?;
url.set_host(Some(&domain))?;
}
Ok(url)
}
#[cfg(test)]
mod tests {
use super::*;
use rstest::rstest;
#[test]
fn test_ascii_only_urls() {
assert_eq!(
convert_idn_to_ascii("https://example.com").unwrap(),
"https://example.com"
);
assert_eq!(
convert_idn_to_ascii("http://test.org:8080/path").unwrap(),
"http://test.org:8080/path"
);
}
#[rstest]
// test_ascii_only_urls
#[case("example.com", "example.com")]
#[case("test.org:8080/path", "test.org:8080/path")]
// test_unicode_domains
#[case("räksmörgås.nu", "xn--rksmrgs-5wao1o.nu")]
#[case("中文.测试", "xn--fiq228c.xn--0zwm56d")]
// test_unicode_domains_with_port
#[case("räksmörgås.nu:8080", "xn--rksmrgs-5wao1o.nu:8080")]
// test_unicode_domains_with_port_and_path
#[case("例子.测试/path", "xn--fsqu00a.xn--0zwm56d/path")]
#[case("中文.测试:9000/api", "xn--fiq228c.xn--0zwm56d:9000/api")]
#[case("räksmörgås.nu:8080/path", "xn--rksmrgs-5wao1o.nu:8080/path")]
// test_unicode_domains_with_port_and_unicode_path
#[case(
"中文.测试:8000/用户/管理",
"xn--fiq228c.xn--0zwm56d:8000/%E7%94%A8%E6%88%B7/%E7%AE%A1%E7%90%86"
)]
// test_ipv6_literals & test_ipv6_with_unicode_path
#[case("[2001:db8::1]:8080", "[2001:db8::1]:8080")]
#[case("[2001:db8::1]/path", "[2001:db8::1]/path")]
#[case(
"[2001:db8::1]/路径/资源",
"[2001:db8::1]/%E8%B7%AF%E5%BE%84/%E8%B5%84%E6%BA%90"
)]
fn test_convert_idn_to_ascii_cases(
#[case] host_part: &str,
#[case] expected_host_part: &str,
#[values("tcp", "udp", "ws", "wss", "wg", "quic", "http", "https")] protocol: &str,
#[values(false, true)] dual_convert: bool,
) {
let input = url::Url::parse(&format!("{}://{}", protocol, host_part)).unwrap();
let input = if dual_convert {
// in case url is serialized/deserialized as string somewhere else
input.to_string().parse().unwrap()
} else {
input
};
let actual = convert_idn_to_ascii(input.clone()).unwrap().to_string();
#[test]
fn test_unicode_domains() {
assert_eq!(
convert_idn_to_ascii("https://räksmörgås.nu").unwrap(),
"https://xn--rksmrgs-5wao1o.nu"
);
assert_eq!(
convert_idn_to_ascii("https://例子.测试").unwrap(),
"https://xn--fsqu00a.xn--0zwm56d"
);
}
let mut expected = format!("{}://{}", protocol, expected_host_part);
#[test]
fn test_chinese_domains() {
assert_eq!(
convert_idn_to_ascii("https://中文.测试").unwrap(),
"https://xn--fiq228c.xn--0zwm56d"
);
assert_eq!(
convert_idn_to_ascii("https://公司.中国").unwrap(),
"https://xn--55qx5d.xn--fiqs8s"
);
assert_eq!(
convert_idn_to_ascii("https://网络.测试").unwrap(),
"https://xn--io0a7i.xn--0zwm56d"
);
}
// special schemes (here: ws, wss, http, https) are serialized with a trailing slash when there's no path after host/port
if input.is_special() && actual.ends_with("/") && !expected_host_part.ends_with("/") {
expected.push('/');
}
#[test]
fn test_unicode_domains_with_port() {
assert_eq!(
convert_idn_to_ascii("https://räksmörgås.nu:8080").unwrap(),
"https://xn--rksmrgs-5wao1o.nu:8080"
);
assert_eq!(
convert_idn_to_ascii("http://例子.测试:3000/path").unwrap(),
"http://xn--fsqu00a.xn--0zwm56d:3000/path"
);
assert_eq!(
convert_idn_to_ascii("https://中文.测试:9000/api").unwrap(),
"https://xn--fiq228c.xn--0zwm56d:9000/api"
);
}
#[test]
fn test_unicode_domains_with_path() {
assert_eq!(
convert_idn_to_ascii("https://räksmörgås.nu/path/to/resource").unwrap(),
"https://xn--rksmrgs-5wao1o.nu/path/to/resource"
);
assert_eq!(
convert_idn_to_ascii("http://例子.测试/api/v1").unwrap(),
"http://xn--fsqu00a.xn--0zwm56d/api/v1"
);
assert_eq!(
convert_idn_to_ascii("https://中文.测试/api/users").unwrap(),
"https://xn--fiq228c.xn--0zwm56d/api/users"
);
}
#[test]
fn test_unicode_domains_with_port_and_path() {
assert_eq!(
convert_idn_to_ascii("https://räksmörgås.nu:8080/path/to/resource").unwrap(),
"https://xn--rksmrgs-5wao1o.nu:8080/path/to/resource"
);
assert_eq!(
convert_idn_to_ascii("http://例子.测试:9000/api/v1/users").unwrap(),
"http://xn--fsqu00a.xn--0zwm56d:9000/api/v1/users"
);
assert_eq!(
convert_idn_to_ascii("https://中文.测试:8000/用户/管理").unwrap(),
"https://xn--fiq228c.xn--0zwm56d:8000/用户/管理"
);
}
#[test]
fn test_ipv6_literals() {
assert_eq!(
convert_idn_to_ascii("https://[2001:db8::1]:8080").unwrap(),
"https://[2001:db8::1]:8080"
);
assert_eq!(
convert_idn_to_ascii("https://[2001:db8::1]/path").unwrap(),
"https://[2001:db8::1]/path"
);
assert_eq!(
convert_idn_to_ascii("https://[2001:db8::1]/路径/资源").unwrap(),
"https://[2001:db8::1]/路径/资源"
);
}
#[test]
fn test_invalid_port_format() {
let result = convert_idn_to_ascii("https://räksmörgås.nu:notaport").unwrap();
assert!(result.contains("xn--") && result.contains(":notaport"));
}
#[test]
fn test_safe_conversion() {
assert_eq!(
safe_convert_idn_to_ascii("https://example.com"),
"https://example.com"
);
assert_eq!(
safe_convert_idn_to_ascii("https://中文.测试"),
"https://xn--fiq228c.xn--0zwm56d"
);
}
#[test]
fn test_edge_cases() {
// Without scheme '://', entire string is treated as host part
let result = convert_idn_to_ascii("räksmörgås.nu").unwrap();
assert_eq!(result, "räksmörgås.nu://xn--rksmrgs-5wao1o.nu");
assert_eq!(
convert_idn_to_ascii("https://test.例子.com").unwrap(),
"https://test.xn--fsqu00a.com"
);
}
#[test]
fn test_ipv6_with_unicode_path() {
assert_eq!(
convert_idn_to_ascii("https://[2001:db8::1]/路径/资源").unwrap(),
"https://[2001:db8::1]/路径/资源"
);
assert_eq!(actual, expected);
}
}

View File

@@ -10,7 +10,7 @@ use crate::tunnel::quic::QUICTunnelConnector;
#[cfg(feature = "wireguard")]
use crate::tunnel::wireguard::{WgConfig, WgTunnelConnector};
use crate::{
common::{error::Error, global_ctx::ArcGlobalCtx, network::IPCollector},
common::{error::Error, global_ctx::ArcGlobalCtx, idn, network::IPCollector},
tunnel::{
check_scheme_and_get_socket_addr, ring::RingTunnelConnector, tcp::TcpTunnelConnector,
udp::UdpTunnelConnector, IpVersion, TunnelConnector,
@@ -58,6 +58,7 @@ pub async fn create_connector_by_url(
ip_version: IpVersion,
) -> Result<Box<dyn TunnelConnector + 'static>, Error> {
let url = url::Url::parse(url).map_err(|_| Error::InvalidUrl(url.to_owned()))?;
let url = idn::convert_idn_to_ascii(url)?;
let mut connector: Box<dyn TunnelConnector + 'static> = match url.scheme() {
"tcp" => {
let dst_addr =

View File

@@ -1169,6 +1169,9 @@ async fn run_main(cli: Cli) -> anyhow::Result<()> {
let token = config_server_url
.path_segments()
.and_then(|mut x| x.next())
.map(|x| percent_encoding::percent_decode_str(x).decode_utf8())
.transpose()
.with_context(|| "failed to decode config server token")?
.map(|x| x.to_string())
.unwrap_or_default();

View File

@@ -10,7 +10,6 @@ use crate::{
},
constants::EASYTIER_VERSION,
global_ctx::{EventBusSubscriber, GlobalCtxEvent},
idn::safe_convert_idn_to_ascii,
},
instance::instance::Instance,
proto::api::instance::list_peer_route_pair,
@@ -525,13 +524,9 @@ impl NetworkConfig {
{
NetworkingMethod::PublicServer => {
let public_server_url = self.public_server_url.clone().unwrap_or_default();
let converted_public_server_url = safe_convert_idn_to_ascii(&public_server_url);
cfg.set_peers(vec![PeerConfig {
uri: converted_public_server_url.parse().with_context(|| {
format!(
"failed to parse public server uri: {}",
converted_public_server_url
)
uri: public_server_url.parse().with_context(|| {
format!("failed to parse public server uri: {}", public_server_url)
})?,
}]);
}
@@ -541,11 +536,10 @@ impl NetworkConfig {
if peer_url.is_empty() {
continue;
}
let converted_peer_url = safe_convert_idn_to_ascii(peer_url);
peers.push(PeerConfig {
uri: converted_peer_url.parse().with_context(|| {
format!("failed to parse peer uri: {}", converted_peer_url)
})?,
uri: peer_url
.parse()
.with_context(|| format!("failed to parse peer uri: {}", peer_url))?,
});
}
@@ -559,10 +553,11 @@ impl NetworkConfig {
if listener_url.is_empty() {
continue;
}
let converted_listener_url = safe_convert_idn_to_ascii(listener_url);
listener_urls.push(converted_listener_url.parse().with_context(|| {
format!("failed to parse listener uri: {}", converted_listener_url)
})?);
listener_urls.push(
listener_url
.parse()
.with_context(|| format!("failed to parse listener uri: {}", listener_url))?,
);
}
cfg.set_listeners(listener_urls);
@@ -656,12 +651,8 @@ impl NetworkConfig {
self.mapped_listeners
.iter()
.map(|s| {
let converted_s = safe_convert_idn_to_ascii(s);
converted_s
.parse()
.with_context(|| {
format!("mapped listener is not a valid url: {}", converted_s)
})
s.parse()
.with_context(|| format!("mapped listener is not a valid url: {}", s))
.unwrap()
})
.map(|s: url::Url| {