Skip to content

Commit 5f81ae3

Browse files
committed
Improve TCP keepalive and idle timeout for mobile clients
TCP keepalive was configured (SetKeepAlivePeriod) but never actually enabled (SO_KEEPALIVE) on accepted client connections. Go 1.26's SetKeepAlivePeriod only sets TCP_KEEPIDLE — it does not call setsockopt(SO_KEEPALIVE, 1). Without SO_KEEPALIVE the kernel never sends probe packets, so dead connections from sleeping mobile clients linger until the idle timeout fires.

Replace SetKeepAlive + SetKeepAlivePeriod with net.KeepAliveConfig (available since Go 1.24) for explicit per-socket control:

- Idle: 30s (time before first probe)
- Interval: 10s (between probes)
- Count: 3 (failed probes to declare dead)

This detects dead connections in ~60s instead of relying on system defaults (tcp_keepalive_intvl=75s, probes=9 → up to 11 minutes).

Increase the default idle timeout from 1 minute to 5 minutes. MTProto clients send ping_delay_disconnect every ~60s, which resets the idle timer. The previous 1-minute default created a race: if a ping arrived even 1–2 seconds late, the relay was killed. A 5-minute window also survives typical mobile sleep periods (phone idle for 2–5 min), where the NAT mapping is still alive and the connection can resume without reconnecting.

Ref: #132
1 parent 3a68ea5 commit 5f81ae3

File tree

10 files changed

+233
-10
lines changed

10 files changed

+233
-10
lines changed

example.config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ proxies = [
211211
[network.timeout]
212212
tcp = "5s"
213213
http = "10s"
214-
idle = "1m"
214+
idle = "5m"
215215

216216
# mtg has to mimic real websites. This does not only mean domain fronting; it also
217217
# means that traffic characteristics should be similar to real world traffic.

internal/cli/run_proxy.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"fmt"
66
"net"
77
"os"
8-
"time"
98

109
"github.com/9seconds/mtg/v2/antireplay"
1110
"github.com/9seconds/mtg/v2/events"
@@ -263,7 +262,7 @@ func runProxy(conf *config.Config, version string) error { //nolint: funlen
263262

264263
AllowFallbackOnUnknownDC: conf.AllowFallbackOnUnknownDC.Get(false),
265264
TolerateTimeSkewness: conf.TolerateTimeSkewness.Value,
266-
IdleTimeout: conf.Network.Timeout.Idle.Get(time.Minute),
265+
IdleTimeout: conf.Network.Timeout.Idle.Get(mtglib.DefaultIdleTimeout),
267266

268267
DoppelGangerURLs: doppelGangerURLs,
269268
DoppelGangerPerRaid: conf.Defense.Doppelganger.Repeats.Get(mtglib.DoppelGangerPerRaid),

mtglib/init.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ const (
7777
// DefaultIdleTimeout is a default timeout for closing a connection in case of
7878
// idling.
7979
//
80-
// Deprecated: no longer in use because of changed TCP relay algorithm.
81-
DefaultIdleTimeout = time.Minute
80+
// Set to 5 minutes to survive typical mobile sleep periods (2-5 min) and
81+
// avoid racing with MTProto ping_delay_disconnect (~60s interval).
82+
DefaultIdleTimeout = 5 * time.Minute
8283

8384
// DefaultTolerateTimeSkewness is a default timeout for time skewness on a
8485
// faketls timeout verification.

mtglib/proxy_opts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ func (p ProxyOpts) getPreferIP() string {
217217

218218
func (p ProxyOpts) getIdleTimeout() time.Duration {
219219
if p.IdleTimeout == 0 {
220-
return time.Minute
220+
return DefaultIdleTimeout
221221
}
222222

223223
return p.IdleTimeout

network/init.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,22 @@ const (
3636

3737
// DefaultTCPKeepAlivePeriod defines a time period between 2 consecutive
3838
// probes.
39+
//
40+
// Deprecated: use DefaultKeepAliveIdle and DefaultKeepAliveInterval instead.
3941
DefaultTCPKeepAlivePeriod = 10 * time.Second
4042

43+
// DefaultKeepAliveIdle is the time a connection must be idle before
44+
// the first keepalive probe is sent.
45+
DefaultKeepAliveIdle = 30 * time.Second
46+
47+
// DefaultKeepAliveInterval is the time between consecutive keepalive
48+
// probes.
49+
DefaultKeepAliveInterval = 10 * time.Second
50+
51+
// DefaultKeepAliveCount is the number of unacknowledged probes before
52+
// the connection is considered dead.
53+
DefaultKeepAliveCount = 3
54+
4155
// ProxyDialerOpenThreshold is used for load balancing SOCKS5 dialer only.
4256
//
4357
// This dialer uses a circuit breaker with 3 stages: OPEN, HALF_OPEN and

network/sockopts.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,13 @@ func SetServerSocketOptions(conn net.Conn, bufferSize int) error {
2020
}
2121

2222
func setCommonSocketOptions(conn *net.TCPConn) error {
23-
if err := conn.SetKeepAlivePeriod(DefaultTCPKeepAlivePeriod); err != nil {
24-
return fmt.Errorf("cannot set time period of TCP keepalive probes: %w", err)
23+
if err := conn.SetKeepAliveConfig(net.KeepAliveConfig{
24+
Enable: true,
25+
Idle: DefaultKeepAliveIdle,
26+
Interval: DefaultKeepAliveInterval,
27+
Count: DefaultKeepAliveCount,
28+
}); err != nil {
29+
return fmt.Errorf("cannot configure TCP keepalive: %w", err)
2530
}
2631

2732
if err := conn.SetLinger(tcpLingerTimeout); err != nil {

network/sockopts_test.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
//go:build linux || darwin
2+
// +build linux darwin
3+
4+
package network_test
5+
6+
import (
7+
"net"
8+
"runtime"
9+
"syscall"
10+
"testing"
11+
"time"
12+
13+
"github.com/9seconds/mtg/v2/network"
14+
"github.com/stretchr/testify/require"
15+
"golang.org/x/sys/unix"
16+
)
17+
18+
func tcpKeepIdleOption() int {
19+
if runtime.GOOS == "darwin" {
20+
return 0x10 // TCP_KEEPALIVE on macOS
21+
}
22+
23+
return 0x4 // TCP_KEEPIDLE on Linux
24+
}
25+
26+
func TestSetClientSocketOptionsKeepAlive(t *testing.T) {
27+
t.Parallel()
28+
29+
listener, err := net.Listen("tcp", "127.0.0.1:0")
30+
require.NoError(t, err)
31+
defer func() {
32+
err := listener.Close()
33+
require.NoError(t, err)
34+
}()
35+
36+
type dialResult struct {
37+
conn net.Conn
38+
err error
39+
}
40+
41+
dialDone := make(chan dialResult, 1)
42+
43+
go func() {
44+
c, err := net.Dial("tcp", listener.Addr().String())
45+
dialDone <- dialResult{conn: c, err: err}
46+
}()
47+
48+
tcpListener, ok := listener.(*net.TCPListener)
49+
require.True(t, ok, "listener must be a *net.TCPListener")
50+
51+
require.NoError(t, tcpListener.SetDeadline(time.Now().Add(5*time.Second)))
52+
53+
accepted, err := listener.Accept()
54+
require.NoError(t, err)
55+
defer func() {
56+
err := accepted.Close()
57+
require.NoError(t, err)
58+
}()
59+
60+
dr := <-dialDone
61+
require.NoError(t, dr.err)
62+
defer func() {
63+
err := dr.conn.Close()
64+
require.NoError(t, err)
65+
}()
66+
67+
err = network.SetClientSocketOptions(accepted, 0)
68+
require.NoError(t, err)
69+
70+
tcpConn := accepted.(*net.TCPConn)
71+
72+
rawConn, err := tcpConn.SyscallConn()
73+
require.NoError(t, err)
74+
75+
err = rawConn.Control(func(fd uintptr) {
76+
val, err := unix.GetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_KEEPALIVE)
77+
require.NoError(t, err)
78+
require.NotEqual(t, 0, val, "SO_KEEPALIVE should be enabled")
79+
80+
idle, err := unix.GetsockoptInt(int(fd), syscall.IPPROTO_TCP, tcpKeepIdleOption())
81+
require.NoError(t, err)
82+
require.Equal(t, int(network.DefaultKeepAliveIdle.Seconds()), idle)
83+
84+
interval, err := unix.GetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_KEEPINTVL)
85+
require.NoError(t, err)
86+
require.Equal(t, int(network.DefaultKeepAliveInterval.Seconds()), interval)
87+
88+
count, err := unix.GetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_KEEPCNT)
89+
require.NoError(t, err)
90+
require.Equal(t, network.DefaultKeepAliveCount, count)
91+
})
92+
require.NoError(t, err)
93+
}

network/v2/init.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,22 @@ const (
2626

2727
// DefaultTCPKeepAlivePeriod defines a time period between 2 consecutive
2828
// probes.
29+
//
30+
// Deprecated: use DefaultKeepAliveIdle and DefaultKeepAliveInterval instead.
2931
DefaultTCPKeepAlivePeriod = 10 * time.Second
3032

33+
// DefaultKeepAliveIdle is the time a connection must be idle before
34+
// the first keepalive probe is sent.
35+
DefaultKeepAliveIdle = 30 * time.Second
36+
37+
// DefaultKeepAliveInterval is the time between consecutive keepalive
38+
// probes.
39+
DefaultKeepAliveInterval = 10 * time.Second
40+
41+
// DefaultKeepAliveCount is the number of unacknowledged probes before
42+
// the connection is considered dead.
43+
DefaultKeepAliveCount = 3
44+
3145
// User Agent to use in HTTP client.
3246
UserAgent = "curl/8.5.0"
3347

network/v2/sockopts.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,13 @@ import (
66
)
77

88
func setCommonSocketOptions(conn *net.TCPConn) error {
9-
if err := conn.SetKeepAlivePeriod(DefaultTCPKeepAlivePeriod); err != nil {
10-
return fmt.Errorf("cannot set time period of TCP keepalive probes: %w", err)
9+
if err := conn.SetKeepAliveConfig(net.KeepAliveConfig{
10+
Enable: true,
11+
Idle: DefaultKeepAliveIdle,
12+
Interval: DefaultKeepAliveInterval,
13+
Count: DefaultKeepAliveCount,
14+
}); err != nil {
15+
return fmt.Errorf("cannot configure TCP keepalive: %w", err)
1116
}
1217

1318
if err := conn.SetLinger(tcpLingerTimeout); err != nil {

network/v2/sockopts_test.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
//go:build linux || darwin
2+
// +build linux darwin
3+
4+
package network
5+
6+
import (
7+
"net"
8+
"runtime"
9+
"syscall"
10+
"testing"
11+
"time"
12+
13+
"github.com/stretchr/testify/require"
14+
"golang.org/x/sys/unix"
15+
)
16+
17+
func tcpKeepIdleOption() int {
18+
if runtime.GOOS == "darwin" {
19+
return 0x10 // TCP_KEEPALIVE on macOS
20+
}
21+
22+
return 0x4 // TCP_KEEPIDLE on Linux
23+
}
24+
25+
func TestSetCommonSocketOptionsKeepAlive(t *testing.T) {
26+
t.Parallel()
27+
28+
listener, err := net.Listen("tcp", "127.0.0.1:0")
29+
require.NoError(t, err)
30+
defer func() {
31+
err := listener.Close()
32+
require.NoError(t, err)
33+
}()
34+
35+
type dialResult struct {
36+
conn net.Conn
37+
err error
38+
}
39+
40+
dialDone := make(chan dialResult, 1)
41+
42+
go func() {
43+
c, err := net.Dial("tcp", listener.Addr().String())
44+
dialDone <- dialResult{conn: c, err: err}
45+
}()
46+
47+
tcpListener, ok := listener.(*net.TCPListener)
48+
require.True(t, ok, "listener must be a *net.TCPListener")
49+
50+
require.NoError(t, tcpListener.SetDeadline(time.Now().Add(5*time.Second)))
51+
52+
accepted, err := listener.Accept()
53+
require.NoError(t, err)
54+
defer func() {
55+
err := accepted.Close()
56+
require.NoError(t, err)
57+
}()
58+
59+
dr := <-dialDone
60+
require.NoError(t, dr.err)
61+
defer func() {
62+
err := dr.conn.Close()
63+
require.NoError(t, err)
64+
}()
65+
66+
tcpConn := accepted.(*net.TCPConn)
67+
68+
err = setCommonSocketOptions(tcpConn)
69+
require.NoError(t, err)
70+
71+
rawConn, err := tcpConn.SyscallConn()
72+
require.NoError(t, err)
73+
74+
err = rawConn.Control(func(fd uintptr) {
75+
val, err := unix.GetsockoptInt(int(fd), syscall.SOL_SOCKET, syscall.SO_KEEPALIVE)
76+
require.NoError(t, err)
77+
require.NotEqual(t, 0, val, "SO_KEEPALIVE should be enabled")
78+
79+
idle, err := unix.GetsockoptInt(int(fd), syscall.IPPROTO_TCP, tcpKeepIdleOption())
80+
require.NoError(t, err)
81+
require.Equal(t, int(DefaultKeepAliveIdle.Seconds()), idle, "keepalive idle should match DefaultKeepAliveIdle")
82+
83+
interval, err := unix.GetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_KEEPINTVL)
84+
require.NoError(t, err)
85+
require.Equal(t, int(DefaultKeepAliveInterval.Seconds()), interval, "keepalive interval should match DefaultKeepAliveInterval")
86+
87+
count, err := unix.GetsockoptInt(int(fd), syscall.IPPROTO_TCP, unix.TCP_KEEPCNT)
88+
require.NoError(t, err)
89+
require.Equal(t, DefaultKeepAliveCount, count, "keepalive count should match DefaultKeepAliveCount")
90+
})
91+
require.NoError(t, err)
92+
}

0 commit comments

Comments
 (0)