From fd1b6efea64a17fbdcd24e1df3d3033da0140d22 Mon Sep 17 00:00:00 2001 From: Emily Hudson Date: Sun, 7 Aug 2022 08:40:05 +0100 Subject: [PATCH] net: add failed addresses + details on connect errors, make connect more robust in the default non blocking mode (#15364) --- examples/net_failconnect.v | 5 + vlib/net/errors.v | 11 +- vlib/net/net_nix.c.v | 3 + vlib/net/net_windows.c.v | 3 + vlib/net/tcp.v | 72 +++++++---- .../tcp_self_dial_from_many_clients_test.v | 114 ++++++++++++++++++ vlib/net/websocket/websocket_test.v | 26 ++-- vlib/vweb/tests/vweb_test.v | 5 + vlib/vweb/tests/vweb_test_server.v | 4 +- 9 files changed, 202 insertions(+), 41 deletions(-) create mode 100644 examples/net_failconnect.v create mode 100644 vlib/net/tcp_self_dial_from_many_clients_test.v diff --git a/examples/net_failconnect.v b/examples/net_failconnect.v new file mode 100644 index 000000000..1035a4507 --- /dev/null +++ b/examples/net_failconnect.v @@ -0,0 +1,5 @@ +import net + +conn := net.dial_tcp('[::1]:57000')? +peer_addr := conn.peer_addr()? +println('$peer_addr') diff --git a/vlib/net/errors.v b/vlib/net/errors.v index e050c1245..6cbc1a445 100644 --- a/vlib/net/errors.v +++ b/vlib/net/errors.v @@ -12,12 +12,13 @@ pub const ( errors_base + 2) err_option_wrong_type = error_with_code('net: set_option_xxx option wrong type', errors_base + 3) - err_port_out_of_range = error_with_code('', errors_base + 5) - err_no_udp_remote = error_with_code('', errors_base + 6) + err_port_out_of_range = error_with_code('net: port out of range', errors_base + 5) + err_no_udp_remote = error_with_code('net: no udp remote', errors_base + 6) err_connect_failed = error_with_code('net: connect failed', errors_base + 7) err_connect_timed_out = error_with_code('net: connect timed out', errors_base + 8) err_timed_out = error_with_code('net: op timed out', errors_base + 9) err_timed_out_code = errors_base + 9 + err_connection_refused = error_with_code('net: connection refused', errors_base + 10) ) pub fn socket_error_message(potential_code int, s string) ?int { @@ -43,13 +44,13 @@ pub fn socket_error(potential_code int) ?int { } pub fn wrap_error(error_code int) ? { + if error_code == 0 { + return + } $if windows { enum_error := wsa_error(error_code) return error_with_code('net: socket error: $enum_error', error_code) } $else { - if error_code == 0 { - return - } return error_with_code('net: socket error: $error_code', error_code) } } diff --git a/vlib/net/net_nix.c.v b/vlib/net/net_nix.c.v index a9fa53101..5f322428a 100644 --- a/vlib/net/net_nix.c.v +++ b/vlib/net/net_nix.c.v @@ -10,6 +10,8 @@ module net #flag solaris -lsocket +const is_windows = false + fn error_code() int { return C.errno } @@ -23,4 +25,5 @@ pub const ( const ( error_ewouldblock = C.EWOULDBLOCK + error_einprogress = C.EINPROGRESS ) diff --git a/vlib/net/net_windows.c.v b/vlib/net/net_windows.c.v index b003a3043..19695996d 100644 --- a/vlib/net/net_windows.c.v +++ b/vlib/net/net_windows.c.v @@ -1,5 +1,7 @@ module net +const is_windows = true + // WsaError is all of the socket errors that WSA provides from WSAGetLastError pub enum WsaError { // @@ -739,6 +741,7 @@ pub fn wsa_error(code int) WsaError { const ( error_ewouldblock = WsaError.wsaewouldblock + error_einprogress = WsaError.wsaeinprogress ) // Link to Winsock library diff --git a/vlib/net/tcp.v b/vlib/net/tcp.v index 1248e6658..606469de1 100644 --- a/vlib/net/tcp.v +++ b/vlib/net/tcp.v @@ -1,6 +1,7 @@ module net import time +import strings const ( tcp_default_read_timeout = 30 * time.second @@ -24,12 +25,16 @@ pub fn dial_tcp(address string) ?&TcpConn { return error('$err.msg(); could not resolve address $address in dial_tcp') } + // Keep track of dialing errors that take place + mut errs := []IError{} + // Very simple dialer for addr in addrs { mut s := new_tcp_socket(addr.family()) or { return error('$err.msg(); could not create new tcp socket in dial_tcp') } s.connect(addr) or { + errs << err // Connection failed s.close() or { continue } continue @@ -41,8 +46,20 @@ pub fn dial_tcp(address string) ?&TcpConn { write_timeout: net.tcp_default_write_timeout } } + + // Once we've failed now try and explain why we failed to connect + // to any of these addresses + mut err_builder := strings.new_builder(1024) + err_builder.write_string('dial_tcp failed for address $address\n') + err_builder.write_string('tried addrs:\n') + for i := 0; i < errs.len; i++ { + addr := addrs[i] + why := errs[i] + err_builder.write_string('\t$addr: $why\n') + } + // failed - return error('dial_tcp failed for address $address') + return error(err_builder.str()) } // bind local address and dail. @@ -431,34 +448,39 @@ fn (mut s TcpSocket) connect(a Addr) ? { if res == 0 { return } - - // The socket is nonblocking and the connection cannot be completed - // immediately. (UNIX domain sockets failed with EAGAIN instead.) - // It is possible to select(2) or poll(2) for completion by selecting - // the socket for writing. After select(2) indicates writability, - // use getsockopt(2) to read the SO_ERROR option at level SOL_SOCKET to - // determine whether connect() completed successfully (SO_ERROR is zero) or - // unsuccessfully (SO_ERROR is one of the usual error codes listed here, - // ex‐ plaining the reason for the failure). - write_result := s.@select(.write, net.connect_timeout)? - if write_result { + ecode := error_code() + // On nix non-blocking sockets we expect einprogress + // On windows we expect res == -1 && error_code() == ewouldblock + if (is_windows && ecode == int(error_ewouldblock)) + || (!is_windows && res == -1 && ecode == int(error_einprogress)) { + // The socket is nonblocking and the connection cannot be completed + // immediately. (UNIX domain sockets failed with EAGAIN instead.) + // It is possible to select(2) or poll(2) for completion by selecting + // the socket for writing. After select(2) indicates writability, + // use getsockopt(2) to read the SO_ERROR option at level SOL_SOCKET to + // determine whether connect() completed successfully (SO_ERROR is zero) or + // unsuccessfully (SO_ERROR is one of the usual error codes listed here, + // ex‐ plaining the reason for the failure). + write_result := s.@select(.write, net.connect_timeout)? err := 0 len := sizeof(err) - socket_error(C.getsockopt(s.handle, C.SOL_SOCKET, C.SO_ERROR, &err, &len))? - - if err != 0 { - return wrap_error(err) + xyz := C.getsockopt(s.handle, C.SOL_SOCKET, C.SO_ERROR, &err, &len) + if xyz == 0 && err == 0 { + return } - // Succeeded - return + if write_result { + if xyz == 0 { + wrap_error(err)? + return + } + return + } + return err_timed_out } - - // Get the error - socket_error(C.connect(s.handle, voidptr(&a), a.len()))? - - // otherwise we timed out - return err_connect_timed_out + wrap_error(ecode)? + return } $else { - socket_error(C.connect(s.handle, voidptr(&a), a.len()))? + x := C.connect(s.handle, voidptr(&a), a.len()) + socket_error(x)? } } diff --git a/vlib/net/tcp_self_dial_from_many_clients_test.v b/vlib/net/tcp_self_dial_from_many_clients_test.v new file mode 100644 index 000000000..0dd82bf50 --- /dev/null +++ b/vlib/net/tcp_self_dial_from_many_clients_test.v @@ -0,0 +1,114 @@ +module main + +import net +import time + +const xport = 15523 + +struct Context { +mut: + ok_client_dials int + fail_client_dials int + // + ok_client_close int + fail_client_close int + //// + ok_server_accepts int + fail_server_accepts int + // + ok_server_close int + fail_server_close int + // + received []int +} + +fn elog(msg string) { + eprintln('$time.now().format_ss_micro() | $msg') +} + +fn receive_data(mut con net.TcpConn, mut ctx Context) { + mut buf := []u8{len: 5} + for { + bytes := con.read(mut buf) or { -1 } + if bytes < 0 { + break + } + ctx.received << buf[0] + } + con.close() or { + ctx.fail_server_close++ + return + } + ctx.ok_server_close++ +} + +fn start_server(schannel chan int, mut ctx Context) { + elog('server: start_server') + mut tcp_listener := net.listen_tcp(net.AddrFamily.ip, ':$xport') or { + elog('server: start server error $err') + return + } + elog('server: server started listening at port :$xport') + schannel <- 0 + + for { + mut tcp_con := tcp_listener.accept() or { + elog('server: accept error: $err') + ctx.fail_server_accepts++ + continue + } + go receive_data(mut tcp_con, mut ctx) + ctx.ok_server_accepts++ + elog('server: new tcp connection con.sock.handle: $tcp_con.sock.handle') + continue + } +} + +fn start_client(i int, mut ctx Context) { + elog('client [$i]: start') + mut tcp_con := net.dial_tcp('127.0.0.1:$xport') or { + elog('client [$i]: net.dial_tcp err $err') + ctx.fail_client_dials++ + return + } + ctx.ok_client_dials++ + elog('client [$i]: conn is connected, con.sock.handle: $tcp_con.sock.handle') + tcp_con.write([u8(i)]) or { elog('client [$i]: write failed, err: $err') } + time.sleep(1 * time.second) + elog('client [$i]: closing connection...') + tcp_con.close() or { + elog('client [$i]: close failed, err: $err') + ctx.fail_client_close++ + return + } + ctx.ok_client_close++ +} + +fn test_tcp_self_dialing() { + elog('>>> start') + start_time := time.now() + mut ctx := &Context{} + mut server_channel := chan int{cap: 1} + go start_server(server_channel, mut ctx) + svalue := <-server_channel + elog('>>> server was started: ${svalue}. Starting clients:') + for i := int(0); i < 20; i++ { + go start_client(i, mut ctx) + elog('>>> started client $i') + // time.sleep(2 * time.millisecond) + } + max_dt := 5 * time.second + for { + t := time.now() + dt := t - start_time + if dt > max_dt { + elog('>>> exiting after $dt.milliseconds() ms ...') + dump(ctx) + assert ctx.fail_client_dials < 2, 'allowed failed client dials, from $ctx.ok_server_accepts connections' + assert ctx.received.len > ctx.ok_server_accepts / 2, 'at least half the clients sent some data, that was later received by the server' + elog('>>> goodbye') + exit(0) + } + time.sleep(10 * time.millisecond) + } +} diff --git a/vlib/net/websocket/websocket_test.v b/vlib/net/websocket/websocket_test.v index 18b6a7889..4874b00e7 100644 --- a/vlib/net/websocket/websocket_test.v +++ b/vlib/net/websocket/websocket_test.v @@ -26,9 +26,13 @@ fn test_ws_ipv6() { return } port := 30000 + rand.intn(1024) or { 0 } + eprintln('> port ipv6: $port') go start_server(.ip6, port) - time.sleep(500 * time.millisecond) - ws_test(.ip6, 'ws://localhost:$port') or { assert false } + time.sleep(1500 * time.millisecond) + ws_test(.ip6, 'ws://localhost:$port') or { + eprintln('> error while connecting .ip6, err: $err') + assert false + } } // tests with internal ws servers @@ -37,9 +41,13 @@ fn test_ws_ipv4() { return } port := 30000 + rand.intn(1024) or { 0 } + eprintln('> port ipv4: $port') go start_server(.ip, port) - time.sleep(500 * time.millisecond) - ws_test(.ip, 'ws://localhost:$port') or { assert false } + time.sleep(1500 * time.millisecond) + ws_test(.ip, 'ws://localhost:$port') or { + eprintln('> error while connecting .ip, err: $err') + assert false + } } fn start_server(family net.AddrFamily, listen_port int) ? { @@ -58,15 +66,15 @@ fn start_server(family net.AddrFamily, listen_port int) ? { })? s.on_message(fn (mut ws websocket.Client, msg &websocket.Message) ? { match msg.opcode { - .pong { ws.write_string('pong') or { panic(err) } } - else { ws.write(msg.payload, msg.opcode) or { panic(err) } } + .pong { ws.write_string('pong')? } + else { ws.write(msg.payload, msg.opcode)? } } }) s.on_close(fn (mut ws websocket.Client, code int, reason string) ? { // not used }) - s.listen() or { panic('websocket server could not listen') } + s.listen() or { panic('websocket server could not listen, err: $err') } } // ws_test tests connect to the websocket server from websocket client @@ -104,11 +112,11 @@ fn ws_test(family net.AddrFamily, uri string) ? { println('Binary message: $msg') } }, test_results) - ws.connect() or { panic('fail to connect') } + ws.connect() or { panic('fail to connect, err: $err') } go ws.listen() text := ['a'].repeat(2) for msg in text { - ws.write(msg.bytes(), .text_frame) or { panic('fail to write to websocket') } + ws.write(msg.bytes(), .text_frame) or { panic('fail to write to websocket, err: $err') } // sleep to give time to recieve response before send a new one time.sleep(100 * time.millisecond) } diff --git a/vlib/vweb/tests/vweb_test.v b/vlib/vweb/tests/vweb_test.v index b7e689038..18bf8f697 100644 --- a/vlib/vweb/tests/vweb_test.v +++ b/vlib/vweb/tests/vweb_test.v @@ -269,6 +269,7 @@ fn simple_tcp_client(config SimpleTcpClientConfig) ?string { mut tries := 0 for tries < config.retries { tries++ + eprintln('> client retries: $tries') client = net.dial_tcp(localserver) or { if tries > config.retries { return err @@ -278,6 +279,10 @@ fn simple_tcp_client(config SimpleTcpClientConfig) ?string { } break } + if client == unsafe { nil } { + eprintln('coult not create a tcp client connection to $localserver after $config.retries retries') + exit(1) + } client.set_read_timeout(tcp_r_timeout) client.set_write_timeout(tcp_w_timeout) defer { diff --git a/vlib/vweb/tests/vweb_test_server.v b/vlib/vweb/tests/vweb_test_server.v index f53ac904b..d72516dd7 100644 --- a/vlib/vweb/tests/vweb_test_server.v +++ b/vlib/vweb/tests/vweb_test_server.v @@ -21,7 +21,7 @@ struct Config { fn exit_after_timeout(timeout_in_ms int) { time.sleep(timeout_in_ms * time.millisecond) - // eprintln('webserver is exiting ...') + println('>> webserver: pid: $os.getpid(), exiting ...') exit(0) } @@ -43,7 +43,7 @@ fn main() { timeout: timeout global_config: config } - eprintln('>> webserver: started on http://localhost:$app.port/ , with maximum runtime of $app.timeout milliseconds.') + eprintln('>> webserver: pid: $os.getpid(), started on http://localhost:$app.port/ , with maximum runtime of $app.timeout milliseconds.') vweb.run_at(app, host: 'localhost', port: http_port, family: .ip)? } -- 2.30.2