For additional context, the exception printout is:
2023-03-31 02:06:13,231 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/comm/tcp.py", line 225, in read
frames_nbytes = await stream.read_bytes(fmt_size)
asyncio.exceptions.CancelledError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
return fut.result()
asyncio.exceptions.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/comm/core.py", line 329, in connect
handshake = await wait_for(comm.read(), time_left())
File "/usr/local/lib/python3.10/site-packages/distributed/utils.py", line 1849, in wait_for
return await asyncio.wait_for(fut, timeout)
File "/usr/local/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/worker.py", line 1244, in heartbeat
response = await retry_operation(
File "/usr/local/lib/python3.10/site-packages/distributed/utils_comm.py", line 434, in retry_operation
return await retry(
File "/usr/local/lib/python3.10/site-packages/distributed/utils_comm.py", line 413, in retry
return await coro()
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1262, in send_recv_from_rpc
comm = await self.pool.connect(self.addr)
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1506, in connect
return await connect_attempt
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1427, in _connect
comm = await connect(
File "/usr/local/lib/python3.10/site-packages/distributed/comm/core.py", line 334, in connect
raise OSError(
OSError: Timed out during handshake while connecting to tcp://127.0.0.1:40883 after 30 s
2023-03-31 02:06:44,230 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/comm/tcp.py", line 225, in read
frames_nbytes = await stream.read_bytes(fmt_size)
asyncio.exceptions.CancelledError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
return fut.result()
asyncio.exceptions.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/comm/core.py", line 329, in connect
handshake = await wait_for(comm.read(), time_left())
File "/usr/local/lib/python3.10/site-packages/distributed/utils.py", line 1849, in wait_for
return await asyncio.wait_for(fut, timeout)
File "/usr/local/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/worker.py", line 1244, in heartbeat
response = await retry_operation(
File "/usr/local/lib/python3.10/site-packages/distributed/utils_comm.py", line 434, in retry_operation
return await retry(
File "/usr/local/lib/python3.10/site-packages/distributed/utils_comm.py", line 413, in retry
return await coro()
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1262, in send_recv_from_rpc
comm = await self.pool.connect(self.addr)
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1506, in connect
return await connect_attempt
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1427, in _connect
comm = await connect(
File "/usr/local/lib/python3.10/site-packages/distributed/comm/core.py", line 334, in connect
raise OSError(
OSError: Timed out during handshake while connecting to tcp://127.0.0.1:40883 after 30 s
2023-03-31 02:07:15,229 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/comm/tcp.py", line 225, in read
frames_nbytes = await stream.read_bytes(fmt_size)
asyncio.exceptions.CancelledError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
return fut.result()
asyncio.exceptions.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/comm/core.py", line 329, in connect
handshake = await wait_for(comm.read(), time_left())
File "/usr/local/lib/python3.10/site-packages/distributed/utils.py", line 1849, in wait_for
return await asyncio.wait_for(fut, timeout)
File "/usr/local/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/distributed/worker.py", line 1244, in heartbeat
response = await retry_operation(
File "/usr/local/lib/python3.10/site-packages/distributed/utils_comm.py", line 434, in retry_operation
return await retry(
File "/usr/local/lib/python3.10/site-packages/distributed/utils_comm.py", line 413, in retry
return await coro()
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1262, in send_recv_from_rpc
comm = await self.pool.connect(self.addr)
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1506, in connect
return await connect_attempt
File "/usr/local/lib/python3.10/site-packages/distributed/core.py", line 1427, in _connect
comm = await connect(
File "/usr/local/lib/python3.10/site-packages/distributed/comm/core.py", line 334, in connect
raise OSError(
OSError: Timed out during handshake while connecting to tcp://127.0.0.1:40883 after 30 s