From 0f876d17707737e728e95b2a611fd131f67c8bfb Mon Sep 17 00:00:00 2001 From: Laurens Valk Date: Thu, 21 May 2026 14:51:07 +0200 Subject: [PATCH 1/2] pbio/port_lump: Increase initial read timeout. This races with the keep-alive timeout as they are essentially testing for the same thing, so the timeouts should match. --- lib/pbio/src/port_lump.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/pbio/src/port_lump.c b/lib/pbio/src/port_lump.c index a696a8312..9010685e3 100644 --- a/lib/pbio/src/port_lump.c +++ b/lib/pbio/src/port_lump.c @@ -829,7 +829,7 @@ pbio_error_t pbio_port_lump_sync_thread(pbio_os_state_t *state, pbio_port_lump_d // read the message header PBIO_OS_AWAIT(state, &lump_dev->read_pt, err = pbdrv_uart_read(&lump_dev->read_pt, uart_dev, lump_dev->rx_msg, 1, EV3_UART_IO_TIMEOUT)); if (err != PBIO_SUCCESS) { - debug_pr("UART Rx end error during info header\n"); + debug_pr("UART Rx error during info header\n"); return err; } @@ -847,7 +847,7 @@ pbio_error_t pbio_port_lump_sync_thread(pbio_os_state_t *state, pbio_port_lump_d if (lump_dev->rx_msg_size > 1) { PBIO_OS_AWAIT(state, &lump_dev->read_pt, err = pbdrv_uart_read(&lump_dev->read_pt, uart_dev, lump_dev->rx_msg + 1, lump_dev->rx_msg_size - 1, EV3_UART_IO_TIMEOUT)); if (err != PBIO_SUCCESS) { - debug_pr("UART Rx end error during info\n"); + debug_pr("UART Rx error during info\n"); return err; } } @@ -1020,9 +1020,14 @@ pbio_error_t pbio_port_lump_data_recv_thread(pbio_os_state_t *state, pbio_port_l PBIO_OS_ASYNC_BEGIN(state); while (true) { - PBIO_OS_AWAIT(state, &lump_dev->read_pt, err = pbdrv_uart_read(&lump_dev->read_pt, uart_dev, lump_dev->rx_msg, 1, EV3_UART_IO_TIMEOUT)); + PBIO_OS_AWAIT(state, &lump_dev->read_pt, err = pbdrv_uart_read(&lump_dev->read_pt, uart_dev, lump_dev->rx_msg, 1, + // This is essentially the timeout for receiving the next data + // message, so we should allow at least as much timeout as allowed + // by missing messages rather than use a 
generic IO timeout. + EV3_UART_DATA_KEEP_ALIVE_TIMEOUT * (EV3_UART_DATA_KEEP_ALIVE_MAX_MISSED + 1) + )); if (err != PBIO_SUCCESS) { - debug_pr("UART Rx data header end error\n"); + debug_pr("Did not receive UART Rx data header byte\n"); return err; } @@ -1042,7 +1047,7 @@ pbio_error_t pbio_port_lump_data_recv_thread(pbio_os_state_t *state, pbio_port_l PBIO_OS_AWAIT(state, &lump_dev->read_pt, err = pbdrv_uart_read(&lump_dev->read_pt, uart_dev, lump_dev->rx_msg + 1, lump_dev->rx_msg_size - 1, EV3_UART_IO_TIMEOUT)); if (err != PBIO_SUCCESS) { - debug_pr("UART Rx data end error\n"); + debug_pr("UART Rx data error\n"); return err; } From 3d75311b450120ddc4033a0b43166285d1283c72 Mon Sep 17 00:00:00 2001 From: Thomas Schank Date: Fri, 22 May 2026 07:04:30 +0200 Subject: [PATCH 2/2] pbio/port_lump: Recover from RX desync bursts in data recv thread Rebased onto the `work` branch (keepalive grace period + increased header-read timeout). This drops the patch's earlier keepalive and header-read-timeout changes, which `work` now supersedes, and keeps only the in-place desync realignment in the data receive thread. When the SPIKE Prime hub's UART driver drops bytes, the data receive parser loses byte-stream alignment and reads mid-packet data as message headers. The keepalive grace period delays disconnect but does not fix the parser state. This adds active realignment: - Track consecutive bad-header events (wrong message size or unexpected message type) in lump_dev->rx_bad_header_streak, stored on the device struct to match the style of err_count. - After EV3_UART_BAD_HEADER_STREAK_MAX (8) consecutive bad headers, flush the UART buffer to realign byte boundaries. A short burst is normal after a corrupt packet; a sustained streak means the parser is stuck reading mid-stream bytes and needs an active reset. - Treat a payload read timeout as an immediate desync signal (a valid header arrived but its payload bytes never came) and flush immediately. 
The header-byte read timeout itself is left to `work`'s handling, which returns the error so the keepalive logic can trigger a full reconnect. --- lib/pbio/src/port_lump.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/lib/pbio/src/port_lump.c b/lib/pbio/src/port_lump.c index 9010685e3..403fafc8e 100644 --- a/lib/pbio/src/port_lump.c +++ b/lib/pbio/src/port_lump.c @@ -31,6 +31,8 @@ #define EV3_UART_MAX_MESSAGE_SIZE (LUMP_MAX_MSG_SIZE + 3) +#define EV3_UART_BAD_HEADER_STREAK_MAX 8 + #define EV3_UART_TYPE_MIN 29 // EV3 color sensor #define EV3_UART_TYPE_MAX 101 #define EV3_UART_SPEED_MIN 2400 @@ -174,6 +176,8 @@ struct _pbio_port_lump_dev_t { uint32_t rx_msg_size; /** Total number of errors that have occurred. Re-used in different stages of synchronization and data reading. */ uint32_t err_count; + /** Count of consecutive bad headers (invalid size or unexpected type) for RX desync recovery. Reset on a good packet or after a flush. */ + uint8_t rx_bad_header_streak; /** Flag that indicates that good DATA lump_dev->msg has been received since last watchdog timeout. */ bool data_rec; /** Angle reported by the device. */ @@ -213,6 +217,7 @@ pbio_port_lump_dev_t *pbio_port_lump_init_instance(uint8_t device_index) { lump_dev->rx_msg = &bufs[device_index][BUF_RX_MSG][0]; lump_dev->status = PBDRV_LEGODEV_LUMP_STATUS_ERR; lump_dev->err_count = 0; + lump_dev->rx_bad_header_streak = 0; lump_dev->data_set = &data_set_bufs[device_index]; lump_dev->bin_data = data_read_bufs[device_index]; return lump_dev; @@ -1013,9 +1018,8 @@ pbio_error_t pbio_port_lump_data_recv_thread(pbio_os_state_t *state, pbio_port_l pbio_error_t err; - // REVISIT: This is not the greatest. We can easily get a buffer overrun and - // loose data. For now, the retry after bad message size helps get back into - // sync with the data stream. + // REVISIT: We can easily get a buffer overrun and lose data. + // The flush-on-bad-streak below helps reacquire sync with the data stream. 
PBIO_OS_ASYNC_BEGIN(state); @@ -1033,6 +1037,14 @@ pbio_error_t pbio_port_lump_data_recv_thread(pbio_os_state_t *state, pbio_port_l lump_dev->rx_msg_size = ev3_uart_get_msg_size(lump_dev->rx_msg[0]); if (lump_dev->rx_msg_size < 3 || lump_dev->rx_msg_size > EV3_UART_MAX_MESSAGE_SIZE) { + // Bad header byte — the parser is mid-packet or the UART dropped bytes. + // Accumulate: a short burst is normal after a corrupt packet; a sustained + // streak means the stream is desynchronized and a flush is needed to + // realign the byte boundaries before more data is discarded. + if (++lump_dev->rx_bad_header_streak >= EV3_UART_BAD_HEADER_STREAK_MAX) { + pbdrv_uart_flush(uart_dev); + lump_dev->rx_bad_header_streak = 0; + } debug_pr("Bad data message size\n"); continue; } @@ -1041,16 +1053,29 @@ pbio_error_t pbio_port_lump_data_recv_thread(pbio_os_state_t *state, pbio_port_l uint8_t cmd = lump_dev->rx_msg[0] & LUMP_MSG_CMD_MASK; if (msg_type != LUMP_MSG_TYPE_DATA && (msg_type != LUMP_MSG_TYPE_CMD || (cmd != LUMP_CMD_WRITE && cmd != LUMP_CMD_EXT_MODE))) { + if (++lump_dev->rx_bad_header_streak >= EV3_UART_BAD_HEADER_STREAK_MAX) { + pbdrv_uart_flush(uart_dev); + lump_dev->rx_bad_header_streak = 0; + } debug_pr("Bad msg type\n"); continue; } PBIO_OS_AWAIT(state, &lump_dev->read_pt, err = pbdrv_uart_read(&lump_dev->read_pt, uart_dev, lump_dev->rx_msg + 1, lump_dev->rx_msg_size - 1, EV3_UART_IO_TIMEOUT)); if (err != PBIO_SUCCESS) { - debug_pr("UART Rx data error\n"); + if (err == PBIO_ERROR_TIMEDOUT) { + // Header arrived but payload timed out: we are mid-packet. + // Flush to realign; the next header read starts fresh. + pbdrv_uart_flush(uart_dev); + lump_dev->rx_bad_header_streak = 0; + continue; + } + debug_pr("UART Rx data end error\n"); return err; } + lump_dev->rx_bad_header_streak = 0; + // at this point, we have a full lump_dev->msg that can be parsed pbio_port_lump_lump_parse_msg(lump_dev); }