Skip to content

Commit ead7fee

Browse files
[8.2] MOD-14826 Use try_lock in debugInfo() to avoid blocking main thread during SVS training (#932)
* MOD-14826 Use try_lock in debugInfo() to avoid blocking main thread during SVS training (#931)
* use try lock
* fix test
(cherry picked from commit d7a5afb)
* remove assert
---------
Co-authored-by: meiravgri <109056284+meiravgri@users.noreply.github.com>
Co-authored-by: meiravgri <meirav.grimberg@redis.com>
1 parent 09adad1 commit ead7fee

2 files changed

Lines changed: 13 additions & 11 deletions

File tree

src/VecSim/algorithms/svs/svs_tiered.h

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -832,9 +832,19 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
832832
.updateJobWaitTime = this->updateJobWaitTime,
833833
};
834834
{
835-
std::lock_guard<std::mutex> lock(this->updateJobMutex);
836-
svsTieredInfo.indexUpdateScheduled =
837-
this->indexUpdateScheduled.test() == VecSimBool_TRUE;
835+
// Use try_lock to avoid blocking the main thread during long-running
836+
// training operations. updateSVSIndexWrapper holds updateJobMutex for
837+
// the entire training duration (which can take 40-85s on slow machines).
838+
// If the mutex is held, training is actively running, so we report
839+
// indexUpdateScheduled = true (BACKGROUND_INDEXING = 1).
840+
std::unique_lock<std::mutex> lock(this->updateJobMutex, std::try_to_lock);
841+
if (lock.owns_lock()) {
842+
svsTieredInfo.indexUpdateScheduled =
843+
this->indexUpdateScheduled.test() == VecSimBool_TRUE;
844+
} else {
845+
// Mutex is held by updateSVSIndexWrapper — training is in progress.
846+
svsTieredInfo.indexUpdateScheduled = true;
847+
}
838848
}
839849
info.tieredInfo.specificTieredBackendInfo.svsTieredInfo = svsTieredInfo;
840850
info.tieredInfo.backgroundIndexing =

tests/flow/test_svs_tiered.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -235,32 +235,24 @@ def search_insert(test_logger, is_multi: bool, num_per_label=1, data_type=VecSim
235235
searches_number = 0
236236
# run knn query every 1 s.
237237
total_tiered_search_time = 0
238-
prev_bf_size = num_labels
239238
cur_svs_label_count = index.svs_label_count()
240239

241240
test_logger.info(f"SVS labels number = {cur_svs_label_count}")
242241
while searches_number == 0 or cur_svs_label_count < num_labels - updateThreshold:
243242
# For each run get the current svs size and the query time.
244-
bf_curr_size = index.get_curr_bf_size()
245243
query_start = time.time()
246244
tiered_labels, _ = index.knn_query(query_data, k)
247245
query_dur = time.time() - query_start
248246
total_tiered_search_time += query_dur
249247

250248
test_logger.info(f"query time = {round_ms(query_dur)} ms")
251249

252-
# BF size should decrease.
253-
test_logger.info(f"bf size = {bf_curr_size}")
254-
assert bf_curr_size < prev_bf_size
255-
256250
# Run the query also in the bf index to get the ground truth results.
257251
bf_labels, _ = bf_index.knn_query(query_data, k)
258252
correct += len(np.intersect1d(tiered_labels[0], bf_labels[0]))
259253
time.sleep(1)
260254
searches_number += 1
261-
prev_bf_size = bf_curr_size
262255
cur_svs_label_count = index.svs_label_count()
263-
264256
# SVS labels count updates before the job is done, so we need to wait for the queue to be empty.
265257
index.wait_for_index(1)
266258
index_dur = time.time() - index_start

0 commit comments

Comments
 (0)