From 295b3949bc406c178d9fbfafa72a11ac89038055 Mon Sep 17 00:00:00 2001 From: Michal Mielewczyk Date: Mon, 24 Mar 2025 09:47:19 +0100 Subject: [PATCH] backfill: Update occupancy only if BF succeeded Increasing the occupancy before the backfill request has completed leads to incorrect statistics test_lazy_engine_errors() detected the bug in the following scenario: 1. Rio submitted a read request 2. The read turned out to be a full miss. After a successful read from the core device, engine_read set the metadata bits to valid and submitted a backfill request 3. In meantime, Rio submitted a write request to the same cache lines 4. Since the test uses an error device as cache (all sectors are erroneous) the backfill request failed and was redirected to engine_invalidate 5. engine_invalidate reset the metadata bits to invalid, but since cache lines had waiters registered, the occupancy wasn't decremented as there was an assumption that the new owner (in this case, the write request) would do the bookkeeping 6. Upon receiving cache line locks, the write request detected that the mapping changed so it was completed with an error without decrementing the occupancy 7. The incorrect value of cached cache lines was persisted for the whole lifetime of the cache Signed-off-by: Michal Mielewczyk --- src/engine/engine_bf.c | 25 ++++++++++++++++++++----- src/engine/engine_rd.c | 9 +-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/engine/engine_bf.c b/src/engine/engine_bf.c index 2d80d78..219efb2 100644 --- a/src/engine/engine_bf.c +++ b/src/engine/engine_bf.c @@ -1,6 +1,6 @@ /* * Copyright(c) 2012-2022 Intel Corporation - * Copyright(c) 2024 Huawei Technologies + * Copyright(c) 2024-2025 Huawei Technologies * SPDX-License-Identifier: BSD-3-Clause */ @@ -39,6 +39,23 @@ static inline void backfill_queue_inc_block(struct ocf_cache *cache) env_atomic_set(&cache->pending_read_misses_list_blocked, 1); } +static int _ocf_backfill_update_metadata(struct ocf_request *req) +{ + ocf_cache_t cache = req->cache; + + ocf_hb_req_prot_lock_wr(req); + + ocf_set_valid_map_info(req); + + ocf_hb_req_prot_unlock_wr(req); + + ocf_req_unlock(ocf_cache_line_concurrency(cache), req); + + ocf_req_put(req); + + return 0; +} + static void _ocf_backfill_complete(struct ocf_request *req, int error) { struct ocf_cache *cache = req->cache; @@ -61,10 +78,8 @@ static void _ocf_backfill_complete(struct ocf_request *req, int error) if (error) { ocf_engine_invalidate(req); } else { - ocf_req_unlock(ocf_cache_line_concurrency(cache), req); - - /* put the request at the last point of the completion path */ - ocf_req_put(req); + ocf_queue_push_req_cb(req, _ocf_backfill_update_metadata, + OCF_QUEUE_ALLOW_SYNC | OCF_QUEUE_PRIO_HIGH); } } diff --git a/src/engine/engine_rd.c b/src/engine/engine_rd.c index bbc90cb..ac994fa 100644 --- a/src/engine/engine_rd.c +++ b/src/engine/engine_rd.c @@ -1,6 +1,6 @@ /* * Copyright(c) 2012-2022 Intel Corporation - * Copyright(c) 2024 Huawei Technologies + * Copyright(c) 2024-2025 Huawei Technologies * SPDX-License-Identifier: BSD-3-Clause */ @@ -133,13 +133,6 @@ static int _ocf_read_generic_do(struct ocf_request *req) return 0; } - - ocf_hb_req_prot_lock_wr(req); - - /* Set valid status bits map */ - ocf_set_valid_map_info(req); - - ocf_hb_req_prot_unlock_wr(req); } if (ocf_engine_needs_repart(req)) {