backfill: Update occupancy only if BF succeeded

Increasing the occupancy before the backfill request has completed leads
to incorrect statistics

test_lazy_engine_errors() detected the bug in the following scenario:
1. Rio submitted a read request
2. The read turned out to be a full miss. After a successful read from
   the core device, engine_read set the metadata bits to valid and
   submitted a backfill request
3. In the meantime, Rio submitted a write request to the same cache lines
4. Since the test uses an error device as the cache (every sector returns
   an error), the backfill request failed and was redirected to
   engine_invalidate
5. engine_invalidate reset the metadata bits to invalid, but since the
   cache lines had waiters registered, the occupancy wasn't decremented;
   the assumption was that the new owner (in this case, the write
   request) would do the bookkeeping
6. Upon receiving the cache line locks, the write request detected that
   the mapping had changed, so it was completed with an error without
   decrementing the occupancy
7. The incorrect count of cached cache lines persisted for the whole
   lifetime of the cache (see the stand-alone sketch below)
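
The leak can be reproduced outside OCF with the following stand-alone C
sketch; every name and type in it is invented for illustration and only
mirrors the accounting described in the steps above, it is not OCF code:

/* Stand-alone illustration of the occupancy leak (invented names). */
#include <stdbool.h>
#include <stdio.h>

static int occupancy;          /* "cached lines" statistic */
static bool line_valid;        /* metadata valid bit of the mapped line */
static bool line_has_waiter;   /* a write request waits for the line lock */

static void read_miss_maps_line(void)
{
	line_valid = true;
	occupancy++;           /* step 2: counted before backfill completed */
}

static void backfill_failed(void)
{
	line_valid = false;    /* step 5: engine_invalidate resets the bit */
	if (!line_has_waiter)
		occupancy--;   /* skipped: the waiter is expected to clean up */
}

static void waiter_completes_with_error(void)
{
	/* step 6: mapping changed, the write errors out, no bookkeeping */
}

int main(void)
{
	read_miss_maps_line();           /* steps 1-2 */
	line_has_waiter = true;          /* step 3 */
	backfill_failed();               /* steps 4-5 */
	waiter_completes_with_error();   /* step 6 */
	printf("valid=%d occupancy=%d <- leaked\n", line_valid, occupancy);
	return 0;
}

Compiled and run, it prints valid=0 occupancy=1: the line is no longer
valid, yet it stays counted as cached, which is exactly step 7.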

Signed-off-by: Michal Mielewczyk <michal.mielewczyk@huawei.com>
Author: Michal Mielewczyk
Date:   2025-03-24 09:47:19 +01:00
Parent: 7f55116b5e
Commit: 295b3949bc

2 changed files with 21 additions and 13 deletions

File 1 of 2:

@@ -1,6 +1,6 @@
 /*
  * Copyright(c) 2012-2022 Intel Corporation
- * Copyright(c) 2024 Huawei Technologies
+ * Copyright(c) 2024-2025 Huawei Technologies
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
@@ -39,6 +39,23 @@ static inline void backfill_queue_inc_block(struct ocf_cache *cache)
 		env_atomic_set(&cache->pending_read_misses_list_blocked, 1);
 }
 
+static int _ocf_backfill_update_metadata(struct ocf_request *req)
+{
+	ocf_cache_t cache = req->cache;
+
+	ocf_hb_req_prot_lock_wr(req);
+	ocf_set_valid_map_info(req);
+	ocf_hb_req_prot_unlock_wr(req);
+
+	ocf_req_unlock(ocf_cache_line_concurrency(cache), req);
+
+	ocf_req_put(req);
+
+	return 0;
+}
+
 static void _ocf_backfill_complete(struct ocf_request *req, int error)
 {
 	struct ocf_cache *cache = req->cache;
@@ -61,10 +78,8 @@ static void _ocf_backfill_complete(struct ocf_request *req, int error)
 	if (error) {
 		ocf_engine_invalidate(req);
 	} else {
-		ocf_req_unlock(ocf_cache_line_concurrency(cache), req);
-
-		/* put the request at the last point of the completion path */
-		ocf_req_put(req);
+		ocf_queue_push_req_cb(req, _ocf_backfill_update_metadata,
+				OCF_QUEUE_ALLOW_SYNC | OCF_QUEUE_PRIO_HIGH);
 	}
 }
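
The change above follows a defer-the-commit pattern: on success the
completion schedules a high-priority callback, so the valid bits, the
unlock and the final put all happen only after the backfill I/O is known
to have succeeded. A minimal sketch of that ordering, with invented
types and names rather than the real OCF API:

/* Simplified sketch of the new completion ordering (invented names). */
#include <stdio.h>

struct request {
	int (*deferred)(struct request *req);  /* runs later, in queue context */
};

static int update_metadata_cb(struct request *req)
{
	/* analogous to _ocf_backfill_update_metadata: valid bits, unlock, put */
	printf("commit valid bits, unlock cache lines, put request\n");
	return 0;
}

static void backfill_complete(struct request *req, int error)
{
	if (error) {
		printf("invalidate mapping, fix occupancy\n");  /* failure path */
		return;
	}
	/* success: nothing is committed here, only scheduled */
	req->deferred = update_metadata_cb;
}

static void queue_run(struct request *req)
{
	if (req->deferred)
		req->deferred(req);
}

int main(void)
{
	struct request req = { 0 };
	backfill_complete(&req, 0);  /* backfill I/O finished without error */
	queue_run(&req);             /* the queue performs the commit later */
	return 0;
}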

File 2 of 2:

@@ -1,6 +1,6 @@
 /*
  * Copyright(c) 2012-2022 Intel Corporation
- * Copyright(c) 2024 Huawei Technologies
+ * Copyright(c) 2024-2025 Huawei Technologies
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
@@ -133,13 +133,6 @@ static int _ocf_read_generic_do(struct ocf_request *req)
 			return 0;
 		}
 
-		ocf_hb_req_prot_lock_wr(req);
-
-		/* Set valid status bits map */
-		ocf_set_valid_map_info(req);
-
-		ocf_hb_req_prot_unlock_wr(req);
 	}
 
 	if (ocf_engine_needs_repart(req)) {