backfill: Update occupancy only if BF succeeded

Increasing the occupancy before the backfill request has completed leads
to incorrect statistics

test_lazy_engine_errors() detected the bug in the following scenario:
1. Rio submitted a read request
2. The read turned out to be a full miss. After a successful read from
   the core device, engine_read set the metadata bits to valid and
   submitted a backfill request
3. In the meantime, Rio submitted a write request to the same cache lines
4. Since the test uses an error device as the cache (every sector returns
   an error), the backfill request failed and was redirected to
   engine_invalidate
5. engine_invalidate reset the metadata bits to invalid, but since the
   cache lines had waiters registered, the occupancy wasn't decremented;
   the assumption was that the new owner (in this case, the write
   request) would do the bookkeeping
6. Upon receiving the cache line locks, the write request detected that
   the mapping had changed, so it was completed with an error without
   decrementing the occupancy
7. The incorrect count of cached cache lines persisted for the whole
   lifetime of the cache (see the stand-alone sketch below)
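
The leak can be reproduced outside OCF with the following stand-alone C
sketch; every name and type in it is invented for illustration and only
mirrors the accounting described in the steps above, it is not OCF code:

/* Stand-alone illustration of the occupancy leak (invented names). */
#include <stdbool.h>
#include <stdio.h>

static int occupancy;          /* "cached lines" statistic */
static bool line_valid;        /* metadata valid bit of the mapped line */
static bool line_has_waiter;   /* a write request waits for the line lock */

static void read_miss_maps_line(void)
{
	line_valid = true;
	occupancy++;           /* step 2: counted before backfill completed */
}

static void backfill_failed(void)
{
	line_valid = false;    /* step 5: engine_invalidate resets the bit */
	if (!line_has_waiter)
		occupancy--;   /* skipped: the waiter is expected to clean up */
}

static void waiter_completes_with_error(void)
{
	/* step 6: mapping changed, the write errors out, no bookkeeping */
}

int main(void)
{
	read_miss_maps_line();           /* steps 1-2 */
	line_has_waiter = true;          /* step 3 */
	backfill_failed();               /* steps 4-5 */
	waiter_completes_with_error();   /* step 6 */
	printf("valid=%d occupancy=%d <- leaked\n", line_valid, occupancy);
	return 0;
}

Compiled and run, it prints valid=0 occupancy=1: the line is no longer
valid, yet it stays counted as cached, which is exactly step 7.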

Signed-off-by: Michal Mielewczyk <michal.mielewczyk@huawei.com>
Author: Michal Mielewczyk
Date:   2025-03-24 09:47:19 +01:00
Parent: 7f55116b5e
Commit: 295b3949bc

2 changed files with 21 additions and 13 deletions

File 1 of 2:

@@ -1,6 +1,6 @@
 /*
  * Copyright(c) 2012-2022 Intel Corporation
- * Copyright(c) 2024 Huawei Technologies
+ * Copyright(c) 2024-2025 Huawei Technologies
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
@@ -39,6 +39,23 @@ static inline void backfill_queue_inc_block(struct ocf_cache *cache)
 		env_atomic_set(&cache->pending_read_misses_list_blocked, 1);
 }
 
+static int _ocf_backfill_update_metadata(struct ocf_request *req)
+{
+	ocf_cache_t cache = req->cache;
+
+	ocf_hb_req_prot_lock_wr(req);
+	ocf_set_valid_map_info(req);
+	ocf_hb_req_prot_unlock_wr(req);
+
+	ocf_req_unlock(ocf_cache_line_concurrency(cache), req);
+
+	ocf_req_put(req);
+
+	return 0;
+}
+
 static void _ocf_backfill_complete(struct ocf_request *req, int error)
 {
 	struct ocf_cache *cache = req->cache;
@@ -61,10 +78,8 @@ static void _ocf_backfill_complete(struct ocf_request *req, int error)
 	if (error) {
 		ocf_engine_invalidate(req);
 	} else {
-		ocf_req_unlock(ocf_cache_line_concurrency(cache), req);
-
-		/* put the request at the last point of the completion path */
-		ocf_req_put(req);
+		ocf_queue_push_req_cb(req, _ocf_backfill_update_metadata,
+				OCF_QUEUE_ALLOW_SYNC | OCF_QUEUE_PRIO_HIGH);
 	}
 }
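
The change above follows a defer-the-commit pattern: on success the
completion schedules a high-priority callback, so the valid bits, the
unlock and the final put all happen only after the backfill I/O is known
to have succeeded. A minimal sketch of that ordering, with invented
types and names rather than the real OCF API:

/* Simplified sketch of the new completion ordering (invented names). */
#include <stdio.h>

struct request {
	int (*deferred)(struct request *req);  /* runs later, in queue context */
};

static int update_metadata_cb(struct request *req)
{
	/* analogous to _ocf_backfill_update_metadata: valid bits, unlock, put */
	printf("commit valid bits, unlock cache lines, put request\n");
	return 0;
}

static void backfill_complete(struct request *req, int error)
{
	if (error) {
		printf("invalidate mapping, fix occupancy\n");  /* failure path */
		return;
	}
	/* success: nothing is committed here, only scheduled */
	req->deferred = update_metadata_cb;
}

static void queue_run(struct request *req)
{
	if (req->deferred)
		req->deferred(req);
}

int main(void)
{
	struct request req = { 0 };
	backfill_complete(&req, 0);  /* backfill I/O finished without error */
	queue_run(&req);             /* the queue performs the commit later */
	return 0;
}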

File 2 of 2:

@@ -1,6 +1,6 @@
 /*
  * Copyright(c) 2012-2022 Intel Corporation
- * Copyright(c) 2024 Huawei Technologies
+ * Copyright(c) 2024-2025 Huawei Technologies
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
@@ -133,13 +133,6 @@ static int _ocf_read_generic_do(struct ocf_request *req)
 			return 0;
 		}
 
-		ocf_hb_req_prot_lock_wr(req);
-
-		/* Set valid status bits map */
-		ocf_set_valid_map_info(req);
-
-		ocf_hb_req_prot_unlock_wr(req);
 	}
 
 	if (ocf_engine_needs_repart(req)) {