From 7776bd64851da3433abdb9718355eb497d871e4e Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Tue, 26 May 2020 16:37:16 +0200 Subject: [PATCH] WO: read clean sectors from cache In case of partial hit WO engine first reads data for the entire request address range from core device. Then it plumbs it by fetching dirty sectors from cache device. For unindentified reason this leads to a data corruption in YCSB workload A. After flushing dirty data and re-loading cache the data is correct. This change modifies WO read handler to read clean data from the cache. This is not optimal, as the clean sectors are now read twice in case of partial hit. For now it seems to be good enough work-around for the data corruption problem. The symptoms, combined with the fact that this change seems to make the problem go away, indicates that at some point WB write handler (and/or special I/O request handlers like discard) puts CAS in a state where in-memory medata wrongly indicates that a sector is clean while in fact it is dirty, as marked in the on-disk metadata. Signed-off-by: Adam Rutkowski --- src/engine/engine_common.h | 13 +++++++++++++ src/engine/engine_wo.c | 29 ++++++++++++++--------------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/engine/engine_common.h b/src/engine/engine_common.h index 320a69f..2557c64 100644 --- a/src/engine/engine_common.h +++ b/src/engine/engine_common.h @@ -141,6 +141,19 @@ bool ocf_engine_map_all_sec_clean(struct ocf_request *req, uint32_t line) start, end); } +static inline +bool ocf_engine_map_all_sec_valid(struct ocf_request *req, uint32_t line) +{ + uint8_t start = ocf_map_line_start_sector(req, line); + uint8_t end = ocf_map_line_end_sector(req, line); + + if (req->map[line].status != LOOKUP_HIT) + return false; + + return metadata_test_valid_sec(req->cache, req->map[line].coll_idx, + start, end); +} + /** * @brief Clean request (flush dirty data to the core device) * diff --git a/src/engine/engine_wo.c b/src/engine/engine_wo.c index 128ecb7..d3308a5 100644 --- a/src/engine/engine_wo.c +++ b/src/engine/engine_wo.c @@ -57,7 +57,7 @@ static int ocf_read_wo_cache_do(struct ocf_request *req) uint32_t s, e, i; uint64_t line; struct ocf_map_info *entry; - bool dirty = false; + bool valid = false; bool io = false; uint64_t phys_prev, phys_curr = 0; uint64_t io_start = 0; @@ -83,18 +83,17 @@ static int ocf_read_wo_cache_do(struct ocf_request *req) } /* try to seek directly to the last sector */ - if (entry->status == LOOKUP_MISS || - ocf_engine_map_all_sec_clean(req, line)) { - /* all sectors invalid or clean */ + if (entry->status == LOOKUP_MISS) { + /* all sectors invalid */ i = e + 1; increment = SECTORS_TO_BYTES(e - s + 1); - dirty = false; + valid = false; } - else if (ocf_engine_map_all_sec_dirty(req, line)) { - /* all sectors dirty */ + else if (ocf_engine_map_all_sec_valid(req, line)) { + /* all sectors valid */ i = e + 1; increment = SECTORS_TO_BYTES(e - s + 1); - dirty = true; + valid = true; } else { /* need to iterate through CL sector by sector */ i = s; @@ -102,26 +101,26 @@ static int ocf_read_wo_cache_do(struct ocf_request *req) do { if (i <= e) { - dirty = metadata_test_dirty_one(cache, + valid = metadata_test_valid_one(cache, entry->coll_idx, i); increment = 0; do { ++i; increment += SECTORS_TO_BYTES(1); - } while (i <= e && metadata_test_dirty_one( + } while (i <= e && metadata_test_valid_one( cache, entry->coll_idx, i) - == dirty); + == valid); } - if (io && !dirty) { - /* end of sequential dirty region */ + if (io && !valid) { + /* end of sequential valid region */ ocf_read_wo_cache_io(req, io_start, offset - io_start); io = false; } - if (!io && dirty) { - /* beginning of sequential dirty region */ + if (!io && valid) { + /* beginning of sequential valid region */ io = true; io_start = offset; }