From 61983c946c54a816693237bcc5f5d7be8c6249f4 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 19 Mar 2020 19:10:09 -0400 Subject: [PATCH 1/6] Move flush containers sort & submit outside metadata lock Moving _ocf_mngt_flush_containers outside global metadata critical section. All this function does is sort core lines and add queue request. This fixes stalls reported by Linux scheduler due to IO threads waiting on global metadata RW semaprhore for several minutes. Signed-off-by: Adam Rutkowski --- src/mngt/ocf_mngt_flush.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mngt/ocf_mngt_flush.c b/src/mngt/ocf_mngt_flush.c index 3bfdee4..66a6fd9 100644 --- a/src/mngt/ocf_mngt_flush.c +++ b/src/mngt/ocf_mngt_flush.c @@ -517,9 +517,9 @@ static void _ocf_mngt_flush_core( fc->core_id = core_id; fc->iter = 0; - _ocf_mngt_flush_containers(context, fc, 1, complete); - ocf_metadata_end_exclusive_access(&cache->metadata.lock); + + _ocf_mngt_flush_containers(context, fc, 1, complete); } static void _ocf_mngt_flush_all_cores( @@ -550,9 +550,9 @@ static void _ocf_mngt_flush_all_cores( return; } - _ocf_mngt_flush_containers(context, fctbl, fcnum, complete); - ocf_metadata_end_exclusive_access(&cache->metadata.lock); + + _ocf_mngt_flush_containers(context, fctbl, fcnum, complete); } static void _ocf_mngt_flush_all_cores_complete( From c17beec7d41c20999e80858a60b178e3cb03feea Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 19 Mar 2020 21:01:34 -0400 Subject: [PATCH 2/6] Do not exclude used cachelines from flushing Lower layer is prepared to handle used cachelines by acquiring asynchronus read lock. It is very likely that by the time the cacheline is actually cleaned its lock state changes. So checking the lock at the moment of constructing dirty cachelines list makes little sense. Signed-off-by: Adam Rutkowski --- src/mngt/ocf_mngt_flush.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/mngt/ocf_mngt_flush.c b/src/mngt/ocf_mngt_flush.c index 66a6fd9..9d375df 100644 --- a/src/mngt/ocf_mngt_flush.c +++ b/src/mngt/ocf_mngt_flush.c @@ -166,9 +166,6 @@ static int _ocf_mngt_get_sectors(ocf_cache_t cache, ocf_core_id_t core_id, if (!metadata_test_dirty(cache, i)) continue; - if (ocf_cache_line_is_used(cache, i)) - continue; - /* It's core_id cacheline and it's valid and it's dirty! */ p[j].cache_line = i; p[j].core_line = core_line; @@ -261,9 +258,6 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, if (!metadata_test_dirty(cache, i)) continue; - if (ocf_cache_line_is_used(cache, i)) - continue; - curr = &fc[core_revmap[core_id]]; ENV_BUG_ON(curr->iter >= curr->count); From 64dcae1490482df02cfb66cf146e597abefdf640 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 19 Mar 2020 21:04:40 -0400 Subject: [PATCH 3/6] Split global metadata lock critial path There is no need to constantly hold metadata global lock during collecting cachelines to clean. Since we are past freezing dirty request counter, we know for sure that the number of dirty cache lines will not increase. So the worst case is when the loop realxes and releases the lock, a concurent IO to CAS is serviced in WT mode, possibly inserting and/or evicting cachelines. This does not interfere with scanning for dirty cachelines. And the lower layer will handle synchronization with concurrent I/O by acquiring asynchronous read lock on each cleaned cacheline. Signed-off-by: Adam Rutkowski --- src/mngt/ocf_mngt_flush.c | 67 ++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/src/mngt/ocf_mngt_flush.c b/src/mngt/ocf_mngt_flush.c index 9d375df..5a4f28d 100644 --- a/src/mngt/ocf_mngt_flush.c +++ b/src/mngt/ocf_mngt_flush.c @@ -142,17 +142,22 @@ static int _ocf_mngt_get_sectors(ocf_cache_t cache, ocf_core_id_t core_id, ocf_core_id_t i_core_id; struct flush_data *p; uint32_t i, j, dirty = 0; + unsigned ret = 0; + + ocf_metadata_start_exclusive_access(&cache->metadata.lock); dirty = env_atomic_read(&core->runtime_meta->dirty_clines); if (!dirty) { *num = 0; *tbl = NULL; - return 0; + goto unlock; } p = env_vmalloc(dirty * sizeof(**tbl)); - if (!p) - return -OCF_ERR_NO_MEM; + if (!p) { + ret = -OCF_ERR_NO_MEM; + goto unlock; + } for (i = 0, j = 0; i < cache->device->collision_table_entries; i++) { ocf_metadata_get_core_info(cache, i, &i_core_id, &core_line); @@ -171,23 +176,31 @@ static int _ocf_mngt_get_sectors(ocf_cache_t cache, ocf_core_id_t core_id, p[j].core_line = core_line; p[j].core_id = i_core_id; j++; + /* stop if all cachelines were found */ if (j == dirty) break; + + if ((i + 1) % 1000000 == 0) { + ocf_metadata_end_exclusive_access( + &cache->metadata.lock); + env_cond_resched(); + ocf_metadata_start_exclusive_access( + &cache->metadata.lock); + } } ocf_core_log(core, log_debug, "%u dirty cache lines to clean\n", j); - if (dirty != j) { - ocf_core_log(core, log_debug, "Wrong number of dirty " - "blocks for flushing (%u!=%u)\n", j, dirty); - } - *tbl = p; *num = j; - return 0; + +unlock: + ocf_metadata_end_exclusive_access(&cache->metadata.lock); + + return ret; } static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, @@ -201,7 +214,9 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, ocf_core_id_t core_id; ocf_core_t core; uint32_t i, j = 0, dirty = 0; - int step = 0; + int ret = 0; + + ocf_metadata_start_exclusive_access(&cache->metadata.lock); /* * TODO: Create containers for each physical device, not for @@ -210,7 +225,7 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, num = cache->conf_meta->core_count; if (num == 0) { *fcnum = 0; - return 0; + goto unlock; } core_revmap = env_vzalloc(sizeof(*core_revmap) * OCF_CORE_MAX); @@ -221,7 +236,8 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, fc = env_vzalloc(sizeof(**fctbl) * num); if (!fc) { env_vfree(core_revmap); - return -OCF_ERR_NO_MEM; + ret = -OCF_ERR_NO_MEM; + goto unlock; } for_each_core(cache, core, core_id) { @@ -246,7 +262,7 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, env_vfree(core_revmap); env_vfree(fc); *fcnum = 0; - return 0; + goto unlock; } for (i = 0, j = 0; i < cache->device->collision_table_entries; i++) { @@ -269,16 +285,21 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, curr->iter++; j++; + /* stop if all cachelines were found */ if (j == dirty) break; - OCF_COND_RESCHED(step, 1000000) + if ((i + 1) % 1000000 == 0) { + ocf_metadata_end_exclusive_access( + &cache->metadata.lock); + env_cond_resched(); + ocf_metadata_start_exclusive_access( + &cache->metadata.lock); + } } if (dirty != j) { - ocf_cache_log(cache, log_debug, "Wrong number of dirty " - "blocks (%u!=%u)\n", j, dirty); for (i = 0; i < num; i++) fc[i].count = fc[i].iter; } @@ -289,7 +310,10 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, env_vfree(core_revmap); *fctbl = fc; *fcnum = num; - return 0; + +unlock: + ocf_metadata_end_exclusive_access(&cache->metadata.lock); + return ret; } static void _ocf_mngt_free_flush_containers(struct flush_container *fctbl, @@ -495,15 +519,12 @@ static void _ocf_mngt_flush_core( return; } - ocf_metadata_start_exclusive_access(&cache->metadata.lock); - ret = _ocf_mngt_get_sectors(cache, core_id, &fc->flush_data, &fc->count); if (ret) { ocf_core_log(core, log_err, "Flushing operation aborted, " "no memory\n"); env_vfree(fc); - ocf_metadata_end_exclusive_access(&cache->metadata.lock); complete(context, -OCF_ERR_NO_MEM); return; } @@ -511,8 +532,6 @@ static void _ocf_mngt_flush_core( fc->core_id = core_id; fc->iter = 0; - ocf_metadata_end_exclusive_access(&cache->metadata.lock); - _ocf_mngt_flush_containers(context, fc, 1, complete); } @@ -532,8 +551,6 @@ static void _ocf_mngt_flush_all_cores( env_atomic_set(&cache->flush_in_progress, 1); - ocf_metadata_start_exclusive_access(&cache->metadata.lock); - /* Get all 'dirty' sectors for all cores */ ret = _ocf_mngt_get_flush_containers(cache, &fctbl, &fcnum); if (ret) { @@ -544,8 +561,6 @@ static void _ocf_mngt_flush_all_cores( return; } - ocf_metadata_end_exclusive_access(&cache->metadata.lock); - _ocf_mngt_flush_containers(context, fctbl, fcnum, complete); } From 4d61d5624919c6a58a1bcac7206c461410279250 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Mon, 23 Mar 2020 19:23:21 -0400 Subject: [PATCH 4/6] Rename flushing functions local variables for readibility Signed-off-by: Adam Rutkowski --- src/mngt/ocf_mngt_flush.c | 67 ++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/src/mngt/ocf_mngt_flush.c b/src/mngt/ocf_mngt_flush.c index 5a4f28d..59214b9 100644 --- a/src/mngt/ocf_mngt_flush.c +++ b/src/mngt/ocf_mngt_flush.c @@ -140,48 +140,52 @@ static int _ocf_mngt_get_sectors(ocf_cache_t cache, ocf_core_id_t core_id, ocf_core_t core = ocf_cache_get_core(cache, core_id); uint64_t core_line; ocf_core_id_t i_core_id; - struct flush_data *p; - uint32_t i, j, dirty = 0; + struct flush_data *elem; + uint32_t line, dirty_found = 0, dirty_total = 0; unsigned ret = 0; ocf_metadata_start_exclusive_access(&cache->metadata.lock); - dirty = env_atomic_read(&core->runtime_meta->dirty_clines); - if (!dirty) { + dirty_total = env_atomic_read(&core->runtime_meta->dirty_clines); + if (!dirty_total) { *num = 0; *tbl = NULL; goto unlock; } - p = env_vmalloc(dirty * sizeof(**tbl)); - if (!p) { + *tbl = env_vmalloc(dirty_total * sizeof(**tbl)); + if (*tbl == NULL) { ret = -OCF_ERR_NO_MEM; goto unlock; } - for (i = 0, j = 0; i < cache->device->collision_table_entries; i++) { - ocf_metadata_get_core_info(cache, i, &i_core_id, &core_line); + for (line = 0, elem = *tbl; + line < cache->device->collision_table_entries; + line++) { + ocf_metadata_get_core_info(cache, line, &i_core_id, + &core_line); if (i_core_id != core_id) continue; - if (!metadata_test_valid_any(cache, i)) + if (!metadata_test_valid_any(cache, line)) continue; - if (!metadata_test_dirty(cache, i)) + if (!metadata_test_dirty(cache, line)) continue; /* It's core_id cacheline and it's valid and it's dirty! */ - p[j].cache_line = i; - p[j].core_line = core_line; - p[j].core_id = i_core_id; - j++; + elem->cache_line = line; + elem->core_line = core_line; + elem->core_id = i_core_id; + elem++; + dirty_found++; /* stop if all cachelines were found */ - if (j == dirty) + if (dirty_found == dirty_total) break; - if ((i + 1) % 1000000 == 0) { + if ((line + 1) % 1000000 == 0) { ocf_metadata_end_exclusive_access( &cache->metadata.lock); env_cond_resched(); @@ -191,11 +195,10 @@ static int _ocf_mngt_get_sectors(ocf_cache_t cache, ocf_core_id_t core_id, } ocf_core_log(core, log_debug, - "%u dirty cache lines to clean\n", j); + "%u dirty cache lines to clean\n", dirty_found); - *tbl = p; - *num = j; + *num = dirty_found; unlock: ocf_metadata_end_exclusive_access(&cache->metadata.lock); @@ -213,7 +216,8 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, uint64_t core_line; ocf_core_id_t core_id; ocf_core_t core; - uint32_t i, j = 0, dirty = 0; + uint32_t i, j = 0, line; + uint32_t dirty_found = 0, dirty_total = 0; int ret = 0; ocf_metadata_start_exclusive_access(&cache->metadata.lock); @@ -247,7 +251,7 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, /* Check for dirty blocks */ fc[j].count = env_atomic_read( &core->runtime_meta->dirty_clines); - dirty += fc[j].count; + dirty_total += fc[j].count; if (fc[j].count) { fc[j].flush_data = env_vmalloc(fc[j].count * @@ -258,20 +262,20 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, break; } - if (!dirty) { + if (!dirty_total) { env_vfree(core_revmap); env_vfree(fc); *fcnum = 0; goto unlock; } - for (i = 0, j = 0; i < cache->device->collision_table_entries; i++) { - ocf_metadata_get_core_info(cache, i, &core_id, &core_line); + for (line = 0; line < cache->device->collision_table_entries; line++) { + ocf_metadata_get_core_info(cache, line, &core_id, &core_line); - if (!metadata_test_valid_any(cache, i)) + if (!metadata_test_valid_any(cache, line)) continue; - if (!metadata_test_dirty(cache, i)) + if (!metadata_test_dirty(cache, line)) continue; curr = &fc[core_revmap[core_id]]; @@ -279,18 +283,17 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, ENV_BUG_ON(curr->iter >= curr->count); /* It's core_id cacheline and it's valid and it's dirty! */ - curr->flush_data[curr->iter].cache_line = i; + curr->flush_data[curr->iter].cache_line = line; curr->flush_data[curr->iter].core_line = core_line; curr->flush_data[curr->iter].core_id = core_id; curr->iter++; - - j++; + dirty_found++; /* stop if all cachelines were found */ - if (j == dirty) + if (dirty_found == dirty_total) break; - if ((i + 1) % 1000000 == 0) { + if ((line + 1) % 1000000 == 0) { ocf_metadata_end_exclusive_access( &cache->metadata.lock); env_cond_resched(); @@ -299,7 +302,7 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, } } - if (dirty != j) { + if (dirty_total != dirty_found) { for (i = 0; i < num; i++) fc[i].count = fc[i].iter; } From fd328bd0a1277c6cc917fc23016e99df0932b0c3 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Fri, 20 Mar 2020 13:18:24 -0400 Subject: [PATCH 5/6] Check relaxation condition in each step of flush loop Signed-off-by: Adam Rutkowski --- src/mngt/ocf_mngt_flush.c | 62 +++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/src/mngt/ocf_mngt_flush.c b/src/mngt/ocf_mngt_flush.c index 59214b9..8e2e22e 100644 --- a/src/mngt/ocf_mngt_flush.c +++ b/src/mngt/ocf_mngt_flush.c @@ -165,25 +165,20 @@ static int _ocf_mngt_get_sectors(ocf_cache_t cache, ocf_core_id_t core_id, ocf_metadata_get_core_info(cache, line, &i_core_id, &core_line); - if (i_core_id != core_id) - continue; + if (i_core_id == core_id && + metadata_test_valid_any(cache, line) && + metadata_test_dirty(cache, line)) { + /* It's valid and dirty target core cacheline */ + elem->cache_line = line; + elem->core_line = core_line; + elem->core_id = i_core_id; + elem++; + dirty_found++; - if (!metadata_test_valid_any(cache, line)) - continue; - - if (!metadata_test_dirty(cache, line)) - continue; - - /* It's core_id cacheline and it's valid and it's dirty! */ - elem->cache_line = line; - elem->core_line = core_line; - elem->core_id = i_core_id; - elem++; - dirty_found++; - - /* stop if all cachelines were found */ - if (dirty_found == dirty_total) - break; + /* stop if all cachelines were found */ + if (dirty_found == dirty_total) + break; + } if ((line + 1) % 1000000 == 0) { ocf_metadata_end_exclusive_access( @@ -272,26 +267,23 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, for (line = 0; line < cache->device->collision_table_entries; line++) { ocf_metadata_get_core_info(cache, line, &core_id, &core_line); - if (!metadata_test_valid_any(cache, line)) - continue; + if (metadata_test_valid_any(cache, line) && + metadata_test_dirty(cache, line)) { + curr = &fc[core_revmap[core_id]]; - if (!metadata_test_dirty(cache, line)) - continue; + ENV_BUG_ON(curr->iter >= curr->count); - curr = &fc[core_revmap[core_id]]; + /* It's core_id cacheline and it's valid and it's dirty! */ + curr->flush_data[curr->iter].cache_line = line; + curr->flush_data[curr->iter].core_line = core_line; + curr->flush_data[curr->iter].core_id = core_id; + curr->iter++; + dirty_found++; - ENV_BUG_ON(curr->iter >= curr->count); - - /* It's core_id cacheline and it's valid and it's dirty! */ - curr->flush_data[curr->iter].cache_line = line; - curr->flush_data[curr->iter].core_line = core_line; - curr->flush_data[curr->iter].core_id = core_id; - curr->iter++; - dirty_found++; - - /* stop if all cachelines were found */ - if (dirty_found == dirty_total) - break; + /* stop if all cachelines were found */ + if (dirty_found == dirty_total) + break; + } if ((line + 1) % 1000000 == 0) { ocf_metadata_end_exclusive_access( From b267d5d77d5ac45c83982d0e9e0b6e38b1681dbb Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Wed, 25 Mar 2020 23:37:49 +0100 Subject: [PATCH 6/6] Reduce flush relaxation period by 1 order of magninude Loop now relaxes every 2^17 (131K) cycles instead of every 1M. Signed-off-by: Adam Rutkowski --- src/mngt/ocf_mngt_flush.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mngt/ocf_mngt_flush.c b/src/mngt/ocf_mngt_flush.c index 8e2e22e..1e51c25 100644 --- a/src/mngt/ocf_mngt_flush.c +++ b/src/mngt/ocf_mngt_flush.c @@ -180,7 +180,7 @@ static int _ocf_mngt_get_sectors(ocf_cache_t cache, ocf_core_id_t core_id, break; } - if ((line + 1) % 1000000 == 0) { + if ((line + 1) % 131072 == 0) { ocf_metadata_end_exclusive_access( &cache->metadata.lock); env_cond_resched(); @@ -285,7 +285,7 @@ static int _ocf_mngt_get_flush_containers(ocf_cache_t cache, break; } - if ((line + 1) % 1000000 == 0) { + if ((line + 1) % 131072 == 0) { ocf_metadata_end_exclusive_access( &cache->metadata.lock); env_cond_resched();