From 3e033df9c64d2e2e5befd719f27a4352d5eaa66c Mon Sep 17 00:00:00 2001
From: Michal Mielewczyk
Date: Tue, 28 Jan 2020 09:29:33 -0500
Subject: [PATCH 1/2] Handle cache flush interruption.

Acquiring the write lock is not necessary when flushing a cache, so it
was replaced with a read lock wherever possible.

If a flush operation is interrupted by a signal, not only is the wait
for its completion interrupted, but the underlying OCF flush operation
is stopped as well.

Signed-off-by: Michal Mielewczyk
---
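Note on the interruptible path: the async context is heap-allocated because
the waiting process may be gone by the time the flush completes. Whichever
side finishes last frees the context; the completion frees it when
_cache_mngt_async_callee_set_result() reports -KCAS_ERR_WAITING_INTERRUPTED,
i.e. when the waiter has already left. Below is a minimal userspace sketch of
that hand-off. It is not the kernel code: pthreads stand in for the
completion/spinlock primitives, a timed wait stands in for an interruptible
sleep, and all names (async_context, async_flush, wait_interruptible) are
illustrative.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

struct async_context {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	bool done;      /* callee posted its result */
	bool abandoned; /* waiter gave up before completion */
	int result;
};

/* Callee side: publish the result; free the context iff the waiter left. */
static void *async_flush(void *priv)
{
	struct async_context *ctx = priv;
	bool free_ctx;

	sleep(2); /* pretend the flush takes a while */

	pthread_mutex_lock(&ctx->lock);
	ctx->result = 0;
	ctx->done = true;
	free_ctx = ctx->abandoned;
	pthread_cond_signal(&ctx->cond);
	pthread_mutex_unlock(&ctx->lock);

	if (free_ctx)
		free(ctx); /* ownership passed to us when the waiter bailed out */
	return NULL;
}

/* Waiter side: a 1s timeout plays the role of an arriving signal. */
static int wait_interruptible(struct async_context *ctx)
{
	struct timespec deadline;
	bool free_ctx = true;
	int result;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;

	pthread_mutex_lock(&ctx->lock);
	while (!ctx->done) {
		if (pthread_cond_timedwait(&ctx->cond, &ctx->lock,
				&deadline) == ETIMEDOUT)
			break;
	}
	if (ctx->done) {
		result = ctx->result;
	} else {
		ctx->abandoned = true; /* hand ownership to the callee */
		free_ctx = false;
		result = -EINTR;
	}
	pthread_mutex_unlock(&ctx->lock);

	if (free_ctx)
		free(ctx);
	return result;
}

int main(void)
{
	struct async_context *ctx = calloc(1, sizeof(*ctx));
	pthread_t tid;

	pthread_mutex_init(&ctx->lock, NULL);
	pthread_cond_init(&ctx->cond, NULL);

	pthread_create(&tid, NULL, async_flush, ctx);
	printf("wait returned %d\n", wait_interruptible(ctx));
	pthread_join(tid, NULL);
	return 0;
}

The invariant is that the result is published and the waiter's presence is
checked under a single lock, so exactly one side calls free().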
 modules/cas_cache/layer_cache_management.c | 83 ++++++++++++++++++----
 1 file changed, 68 insertions(+), 15 deletions(-)

diff --git a/modules/cas_cache/layer_cache_management.c b/modules/cas_cache/layer_cache_management.c
index 5f34c06..6556256 100644
--- a/modules/cas_cache/layer_cache_management.c
+++ b/modules/cas_cache/layer_cache_management.c
@@ -23,6 +23,7 @@ struct _cache_mngt_async_context {
 	struct completion cmpl;
 	spinlock_t lock;
 	int result;
+	void (*compl_func)(ocf_cache_t cache);
 };
 
 /*
@@ -87,6 +88,7 @@ static inline void _cache_mngt_async_context_init(
 	init_completion(&context->cmpl);
 	spin_lock_init(&context->lock);
 	context->result = 0;
+	context->compl_func = NULL;
 }
 
 static void _cache_mngt_lock_complete(ocf_cache_t cache, void *priv, int error)
@@ -196,19 +198,55 @@ static int _cache_mngt_save_sync(ocf_cache_t cache)
 	return result;
 }
 
+static void _cache_mngt_cache_flush_uninterruptible_complete(ocf_cache_t cache,
+		void *priv, int error)
+{
+	struct _cache_mngt_sync_context *context = priv;
+
+	*context->result = error;
+	complete(&context->cmpl);
+}
+
+/*
+ * Since wait_for_completion() is used, hung task warnings may occur if the
+ * flush takes a long time.
+ */
+static int _cache_mngt_cache_flush_uninterruptible(ocf_cache_t cache)
+{
+	int result;
+	struct _cache_mngt_sync_context context;
+	struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
+
+	init_completion(&context.cmpl);
+	context.result = &result;
+	atomic_set(&cache_priv->flush_interrupt_enabled, 0);
+
+	ocf_mngt_cache_flush(cache, _cache_mngt_cache_flush_uninterruptible_complete,
+			&context);
+	wait_for_completion(&context.cmpl);
+
+	atomic_set(&cache_priv->flush_interrupt_enabled, 1);
+
+	return result;
+}
+
 static void _cache_mngt_cache_flush_complete(ocf_cache_t cache, void *priv,
 		int error)
 {
 	struct _cache_mngt_async_context *context = priv;
 	int result;
 
+	if (context->compl_func)
+		context->compl_func(cache);
+
 	result = _cache_mngt_async_callee_set_result(context, error);
 
 	if (result == -KCAS_ERR_WAITING_INTERRUPTED)
 		kfree(context);
 }
 
-static int _cache_mngt_cache_flush_sync(ocf_cache_t cache, bool interruption)
+static int _cache_mngt_cache_flush_sync(ocf_cache_t cache, bool interruption,
+		void (*compl)(ocf_cache_t cache))
 {
 	int result;
 	struct _cache_mngt_async_context *context;
@@ -219,6 +257,7 @@ static int _cache_mngt_cache_flush_sync(ocf_cache_t cache, bool interruption)
 		return -ENOMEM;
 
 	_cache_mngt_async_context_init(context);
+	context->compl_func = compl;
 	atomic_set(&cache_priv->flush_interrupt_enabled, interruption);
 
 	ocf_mngt_cache_flush(cache, _cache_mngt_cache_flush_complete, context);
@@ -228,6 +267,8 @@ static int _cache_mngt_cache_flush_sync(ocf_cache_t cache, bool interruption)
 
 	if (result != -KCAS_ERR_WAITING_INTERRUPTED)
 		kfree(context);
+	else if (result == -KCAS_ERR_WAITING_INTERRUPTED && interruption)
+		ocf_mngt_cache_flush_interrupt(cache);
 
 	atomic_set(&cache_priv->flush_interrupt_enabled, 1);
 
@@ -496,6 +537,12 @@ out:
 	return result;
 }
 
+static void _cache_read_unlock_put_cmpl(ocf_cache_t cache)
+{
+	ocf_mngt_cache_read_unlock(cache);
+	ocf_mngt_cache_put(cache);
+}
+
 int cache_mngt_flush_device(const char *cache_name, size_t name_len)
 {
 	int result;
@@ -506,16 +553,15 @@ int cache_mngt_flush_device(const char *cache_name, size_t name_len)
 	if (result)
 		return result;
 
-	result = _cache_mngt_lock_sync(cache);
+	result = _cache_mngt_read_lock_sync(cache);
 	if (result) {
 		ocf_mngt_cache_put(cache);
 		return result;
 	}
 
-	result = _cache_mngt_cache_flush_sync(cache, true);
+	result = _cache_mngt_cache_flush_sync(cache, true,
+			_cache_read_unlock_put_cmpl);
 
-	ocf_mngt_cache_unlock(cache);
-	ocf_mngt_cache_put(cache);
 	return result;
 }
 
@@ -1921,7 +1967,7 @@ int cache_mngt_set_cache_mode(const char *cache_name, size_t name_len,
 		goto out;
 
 	if (flush) {
-		result = _cache_mngt_cache_flush_sync(cache, true);
+		result = _cache_mngt_cache_flush_sync(cache, true, NULL);
 		if (result) {
 			ocf_mngt_cache_set_mode(cache, old_mode);
 			goto out;
@@ -1952,7 +1998,6 @@ int cache_mngt_exit_instance(const char *cache_name, size_t name_len, int flush)
 	ocf_queue_t mngt_queue;
 	int status, flush_status = 0;
 
-	/* Get cache */
 	status = ocf_mngt_cache_get_by_name(cas_ctx, cache_name,
 			name_len, &cache);
 	if (status)
@@ -1961,9 +2006,6 @@ int cache_mngt_exit_instance(const char *cache_name, size_t name_len, int flush)
 	cache_priv = ocf_cache_get_priv(cache);
 	mngt_queue = cache_priv->mngt_queue;
 
-	status = _cache_mngt_read_lock_sync(cache);
-	if (status)
-		goto put;
 	/*
 	 * Flush cache. Flushing may take a long time, so we allow user
 	 * to interrupt this operation. Hence we do first flush before
@@ -1974,11 +2016,24 @@ int cache_mngt_exit_instance(const char *cache_name, size_t name_len, int flush)
 	 * exported object. The second flush should be much faster.
 	 */
 	if (flush) {
-		status = _cache_mngt_cache_flush_sync(cache, true);
+		/* Getting the cache twice is a workaround to make flush error
+		 * handling easier and to avoid synchronization issues */
+		status = ocf_mngt_cache_get(cache);
+		if (status)
+			goto put;
+
+		status = _cache_mngt_read_lock_sync(cache);
+		if (status) {
+			ocf_mngt_cache_put(cache);
+			goto put;
+		}
+
+		status = _cache_mngt_cache_flush_sync(cache, true,
+				_cache_read_unlock_put_cmpl);
 		switch (status) {
 		case -OCF_ERR_CACHE_IN_INCOMPLETE_STATE:
 		case -OCF_ERR_FLUSHING_INTERRUPTED:
-			ocf_mngt_cache_read_unlock(cache);
+		case -KCAS_ERR_WAITING_INTERRUPTED:
 			goto put;
 		default:
 			flush_status = status;
@@ -1986,8 +2041,6 @@ int cache_mngt_exit_instance(const char *cache_name, size_t name_len, int flush)
 		}
 	}
 
-	ocf_mngt_cache_read_unlock(cache);
-
 	/* get cache write lock */
 	status = _cache_mngt_lock_sync(cache);
 	if (status)
@@ -2015,7 +2068,7 @@ int cache_mngt_exit_instance(const char *cache_name, size_t name_len, int flush)
 
 	/* Flush cache again. This time we don't allow interruption. */
 	if (flush)
-		flush_status = _cache_mngt_cache_flush_sync(cache, false);
+		flush_status = _cache_mngt_cache_flush_uninterruptible(cache);
 
 	if (flush && !flush_status)
 		BUG_ON(ocf_mngt_cache_is_dirty(cache));

From a0937758b798c06e518d112c27c80a5c1e7dc539 Mon Sep 17 00:00:00 2001
From: Michal Mielewczyk
Date: Tue, 28 Jan 2020 11:01:37 -0500
Subject: [PATCH 2/2] Handle core flush interruption.

Acquiring the write lock is not necessary when flushing a core, so it
was replaced with a read lock wherever possible.

If a flush operation is interrupted by a signal, not only is the wait
for its completion interrupted, but the underlying OCF flush operation
is stopped as well.

Signed-off-by: Michal Mielewczyk
---
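As in the previous patch, the uninterruptible variant added here keeps its
context on the stack: wait_for_completion() guarantees the waiter outlives
the completion callback, so no ownership hand-off is needed; the cost, as
the comment notes, is possible hung-task warnings on a slow flush. Below is
a pthread analogue of that stack-context pattern, not the OCF code; the
names (sync_context, flush_done, flush_uninterruptible) are illustrative.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct sync_context {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
	int *result; /* points into the waiter's stack frame */
};

/* Completion side: post the status and wake the waiter. */
static void *flush_done(void *priv)
{
	struct sync_context *ctx = priv;

	sleep(1); /* pretend the flush takes a while */

	pthread_mutex_lock(&ctx->lock);
	*ctx->result = 0;
	ctx->done = 1;
	pthread_cond_signal(&ctx->cond);
	pthread_mutex_unlock(&ctx->lock);
	return NULL;
}

/* Unbounded wait: safe only because nothing can interrupt this caller,
 * which is why the context may live on the stack. */
static int flush_uninterruptible(void)
{
	int result;
	struct sync_context ctx = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
		.result = &result,
	};
	pthread_t tid;

	pthread_create(&tid, NULL, flush_done, &ctx);

	pthread_mutex_lock(&ctx.lock);
	while (!ctx.done)
		pthread_cond_wait(&ctx.cond, &ctx.lock);
	pthread_mutex_unlock(&ctx.lock);

	pthread_join(tid, NULL);
	return result;
}

int main(void)
{
	printf("flush status: %d\n", flush_uninterruptible());
	return 0;
}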
 modules/cas_cache/layer_cache_management.c | 102 +++++++++++++++------
 1 file changed, 75 insertions(+), 27 deletions(-)

diff --git a/modules/cas_cache/layer_cache_management.c b/modules/cas_cache/layer_cache_management.c
index 6556256..7342de2 100644
--- a/modules/cas_cache/layer_cache_management.c
+++ b/modules/cas_cache/layer_cache_management.c
@@ -280,6 +280,10 @@ static void _cache_mngt_core_flush_complete(ocf_core_t core, void *priv,
 {
 	struct _cache_mngt_async_context *context = priv;
 	int result;
+	ocf_cache_t cache = ocf_core_get_cache(core);
+
+	if (context->compl_func)
+		context->compl_func(cache);
 
 	result = _cache_mngt_async_callee_set_result(context, error);
 
@@ -287,7 +291,8 @@ static void _cache_mngt_core_flush_complete(ocf_core_t core, void *priv,
 		kfree(context);
 }
 
-static int _cache_mngt_core_flush_sync(ocf_core_t core, bool interruption)
+static int _cache_mngt_core_flush_sync(ocf_core_t core, bool interruption,
+		void (*compl)(ocf_cache_t cache))
 {
 	int result;
 	struct _cache_mngt_async_context *context;
@@ -299,6 +304,7 @@ static int _cache_mngt_core_flush_sync(ocf_core_t core, bool interruption)
 		return -ENOMEM;
 
 	_cache_mngt_async_context_init(context);
+	context->compl_func = compl;
 	atomic_set(&cache_priv->flush_interrupt_enabled, interruption);
 
 	ocf_mngt_core_flush(core, _cache_mngt_core_flush_complete, context);
@@ -308,6 +314,41 @@ static int _cache_mngt_core_flush_sync(ocf_core_t core, bool interruption)
 
 	if (result != -KCAS_ERR_WAITING_INTERRUPTED)
 		kfree(context);
+	else if (result == -KCAS_ERR_WAITING_INTERRUPTED && interruption)
+		ocf_mngt_cache_flush_interrupt(cache);
+
+	atomic_set(&cache_priv->flush_interrupt_enabled, 1);
+
+	return result;
+}
+
+static void _cache_mngt_core_flush_uninterruptible_complete(ocf_core_t core,
+		void *priv, int error)
+{
+	struct _cache_mngt_sync_context *context = priv;
+
+	*context->result = error;
+	complete(&context->cmpl);
+}
+
+/*
+ * Since wait_for_completion() is used, hung task warnings may occur if the
+ * flush takes a long time.
+ */
+static int _cache_mngt_core_flush_uninterruptible(ocf_core_t core)
+{
+	int result;
+	struct _cache_mngt_sync_context context;
+	ocf_cache_t cache = ocf_core_get_cache(core);
+	struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
+
+	init_completion(&context.cmpl);
+	context.result = &result;
+	atomic_set(&cache_priv->flush_interrupt_enabled, 0);
+
+	ocf_mngt_core_flush(core, _cache_mngt_core_flush_uninterruptible_complete,
+			&context);
+	wait_for_completion(&context.cmpl);
 
 	atomic_set(&cache_priv->flush_interrupt_enabled, 1);
 
@@ -507,6 +548,12 @@ static void mark_core_id_free(uint64_t *bitmap, uint16_t core_id)
 	clear_bit(core_id, (unsigned long *)bitmap);
 }
 
+static void _cache_read_unlock_put_cmpl(ocf_cache_t cache)
+{
+	ocf_mngt_cache_read_unlock(cache);
+	ocf_mngt_cache_put(cache);
+}
+
 int cache_mngt_flush_object(const char *cache_name, size_t cache_name_len,
 	const char *core_name, size_t core_name_len)
 {
@@ -519,30 +566,25 @@ int cache_mngt_flush_object(const char *cache_name, size_t cache_name_len,
 	if (result)
 		return result;
 
-	result = _cache_mngt_lock_sync(cache);
+	result = _cache_mngt_read_lock_sync(cache);
 	if (result) {
 		ocf_mngt_cache_put(cache);
 		return result;
 	}
 
 	result = ocf_core_get_by_name(cache, core_name, core_name_len, &core);
-	if (result)
-		goto out;
+	if (result) {
+		ocf_mngt_cache_read_unlock(cache);
+		ocf_mngt_cache_put(cache);
+		return result;
+	}
 
-	result = _cache_mngt_core_flush_sync(core, true);
+	result = _cache_mngt_core_flush_sync(core, true,
+			_cache_read_unlock_put_cmpl);
 
-out:
-	ocf_mngt_cache_unlock(cache);
-	ocf_mngt_cache_put(cache);
 	return result;
 }
 
-static void _cache_read_unlock_put_cmpl(ocf_cache_t cache)
-{
-	ocf_mngt_cache_read_unlock(cache);
-	ocf_mngt_cache_put(cache);
-}
-
 int cache_mngt_flush_device(const char *cache_name, size_t name_len)
 {
 	int result;
@@ -1117,8 +1159,11 @@ int _cache_mngt_remove_core_prepare(ocf_cache_t cache, ocf_core_t core,
 	if (!cmd->force_no_flush) {
 		if (core_active) {
 			/* Flush core */
-			flush_result = _cache_mngt_core_flush_sync(core,
-					flush_interruptible);
+			if (flush_interruptible)
+				flush_result = _cache_mngt_core_flush_sync(core,
+					flush_interruptible, _cache_read_unlock_put_cmpl);
+			else
+				flush_result = _cache_mngt_core_flush_uninterruptible(core);
 		} else if (!ocf_mngt_core_is_dirty(core)) {
 			/* Clean core is always "flushed" */
 			flush_result = 0;
@@ -1163,20 +1208,28 @@ int cache_mngt_remove_core_from_cache(struct kcas_remove_core *cmd)
 
 	if (!cmd->force_no_flush) {
 		/* First check state and flush data (if requested by user)
 		   under read lock */
-		result = _cache_mngt_read_lock_sync(cache);
+		/* Getting the cache twice is a workaround to make flush error
+		 * handling easier and to avoid synchronization issues */
+		result = ocf_mngt_cache_get(cache);
 		if (result)
 			goto put;
 
+		result = _cache_mngt_read_lock_sync(cache);
+		if (result) {
+			ocf_mngt_cache_put(cache);
+			goto put;
+		}
 		result = get_core_by_id(cache, cmd->core_id, &core);
-		if (result < 0)
-			goto rd_unlock;
+		if (result < 0) {
+			ocf_mngt_cache_read_unlock(cache);
+			ocf_mngt_cache_put(cache);
+			goto put;
+		}
 
 		result = _cache_mngt_remove_core_prepare(cache, core, cmd,
 				false);
 		if (result)
-			goto rd_unlock;
-
-		ocf_mngt_cache_read_unlock(cache);
+			goto put;
 	}
 
 	/* Acquire write lock */
@@ -1232,11 +1285,6 @@ unlock:
 put:
 	ocf_mngt_cache_put(cache);
 	return result;
-
-rd_unlock:
-	ocf_mngt_cache_read_unlock(cache);
-	ocf_mngt_cache_put(cache);
-	return result;
 }
 
 int cache_mngt_reset_stats(const char *cache_name, size_t cache_name_len,
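
The recurring _cache_read_unlock_put_cmpl() pattern in both patches is what
lets an interrupted caller simply walk away: the read lock and the extra
cache reference taken before the flush are released by the completion
callback, which runs once when the flush actually finishes, and the extra
ocf_mngt_cache_get() keeps the cache alive until that callback has run. A
compressed, synchronous sketch of that hand-off follows; pthreads and the
names (cache_sim, read_unlock_put_cmpl, flush_sync) are illustrative, and
the real callback fires asynchronously from the flush completion.

#include <pthread.h>
#include <stdio.h>

/* Toy stand-ins for the OCF cache lock and reference count; this demo is
 * single-threaded, so a plain int suffices for the refcount. */
struct cache_sim {
	pthread_rwlock_t lock;
	int refcount;
};

/* Mirrors _cache_read_unlock_put_cmpl(): drop the read lock and the
 * reference taken by the caller, so an interrupted waiter has nothing
 * left to clean up. */
static void read_unlock_put_cmpl(struct cache_sim *cache)
{
	pthread_rwlock_unlock(&cache->lock);
	cache->refcount--;
}

/* The flush path runs the completion exactly once when the operation
 * finishes, whether or not the original caller is still waiting. */
static int flush_sync(struct cache_sim *cache,
		void (*compl)(struct cache_sim *cache))
{
	int result = 0; /* pretend the flush succeeded */

	if (compl)
		compl(cache);
	return result;
}

int main(void)
{
	struct cache_sim cache = { .lock = PTHREAD_RWLOCK_INITIALIZER };
	int result;

	/* Caller takes a reference and a read lock, then hands both off
	 * to the completion instead of releasing them itself. */
	cache.refcount++;
	pthread_rwlock_rdlock(&cache.lock);

	result = flush_sync(&cache, read_unlock_put_cmpl);
	printf("flush: %d, refcount back to %d\n", result, cache.refcount);
	return 0;
}

Because the completion owns the cleanup, the -KCAS_ERR_WAITING_INTERRUPTED
path in the sync wrappers does not need to unlock anything; it only has to
avoid freeing the shared context.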