ocf/src/cleaning/acp.c
Adam Rutkowski 81fc7ab5c5 Parallel eviction
Eviction changes allowing cachelines to be evicted (remapped)
while holding the hash bucket write lock instead of the global
metadata write lock.

As eviction (replacement) is now tightly coupled with the
request, each request uses an eviction size equal to the
number of its unmapped cachelines.

Evicting without the global metadata write lock is possible
thanks to the fact that remapping is always performed while
exclusively holding the cacheline (read or write) lock. So
for a cacheline on the LRU list we acquire the cacheline
lock, safely resolve its hash and then write-lock the hash
bucket. Since the cacheline lock is otherwise always acquired
under the hash bucket lock (everywhere except in the new
eviction implementation), we are certain that no one acquires
the cacheline lock behind our back. Concurrent eviction
threads are excluded by holding the eviction list lock for
the duration of the critical locking operations.

Signed-off-by: Adam Rutkowski <adam.j.rutkowski@intel.com>
2021-03-05 11:20:47 +01:00
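
A minimal sketch of the lock ordering described above (illustration only;
helper names such as cacheline_try_lock(), hash_bucket_lock_wr(),
hash_bucket_unlock_wr(), cacheline_unlock() and do_remap() are hypothetical
stand-ins, only ocf_metadata_get_core_info() is taken from the actual code):

/* Hypothetical illustration of the remap lock ordering - not part of acp.c */
static bool remap_under_hash_lock(struct ocf_cache *cache,
		ocf_cache_line_t cline)
{
	ocf_core_id_t core_id;
	uint64_t core_line;
	bool ok;

	/* concurrent eviction threads are excluded by the eviction (LRU)
	 * list lock, assumed to be held by the caller */

	/* 1. grab the cacheline lock first - everyone else takes it under
	 *    the hash bucket lock, so nobody can sneak in behind our back */
	if (!cacheline_try_lock(cache, cline))
		return false;

	/* 2. with the cacheline lock held the mapping cannot change, so the
	 *    hash can be resolved safely */
	ocf_metadata_get_core_info(cache, cline, &core_id, &core_line);

	/* 3. write-lock the hash bucket and remap the cacheline */
	hash_bucket_lock_wr(cache, core_id, core_line);
	ok = do_remap(cache, cline);
	hash_bucket_unlock_wr(cache, core_id, core_line);

	cacheline_unlock(cache, cline);
	return ok;
}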


/*
* Copyright(c) 2012-2021 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "ocf/ocf.h"
#include "../ocf_cache_priv.h"
#include "cleaning.h"
#include "../metadata/metadata.h"
#include "../utils/utils_cleaner.h"
#include "../utils/utils_cache_line.h"
#include "../ocf_request.h"
#include "../cleaning/acp.h"
#include "../engine/engine_common.h"
#include "../concurrency/ocf_cache_line_concurrency.h"
#include "../concurrency/ocf_metadata_concurrency.h"
#include "cleaning_priv.h"
#define OCF_ACP_DEBUG 0
#if 1 == OCF_ACP_DEBUG
#define OCF_DEBUG_PREFIX "[Clean] %s():%d "
#define OCF_DEBUG_LOG(cache, format, ...) \
ocf_cache_log_prefix(cache, log_info, OCF_DEBUG_PREFIX, \
format"\n", __func__, __LINE__, ##__VA_ARGS__)
#define OCF_DEBUG_TRACE(cache) OCF_DEBUG_LOG(cache, "")
#define OCF_DEBUG_MSG(cache, msg) OCF_DEBUG_LOG(cache, "- %s", msg)
#define OCF_DEBUG_PARAM(cache, format, ...) OCF_DEBUG_LOG(cache, "- "format, \
##__VA_ARGS__)
#define ACP_DEBUG_INIT(acp) acp->checksum = 0
#define ACP_DEBUG_BEGIN(acp, cache_line) acp->checksum ^= cache_line
#define ACP_DEBUG_END(acp, cache_line) acp->checksum ^= cache_line
#define ACP_DEBUG_CHECK(acp) ENV_BUG_ON(acp->checksum)
#else
#define OCF_DEBUG_PREFIX
#define OCF_DEBUG_LOG(cache, format, ...)
#define OCF_DEBUG_TRACE(cache)
#define OCF_DEBUG_MSG(cache, msg)
#define OCF_DEBUG_PARAM(cache, format, ...)
#define ACP_DEBUG_INIT(acp)
#define ACP_DEBUG_BEGIN(acp, cache_line)
#define ACP_DEBUG_END(acp, cache_line)
#define ACP_DEBUG_CHECK(acp)
#endif
#define ACP_CHUNK_SIZE (100 * MiB)
/* minimal time (in seconds) before retrying chunk cleaning after an error */
#define ACP_CHUNK_CLEANING_BACKOFF_TIME 5
/* time to sleep (in ms) when there is nothing to clean */
#define ACP_BACKOFF_TIME_MS 1000
#define ACP_MAX_BUCKETS 11
/* Upper thresholds for buckets, as a percentage of dirty cache lines per
 * chunk. First bucket must have threshold=0 - it is never cleaned, so any
 * chunk with dirty lines has to move out of it, otherwise it would stay dirty
 * forever. Last bucket also should stay at 100 for obvious reasons */
static const uint16_t ACP_BUCKET_DEFAULTS[ACP_MAX_BUCKETS] = { 0, 10, 20, 30, 40,
50, 60, 70, 80, 90, 100 };
struct acp_flush_context {
/* number of cache lines in flush */
uint64_t size;
/* chunk for error handling */
struct acp_chunk_info *chunk;
/* cache lines to flush */
struct flush_data data[OCF_ACP_MAX_FLUSH_MAX_BUFFERS];
/* flush error code */
int error;
};
struct acp_state {
/* currently cleaned chunk */
struct acp_chunk_info *chunk;
/* cache line iterator within current chunk */
unsigned iter;
/* true if there are cache lines left to process in the
 * current chunk */
bool in_progress;
};
struct acp_chunk_info {
struct list_head list;
uint64_t chunk_id;
uint64_t next_cleaning_timestamp;
ocf_core_id_t core_id;
uint16_t num_dirty;
uint8_t bucket_id;
};
struct acp_bucket {
struct list_head chunk_list;
uint16_t threshold; /* threshold in clines */
};
struct acp_context {
env_rwsem chunks_lock;
/* number of chunks per core */
uint64_t num_chunks[OCF_CORE_MAX];
/* per core array of all chunks */
struct acp_chunk_info *chunk_info[OCF_CORE_MAX];
struct acp_bucket bucket_info[ACP_MAX_BUCKETS];
/* total number of chunks in cache */
uint64_t chunks_total;
/* structure to keep track of I/O in progress */
struct acp_flush_context flush;
/* cleaning state persistent over subsequent calls to
perform_cleaning */
struct acp_state state;
/* cache handle */
ocf_cache_t cache;
/* cleaner completion callback */
ocf_cleaner_end_t cmpl;
#if 1 == OCF_ACP_DEBUG
/* debug only */
uint64_t checksum;
#endif
};
struct acp_core_line_info
{
ocf_cache_line_t cache_line;
ocf_core_id_t core_id;
uint64_t core_line;
};
#define ACP_LOCK_CHUNKS_RD() env_rwsem_down_read(&acp->chunks_lock)
#define ACP_UNLOCK_CHUNKS_RD() env_rwsem_up_read(&acp->chunks_lock)
#define ACP_LOCK_CHUNKS_WR() env_rwsem_down_write(&acp->chunks_lock)
#define ACP_UNLOCK_CHUNKS_WR() env_rwsem_up_write(&acp->chunks_lock)
static struct acp_context *_acp_get_ctx_from_cache(struct ocf_cache *cache)
{
return cache->cleaner.cleaning_policy_context;
}
static struct acp_cleaning_policy_meta* _acp_meta_get(
struct ocf_cache *cache, uint32_t cache_line)
{
return &ocf_metadata_get_cleaning_policy(cache, cache_line)->meta.acp;
}
static struct acp_core_line_info _acp_core_line_info(struct ocf_cache *cache,
ocf_cache_line_t cache_line)
{
struct acp_core_line_info acp_core_line_info = {.cache_line = cache_line, };
ocf_metadata_get_core_info(cache, cache_line, &acp_core_line_info.core_id,
&acp_core_line_info.core_line);
return acp_core_line_info;
}
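/* Map a cache line to its ACP chunk: resolve the core line and index the
 * per-core chunk array by (core_line * line_size) / ACP_CHUNK_SIZE */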
static struct acp_chunk_info *_acp_get_chunk(struct ocf_cache *cache,
uint32_t cache_line)
{
struct acp_context *acp = _acp_get_ctx_from_cache(cache);
struct acp_core_line_info core_line =
_acp_core_line_info(cache, cache_line);
uint64_t chunk_id;
chunk_id = core_line.core_line * ocf_line_size(cache) / ACP_CHUNK_SIZE;
return &acp->chunk_info[core_line.core_id][chunk_id];
}
static void _acp_remove_cores(struct ocf_cache *cache)
{
ocf_core_t core;
ocf_core_id_t core_id;
for_each_core(cache, core, core_id)
cleaning_policy_acp_remove_core(cache, core_id);
}
static int _acp_load_cores(struct ocf_cache *cache)
{
ocf_core_t core;
ocf_core_id_t core_id;
int err = 0;
for_each_core(cache, core, core_id) {
OCF_DEBUG_PARAM(cache, "loading core %i\n", core_id);
err = cleaning_policy_acp_add_core(cache, core_id);
if (err)
break;
}
if (err)
_acp_remove_cores(cache);
return err;
}
void cleaning_policy_acp_init_cache_block(struct ocf_cache *cache,
uint32_t cache_line)
{
struct acp_cleaning_policy_meta *acp_meta;
acp_meta = _acp_meta_get(cache, cache_line);
acp_meta->dirty = 0;
}
void cleaning_policy_acp_deinitialize(struct ocf_cache *cache)
{
struct acp_context *acp;
_acp_remove_cores(cache);
acp = cache->cleaner.cleaning_policy_context;
env_rwsem_destroy(&acp->chunks_lock);
env_vfree(cache->cleaner.cleaning_policy_context);
cache->cleaner.cleaning_policy_context = NULL;
}
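/* Rebuild ACP state from collision table metadata: reset per-line cleaning
 * state and re-mark dirty lines as hot so per-chunk dirty counters and
 * bucket assignments are restored */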
static void _acp_rebuild(struct ocf_cache *cache)
{
ocf_cache_line_t cline;
ocf_core_id_t cline_core_id;
uint32_t step = 0;
for (cline = 0; cline < cache->device->collision_table_entries; cline++) {
ocf_metadata_get_core_and_part_id(cache, cline, &cline_core_id,
NULL);
OCF_COND_RESCHED_DEFAULT(step);
if (cline_core_id == OCF_CORE_MAX)
continue;
cleaning_policy_acp_init_cache_block(cache, cline);
if (!metadata_test_dirty(cache, cline))
continue;
cleaning_policy_acp_set_hot_cache_line(cache, cline);
}
ocf_cache_log(cache, log_info, "Finished rebuilding ACP metadata\n");
}
void cleaning_policy_acp_setup(struct ocf_cache *cache)
{
struct acp_cleaning_policy_config *config;
config = (void *)&cache->conf_meta->cleaning[ocf_cleaning_acp].data;
config->thread_wakeup_time = OCF_ACP_DEFAULT_WAKE_UP;
config->flush_max_buffers = OCF_ACP_DEFAULT_FLUSH_MAX_BUFFERS;
}
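/* Allocate and initialize the ACP context: compute bucket thresholds from
 * ACP_BUCKET_DEFAULTS, add chunk arrays for already attached cores, rebuild
 * per-chunk dirty state and kick the cleaner thread */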
int cleaning_policy_acp_initialize(struct ocf_cache *cache,
int init_metadata)
{
struct acp_context *acp;
int err, i;
/* bug if the number of cache lines per chunk could overflow the num_dirty
 * counter type */
#if defined (BUILD_BUG_ON)
BUILD_BUG_ON(ACP_CHUNK_SIZE / ocf_cache_line_size_min >=
1U << (sizeof(acp->chunk_info[0][0].num_dirty) * 8));
#else
ENV_BUG_ON(ACP_CHUNK_SIZE / ocf_cache_line_size_min >=
1U << (sizeof(acp->chunk_info[0][0].num_dirty) * 8));
#endif
ENV_BUG_ON(cache->cleaner.cleaning_policy_context);
acp = env_vzalloc(sizeof(*acp));
if (!acp) {
ocf_cache_log(cache, log_err, "acp context allocation error\n");
return -OCF_ERR_NO_MEM;
}
err = env_rwsem_init(&acp->chunks_lock);
if (err) {
env_vfree(acp);
return err;
}
cache->cleaner.cleaning_policy_context = acp;
acp->cache = cache;
for (i = 0; i < ACP_MAX_BUCKETS; i++) {
INIT_LIST_HEAD(&acp->bucket_info[i].chunk_list);
acp->bucket_info[i].threshold =
((ACP_CHUNK_SIZE/ocf_line_size(cache)) *
ACP_BUCKET_DEFAULTS[i]) / 100;
}
if (cache->conf_meta->core_count > 0) {
err = _acp_load_cores(cache);
if (err) {
cleaning_policy_acp_deinitialize(cache);
return err;
}
}
_acp_rebuild(cache);
ocf_kick_cleaner(cache);
return 0;
}
int cleaning_policy_acp_set_cleaning_param(ocf_cache_t cache,
uint32_t param_id, uint32_t param_value)
{
struct acp_cleaning_policy_config *config;
config = (void *)&cache->conf_meta->cleaning[ocf_cleaning_acp].data;
switch (param_id) {
case ocf_acp_wake_up_time:
OCF_CLEANING_CHECK_PARAM(cache, param_value,
OCF_ACP_MIN_WAKE_UP,
OCF_ACP_MAX_WAKE_UP,
"thread_wakeup_time");
config->thread_wakeup_time = param_value;
ocf_cache_log(cache, log_info, "Write-back flush thread "
"wake-up time: %d\n", config->thread_wakeup_time);
ocf_kick_cleaner(cache);
break;
case ocf_acp_flush_max_buffers:
OCF_CLEANING_CHECK_PARAM(cache, param_value,
OCF_ACP_MIN_FLUSH_MAX_BUFFERS,
OCF_ACP_MAX_FLUSH_MAX_BUFFERS,
"flush_max_buffers");
config->flush_max_buffers = param_value;
ocf_cache_log(cache, log_info, "Write-back flush thread max "
"buffers flushed per iteration: %d\n",
config->flush_max_buffers);
break;
default:
return -OCF_ERR_INVAL;
}
return 0;
}
int cleaning_policy_acp_get_cleaning_param(ocf_cache_t cache,
uint32_t param_id, uint32_t *param_value)
{
struct acp_cleaning_policy_config *config;
config = (void *)&cache->conf_meta->cleaning[ocf_cleaning_acp].data;
switch (param_id) {
case ocf_acp_flush_max_buffers:
*param_value = config->flush_max_buffers;
break;
case ocf_acp_wake_up_time:
*param_value = config->thread_wakeup_time;
break;
default:
return -OCF_ERR_INVAL;
}
return 0;
}
/* Attempt to read-lock the cache line mapped at (core_id, core_line) if it
 * is a dirty hit; returns the locked cache line on success or
 * collision_table_entries when nothing was locked */
static ocf_cache_line_t _acp_trylock_dirty(struct ocf_cache *cache,
uint32_t core_id, uint64_t core_line)
{
struct ocf_map_info info;
bool locked = false;
unsigned lock_idx = ocf_metadata_concurrency_next_idx(
cache->cleaner.io_queue);
ocf_hb_cline_prot_lock_rd(&cache->metadata.lock, lock_idx, core_id,
core_line);
ocf_engine_lookup_map_entry(cache, &info, core_id,
core_line);
if (info.status == LOOKUP_HIT &&
metadata_test_dirty(cache, info.coll_idx)) {
locked = ocf_cache_line_try_lock_rd(
ocf_cache_line_concurrency(cache),
info.coll_idx);
}
ocf_hb_cline_prot_unlock_rd(&cache->metadata.lock, lock_idx, core_id,
core_line);
return locked ? info.coll_idx : cache->device->collision_table_entries;
}
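/* Flush of a chunk failed - arm the chunk's cleaning backoff timestamp and
 * log the error (rate-limited) */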
static void _acp_handle_flush_error(struct ocf_cache *cache,
struct acp_context *acp)
{
struct acp_flush_context *flush = &acp->flush;
flush->chunk->next_cleaning_timestamp = env_get_tick_count() +
env_secs_to_ticks(ACP_CHUNK_CLEANING_BACKOFF_TIME);
if (ocf_cache_log_rl(cache)) {
ocf_core_log(&cache->core[flush->chunk->core_id],
log_err, "Cleaning error (%d) in range"
" <%llu; %llu) backing off for %u seconds\n",
flush->error,
flush->chunk->chunk_id * ACP_CHUNK_SIZE,
(flush->chunk->chunk_id * ACP_CHUNK_SIZE) +
ACP_CHUNK_SIZE,
ACP_CHUNK_CLEANING_BACKOFF_TIME);
}
}
static inline bool _acp_can_clean_chunk(struct ocf_cache *cache,
struct acp_chunk_info *chunk)
{
/* Check if core device is opened and if timeout after cleaning error
* expired or wasn't set in the first place */
return (cache->core[chunk->core_id].opened &&
(env_get_tick_count() >= chunk->next_cleaning_timestamp ||
!chunk->next_cleaning_timestamp));
}
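/* Pick the next chunk to clean: scan buckets from the dirtiest down to
 * bucket 1 and return the first chunk eligible for cleaning, or NULL */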
static struct acp_chunk_info *_acp_get_cleaning_candidate(ocf_cache_t cache)
{
int i;
struct acp_chunk_info *cur;
struct acp_context *acp = cache->cleaner.cleaning_policy_context;
ACP_LOCK_CHUNKS_RD();
/* go through all buckets in descending order, excluding bucket 0 which
* is supposed to contain all clean chunks */
for (i = ACP_MAX_BUCKETS - 1; i > 0; i--) {
list_for_each_entry(cur, &acp->bucket_info[i].chunk_list, list) {
if (_acp_can_clean_chunk(cache, cur)) {
ACP_UNLOCK_CHUNKS_RD();
return cur;
}
}
}
ACP_UNLOCK_CHUNKS_RD();
return NULL;
}
/* called after flush request completed */
static void _acp_flush_end(void *priv, int error)
{
struct acp_cleaning_policy_config *config;
struct acp_context *acp = priv;
struct acp_flush_context *flush = &acp->flush;
ocf_cache_t cache = acp->cache;
int i;
config = (void *)&cache->conf_meta->cleaning[ocf_cleaning_acp].data;
for (i = 0; i < flush->size; i++) {
ocf_cache_line_unlock_rd(
ocf_cache_line_concurrency(cache),
flush->data[i].cache_line);
ACP_DEBUG_END(acp, flush->data[i].cache_line);
}
if (error) {
flush->error = error;
_acp_handle_flush_error(cache, acp);
}
ACP_DEBUG_CHECK(acp);
acp->cmpl(&cache->cleaner, config->thread_wakeup_time);
}
/* submit asynchronous flush of the prepared cache lines */
static void _acp_flush(struct acp_context *acp)
{
ocf_cache_t cache = acp->cache;
struct ocf_cleaner_attribs attribs = {
.cmpl_context = acp,
.cmpl_fn = _acp_flush_end,
.lock_cacheline = false,
.lock_metadata = true,
.do_sort = false,
.io_queue = cache->cleaner.io_queue,
};
ocf_cleaner_do_flush_data_async(cache, acp->flush.data,
acp->flush.size, &attribs);
}
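/* Collect up to flush_max_buffers dirty, read-lockable cache lines from the
 * current chunk into acp->flush; returns true if anything was collected and
 * clears state->in_progress once the whole chunk has been scanned */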
static bool _acp_prepare_flush_data(struct acp_context *acp,
uint32_t flush_max_buffers)
{
ocf_cache_t cache = acp->cache;
struct acp_state *state = &acp->state;
struct acp_chunk_info *chunk = state->chunk;
size_t lines_per_chunk = ACP_CHUNK_SIZE / ocf_line_size(cache);
uint64_t first_core_line = chunk->chunk_id * lines_per_chunk;
OCF_DEBUG_PARAM(cache, "lines per chunk %llu chunk %llu "
"first_core_line %llu\n", (uint64_t)lines_per_chunk,
chunk->chunk_id, first_core_line);
acp->flush.size = 0;
acp->flush.chunk = chunk;
for (; state->iter < lines_per_chunk &&
acp->flush.size < flush_max_buffers; state->iter++) {
uint64_t core_line = first_core_line + state->iter;
ocf_cache_line_t cache_line;
cache_line = _acp_trylock_dirty(cache, chunk->core_id, core_line);
if (cache_line == cache->device->collision_table_entries)
continue;
ACP_DEBUG_BEGIN(acp, cache_line);
acp->flush.data[acp->flush.size].core_id = chunk->core_id;
acp->flush.data[acp->flush.size].core_line = core_line;
acp->flush.data[acp->flush.size].cache_line = cache_line;
acp->flush.size++;
}
if (state->iter == lines_per_chunk) {
/* reached end of chunk - reset state */
state->in_progress = false;
}
return (acp->flush.size > 0);
}
/* Clean at most 'flush_max_buffers' cache lines from current or newly
* selected chunk */
void cleaning_policy_acp_perform_cleaning(ocf_cache_t cache,
ocf_cleaner_end_t cmpl)
{
struct acp_cleaning_policy_config *config;
struct acp_context *acp = _acp_get_ctx_from_cache(cache);
struct acp_state *state = &acp->state;
acp->cmpl = cmpl;
if (!state->in_progress) {
/* get next chunk to clean */
state->chunk = _acp_get_cleaning_candidate(cache);
if (!state->chunk) {
/* nothing to clean */
cmpl(&cache->cleaner, ACP_BACKOFF_TIME_MS);
return;
}
/* new cleaning cycle - reset state */
state->iter = 0;
state->in_progress = true;
}
ACP_DEBUG_INIT(acp);
config = (void *)&cache->conf_meta->cleaning[ocf_cleaning_acp].data;
if (_acp_prepare_flush_data(acp, config->flush_max_buffers))
_acp_flush(acp);
else
_acp_flush_end(acp, 0);
}
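/* Move the chunk to a neighbouring bucket when its dirty line count crosses
 * the current bucket's threshold */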
static void _acp_update_bucket(struct acp_context *acp,
struct acp_chunk_info *chunk)
{
struct acp_bucket *bucket = &acp->bucket_info[chunk->bucket_id];
if (chunk->num_dirty > bucket->threshold) {
ENV_BUG_ON(chunk->bucket_id == ACP_MAX_BUCKETS - 1);
chunk->bucket_id++;
/* buckets are stored in an array - move up one bucket.
 * No overflow here: the ENV_BUG_ON above guarantees we never
 * increment past the last bucket */
bucket++;
list_move_tail(&chunk->list, &bucket->chunk_list);
} else if (chunk->bucket_id &&
chunk->num_dirty <= (bucket - 1)->threshold) {
chunk->bucket_id--;
/* move down one bucket, we made sure we won't underflow */
bucket--;
list_move(&chunk->list, &bucket->chunk_list);
}
}
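/* Account a newly dirtied cache line: set its ACP dirty flag, bump the
 * owning chunk's dirty counter and update the chunk's bucket */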
void cleaning_policy_acp_set_hot_cache_line(struct ocf_cache *cache,
uint32_t cache_line)
{
struct acp_context *acp = _acp_get_ctx_from_cache(cache);
struct acp_cleaning_policy_meta *acp_meta;
struct acp_chunk_info *chunk;
ACP_LOCK_CHUNKS_WR();
acp_meta = _acp_meta_get(cache, cache_line);
chunk = _acp_get_chunk(cache, cache_line);
if (!acp_meta->dirty) {
acp_meta->dirty = 1;
chunk->num_dirty++;
}
_acp_update_bucket(acp, chunk);
ACP_UNLOCK_CHUNKS_WR();
}
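/* Account a cleaned or invalidated cache line: clear its ACP dirty flag,
 * decrement the owning chunk's dirty counter and update the chunk's bucket */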
void cleaning_policy_acp_purge_block(struct ocf_cache *cache,
uint32_t cache_line)
{
struct acp_context *acp = _acp_get_ctx_from_cache(cache);
struct acp_cleaning_policy_meta *acp_meta;
struct acp_chunk_info *chunk;
ACP_LOCK_CHUNKS_WR();
acp_meta = _acp_meta_get(cache, cache_line);
chunk = _acp_get_chunk(cache, cache_line);
if (acp_meta->dirty) {
acp_meta->dirty = 0;
chunk->num_dirty--;
}
_acp_update_bucket(acp, chunk);
ACP_UNLOCK_CHUNKS_WR();
}
int cleaning_policy_acp_purge_range(struct ocf_cache *cache,
int core_id, uint64_t start_byte, uint64_t end_byte)
{
return ocf_metadata_actor(cache, PARTITION_INVALID,
core_id, start_byte, end_byte,
cleaning_policy_acp_purge_block);
}
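/* Detach a core from ACP: drop in-progress cleaning state targeting it,
 * unlink its chunks from the buckets and free its chunk array */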
void cleaning_policy_acp_remove_core(ocf_cache_t cache,
ocf_core_id_t core_id)
{
struct acp_context *acp = _acp_get_ctx_from_cache(cache);
uint64_t i;
ENV_BUG_ON(acp->chunks_total < acp->num_chunks[core_id]);
if (acp->state.in_progress && acp->state.chunk->core_id == core_id) {
acp->state.in_progress = false;
acp->state.iter = 0;
acp->state.chunk = NULL;
}
ACP_LOCK_CHUNKS_WR();
for (i = 0; i < acp->num_chunks[core_id]; i++)
list_del(&acp->chunk_info[core_id][i].list);
acp->chunks_total -= acp->num_chunks[core_id];
acp->num_chunks[core_id] = 0;
env_vfree(acp->chunk_info[core_id]);
acp->chunk_info[core_id] = NULL;
ACP_UNLOCK_CHUNKS_WR();
}
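/* Attach a core to ACP: allocate one chunk descriptor per ACP_CHUNK_SIZE of
 * core capacity and place all chunks in the clean bucket (bucket 0) */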
int cleaning_policy_acp_add_core(ocf_cache_t cache,
ocf_core_id_t core_id)
{
ocf_core_t core = ocf_cache_get_core(cache, core_id);
uint64_t core_size = core->conf_meta->length;
uint64_t num_chunks = OCF_DIV_ROUND_UP(core_size, ACP_CHUNK_SIZE);
struct acp_context *acp = _acp_get_ctx_from_cache(cache);
int i;
OCF_DEBUG_PARAM(cache, "%s core_id %llu num_chunks %llu\n",
__func__, (uint64_t)core_id, (uint64_t) num_chunks);
ACP_LOCK_CHUNKS_WR();
ENV_BUG_ON(acp->chunk_info[core_id]);
acp->chunk_info[core_id] =
env_vzalloc(num_chunks * sizeof(acp->chunk_info[0][0]));
if (!acp->chunk_info[core_id]) {
ACP_UNLOCK_CHUNKS_WR();
OCF_DEBUG_PARAM(cache, "failed to allocate acp tables\n");
return -OCF_ERR_NO_MEM;
}
OCF_DEBUG_PARAM(cache, "successfully allocated acp tables\n");
/* increment counters */
acp->num_chunks[core_id] = num_chunks;
acp->chunks_total += num_chunks;
for (i = 0; i < acp->num_chunks[core_id]; i++) {
/* fill in chunk metadata and add to the clean bucket */
acp->chunk_info[core_id][i].core_id = core_id;
acp->chunk_info[core_id][i].chunk_id = i;
list_add(&acp->chunk_info[core_id][i].list,
&acp->bucket_info[0].chunk_list);
}
ACP_UNLOCK_CHUNKS_WR();
return 0;
}