Initial commit

Signed-off-by: Robert Baldyga <robert.baldyga@intel.com>
Robert Baldyga
2019-03-29 08:39:34 +01:00
commit 94e8ca09e0
140 changed files with 37144 additions and 0 deletions

modules/cas_cache/.gitignore

@@ -0,0 +1,3 @@
include/
src/


@@ -0,0 +1,10 @@
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
include $(M)/config.mk
obj-m := cas_cache.o
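# Build the object list from every .c file found under $(M)/cas_cache,
# mapping e.g. $(M)/cas_cache/context.c to context.o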
cas_cache-c = $(shell find $(M)/cas_cache -name \*.c)
cas_cache-objs = $(patsubst $(M)/cas_cache/%.c,%.o,$(cas_cache-c))


@@ -0,0 +1,97 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_CACHE_H__
#define __CAS_CACHE_H__
#include "ocf/ocf.h"
#include "ocf_env.h"
#include <cas_version.h>
#include <cas_ioctl_codes.h>
#include "linux_kernel_version.h"
#include "layer_upgrade.h"
#include "control.h"
#include "layer_cache_management.h"
#include "service_ui_ioctl.h"
#include "utils/cas_cache_utils.h"
#include "volume/vol_blk_utils.h"
#include "classifier.h"
#include "context.h"
#include <linux/kallsyms.h>
#define CAS_KERN_EMERG KERN_EMERG OCF_PREFIX_SHORT
#define CAS_KERN_ALERT KERN_ALERT OCF_PREFIX_SHORT
#define CAS_KERN_CRIT KERN_CRIT OCF_PREFIX_SHORT
#define CAS_KERN_ERR KERN_ERR OCF_PREFIX_SHORT
#define CAS_KERN_WARNING KERN_WARNING OCF_PREFIX_SHORT
#define CAS_KERN_NOTICE KERN_NOTICE OCF_PREFIX_SHORT
#define CAS_KERN_INFO KERN_INFO OCF_PREFIX_SHORT
#define CAS_KERN_DEBUG KERN_DEBUG OCF_PREFIX_SHORT
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT 9
#endif
#ifndef SECTOR_SIZE
#define SECTOR_SIZE (1<<SECTOR_SHIFT)
#endif
#define MAX_LINES_PER_IO 16
/**
* cache/core object types */
enum {
BLOCK_DEVICE_VOLUME = 1, /**< block device volume */
ATOMIC_DEVICE_VOLUME, /**< block device volume with atomic
metadata support */
/** \cond SKIP_IN_DOC */
OBJECT_TYPE_MAX,
NVME_CONTROLLER
/** \endcond */
};
struct cas_classifier;
struct cache_priv {
struct cas_classifier *classifier;
ocf_queue_t mngt_queue;
ocf_queue_t io_queues[];
};
extern ocf_ctx_t cas_ctx;
extern struct casdsk_functions_mapper casdisk_functions;
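/* Dispatch table of cas_disk (casdsk) module functions used by cas_cache */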
struct casdsk_functions_mapper {
int (*casdsk_disk_dettach)(struct casdsk_disk *dsk);
int (*casdsk_exp_obj_destroy)(struct casdsk_disk *dsk);
int (*casdsk_exp_obj_create)(struct casdsk_disk *dsk, const char *dev_name,
struct module *owner, struct casdsk_exp_obj_ops *ops);
struct request_queue *(*casdsk_disk_get_queue)(struct casdsk_disk *dsk);
void (*casdsk_store_config)(size_t n_blobs, struct casdsk_props_conf *blobs);
struct block_device *(*casdsk_disk_get_blkdev)(struct casdsk_disk *dsk);
struct request_queue *(*casdsk_exp_obj_get_queue)(struct casdsk_disk *dsk);
uint32_t (*casdsk_get_version)(void);
void (*casdsk_disk_close)(struct casdsk_disk *dsk);
struct casdsk_disk *(*casdsk_disk_claim)(const char *path, void *private);
int (*casdsk_exp_obj_unlock)(struct casdsk_disk *dsk);
int (*casdsk_disk_set_pt)(struct casdsk_disk *dsk);
size_t (*casdsk_get_stored_config)(struct casdsk_props_conf **blobs);
struct gendisk *(*casdsk_disk_get_gendisk)(struct casdsk_disk *dsk);
int (*casdsk_disk_attach) (struct casdsk_disk *dsk, struct module *owner,
struct casdsk_exp_obj_ops *ops);
int (*casdsk_disk_set_attached)(struct casdsk_disk *dsk);
int (*casdsk_exp_obj_activate)(struct casdsk_disk *dsk);
bool (*casdsk_exp_obj_activated)(struct casdsk_disk *ds);
int (*casdsk_exp_obj_lock)(struct casdsk_disk *dsk);
void (*casdsk_free_stored_config)(void);
struct casdsk_disk *(*casdsk_disk_open)(const char *path, void *private);
int (*casdsk_disk_clear_pt)(struct casdsk_disk *dsk);
struct gendisk *(*casdsk_exp_obj_get_gendisk)(struct casdsk_disk *dsk);
};
#endif


@@ -0,0 +1,967 @@
/*
* Copyright(c) 2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#include "linux_kernel_version.h"
#include "classifier.h"
#include "classifier_defs.h"
#include <linux/namei.h>
/* Kernel log prefix */
#define CAS_CLS_LOG_PREFIX OCF_PREFIX_SHORT"[Classifier]"
/* Production version logs */
#define CAS_CLS_MSG(severity, format, ...) \
printk(severity CAS_CLS_LOG_PREFIX " " format, ##__VA_ARGS__);
/* Set to 1 to enable debug logs */
#define CAS_CLASSIFIER_CLS_DEBUG 0
#if 1 == CAS_CLASSIFIER_CLS_DEBUG
/* Debug log */
#define CAS_CLS_DEBUG_MSG(format, ...) \
CAS_CLS_MSG(KERN_INFO, format, ##__VA_ARGS__)
/* Trace log */
#define CAS_CLS_DEBUG_TRACE(format, ...) \
trace_printk(format, ##__VA_ARGS__)
#else
#define CAS_CLS_DEBUG_MSG(format, ...)
#define CAS_CLS_DEBUG_TRACE(format, ...)
#endif
/* Done condition test - always accepts and stops evaluation */
static cas_cls_eval_t _cas_cls_done_test(struct cas_classifier *cls,
struct cas_cls_condition *c, struct cas_cls_io *io,
ocf_part_id_t part_id)
{
cas_cls_eval_t ret = {.yes = 1, .stop = 1};
return ret;
}
/* Metadata condition test */
static cas_cls_eval_t _cas_cls_metadata_test(struct cas_classifier *cls,
struct cas_cls_condition *c, struct cas_cls_io *io,
ocf_part_id_t part_id)
{
if (!io->page)
return cas_cls_eval_no;
if (PageAnon(io->page))
return cas_cls_eval_no;
if (PageSlab(io->page) || PageCompound(io->page)) {
/* A filesystem issues IO on pages that do not belong
* to the file page cache. It means that the IO is
* part of filesystem metadata
*/
return cas_cls_eval_yes;
}
if (!io->page->mapping) {
/* XFS case: pages are allocated internally and do not
* hold references to an inode
*/
return cas_cls_eval_yes;
}
if (!io->inode)
return cas_cls_eval_no;
if (S_ISBLK(io->inode->i_mode)) {
/* EXT3 and EXT4 case. Metadata IO is performed into pages
* of the block device page cache
*/
return cas_cls_eval_yes;
}
if (S_ISDIR(io->inode->i_mode)) {
return cas_cls_eval_yes;
}
return cas_cls_eval_no;
}
/* Direct I/O condition test function */
static cas_cls_eval_t _cas_cls_direct_test(struct cas_classifier *cls,
struct cas_cls_condition *c, struct cas_cls_io *io,
ocf_part_id_t part_id)
{
if (!io->page)
return cas_cls_eval_no;
if (PageAnon(io->page))
return cas_cls_eval_yes;
return cas_cls_eval_no;
}
/* Generic condition constructor for conditions without operands (e.g. direct,
* metadata) */
static int _cas_cls_generic_ctr(struct cas_classifier *cls,
struct cas_cls_condition *c, char *data)
{
if (data) {
CAS_CLS_MSG(KERN_ERR, "Unexpected operand in condition\n");
return -EINVAL;
}
return 0;
}
/* Generic condition destructor */
static void _cas_cls_generic_dtr(struct cas_classifier *cls,
struct cas_cls_condition *c)
{
if (c->context)
kfree(c->context);
c->context = NULL;
}
/* Numeric condition constructor. @data is expected to contain either
* plain number string or range specifier (e.g. "gt:4096"). */
static int _cas_cls_numeric_ctr(struct cas_classifier* cls,
struct cas_cls_condition *c, char *data)
{
struct cas_cls_numeric *ctx;
int result;
char *ptr;
if (!data || strlen(data) == 0) {
CAS_CLS_MSG(KERN_ERR, "Missing numeric condition operand\n");
return -EINVAL;
}
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->operator = cas_cls_numeric_eq;
ptr = strpbrk(data, ":");
if (ptr) {
/* Terminate sub-string containing arithmetic operator */
*ptr = '\0';
++ptr;
if (!strcmp(data, "eq")) {
ctx->operator = cas_cls_numeric_eq;
} else if (!strcmp(data, "ne")) {
ctx->operator = cas_cls_numeric_ne;
} else if (!strcmp(data, "lt")) {
ctx->operator = cas_cls_numeric_lt;
} else if (!strcmp(data, "gt")) {
ctx->operator = cas_cls_numeric_gt;
} else if (!strcmp(data, "le")) {
ctx->operator = cas_cls_numeric_le;
} else if (!strcmp(data, "ge")) {
ctx->operator = cas_cls_numeric_ge;
} else {
CAS_CLS_MSG(KERN_ERR, "Invalid numeric operator\n");
result = -EINVAL;
goto error;
}
} else {
/* Plain number case */
ptr = data;
}
result = kstrtou64(ptr, 10, &ctx->v_u64);
if (result) {
CAS_CLS_MSG(KERN_ERR, "Invalid numeric operand\n");
goto error;
}
CAS_CLS_DEBUG_MSG("\t\t - Using operator %d with value %llu\n",
ctx->operator, ctx->v_u64);
c->context = ctx;
return 0;
error:
kfree(ctx);
return result;
}
/* Unsigned int numeric test function */
static cas_cls_eval_t _cas_cls_numeric_test_u(
struct cas_cls_condition *c, uint64_t val)
{
struct cas_cls_numeric *ctx = c->context;
switch (ctx->operator) {
case cas_cls_numeric_eq:
return val == ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
case cas_cls_numeric_ne:
return val != ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
case cas_cls_numeric_lt:
return val < ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
case cas_cls_numeric_gt:
return val > ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
case cas_cls_numeric_le:
return val <= ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
case cas_cls_numeric_ge:
return val >= ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
}
return cas_cls_eval_no;
}
/* Io class test function */
static cas_cls_eval_t _cas_cls_io_class_test(struct cas_classifier *cls,
struct cas_cls_condition *c, struct cas_cls_io *io,
ocf_part_id_t part_id)
{
return _cas_cls_numeric_test_u(c, part_id);
}
/* File size test function */
static cas_cls_eval_t _cas_cls_file_size_test(
struct cas_classifier *cls, struct cas_cls_condition *c,
struct cas_cls_io *io, ocf_part_id_t part_id)
{
if (!io->inode)
return cas_cls_eval_no;
if (S_ISBLK(io->inode->i_mode))
return cas_cls_eval_no;
if (!S_ISREG(io->inode->i_mode))
return cas_cls_eval_no;
return _cas_cls_numeric_test_u(c, i_size_read(io->inode));
}
/* Resolve path to inode */
static void _cas_cls_directory_resolve(struct cas_classifier *cls,
struct cas_cls_directory *ctx)
{
struct path path;
struct inode *inode;
int error;
int o_res;
unsigned long o_ino;
o_res = ctx->resolved;
o_ino = ctx->i_ino;
error = kern_path(ctx->pathname, LOOKUP_FOLLOW, &path);
if (error) {
ctx->resolved = 0;
if (o_res) {
CAS_CLS_DEBUG_MSG("Removed inode resolution for %s\n",
ctx->pathname);
}
return;
}
inode = path.dentry->d_inode;
ctx->i_ino = inode->i_ino;
ctx->resolved = 1;
path_put(&path);
if (!o_res) {
CAS_CLS_DEBUG_MSG("Resolved %s to inode: %lu\n", ctx->pathname,
ctx->i_ino);
} else if (o_ino != ctx->i_ino) {
CAS_CLS_DEBUG_MSG("Changed inode resolution for %s: %lu => %lu"
"\n", ctx->pathname, o_ino, ctx->i_ino);
}
}
/* Inode resolving work entry point */
static void _cas_cls_directory_resolve_work(struct work_struct *work)
{
struct cas_cls_directory *ctx;
ctx = container_of(work, struct cas_cls_directory, d_work.work);
_cas_cls_directory_resolve(ctx->cls, ctx);
queue_delayed_work(ctx->cls->wq, &ctx->d_work,
msecs_to_jiffies(ctx->resolved ? 5000 : 1000));
}
/* Get unaliased dentry for given dir inode */
static struct dentry *_cas_cls_dir_get_inode_dentry(struct inode *inode)
{
struct dentry *d = NULL, *iter;
ALIAS_NODE_TYPE *pos; /* alias list current element */
if (DENTRY_LIST_EMPTY(&inode->i_dentry))
return NULL;
spin_lock(&inode->i_lock);
if (S_ISDIR(inode->i_mode))
goto unlock;
INODE_FOR_EACH_DENTRY(pos, &inode->i_dentry) {
iter = ALIAS_NODE_TO_DENTRY(pos);
spin_lock(&iter->d_lock);
if (!d_unhashed(iter))
d = iter;
spin_unlock(&iter->d_lock);
if (d)
break;
}
unlock:
spin_unlock(&inode->i_lock);
return d;
}
/* Directory condition test function */
static cas_cls_eval_t _cas_cls_directory_test(
struct cas_classifier *cls, struct cas_cls_condition *c,
struct cas_cls_io *io, ocf_part_id_t part_id)
{
struct cas_cls_directory *ctx;
struct inode *inode, *p_inode;
struct dentry *dentry, *p_dentry;
ctx = c->context;
inode = io->inode;
if (!inode || !ctx->resolved)
return cas_cls_eval_no;
/* I/O target inode dentry */
dentry = _cas_cls_dir_get_inode_dentry(inode);
if (!dentry)
return cas_cls_eval_no;
/* Walk up directory tree starting from I/O destination
* dir until current dir inode matches condition inode or top
* directory is reached. */
while (inode) {
if (inode->i_ino == ctx->i_ino)
return cas_cls_eval_yes;
spin_lock(&dentry->d_lock);
p_dentry = dentry->d_parent;
if (!p_dentry) {
spin_unlock(&dentry->d_lock);
return cas_cls_eval_no;
}
p_inode = p_dentry->d_inode;
spin_unlock(&dentry->d_lock);
if (p_inode != inode) {
inode = p_inode;
dentry = p_dentry;
} else {
inode = NULL;
}
}
return cas_cls_eval_no;
}
/* Directory condition constructor */
static int _cas_cls_directory_ctr(struct cas_classifier *cls,
struct cas_cls_condition *c, char *data)
{
struct cas_cls_directory *ctx;
if (!data || strlen(data) == 0) {
CAS_CLS_MSG(KERN_ERR, "Missing directory specifier\n");
return -EINVAL;
}
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->cls = cls;
ctx->resolved = 0;
ctx->pathname = kstrdup(data, GFP_KERNEL);
if (!ctx->pathname) {
kfree(ctx);
return -ENOMEM;
}
INIT_DELAYED_WORK(&ctx->d_work, _cas_cls_directory_resolve_work);
queue_delayed_work(cls->wq, &ctx->d_work,
msecs_to_jiffies(10));
c->context = ctx;
return 0;
}
/* Directory condition destructor */
static void _cas_cls_directory_dtr(struct cas_classifier *cls,
struct cas_cls_condition *c)
{
struct cas_cls_directory *ctx;
ctx = c->context;
if (!ctx)
return;
cancel_delayed_work_sync(&ctx->d_work);
kfree(ctx->pathname);
kfree(ctx);
}
/* Array of condition handlers */
static struct cas_cls_condition_handler _handlers[] = {
{ "done", _cas_cls_done_test, _cas_cls_generic_ctr },
{ "metadata", _cas_cls_metadata_test, _cas_cls_generic_ctr },
{ "direct", _cas_cls_direct_test, _cas_cls_generic_ctr },
{ "io_class", _cas_cls_io_class_test, _cas_cls_numeric_ctr,
_cas_cls_generic_dtr },
{ "file_size", _cas_cls_file_size_test, _cas_cls_numeric_ctr,
_cas_cls_generic_dtr },
{ "directory", _cas_cls_directory_test, _cas_cls_directory_ctr,
_cas_cls_directory_dtr },
{ NULL }
};
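/* Illustrative rule strings accepted by the parser below (paths are examples
* only):
*   "metadata"                               - filesystem metadata IO
*   "directory:/mnt/data&file_size:le:4096"  - small files under /mnt/data
* Conditions are chained with '&' (and) or '|' (or); numeric conditions take
* an optional eq/ne/lt/gt/le/ge prefix before the value.
*/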
/* Get condition handler for condition string token */
static struct cas_cls_condition_handler *_cas_cls_lookup_handler(
const char *token)
{
struct cas_cls_condition_handler *h = _handlers;
while (h->token) {
if (strcmp(h->token, token) == 0)
return h;
h++;
}
return NULL;
}
/* Deallocate condition */
static void _cas_cls_free_condition(struct cas_classifier *cls,
struct cas_cls_condition *c)
{
if (c->handler->dtr)
c->handler->dtr(cls, c);
kfree(c);
}
/* Allocate condition */
static struct cas_cls_condition * _cas_cls_create_condition(
struct cas_classifier *cls, const char *token,
char *data, int l_op)
{
struct cas_cls_condition_handler *h;
struct cas_cls_condition *c;
int result;
h = _cas_cls_lookup_handler(token);
if (!h) {
CAS_CLS_DEBUG_MSG("Cannot find handler for condition"
" %s\n", token);
return ERR_PTR(-ENOENT);
}
c = kmalloc(sizeof(*c), GFP_KERNEL);
if (!c)
return ERR_PTR(-ENOMEM);
c->handler = h;
c->context = NULL;
c->l_op = l_op;
if (c->handler->ctr) {
result = c->handler->ctr(cls, c, data);
if (result) {
kfree(c);
return ERR_PTR(result);
}
}
CAS_CLS_DEBUG_MSG("\t\t - Created condition %s\n", token);
return c;
}
/* Read a single condition from text input and return its cas_cls_condition
* representation. *rule pointer is advanced to point to next condition.
* Input @rule string is modified to speed up parsing (selected bytes are
* overwritten with 0).
*
* *l_op contains logical operator from previous condition and gets overwritten
* with operator read from currently parsed condition.
*
* Returns pointer to condition if successful.
* Returns NULL if no more conditions in string.
* Returns error pointer in case of syntax or runtime error.
*/
static struct cas_cls_condition *_cas_cls_parse_condition(
struct cas_classifier *cls, char **rule,
enum cas_cls_logical_op *l_op)
{
char *token = *rule; /* Condition token substring (e.g. file_size) */
char *operand = NULL; /* Operand substring (e.g. "lt:4096" or path) */
char *ptr; /* Current position in input string */
char *last = token; /* Last seen substring in condition */
char op = 'X'; /* Logical operator at the end of condition */
struct cas_cls_condition *c; /* Output condition */
if (**rule == '\0') {
/* Empty condition */
return NULL;
}
ptr = strpbrk(*rule, ":&|");
if (!ptr) {
/* No operands in condition (e.g. "metadata"), no logical
* operators following condition - we're done with parsing. */
goto create;
}
if (*ptr == ':') {
/* Operand found - terminate token string and move forward. */
*ptr = '\0';
ptr += 1;
operand = ptr;
last = ptr;
ptr = strpbrk(ptr, "&|");
if (!ptr) {
/* No operator past condition - create rule and exit */
goto create;
}
}
/* Remember operator value and zero target byte to terminate previous
* string (token or operand) */
op = *ptr;
*ptr = '\0';
create:
c = _cas_cls_create_condition(cls, token, operand, *l_op);
*l_op = (op == '|' ? cas_cls_logical_or : cas_cls_logical_and);
/* Set *rule to character past current condition and logical operator */
if (ptr) {
/* Set pointer for next iteration */
*rule = ptr + 1;
} else {
/* Set pointer to terminating zero */
*rule = last + strlen(last);
}
return c;
}
/* Parse all conditions in rule text description. @rule might be overwritten */
static int _cas_cls_parse_conditions(struct cas_classifier *cls,
struct cas_cls_rule *r, char *rule)
{
char *start;
struct cas_cls_condition *c;
enum cas_cls_logical_op l_op = cas_cls_logical_or;
start = rule;
for (;;) {
c = _cas_cls_parse_condition(cls, &start, &l_op);
if (IS_ERR(c))
return PTR_ERR(c);
if (!c)
break;
list_add_tail(&c->list, &r->conditions);
}
return 0;
}
static struct cas_classifier* cas_get_classifier(ocf_cache_t cache)
{
struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
ENV_BUG_ON(!cache_priv);
return cache_priv->classifier;
}
static void cas_set_classifier(ocf_cache_t cache,
struct cas_classifier* cls)
{
struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
ENV_BUG_ON(!cache_priv);
cache_priv->classifier = cls;
}
void _cas_cls_rule_destroy(struct cas_classifier *cls,
struct cas_cls_rule *r)
{
struct list_head *item, *n;
struct cas_cls_condition *c = NULL;
if (!r)
return;
list_for_each_safe(item, n, &r->conditions) {
c = list_entry(item, struct cas_cls_condition, list);
list_del(item);
_cas_cls_free_condition(cls, c);
}
kfree(r);
}
/* Destroy rule */
void cas_cls_rule_destroy(ocf_cache_t cache, struct cas_cls_rule *r)
{
struct cas_classifier *cls = cas_get_classifier(cache);
BUG_ON(!cls);
_cas_cls_rule_destroy(cls, r);
}
/* Create rule from text description. @rule might be overwritten */
static struct cas_cls_rule *_cas_cls_rule_create(struct cas_classifier *cls,
ocf_part_id_t part_id, char *rule)
{
struct cas_cls_rule *r;
int result;
if (part_id == 0 || rule[0] == '\0')
return NULL;
r = kmalloc(sizeof(*r), GFP_KERNEL);
if (!r)
return ERR_PTR(-ENOMEM);
r->part_id = part_id;
INIT_LIST_HEAD(&r->conditions);
result = _cas_cls_parse_conditions(cls, r, rule);
if (result) {
_cas_cls_rule_destroy(cls, r);
return ERR_PTR(result);
}
return r;
}
/* Update rule associated with given io class */
void cas_cls_rule_apply(ocf_cache_t cache,
ocf_part_id_t part_id, struct cas_cls_rule *new)
{
struct cas_classifier *cls;
struct cas_cls_rule *old = NULL, *elem;
struct list_head *item, *_n;
cls = cas_get_classifier(cache);
BUG_ON(!cls);
write_lock(&cls->lock);
/* Walk through list of rules in reverse order (tail to head), visiting
* rules from high to low part_id */
list_for_each_prev_safe(item, _n, &cls->rules) {
elem = list_entry(item, struct cas_cls_rule, list);
if (elem->part_id == part_id) {
old = elem;
list_del(item);
}
if (elem->part_id < part_id)
break;
}
/* Insert new element past loop cursor */
if (new)
list_add(&new->list, item);
write_unlock(&cls->lock);
_cas_cls_rule_destroy(cls, old);
if (old)
CAS_CLS_DEBUG_MSG("Removed rule for class %d\n", part_id);
if (new)
CAS_CLS_DEBUG_MSG("New rule for class %d\n", part_id);
return;
}
/*
* Translate classification rule error from linux error code to CAS error code.
* Internal classifier functions use PTR_ERR / ERR_PTR macros to propagate
* error in pointers. These macros do not work well with CAS error codes, so
* this function is used to form fine-grained CAS error code when returning
* from classifier management function.
*/
static int _cas_cls_rule_err_to_cass_err(int err)
{
switch (err) {
case -ENOENT:
return KCAS_ERR_CLS_RULE_UNKNOWN_CONDITION;
case -EINVAL:
return KCAS_ERR_CLS_RULE_INVALID_SYNTAX;
default:
return err;
}
}
/* Create and apply classification rule for given class id */
static int _cas_cls_rule_init(ocf_cache_t cache, ocf_part_id_t part_id)
{
struct cas_classifier *cls;
struct ocf_io_class_info *info;
struct cas_cls_rule *r;
int result;
cls = cas_get_classifier(cache);
if (!cls)
return -EINVAL;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
result = ocf_cache_io_class_get_info(cache, part_id, info);
if (result) {
if (result == -OCF_ERR_IO_CLASS_NOT_EXIST)
result = 0;
goto exit;
}
if (strnlen(info->name, sizeof(info->name)) == sizeof(info->name)) {
CAS_CLS_MSG(KERN_ERR, "IO class name not null terminated\n");
result = -EINVAL;
goto exit;
}
r = _cas_cls_rule_create(cls, part_id, info->name);
if (IS_ERR(r)) {
result = _cas_cls_rule_err_to_cass_err(PTR_ERR(r));
goto exit;
}
cas_cls_rule_apply(cache, part_id, r);
exit:
kfree(info);
return result;
}
/* Create classification rule from text description */
int cas_cls_rule_create(ocf_cache_t cache,
ocf_part_id_t part_id, const char* rule,
struct cas_cls_rule **cls_rule)
{
struct cas_cls_rule *r = NULL;
struct cas_classifier *cls;
char *_rule;
int ret;
if (!cls_rule)
return -EINVAL;
cls = cas_get_classifier(cache);
if (!cls)
return -EINVAL;
if (strnlen(rule, OCF_IO_CLASS_NAME_MAX) == OCF_IO_CLASS_NAME_MAX) {
CAS_CLS_MSG(KERN_ERR, "IO class name not null terminated\n");
return -EINVAL;
}
/* Make description copy as _cas_cls_rule_create might modify input
* string */
_rule = kstrdup(rule, GFP_KERNEL);
if (!_rule)
return -ENOMEM;
r = _cas_cls_rule_create(cls, part_id, _rule);
if (IS_ERR(r))
ret = _cas_cls_rule_err_to_cass_err(PTR_ERR(r));
else {
CAS_CLS_DEBUG_MSG("Created rule: %s => %d\n", rule, part_id);
*cls_rule = r;
ret = 0;
}
kfree(_rule);
return ret;
}
/* Deinitialize classifier and remove rules */
void cas_cls_deinit(ocf_cache_t cache)
{
struct cas_classifier *cls;
struct list_head *item, *n;
struct cas_cls_rule *r = NULL;
cls = cas_get_classifier(cache);
ENV_BUG_ON(!cls);
list_for_each_safe(item, n, &cls->rules) {
r = list_entry(item, struct cas_cls_rule, list);
list_del(item);
_cas_cls_rule_destroy(cls, r);
}
destroy_workqueue(cls->wq);
kfree(cls);
cas_set_classifier(cache, NULL);
CAS_CLS_MSG(KERN_INFO, "Deinitialized IO classifier\n");
return;
}
/* Initialize classifier context */
static struct cas_classifier *_cas_cls_init(ocf_cache_t cache)
{
struct cas_classifier *cls;
cls = kzalloc(sizeof(*cls), GFP_KERNEL);
if (!cls)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&cls->rules);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
cls->wq = alloc_workqueue("kcas_clsd", WQ_UNBOUND | WQ_FREEZABLE, 1);
#else
cls->wq = create_singlethread_workqueue("kcas_clsd");
#endif
if (!cls->wq) {
kfree(cls);
return ERR_PTR(-ENOMEM);
}
rwlock_init(&cls->lock);
CAS_CLS_MSG(KERN_INFO, "Initialized IO classifier\n");
return cls;
}
/* Initialize classifier and create rules for existing I/O classes */
int cas_cls_init(ocf_cache_t cache)
{
struct cas_classifier *cls;
unsigned result = 0;
unsigned i;
cls = _cas_cls_init(cache);
if (IS_ERR(cls))
return PTR_ERR(cls);
cas_set_classifier(cache, cls);
/* Update rules for all I/O classes except 0 - this is default for all
* unclassified I/O */
for (i = 1; i < OCF_IO_CLASS_MAX; i++) {
result = _cas_cls_rule_init(cache, i);
if (result)
break;
}
if (result)
cas_cls_deinit(cache);
return result;
}
/* Determine whether io matches rule */
static cas_cls_eval_t cas_cls_process_rule(struct cas_classifier *cls,
struct cas_cls_rule *r, struct cas_cls_io *io,
ocf_part_id_t *part_id)
{
struct list_head *item;
struct cas_cls_condition *c;
cas_cls_eval_t ret = cas_cls_eval_no, rr;
CAS_CLS_DEBUG_TRACE(" Processing rule for class %d\n", r->part_id);
list_for_each(item, &r->conditions) {
c = list_entry(item, struct cas_cls_condition, list);
if (!ret.yes && c->l_op == cas_cls_logical_and)
break;
rr = c->handler->test(cls, c, io, *part_id);
CAS_CLS_DEBUG_TRACE(" Processing condition %s => %d, stop:%d "
"(l_op: %d)\n", c->handler->token, rr.yes,
rr.stop, (int)c->l_op);
ret.yes = (c->l_op == cas_cls_logical_and) ?
rr.yes && ret.yes :
rr.yes || ret.yes;
ret.stop = rr.stop;
if (ret.stop)
break;
}
CAS_CLS_DEBUG_TRACE(" Rule %d output => %d stop: %d\n", r->part_id,
ret.yes, ret.stop);
return ret;
}
/* Fill in cas_cls_io for given bio - it is assumed that ctx is
* zeroed upon entry */
static void _cas_cls_get_bio_context(struct bio *bio,
struct cas_cls_io *ctx)
{
struct page *page = NULL;
if (!bio)
return;
ctx->bio = bio;
if (!SEGMENT_BVEC(bio_iovec(bio)))
return;
page = bio_page(bio);
if (!page)
return;
ctx->page = page;
if (PageAnon(page))
return;
if (PageSlab(page) || PageCompound(page))
return;
if (!page->mapping)
return;
ctx->inode = page->mapping->host;
return;
}
/* Determine I/O class for bio */
ocf_part_id_t cas_cls_classify(ocf_cache_t cache, struct bio *bio)
{
struct cas_classifier *cls;
struct cas_cls_io io = {};
struct list_head *item;
struct cas_cls_rule *r;
ocf_part_id_t part_id = 0;
cas_cls_eval_t ret;
cls = cas_get_classifier(cache);
ENV_BUG_ON(!cls);
_cas_cls_get_bio_context(bio, &io);
read_lock(&cls->lock);
CAS_CLS_DEBUG_TRACE("%s\n", "Starting processing");
list_for_each(item, &cls->rules) {
r = list_entry(item, struct cas_cls_rule, list);
ret = cas_cls_process_rule(cls, r, &io, &part_id);
if (ret.yes)
part_id = r->part_id;
if (ret.stop)
break;
}
read_unlock(&cls->lock);
return part_id;
}


@@ -0,0 +1,33 @@
/*
* Copyright(c) 2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CLASSIFIER_H__
#define __CLASSIFIER_H__
struct cas_cls_rule;
/* Initialize classifier and create rules for existing I/O classes */
int cas_cls_init(ocf_cache_t cache);
/* Deinitialize classifier and remove rules */
void cas_cls_deinit(ocf_cache_t cache);
/* Allocate and initialize classification rule */
int cas_cls_rule_create(ocf_cache_t cache,
ocf_part_id_t part_id, const char* rule,
struct cas_cls_rule **cls_rule);
/* Deinit classification rule */
void cas_cls_rule_destroy(ocf_cache_t cache, struct cas_cls_rule *r);
/* Bind classification rule to io class */
void cas_cls_rule_apply(ocf_cache_t cache, ocf_part_id_t part_id,
struct cas_cls_rule *r);
/* Determine I/O class for bio */
ocf_part_id_t cas_cls_classify(ocf_cache_t cache, struct bio *bio);
#endif


@@ -0,0 +1,139 @@
/*
* Copyright(c) 2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CLASSIFIER_DEFS_H__
#define __CLASSIFIER_DEFS_H__
/* Rule matches 1:1 with io class. It contains multiple conditions with
* associated logical operator (and/or) */
struct cas_cls_rule {
/* Rules list element */
struct list_head list;
/* Associated partition id */
ocf_part_id_t part_id;
/* Conditions for this rule */
struct list_head conditions;
};
/* Classifier context - one per cache instance. */
struct cas_classifier {
/* Rules list head */
struct list_head rules;
/* Directory inode resolving workqueue */
struct workqueue_struct *wq;
/* Lock for rules list */
rwlock_t lock;
};
struct cas_cls_condition_handler;
/* cas_cls_condition represents single test (e.g. file_size <= 4K) plus
* logical operator (and/or) to combine evaluation of this condition with
* previous conditions within one rule */
struct cas_cls_condition {
/* Condition handler */
struct cas_cls_condition_handler *handler;
/* Conditions list element */
struct list_head list;
/* Data specific to this condition instance */
void *context;
/* Logical operator to apply to previous conditions evaluation */
int l_op;
};
/* Helper structure aggregating I/O data often accessed by condition handlers */
struct cas_cls_io {
/* bio */
struct bio *bio;
/* First page associated with bio */
struct page *page;
/* Inode associated with page */
struct inode *inode;
};
/* Condition evaluation return flags */
typedef struct cas_cls_eval {
uint8_t yes : 1; /* Condition is satisfied */
uint8_t stop : 1; /* Stop further evaluation for this IO */
} cas_cls_eval_t;
static const cas_cls_eval_t cas_cls_eval_yes = { .yes = 1 };
static const cas_cls_eval_t cas_cls_eval_no = { };
/* Logical operators */
enum cas_cls_logical_op {
cas_cls_logical_and = 0,
cas_cls_logical_or
};
/* Condition handler - abstraction over different kinds of condition checks
* (e.g. file size, metadata). Does not contain all the data required to
* evaluate condition (e.g. actual file size value), these are stored in
* @context member of cas_cls_condition object, provided as input argument to
* test, ctr and dtr callbacks. */
struct cas_cls_condition_handler {
/* String representing this condition class */
const char *token;
/* Condition test */
cas_cls_eval_t (*test)(struct cas_classifier *cls,
struct cas_cls_condition *c, struct cas_cls_io *io,
ocf_part_id_t part_id);
/* Condition constructor */
int (*ctr)(struct cas_classifier *cls, struct cas_cls_condition *c,
char *data);
/* Condition destructor */
void (*dtr)(struct cas_classifier *cls, struct cas_cls_condition *c);
};
/* Numeric condition numeric operators */
enum cas_cls_numeric_op {
cas_cls_numeric_eq = 0,
cas_cls_numeric_ne = 1,
cas_cls_numeric_lt = 2,
cas_cls_numeric_gt = 3,
cas_cls_numeric_le = 4,
cas_cls_numeric_ge = 5,
};
/* Numeric condition context */
struct cas_cls_numeric {
/* Arithmetic operator */
enum cas_cls_numeric_op operator;
/* Condition operand as unsigned int */
uint64_t v_u64;
};
/* Directory condition context */
struct cas_cls_directory {
/* 1 if directory has been resolved */
int resolved;
/* Dir path */
char *pathname;
/* Resolved inode */
unsigned long i_ino;
/* Back pointer to classifier context */
struct cas_classifier *cls;
/* Work item associated with resolving dir for this condition */
struct delayed_work d_work;
};
#endif

modules/cas_cache/context.c

@@ -0,0 +1,482 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#include "context.h"
#include "utils/utils_rpool.h"
#include "utils/utils_data.h"
#include "utils/utils_gc.h"
#include "threads.h"
struct ocf_mpool *cas_bvec_pool;
struct cas_reserve_pool *cas_bvec_pages_rpool;
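/* Upper bound on pages kept in the BIO vector reserve pool */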
#define CAS_ALLOC_PAGE_LIMIT 1024
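/* Reuse the PG_private page flag to mark pages owned by the reserve pool */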
#define PG_cas PG_private
#define CAS_LOG_RATELIMIT HZ * 5
/* High burst limit to ensure cache init logs are printed properly */
#define CAS_LOG_BURST_LIMIT 50
static inline void _cas_page_set_priv(struct page *page)
{
set_bit(PG_cas, &page->flags);
}
static inline void _cas_page_clear_priv(struct page *page)
{
clear_bit(PG_cas, &page->flags);
page->private = 0;
}
static inline int _cas_page_test_priv(struct page *page)
{
return test_bit(PG_cas, &page->flags);
}
static void _cas_free_page_rpool(void *allocator_ctx, void *item)
{
struct page *page = virt_to_page(item);
_cas_page_clear_priv(page);
__free_page(page);
}
static void _cas_page_set_cpu(struct page *page, int cpu)
{
page->private = cpu;
}
void *_cas_alloc_page_rpool(void *allocator_ctx, int cpu)
{
struct page *page;
page = alloc_page(GFP_NOIO | __GFP_NORETRY);
if (!page)
return NULL;
if (_cas_page_test_priv(page)) {
printk(KERN_WARNING "CAS private bit is set\n");
WARN(true, OCF_PREFIX_SHORT" CAS private bit is set\n");
}
_cas_page_set_priv(page);
_cas_page_set_cpu(page, cpu);
return page_address(page);
}
static int _cas_page_get_cpu(struct page *page)
{
return page->private;
}
/* *** CONTEXT DATA OPERATIONS *** */
/*
*
*/
ctx_data_t *__cas_ctx_data_alloc(uint32_t pages, bool zalloc)
{
struct blk_data *data;
uint32_t i;
void *page_addr = NULL;
struct page *page = NULL;
int cpu;
data = ocf_mpool_new(cas_bvec_pool, pages);
if (!data) {
CAS_PRINT_RL(KERN_ERR "Couldn't allocate BIO vector.\n");
return NULL;
}
data->size = pages;
for (i = 0; i < pages; ++i) {
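/* Prefer a page from the reserve pool; fall back to a regular allocation */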
page_addr = cas_rpool_try_get(cas_bvec_pages_rpool, &cpu);
if (page_addr) {
data->vec[i].bv_page = virt_to_page(page_addr);
_cas_page_set_cpu(data->vec[i].bv_page, cpu);
} else {
data->vec[i].bv_page = alloc_page(GFP_NOIO);
}
if (!data->vec[i].bv_page)
break;
if (zalloc) {
if (!page_addr) {
page_addr = page_address(
data->vec[i].bv_page);
}
memset(page_addr, 0, PAGE_SIZE);
}
data->vec[i].bv_len = PAGE_SIZE;
data->vec[i].bv_offset = 0;
}
/* One of allocations failed */
if (i != pages) {
uint32_t j;
for (j = 0; j < i; j++) {
page = data->vec[j].bv_page;
if (page && !(_cas_page_test_priv(page) &&
!cas_rpool_try_put(cas_bvec_pages_rpool,
page_address(page),
_cas_page_get_cpu(page)))) {
__free_page(page);
}
}
ocf_mpool_del(cas_bvec_pool, data, pages);
data = NULL;
} else {
/* Initialize iterator */
cas_io_iter_init(&data->iter, data->vec, data->size);
}
return data;
}
ctx_data_t *cas_ctx_data_alloc(uint32_t pages)
{
return __cas_ctx_data_alloc(pages, false);
}
ctx_data_t *cas_ctx_data_zalloc(uint32_t pages)
{
return __cas_ctx_data_alloc(pages, true);
}
/*
*
*/
void cas_ctx_data_free(ctx_data_t *ctx_data)
{
uint32_t i;
struct page *page = NULL;
struct blk_data *data = ctx_data;
if (!data)
return;
for (i = 0; i < data->size; i++) {
page = data->vec[i].bv_page;
if (!(_cas_page_test_priv(page) && !cas_rpool_try_put(
cas_bvec_pages_rpool,
page_address(page),
_cas_page_get_cpu(page))))
__free_page(page);
}
ocf_mpool_del(cas_bvec_pool, data, data->size);
}
static int _cas_ctx_data_mlock(ctx_data_t *ctx_data)
{
return 0;
}
static void _cas_ctx_data_munlock(ctx_data_t *ctx_data)
{
}
void cas_ctx_data_secure_erase(ctx_data_t *ctx_data)
{
struct blk_data *data = ctx_data;
uint32_t i;
void *ptr;
for (i = 0; i < data->size; i++) {
ptr = page_address(data->vec[i].bv_page);
memset(ptr, 0, PAGE_SIZE);
}
}
/*
*
*/
static uint32_t _cas_ctx_read_data(void *dst, ctx_data_t *src,
uint32_t size)
{
struct blk_data *data = src;
return cas_io_iter_cpy_to_data(dst, &data->iter, size);
}
/*
*
*/
static uint32_t _cas_ctx_write_data(ctx_data_t *dst, const void *src,
uint32_t size)
{
struct blk_data *data = dst;
return cas_io_iter_cpy_from_data(&data->iter, src, size);
}
/*
*
*/
static uint32_t _cas_ctx_zero_data(ctx_data_t *dst, uint32_t size)
{
struct blk_data *data = dst;
return cas_io_iter_zero(&data->iter, size);
}
/*
*
*/
static uint32_t _cas_ctx_seek_data(ctx_data_t *dst,
ctx_data_seek_t seek, uint32_t offset)
{
struct blk_data *data = dst;
switch (seek) {
case ctx_data_seek_begin:
cas_io_iter_init(&data->iter, data->vec, data->size);
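/* Fall through - the offset is applied by cas_io_iter_move() below */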
case ctx_data_seek_current:
/* TODO Implement this if needed or remove this from enum */
break;
default:
BUG();
return 0;
}
return cas_io_iter_move(&data->iter, offset);
}
/*
*
*/
static uint64_t _cas_ctx_data_copy(ctx_data_t *dst, ctx_data_t *src,
uint64_t to, uint64_t from, uint64_t bytes)
{
struct blk_data *src_data = src, *dst_data = dst;
return cas_data_cpy(dst_data->vec, dst_data->size, src_data->vec,
src_data->size, to, from, bytes);
}
static int _cas_ctx_cleaner_init(ocf_cleaner_t c)
{
return cas_create_cleaner_thread(c);
}
static void _cas_ctx_cleaner_stop(ocf_cleaner_t c)
{
return cas_stop_cleaner_thread(c);
}
static int _cas_ctx_metadata_updater_init(ocf_metadata_updater_t mu)
{
return cas_create_metadata_updater_thread(mu);
}
static void _cas_ctx_metadata_updater_kick(ocf_metadata_updater_t mu)
{
return cas_kick_metadata_updater_thread(mu);
}
static void _cas_ctx_metadata_updater_stop(ocf_metadata_updater_t mu)
{
return cas_stop_metadata_updater_thread(mu);
}
/*
*
*/
static int _cas_ctx_logger_printf(ocf_logger_t logger, ocf_logger_lvl_t lvl,
const char *fmt, va_list args)
{
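/* Map OCF logger levels onto kernel printk severity prefixes */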
static const char* level[] = {
[log_emerg] = KERN_EMERG,
[log_alert] = KERN_ALERT,
[log_crit] = KERN_CRIT,
[log_err] = KERN_ERR,
[log_warn] = KERN_WARNING,
[log_notice] = KERN_NOTICE,
[log_info] = KERN_INFO,
[log_debug] = KERN_DEBUG,
};
char *format;
if (((unsigned)lvl) >= ARRAY_SIZE(level))
return -EINVAL;
format = kasprintf(GFP_ATOMIC, "%s%s", level[lvl], fmt);
if (!format)
return -ENOMEM;
vprintk(format, args);
kfree(format);
return 0;
}
/*
*
*/
static int _cas_ctx_logger_printf_rl(ocf_logger_t logger, const char *func_name)
{
static DEFINE_RATELIMIT_STATE(cas_log_rl, CAS_LOG_RATELIMIT,
CAS_LOG_BURST_LIMIT);
if (!func_name)
return -EINVAL;
return CAS_RATELIMIT(&cas_log_rl, func_name);
}
/*
*
*/
static int _cas_ctx_logger_dump_stack(ocf_logger_t logger)
{
dump_stack();
return 0;
}
static const struct ocf_ctx_config ctx_cfg = {
.name = "CAS Linux Kernel",
.ops = {
.data = {
.alloc = cas_ctx_data_alloc,
.free = cas_ctx_data_free,
.mlock = _cas_ctx_data_mlock,
.munlock = _cas_ctx_data_munlock,
.read = _cas_ctx_read_data,
.write = _cas_ctx_write_data,
.zero = _cas_ctx_zero_data,
.seek = _cas_ctx_seek_data,
.copy = _cas_ctx_data_copy,
.secure_erase = cas_ctx_data_secure_erase,
},
.cleaner = {
.init = _cas_ctx_cleaner_init,
.stop = _cas_ctx_cleaner_stop,
},
.metadata_updater = {
.init = _cas_ctx_metadata_updater_init,
.kick = _cas_ctx_metadata_updater_kick,
.stop = _cas_ctx_metadata_updater_stop,
},
.logger = {
.printf = _cas_ctx_logger_printf,
.printf_rl = _cas_ctx_logger_printf_rl,
.dump_stack = _cas_ctx_logger_dump_stack,
},
},
};
/* *** CONTEXT INITIALIZATION *** */
int cas_initialize_context(void)
{
struct blk_data data;
int ret;
ret = ocf_ctx_init(&cas_ctx, &ctx_cfg);
if (ret < 0)
return ret;
cas_bvec_pool = ocf_mpool_create(NULL, sizeof(data),
sizeof(data.vec[0]), GFP_NOIO, 7, "cas_biovec");
if (!cas_bvec_pool) {
printk(KERN_ERR "Cannot create BIO vector memory pool\n");
ret = -ENOMEM;
goto err_ctx;
}
cas_bvec_pages_rpool = cas_rpool_create(CAS_ALLOC_PAGE_LIMIT,
NULL, PAGE_SIZE, _cas_alloc_page_rpool,
_cas_free_page_rpool, NULL);
if (!cas_bvec_pages_rpool) {
printk(KERN_ERR "Cannot create reserve pool for "
"BIO vector memory pool\n");
ret = -ENOMEM;
goto err_mpool;
}
cas_garbage_collector_init();
ret = block_dev_init();
if (ret) {
printk(KERN_ERR "Cannot initialize block device layer\n");
goto err_rpool;
}
ret = atomic_dev_init();
if (ret) {
printk(KERN_ERR "Cannot initialize atomic device layer\n");
goto err_block_dev;
}
ocf_mngt_core_pool_init(cas_ctx);
return 0;
err_block_dev:
block_dev_deinit();
err_rpool:
cas_rpool_destroy(cas_bvec_pages_rpool, _cas_free_page_rpool, NULL);
err_mpool:
ocf_mpool_destroy(cas_bvec_pool);
err_ctx:
ocf_ctx_exit(cas_ctx);
return ret;
}
int cas_cleanup_context(void)
{
ocf_mngt_core_pool_deinit(cas_ctx);
block_dev_deinit();
atomic_dev_deinit();
cas_garbage_collector_deinit();
ocf_mpool_destroy(cas_bvec_pool);
cas_rpool_destroy(cas_bvec_pages_rpool, _cas_free_page_rpool, NULL);
return ocf_ctx_exit(cas_ctx);
}
/* *** CONTEXT DATA HELPER FUNCTION *** */
/*
*
*/
struct blk_data *cas_alloc_blk_data(uint32_t size, gfp_t flags)
{
struct blk_data *data = ocf_mpool_new_f(cas_bvec_pool, size, flags);
if (data)
data->size = size;
return data;
}
/*
*
*/
void cas_free_blk_data(struct blk_data *data)
{
if (!data)
return;
ocf_mpool_del(cas_bvec_pool, data, data->size);
}


@@ -0,0 +1,79 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CONTEXT_H__
#define __CONTEXT_H__
#include "linux_kernel_version.h"
struct bio_vec_iter {
struct bio_vec *vec;
uint32_t vec_size;
uint32_t idx;
uint32_t offset;
uint32_t len;
struct bio_vec *ivec;
};
struct blk_data {
/**
* @brief Atomic counter for core device
*/
atomic_t master_remaining;
/**
* @brief Core device request context (core private info)
*/
void *master_io_req;
/**
* @brief CAS IO with which data is associated
*/
struct ocf_io *io;
/**
* @brief List item used for IO splitting
*/
struct list_head list;
/**
* @brief Timestamp of the start of request processing
*/
unsigned long long start_time;
/**
* @brief Request data size
*/
uint32_t size;
/**
* @brief This field indicates an error for the request
*/
int error;
/**
* @brief Iterator for accessing data
*/
struct bio_vec_iter iter;
/**
* @brief Request data
*/
struct bio_vec vec[];
};
struct blk_data *cas_alloc_blk_data(uint32_t size, gfp_t flags);
void cas_free_blk_data(struct blk_data *data);
ctx_data_t *cas_ctx_data_alloc(uint32_t pages);
ctx_data_t *cas_ctx_data_zalloc(uint32_t pages);
void cas_ctx_data_free(ctx_data_t *ctx_data);
void cas_ctx_data_secure_erase(ctx_data_t *ctx_data);
int cas_initialize_context(void);
int cas_cleanup_context(void);
#endif /* __CONTEXT_H__ */


@@ -0,0 +1,80 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include <linux/cdev.h>
#include <linux/fs.h>
#include "linux_kernel_version.h"
#include "service_ui_ioctl.h"
#include "control.h"
#include "cas_cache/cas_cache.h"
struct cas_ctrl_device {
struct cdev cdev;
struct class *class;
dev_t dev;
};
static struct cas_ctrl_device _control_device;
static const struct file_operations _ctrl_dev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = cas_service_ioctl_ctrl
};
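/* Register the CAS control device: chrdev region, cdev, device class and the
* "cas_ctrl" node */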
int __init cas_ctrl_device_init(void)
{
struct cas_ctrl_device *ctrl = &_control_device;
struct device *device;
int result = 0;
result = alloc_chrdev_region(&ctrl->dev, 0, 1, "cas");
if (result) {
printk(KERN_ERR "Cannot allocate control chrdev number.\n");
goto error_alloc_chrdev_region;
}
cdev_init(&ctrl->cdev, &_ctrl_dev_fops);
result = cdev_add(&ctrl->cdev, ctrl->dev, 1);
if (result) {
printk(KERN_ERR "Cannot add control chrdev.\n");
goto error_cdev_add;
}
ctrl->class = class_create(THIS_MODULE, "cas");
if (IS_ERR(ctrl->class)) {
printk(KERN_ERR "Cannot create control chrdev class.\n");
result = PTR_ERR(ctrl->class);
goto error_class_create;
}
device = device_create(ctrl->class, NULL, ctrl->dev, NULL,
"cas_ctrl");
if (IS_ERR(device)) {
printk(KERN_ERR "Cannot create control chrdev.\n");
result = PTR_ERR(device);
goto error_device_create;
}
return result;
error_device_create:
class_destroy(ctrl->class);
error_class_create:
cdev_del(&ctrl->cdev);
error_cdev_add:
unregister_chrdev_region(ctrl->dev, 1);
error_alloc_chrdev_region:
return result;
}
void __exit cas_ctrl_device_deinit(void)
{
struct cas_ctrl_device *ctrl = &_control_device;
device_destroy(ctrl->class, ctrl->dev);
class_destroy(ctrl->class);
cdev_del(&ctrl->cdev);
unregister_chrdev_region(ctrl->dev, 1);
}


@@ -0,0 +1,11 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_CONTROL_H__
#define __CAS_CONTROL_H__
int __init cas_ctrl_device_init(void);
void __exit cas_ctrl_device_deinit(void);
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,92 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __LAYER_CACHE_MANAGEMENT_H__
#define __LAYER_CACHE_MANAGEMENT_H__
#define CAS_BLK_DEV_REQ_TYPE_BIO 1
#define CAS_BLK_DEV_REQ_TYPE_REQ 3
int cache_mng_set_cleaning_policy(ocf_cache_id_t cache_id, uint32_t type);
int cache_mng_get_cleaning_policy(ocf_cache_id_t cache_id, uint32_t *type);
int cache_mng_set_cleaning_param(ocf_cache_id_t cache_id, ocf_cleaning_t type,
uint32_t param_id, uint32_t param_value);
int cache_mng_get_cleaning_param(ocf_cache_id_t cache_id, ocf_cleaning_t type,
uint32_t param_id, uint32_t *param_value);
int cache_mng_add_core_to_cache(struct ocf_mngt_core_config *cfg,
struct kcas_insert_core *cmd_info);
int cache_mng_remove_core_from_cache(struct kcas_remove_core *cmd);
int cache_mng_reset_core_stats(ocf_cache_id_t cache_id,
ocf_core_id_t core_id);
int cache_mng_set_partitions(struct kcas_io_classes *cfg);
int cache_mng_exit_instance(ocf_cache_id_t id, int flush);
int cache_mng_prepare_cache_cfg(struct ocf_mngt_cache_config *cfg,
struct ocf_mngt_cache_device_config *device_cfg,
struct kcas_start_cache *cmd);
int cache_mng_core_pool_get_paths(struct kcas_core_pool_path *cmd_info);
int cache_mng_core_pool_remove(struct kcas_core_pool_remove *cmd_info);
int cache_mng_cache_check_device(struct kcas_cache_check_device *cmd_info);
int cache_mng_prepare_core_cfg(struct ocf_mngt_core_config *cfg,
struct kcas_insert_core *cmd_info);
int cache_mng_init_instance(struct ocf_mngt_cache_config *cfg,
struct ocf_mngt_cache_device_config *device_cfg,
struct kcas_start_cache *cmd);
int cache_mng_set_seq_cutoff_threshold(ocf_cache_id_t id, ocf_core_id_t core_id,
uint32_t thresh);
int cache_mng_set_seq_cutoff_policy(ocf_cache_id_t id, ocf_core_id_t core_id,
ocf_seq_cutoff_policy policy);
int cache_mng_get_seq_cutoff_threshold(ocf_cache_id_t id, ocf_core_id_t core_id,
uint32_t *thresh);
int cache_mng_get_seq_cutoff_policy(ocf_cache_id_t id, ocf_core_id_t core_id,
ocf_seq_cutoff_policy *policy);
int cache_mng_set_cache_mode(ocf_cache_id_t id, ocf_cache_mode_t mode,
uint8_t flush);
int cache_mng_flush_object(ocf_cache_id_t cache_id, ocf_core_id_t core_id);
int cache_mng_flush_device(ocf_cache_id_t id);
ocf_cache_line_t cache_mng_lookup(ocf_cache_t cache,
ocf_core_id_t core_id, uint64_t core_cacheline);
int cache_mng_list_caches(struct kcas_cache_list *list);
int cache_mng_interrupt_flushing(ocf_cache_id_t id);
int cache_mng_get_info(struct kcas_cache_info *info);
int cache_mng_get_io_class_info(struct kcas_io_class *part);
int cache_mng_get_core_info(struct kcas_core_info *info);
void cache_mng_wait_for_rq_finish(ocf_cache_t cache);
int cache_mng_set_core_params(struct kcas_set_core_param *info);
int cache_mng_get_core_params(struct kcas_get_core_param *info);
int cache_mng_set_cache_params(struct kcas_set_cache_param *info);
int cache_mng_get_cache_params(struct kcas_get_cache_param *info);
#endif

File diff suppressed because it is too large


@@ -0,0 +1,46 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __LAYER_UPGRADE_H
#define __LAYER_UPGRADE_H
#include "cas_cache/cas_cache.h"
extern bool in_upgrade;
/**
* @brief Check whether CAS is in upgrade state
* @return true if it is, false otherwise
*/
bool cas_upgrade_is_in_upgrade(void);
/**
* @brief Check whether the caches configuration is stored in casdisk
* @return 0 if it exists
*/
int cas_upgrade_get_configuration(void);
/**
* @brief Start the in-flight upgrade procedure: dump configuration,
* switch caches to PT and close caches
* @return result
*/
int cas_upgrade(void);
/**
* @brief Finish upgrade in new CAS module - restore all caches
* @return result of restoring
*/
int cas_upgrade_finish(void);
/**
* @brief Try to parse configuration stored in casdisk
* @return result of verification
*/
int cas_upgrade_verify(void);
#endif /* __LAYER_UPGRADE_H */


@@ -0,0 +1,624 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __LINUX_KERNEL_VERSION_H__
#define __LINUX_KERNEL_VERSION_H__
/* Libraries. */
#include <linux/types.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/version.h>
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/ioctl.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/swap.h>
#include <linux/thread_info.h>
#include <asm-generic/ioctl.h>
#include <linux/bitops.h>
#include <linux/crc16.h>
#include <linux/crc32.h>
#include <linux/nmi.h>
#include <linux/ratelimit.h>
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#endif
#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0)
#include <generated/utsrelease.h>
#ifdef UTS_UBUNTU_RELEASE_ABI
#define CAS_UBUNTU
#endif
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
#error Unsupported Linux Kernel Version
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
#define FILE_INODE(file) file->f_inode
#else
#define FILE_INODE(file) file->f_dentry->d_inode
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 76)
#define DENTRY_ALIAS_HEAD(dentry) (dentry)->d_u.d_alias
#define ALIAS_NODE_TO_DENTRY(alias) container_of(alias, struct dentry, d_u.d_alias)
#else
#define DENTRY_ALIAS_HEAD(dentry) (dentry)->d_alias
#define ALIAS_NODE_TO_DENTRY(alias) container_of(alias, struct dentry, d_alias)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
#define ALIAS_NODE_TYPE struct hlist_node
#define DENTRY_LIST_EMPTY(head) hlist_empty(head)
#define INODE_FOR_EACH_DENTRY(pos, head) hlist_for_each(pos, head)
#else
#define DENTRY_LIST_EMPTY(head) list_empty(head)
#define ALIAS_NODE_TYPE struct list_head
#define INODE_FOR_EACH_DENTRY(pos, head) list_for_each(pos, head)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
#define BIO_OP_STATUS(bio) bio->bi_status
#else
#define BIO_OP_STATUS(bio) bio->bi_error
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
#define BIO_ENDIO(BIO, BYTES_DONE, ERROR) \
({ BIO_OP_STATUS(BIO) = ERROR; bio_endio(BIO); })
#else
#define BIO_ENDIO(BIO, BYTES_DONE, ERROR) bio_endio(BIO, ERROR)
#endif
#define REFER_BLOCK_CALLBACK(name) name##_callback
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
#define DECLARE_BLOCK_CALLBACK(name, BIO, BYTES_DONE, ERROR) \
void name##_callback(BIO, ERROR)
#define BLOCK_CALLBACK_INIT(BIO) {; }
#define BLOCK_CALLBACK_RETURN() { return; }
#define BLOCK_CALLBACK_ERROR(BIO, ERROR) ERROR
#else
#define DECLARE_BLOCK_CALLBACK(name, BIO, BYTES_DONE, ERROR) \
void name##_callback(BIO)
#define BLOCK_CALLBACK_INIT(BIO) {; }
#define BLOCK_CALLBACK_RETURN() { return; }
#define BLOCK_CALLBACK_ERROR(BIO, ERROR) BIO_OP_STATUS(BIO)
#endif
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 37)
#define OPEN_BDEV_EXCLUSIVE(PATH, FMODE, HOLDER) \
blkdev_get_by_path(PATH, (FMODE_EXCL | FMODE), HOLDER)
#define CLOSE_BDEV_EXCLUSIVE(BDEV, FMODE) \
blkdev_put(BDEV, (FMODE_EXCL | FMODE))
#else
#define OPEN_BDEV_EXCLUSIVE(PATH, FMODE, HOLDER) \
open_bdev_exclusive(PATH, FMODE, HOLDER)
#define CLOSE_BDEV_EXCLUSIVE(BDEV, FMODE) \
close_bdev_exclusive(BDEV, FMODE)
#endif
#ifdef CAS_UBUNTU
#define LOOKUP_BDEV(PATH) lookup_bdev(PATH, 0)
#else
#define LOOKUP_BDEV(PATH) lookup_bdev(PATH)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined CAS_SLES12SP3
#define BIO_OP_FLAGS_FORMAT "0x%016X"
#define BIO_OP_FLAGS(bio) (bio)->bi_opf
#else
#define BIO_OP_FLAGS_FORMAT "0x%016lX"
#define BIO_OP_FLAGS(bio) (bio)->bi_rw
#endif
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32)
#define BIO_RW_FLAGS ((1U << BIO_RW_UNPLUG) | \
(1U << BIO_RW_NOIDLE) | (1U << BIO_RW_SYNCIO))
#define BIO_SET_RW_FLAGS(bio) BIO_OP_FLAGS((bio)) |= BIO_RW_FLAGS
#else
#define BIO_RW_FLAGS 0
#define BIO_SET_RW_FLAGS(bio)
#endif
#if defined RQF_SOFTBARRIER
#define CHECK_BARRIER(bio) ((BIO_OP_FLAGS(bio) & RQF_SOFTBARRIER) != 0)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 1)
#define CHECK_BARRIER(bio) ((BIO_OP_FLAGS(bio) & REQ_SOFTBARRIER) != 0)
#else
#define CHECK_BARRIER(bio) (bio_rw_flagged((bio), BIO_RW_BARRIER))
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined CAS_SLES12SP3
#define RQ_DATA_DIR(rq) rq_data_dir(rq)
#define RQ_DATA_DIR_WR WRITE
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
#define RQ_DATA_DIR(rq) rq_data_dir(rq)
#define RQ_DATA_DIR_WR REQ_WRITE
#else
#define RQ_DATA_DIR(rq) rq_data_dir(rq)
#define RQ_DATA_DIR_WR WRITE
#endif
#if defined REQ_PREFLUSH
#define CAS_REQ_FLUSH REQ_PREFLUSH
#define CAS_FLUSH_SUPPORTED
#elif defined REQ_FLUSH
#define CAS_REQ_FLUSH REQ_FLUSH
#define CAS_FLUSH_SUPPORTED
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) || defined CAS_SLES12SP3
#define CHECK_QUEUE_FLUSH(q) test_bit(QUEUE_FLAG_WC, &(q)->queue_flags)
#define CHECK_QUEUE_FUA(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
static inline void cas_set_queue_flush_fua(struct request_queue *q,
bool flush, bool fua)
{
blk_queue_write_cache(q, flush, fua);
}
#else
#define CHECK_QUEUE_FLUSH(q) ((q)->flush_flags & CAS_REQ_FLUSH)
#define CHECK_QUEUE_FUA(q) ((q)->flush_flags & REQ_FUA)
static inline void cas_set_queue_flush_fua(struct request_queue *q,
bool flush, bool fua)
{
unsigned int flags = 0;
if (flush)
flags |= CAS_REQ_FLUSH;
if (fua)
flags |= REQ_FUA;
if (flags)
blk_queue_flush(q, flags);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
#ifdef WRITE_FLUSH
#define RQ_IS_FLUSH(rq) ((rq)->cmd_flags & CAS_REQ_FLUSH)
#ifdef BIO_FLUSH
#define CAS_IS_WRITE_FLUSH(flags) ((flags) & BIO_FLUSH)
#else
#define CAS_IS_WRITE_FLUSH(flags) \
((flags) & CAS_REQ_FLUSH)
#endif
#define OCF_WRITE_FLUSH WRITE_FLUSH
#elif defined REQ_PREFLUSH
#define RQ_IS_FLUSH(rq) ((rq)->cmd_flags & REQ_PREFLUSH)
#define OCF_WRITE_FLUSH (REQ_OP_WRITE | REQ_PREFLUSH)
#define CAS_IS_WRITE_FLUSH(flags) \
(OCF_WRITE_FLUSH == ((flags) & OCF_WRITE_FLUSH))
#else
#define RQ_IS_FLUSH(rq) 0
#define CAS_IS_WRITE_FLUSH(flags) \
(WRITE_BARRIER == ((flags) & WRITE_BARRIER))
#define OCF_WRITE_FLUSH WRITE_BARRIER
#endif /* #ifdef WRITE_FLUSH */
#ifdef WRITE_FLUSH_FUA
#define OCF_WRITE_FLUSH_FUA WRITE_FLUSH_FUA
#ifdef BIO_FUA
#define CAS_IS_WRITE_FLUSH_FUA(flags) \
((BIO_FUA | BIO_FLUSH) == \
((flags) & (BIO_FUA | BIO_FLUSH)))
#else
#define CAS_IS_WRITE_FLUSH_FUA(flags) \
((REQ_FUA | CAS_REQ_FLUSH) == \
((flags) & (REQ_FUA | CAS_REQ_FLUSH)))
#endif
#elif defined REQ_PREFLUSH
#define CAS_IS_WRITE_FLUSH_FUA(flags) \
((REQ_PREFLUSH | REQ_FUA) == \
((flags) & (REQ_PREFLUSH |REQ_FUA)))
#define OCF_WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA)
#else
#define CAS_IS_WRITE_FLUSH_FUA(flags) 0
#define OCF_WRITE_FLUSH_FUA WRITE_BARRIER
#endif /* #ifdef WRITE_FLUSH_FUA */
#ifdef WRITE_FUA
#ifdef BIO_FUA
#define CAS_IS_WRITE_FUA(flags) ((flags) & BIO_FUA)
#else
#define CAS_IS_WRITE_FUA(flags) ((flags) & REQ_FUA)
#endif
#define OCF_WRITE_FUA WRITE_FUA
#elif defined REQ_FUA
#define CAS_IS_WRITE_FUA(flags) ((flags) & REQ_FUA)
#define OCF_WRITE_FUA REQ_FUA
#else
#define CAS_IS_WRITE_FUA(flags) 0
#define OCF_WRITE_FUA WRITE_BARRIER
#endif /* #ifdef WRITE_FUA */
#endif /* #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) */
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 7, 9)
#define DAEMONIZE(name, arg...) daemonize(name, ##arg)
#else
#define DAEMONIZE(name, arg...) do { } while (0)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
#define SET_QUEUE_CHUNK_SECTORS(queue, chunk_size) \
queue->limits.chunk_sectors = chunk_size;
#else
#define SET_QUEUE_CHUNK_SECTORS(queue, chunk_size) {; }
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
#define BIO_BISIZE(bio) bio->bi_size
#define BIO_BIIDX(bio) bio->bi_idx
#define BIO_BISECTOR(bio) bio->bi_sector
#else
#define BIO_BISIZE(bio) bio->bi_iter.bi_size
#define BIO_BISECTOR(bio) bio->bi_iter.bi_sector
#define BIO_BIIDX(bio) bio->bi_iter.bi_idx
#endif
#ifdef CAS_SLES12SP3
#define CAS_IS_DISCARD(bio) \
(((BIO_OP_FLAGS(bio)) & REQ_OP_MASK) == REQ_OP_DISCARD)
#define CAS_BIO_DISCARD \
((REQ_OP_WRITE | REQ_OP_DISCARD))
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
#define CAS_IS_DISCARD(bio) \
(bio_op(bio) == REQ_OP_DISCARD)
#define CAS_BIO_DISCARD \
(REQ_OP_DISCARD)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
#define CAS_IS_DISCARD(bio) \
((BIO_OP_FLAGS(bio)) & REQ_OP_DISCARD)
#define CAS_BIO_DISCARD \
((REQ_OP_WRITE | REQ_OP_DISCARD))
#elif LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
#define CAS_IS_DISCARD(bio) ((BIO_OP_FLAGS(bio)) & REQ_DISCARD)
#define CAS_BIO_DISCARD (REQ_WRITE | REQ_DISCARD)
#else
#define CAS_IS_DISCARD(bio) ((BIO_OP_FLAGS(bio)) & (1 << BIO_RW_DISCARD))
#define CAS_BIO_DISCARD ((1 << BIO_RW) | (1 << BIO_RW_DISCARD))
#endif
#include <linux/mm.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
#include <uapi/asm-generic/mman-common.h>
static inline unsigned long cas_vm_mmap(struct file *file,
unsigned long addr, unsigned long len)
{
return vm_mmap(file, addr, len, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0);
}
static inline int cas_vm_munmap(unsigned long start, size_t len)
{
return vm_munmap(start, len);
}
#else
#include <asm-generic/mman-common.h>
static inline unsigned long cas_vm_mmap(struct file *file,
unsigned long addr, unsigned long len)
{
return do_mmap_pgoff(file, addr, len, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, 0);
}
static inline int cas_vm_munmap(unsigned long start, size_t len)
{
return do_munmap(current->mm, start, len);
}
#endif
/*
* For 8KB kernel stacks, check whether the request is contiguous and, if
* not, submit each bio as a separate request. This prevents the nvme
* driver from splitting requests.
* For large requests, nvme splitting causes stack overrun.
*/
#if THREAD_SIZE <= 8192
#define RQ_CHECK_CONTINOUS
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
#define SEGMENT_BVEC(vec) (&(vec))
#else
#define SEGMENT_BVEC(vec) (vec)
#endif
#ifndef SHRT_MIN
#define SHRT_MIN ((s16)-32768)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX ((s16)32767)
#endif
#define ENOTSUP ENOTSUPP
#ifdef RHEL_RELEASE_VERSION
#if RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(7, 3)
#define CAS_RHEL_73
#endif
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || defined CAS_SLES12SP3
static inline blk_qc_t cas_submit_bio(int rw, struct bio *bio)
{
BIO_OP_FLAGS(bio) |= rw;
return submit_bio(bio);
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
static inline blk_qc_t cas_submit_bio(int rw, struct bio *bio)
{
return submit_bio(rw, bio);
}
#else
static inline void cas_submit_bio(int rw, struct bio *bio)
{
submit_bio(rw, bio);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
#define cas_blk_rq_set_block_pc(rq) {}
#else
#define cas_blk_rq_set_block_pc(rq) blk_rq_set_block_pc(rq)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
#define cas_blk_queue_bounce(q, bounce_bio) ({})
#else
#define cas_blk_queue_bounce(q, bounce_bio) blk_queue_bounce(q, bounce_bio)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 11)
#define cas_blk_rq_append_bio(rq, bounce_bio) blk_rq_append_bio(rq, &bounce_bio)
#else
#define cas_blk_rq_append_bio(rq, bounce_bio) blk_rq_append_bio(rq, bounce_bio)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined CAS_SLES12SP3
static inline struct request *cas_blk_make_request(struct request_queue *q,
struct bio *bio, gfp_t gfp_mask)
{
struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
if (IS_ERR(rq))
return rq;
cas_blk_rq_set_block_pc(rq);
rq->q = q;
for_each_bio(bio) {
struct bio *bounce_bio = bio;
int ret;
cas_blk_queue_bounce(q, &bounce_bio);
ret = cas_blk_rq_append_bio(rq, bounce_bio);
if (unlikely(ret)) {
blk_put_request(rq);
return ERR_PTR(ret);
}
}
return rq;
}
#else
static inline struct request *cas_blk_make_request(struct request_queue *q,
struct bio *bio, gfp_t gfp_mask)
{
return blk_make_request(q, bio, gfp_mask);
}
#endif
#ifdef CAS_RHEL_73
static inline void cas_copy_queue_limits(struct request_queue *exp_q,
struct request_queue *cache_q, struct request_queue *core_q)
{
struct queue_limits_aux *l_aux = exp_q->limits.limits_aux;
exp_q->limits = cache_q->limits;
exp_q->limits.limits_aux = l_aux;
if (exp_q->limits.limits_aux && cache_q->limits.limits_aux)
*exp_q->limits.limits_aux = *cache_q->limits.limits_aux;
exp_q->limits.max_sectors = core_q->limits.max_sectors;
exp_q->limits.max_hw_sectors = core_q->limits.max_hw_sectors;
exp_q->limits.max_segments = core_q->limits.max_segments;
exp_q->limits.max_write_same_sectors = 0;
/*
* Workaround for RHEL/CentOS 7.3 bug in kernel.
* Merging implementation on blk-mq does not respect the virt boundary
* restriction and front-merges bios with non-zero offsets.
* This leads to request with gaps between bios and in consequence
* triggers BUG_ON() in nvme driver or silently corrupts data.
* To prevent this, disable merging on cache queue if there are
* requirements regarding virt boundary (marking bios with REQ_NOMERGE
* does not solve this problem).
*/
if (queue_virt_boundary(cache_q))
queue_flag_set(QUEUE_FLAG_NOMERGES, cache_q);
}
#else
static inline void cas_copy_queue_limits(struct request_queue *exp_q,
struct request_queue *cache_q, struct request_queue *core_q)
{
exp_q->limits = cache_q->limits;
exp_q->limits.max_sectors = core_q->limits.max_sectors;
exp_q->limits.max_hw_sectors = core_q->limits.max_hw_sectors;
exp_q->limits.max_segments = core_q->limits.max_segments;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) || defined CAS_SLES12SP3
exp_q->limits.max_write_same_sectors = 0;
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || defined CAS_SLES12SP3
exp_q->limits.max_write_zeroes_sectors = 0;
#endif
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
#define CAS_GARBAGE_COLLECTOR
#endif
/* rate-limited printk */
#define CAS_PRINT_RL(...) \
if (printk_ratelimit()) \
printk(__VA_ARGS__)
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
static inline void cas_generic_start_io_acct(struct request_queue *q,
int rw, unsigned long sectors, struct hd_struct *part)
{
int cpu = part_stat_lock();
part_round_stats(cpu, part);
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, sectors[rw], sectors);
part_inc_in_flight(part, rw);
part_stat_unlock();
}
static inline void cas_generic_end_io_acct(struct request_queue *q,
int rw, struct hd_struct *part, unsigned long start_time)
{
unsigned long duration = jiffies - start_time;
int cpu = part_stat_lock();
part_stat_add(cpu, part, ticks[rw], duration);
part_round_stats(cpu, part);
part_dec_in_flight(part, rw);
part_stat_unlock();
}
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
static inline void cas_generic_start_io_acct(struct request_queue *q,
int rw, unsigned long sectors, struct hd_struct *part)
{
generic_start_io_acct(rw, sectors, part);
}
static inline void cas_generic_end_io_acct(struct request_queue *q,
int rw, struct hd_struct *part, unsigned long start_time)
{
generic_end_io_acct(rw, part, start_time);
}
#else
static inline void cas_generic_start_io_acct(struct request_queue *q,
int rw, unsigned long sectors, struct hd_struct *part)
{
generic_start_io_acct(q, rw, sectors, part);
}
static inline void cas_generic_end_io_acct(struct request_queue *q,
int rw, struct hd_struct *part, unsigned long start_time)
{
generic_end_io_acct(q, rw, part, start_time);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
static inline unsigned long cas_global_zone_page_state(enum zone_stat_item item)
{
return global_zone_page_state(item);
}
#define CAS_BIO_SET_DEV(bio, bdev) bio_set_dev(bio, bdev)
#define CAS_BIO_GET_DEV(bio) bio->bi_disk
#else
static inline unsigned long cas_global_zone_page_state(enum zone_stat_item item)
{
return global_page_state(item);
}
#define CAS_BIO_SET_DEV(bio, bdev) bio->bi_bdev = bdev
#define CAS_BIO_GET_DEV(bio) bio->bi_bdev->bd_disk
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
#define CAS_RATELIMIT(state, func_name) __ratelimit(state)
#else
#define CAS_RATELIMIT(state, func_name) ___ratelimit(state, func_name)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)
static inline struct bio *cas_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
return bio_clone_fast(bio, gfp_mask, NULL);
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
static inline struct bio *cas_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
return bio_clone_kmalloc(bio, gfp_mask);
}
#define CAS_BLK_STATUS_T blk_status_t
#else
static inline struct bio *cas_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
return bio_clone(bio, gfp_mask);
}
#define CAS_BLK_STATUS_T int
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
static inline int is_rq_type_fs(struct request *rq)
{
switch (req_op(rq)){
case REQ_OP_READ:
case REQ_OP_WRITE:
case REQ_OP_FLUSH:
case REQ_OP_DISCARD:
return true;
default:
return false;
}
}
#else
static inline int is_rq_type_fs(struct request *rq)
{
return rq->cmd_type == REQ_TYPE_FS;
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
#define CAS_SET_DISCARD_ZEROES_DATA(queue_limits, val) ({})
#else
#define CAS_SET_DISCARD_ZEROES_DATA(queue_limits, val) \
queue_limits.discard_zeroes_data = val
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
#define cas_queue_flag_set_unlocked(flag, request_queue) \
blk_queue_flag_set(flag, request_queue)
#else
#define cas_queue_flag_set_unlocked(flag, request_queue) \
queue_flag_set_unlocked(flag, request_queue)
#endif
#endif /* #ifndef __LINUX_KERNEL_VERSION_H__ */

210
modules/cas_cache/main.c Normal file
View File

@@ -0,0 +1,210 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
/* Layer information. */
MODULE_AUTHOR("Intel(R) Corporation");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(CAS_VERSION);
u32 max_writeback_queue_size = 65536;
module_param(max_writeback_queue_size, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(max_writeback_queue_size,
"Max cache writeback queue size (65536)");
u32 writeback_queue_unblock_size = 60000;
module_param(writeback_queue_unblock_size, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(writeback_queue_unblock_size,
"Cache writeback queue size (60000) at which queue "
"is unblocked when blocked");
u32 dry_run;
module_param(dry_run, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(dry_run, "Perform dry run on module load");
u32 use_io_scheduler = 1;
module_param(use_io_scheduler, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(use_io_scheduler,
"Configure how IO shall be handled. "
"0 - in make request function, 1 - in request function");
u32 metadata_layout = ocf_metadata_layout_default;
module_param(metadata_layout, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(metadata_layout, "Metadata layout, 0 - striping, 1 - sequential");
u32 unaligned_io = 1;
module_param(unaligned_io, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(unaligned_io,
"Define how to handle I/O requests unaligned to 4 kiB, "
"0 - apply PT, 1 - handle by cache");
u32 seq_cut_off_mb = 1;
module_param(seq_cut_off_mb, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(seq_cut_off_mb,
"Sequential cut off threshold in MiB. 0 - disable");
/* globals */
bool in_upgrade;
ocf_ctx_t cas_ctx;
struct casdsk_functions_mapper casdisk_functions;
struct exported_symbol {
char *name;
unsigned long addr;
};
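/* kallsyms_on_each_symbol() callback: when the visited symbol matches
* the requested name, remember its address. */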
int static cas_find_symbol(void *data, const char *namebuf,
struct module *module, unsigned long kallsyms_addresses)
{
struct exported_symbol *sym = data;
if (strcmp(namebuf, sym->name) == 0)
sym->addr = kallsyms_addresses;
return 0;
}
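/* Resolve one cas_disk symbol via kallsyms and store it in casdisk_functions;
* expands to a return -EINVAL from the enclosing function if the symbol is
* missing. */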
#define cas_lookup_symbol(f) ({ \
struct exported_symbol sym = {#f, 0}; \
kallsyms_on_each_symbol(&cas_find_symbol, &sym); \
casdisk_functions.f = (void *)sym.addr; \
if (!casdisk_functions.f) \
return -EINVAL; \
})
int static cas_casdisk_lookup_funtions(void)
{
cas_lookup_symbol(casdsk_disk_dettach);
cas_lookup_symbol(casdsk_exp_obj_destroy);
cas_lookup_symbol(casdsk_exp_obj_create);
cas_lookup_symbol(casdsk_disk_get_queue);
cas_lookup_symbol(casdsk_store_config);
cas_lookup_symbol(casdsk_disk_get_blkdev);
cas_lookup_symbol(casdsk_exp_obj_get_queue);
cas_lookup_symbol(casdsk_get_version);
cas_lookup_symbol(casdsk_disk_close);
cas_lookup_symbol(casdsk_disk_claim);
cas_lookup_symbol(casdsk_exp_obj_unlock);
cas_lookup_symbol(casdsk_disk_set_pt);
cas_lookup_symbol(casdsk_get_stored_config);
cas_lookup_symbol(casdsk_disk_get_gendisk);
cas_lookup_symbol(casdsk_disk_attach);
cas_lookup_symbol(casdsk_disk_set_attached);
cas_lookup_symbol(casdsk_exp_obj_activate);
cas_lookup_symbol(casdsk_exp_obj_activated);
cas_lookup_symbol(casdsk_exp_obj_lock);
cas_lookup_symbol(casdsk_free_stored_config);
cas_lookup_symbol(casdsk_disk_open);
cas_lookup_symbol(casdsk_disk_clear_pt);
cas_lookup_symbol(casdsk_exp_obj_get_gendisk);
return 0;
}
static int __init cas_init_module(void)
{
int result = 0;
result = cas_casdisk_lookup_funtions();
if (result) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Could not find inteldisk functions.\n");
return result;
}
if (casdisk_functions.casdsk_get_version() != CASDSK_IFACE_VERSION) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Incompatible inteldisk module\n");
return -EINVAL;
}
if (!writeback_queue_unblock_size || !max_writeback_queue_size) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Invalid module parameter.\n");
return -EINVAL;
}
if (writeback_queue_unblock_size >= max_writeback_queue_size) {
printk(KERN_ERR OCF_PREFIX_SHORT
"parameter writeback_queue_unblock_size"
" must be less than max_writeback_queue_size\n");
return -EINVAL;
}
if (metadata_layout >= ocf_metadata_layout_max) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Invalid value for metadata_layout parameter\n");
return -EINVAL;
}
if (unaligned_io != 0 && unaligned_io != 1) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Invalid value for unaligned_io parameter\n");
return -EINVAL;
}
if (use_io_scheduler != 0 && use_io_scheduler != 1) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Invalid value for use_io_scheduler parameter\n");
return -EINVAL;
}
result = cas_initialize_context();
if (result) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Cannot initialize cache library\n");
return result;
}
result = cas_upgrade_get_configuration();
if (-KCAS_ERR_NO_STORED_CONF == result) {
printk(KERN_INFO OCF_PREFIX_SHORT
"Not found configuration for upgrade. "
"Standard module initialization.\n");
} else {
if (!dry_run) {
result = cas_upgrade_finish();
if (result) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Error during finish upgrade, "
"result: %d\n", result);
goto error_cas_ctx_init;
}
} else {
result = cas_upgrade_verify();
if (result) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Error during upgrade "
"verification\n");
goto error_cas_ctx_init;
}
}
}
result = cas_ctrl_device_init();
if (result) {
printk(KERN_ERR OCF_PREFIX_SHORT
"Cannot initialize control device\n");
goto error_cas_ctx_init;
}
printk(KERN_INFO "%s Version %s (%s)::Module loaded successfully\n",
OCF_PREFIX_LONG, CAS_VERSION, CAS_KERNEL);
return 0;
error_cas_ctx_init:
cas_cleanup_context();
return result;
}
module_init(cas_init_module);
static void __exit cas_exit_module(void)
{
cas_ctrl_device_deinit();
cas_cleanup_context();
}
module_exit(cas_exit_module);

284
modules/cas_cache/ocf_env.c Normal file
View File

@@ -0,0 +1,284 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#include "utils/utils_rpool.h"
/* *** ALLOCATOR *** */
#define CAS_ALLOC_ALLOCATOR_LIMIT 256
struct _env_allocator {
/*!< Memory pool ID unique name */
char *name;
/*!< Size of specific item of memory pool */
uint32_t item_size;
/*!< OS handle to memory pool */
struct kmem_cache *kmem_cache;
/*!< Number of currently allocated items in pool */
atomic_t count;
struct cas_reserve_pool *rpool;
};
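/* Round an item size up to the next power of two (sizes <= 2 are
* returned unchanged). */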
static inline size_t env_allocator_align(size_t size)
{
if (size <= 2)
return size;
return (1ULL << 32) >> __builtin_clz(size - 1);
}
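/* Header stored in front of every allocation: flags remembers the GFP mask
* used (which decides whether the item may return to the reserve pool) and
* cpu the reserve-pool slot; data[] is what callers receive. */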
struct _env_allocator_item {
uint32_t flags;
uint32_t cpu;
char data[];
};
void *env_allocator_new(env_allocator *allocator)
{
struct _env_allocator_item *item = NULL;
int cpu;
item = cas_rpool_try_get(allocator->rpool, &cpu);
if (item) {
memset(item->data, 0, allocator->item_size -
sizeof(struct _env_allocator_item));
} else {
item = kmem_cache_zalloc(allocator->kmem_cache, GFP_ATOMIC);
}
if (item) {
item->cpu = cpu;
atomic_inc(&allocator->count);
return &item->data;
} else {
return NULL;
}
}
void *env_allocator_new_rpool(void *allocator_ctx, int cpu)
{
env_allocator *allocator = (env_allocator*) allocator_ctx;
struct _env_allocator_item *item;
item = kmem_cache_zalloc(allocator->kmem_cache, GFP_NOIO |
__GFP_NORETRY);
if (item) {
item->flags = (GFP_NOIO | __GFP_NORETRY);
item->cpu = cpu;
}
return item;
}
void env_allocator_del_rpool(void *allocator_ctx, void *item)
{
env_allocator *allocator = (env_allocator* ) allocator_ctx;
kmem_cache_free(allocator->kmem_cache, item);
}
#define ENV_ALLOCATOR_NAME_MAX 128
env_allocator *env_allocator_create(uint32_t size, const char *name)
{
int error = -1;
bool retry = true;
env_allocator *allocator = kzalloc(sizeof(*allocator), GFP_KERNEL);
if (!allocator) {
error = __LINE__;
goto err;
}
if (size < CAS_RPOOL_MIN_SIZE_ITEM) {
printk(KERN_ERR "Can not create allocator."
" Item size is too small.");
ENV_WARN(true, OCF_PREFIX_SHORT" Can not create allocator."
" Item size is too small.\n");
error = __LINE__;
goto err;
}
allocator->item_size = size + sizeof(struct _env_allocator_item);
if (allocator->item_size > PAGE_SIZE) {
printk(KERN_WARNING "Creating allocator with item size"
" greater than 4096B");
ENV_WARN(true, OCF_PREFIX_SHORT" Creating allocator"
" with item size greater than 4096B\n");
}
allocator->name = kstrdup(name, ENV_MEM_NORMAL);
if (!allocator->name) {
error = __LINE__;
goto err;
}
/* Initialize kernel memory cache */
#ifdef CONFIG_SLAB
RETRY:
#else
(void)retry;
#endif
allocator->kmem_cache = kmem_cache_create(allocator->name,
allocator->item_size, 0, 0, NULL);
if (!allocator->kmem_cache) {
/* Can not setup kernel memory cache */
error = __LINE__;
goto err;
}
#ifdef CONFIG_SLAB
if ((allocator->item_size < PAGE_SIZE)
&& allocator->kmem_cache->gfporder) {
/* Goal is to have one page allocation */
if (retry) {
retry = false;
kmem_cache_destroy(allocator->kmem_cache);
allocator->kmem_cache = NULL;
allocator->item_size = env_allocator_align(allocator->item_size);
goto RETRY;
}
}
#endif
/* Initialize reserve pool handler per cpu */
allocator->rpool = cas_rpool_create(CAS_ALLOC_ALLOCATOR_LIMIT,
allocator->name, allocator->item_size, env_allocator_new_rpool,
env_allocator_del_rpool, allocator);
if (!allocator->rpool) {
error = __LINE__;
goto err;
}
return allocator;
err:
printk(KERN_ERR "Cannot create memory allocator, ERROR %d", error);
env_allocator_destroy(allocator);
return NULL;
}
void env_allocator_del(env_allocator *allocator, void *obj)
{
struct _env_allocator_item *item =
container_of(obj, struct _env_allocator_item, data);
atomic_dec(&allocator->count);
if (item->flags == (GFP_NOIO | __GFP_NORETRY) &&
!cas_rpool_try_put(allocator->rpool, item, item->cpu))
return;
kmem_cache_free(allocator->kmem_cache, item);
}
void env_allocator_destroy(env_allocator *allocator)
{
if (allocator) {
cas_rpool_destroy(allocator->rpool, env_allocator_del_rpool,
allocator);
allocator->rpool = NULL;
if (atomic_read(&allocator->count)) {
printk(KERN_CRIT "Not all object deallocated\n");
ENV_WARN(true, OCF_PREFIX_SHORT" Cleanup problem\n");
}
if (allocator->kmem_cache)
kmem_cache_destroy(allocator->kmem_cache);
kfree(allocator->name);
kfree(allocator);
}
}
uint32_t env_allocator_item_count(env_allocator *allocator)
{
return atomic_read(&allocator->count);
}
static int env_sort_is_aligned(const void *base, int align)
{
return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
((unsigned long)base & (align - 1)) == 0;
}
static void env_sort_u32_swap(void *a, void *b, int size)
{
u32 t = *(u32 *)a;
*(u32 *)a = *(u32 *)b;
*(u32 *)b = t;
}
static void env_sort_u64_swap(void *a, void *b, int size)
{
u64 t = *(u64 *)a;
*(u64 *)a = *(u64 *)b;
*(u64 *)b = t;
}
static void env_sort_generic_swap(void *a, void *b, int size)
{
char t;
do {
t = *(char *)a;
*(char *)a++ = *(char *)b;
*(char *)b++ = t;
} while (--size > 0);
}
void env_sort(void *base, size_t num, size_t size,
int (*cmp_fn)(const void *, const void *),
void (*swap_fn)(void *, void *, int size))
{
/* pre-scale counters for performance */
int64_t i = (num/2 - 1) * size, n = num * size, c, r;
if (!swap_fn) {
if (size == 4 && env_sort_is_aligned(base, 4))
swap_fn = env_sort_u32_swap;
else if (size == 8 && env_sort_is_aligned(base, 8))
swap_fn = env_sort_u64_swap;
else
swap_fn = env_sort_generic_swap;
}
/* heapify */
for ( ; i >= 0; i -= size) {
for (r = i; r * 2 + size < n; r = c) {
c = r * 2 + size;
if (c < n - size &&
cmp_fn(base + c, base + c + size) < 0)
c += size;
if (cmp_fn(base + r, base + c) >= 0)
break;
swap_fn(base + r, base + c, size);
}
}
/* sort */
for (i = n - size; i > 0; i -= size) {
swap_fn(base, base + i, size);
for (r = 0; r * 2 + size < i; r = c) {
c = r * 2 + size;
if (c < i - size &&
cmp_fn(base + c, base + c + size) < 0)
c += size;
if (cmp_fn(base + r, base + c) >= 0)
break;
swap_fn(base + r, base + c, size);
}
}
}

584
modules/cas_cache/ocf_env.h Normal file
View File

@@ -0,0 +1,584 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __OCF_ENV_H__
#define __OCF_ENV_H__
#include "linux_kernel_version.h"
#include "utils/utils_gc.h"
#include "ocf/ocf_err.h"
/* linux sector 512-bytes */
#define ENV_SECTOR_SHIFT 9
/* *** MEMORY MANAGEMENT *** */
#define ENV_MEM_NORMAL GFP_KERNEL
#define ENV_MEM_NOIO GFP_NOIO
#define ENV_MEM_ATOMIC GFP_ATOMIC
static inline uint64_t env_get_free_memory(void)
{
return cas_global_zone_page_state(NR_FREE_PAGES) << PAGE_SHIFT;
}
static inline void *env_malloc(size_t size, int flags)
{
return kmalloc(size, flags);
}
static inline void *env_zalloc(size_t size, int flags)
{
return kzalloc(size, flags);
}
static inline void env_free(const void *ptr)
{
kfree(ptr);
}
static inline void *env_vmalloc(size_t size)
{
return vmalloc(size);
}
static inline void *env_vzalloc(size_t size)
{
return vzalloc(size);
}
static inline void env_vfree(const void *ptr)
{
cas_vfree(ptr);
}
/* *** ALLOCATOR *** */
typedef struct _env_allocator env_allocator;
env_allocator *env_allocator_create(uint32_t size, const char *name);
void env_allocator_destroy(env_allocator *allocator);
void *env_allocator_new(env_allocator *allocator);
void env_allocator_del(env_allocator *allocator, void *item);
uint32_t env_allocator_item_count(env_allocator *allocator);
/* *** MUTEX *** */
typedef struct mutex env_mutex;
static inline int env_mutex_init(env_mutex *mutex)
{
mutex_init(mutex);
return 0;
}
static inline void env_mutex_lock(env_mutex *mutex)
{
mutex_lock(mutex);
}
static inline int env_mutex_lock_interruptible(env_mutex *mutex)
{
return mutex_lock_interruptible(mutex) ? -OCF_ERR_INTR : 0;
}
static inline int env_mutex_trylock(env_mutex *mutex)
{
return mutex_trylock(mutex) ? 0 : -OCF_ERR_NO_LOCK;
}
static inline void env_mutex_unlock(env_mutex *mutex)
{
mutex_unlock(mutex);
}
static inline int env_mutex_is_locked(env_mutex *mutex)
{
return mutex_is_locked(mutex);
}
/* *** RECURSIVE MUTEX *** */
typedef struct {
struct mutex mutex;
atomic_t count;
struct task_struct *holder;
} env_rmutex;
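/* Recursive mutex: the current holder may take the lock again; count tracks
* the nesting depth and the underlying mutex is released only when the count
* drops back to zero. */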
static inline int env_rmutex_init(env_rmutex *rmutex)
{
mutex_init(&rmutex->mutex);
atomic_set(&rmutex->count, 0);
rmutex->holder = NULL;
return 0;
}
static inline void env_rmutex_lock(env_rmutex *rmutex)
{
if (current == rmutex->holder) {
atomic_inc(&rmutex->count);
return;
}
mutex_lock(&rmutex->mutex);
rmutex->holder = current;
atomic_inc(&rmutex->count);
}
static inline int env_rmutex_lock_interruptible(env_rmutex *rmutex)
{
int result = 0;
if (current == rmutex->holder) {
atomic_inc(&rmutex->count);
return 0;
}
result = mutex_lock_interruptible(&rmutex->mutex);
if (result) {
/* No lock */
return -OCF_ERR_INTR;
}
rmutex->holder = current;
atomic_inc(&rmutex->count);
return 0;
}
static inline int env_rmutex_trylock(env_rmutex *rmutex)
{
if (current == rmutex->holder) {
atomic_inc(&rmutex->count);
return 0;
}
if (!mutex_trylock(&rmutex->mutex)) {
/* No lock */
return -OCF_ERR_NO_LOCK;
}
rmutex->holder = current;
atomic_inc(&rmutex->count);
return 0;
}
static inline void env_rmutex_unlock(env_rmutex *rmutex)
{
BUG_ON(current != rmutex->holder);
if (atomic_dec_return(&rmutex->count)) {
return;
}
rmutex->holder = NULL;
mutex_unlock(&rmutex->mutex);
}
static inline int env_rmutex_is_locked(env_rmutex *rmutex)
{
return mutex_is_locked(&rmutex->mutex);
}
/* *** RW SEMAPHORE *** */
typedef struct
{
struct rw_semaphore sem;
wait_queue_head_t wq;
} env_rwsem;
static inline int env_rwsem_init(env_rwsem *s)
{
init_rwsem(&s->sem);
init_waitqueue_head(&s->wq);
return 0;
}
static inline void env_rwsem_up_read(env_rwsem *s)
{
up_read(&s->sem);
wake_up_all(&s->wq);
}
static inline void env_rwsem_down_read(env_rwsem *s)
{
down_read(&s->sem);
}
static inline int env_rwsem_down_read_interruptible(env_rwsem *s)
{
return wait_event_interruptible(s->wq,
down_read_trylock(&s->sem)) ? -OCF_ERR_INTR : 0;
}
static inline int env_rwsem_down_read_trylock(env_rwsem *s)
{
return down_read_trylock(&s->sem) ? 0 : -OCF_ERR_NO_LOCK;
}
static inline void env_rwsem_up_write(env_rwsem *s)
{
up_write(&s->sem);
wake_up_all(&s->wq);
}
static inline void env_rwsem_down_write(env_rwsem *s)
{
down_write(&s->sem);
}
static inline int env_rwsem_down_write_interruptible(env_rwsem *s)
{
return wait_event_interruptible(s->wq,
down_write_trylock(&s->sem)) ? -OCF_ERR_INTR : 0;
}
static inline int env_rwsem_down_write_trylock(env_rwsem *s)
{
return down_write_trylock(&s->sem) ? 0 : -OCF_ERR_NO_LOCK;
}
static inline int env_rwsem_is_locked(env_rwsem *s)
{
return rwsem_is_locked(&s->sem);
}
/* *** COMPLETION *** */
typedef struct completion env_completion;
static inline void env_completion_init(env_completion *completion)
{
init_completion(completion);
}
static inline void env_completion_wait(env_completion *completion)
{
wait_for_completion(completion);
}
static inline void env_completion_complete(env_completion *completion)
{
complete(completion);
}
/* *** ATOMIC VARIABLES *** */
typedef atomic_t env_atomic;
typedef atomic64_t env_atomic64;
static inline int env_atomic_read(const env_atomic *a)
{
return atomic_read(a);
}
static inline void env_atomic_set(env_atomic *a, int i)
{
atomic_set(a, i);
}
static inline void env_atomic_add(int i, env_atomic *a)
{
atomic_add(i, a);
}
static inline void env_atomic_sub(int i, env_atomic *a)
{
atomic_sub(i, a);
}
static inline bool env_atomic_sub_and_test(int i, env_atomic *a)
{
return atomic_sub_and_test(i, a);
}
static inline void env_atomic_inc(env_atomic *a)
{
atomic_inc(a);
}
static inline void env_atomic_dec(env_atomic *a)
{
atomic_dec(a);
}
static inline bool env_atomic_dec_and_test(env_atomic *a)
{
return atomic_dec_and_test(a);
}
static inline bool env_atomic_inc_and_test(env_atomic *a)
{
return atomic_inc_and_test(a);
}
static inline int env_atomic_add_return(int i, env_atomic *a)
{
return atomic_add_return(i, a);
}
static inline int env_atomic_sub_return(int i, env_atomic *a)
{
return atomic_sub_return(i, a);
}
static inline int env_atomic_inc_return(env_atomic *a)
{
return atomic_inc_return(a);
}
static inline int env_atomic_dec_return(env_atomic *a)
{
return atomic_dec_return(a);
}
static inline int env_atomic_cmpxchg(env_atomic *a, int old, int new_value)
{
return atomic_cmpxchg(a, old, new_value);
}
static inline int env_atomic_add_unless(env_atomic *a, int i, int u)
{
return atomic_add_unless(a, i, u);
}
static inline u64 env_atomic64_read(const env_atomic64 *a)
{
return atomic64_read(a);
}
static inline void env_atomic64_set(env_atomic64 *a, u64 i)
{
atomic64_set(a, i);
}
static inline void env_atomic64_add(u64 i, env_atomic64 *a)
{
atomic64_add(i, a);
}
static inline void env_atomic64_sub(u64 i, env_atomic64 *a)
{
atomic64_sub(i, a);
}
static inline void env_atomic64_inc(env_atomic64 *a)
{
atomic64_inc(a);
}
static inline void env_atomic64_dec(env_atomic64 *a)
{
atomic64_dec(a);
}
static inline u64 env_atomic64_inc_return(env_atomic64 *a)
{
return atomic64_inc_return(a);
}
static inline u64 env_atomic64_cmpxchg(atomic64_t *a, u64 old, u64 new)
{
return atomic64_cmpxchg(a, old, new);
}
/* *** SPIN LOCKS *** */
typedef spinlock_t env_spinlock;
static inline void env_spinlock_init(env_spinlock *l)
{
spin_lock_init(l);
}
static inline void env_spinlock_lock(env_spinlock *l)
{
spin_lock(l);
}
static inline void env_spinlock_unlock(env_spinlock *l)
{
spin_unlock(l);
}
static inline void env_spinlock_lock_irq(env_spinlock *l)
{
spin_lock_irq(l);
}
static inline void env_spinlock_unlock_irq(env_spinlock *l)
{
spin_unlock_irq(l);
}
#define env_spinlock_lock_irqsave(l, flags) \
spin_lock_irqsave((l), (flags))
#define env_spinlock_unlock_irqrestore(l, flags) \
spin_unlock_irqrestore((l), (flags))
/* *** RW LOCKS *** */
typedef rwlock_t env_rwlock;
static inline void env_rwlock_init(env_rwlock *l)
{
rwlock_init(l);
}
static inline void env_rwlock_read_lock(env_rwlock *l)
{
read_lock(l);
}
static inline void env_rwlock_read_unlock(env_rwlock *l)
{
read_unlock(l);
}
static inline void env_rwlock_write_lock(env_rwlock *l)
{
write_lock(l);
}
static inline void env_rwlock_write_unlock(env_rwlock *l)
{
write_unlock(l);
}
/* *** WAITQUEUE *** */
typedef wait_queue_head_t env_waitqueue;
static inline void env_waitqueue_init(env_waitqueue *w)
{
init_waitqueue_head(w);
}
static inline void env_waitqueue_wake_up(env_waitqueue *w)
{
wake_up(w);
}
#define env_waitqueue_wait(w, condition) \
wait_event_interruptible((w), (condition))
/* *** SCHEDULING *** */
static inline void env_cond_resched(void)
{
cond_resched();
}
static inline int env_in_interrupt(void)
{
return in_interrupt();
}
/* *** TIME *** */
static inline uint64_t env_get_tick_count(void)
{
return jiffies;
}
static inline uint64_t env_ticks_to_msecs(uint64_t j)
{
return jiffies_to_msecs(j);
}
static inline uint64_t env_ticks_to_nsecs(uint64_t j)
{
return jiffies_to_usecs(j) * NSEC_PER_USEC;
}
static inline bool env_time_after(uint64_t a, uint64_t b)
{
return time_after64(a, b);
}
static inline uint64_t env_ticks_to_secs(uint64_t j)
{
return j >> SHIFT_HZ;
}
static inline uint64_t env_secs_to_ticks(uint64_t j)
{
return j << SHIFT_HZ;
}
/* *** BIT OPERATIONS *** */
static inline void env_bit_set(int nr, volatile void *addr)
{
set_bit(nr, addr);
}
static inline void env_bit_clear(int nr, volatile void *addr)
{
clear_bit(nr, addr);
}
static inline int env_bit_test(int nr, const void *addr)
{
return test_bit(nr, addr);
}
static inline void env_msleep(uint64_t n)
{
msleep(n);
}
/* *** STRING OPERATIONS *** */
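/* These helpers follow a *_s-style convention: they take destination sizes,
* clamp lengths where applicable, and always return 0. */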
#define env_memset(dest, dmax, val) ({ \
memset(dest, val, dmax); \
0; \
})
#define env_memcpy(dest, dmax, src, slen) ({ \
memcpy(dest, src, min_t(int, dmax, slen)); \
0; \
})
#define env_memcmp(s1, s1max, s2, s2max, diff) ({ \
*diff = memcmp(s1, s2, min_t(int, s1max, s2max)); \
0; \
})
#define env_strdup kstrdup
#define env_strnlen(s, smax) strnlen(s, smax)
#define env_strncmp strncmp
#define env_strncpy(dest, dmax, src, slen) ({ \
strlcpy(dest, src, min_t(int, dmax, slen)); \
0; \
})
/* *** SORTING *** */
void env_sort(void *base, size_t num, size_t size,
int (*cmp_fn)(const void *, const void *),
void (*swap_fn)(void *, void *, int size));
/* *** CRC *** */
static inline uint32_t env_crc32(uint32_t crc, uint8_t const *data, size_t len)
{
return crc32(crc, data, len);
}
/* *** LOGGING *** */
#define ENV_PRIu64 "llu"
#define ENV_WARN(cond, fmt...) WARN(cond, fmt)
#define ENV_WARN_ON(cond) WARN_ON(cond)
#define ENV_BUG() BUG()
#define ENV_BUG_ON(cond) BUG_ON(cond)
#endif /* __OCF_ENV_H__ */

View File

@@ -0,0 +1,21 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __OCF_ENV_HEADERS_H__
#define __OCF_ENV_HEADERS_H__
#include <linux/types.h>
/* TODO: Move prefix printing to context logger. */
#define OCF_LOGO "Open-CAS"
#define OCF_PREFIX_SHORT "[" OCF_LOGO "] "
#define OCF_PREFIX_LONG "Open Cache Acceleration Software Linux"
#define OCF_VERSION_MAIN CAS_VERSION_MAIN
#define OCF_VERSION_MAJOR CAS_VERSION_MAJOR
#define OCF_VERSION_MINOR CAS_VERSION_MINOR
#endif /* __OCF_ENV_HEADERS_H__ */

View File

@@ -0,0 +1,414 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
struct {
int cas_error;
int std_error;
} static cas_error_code_map[] = {
/* IOC error mappings*/
{ OCF_ERR_INVAL, EINVAL },
{ OCF_ERR_INVAL_VOLUME_TYPE, EINVAL },
{ OCF_ERR_INTR, EINTR },
{ OCF_ERR_UNKNOWN, EINVAL },
{ OCF_ERR_TOO_MANY_CACHES, ENOSPC },
{ OCF_ERR_NO_MEM, ENOMEM },
{ OCF_ERR_NO_FREE_RAM, ENOMEM },
{ OCF_ERR_START_CACHE_FAIL, EFAULT },
{ OCF_ERR_CACHE_IN_USE, EBUSY },
{ OCF_ERR_CACHE_NOT_EXIST, ENODEV },
{ OCF_ERR_CACHE_EXIST, EEXIST },
{ OCF_ERR_TOO_MANY_CORES, ENOSPC },
{ OCF_ERR_CORE_NOT_AVAIL, ENAVAIL },
{ OCF_ERR_NOT_OPEN_EXC, EBUSY },
{ OCF_ERR_CACHE_NOT_AVAIL, ENAVAIL },
{ OCF_ERR_IO_CLASS_NOT_EXIST, ENODEV },
{ OCF_ERR_WRITE_CACHE, EIO },
{ OCF_ERR_WRITE_CORE, EIO },
{ OCF_ERR_DIRTY_SHUTDOWN, EFAULT },
{ OCF_ERR_DIRTY_EXISTS, EFAULT },
{ OCF_ERR_FLUSHING_INTERRUPTED, EINTR },
/* CAS kernel error mappings*/
{ KCAS_ERR_ROOT, EPERM },
{ KCAS_ERR_SYSTEM, EINVAL },
{ KCAS_ERR_BAD_RANGE, ERANGE },
{ KCAS_ERR_DEV_SPACE, ENOSPC },
{ KCAS_ERR_INV_IOCTL, EINVAL },
{ KCAS_ERR_DEV_PENDING, EBUSY },
{ KCAS_ERR_DIRTY_EXISTS_NVME, EFAULT },
{ KCAS_ERR_FILE_EXISTS, EEXIST },
{ KCAS_ERR_IN_UPGRADE, EFAULT },
{ KCAS_ERR_UNALIGNED, EINVAL },
{ KCAS_ERR_NO_STORED_CONF, EINTR },
{ KCAS_ERR_ROLLBACK, EFAULT },
{ KCAS_ERR_NOT_NVME, ENODEV },
{ KCAS_ERR_FORMAT_FAILED, EFAULT },
{ KCAS_ERR_NVME_BAD_FORMAT, EINVAL },
{ KCAS_ERR_CONTAINS_PART, EINVAL },
{ KCAS_ERR_A_PART, EINVAL },
{ KCAS_ERR_REMOVED_DIRTY, EIO },
{ KCAS_ERR_STOPPED_DIRTY, EIO },
};
/*******************************************/
/* Helper which changes CAS-specific error */
/* codes to generic kernel error codes     */
/*******************************************/
int map_cas_err_to_generic_code(int cas_error_code)
{
int i;
if (cas_error_code == 0)
return 0; /* No Error */
cas_error_code = abs(cas_error_code);
for (i = 0; i < ARRAY_SIZE(cas_error_code_map); i++) {
if (cas_error_code_map[i].cas_error == cas_error_code)
return -cas_error_code_map[i].std_error;
}
return -cas_error_code;
}
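/* Allocate a kernel copy of the ioctl argument and copy it in from user
* space; on failure this returns -ENOMEM/-EINVAL straight from the calling
* function. */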
#define _GET_CMD_INFO(cmd_info, arg, size) ({ \
cmd_info = vmalloc(size); \
if (!cmd_info) \
return -ENOMEM; \
if (copy_from_user(cmd_info, (void __user *)arg, size)) { \
printk(KERN_ALERT "Cannot copy cmd info from user space\n"); \
vfree(cmd_info); \
return -EINVAL; \
} \
})
#define GET_CMD_INFO(cmd_info, arg) _GET_CMD_INFO(cmd_info, arg, \
sizeof(*cmd_info))
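/* Copy the command structure (with ext_err_code filled in) back to user
* space, free it and return the CAS error mapped to a generic errno. */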
#define RETURN_CMD_RESULT(cmd_info, arg, result) ({ \
int ret = result; \
cmd_info->ext_err_code = abs(result); \
if (copy_to_user((void __user *)arg, cmd_info, sizeof(*cmd_info))) { \
printk(KERN_ALERT "Unable to copy response to user\n"); \
ret = -EFAULT; \
} \
vfree(cmd_info); \
return map_cas_err_to_generic_code(ret); \
})
/* This handles ioctl calls for /dev/cas */
/*********************************************/
long cas_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
int retval = 0;
if (_IOC_TYPE(cmd) != KCAS_IOCTL_MAGIC)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN)) {
/* Must be root to issue ioctls */
return -EPERM;
}
if (cas_upgrade_is_in_upgrade() &&
cmd != KCAS_IOCTL_CACHE_INFO &&
cmd != KCAS_IOCTL_LIST_CACHE &&
cmd != KCAS_IOCTL_GET_CACHE_COUNT &&
cmd != KCAS_IOCTL_CORE_INFO &&
cmd != KCAS_IOCTL_PARTITION_STATS &&
cmd != KCAS_IOCTL_GET_CAPABILITIES) {
return -EFAULT;
}
switch (cmd) {
case KCAS_IOCTL_START_CACHE: {
struct kcas_start_cache *cmd_info;
struct ocf_mngt_cache_config cfg;
struct ocf_mngt_cache_device_config device_cfg;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_prepare_cache_cfg(&cfg, &device_cfg, cmd_info);
if (retval)
RETURN_CMD_RESULT(cmd_info, arg, retval);
retval = cache_mng_init_instance(&cfg, &device_cfg, cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_STOP_CACHE: {
struct kcas_stop_cache *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_exit_instance(cmd_info->cache_id,
cmd_info->flush_data);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_SET_CACHE_STATE: {
struct kcas_set_cache_state *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_set_cache_mode(cmd_info->cache_id,
cmd_info->caching_mode, cmd_info->flush_data);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_INSERT_CORE: {
struct kcas_insert_core *cmd_info;
struct ocf_mngt_core_config cfg;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_prepare_core_cfg(&cfg, cmd_info);
if (retval)
RETURN_CMD_RESULT(cmd_info, arg, retval);
retval = cache_mng_add_core_to_cache(&cfg, cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_REMOVE_CORE: {
struct kcas_remove_core *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_remove_core_from_cache(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_RESET_STATS: {
struct kcas_reset_stats *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_reset_core_stats(cmd_info->cache_id,
cmd_info->core_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_FLUSH_CACHE: {
struct kcas_flush_cache *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_flush_device(cmd_info->cache_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_INTERRUPT_FLUSHING: {
struct kcas_interrupt_flushing *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_interrupt_flushing(cmd_info->cache_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_FLUSH_CORE: {
struct kcas_flush_core *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_flush_object(cmd_info->cache_id,
cmd_info->core_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CACHE_INFO: {
struct kcas_cache_info *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_info(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CORE_INFO: {
struct kcas_core_info *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_core_info(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_PARTITION_STATS: {
struct kcas_io_class *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_io_class_info(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_PARTITION_SET: {
struct kcas_io_classes *cmd_info;
/* copy entire memory from user, including array of
* ocf_io_class_info structs past the end of kcas_io_classes */
_GET_CMD_INFO(cmd_info, arg, KCAS_IO_CLASSES_SIZE);
retval = cache_mng_set_partitions(cmd_info);
/* return just sizeof(struct kcas_io_classes) bytes of data */
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_GET_CACHE_COUNT: {
struct kcas_cache_count *cmd_info;
GET_CMD_INFO(cmd_info, arg);
cmd_info->cache_count = ocf_mngt_cache_get_count(cas_ctx);
RETURN_CMD_RESULT(cmd_info, arg, 0);
}
case KCAS_IOCTL_LIST_CACHE: {
struct kcas_cache_list *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_list_caches(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval > 0 ? 0 : retval);
}
case KCAS_IOCTL_GET_CAPABILITIES: {
struct kcas_capabilites *cmd_info;
GET_CMD_INFO(cmd_info, arg);
memset(cmd_info, 0, sizeof(*cmd_info));
#ifdef CAS_NVME_FULL
cmd_info->nvme_format = 1;
#endif
RETURN_CMD_RESULT(cmd_info, arg, 0);
}
case KCAS_IOCTL_UPGRADE: {
struct kcas_upgrade *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cas_upgrade();
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
#if defined(CAS_NVME_FULL)
case KCAS_IOCTL_NVME_FORMAT: {
struct kcas_nvme_format *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cas_nvme_format_optimal(
cmd_info->device_path_name,
cmd_info->metadata_mode,
cmd_info->force);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
#endif
case KCAS_IOCTL_GET_CORE_POOL_COUNT: {
struct kcas_core_pool_count *cmd_info;
GET_CMD_INFO(cmd_info, arg);
cmd_info->core_pool_count =
ocf_mngt_core_pool_get_count(cas_ctx);
RETURN_CMD_RESULT(cmd_info, arg, 0);
}
case KCAS_IOCTL_GET_CORE_POOL_PATHS: {
struct kcas_core_pool_path *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_core_pool_get_paths(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CORE_POOL_REMOVE: {
struct kcas_core_pool_remove *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_core_pool_remove(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CACHE_CHECK_DEVICE: {
struct kcas_cache_check_device *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_cache_check_device(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_SET_CORE_PARAM: {
struct kcas_set_core_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_set_core_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_GET_CORE_PARAM: {
struct kcas_get_core_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_core_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_SET_CACHE_PARAM: {
struct kcas_set_cache_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_set_cache_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_GET_CACHE_PARAM: {
struct kcas_get_cache_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_cache_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
default:
return -EINVAL;
}
}

View File

@@ -0,0 +1,15 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __SERVICE_UI_IOCTL_H__
#define __SERVICE_UI_IOCTL_H__
struct casdsk_disk;
long cas_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
unsigned long arg);
#endif

281
modules/cas_cache/threads.c Normal file
View File

@@ -0,0 +1,281 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "threads.h"
#include "cas_cache.h"
#define MAX_THREAD_NAME_SIZE 16
struct cas_thread_info {
atomic_t stop;
struct completion compl;
struct completion sync_compl;
void *sync_data;
wait_queue_head_t wq;
atomic_t kicked;
struct task_struct *thread;
char name[MAX_THREAD_NAME_SIZE];
bool running;
};
static int _cas_io_queue_thread(void *data)
{
ocf_queue_t q = data;
struct cas_thread_info *info;
BUG_ON(!q);
/* complete the creation of the thread */
info = ocf_queue_get_priv(q);
BUG_ON(!info);
DAEMONIZE(info->thread->comm);
complete(&info->compl);
/* Continue working until signaled to exit. */
do {
/* Wait until there are completed read misses from the HDDs,
* or a stop.
*/
wait_event_interruptible(info->wq, ocf_queue_pending_io(q) ||
atomic_read(&info->stop));
ocf_queue_run(q);
} while (!atomic_read(&info->stop) || ocf_queue_pending_io(q));
WARN(ocf_queue_pending_io(q), "Still pending IO requests\n");
/* If we get here, then thread was signalled to terminate.
* So, let's complete and exit.
*/
complete_and_exit(&info->compl, 0);
return 0;
}
static void _cas_cleaner_complete(ocf_cleaner_t c, uint32_t interval)
{
struct cas_thread_info *info = ocf_cleaner_get_priv(c);
uint32_t *ms = info->sync_data;
*ms = interval;
complete(&info->sync_compl);
}
static int _cas_cleaner_thread(void *data)
{
ocf_cleaner_t c = data;
ocf_cache_t cache = ocf_cleaner_get_cache(c);
struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
struct cas_thread_info *info;
uint32_t ms;
BUG_ON(!c);
ENV_BUG_ON(!cache_priv);
/* complete the creation of the thread */
info = ocf_cleaner_get_priv(c);
BUG_ON(!info);
DAEMONIZE(info->thread->comm);
complete(&info->compl);
info->sync_data = &ms;
ocf_cleaner_set_cmpl(c, _cas_cleaner_complete);
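/* Each iteration runs the cleaner on the current CPU's IO queue, waits for
* it to report the next wake-up interval, then sleeps that many milliseconds
* unless a stop was requested. */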
do {
init_completion(&info->sync_compl);
ocf_cleaner_run(c, cache_priv->io_queues[smp_processor_id()]);
wait_for_completion(&info->sync_compl);
} while (0 == wait_event_interruptible_timeout(info->wq,
atomic_read(&info->stop), msecs_to_jiffies(ms)));
complete_and_exit(&info->compl, 0);
return 0;
}
static int _cas_metadata_updater_thread(void *data)
{
ocf_metadata_updater_t mu = data;
struct cas_thread_info *info;
BUG_ON(!mu);
/* complete the creation of the thread */
info = ocf_metadata_updater_get_priv(mu);
BUG_ON(!info);
DAEMONIZE(info->thread->comm);
complete(&info->compl);
do {
if (atomic_read(&info->stop))
break;
atomic_set(&info->kicked, 0);
if (ocf_metadata_updater_run(mu))
continue;
wait_event_interruptible(info->wq, atomic_read(&info->stop) ||
atomic_read(&info->kicked));
} while (true);
complete_and_exit(&info->compl, 0);
return 0;
}
static int _cas_create_thread(struct cas_thread_info **pinfo,
int (*threadfn)(void *), void *priv, int cpu,
const char *fmt, ...)
{
struct cas_thread_info *info;
struct task_struct *thread;
va_list args;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
atomic_set(&info->stop, 0);
init_completion(&info->compl);
init_completion(&info->sync_compl);
init_waitqueue_head(&info->wq);
va_start(args, fmt);
vsnprintf(info->name, sizeof(info->name), fmt, args);
va_end(args);
thread = kthread_create(threadfn, priv, "%s", info->name);
if (IS_ERR(thread)) {
kfree(info);
/* Propagate error code as PTR_ERR */
return PTR_ERR(thread);
}
info->thread = thread;
/* Affinitize thread to core */
if (cpu != CAS_CPUS_ALL)
kthread_bind(thread, cpu);
if (pinfo)
*pinfo = info;
return 0;
}
static void _cas_start_thread(struct cas_thread_info *info)
{
wake_up_process(info->thread);
wait_for_completion(&info->compl);
info->running = true;
printk(KERN_DEBUG "Thread %s started\n", info->name);
}
static void _cas_stop_thread(struct cas_thread_info *info)
{
if (info->running && info->thread) {
init_completion(&info->compl);
atomic_set(&info->stop, 1);
wake_up(&info->wq);
wait_for_completion(&info->compl);
printk(KERN_DEBUG "Thread %s stopped\n", info->name);
}
kfree(info);
}
int cas_create_queue_thread(ocf_queue_t q, int cpu)
{
struct cas_thread_info *info;
ocf_cache_t cache = ocf_queue_get_cache(q);
int result;
result = _cas_create_thread(&info, _cas_io_queue_thread, q, cpu,
"cas_io_%s_%d", ocf_cache_get_name(cache), cpu);
if (!result) {
ocf_queue_set_priv(q, info);
_cas_start_thread(info);
}
return result;
}
void cas_kick_queue_thread(ocf_queue_t q)
{
struct cas_thread_info *info = ocf_queue_get_priv(q);
wake_up(&info->wq);
}
void cas_stop_queue_thread(ocf_queue_t q)
{
struct cas_thread_info *info = ocf_queue_get_priv(q);
ocf_queue_set_priv(q, NULL);
_cas_stop_thread(info);
}
int cas_create_cleaner_thread(ocf_cleaner_t c)
{
struct cas_thread_info *info;
ocf_cache_t cache = ocf_cleaner_get_cache(c);
int result;
result = _cas_create_thread(&info, _cas_cleaner_thread, c,
CAS_CPUS_ALL, "cas_clean_%d",
ocf_cache_get_id(cache));
if (!result) {
ocf_cleaner_set_priv(c, info);
_cas_start_thread(info);
}
return result;
}
void cas_stop_cleaner_thread(ocf_cleaner_t c)
{
struct cas_thread_info *info = ocf_cleaner_get_priv(c);
ocf_cleaner_set_priv(c, NULL);
_cas_stop_thread(info);
}
int cas_create_metadata_updater_thread(ocf_metadata_updater_t mu)
{
struct cas_thread_info *info;
int result;
result = _cas_create_thread(&info, _cas_metadata_updater_thread,
mu, CAS_CPUS_ALL, "ocf_metadata_updater_%d",
ocf_cache_get_id(ocf_metadata_updater_get_cache(mu)));
if (!result) {
ocf_metadata_updater_set_priv(mu, info);
_cas_start_thread(info);
}
return result;
}
void cas_kick_metadata_updater_thread(ocf_metadata_updater_t mu)
{
struct cas_thread_info *info = ocf_metadata_updater_get_priv(mu);
atomic_set(&info->kicked, 1);
wake_up(&info->wq);
}
void cas_stop_metadata_updater_thread(ocf_metadata_updater_t mu)
{
struct cas_thread_info *info = ocf_metadata_updater_get_priv(mu);
ocf_metadata_updater_set_priv(mu, NULL);
_cas_stop_thread(info);
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __THREADS_H__
#define __THREADS_H__
#include "ocf/ocf.h"
#include "linux_kernel_version.h"
#define CAS_CPUS_ALL -1
int cas_create_queue_thread(ocf_queue_t q, int cpu);
void cas_kick_queue_thread(ocf_queue_t q);
void cas_stop_queue_thread(ocf_queue_t q);
int cas_create_cleaner_thread(ocf_cleaner_t c);
void cas_stop_cleaner_thread(ocf_cleaner_t c);
int cas_create_metadata_updater_thread(ocf_metadata_updater_t mu);
void cas_kick_metadata_updater_thread(ocf_metadata_updater_t mu);
void cas_stop_metadata_updater_thread(ocf_metadata_updater_t mu);
#endif /* __THREADS_H__ */

View File

@@ -0,0 +1,13 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_UTILS_H__
#define __CAS_UTILS_H__
#include "utils_nvme.h"
#include "utils_properties.h"
#endif /* __CAS_UTILS_H__ */

View File

@@ -0,0 +1,22 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "utils_blk.h"
int cas_blk_get_part_count(struct block_device *bdev)
{
struct disk_part_tbl *ptbl;
int i, count = 0;
rcu_read_lock();
ptbl = rcu_dereference(bdev->bd_disk->part_tbl);
for (i = 0; i < ptbl->len; ++i) {
if (rcu_access_pointer(ptbl->part[i]))
count++;
}
rcu_read_unlock();
return count;
}

View File

@@ -0,0 +1,14 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef UTILS_BLK_H_
#define UTILS_BLK_H_
#include <linux/fs.h>
#include <linux/genhd.h>
int cas_blk_get_part_count(struct block_device *bdev);
#endif /* UTILS_BLK_H_ */

View File

@@ -0,0 +1,130 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
/**
* This function locates the index of the IO vec, within the given vecs array,
* that contains the byte at the given offset. When found, it returns the
* index and the byte offset within that vec.
* @param vecs IO vector array to be searched
* @param vec_num number of items in IO vector array
* @param offset byte offset to be found
* @param offset_in_vec byte offset within found IO vec
* @return vec index if it lies within specified buffer, otherwise -1
*/
static int get_starting_vec(struct bio_vec *vecs, uint64_t vecs_num,
uint64_t offset, uint64_t *offset_in_vec)
{
int i;
for (i = 0; i < vecs_num; i++) {
if (vecs[i].bv_len > offset) {
if (offset_in_vec != NULL)
*offset_in_vec = offset;
return i;
}
offset -= vecs[i].bv_len;
}
return -1;
}
uint64_t cas_data_cpy(struct bio_vec *dst, uint64_t dst_num,
struct bio_vec *src, uint64_t src_num,
uint64_t to, uint64_t from, uint64_t bytes)
{
uint64_t i, j, dst_len, src_len, to_copy;
uint64_t dst_off, src_off;
uint64_t written = 0;
int ret;
void *dst_p, *src_p;
struct bio_vec *curr_dst, *curr_src;
/* Locate vec idx and offset in dst vec array */
ret = get_starting_vec(dst, dst_num, to, &to);
if (ret < 0) {
CAS_PRINT_RL(KERN_INFO "llu dst buffer too small "
"to_offset=%llu bytes=%llu", to, bytes);
return 0;
}
j = ret;
/* Locate vec idx and offset in src vec array */
ret = get_starting_vec(src, src_num, from, &from);
if (ret < 0) {
CAS_PRINT_RL(KERN_INFO "llu src buffer too small "
"from_offset=%llu bytes=%llu", from, bytes);
return 0;
}
i = ret;
curr_dst = &dst[j];
curr_src = &src[i];
dst_off = curr_dst->bv_offset + to;
dst_len = curr_dst->bv_len - to;
src_off = curr_src->bv_offset + from;
src_len = curr_src->bv_len - from;
while (written < bytes) {
dst_p = page_address(curr_dst->bv_page) + dst_off;
src_p = page_address(curr_src->bv_page) + src_off;
to_copy = src_len > dst_len ? dst_len : src_len;
/* Prevent copying too much */
if ((written + to_copy) > bytes)
to_copy = bytes - written;
memcpy(dst_p, src_p, to_copy);
written += to_copy;
if (written == bytes)
break;
/* Setup new len and offset. */
dst_off += to_copy;
dst_len -= to_copy;
src_off += to_copy;
src_len -= to_copy;
/* Go to next src buffer */
if (src_len == 0) {
i++;
/* Setup new len and offset. */
if (i < src_num) {
curr_src = &src[i];
src_off = curr_src->bv_offset;
src_len = curr_src->bv_len;
} else {
break;
}
}
/* Go to next dst buffer */
if (dst_len == 0) {
j++;
if (j < dst_num) {
curr_dst = &dst[j];
dst_off = curr_dst->bv_offset;
dst_len = curr_dst->bv_len;
} else {
break;
}
}
}
if (written != bytes) {
CAS_PRINT_RL(KERN_INFO "Written bytes not equal requested bytes "
"(written=%llu; requested=%llu)", written, bytes);
}
return written;
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef UTILS_DATA_H_
#define UTILS_DATA_H_
/**
* @brief Copy data from a data vector to another one
*
* This function copies a number of bytes from the source IO vector to the
* destination IO vector. Copying starts at the specified offset in the
* destination IO vector. If there is not enough space, it returns the number
* of bytes that were successfully copied.
*
* @param dst destination IO vector
* @param dst_num size of destination IO vector
* @param src source IO vector
* @param src_num size of source IO vector
* @param to dst offset where write to will start
* @param from src offset where write from will start
* @param bytes number of bytes to be copied
*
* @return number of bytes written from src to dst
*/
uint64_t cas_data_cpy(struct bio_vec *dst, uint64_t dst_num,
struct bio_vec *src, uint64_t src_num,
uint64_t to, uint64_t from, uint64_t bytes);
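/*
* Illustrative usage only (variable names are examples): copy 512 bytes from
* the start of a source vector array into offset 4096 of a destination one:
*
*   copied = cas_data_cpy(dst_vecs, dst_cnt, src_vecs, src_cnt, 4096, 0, 512);
*/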
#endif /* UTILS_DATA_H_ */

View File

@@ -0,0 +1,78 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "utils_gc.h"
#include <linux/vmalloc.h>
#if defined (CAS_GARBAGE_COLLECTOR)
struct cas_vfree_item {
struct llist_head list;
struct work_struct ws;
};
static DEFINE_PER_CPU(struct cas_vfree_item, cas_vfree_item);
static atomic_t freed = ATOMIC_INIT(0);
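/* Deferred vfree(): the worker drains this CPU's lock-less list and frees
* every entry queued by cas_vfree(). */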
static void cas_garbage_collector(struct work_struct *w)
{
struct cas_vfree_item *item = container_of(w, struct cas_vfree_item,
ws);
struct llist_node *llnode = llist_del_all(&item->list);
while (llnode) {
void *item = llnode;
llnode = llnode->next;
atomic_dec(&freed);
vfree(item);
}
}
void cas_vfree(const void *addr)
{
struct cas_vfree_item *item = this_cpu_ptr(&cas_vfree_item);
atomic_inc(&freed);
if (llist_add((struct llist_node *)addr, &item->list))
schedule_work(&item->ws);
}
void cas_garbage_collector_init(void)
{
int i;
for_each_possible_cpu(i) {
struct cas_vfree_item *item;
item = &per_cpu(cas_vfree_item, i);
init_llist_head(&item->list);
INIT_WORK(&item->ws, cas_garbage_collector);
}
}
void cas_garbage_collector_deinit(void)
{
int i;
for_each_possible_cpu(i) {
struct cas_vfree_item *item;
item = &per_cpu(cas_vfree_item, i);
while (work_pending(&item->ws))
schedule();
}
WARN(atomic_read(&freed) != 0,
OCF_PREFIX_SHORT" Not all memory deallocated\n");
}
#else
void cas_garbage_collector_init(void) {};
void cas_garbage_collector_deinit(void) {};
void cas_vfree(const void *addr) { vfree(addr); };
#endif

View File

@@ -0,0 +1,16 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef UTILS_GC_H_
#define UTILS_GC_H_
void cas_garbage_collector_init(void);
void cas_garbage_collector_deinit(void);
void cas_vfree(const void *addr);
#endif /* UTILS_GC_H_ */

View File

@@ -0,0 +1,583 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#if defined(CAS_NVME_PARTIAL)
#include "cas_cache.h"
#include "utils_nvme.h"
#include "utils_blk.h"
#include <linux/ioctl.h>
#include <linux/file.h>
int cas_nvme_get_nsid(struct block_device *bdev, unsigned int *nsid)
{
int ret = 0;
/*
* Maximum NSID is 0xFFFFFFFF, so theoretically there is no free
* room for an error code. However, it is unlikely that there will ever
* be a device with that many namespaces, so we treat the value as if it
* were signed and interpret a negative value as an error code. Moreover,
* in case of an error we can be sure that we are dealing with a non-NVMe
* device, because this ioctl should never fail with the NVMe driver.
*/
ret = ioctl_by_bdev(bdev, NVME_IOCTL_ID, (unsigned long)NULL);
if (ret < 0)
return ret;
*nsid = (unsigned int)ret;
return 0;
}
#define NVME_ID_CNS_NS 0x00
#define NVME_ID_CNS_CTRL 0x01
int cas_nvme_identify_ns(struct block_device *bdev, unsigned int nsid,
struct nvme_id_ns *ns)
{
struct nvme_admin_cmd cmd = { };
unsigned long __user buffer;
int ret = 0;
buffer = cas_vm_mmap(NULL, 0, sizeof(*ns));
if (IS_ERR((void *)buffer))
return PTR_ERR((void *)buffer);
cmd.opcode = nvme_admin_identify;
cmd.nsid = cpu_to_le32(nsid);
cmd.addr = (__u64)buffer;
cmd.data_len = sizeof(*ns);
cmd.cdw10 = NVME_ID_CNS_NS;
ret = ioctl_by_bdev(bdev, NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
if (ret < 0)
goto out;
ret = copy_from_user(ns, (void *)buffer, sizeof(*ns));
if (ret > 0)
ret = -EINVAL;
out:
cas_vm_munmap(buffer, sizeof(*ns));
return ret;
}
int cas_nvme_identify_ns_contorller(struct file *file, struct nvme_id_ns *ns)
{
struct nvme_admin_cmd cmd = { };
unsigned long __user buffer;
mm_segment_t old_fs;
int ret = 0;
buffer = cas_vm_mmap(NULL, 0, sizeof(*ns));
if (IS_ERR((void *)buffer))
return PTR_ERR((void *)buffer);
cmd.opcode = nvme_admin_identify;
cmd.nsid = 1;
cmd.addr = (__u64)buffer;
cmd.data_len = sizeof(*ns);
cmd.cdw10 = NVME_ID_CNS_NS;
old_fs = get_fs();
set_fs(KERNEL_DS);
ret = file->f_op->unlocked_ioctl(file,
NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
set_fs(old_fs);
if (ret < 0)
goto out;
ret = copy_from_user(ns, (void *)buffer, sizeof(*ns));
if (ret > 0)
ret = -EINVAL;
out:
cas_vm_munmap(buffer, sizeof(*ns));
return ret;
}
#if defined(CAS_NVME_FULL)
#define FORMAT_WORKAROUND_NOT_NEED 0
#define FORMAT_WORKAROUND_NEED 1
static int __cas_nvme_check_fw(struct nvme_id_ctrl *id_ctrl)
{
/*
* If the firmware is older than 8DV101H0 we need to apply a
* workaround - format twice. Only the last 5 characters need
* to be compared.
*/
return (strncmp(&id_ctrl->fr[3], "101H0", 5) < 0) ?
FORMAT_WORKAROUND_NEED :
FORMAT_WORKAROUND_NOT_NEED;
}
int cas_nvme_identify_ctrl(struct block_device *bdev,
struct nvme_id_ctrl *id_ctrl)
{
struct nvme_admin_cmd cmd = { };
unsigned long __user buffer;
int ret = 0;
buffer = cas_vm_mmap(NULL, 0, sizeof(*id_ctrl));
if (IS_ERR((void *)buffer))
return PTR_ERR((void *)buffer);
cmd.opcode = nvme_admin_identify;
cmd.addr = (__u64)buffer;
cmd.data_len = sizeof(*id_ctrl);
cmd.cdw10 = NVME_ID_CNS_CTRL;
ret = ioctl_by_bdev(bdev, NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
if (ret < 0)
goto out;
ret = copy_from_user(id_ctrl, (void *)buffer, sizeof(*id_ctrl));
if (ret > 0)
ret = -EINVAL;
out:
cas_vm_munmap(buffer, sizeof(*id_ctrl));
return ret;
}
static int _cas_nvme_format_bdev(struct block_device *bdev, unsigned int nsid,
int lbaf, int ms)
{
struct nvme_admin_cmd cmd = { };
cmd.opcode = nvme_admin_format_nvm;
cmd.nsid = nsid;
cmd.cdw10 = lbaf | ms<<4;
cmd.timeout_ms = 1200000;
return ioctl_by_bdev(bdev, NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
}
static int _cas_nvme_controller_identify(struct file *character_device_file,
unsigned long __user buffer)
{
struct nvme_admin_cmd cmd = { };
mm_segment_t old_fs;
int ret;
old_fs = get_fs();
cmd.opcode = nvme_admin_identify;
cmd.nsid = 0;
cmd.addr = (__u64)buffer;
/* 1 - identify controller, 0 - identify namespace */
cmd.cdw10 = 1;
cmd.data_len = 0x1000;
set_fs(KERNEL_DS);
ret = character_device_file->f_op->unlocked_ioctl(character_device_file,
NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
set_fs(old_fs);
return ret;
}
static int _cas_nvme_format_controller(struct file *character_device_file,
int lbaf, bool sbnsupp)
{
struct nvme_admin_cmd cmd = { };
mm_segment_t old_fs;
int ret;
old_fs = get_fs();
/* Send format command to device */
cmd.opcode = nvme_admin_format_nvm;
cmd.nsid = 0xFFFFFFFF;
cmd.cdw10 = lbaf | sbnsupp << 4;
cmd.timeout_ms = 120000;
cmd.addr = 0;
set_fs(KERNEL_DS);
ret = character_device_file->f_op->unlocked_ioctl(character_device_file,
NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
set_fs(old_fs);
return ret;
}
static inline int find_lbaf(struct nvme_lbaf *lbaf, int cnt, int atomic)
{
int ms = atomic ? 8 : 0;
int i;
for (i = 0; i <= cnt; ++i)
if (lbaf[i].ms == ms && lbaf[i].ds == 9)
return i;
return -EINVAL;
}
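/*
 * In other words: CAS_METADATA_MODE_NORMAL looks for a plain 512B LBA
 * format (ds == 9, ms == 0), while CAS_METADATA_MODE_ATOMIC looks for
 * 512B of data plus 8B of per-LBA metadata (ds == 9, ms == 8). If the
 * namespace does not expose such a format, -EINVAL is returned and the
 * format request fails.
 */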
/* context for async probe */
struct _probe_context
{
struct completion cmpl;
struct ocf_metadata_probe_status status;
int error;
};
static void _cas_nvme_probe_cmpl(void *priv, int error,
struct ocf_metadata_probe_status *status)
{
struct _probe_context *ctx = (struct _probe_context*)priv;
ctx->error = error;
if (!error) {
ctx->status = *status;
}
complete(&ctx->cmpl);
}
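/*
 * ocf_metadata_probe() is asynchronous, so _cas_nvme_preformat_check()
 * below wraps it in the usual completion pattern: initialize the
 * completion, start the probe with _cas_nvme_probe_cmpl() as the
 * callback, then wait for the completion and read the result back from
 * struct _probe_context.
 */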
static int _cas_nvme_preformat_check(struct block_device *bdev, int force)
{
ocf_volume_t volume;
struct _probe_context probe_ctx;
int ret = 0;
if (bdev != bdev->bd_contains)
return -KCAS_ERR_A_PART;
if (cas_blk_get_part_count(bdev) > 1 && !force)
return -KCAS_ERR_CONTAINS_PART;
ret = cas_blk_open_volume_by_bdev(&volume, bdev);
if (ret == -KCAS_ERR_NVME_BAD_FORMAT) {
/* Current format is not supported by CAS, so we can be sure
* that there is no dirty data. Proceed with the format.
*/
return 0;
} else if (ret) {
/* An error occurred, stop processing */
return ret;
}
init_completion(&probe_ctx.cmpl);
ocf_metadata_probe(cas_ctx, volume, _cas_nvme_probe_cmpl, &probe_ctx);
if (wait_for_completion_interruptible(&probe_ctx.cmpl)) {
ocf_volume_close(volume);
return -OCF_ERR_FLUSHING_INTERRUPTED;
}
if (probe_ctx.error == -ENODATA) {
/* No cache was detected on this device,
* so the NVMe namespace can be formatted
*/
ret = 0;
} else if (probe_ctx.error == -EBUSY) {
ret = -OCF_ERR_NOT_OPEN_EXC;
} else if (probe_ctx.error) {
/* Some error occurred, we cannot be sure the cache is clean */
ret = -KCAS_ERR_FORMAT_FAILED;
} else {
/* Check if cache was closed in proper way */
if (!probe_ctx.status.clean_shutdown ||
probe_ctx.status.cache_dirty) {
/* Dirty shutdown */
ret = -KCAS_ERR_DIRTY_EXISTS_NVME;
}
if (force) {
/* Force overwrites dirty shutdown */
ret = 0;
}
}
ocf_volume_close(volume);
return ret;
}
static int _cas_nvme_format_namespace_by_path(const char *device_path,
int metadata_mode, int force)
{
struct nvme_id_ns *ns;
struct nvme_id_ctrl *id;
unsigned int nsid, sbnsupp = 0;
int best_lbaf = 0;
int ret = 0;
struct block_device *bdev;
char holder[] = "CAS FORMAT\n";
ns = kmalloc(sizeof(*ns), GFP_KERNEL);
if (!ns)
return -OCF_ERR_NO_MEM;
id = kmalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
ret = -OCF_ERR_NO_MEM;
goto out1;
}
bdev = OPEN_BDEV_EXCLUSIVE(device_path,
FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
if (IS_ERR(bdev)) {
if (PTR_ERR(bdev) == -EBUSY)
ret = -OCF_ERR_NOT_OPEN_EXC;
else
ret = -OCF_ERR_INVAL_VOLUME_TYPE;
goto out1;
}
ret = cas_nvme_get_nsid(bdev, &nsid);
if (ret < 0) {
ret = -KCAS_ERR_NOT_NVME;
goto out2;
}
ret = _cas_nvme_preformat_check(bdev, force);
if (ret)
goto out2;
ret = cas_nvme_identify_ns(bdev, nsid, ns);
if (ret < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto out2;
}
if (metadata_mode == CAS_METADATA_MODE_NORMAL) {
best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 0);
sbnsupp = 0;
} else if (metadata_mode == CAS_METADATA_MODE_ATOMIC) {
best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 1);
sbnsupp = !(ns->mc & (1<<1));
}
if (best_lbaf < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto out2;
}
ret = cas_nvme_identify_ctrl(bdev, id);
if (ret < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto out2;
}
if (__cas_nvme_check_fw(id) == FORMAT_WORKAROUND_NEED) {
/*
* If the firmware is older than 8DV101H0 we need to apply
* the workaround - format twice.
*/
ret = _cas_nvme_format_bdev(bdev, nsid, best_lbaf, sbnsupp);
if (ret)
goto out2;
}
ret = _cas_nvme_format_bdev(bdev, nsid, best_lbaf, sbnsupp);
if (ret)
goto out2;
ret = ioctl_by_bdev(bdev, BLKRRPART, (unsigned long)NULL);
out2:
CLOSE_BDEV_EXCLUSIVE(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
out1:
kfree(id);
kfree(ns);
return ret;
}
static int _cas_nvme_get_bdev_from_controller(struct block_device **bdev,
int major, int minor, int namespace_number)
{
mm_segment_t old_fs;
char *sys_path;
struct file *file;
char readbuffer[12] = {0};
char holder[] = "CAS FORMAT\n";
int ret = 0;
sys_path = kzalloc(sizeof(char)*MAX_STR_LEN, GFP_KERNEL);
if (!sys_path)
return -OCF_ERR_NO_MEM;
sprintf(sys_path, "/sys/dev/char/%d:%d/nvme%dn%d/dev",
major, minor, minor, namespace_number);
file = filp_open(sys_path, O_RDONLY, 0);
kfree(sys_path);
if (IS_ERR(file))
return -KCAS_ERR_FORMAT_FAILED;
old_fs = get_fs();
set_fs(KERNEL_DS);
ret = file->f_op->read(file, readbuffer, sizeof(readbuffer),
&file->f_pos);
set_fs(old_fs);
filp_close(file, 0);
if (ret < 0)
return -KCAS_ERR_FORMAT_FAILED;
ret = sscanf(readbuffer, "%d:%d", &major, &minor);
if (ret < 0)
return -KCAS_ERR_FORMAT_FAILED;
*bdev = blkdev_get_by_dev(MKDEV(major, minor),
FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
if (IS_ERR(*bdev))
return -OCF_ERR_INVAL_VOLUME_TYPE;
return 0;
}
static int _cas_nvme_format_character_device(const char *device_path,
int metadata_mode, int force)
{
mm_segment_t old_fs;
int ret;
struct file *character_device_file = NULL;
struct nvme_id_ctrl *ctrl;
unsigned long __user buffer;
struct kstat *stat;
struct block_device **ndev = NULL;
int i;
struct nvme_id_ns *ns;
int best_lbaf = 0;
int sbnsupp = 0;
ctrl = kzalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
buffer = cas_vm_mmap(NULL, 0, sizeof(*ctrl));
stat = kmalloc(sizeof(struct kstat), GFP_KERNEL);
ns = kmalloc(sizeof(*ns), GFP_KERNEL);
old_fs = get_fs();
if (!ctrl || !buffer || !stat || !ns) {
ret = -OCF_ERR_NO_MEM;
goto out1;
}
character_device_file = filp_open(device_path, O_RDWR | O_EXCL, 0);
if (IS_ERR(character_device_file)) {
ret = -OCF_ERR_INVAL_VOLUME_TYPE;
goto out1;
}
ret = _cas_nvme_controller_identify(character_device_file, buffer);
if (ret < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto out1;
}
ret = copy_from_user(ctrl, (void *)buffer, sizeof(*ctrl));
if (ret)
goto out1;
ndev = kmalloc_array(ctrl->nn, sizeof(*ndev), GFP_KERNEL);
if (!ndev) {
ret = -OCF_ERR_NO_MEM;
goto out1;
}
set_fs(KERNEL_DS);
ret = vfs_stat(device_path, stat);
set_fs(old_fs);
if (ret)
goto out1;
for (i = 1; i <= ctrl->nn; i++) {
ret = _cas_nvme_get_bdev_from_controller(&ndev[i-1],
MAJOR(stat->rdev), MINOR(stat->rdev), i);
if (ret) {
i--;
goto cleanup;
}
ret = _cas_nvme_preformat_check(ndev[i-1], force);
if (ret)
goto cleanup;
}
ret = cas_nvme_identify_ns_contorller(character_device_file, ns);
if (ret)
goto cleanup;
if (metadata_mode == CAS_METADATA_MODE_NORMAL) {
best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 0);
sbnsupp = 0;
} else if (metadata_mode == CAS_METADATA_MODE_ATOMIC) {
best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 1);
sbnsupp = !(ns->mc & (1<<1));
}
if (best_lbaf < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto cleanup;
}
if (__cas_nvme_check_fw(ctrl) == FORMAT_WORKAROUND_NEED) {
/*
* If the firmware is older than 8DV101H0 we need to apply
* the workaround - format twice.
*/
ret = _cas_nvme_format_controller(character_device_file,
best_lbaf, sbnsupp);
if (ret < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto cleanup;
}
}
ret = _cas_nvme_format_controller(character_device_file,
best_lbaf, sbnsupp);
if (ret < 0)
ret = -KCAS_ERR_FORMAT_FAILED;
cleanup:
for (i = i-1; i >= 1; i--) {
ret |= ioctl_by_bdev(ndev[i-1], BLKRRPART, (unsigned long)NULL);
blkdev_put(ndev[i-1], FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}
out1:
kfree(ndev);
kfree(ctrl);
kfree(stat);
kfree(ns);
cas_vm_munmap(buffer, sizeof(*ctrl));
filp_close(character_device_file, 0);
return ret;
}
int cas_nvme_format_optimal(const char *device_path, int metadata_mode,
int force)
{
int ret;
uint8_t type;
ret = cas_blk_identify_type(device_path, &type);
if (ret == -OCF_ERR_INVAL_VOLUME_TYPE) {
/* An error occurred, stop processing */
return ret;
}
if (type == BLOCK_DEVICE_VOLUME || type == ATOMIC_DEVICE_VOLUME) {
ret = _cas_nvme_format_namespace_by_path(device_path,
metadata_mode, force);
} else if (type == NVME_CONTROLLER && false) {
/*
* TODO(rbaldyga): Make it safe with NVMe drives that do not
* handle format change properly.
*/
ret = _cas_nvme_format_character_device(device_path,
metadata_mode, force);
} else {
ret = -OCF_ERR_INVAL_VOLUME_TYPE;
}
return ret;
}
#endif
#endif

View File

@@ -0,0 +1,38 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef UTILS_NVME_H_
#define UTILS_NVME_H_
#if defined(CAS_UAPI_NVME)
#include <uapi/nvme.h>
#endif
#if defined(CAS_UAPI_LINUX_NVME)
#include <uapi/linux/nvme.h>
#endif
#if defined(CAS_UAPI_LINUX_NVME_IOCTL)
#include <uapi/linux/nvme_ioctl.h>
#endif
#if defined(CAS_NVME_PARTIAL)
#include <linux/nvme.h>
int cas_nvme_get_nsid(struct block_device *bdev, unsigned int *nsid);
int cas_nvme_identify_ns(struct block_device *bdev, unsigned int nsid,
struct nvme_id_ns *ns);
#if defined(CAS_NVME_FULL)
int cas_nvme_format_optimal(const char *device_path, int metadata_mode,
int force);
#endif /* CAS_NVME_FULL */
#endif /* CAS_NVME_PARTIAL */
#endif /* UTILS_NVME_H_ */

View File

@@ -0,0 +1,769 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#define INTERNAL_CALL 0
#define EXTERNAL_CALL 1
#define CAS_PROPERTIES_VERSION 101
#define VERSION_STR ".version"
/*
* The difference between a constant and a non-constant entry is stored in the LSB,
* e.g.:
* cas_property_string in binary 0000 1010
* cas_property_string_const in binary 0000 1011
*/
#define CAS_PROP_UNCONST(type) (type & ~CAS_PROPERTIES_CONST)
#define CAS_PROP_CHECK_CONST(type) (type & CAS_PROPERTIES_CONST)
enum cas_property_type {
cas_property_string = 10,
cas_property_string_const =
(cas_property_string | CAS_PROPERTIES_CONST),
cas_property_sint = 16,
cas_property_sint_const = (cas_property_sint | CAS_PROPERTIES_CONST),
cas_property_uint = 74,
cas_property_uint_const = (cas_property_uint | CAS_PROPERTIES_CONST),
};
struct cas_properties {
struct list_head list;
};
struct _cas_property {
uint8_t type;
char *key;
struct list_head item;
union {
void *value;
uint64_t value_uint;
int64_t value_sint;
};
};
struct cas_properties *cas_properties_create(void)
{
struct cas_properties *props;
int result;
props = kzalloc(sizeof(*props), GFP_KERNEL);
if (!props)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&props->list);
result = cas_properties_add_uint(props, VERSION_STR,
CAS_PROPERTIES_VERSION, CAS_PROPERTIES_CONST);
if (result) {
kfree(props);
return ERR_PTR(result);
}
result = cas_properties_add_uint(props, ".size", 0,
CAS_PROPERTIES_NON_CONST);
if (result) {
kfree(props);
return ERR_PTR(result);
}
return props;
}
void cas_properties_destroy(struct cas_properties *props)
{
struct list_head *curr, *tmp;
struct _cas_property *entry;
list_for_each_safe(curr, tmp, &props->list) {
entry = list_entry(curr, struct _cas_property, item);
list_del(curr);
if (cas_property_string == CAS_PROP_UNCONST(entry->type))
kfree(entry->value);
kfree(entry->key);
kfree(entry);
}
kfree(props);
}
static uint64_t _cas_prop_get_size(struct cas_properties *props)
{
struct list_head *curr;
struct _cas_property *entry;
uint64_t size_all = 0;
list_for_each(curr, &props->list) {
entry = list_entry(curr, struct _cas_property, item);
size_all += cas_prop_strnlen(entry->key, MAX_STRING_SIZE) + 1;
size_all += sizeof(entry->type);
switch (CAS_PROP_UNCONST(entry->type)) {
case cas_property_string:
size_all += cas_prop_strnlen(entry->value,
MAX_STRING_SIZE) + 1;
break;
case cas_property_sint:
size_all += sizeof(entry->value_sint);
break;
case cas_property_uint:
size_all += sizeof(entry->value_uint);
break;
default:
return 0;
}
}
return size_all;
}
static int _cas_prop_serialize_string(char *buffer, const uint64_t size,
uint64_t *offset, char *value)
{
uint64_t str_size = 0;
str_size = cas_prop_strnlen(value, MAX_STRING_SIZE) + 1;
if ((*offset + str_size) > size)
return -ENOMEM;
memcpy(buffer + *offset, value, str_size);
*offset += str_size;
return 0;
}
static int _cas_prop_parse_string(const char *buffer, const uint64_t size,
uint64_t *offset, char **str)
{
char *tmp_str = NULL;
uint64_t str_size = 0;
if (*offset >= size)
return -ENOMEM;
str_size = cas_prop_strnlen(&buffer[*offset], size - *offset ) + 1;
if (str_size > size - *offset) {
/* no null terminator at the end of buffer */
return -ENOMEM;
}
tmp_str = kstrdup(&buffer[*offset], GFP_KERNEL);
if (!tmp_str)
return -ENOMEM;
*offset += str_size;
*str = tmp_str;
return 0;
}
static int _cas_prop_serialize_int(char *buffer, const uint64_t size,
uint64_t *offset, uint64_t number)
{
int32_t i;
/*
* To avoid byte-order (endianness) issues we
* serialize the integer byte by byte, least significant byte first.
*/
for (i = 0; i < sizeof(number); i++) {
char byte = number & 0xFF;
if (*offset < size)
buffer[*offset] = byte;
else
return -ENOMEM;
(*offset)++;
number = number >> 8;
}
return 0;
}
static int _cas_prop_serialize_uint(char *buffer, const uint64_t size,
uint64_t *offset, uint64_t number)
{
return _cas_prop_serialize_int(buffer, size, offset, number);
}
static int _cas_prop_serialize_sint(char *buffer, const uint64_t size,
uint64_t *offset, int64_t number)
{
return _cas_prop_serialize_int(buffer, size, offset, (uint64_t) number);
}
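/*
 * Example: the 64-bit value 0x0102 ends up in the buffer as the byte
 * sequence 02 01 00 00 00 00 00 00 (least significant byte first),
 * regardless of the host byte order.
 */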
static int _cas_prop_parse_int(const char *buffer,
const uint64_t size, uint64_t *offset, uint64_t *number)
{
int32_t i;
uint64_t byte;
*number = 0;
/*
* To avoid byte-order (endianness) issues we
* parse the integer byte by byte.
*/
for (i = 0; i < sizeof(*number); i++) {
if (*offset >= size)
return -ENOMEM;
byte = buffer[*offset] & 0xFF;
byte = byte << (i * 8);
*number |= byte;
(*offset)++;
}
return 0;
}
static int _cas_prop_parse_uint(const char *buffer,
const uint64_t size, uint64_t *offset, uint64_t *number)
{
return _cas_prop_parse_int(buffer, size, offset, number);
}
static int _cas_prop_parse_sint(const char *buffer,
const uint64_t size, uint64_t *offset, int64_t *number)
{
return _cas_prop_parse_int(buffer, size, offset, (uint64_t *) number);
}
static int _cas_prop_serialize(struct _cas_property *entry, void *buffer,
const uint64_t size, uint64_t *offset)
{
uint64_t item_size = 0;
void *item;
int result = 0;
if (*offset > size)
return -ENOMEM;
/*
* Each entry is represented in buffer in order as below
* (e.g. in case we have entry with integer) :
* <----- entry ----->
* <- key -><-type-><- integer ->
* <- X bytes -><1 byte><- 8 byte ->
* | | | |
*/
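/*
 * For example, the internal ".size" entry (a constant unsigned integer)
 * takes 15 bytes: ".size" plus its null terminator (6 bytes of key),
 * one type byte (cas_property_uint_const), and 8 bytes of value
 * serialized least significant byte first.
 */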
/*
* First step - serialize key
*/
item_size = cas_prop_strnlen(entry->key, MAX_STRING_SIZE) + 1;
item = entry->key;
if ((*offset + item_size) > size)
return -ENOMEM;
memcpy(buffer + *offset, item, item_size);
*offset += item_size;
/*
* Second step - serialize type
*/
item_size = sizeof(entry->type);
item = &entry->type;
if ((*offset + item_size) > size)
return -ENOMEM;
memcpy(buffer + *offset, item, item_size);
*offset += item_size;
/*
* Third step - serialize value
*/
switch (CAS_PROP_UNCONST(entry->type)) {
case cas_property_string:
/* Serialize string */
result = _cas_prop_serialize_string(buffer, size, offset,
entry->value);
break;
case cas_property_sint:
/* Serialize signed integer */
result = _cas_prop_serialize_sint(buffer, size, offset,
entry->value_sint);
break;
case cas_property_uint:
/* Serialize unsigned integer */
result = _cas_prop_serialize_uint(buffer, size, offset,
entry->value_uint);
break;
default:
result = -EINVAL;
break;
}
return result;
}
int cas_properties_serialize(struct cas_properties *props,
struct casdsk_props_conf *caches_serialized_conf)
{
int result = 0;
uint64_t offset = 0, size;
uint16_t crc = 0;
void *buffer;
struct list_head *curr;
struct _cas_property *entry;
size = _cas_prop_get_size(props);
if (size == 0)
return -EINVAL;
buffer = vzalloc(size);
if (!buffer)
return -ENOMEM;
/*
* Update the ".size" entry with the final size of the buffer
*/
result = cas_properties_add_uint(props, ".size", size,
CAS_PROPERTIES_CONST);
if (result)
goto error_after_buffer_allocation;
/*
* Serialize each entry, one by one
*/
list_for_each(curr, &props->list) {
entry = list_entry(curr, struct _cas_property, item);
result = _cas_prop_serialize(entry, buffer, size, &offset);
if (result)
goto error_after_buffer_allocation;
}
crc = crc16(0, buffer, size);
caches_serialized_conf->buffer = buffer;
caches_serialized_conf->size = size;
caches_serialized_conf->crc = crc;
return result;
error_after_buffer_allocation:
vfree(buffer);
return result;
}
void cas_properties_print(struct cas_properties *props)
{
int result = 0;
struct list_head *curr;
struct _cas_property *entry;
char *abc;
/*
* Print each entry, one by one
*/
list_for_each(curr, &props->list) {
entry = list_entry(curr, struct _cas_property, item);
printk(KERN_DEBUG "[Upgrade] Key: %s", entry->key);
switch (CAS_PROP_UNCONST(entry->type)) {
case cas_property_string:
printk(", string, ");
abc = (char *)entry->value;
printk("Value: %s ", abc);
break;
case cas_property_sint:
printk(", sint, ");
printk("Value: %lld ", entry->value_sint);
break;
case cas_property_uint:
printk(", uint, ");
printk("Value: %llu ", entry->value_uint);
break;
default:
result = -EINVAL;
break;
}
printk("\n");
}
}
static int _cas_prop_parse_version(const char *buffer, uint64_t *offset,
uint64_t *version, int trigger)
{
int result = 0;
char *key = NULL;
uint8_t type;
result = _cas_prop_parse_string(buffer, strlen(VERSION_STR) + 1,
offset, &key);
if (result)
goto error_during_parse_key;
if (strcmp(VERSION_STR, key)) {
result = -EINVAL;
goto error_after_parse_key;
}
type = buffer[*offset];
if (cas_property_uint_const != type) {
result = -EINVAL;
goto error_after_parse_key;
}
*offset += sizeof(type);
result = _cas_prop_parse_uint(buffer,
strlen(VERSION_STR) + 1 + sizeof(type) +
sizeof(*version), offset, version);
if (result)
goto error_after_parse_key;
/*
* In case of an external call
* we don't need to check the version.
*/
if (trigger == INTERNAL_CALL && *version != CAS_PROPERTIES_VERSION) {
printk(KERN_ERR "Version of interface using to parse is "
"different than version used to serialize\n");
result = -EPERM;
}
error_after_parse_key:
kfree(key);
error_during_parse_key:
return result;
}
int cas_properites_parse_version(struct casdsk_props_conf *caches_serialized_conf,
uint64_t *version)
{
uint64_t offset = 0;
char *buffer = NULL;
buffer = (char *) caches_serialized_conf->buffer;
if (!buffer)
return -EINVAL;
return _cas_prop_parse_version(buffer, &offset, version, EXTERNAL_CALL);
}
struct cas_properties *
cas_properites_parse(struct casdsk_props_conf *caches_serialized_conf)
{
struct cas_properties *props;
char *key = NULL, *value = NULL, *buffer = NULL;
int result;
uint8_t type;
uint64_t uint_value, size = 0, offset = 0, version = 0;
uint16_t crc;
int64_t sint_value;
bool constant = false;
props = cas_properties_create();
if (IS_ERR(props))
return ERR_PTR(-ENOMEM);
if (!caches_serialized_conf) {
result = -EINVAL;
goto error_after_props_allocation;
}
buffer = (char *) caches_serialized_conf->buffer;
if (!buffer) {
result = -EINVAL;
goto error_after_props_allocation;
}
size = caches_serialized_conf->size;
crc = crc16(0, buffer, size);
if (crc != caches_serialized_conf->crc) {
printk(KERN_ERR "Cache configuration corrupted");
result = -EINVAL;
goto error_after_props_allocation;
}
/*
* Parse the first entry on the list - the version of the
* interface used for serialization
*/
result = _cas_prop_parse_version(buffer, &offset, &version,
INTERNAL_CALL);
if (result)
goto error_after_props_allocation;
while (offset < size) {
/*
* Parse key of entry
*/
result = _cas_prop_parse_string(buffer, size, &offset, &key);
if (result)
goto error_after_props_allocation;
/*
* Parse type of entry
*/
if (offset + sizeof(type) > size) {
kfree(key);
result = -EINVAL;
goto error_after_props_allocation;
}
memcpy(&type, buffer + offset, sizeof(type));
offset += sizeof(type);
constant = CAS_PROP_CHECK_CONST(type);
type = CAS_PROP_UNCONST(type);
switch (type) {
case cas_property_string:
/* Parse string */
result = _cas_prop_parse_string(buffer, size, &offset,
&value);
if (result)
break;
/*
* Add new entry with string to CAS properties instance
*/
result |= cas_properties_add_string(props, key, value,
constant);
kfree(value);
break;
case cas_property_sint:
/* Parse signed integer */
result = _cas_prop_parse_sint(buffer, size, &offset,
&sint_value);
/* Add new entry with signed integer to CAS properties
* instance
*/
result |= cas_properties_add_sint(props, key,
sint_value, constant);
break;
case cas_property_uint:
/* Parse unsigned integer */
result = _cas_prop_parse_uint(buffer, size, &offset,
&uint_value);
/* Add new entry with unsigned integer to CAS properties
* instance
*/
result |= cas_properties_add_uint(props, key,
uint_value, constant);
break;
default:
result = -EINVAL;
break;
}
/*
* Once the new entry has been added,
* we no longer need to hold the key.
*/
kfree(key);
if (result)
goto error_after_props_allocation;
}
return props;
error_after_props_allocation:
cas_properties_destroy(props);
return ERR_PTR(result);
}
static struct _cas_property *_cas_prop_find(const struct cas_properties *props,
const char *key)
{
struct list_head *curr;
struct _cas_property *entry;
list_for_each(curr, &props->list) {
entry = list_entry(curr, struct _cas_property, item);
if (strncmp(key, entry->key, MAX_STRING_SIZE) == 0)
return entry;
}
return ERR_PTR(-ENOENT);
}
static struct _cas_property *_cas_prop_alloc_entry_key(const char *key)
{
struct _cas_property *entry;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return ERR_PTR(-ENOMEM);
entry->key = kstrdup(key, GFP_KERNEL);
if (!entry->key) {
kfree(entry);
return ERR_PTR(-ENOMEM);
}
INIT_LIST_HEAD(&entry->item);
return entry;
}
/*
* ADD
*/
int cas_properties_add_uint(struct cas_properties *props, const char *key,
uint64_t value, bool constant)
{
struct _cas_property *entry;
/*
* Look for an entry with the same key;
* if it exists - update it, if not - create a new one
*/
entry = _cas_prop_find(props, key);
if (IS_ERR(entry)) {
entry = _cas_prop_alloc_entry_key(key);
if (IS_ERR(entry))
return PTR_ERR(entry);
list_add_tail(&entry->item, &props->list);
} else if (cas_property_uint != entry->type) {
/*
* We can only update a non-constant entry,
* so we compare the type against the non-constant type only.
*/
return -EINVAL;
}
entry->type = constant ? cas_property_uint_const : cas_property_uint;
entry->value_uint = value;
return 0;
}
int cas_properties_add_sint(struct cas_properties *props, const char *key,
int64_t value, bool constant)
{
struct _cas_property *entry;
/*
* Look for an entry with the same key;
* if it exists - update it, if not - create a new one
*/
entry = _cas_prop_find(props, key);
if (IS_ERR(entry)) {
entry = _cas_prop_alloc_entry_key(key);
if (IS_ERR(entry))
return PTR_ERR(entry);
list_add_tail(&entry->item, &props->list);
} else if (cas_property_sint != entry->type) {
/*
* We can only update a non-constant entry,
* so we compare the type against the non-constant type only.
*/
return -EINVAL;
}
entry->type = constant ? cas_property_sint_const : cas_property_sint;
entry->value_sint = value;
return 0;
}
int cas_properties_add_string(struct cas_properties *props, const char *key,
const char *value, bool constant)
{
struct _cas_property *entry;
char *tmp_value = NULL;
tmp_value = kstrdup(value, GFP_KERNEL);
if (!tmp_value)
return -ENOMEM;
/*
* Look for an entry with the same key;
* if it exists - update it, if not - create a new one
*/
entry = _cas_prop_find(props, key);
if (IS_ERR(entry)) {
entry = _cas_prop_alloc_entry_key(key);
if (IS_ERR(entry)) {
kfree(tmp_value);
return PTR_ERR(entry);
}
list_add_tail(&entry->item, &props->list);
} else {
if (cas_property_string != entry->type) {
/*
* We can only update a non-constant entry,
* so we compare the type against the non-constant type only.
*/
kfree(tmp_value);
return -EINVAL;
}
kfree(entry->value);
}
entry->type = constant ? cas_property_string_const :
cas_property_string;
entry->value = tmp_value;
return 0;
}
/*
* GET
*/
int cas_properties_get_uint(struct cas_properties *props, const char *key,
uint64_t *value)
{
struct _cas_property *entry;
entry = _cas_prop_find(props, key);
if ((IS_ERR(entry) == 0) && (cas_property_uint ==
CAS_PROP_UNCONST(entry->type))) {
*value = entry->value_uint;
return 0;
}
return IS_ERR(entry) ? PTR_ERR(entry) : -EINVAL;
}
int cas_properties_get_sint(struct cas_properties *props, const char *key,
int64_t *value)
{
struct _cas_property *entry;
entry = _cas_prop_find(props, key);
if ((IS_ERR(entry) == 0) && (cas_property_sint ==
CAS_PROP_UNCONST(entry->type))) {
*value = entry->value_sint;
return 0;
}
return IS_ERR(entry) ? PTR_ERR(entry) : -EINVAL;
}
int cas_properties_get_string(struct cas_properties *props, const char *key,
char *value, uint32_t size)
{
struct _cas_property *entry;
entry = _cas_prop_find(props, key);
if ((IS_ERR(entry) == 0) && (cas_property_string ==
CAS_PROP_UNCONST(entry->type))) {
/* Check if size of destination memory is enough */
if (size < cas_prop_strnlen(entry->value, MAX_STRING_SIZE) + 1)
return -ENOMEM;
cas_prop_strncpy(value, size, entry->value,
cas_prop_strnlen(entry->value, MAX_STRING_SIZE));
return 0;
}
return IS_ERR(entry) ? PTR_ERR(entry) : -EINVAL;
}

View File

@@ -0,0 +1,153 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef UTILS_PROPERTIES_H_
#define UTILS_PROPERTIES_H_
#ifdef __KERNEL__
#define cas_prop_strncpy(dest, dest_size, src, src_size) \
strlcpy(dest, src, dest_size)
#define cas_prop_strnlen(string, size) strnlen(string, size)
#else
#define cas_prop_strncpy(dest, dest_size, src, src_size) \
strncpy(dest, src, src_size)
#define cas_prop_strnlen(string, size) strlen(string)
#endif
#include "../../cas_disk/cas_disk.h"
#define MAX_STRING_SIZE 4095
#define CAS_PROPERTIES_NON_CONST false
#define CAS_PROPERTIES_CONST true
/**
* @file utils_properties.h
* @brief CAS cache interface for collecting and serializing CAS properties
*/
/**
* @brief Handler for instance of CAS properties
*/
struct cas_properties;
/**
* @brief Initialize instance of CAS properties
*
* @return Handler to instance of interface
*/
struct cas_properties *cas_properties_create(void);
/**
* @brief De-initialize instance of CAS properties
*
* @param props Handler to instance to de-initialize
*/
void cas_properties_destroy(struct cas_properties *props);
/**
* @brief Serialize given CAS properties instance to continuous buffer
*
* @param props instance of CAS properties
* @param caches_serialized_conf destination buffer for the serialized properties
* @return result of serializing CAS properties, 0 on success
*/
int cas_properties_serialize(struct cas_properties *props,
struct casdsk_props_conf *caches_serialized_conf);
/**
* @brief Parse the first entry of the given continuous buffer to get the
* version of the interface which was used to serialize it
*
* @param caches_serialized_conf buffer with serialized CAS properties
* @param version pointer to memory where the version will be stored
* @return result of getting the version, 0 on success
*/
int cas_properites_parse_version(struct casdsk_props_conf *caches_serialized_conf,
uint64_t *version);
/**
* @brief Parse the given continuous buffer into a CAS properties instance
*
* @param caches_serialized_conf buffer with serialized CAS properties
* @return handle to the CAS properties instance
*/
struct cas_properties *
cas_properites_parse(struct casdsk_props_conf *caches_serialized_conf);
/**
* @brief Add unsigned integer to CAS properties instance
*
* @param props CAS properties instance to add variable
* @param key key paired with variable
* @param value value of variable
* @param private if true value cannot be updated
* @return result of adding, 0 on success
*/
int cas_properties_add_uint(struct cas_properties *props, const char *key,
uint64_t value, bool private);
/**
* @brief Add signed integer to CAS properties instance
*
* @param props CAS properties instance to add variable
* @param key key paired with variable
* @param value value of variable
* @param private if true value cannot be updated
* @return result of adding, 0 on success
*/
int cas_properties_add_sint(struct cas_properties *props, const char *key,
int64_t value, bool private);
/**
* @brief Add string to CAS properties instance
*
* @param props CAS properties instance to add variable
* @param key key paired with variable
* @param value value of variable
* @param private if true value cannot be updated
* @return result of adding, 0 on success
*/
int cas_properties_add_string(struct cas_properties *props, const char *key,
const char *value, bool private);
/**
* @brief Get unsigned integer from a CAS properties instance
*
* @param props CAS properties instance to get the variable from
* @param key key paired with variable
* @param value pointer to memory where the value will be stored
* @return result of getting, 0 on success
*/
int cas_properties_get_uint(struct cas_properties *props, const char *key,
uint64_t *value);
/**
* @brief Get signed integer from a CAS properties instance
*
* @param props CAS properties instance to get the variable from
* @param key key paired with variable
* @param value pointer to memory where the value will be stored
* @return result of getting, 0 on success
*/
int cas_properties_get_sint(struct cas_properties *props, const char *key,
int64_t *value);
/**
* @brief Get string from a CAS properties instance
*
* @param props CAS properties instance to get the variable from
* @param key key paired with variable
* @param value pointer to memory where the value will be copied
* @param size size of destination memory
* @return 0 on success, -ENOMEM if the destination buffer is too small,
* negative error code otherwise
*/
int cas_properties_get_string(struct cas_properties *props, const char *key,
char *value, uint32_t size);
void cas_properties_print(struct cas_properties *props);
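/*
 * Illustrative usage sketch (the keys, values and error handling below are
 * hypothetical, not part of this interface):
 *
 *	struct cas_properties *props;
 *	struct casdsk_props_conf conf;
 *	int result;
 *
 *	props = cas_properties_create();
 *	if (IS_ERR(props))
 *		return PTR_ERR(props);
 *
 *	result = cas_properties_add_uint(props, "cache_id", 1,
 *			CAS_PROPERTIES_CONST);
 *	result |= cas_properties_add_string(props, "cache_path",
 *			"/dev/nvme0n1", CAS_PROPERTIES_NON_CONST);
 *	if (!result)
 *		result = cas_properties_serialize(props, &conf);
 *
 *	cas_properties_destroy(props);
 *	return result;
 */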
#endif /* UTILS_PROPERTIES_H_ */

View File

@@ -0,0 +1,262 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "ocf/ocf.h"
#include "utils_rpool.h"
#include "ocf_env.h"
#include "../cas_cache.h"
#define CAS_UTILS_RPOOL_DEBUG 0
#if 1 == CAS_UTILS_RPOOL_DEBUG
#define CAS_DEBUG_TRACE() \
printk(KERN_INFO "[Utils][RPOOL] %s\n", __func__)
#define CAS_DEBUG_MSG(msg) \
printk(KERN_INFO "[Utils][RPOOL] %s - %s\n", __func__, msg)
#define CAS_DEBUG_PARAM(format, ...) \
printk(KERN_INFO "[Utils][RPOOL] %s - "format"\n", \
__func__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif
struct _cas_reserve_pool_per_cpu {
spinlock_t lock;
struct list_head list;
atomic_t count;
};
struct cas_reserve_pool {
uint32_t limit;
char *name;
uint32_t entry_size;
struct _cas_reserve_pool_per_cpu *rpools;
};
struct _cas_rpool_pre_alloc_info {
struct work_struct ws;
struct cas_reserve_pool *rpool_master;
cas_rpool_new rpool_new;
void *allocator_ctx;
struct completion cmpl;
int error;
};
#define RPOOL_ITEM_TO_ENTRY(rpool, item) \
(void *)((unsigned long)item + sizeof(struct list_head) \
- rpool->entry_size)
#define RPOOL_ENTRY_TO_ITEM(rpool, entry) \
(struct list_head *)((unsigned long)entry + rpool->entry_size \
- sizeof(struct list_head))
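/*
 * Layout assumed by the two macros above: every pool entry is
 * rpool_master->entry_size bytes long and its last
 * sizeof(struct list_head) bytes double as the list item linking the
 * entry into the per-CPU free list, so no separate wrapper allocation
 * is needed.
 */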
void _cas_rpool_pre_alloc_do(struct work_struct *ws)
{
struct _cas_rpool_pre_alloc_info *info =
container_of(ws, struct _cas_rpool_pre_alloc_info, ws);
struct cas_reserve_pool *rpool_master = info->rpool_master;
struct _cas_reserve_pool_per_cpu *current_rpool;
struct list_head *item;
void *entry;
int i, cpu;
CAS_DEBUG_TRACE();
cpu = smp_processor_id();
current_rpool = &rpool_master->rpools[cpu];
for (i = 0; i < rpool_master->limit; i++) {
entry = info->rpool_new(info->allocator_ctx, cpu);
if (!entry) {
info->error = -ENOMEM;
complete(&info->cmpl);
return;
}
item = RPOOL_ENTRY_TO_ITEM(rpool_master, entry);
list_add_tail(item, &current_rpool->list);
atomic_inc(&current_rpool->count);
}
CAS_DEBUG_PARAM("Added [%d] pre allocated items to reserve poll [%s]"
" for cpu %d", atomic_read(&current_rpool->count),
rpool_master->name, cpu);
complete(&info->cmpl);
}
int _cas_rpool_pre_alloc_schedule(int cpu,
struct _cas_rpool_pre_alloc_info *info)
{
init_completion(&info->cmpl);
INIT_WORK(&info->ws, _cas_rpool_pre_alloc_do);
schedule_work_on(cpu, &info->ws);
schedule();
wait_for_completion(&info->cmpl);
return info->error;
}
void cas_rpool_destroy(struct cas_reserve_pool *rpool_master,
cas_rpool_del rpool_del, void *allocator_ctx)
{
int i, cpu_no = num_online_cpus();
struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
struct list_head *item = NULL, *next = NULL;
void *entry;
CAS_DEBUG_TRACE();
if (!rpool_master)
return;
if (!rpool_master->rpools) {
kfree(rpool_master);
return;
}
for (i = 0; i < cpu_no; i++) {
current_rpool = &rpool_master->rpools[i];
CAS_DEBUG_PARAM("Destroyed reserve poll [%s] for cpu %d",
rpool_master->name, i);
if (!atomic_read(&current_rpool->count))
continue;
list_for_each_safe(item, next, &current_rpool->list) {
entry = RPOOL_ITEM_TO_ENTRY(rpool_master, item);
list_del(item);
rpool_del(allocator_ctx, entry);
atomic_dec(&current_rpool->count);
}
if (atomic_read(&current_rpool->count)) {
printk(KERN_CRIT "Not all object from reserve poll"
"[%s] deallocated\n", rpool_master->name);
WARN(true, OCF_PREFIX_SHORT" Cleanup problem\n");
}
}
kfree(rpool_master->rpools);
kfree(rpool_master);
}
struct cas_reserve_pool *cas_rpool_create(uint32_t limit, char *name,
uint32_t entry_size, cas_rpool_new rpool_new,
cas_rpool_del rpool_del, void *allocator_ctx)
{
int i, cpu_no = num_online_cpus();
struct cas_reserve_pool *rpool_master = NULL;
struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
struct _cas_rpool_pre_alloc_info info;
CAS_DEBUG_TRACE();
memset(&info, 0, sizeof(info));
rpool_master = kzalloc(sizeof(*rpool_master), GFP_KERNEL);
if (!rpool_master)
goto error;
rpool_master->rpools = kzalloc(sizeof(*rpool_master->rpools) * cpu_no,
GFP_KERNEL);
if (!rpool_master->rpools)
goto error;
rpool_master->limit = limit;
rpool_master->name = name;
rpool_master->entry_size = entry_size;
info.rpool_master = rpool_master;
info.rpool_new = rpool_new;
info.allocator_ctx = allocator_ctx;
for (i = 0; i < cpu_no; i++) {
current_rpool = &rpool_master->rpools[i];
spin_lock_init(&current_rpool->lock);
INIT_LIST_HEAD(&current_rpool->list);
if (_cas_rpool_pre_alloc_schedule(i, &info))
goto error;
CAS_DEBUG_PARAM("Created reserve poll [%s] for cpu %d",
rpool_master->name, i);
}
return rpool_master;
error:
cas_rpool_destroy(rpool_master, rpool_del, allocator_ctx);
return NULL;
}
#define LIST_FIRST_ITEM(head) head.next
void *cas_rpool_try_get(struct cas_reserve_pool *rpool_master, int *cpu)
{
unsigned long flags;
struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
struct list_head *item = NULL;
void *entry = NULL;
CAS_DEBUG_TRACE();
*cpu = smp_processor_id();
current_rpool = &rpool_master->rpools[*cpu];
spin_lock_irqsave(&current_rpool->lock, flags);
if (!list_empty(&current_rpool->list)) {
item = LIST_FIRST_ITEM(current_rpool->list);
entry = RPOOL_ITEM_TO_ENTRY(rpool_master, item);
list_del(item);
atomic_dec(&current_rpool->count);
}
spin_unlock_irqrestore(&current_rpool->lock, flags);
CAS_DEBUG_PARAM("[%s]Removed item from reserve pool [%s] for cpu [%d], "
"items in pool %d", rpool_master->name,
item == NULL ? "SKIPPED" : "OK", *cpu,
atomic_read(&current_rpool->count));
return entry;
}
int cas_rpool_try_put(struct cas_reserve_pool *rpool_master, void *entry, int cpu)
{
int ret = 0;
unsigned long flags;
struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
struct list_head *item;
CAS_DEBUG_TRACE();
current_rpool = &rpool_master->rpools[cpu];
spin_lock_irqsave(&current_rpool->lock, flags);
if (atomic_read(&current_rpool->count) >= rpool_master->limit) {
ret = 1;
goto error;
}
item = RPOOL_ENTRY_TO_ITEM(rpool_master, entry);
list_add_tail(item, &current_rpool->list);
atomic_inc(&current_rpool->count);
error:
CAS_DEBUG_PARAM("[%s]Added item to reserve pool [%s] for cpu [%d], "
"items in pool %d", rpool_master->name,
ret == 1 ? "SKIPPED" : "OK", cpu,
atomic_read(&current_rpool->count));
spin_unlock_irqrestore(&current_rpool->lock, flags);
return ret;
}

View File

@@ -0,0 +1,28 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_RPOOL_H__
#define __CAS_RPOOL_H__
#define CAS_RPOOL_MIN_SIZE_ITEM sizeof(struct list_head)
struct cas_reserve_pool;
typedef void (*cas_rpool_del)(void *allocator_ctx, void *item);
typedef void *(*cas_rpool_new)(void *allocator_ctx, int cpu);
struct cas_reserve_pool *cas_rpool_create(uint32_t limit, char *name,
uint32_t item_size, cas_rpool_new rpool_new,
cas_rpool_del rpool_del, void *allocator_ctx);
void cas_rpool_destroy(struct cas_reserve_pool *rpool,
cas_rpool_del rpool_del, void *allocator_ctx);
void *cas_rpool_try_get(struct cas_reserve_pool *rpool, int *cpu);
int cas_rpool_try_put(struct cas_reserve_pool *rpool, void *item, int cpu);
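/*
 * Illustrative usage sketch (struct my_item, my_new and my_del are
 * hypothetical; the item size must be at least CAS_RPOOL_MIN_SIZE_ITEM so
 * the embedded list head fits):
 *
 *	static void *my_new(void *ctx, int cpu)
 *	{
 *		return kzalloc(sizeof(struct my_item), GFP_KERNEL);
 *	}
 *
 *	static void my_del(void *ctx, void *item)
 *	{
 *		kfree(item);
 *	}
 *
 *	pool = cas_rpool_create(16, "my pool", sizeof(struct my_item),
 *			my_new, my_del, NULL);
 *	...
 *	item = cas_rpool_try_get(pool, &cpu);
 *	...
 *	if (cas_rpool_try_put(pool, item, cpu))
 *		my_del(NULL, item);
 *	...
 *	cas_rpool_destroy(pool, my_del, NULL);
 */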
#endif /* __CAS_RPOOL_H__ */

View File

@@ -0,0 +1,53 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __OBJ_BLK_H__
#define __OBJ_BLK_H__
#include "vol_atomic_dev_bottom.h"
#include "vol_block_dev_bottom.h"
#include "vol_block_dev_top.h"
struct casdsk_disk;
struct bd_object {
struct casdsk_disk *dsk;
struct block_device *btm_bd;
/**
* This denotes state of volatile write cache of the device.
* This is set to true when:
* - opening the device
* - when writing to a device without FUA/FLUSH flags
* This is set to false when:
* - FLUSH request is completed on device.
* When it is false
* - FLUSH requests from upper layer are NOT passed to the device.
*/
atomic_t potentially_dirty;
uint32_t expobj_valid : 1;
/*!< Bit indicates that exported object was created */
uint32_t expobj_locked : 1;
/*!< Non zero value indicates data exported object is locked */
uint32_t opened_by_bdev : 1;
/*!< Opened by supplying bdev manually */
struct atomic_dev_params atomic_params;
atomic64_t pending_rqs;
/*!< This field describes in-flight IO requests */
struct workqueue_struct *workqueue;
/*!< Workqueue for internally triggered I/O */
};
static inline struct bd_object *bd_object(ocf_volume_t vol)
{
return ocf_volume_get_priv(vol);
}
#endif /* __OBJ_BLK_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_ATOMIC_DEV_BOTTOM_H__
#define __VOL_ATOMIC_DEV_BOTTOM_H__
#include "../cas_cache.h"
enum atomic_metadata_mode {
ATOMIC_METADATA_MODE_ELBA,
ATOMIC_METADATA_MODE_SEPBUF,
ATOMIC_METADATA_MODE_NONE,
};
struct atomic_dev_params {
unsigned int nsid;
uint64_t size;
enum atomic_metadata_mode metadata_mode;
unsigned is_mode_optimal : 1;
/* IMPORTANT: If this field is 0, the other fields are invalid! */
unsigned is_atomic_capable : 1;
};
int atomic_dev_init(void);
void atomic_dev_deinit(void);
#endif /* __VOL_ATOMIC_DEV_BOTTOM_H__ */

View File

@@ -0,0 +1,470 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "vol_blk_utils.h"
static void cas_io_iter_advanced(struct bio_vec_iter *iter, uint32_t bytes)
{
BUG_ON(bytes > iter->len);
iter->len -= bytes;
iter->offset += bytes;
if (iter->len) {
/* Still in this item, bytes to be processed */
return;
}
/* Move to next item in data vector */
iter->idx++;
if (iter->idx < iter->vec_size) {
iter->ivec = &iter->vec[iter->idx];
iter->len = iter->ivec->bv_len;
iter->offset = iter->ivec->bv_offset;
} else {
iter->ivec = NULL;
iter->len = 0;
iter->offset = 0;
}
}
uint32_t cas_io_iter_cpy(struct bio_vec_iter *dst, struct bio_vec_iter *src,
uint32_t bytes)
{
uint32_t to_copy, written = 0;
void *adst, *asrc;
if (dst->idx >= dst->vec_size)
return 0;
BUG_ON(dst->offset + dst->len > PAGE_SIZE);
if (src->idx >= src->vec_size)
return 0;
BUG_ON(src->offset + src->len > PAGE_SIZE);
while (bytes) {
to_copy = min(dst->len, src->len);
to_copy = min(to_copy, bytes);
if (to_copy == 0) {
/* No more bytes to copy */
break;
}
adst = page_address(dst->ivec->bv_page) + dst->offset;
asrc = page_address(src->ivec->bv_page) + src->offset;
memcpy(adst, asrc, to_copy);
bytes -= to_copy;
written += to_copy;
cas_io_iter_advanced(dst, to_copy);
cas_io_iter_advanced(src, to_copy);
}
return written;
}
uint32_t cas_io_iter_cpy_from_data(struct bio_vec_iter *dst,
const void *src, uint32_t bytes)
{
uint32_t to_copy, written = 0;
void *adst;
const void *asrc;
if (dst->idx >= dst->vec_size)
return 0;
BUG_ON(dst->offset + dst->len > PAGE_SIZE);
while (bytes) {
to_copy = min(dst->len, bytes);
if (to_copy == 0) {
/* No more bytes to copy */
break;
}
adst = page_address(dst->ivec->bv_page) + dst->offset;
asrc = src + written;
memcpy(adst, asrc, to_copy);
bytes -= to_copy;
written += to_copy;
cas_io_iter_advanced(dst, to_copy);
}
return written;
}
uint32_t cas_io_iter_cpy_to_data(void *dst, struct bio_vec_iter *src,
uint32_t bytes)
{
uint32_t to_copy, written = 0;
void *adst, *asrc;
BUG_ON(dst == NULL);
if (src->idx >= src->vec_size)
return 0;
BUG_ON(src->offset + src->len > PAGE_SIZE);
while (bytes) {
to_copy = min(bytes, src->len);
if (to_copy == 0) {
/* No more bytes to copy */
break;
}
adst = dst + written;
asrc = page_address(src->ivec->bv_page) + src->offset;
memcpy(adst, asrc, to_copy);
bytes -= to_copy;
written += to_copy;
cas_io_iter_advanced(src, to_copy);
}
return written;
}
uint32_t cas_io_iter_move(struct bio_vec_iter *iter, uint32_t bytes)
{
uint32_t to_move, moved = 0;
if (iter->idx >= iter->vec_size)
return 0;
BUG_ON(iter->offset + iter->len > PAGE_SIZE);
while (bytes) {
to_move = min(iter->len, bytes);
if (to_move == 0) {
/* No more bytes to move */
break;
}
bytes -= to_move;
moved += to_move;
cas_io_iter_advanced(iter, to_move);
}
return moved;
}
uint32_t cas_io_iter_zero(struct bio_vec_iter *dst, uint32_t bytes)
{
uint32_t to_fill, zeroed = 0;
void *adst;
if (dst->idx >= dst->vec_size)
return 0;
BUG_ON(dst->offset + dst->len > PAGE_SIZE);
while (bytes) {
to_fill = min(dst->len, bytes);
if (to_fill == 0) {
/* No more bytes to zero */
break;
}
adst = page_address(dst->ivec->bv_page) + dst->offset;
memset(adst, 0, to_fill);
bytes -= to_fill;
zeroed += to_fill;
cas_io_iter_advanced(dst, to_fill);
}
return zeroed;
}
/*
*
*/
int cas_blk_io_set_data(struct ocf_io *io,
ctx_data_t *ctx_data, uint32_t offset)
{
struct blkio *blkio = cas_io_to_blkio(io);
struct blk_data *data = ctx_data;
/* Set BIO vector (IO data) and initialize iterator */
blkio->data = data;
if (blkio->data) {
cas_io_iter_init(&blkio->iter, blkio->data->vec,
blkio->data->size);
/* Move into specified offset in BIO vector iterator */
if (offset != cas_io_iter_move(&blkio->iter, offset)) {
/* TODO Log message */
blkio->error = -ENOBUFS;
return -ENOBUFS;
}
}
return 0;
}
/*
*
*/
ctx_data_t *cas_blk_io_get_data(struct ocf_io *io)
{
struct blkio *blkio = cas_io_to_blkio(io);
return blkio->data;
}
#if defined(CAS_NVME_PARTIAL)
#include "utils/utils_nvme.h"
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
uint8_t *type, struct atomic_dev_params *atomic_params)
{
struct nvme_id_ns *ns;
unsigned int nsid, selected, ms, ds, pi, elba, sbsupp;
long long int ret = 0;
struct atomic_dev_params atomic_params_int = {0};
ns = kmalloc(sizeof(*ns), GFP_KERNEL);
if (!ns)
return -OCF_ERR_NO_MEM;
ret = cas_nvme_get_nsid(bdev, &nsid);
if (ret < 0) {
/*
* We cannot obtain the NSID, which means we are not dealing
* with an NVMe device
*/
goto out1;
}
ret = cas_nvme_identify_ns(bdev, nsid, ns);
if (ret < 0) {
/*
* We cannot obtain the ns structure, which means we ARE dealing
* with an NVMe device but cannot recognize its format, so let's
* treat the device as a regular block device
*/
goto out1;
}
selected = ns->flbas & 0xf;
ms = ns->lbaf[selected].ms;
ds = ns->lbaf[selected].ds;
pi = ns->dps & 0x7;
elba = !!(ns->flbas & (1<<4));
sbsupp = !!(ns->mc & (1<<1));
atomic_params_int.is_atomic_capable = 1;
atomic_params_int.nsid = nsid;
atomic_params_int.size = (ns->nsze << (ds - 9)) * SECTOR_SIZE;
if (pi != 0) {
/* We don't support formats which have
* the Protection Information feature enabled.
*/
ret = -KCAS_ERR_NVME_BAD_FORMAT;
goto out2;
}
switch (ms) {
case 0:
/* NVMe metadata features disabled, so we handle it as
* regular block device
*/
if (ds != 9 && ds != 12) {
ret = -KCAS_ERR_NVME_BAD_FORMAT;
goto out2;
}
*type = BLOCK_DEVICE_VOLUME;
atomic_params_int.metadata_mode = ATOMIC_METADATA_MODE_NONE;
#if !defined(CAS_NVME_FULL)
/*
* With only partial NVMe support the user cannot use the
* device in atomic mode, so the current mode is optimal
*/
atomic_params_int.is_mode_optimal = 1;
break;
#else
if (bdev == bdev->bd_contains) {
/*
* Entire device - format isn't optimal
*/
atomic_params_int.is_mode_optimal = 0;
} else {
/*
* Partition - the format is optimal, the user cannot use
* partitions in atomic mode
*/
atomic_params_int.is_mode_optimal = 1;
}
break;
case 8:
/* For atomic writes we support only metadata size 8B and
* data size 512B
*/
if (ds != 9) {
ret = -KCAS_ERR_NVME_BAD_FORMAT;
goto out2;
}
*type = ATOMIC_DEVICE_VOLUME;
atomic_params_int.metadata_mode = elba ?
ATOMIC_METADATA_MODE_ELBA :
ATOMIC_METADATA_MODE_SEPBUF;
atomic_params_int.is_mode_optimal = sbsupp ? !elba : 1;
break;
#endif
default:
ret = -KCAS_ERR_NVME_BAD_FORMAT;
}
if (atomic_params)
*atomic_params = atomic_params_int;
goto out2;
out1:
*type = BLOCK_DEVICE_VOLUME;
ret = 0;
out2:
kfree(ns);
return ret;
}
static inline int _cas_detect_blk_type(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
int ret;
struct block_device *bdev;
char holder[] = "CAS DETECT\n";
bdev = OPEN_BDEV_EXCLUSIVE(path, FMODE_READ, holder);
if (IS_ERR(bdev))
return -OCF_ERR_NOT_OPEN_EXC;
ret = cas_blk_identify_type_by_bdev(bdev, type, atomic_params);
CLOSE_BDEV_EXCLUSIVE(bdev, FMODE_READ);
return ret;
}
#else
static inline int _cas_detect_blk_type(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
/*
* NVMe is not supported with given kernel version, so we
* have no way to figure out what the current NVMe format
* is. In this situation we make a naive assumption that
* it's formatted to LBA size 512B, and try to treat it
* as regular block device.
*/
*type = BLOCK_DEVICE_VOLUME;
return 0;
}
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
uint8_t *type, struct atomic_dev_params *atomic_params)
{
*type = BLOCK_DEVICE_VOLUME;
return 0;
}
#endif
int cas_blk_open_volume_by_bdev(ocf_volume_t *vol,
struct block_device *bdev)
{
struct atomic_dev_params atomic_params = {0};
struct bd_object *bdobj;
uint8_t type;
int ret;
ret = cas_blk_identify_type_by_bdev(bdev, &type, &atomic_params);
if (ret)
goto err;
ret = ocf_ctx_volume_create(cas_ctx, vol, NULL, type);
if (ret)
goto err;
bdobj = bd_object(*vol);
bdobj->btm_bd = bdev;
bdobj->opened_by_bdev = true;
ocf_volume_open(*vol);
return 0;
err:
return ret;
}
void cas_blk_close_volume(ocf_volume_t vol)
{
ocf_volume_close(vol);
ocf_volume_deinit(vol);
env_free(vol);
}
int _cas_blk_identify_type(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
struct file *file;
int result = 0;
file = filp_open(path, O_RDONLY, 0);
if (IS_ERR(file))
return -OCF_ERR_INVAL_VOLUME_TYPE;
if (S_ISBLK(FILE_INODE(file)->i_mode))
*type = BLOCK_DEVICE_VOLUME;
else if (S_ISCHR(FILE_INODE(file)->i_mode))
*type = NVME_CONTROLLER;
else
result = -OCF_ERR_INVAL_VOLUME_TYPE;
filp_close(file, 0);
if (result)
return result;
if (*type == BLOCK_DEVICE_VOLUME) {
result = _cas_detect_blk_type(path, type, atomic_params);
if (result < 0)
return result;
}
return 0;
}
int cas_blk_identify_type(const char *path, uint8_t *type)
{
return _cas_blk_identify_type(path, type, NULL);
}
int cas_blk_identify_type_atomic(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
return _cas_blk_identify_type(path, type, atomic_params);
}

View File

@@ -0,0 +1,148 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLK_UTILS_H__
#define __VOL_BLK_UTILS_H__
#include "obj_blk.h"
#include "context.h"
static inline bool cas_blk_is_flush_io(unsigned long flags)
{
if ((flags & OCF_WRITE_FLUSH) == OCF_WRITE_FLUSH)
return true;
if ((flags & OCF_WRITE_FLUSH_FUA) == OCF_WRITE_FLUSH_FUA)
return true;
return false;
}
struct blkio {
int error;
atomic_t rq_remaning;
atomic_t ref_counter;
int32_t dirty;
int32_t dir;
struct blk_data *data; /* IO data buffer */
/* BIO vector iterator for sending IO */
struct bio_vec_iter iter;
};
static inline struct blkio *cas_io_to_blkio(struct ocf_io *io)
{
return ocf_io_get_priv(io);
}
int cas_blk_io_set_data(struct ocf_io *io, ctx_data_t *data,
uint32_t offset);
ctx_data_t *cas_blk_io_get_data(struct ocf_io *io);
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
uint8_t *type, struct atomic_dev_params *atomic_params);
int cas_blk_open_volume_by_bdev(ocf_volume_t *vol,
struct block_device *bdev);
void cas_blk_close_volume(ocf_volume_t vol);
int cas_blk_identify_type(const char *path, uint8_t *type);
int cas_blk_identify_type_atomic(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params);
static inline void cas_io_iter_init(struct bio_vec_iter *iter,
struct bio_vec *vec, uint32_t vec_size)
{
iter->vec = iter->ivec = vec;
iter->vec_size = vec_size;
iter->idx = 0;
iter->offset = vec->bv_offset;
iter->len = vec->bv_len;
}
static inline void cas_io_iter_set(struct bio_vec_iter *iter,
struct bio_vec *vec, uint32_t vec_size,
uint32_t idx, uint32_t offset, uint32_t len)
{
iter->vec = vec;
iter->vec_size = vec_size;
iter->idx = idx;
iter->offset = offset;
iter->len = len;
if (iter->idx < vec_size) {
iter->ivec = &vec[iter->idx];
} else {
iter->ivec = NULL;
WARN(1, "Setting offset out of BIO vector");
}
}
static inline void cas_io_iter_copy_set(struct bio_vec_iter *dst,
struct bio_vec_iter *src)
{
dst->vec = src->vec;
dst->vec_size = src->vec_size;
dst->idx = src->idx;
dst->offset = src->offset;
dst->len = src->len;
dst->ivec = src->ivec;
}
static inline bool cas_io_iter_is_next(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? true : false;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_size_done(struct bio_vec_iter *iter)
{
return iter->idx;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_size_left(struct bio_vec_iter *iter)
{
if (iter->idx < iter->vec_size)
return iter->vec_size - iter->idx;
return 0;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_current_offset(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? iter->offset : 0;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_current_length(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? iter->len : 0;
/* TODO UNITTEST */
}
static inline struct page *cas_io_iter_current_page(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? iter->ivec->bv_page : NULL;
/* TODO UNITTEST */
}
uint32_t cas_io_iter_cpy(struct bio_vec_iter *dst, struct bio_vec_iter *src,
uint32_t bytes);
uint32_t cas_io_iter_cpy_from_data(struct bio_vec_iter *dst,
const void *src, uint32_t bytes);
uint32_t cas_io_iter_cpy_to_data(void *dst, struct bio_vec_iter *src,
uint32_t bytes);
uint32_t cas_io_iter_move(struct bio_vec_iter *iter,
uint32_t bytes);
uint32_t cas_io_iter_zero(struct bio_vec_iter *iter, uint32_t bytes);
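/*
 * Illustrative usage sketch (the 'data', 'buf', 'offset' and 'bytes'
 * variables below are hypothetical): iterate over a struct blk_data
 * starting at a byte offset and copy a range of bytes out of it:
 *
 *	struct bio_vec_iter iter;
 *	uint32_t copied;
 *
 *	cas_io_iter_init(&iter, data->vec, data->size);
 *	cas_io_iter_move(&iter, offset);
 *	copied = cas_io_iter_cpy_to_data(buf, &iter, bytes);
 */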
#endif /* __VOL_BLK_UTILS_H__ */

View File

@@ -0,0 +1,597 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#define CAS_DEBUG_IO 0
#if CAS_DEBUG_IO == 1
#define CAS_DEBUG_TRACE() printk(KERN_DEBUG \
"[IO] %s:%d\n", __func__, __LINE__)
#define CAS_DEBUG_MSG(msg) printk(KERN_DEBUG \
"[IO] %s:%d - %s\n", __func__, __LINE__, msg)
#define CAS_DEBUG_PARAM(format, ...) printk(KERN_DEBUG \
"[IO] %s:%d - "format"\n", __func__, __LINE__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif
int block_dev_open_object(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
const struct ocf_volume_uuid *uuid = ocf_volume_get_uuid(vol);
struct casdsk_disk *dsk;
if (bdobj->opened_by_bdev) {
/* Bdev has been set manually, so there is nothing to do. */
return 0;
}
if (unlikely(true == cas_upgrade_is_in_upgrade())) {
dsk = casdisk_functions.casdsk_disk_claim(uuid->data, NULL);
casdisk_functions.casdsk_disk_set_attached(dsk);
} else {
dsk = casdisk_functions.casdsk_disk_open(uuid->data, NULL);
}
if (IS_ERR_OR_NULL(dsk)) {
int error = PTR_ERR(dsk) ?: -EINVAL;
if (error == -EBUSY)
error = -OCF_ERR_NOT_OPEN_EXC;
return error;
}
bdobj->dsk = dsk;
bdobj->btm_bd = casdisk_functions.casdsk_disk_get_blkdev(dsk);
return 0;
}
void block_dev_close_object(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
if (bdobj->opened_by_bdev)
return;
if (likely(!cas_upgrade_is_in_upgrade())) {
casdisk_functions.casdsk_disk_close(bdobj->dsk);
} else {
casdisk_functions.casdsk_disk_set_pt(bdobj->dsk);
casdisk_functions.casdsk_disk_dettach(bdobj->dsk);
}
}
unsigned int block_dev_get_max_io_size(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
struct block_device *bd = bdobj->btm_bd;
return queue_max_sectors(bd->bd_disk->queue) << SECTOR_SHIFT;
}
uint64_t block_dev_get_byte_length(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
struct block_device *bd = bdobj->btm_bd;
uint64_t sector_length;
sector_length = (bd->bd_contains == bd) ?
get_capacity(bd->bd_disk) :
bd->bd_part->nr_sects;
return sector_length << SECTOR_SHIFT;
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 3, 0)
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
if (q->elevator->elevator_type == NULL)
return NULL;
if (q->elevator->elevator_type->elevator_name == NULL)
return NULL;
if (q->elevator->elevator_type->elevator_name[0] == 0)
return NULL;
return q->elevator->elevator_type->elevator_name;
}
#else
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
if (q->elevator->type == NULL)
return NULL;
if (q->elevator->type->elevator_name == NULL)
return NULL;
if (q->elevator->type->elevator_name[0] == 0)
return NULL;
return q->elevator->type->elevator_name;
}
#endif
/*
*
*/
const char *block_dev_get_elevator_name(struct request_queue *q)
{
if (!q)
return NULL;
if (q->elevator == NULL)
return NULL;
return __block_dev_get_elevator_name(q);
}
/*
*
*/
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
uint8_t type)
{
if (type == BLOCK_DEVICE_VOLUME) {
if (atomic_params->is_atomic_capable)
return atomic_params->is_mode_optimal;
} else if (type == ATOMIC_DEVICE_VOLUME) {
return atomic_params->is_mode_optimal;
}
return 1;
}
/*
*
*/
static inline struct bio *cas_bd_io_alloc_bio(struct blkio *bdio)
{
struct bio *bio
= bio_alloc(GFP_NOIO, cas_io_iter_size_left(&bdio->iter));
if (bio)
return bio;
if (cas_io_iter_size_left(&bdio->iter) < MAX_LINES_PER_IO) {
/* The BIO vector was already small, so the allocation
* failure is a genuine out-of-memory condition
*/
return NULL;
}
/* Retry with smaller */
return bio_alloc(GFP_NOIO, MAX_LINES_PER_IO);
}
/*
*
*/
static void cas_bd_io_end(struct ocf_io *io, int error)
{
struct blkio *bdio = cas_io_to_blkio(io);
if (error)
bdio->error |= error;
if (atomic_dec_return(&bdio->rq_remaning))
return;
CAS_DEBUG_MSG("Completion");
/* Send completion to caller */
io->end(io, bdio->error);
/* Free allocated structures */
ocf_io_put(io);
}
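/*
 * Completion accounting used below: each submit path sets rq_remaning to 1
 * before issuing any bios, increments it once per bio actually submitted
 * and finally calls cas_bd_io_end() itself, so the OCF completion fires
 * exactly once - after the last in-flight bio, or immediately on the error
 * paths where no bio was sent.
 */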
/*
*
*/
DECLARE_BLOCK_CALLBACK(cas_bd_io_end, struct bio *bio,
unsigned int bytes_done, int error)
{
struct ocf_io *io;
struct blkio *bdio;
struct bd_object *bdobj;
int err;
BUG_ON(!bio);
BUG_ON(!bio->bi_private);
BLOCK_CALLBACK_INIT(bio);
io = bio->bi_private;
bdobj = bd_object(io->volume);
BUG_ON(!bdobj);
err = BLOCK_CALLBACK_ERROR(bio, error);
bdio = cas_io_to_blkio(io);
BUG_ON(!bdio);
CAS_DEBUG_TRACE();
if (err)
goto out;
if (bdio->dir == OCF_WRITE) {
/* IO was a write */
if (!cas_blk_is_flush_io(io->flags)) {
/* Device cache is dirty, mark it */
atomic_inc(&bdobj->potentially_dirty);
} else {
/* IO flush finished, update potential
* dirty state
*/
atomic_sub(bdio->dirty, &bdobj->potentially_dirty);
}
}
out:
if (err == -EOPNOTSUPP && (BIO_OP_FLAGS(bio) & CAS_BIO_DISCARD))
err = 0;
cas_bd_io_end(io, err);
bio_put(bio);
BLOCK_CALLBACK_RETURN();
}
static void block_dev_submit_flush(struct ocf_io *io)
{
#ifdef CAS_FLUSH_SUPPORTED
struct blkio *blkio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
struct block_device *bdev = bdobj->btm_bd;
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = NULL;
blkio->dirty = atomic_read(&bdobj->potentially_dirty);
/* Prevent races of completing IO */
atomic_set(&blkio->rq_remaning, 1);
/* Increase IO reference counter for FLUSH IO */
ocf_io_get(io);
if (!blkio->dirty) {
/* Didn't write anything to underlying disk; no need to
* send req_flush
*/
goto out;
}
if (q == NULL) {
/* No queue, error */
blkio->error = -EINVAL;
goto out;
}
if (!CHECK_QUEUE_FLUSH(q)) {
/* This block device does not support flush, call back */
atomic_sub(blkio->dirty, &bdobj->potentially_dirty);
goto out;
}
bio = bio_alloc(GFP_NOIO, 0);
if (bio == NULL) {
CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for BIO\n");
blkio->error = -ENOMEM;
goto out;
}
blkio->dir = io->dir;
bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);
CAS_BIO_SET_DEV(bio, bdev);
bio->bi_private = io;
atomic_inc(&blkio->rq_remaning);
cas_submit_bio(OCF_WRITE_FLUSH, bio);
out:
cas_bd_io_end(io, blkio->error);
#else
/* On an operating system without REQ_FLUSH support (e.g. SLES 11 SP1)
 * CAS cannot use flush requests to implement power-fail safe
 * Write-Back
 */
io->end(io, -ENOTSUPP);
/* On SLES 11 SP1 power-fail safety can only be achieved by
 * disabling the disk's volatile write cache.
 */
#endif
}
void block_dev_submit_discard(struct ocf_io *io)
{
struct blkio *blkio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
struct block_device *bd = bdobj->btm_bd;
struct request_queue *q = bdev_get_queue(bd);
struct bio *bio = NULL;
unsigned int max_discard_sectors, granularity, bio_sects;
int alignment;
sector_t sects, start, end, tmp;
/* Prevent races of completing IO */
atomic_set(&blkio->rq_remaning, 1);
/* Increase IO reference counter for DISCARD IO */
ocf_io_get(io);
if (!q) {
/* No queue, error */
blkio->error = -ENXIO;
goto out;
}
if (!blk_queue_discard(q)) {
/* Discard is not supported by bottom device, send completion
* to caller
*/
goto out;
}
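/* Work out the device's discard geometry: granularity and alignment are
 * expressed in sectors, and the largest discard issued per bio is clamped
 * and rounded down to a whole number of granules.
 */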
granularity = max(q->limits.discard_granularity >> SECTOR_SHIFT, 1U);
alignment = (bdev_discard_alignment(bd) >> SECTOR_SHIFT) % granularity;
max_discard_sectors =
min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT);
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors))
goto out;
sects = io->bytes >> SECTOR_SHIFT;
start = io->addr >> SECTOR_SHIFT;
while (sects) {
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
CAS_PRINT_RL(CAS_KERN_ERR "Couldn't allocate memory for BIO\n");
blkio->error = -ENOMEM;
break;
}
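/* If this bio does not cover the remainder of the request, trim its end
 * back to the previous granularity boundary (taking the device's discard
 * alignment offset into account) so every bio except the last stays
 * naturally aligned.
 */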
bio_sects = min_t(sector_t, sects, max_discard_sectors);
end = start + bio_sects;
tmp = end;
if (bio_sects < sects &&
sector_div(tmp, granularity) != alignment) {
end = end - alignment;
sector_div(end, granularity);
end = end * granularity + alignment;
bio_sects = end - start;
}
CAS_BIO_SET_DEV(bio, bd);
BIO_BISECTOR(bio) = start;
BIO_BISIZE(bio) = bio_sects << SECTOR_SHIFT;
bio->bi_next = NULL;
bio->bi_private = io;
bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);
atomic_inc(&blkio->rq_remaning);
cas_submit_bio(CAS_BIO_DISCARD, bio);
sects -= bio_sects;
start = end;
cond_resched();
}
out:
cas_bd_io_end(io, blkio->error);
}
static inline bool cas_bd_io_prepare(int *dir, struct ocf_io *io)
{
struct blkio *bdio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
/* Setup DIR */
bdio->dir = *dir;
/* Save dirty counter */
bdio->dirty = atomic_read(&bdobj->potentially_dirty);
/* Convert CAS direction into kernel values */
switch (bdio->dir) {
case OCF_READ:
*dir = READ;
break;
case OCF_WRITE:
*dir = WRITE;
break;
default:
bdio->error = -EINVAL;
break;
}
if (!io->bytes) {
/* Do not accept zero-sized requests */
CAS_PRINT_RL(KERN_ERR "Invalid zero size IO\n");
bdio->error = -EINVAL;
}
if (bdio->error)
return false;
return true;
}
/*
 * Submit an OCF IO to the bottom block device, splitting it into as many
 * bios as needed to cover the whole IO vector.
 */
static void block_dev_submit_io(struct ocf_io *io)
{
struct blkio *bdio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
struct bio_vec_iter *iter = &bdio->iter;
uint64_t addr = io->addr;
uint32_t bytes = io->bytes;
int dir = io->dir;
if (!CAS_IS_WRITE_FLUSH_FUA(io->flags) &&
CAS_IS_WRITE_FLUSH(io->flags)) {
CAS_DEBUG_MSG("Flush request");
/* This is a flush request - handle it separately */
block_dev_submit_flush(io);
return;
}
CAS_DEBUG_PARAM("Address = %llu, bytes = %u\n", addr, bytes);
/* Increase IO reference */
ocf_io_get(io);
/* Prevent races of completing IO */
atomic_set(&bdio->rq_remaning, 1);
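/* rq_remaning starts at one as a guard; each child bio submitted below adds
 * a reference, and the final cas_bd_io_end() call at the end of this function
 * drops the guard, so the caller's completion runs only once, after the last
 * child bio has finished.
 */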
if (!cas_bd_io_prepare(&dir, io)) {
CAS_DEBUG_MSG("Invalid request");
cas_bd_io_end(io, -EINVAL);
return;
}
while (cas_io_iter_is_next(iter) && bytes) {
/* Still IO vectors to be sent */
/* Allocate BIO */
struct bio *bio = cas_bd_io_alloc_bio(bdio);
if (!bio) {
bdio->error = -ENOMEM;
break;
}
/* Setup BIO */
CAS_BIO_SET_DEV(bio, bdobj->btm_bd);
BIO_BISECTOR(bio) = addr / SECTOR_SIZE;
bio->bi_next = NULL;
bio->bi_private = io;
BIO_OP_FLAGS(bio) |= io->flags;
BIO_SET_RW_FLAGS(bio);
bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);
/* Add pages */
while (cas_io_iter_is_next(iter) && bytes) {
struct page *page = cas_io_iter_current_page(iter);
uint32_t offset = cas_io_iter_current_offset(iter);
uint32_t length = cas_io_iter_current_length(iter);
int added;
if (length > bytes)
length = bytes;
added = bio_add_page(bio, page, length, offset);
BUG_ON(added < 0);
if (added == 0) {
/* No more space in BIO, stop adding pages */
break;
}
/* Update address, bytes sent */
bytes -= added;
addr += added;
/* Update BIO vector iterator */
if (added != cas_io_iter_move(iter, added)) {
bdio->error = -ENOBUFS;
break;
}
}
if (bdio->error == 0) {
/* Increase IO reference for sending this IO */
atomic_inc(&bdio->rq_remaning);
/* Send BIO */
CAS_DEBUG_MSG("Submit IO");
cas_submit_bio(dir, bio);
bio = NULL;
} else {
if (bio) {
bio_put(bio);
bio = NULL;
}
/* Error - stop processing */
break;
}
}
if (bytes && bdio->error == 0) {
/* Not all bytes sent, mark error */
bdio->error = -ENOBUFS;
}
/* Drop the initial guard reference; this prevents the IO from
 * completing while child bios are still being submitted.
 */
cas_bd_io_end(io, 0);
}
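/* OCF volume interface for regular Linux block devices; atomic writes and
 * metadata submission are not supported by this backend.
 */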
const struct ocf_volume_properties cas_object_blk_properties = {
.name = "Block Device",
.io_priv_size = sizeof(struct blkio),
.volume_priv_size = sizeof(struct bd_object),
.caps = {
.atomic_writes = 0, /* Atomic writes not supported */
},
.ops = {
.submit_io = block_dev_submit_io,
.submit_flush = block_dev_submit_flush,
.submit_metadata = NULL,
.submit_discard = block_dev_submit_discard,
.open = block_dev_open_object,
.close = block_dev_close_object,
.get_max_io_size = block_dev_get_max_io_size,
.get_length = block_dev_get_byte_length,
},
.io_ops = {
.set_data = cas_blk_io_set_data,
.get_data = cas_blk_io_get_data,
},
};
int block_dev_init(void)
{
int ret;
ret = ocf_ctx_register_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME,
&cas_object_blk_properties);
if (ret < 0)
return ret;
return 0;
}
void block_dev_deinit(void)
{
ocf_ctx_unregister_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME);
}
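/*
 * Recover the OCF io_class from a bio issued by this backend. Returns 0 and
 * fills *io_class when the bio's completion callback identifies it as one of
 * ours, -1 otherwise. A caller inspecting bios in flight might use it along
 * the lines of (sketch):
 *
 *	int io_class;
 *	if (!block_dev_try_get_io_class(bio, &io_class))
 *		pr_debug("CAS bio, io_class=%d\n", io_class);
 */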
int block_dev_try_get_io_class(struct bio *bio, int *io_class)
{
struct ocf_io *io;
if (bio->bi_end_io != REFER_BLOCK_CALLBACK(cas_bd_io_end))
return -1;
io = bio->bi_private;
*io_class = io->io_class;
return 0;
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLOCK_DEV_BOTTOM_H__
#define __VOL_BLOCK_DEV_BOTTOM_H__
#include "../cas_cache.h"
int block_dev_open_object(ocf_volume_t vol);
void block_dev_close_object(ocf_volume_t vol);
const char *block_dev_get_elevator_name(struct request_queue *q);
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
uint8_t type);
int block_dev_try_get_io_class(struct bio *bio, int *io_class);
int block_dev_init(void);
void block_dev_deinit(void);
#endif /* __VOL_BLOCK_DEV_BOTTOM_H__ */

File diff suppressed because it is too large

View File

@@ -0,0 +1,17 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLOCK_DEV_TOP_H__
#define __VOL_BLOCK_DEV_TOP_H__
int block_dev_activate_exported_object(ocf_core_t core);
int block_dev_create_exported_object(ocf_core_t core);
int block_dev_destroy_exported_object(ocf_core_t core);
int block_dev_destroy_all_exported_objects(ocf_cache_t cache);
#endif /* __VOL_BLOCK_DEV_TOP_H__ */

View File

@@ -0,0 +1,2 @@
/home/robert/work/cas/ICAS_Linux/modules/cas_cache/volume/vol_block_dev_top.o-.text-9bd
/home/robert/work/cas/ICAS_Linux/modules/cas_cache/volume/vol_block_dev_top.o-.text-9c4