Initial commit

Signed-off-by: Robert Baldyga <robert.baldyga@intel.com>
This commit is contained in:
Robert Baldyga
2019-03-29 08:39:34 +01:00
commit 94e8ca09e0
140 changed files with 37144 additions and 0 deletions

51
modules/CAS_VERSION_GEN Executable file
View File

@@ -0,0 +1,51 @@
#!/bin/bash
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#

# Generate the CAS_VERSION file consumed by the module build.
# The version is derived from `git describe` when building from a git tree,
# reused from an existing CAS_VERSION file, or defaulted otherwise.

VER_FILE=CAS_VERSION

# Write all version components to ${VER_FILE}, truncating any previous
# content (replaces the original rm/touch/append sequence and removes the
# duplicated echo block from both branches).
write_version_file() {
	{
		echo "CAS_VERSION_MAIN=${CAS_VERSION_MAIN}"
		echo "CAS_VERSION_MAJOR=${CAS_VERSION_MAJOR}"
		echo "CAS_VERSION_MINOR=${CAS_VERSION_MINOR}"
		echo "CAS_BUILD_NO=${CAS_BUILD_NO}"
		echo "CAS_BUILD_FLAG=${CAS_BUILD_FLAG}"
	} > "${VER_FILE}"
}

# `command -v` replaces the `which ...; [ $? -eq 0 ]` anti-pattern.
if command -v git > /dev/null 2>&1 && [ -e ../../../.git ]; then
	echo "Generating ${VER_FILE} from git revision."
	echo ""
	# Expected describe output: vMAIN.vMAJOR.vMINOR.BUILD[-N-gHASH]
	VERSION=$(git describe HEAD 2>/dev/null)
	CAS_VERSION_MAIN=$(echo "${VERSION}" | cut -d '.' -f 1 | awk '{print substr($0, 2)}')
	CAS_VERSION_MAJOR=$(echo "${VERSION}" | cut -d '.' -f 2 | awk '{print substr($0, 2)}')
	CAS_VERSION_MINOR=$(echo "${VERSION}" | cut -d '.' -f 3 | awk '{print substr($0, 2)}')
	CAS_BUILD_NO=$(echo "${VERSION}" | cut -d '.' -f 4 | cut -d '-' -f 1)
	# `cut -s` yields an empty flag when the 4th dot-field has no 3rd
	# dash-separated part.
	CAS_BUILD_FLAG=$(echo "${VERSION}" | cut -d '.' -f 4 | cut -s -d '-' -f 3)
	write_version_file
elif [ -f "${VER_FILE}" ]; then
	echo "Using existing ${VER_FILE} version file."
	echo ""
else
	echo "No ${VER_FILE} found. Preparing default version file."
	echo ""
	CAS_VERSION_MAIN=19
	CAS_VERSION_MAJOR=3
	CAS_VERSION_MINOR=0
	CAS_BUILD_NO=0000$(date +%m%d)
	CAS_BUILD_FLAG=
	write_version_file
fi

cat "${VER_FILE}"

76
modules/Makefile Normal file
View File

@@ -0,0 +1,76 @@
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#

# If KERNELRELEASE is defined, we've been invoked from the
# kernel build system and can use its language.
ifneq ($(KERNELRELEASE),)

include $(M)/config.mk

obj-y += cas_cache/
obj-y += cas_disk/

# Otherwise we were called directly from the command
# line; invoke the kernel build system.
else

# ':=' so $(shell ...) runs once at parse time, and PWD is set before use.
PWD := $(shell pwd)
VERSION_FILE := $(PWD)/CAS_VERSION
OCFDIR := $(PWD)/../ocf
KERNEL_DIR ?= "/lib/modules/$(shell uname -r)/build"
KERNEL_VERSION := $(shell uname -r)
MODULES_DIR := /lib/modules/$(KERNEL_VERSION)/extra

DISK_MODULE := cas_disk
CACHE_MODULE := cas_cache

DEPMOD := $(shell which depmod)
RMMOD := $(shell which rmmod)
MODPROBE := $(shell which modprobe)

all: default

# CAS_VERSION is (re)generated by the helper script next to this Makefile.
$(VERSION_FILE):
	./CAS_VERSION_GEN

# Extra targets and file configuration
ifneq ($(wildcard $(PWD)/extra.mk),)
include $(PWD)/extra.mk
else
sync distsync:
endif

# Use $(MAKE) (not literal `make`) so -j/-n and the jobserver propagate.
default: $(VERSION_FILE) sync
	cd $(KERNEL_DIR) && $(MAKE) M=$(PWD) modules

clean:
	cd $(KERNEL_DIR) && $(MAKE) M=$(PWD) clean

distclean: clean distsync

install:
	@echo "Installing Open-CAS modules"
	@install -m 755 -d $(MODULES_DIR)
	@install -m 744 cas_disk/$(DISK_MODULE).ko $(MODULES_DIR)/$(DISK_MODULE).ko
	@install -m 744 cas_cache/$(CACHE_MODULE).ko $(MODULES_DIR)/$(CACHE_MODULE).ko
	@$(DEPMOD)
	@$(MODPROBE) $(CACHE_MODULE)

uninstall:
	@echo "Uninstalling Open-CAS modules"
	@$(RMMOD) $(CACHE_MODULE)
	@$(RMMOD) $(DISK_MODULE)
	@rm $(MODULES_DIR)/$(CACHE_MODULE).ko
	@rm $(MODULES_DIR)/$(DISK_MODULE).ko
	@$(DEPMOD)

reinstall: uninstall install

.PHONY: all default clean distclean sync distsync install uninstall

endif

17
modules/README Normal file
View File

@@ -0,0 +1,17 @@
Open CAS accelerates Linux applications by caching active (hot) data to
a local flash device inside servers. Open CAS implements caching at the
server level, utilizing local high-performance flash media as the cache drive
media inside the application server as close as possible to the CPU, thus
reducing storage latency as much as possible.
The Open Cache Acceleration Software installs into the GNU/Linux operating
system itself, as a kernel module. The nature of the integration provides a
cache solution that is transparent to users and applications, and your
existing storage infrastructure. No storage migration effort or application
changes are required.
Open CAS is distributed on Dual BSD-2-Clause-Patent/GPLv2 license (see
https://opensource.org/licenses/BSDplusPatent and
https://opensource.org/licenses/GPL-2.0 for full license texts).
Open CAS uses Safe string library (safeclib) that is MIT licensed.

3
modules/cas_cache/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
include/
src/

View File

@@ -0,0 +1,10 @@
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#

include $(M)/config.mk

obj-m := cas_cache.o

# ':=' so `find` runs exactly once at parse time instead of on every
# expansion of the variable (the original '=' re-executed the shell call).
cas_cache-c := $(shell find $(M)/cas_cache -name \*.c)
# Map each source under $(M)/cas_cache to its object path relative to it.
cas_cache-objs := $(patsubst $(M)/cas_cache/%.c,%.o,$(cas_cache-c))

View File

@@ -0,0 +1,97 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */

/* Main internal header of the cas_cache kernel module: common includes,
 * log-level helpers, shared constants and the cas_disk function table. */

#ifndef __CAS_CACHE_H__
#define __CAS_CACHE_H__

#include "ocf/ocf.h"
#include "ocf_env.h"

#include <cas_version.h>
#include <cas_ioctl_codes.h>

#include "linux_kernel_version.h"
#include "layer_upgrade.h"
#include "control.h"
#include "layer_cache_management.h"
#include "service_ui_ioctl.h"
#include "utils/cas_cache_utils.h"
#include "volume/vol_blk_utils.h"
#include "classifier.h"
#include "context.h"

#include <linux/kallsyms.h>

/* printk() severity levels pre-combined with the OCF log prefix, so
 * every message of this module is uniformly tagged. */
#define CAS_KERN_EMERG KERN_EMERG OCF_PREFIX_SHORT
#define CAS_KERN_ALERT KERN_ALERT OCF_PREFIX_SHORT
#define CAS_KERN_CRIT KERN_CRIT OCF_PREFIX_SHORT
#define CAS_KERN_ERR KERN_ERR OCF_PREFIX_SHORT
#define CAS_KERN_WARNING KERN_WARNING OCF_PREFIX_SHORT
#define CAS_KERN_NOTICE KERN_NOTICE OCF_PREFIX_SHORT
#define CAS_KERN_INFO KERN_INFO OCF_PREFIX_SHORT
#define CAS_KERN_DEBUG KERN_DEBUG OCF_PREFIX_SHORT

/* Fallbacks for kernels that do not define the sector size macros. */
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT 9
#endif

#ifndef SECTOR_SIZE
#define SECTOR_SIZE (1<<SECTOR_SHIFT)
#endif

/* NOTE(review): presumably an upper bound on cache lines touched by a
 * single I/O request - rationale not visible in this header, confirm
 * against the users of this constant. */
#define MAX_LINES_PER_IO 16

/**
 * cache/core object types */
enum {
	BLOCK_DEVICE_VOLUME = 1, /**< block device volume */
	ATOMIC_DEVICE_VOLUME, /**< block device volume with atomic
				metadata support */
	/** \cond SKIP_IN_DOC */
	OBJECT_TYPE_MAX,
	NVME_CONTROLLER
	/** \endcond */
};

struct cas_classifier;

/* Per-cache private data attached to an OCF cache instance. */
struct cache_priv {
	/* I/O classifier context (see classifier.c) */
	struct cas_classifier *classifier;
	/* Queue used for management operations */
	ocf_queue_t mngt_queue;
	/* Flexible array of I/O queues; the allocation size is chosen by
	 * whoever allocates this structure (not visible in this header) */
	ocf_queue_t io_queues[];
};

extern ocf_ctx_t cas_ctx;
extern struct casdsk_functions_mapper casdisk_functions;

/* Table of entry points into the cas_disk module. NOTE(review): likely
 * resolved at runtime (the <linux/kallsyms.h> include above suggests
 * symbol lookup) - confirm against the code that fills this table. */
struct casdsk_functions_mapper {
	int (*casdsk_disk_dettach)(struct casdsk_disk *dsk);
	int (*casdsk_exp_obj_destroy)(struct casdsk_disk *dsk);
	int (*casdsk_exp_obj_create)(struct casdsk_disk *dsk, const char *dev_name,
			struct module *owner, struct casdsk_exp_obj_ops *ops);
	struct request_queue *(*casdsk_disk_get_queue)(struct casdsk_disk *dsk);
	void (*casdsk_store_config)(size_t n_blobs, struct casdsk_props_conf *blobs);
	struct block_device *(*casdsk_disk_get_blkdev)(struct casdsk_disk *dsk);
	struct request_queue *(*casdsk_exp_obj_get_queue)(struct casdsk_disk *dsk);
	uint32_t (*casdsk_get_version)(void);
	void (*casdsk_disk_close)(struct casdsk_disk *dsk);
	struct casdsk_disk *(*casdsk_disk_claim)(const char *path, void *private);
	int (*casdsk_exp_obj_unlock)(struct casdsk_disk *dsk);
	int (*casdsk_disk_set_pt)(struct casdsk_disk *dsk);
	size_t (*casdsk_get_stored_config)(struct casdsk_props_conf **blobs);
	struct gendisk *(*casdsk_disk_get_gendisk)(struct casdsk_disk *dsk);
	int (*casdsk_disk_attach) (struct casdsk_disk *dsk, struct module *owner,
			struct casdsk_exp_obj_ops *ops);
	int (*casdsk_disk_set_attached)(struct casdsk_disk *dsk);
	int (*casdsk_exp_obj_activate)(struct casdsk_disk *dsk);
	bool (*casdsk_exp_obj_activated)(struct casdsk_disk *ds);
	int (*casdsk_exp_obj_lock)(struct casdsk_disk *dsk);
	void (*casdsk_free_stored_config)(void);
	struct casdsk_disk *(*casdsk_disk_open)(const char *path, void *private);
	int (*casdsk_disk_clear_pt)(struct casdsk_disk *dsk);
	struct gendisk *(*casdsk_exp_obj_get_gendisk)(struct casdsk_disk *dsk);
};

#endif

View File

@@ -0,0 +1,967 @@
/*
* Copyright(c) 2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#include "linux_kernel_version.h"
#include "classifier.h"
#include "classifier_defs.h"
#include <linux/namei.h>
/* Kernel log prefix */
#define CAS_CLS_LOG_PREFIX OCF_PREFIX_SHORT"[Classifier]"

/* Production version logs.
 *
 * Fixed: the macro body used to end with a trailing ';', which makes
 * "if (x) CAS_CLS_MSG(...); else ..." expand into a syntax error and
 * produces double semicolons at call sites. All visible call sites
 * supply their own terminating semicolon. */
#define CAS_CLS_MSG(severity, format, ...) \
	printk(severity CAS_CLS_LOG_PREFIX " " format, ##__VA_ARGS__)

/* Set to 1 to enable debug logs */
#define CAS_CLASSIFIER_CLS_DEBUG 0

#if 1 == CAS_CLASSIFIER_CLS_DEBUG
/* Debug log */
#define CAS_CLS_DEBUG_MSG(format, ...) \
	CAS_CLS_MSG(KERN_INFO, format, ##__VA_ARGS__)
/* Trace log */
#define CAS_CLS_DEBUG_TRACE(format, ...) \
	trace_printk(format, ##__VA_ARGS__)
#else
/* Debug disabled: both helpers expand to nothing (call-site ';' remains
 * a harmless empty statement). */
#define CAS_CLS_DEBUG_MSG(format, ...)
#define CAS_CLS_DEBUG_TRACE(format, ...)
#endif
/* "done" condition test - unconditionally matches and terminates
 * evaluation of the remaining conditions in the rule. */
static cas_cls_eval_t _cas_cls_done_test(struct cas_classifier *cls,
		struct cas_cls_condition *c, struct cas_cls_io *io,
		ocf_part_id_t part_id)
{
	return (cas_cls_eval_t){.yes = 1, .stop = 1};
}
/* Metadata condition test - heuristically decides whether @io carries
 * filesystem metadata (as opposed to regular file data) based on the
 * properties of the first page and inode attached to the bio. */
static cas_cls_eval_t _cas_cls_metadata_test(struct cas_classifier *cls,
		struct cas_cls_condition *c, struct cas_cls_io *io,
		ocf_part_id_t part_id)
{
	if (!io->page)
		return cas_cls_eval_no;

	/* Anonymous pages are not file-backed - cannot be fs metadata. */
	if (PageAnon(io->page))
		return cas_cls_eval_no;

	if (PageSlab(io->page) || PageCompound(io->page)) {
		/* A filesystem issues IO on pages that do not belong
		 * to the file page cache. It means that it is a
		 * part of metadata
		 */
		return cas_cls_eval_yes;
	}

	if (!io->page->mapping) {
		/* XFS case, pages are allocated internally and do not
		 * have references into inode
		 */
		return cas_cls_eval_yes;
	}

	if (!io->inode)
		return cas_cls_eval_no;

	if (S_ISBLK(io->inode->i_mode)) {
		/* EXT3 and EXT4 case. Metadata IO is performed into pages
		 * of block device cache
		 */
		return cas_cls_eval_yes;
	}

	/* I/O on a directory inode - treated as metadata. */
	if (S_ISDIR(io->inode->i_mode)) {
		return cas_cls_eval_yes;
	}

	return cas_cls_eval_no;
}
/* Direct I/O condition test - matches when the bio targets an anonymous
 * (non page-cache) page, which is presumably how O_DIRECT requests look
 * at this layer. */
static cas_cls_eval_t _cas_cls_direct_test(struct cas_classifier *cls,
		struct cas_cls_condition *c, struct cas_cls_io *io,
		ocf_part_id_t part_id)
{
	if (io->page && PageAnon(io->page))
		return cas_cls_eval_yes;

	return cas_cls_eval_no;
}
/* Generic constructor for conditions that take no operand (e.g. direct,
 * metadata). Rejects any operand supplied by the user. */
static int _cas_cls_generic_ctr(struct cas_classifier *cls,
		struct cas_cls_condition *c, char *data)
{
	if (!data)
		return 0;

	CAS_CLS_MSG(KERN_ERR, "Unexpected operand in condition\n");
	return -EINVAL;
}
/* Generic condition destructor - releases optional per-condition context.
 *
 * kfree() is a no-op for NULL, so the original's explicit NULL check
 * was redundant and is removed. */
static void _cas_cls_generic_dtr(struct cas_classifier *cls,
		struct cas_cls_condition *c)
{
	kfree(c->context);
	c->context = NULL;
}
/* Numeric condition constructor. @data is expected to contain either
* plain number string or range specifier (e.g. "gt:4096"). */
static int _cas_cls_numeric_ctr(struct cas_classifier* cls,
struct cas_cls_condition *c, char *data)
{
struct cas_cls_numeric *ctx;
int result;
char *ptr;
if (!data || strlen(data) == 0) {
CAS_CLS_MSG(KERN_ERR, "Missing numeric condition operand\n");
return -EINVAL;
}
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->operator = cas_cls_numeric_eq;
ptr = strpbrk(data, ":");
if (ptr) {
/* Terminate sub-string containing arithmetic operator */
*ptr = '\0';
++ptr;
if (!strcmp(data, "eq")) {
ctx->operator = cas_cls_numeric_eq;
} else if (!strcmp(data, "ne")) {
ctx->operator = cas_cls_numeric_ne;
} else if (!strcmp(data, "lt")) {
ctx->operator = cas_cls_numeric_lt;
} else if (!strcmp(data, "gt")) {
ctx->operator = cas_cls_numeric_gt;
} else if (!strcmp(data, "le")) {
ctx->operator = cas_cls_numeric_le;
} else if (!strcmp(data, "ge")) {
ctx->operator = cas_cls_numeric_ge;
} else {
CAS_CLS_MSG(KERN_ERR, "Invalid numeric operator \n");
result = -EINVAL;
goto error;
}
} else {
/* Plain number case */
ptr = data;
}
result = kstrtou64(ptr, 10, &ctx->v_u64);
if (result) {
CAS_CLS_MSG(KERN_ERR, "Invalid numeric operand\n");
goto error;
}
CAS_CLS_DEBUG_MSG("\t\t - Using operator %d with value %llu\n",
ctx->operator, ctx->v_u64);
c->context = ctx;
return 0;
error:
kfree(ctx);
return result;
}
/* Unsigned int numeric test function - compares @val against the
 * condition operand using the operator selected at construction time. */
static cas_cls_eval_t _cas_cls_numeric_test_u(
		struct cas_cls_condition *c, uint64_t val)
{
	struct cas_cls_numeric *ctx = c->context;

	switch (ctx->operator) {
	case cas_cls_numeric_eq:
		return val == ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
	case cas_cls_numeric_ne:
		return val != ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
	case cas_cls_numeric_lt:
		return val < ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
	case cas_cls_numeric_gt:
		return val > ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
	case cas_cls_numeric_le:
		return val <= ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
	case cas_cls_numeric_ge:
		return val >= ctx->v_u64 ? cas_cls_eval_yes : cas_cls_eval_no;
	}

	/* Unreachable for valid operators - fail closed (no match). */
	return cas_cls_eval_no;
}
/* Io class test function - matches when the io class assigned so far
 * (@part_id) satisfies the numeric condition. */
static cas_cls_eval_t _cas_cls_io_class_test(struct cas_classifier *cls,
		struct cas_cls_condition *c, struct cas_cls_io *io,
		ocf_part_id_t part_id)
{
	return _cas_cls_numeric_test_u(c, part_id);
}
/* File size test function - matches when the target is a regular file
 * whose size satisfies the numeric condition.
 *
 * The original also rejected S_ISBLK inodes explicitly; that check is
 * subsumed by !S_ISREG (an inode cannot be both a block device and a
 * regular file), so it has been removed. */
static cas_cls_eval_t _cas_cls_file_size_test(
		struct cas_classifier *cls, struct cas_cls_condition *c,
		struct cas_cls_io *io, ocf_part_id_t part_id)
{
	if (!io->inode)
		return cas_cls_eval_no;

	/* Only regular files have a meaningful size to classify on. */
	if (!S_ISREG(io->inode->i_mode))
		return cas_cls_eval_no;

	return _cas_cls_numeric_test_u(c, i_size_read(io->inode));
}
/* Resolve path to inode - (re)resolves ctx->pathname to an inode number
 * and updates ctx->resolved / ctx->i_ino accordingly. Called from the
 * delayed work item, so the path can appear/disappear/change over time. */
static void _cas_cls_directory_resolve(struct cas_classifier *cls,
		struct cas_cls_directory *ctx)
{
	struct path path;
	struct inode *inode;
	int error;
	int o_res;		/* previous "resolved" state, for logging */
	unsigned long o_ino;	/* previous inode number, for logging */

	o_res = ctx->resolved;
	o_ino = ctx->i_ino;

	error = kern_path(ctx->pathname, LOOKUP_FOLLOW, &path);
	if (error) {
		/* Path no longer (or not yet) resolvable. */
		ctx->resolved = 0;
		if (o_res) {
			CAS_CLS_DEBUG_MSG("Removed inode resolution for %s\n",
					ctx->pathname);
		}
		return;
	}

	inode = path.dentry->d_inode;
	ctx->i_ino = inode->i_ino;
	ctx->resolved = 1;
	path_put(&path);

	if (!o_res) {
		CAS_CLS_DEBUG_MSG("Resolved %s to inode: %lu\n", ctx->pathname,
				ctx->i_ino);
	} else if (o_ino != ctx->i_ino) {
		CAS_CLS_DEBUG_MSG("Changed inode resolution for %s: %lu => %lu"
				"\n", ctx->pathname, o_ino, ctx->i_ino);
	}
}
/* Inode resolving work entry point - re-resolves the directory path and
 * re-queues itself: every 5s once resolved, every 1s while unresolved
 * (retrying more aggressively until the path appears). */
static void _cas_cls_directory_resolve_work(struct work_struct *work)
{
	struct cas_cls_directory *ctx;

	ctx = container_of(work, struct cas_cls_directory, d_work.work);
	_cas_cls_directory_resolve(ctx->cls, ctx);

	queue_delayed_work(ctx->cls->wq, &ctx->d_work,
			msecs_to_jiffies(ctx->resolved ? 5000 : 1000));
}
/* Get unaliased dentry for given dir inode.
 *
 * Fixed: the loop took iter->d_lock but released d->d_lock. Since @d is
 * NULL until a hashed alias is found, a non-hashed alias caused a NULL
 * pointer dereference (and, in general, unlocking a lock other than the
 * one taken). The unlock must target iter->d_lock.
 *
 * NOTE(review): the DENTRY_LIST_EMPTY check runs before i_lock is taken,
 * and S_ISDIR inodes return NULL here - both look intentional but are
 * worth confirming against the caller (_cas_cls_directory_test). */
static struct dentry *_cas_cls_dir_get_inode_dentry(struct inode *inode)
{
	struct dentry *d = NULL, *iter;
	ALIAS_NODE_TYPE *pos; /* alias list current element */

	if (DENTRY_LIST_EMPTY(&inode->i_dentry))
		return NULL;

	spin_lock(&inode->i_lock);

	if (S_ISDIR(inode->i_mode))
		goto unlock;

	INODE_FOR_EACH_DENTRY(pos, &inode->i_dentry) {
		iter = ALIAS_NODE_TO_DENTRY(pos);
		spin_lock(&iter->d_lock);
		if (!d_unhashed(iter))
			d = iter;
		spin_unlock(&iter->d_lock);
		if (d)
			break;
	}

unlock:
	spin_unlock(&inode->i_lock);
	return d;
}
/* Directory condition test function - matches when the I/O target file
 * lives under the directory configured in the condition context (by
 * walking the dentry tree upwards and comparing inode numbers). */
static cas_cls_eval_t _cas_cls_directory_test(
		struct cas_classifier *cls, struct cas_cls_condition *c,
		struct cas_cls_io *io, ocf_part_id_t part_id)
{
	struct cas_cls_directory *ctx;
	struct inode *inode, *p_inode;
	struct dentry *dentry, *p_dentry;

	ctx = c->context;
	inode = io->inode;

	/* No inode attached, or the configured path has not been resolved
	 * (yet) by the background work item - no match. */
	if (!inode || !ctx->resolved)
		return cas_cls_eval_no;

	/* I/O target inode dentry */
	dentry = _cas_cls_dir_get_inode_dentry(inode);
	if (!dentry)
		return cas_cls_eval_no;

	/* Walk up directory tree starting from I/O destination
	 * dir until current dir inode matches condition inode or top
	 * directory is reached. */
	while (inode) {
		if (inode->i_ino == ctx->i_ino)
			return cas_cls_eval_yes;

		/* d_lock protects d_parent while we read it. */
		spin_lock(&dentry->d_lock);
		p_dentry = dentry->d_parent;
		if (!p_dentry) {
			spin_unlock(&dentry->d_lock);
			return cas_cls_eval_no;
		}
		p_inode = p_dentry->d_inode;
		spin_unlock(&dentry->d_lock);

		if (p_inode != inode) {
			inode = p_inode;
			dentry = p_dentry;
		} else {
			/* Parent == self: filesystem root reached. */
			inode = NULL;
		}
	}

	return cas_cls_eval_no;
}
/* Directory condition constructor - @data is the directory path. Path
 * resolution is deferred to a delayed work item (first run after 10ms),
 * which keeps re-resolving the path periodically. */
static int _cas_cls_directory_ctr(struct cas_classifier *cls,
		struct cas_cls_condition *c, char *data)
{
	struct cas_cls_directory *ctx;

	if (!data || strlen(data) == 0) {
		CAS_CLS_MSG(KERN_ERR, "Missing directory specifier\n");
		return -EINVAL;
	}

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->cls = cls;
	ctx->resolved = 0;
	/* Own copy of the path - @data belongs to the rule parser. */
	ctx->pathname = kstrdup(data, GFP_KERNEL);
	if (!ctx->pathname) {
		kfree(ctx);
		return -ENOMEM;
	}

	INIT_DELAYED_WORK(&ctx->d_work, _cas_cls_directory_resolve_work);
	queue_delayed_work(cls->wq, &ctx->d_work,
			msecs_to_jiffies(10));

	c->context = ctx;

	return 0;
}
/* Directory condition destructor - stops the pending/periodic inode
 * resolution work before freeing the condition context. */
static void _cas_cls_directory_dtr(struct cas_classifier *cls,
		struct cas_cls_condition *c)
{
	struct cas_cls_directory *ctx = c->context;

	if (!ctx)
		return;

	cancel_delayed_work_sync(&ctx->d_work);
	kfree(ctx->pathname);
	kfree(ctx);
}
/* Array of condition handlers, terminated by an entry with NULL token.
 * Handlers without a .dtr have no per-condition context to release. */
static struct cas_cls_condition_handler _handlers[] = {
	{ .token = "done", .test = _cas_cls_done_test,
			.ctr = _cas_cls_generic_ctr },
	{ .token = "metadata", .test = _cas_cls_metadata_test,
			.ctr = _cas_cls_generic_ctr },
	{ .token = "direct", .test = _cas_cls_direct_test,
			.ctr = _cas_cls_generic_ctr },
	{ .token = "io_class", .test = _cas_cls_io_class_test,
			.ctr = _cas_cls_numeric_ctr,
			.dtr = _cas_cls_generic_dtr },
	{ .token = "file_size", .test = _cas_cls_file_size_test,
			.ctr = _cas_cls_numeric_ctr,
			.dtr = _cas_cls_generic_dtr },
	{ .token = "directory", .test = _cas_cls_directory_test,
			.ctr = _cas_cls_directory_ctr,
			.dtr = _cas_cls_directory_dtr },
	{ NULL }
};
/* Find the handler registered for condition string @token, or NULL if
 * no handler matches. */
static struct cas_cls_condition_handler *_cas_cls_lookup_handler(
		const char *token)
{
	struct cas_cls_condition_handler *h;

	for (h = _handlers; h->token; h++) {
		if (!strcmp(h->token, token))
			return h;
	}

	return NULL;
}
/* Deallocate condition */
static void _cas_cls_free_condition(struct cas_classifier *cls,
struct cas_cls_condition *c)
{
if (c->handler->dtr)
c->handler->dtr(cls, c);
kfree(c);
}
/* Allocate condition - looks up the handler for @token, allocates the
 * condition and runs the handler constructor with @data as operand.
 * Returns ERR_PTR(-ENOENT) for an unknown token, ERR_PTR on constructor
 * failure, or the new condition. @l_op is the logical operator combining
 * this condition with the previous ones in the rule. */
static struct cas_cls_condition * _cas_cls_create_condition(
		struct cas_classifier *cls, const char *token,
		char *data, int l_op)
{
	struct cas_cls_condition_handler *h;
	struct cas_cls_condition *c;
	int result;

	h = _cas_cls_lookup_handler(token);
	if (!h) {
		CAS_CLS_DEBUG_MSG("Cannot find handler for condition"
				" %s\n", token);
		return ERR_PTR(-ENOENT);
	}

	c = kmalloc(sizeof(*c), GFP_KERNEL);
	if (!c)
		return ERR_PTR(-ENOMEM);

	c->handler = h;
	c->context = NULL;
	c->l_op = l_op;

	/* Handler-specific construction (operand parsing etc.). */
	if (c->handler->ctr) {
		result = c->handler->ctr(cls, c, data);
		if (result) {
			kfree(c);
			return ERR_PTR(result);
		}
	}

	CAS_CLS_DEBUG_MSG("\t\t - Created condition %s\n", token);

	return c;
}
/* Read single condition from text input and return cas_cls_condition
 * representation. *rule pointer is advanced to point to next condition.
 * Input @rule string is modified to speed up parsing (selected bytes are
 * overwritten with 0).
 *
 * *l_op contains logical operator from previous condition and gets overwritten
 * with operator read from currently parsed condition.
 *
 * Returns pointer to condition if successful.
 * Returns NULL if no more conditions in string.
 * Returns error pointer in case of syntax or runtime error.
 */
static struct cas_cls_condition *_cas_cls_parse_condition(
		struct cas_classifier *cls, char **rule,
		enum cas_cls_logical_op *l_op)
{
	char *token = *rule;	/* Condition token substring (e.g. file_size) */
	char *operand = NULL;	/* Operand substring (e.g. "lt:4096" or path) */
	char *ptr;		/* Current position in input string */
	char *last = token;	/* Last seen substring in condition */
	char op = 'X';		/* Logical operator at the end of condition */
	struct cas_cls_condition *c;	/* Output condition */

	if (**rule == '\0') {
		/* Empty condition */
		return NULL;
	}

	/* Find end of token: either the operand separator ':' or a logical
	 * operator '&' / '|' terminating the condition. */
	ptr = strpbrk(*rule, ":&|");
	if (!ptr) {
		/* No operands in condition (e.g. "metadata"), no logical
		 * operators following condition - we're done with parsing. */
		goto create;
	}

	if (*ptr == ':') {
		/* Operand found - terminate token string and move forward. */
		*ptr = '\0';
		ptr += 1;
		operand = ptr;
		last = ptr;

		ptr = strpbrk(ptr, "&|");
		if (!ptr) {
			/* No operator past condition - create rule and exit */
			goto create;
		}
	}

	/* Remember operator value and zero target byte to terminate previous
	 * string (token or operand) */
	op = *ptr;
	*ptr = '\0';

create:
	c = _cas_cls_create_condition(cls, token, operand, *l_op);
	/* Operator for the NEXT condition; anything but '|' means "and"
	 * (including the 'X' sentinel for the last condition). */
	*l_op = (op == '|' ? cas_cls_logical_or : cas_cls_logical_and);

	/* Set *rule to character past current condition and logical operator */
	if (ptr) {
		/* Set pointer for next iteration */
		*rule = ptr + 1;
	} else {
		/* Set pointer to terminating zero */
		*rule = last + strlen(last);
	}

	return c;
}
/* Parse all conditions in rule text description. @rule might be overwritten */
static int _cas_cls_parse_conditions(struct cas_classifier *cls,
struct cas_cls_rule *r, char *rule)
{
char *start;
struct cas_cls_condition *c;
enum cas_cls_logical_op l_op = cas_cls_logical_or;
start = rule;
for (;;) {
c = _cas_cls_parse_condition(cls, &start, &l_op);
if (IS_ERR(c))
return PTR_ERR(c);
if (!c)
break;
list_add_tail(&c->list, &r->conditions);
}
return 0;
}
/* Fetch the classifier instance attached to the cache's private data. */
static struct cas_classifier* cas_get_classifier(ocf_cache_t cache)
{
	struct cache_priv *priv = ocf_cache_get_priv(cache);

	ENV_BUG_ON(!priv);

	return priv->classifier;
}
/* Attach (or clear, with @cls == NULL) the classifier instance in the
 * cache's private data. */
static void cas_set_classifier(ocf_cache_t cache,
		struct cas_classifier* cls)
{
	struct cache_priv *priv = ocf_cache_get_priv(cache);

	ENV_BUG_ON(!priv);

	priv->classifier = cls;
}
void _cas_cls_rule_destroy(struct cas_classifier *cls,
struct cas_cls_rule *r)
{
struct list_head *item, *n;
struct cas_cls_condition *c = NULL;
if (!r)
return;
list_for_each_safe(item, n, &r->conditions) {
c = list_entry(item, struct cas_cls_condition, list);
list_del(item);
_cas_cls_free_condition(cls, c);
}
kfree(r);
}
/* Destroy rule - public wrapper resolving the classifier from @cache
 * before delegating to _cas_cls_rule_destroy(). */
void cas_cls_rule_destroy(ocf_cache_t cache, struct cas_cls_rule *r)
{
	struct cas_classifier *cls = cas_get_classifier(cache);

	BUG_ON(!cls);

	_cas_cls_rule_destroy(cls, r);
}
/* Create rule from text description. @rule might be overwritten.
 * Returns NULL for class 0 or an empty description (meaning "no rule"),
 * an ERR_PTR on failure, or the new rule. */
static struct cas_cls_rule *_cas_cls_rule_create(struct cas_classifier *cls,
		ocf_part_id_t part_id, char *rule)
{
	struct cas_cls_rule *r;
	int result;

	/* Class 0 is the default/unclassified class - never gets a rule. */
	if (part_id == 0 || rule[0] == '\0')
		return NULL;

	r = kmalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->part_id = part_id;
	INIT_LIST_HEAD(&r->conditions);

	result = _cas_cls_parse_conditions(cls, r, rule);
	if (result) {
		/* Frees any conditions already attached to r. */
		_cas_cls_rule_destroy(cls, r);
		return ERR_PTR(result);
	}

	return r;
}
/* Update rule associated with given io class - removes any existing rule
 * for @part_id and inserts @new (may be NULL, meaning removal only) at a
 * position that keeps the list sorted by part_id. The old rule is freed
 * outside the write lock.
 *
 * Fixed: the debug message read "New rule for for class" (doubled word).
 */
void cas_cls_rule_apply(ocf_cache_t cache,
		ocf_part_id_t part_id, struct cas_cls_rule *new)
{
	struct cas_classifier *cls;
	struct cas_cls_rule *old = NULL, *elem;
	struct list_head *item, *_n;

	cls = cas_get_classifier(cache);
	BUG_ON(!cls);

	write_lock(&cls->lock);

	/* Walk through list of rules in reverse order (tail to head), visiting
	 * rules from high to low part_id */
	list_for_each_prev_safe(item, _n, &cls->rules) {
		elem = list_entry(item, struct cas_cls_rule, list);

		if (elem->part_id == part_id) {
			old = elem;
			list_del(item);
		}

		if (elem->part_id < part_id)
			break;
	}

	/* Insert new element past loop cursor */
	if (new)
		list_add(&new->list, item);

	write_unlock(&cls->lock);

	/* Free outside the lock - destructors may sleep (workqueue sync). */
	_cas_cls_rule_destroy(cls, old);

	if (old)
		CAS_CLS_DEBUG_MSG("Removed rule for class %d\n", part_id);
	if (new)
		CAS_CLS_DEBUG_MSG("New rule for class %d\n", part_id);
}
/*
 * Translate classification rule error from linux error code to CAS error code.
 * Internal classifier functions use PTR_ERR / ERR_PTR macros to propagate
 * error in pointers. These macros do not work well with CAS error codes, so
 * this function is used to form fine-grained CAS error code when returning
 * from classifier management function.
 */
static int _cas_cls_rule_err_to_cass_err(int err)
{
	switch (err) {
	case -ENOENT:
		/* Unknown condition token in the rule description. */
		return KCAS_ERR_CLS_RULE_UNKNOWN_CONDITION;
	case -EINVAL:
		/* Malformed operand / operator in the rule description. */
		return KCAS_ERR_CLS_RULE_INVALID_SYNTAX;
	default:
		/* Pass through other errno-style codes unchanged. */
		return err;
	}
}
/* Create and apply classification rule for given class id - the rule text
 * is the io class name fetched from OCF. A non-existent io class is not an
 * error (nothing to do). */
static int _cas_cls_rule_init(ocf_cache_t cache, ocf_part_id_t part_id)
{
	struct cas_classifier *cls;
	struct ocf_io_class_info *info;
	struct cas_cls_rule *r;
	int result;

	cls = cas_get_classifier(cache);
	if (!cls)
		return -EINVAL;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	result = ocf_cache_io_class_get_info(cache, part_id, info);
	if (result) {
		/* Class not configured - silently skip. */
		if (result == -OCF_ERR_IO_CLASS_NOT_EXIST)
			result = 0;
		goto exit;
	}

	/* Guard against a name that fills the whole buffer (no NUL). */
	if (strnlen(info->name, sizeof(info->name)) == sizeof(info->name)) {
		CAS_CLS_MSG(KERN_ERR, "IO class name not null terminated\n");
		result = -EINVAL;
		goto exit;
	}

	/* The io class name doubles as the rule description text. */
	r = _cas_cls_rule_create(cls, part_id, info->name);
	if (IS_ERR(r)) {
		result = _cas_cls_rule_err_to_cass_err(PTR_ERR(r));
		goto exit;
	}

	cas_cls_rule_apply(cache, part_id, r);

exit:
	kfree(info);
	return result;
}
/* Create classification rule from text description - public entry point.
 * On success stores the new rule in *cls_rule (it is NOT yet applied;
 * see cas_cls_rule_apply()). Returns 0 or a CAS/errno error code. */
int cas_cls_rule_create(ocf_cache_t cache,
		ocf_part_id_t part_id, const char* rule,
		struct cas_cls_rule **cls_rule)
{
	struct cas_cls_rule *r = NULL;
	struct cas_classifier *cls;
	char *_rule;
	int ret;

	if (!cls_rule)
		return -EINVAL;

	cls = cas_get_classifier(cache);
	if (!cls)
		return -EINVAL;

	/* Guard against a description that fills the whole buffer (no NUL). */
	if (strnlen(rule, OCF_IO_CLASS_NAME_MAX) == OCF_IO_CLASS_NAME_MAX) {
		CAS_CLS_MSG(KERN_ERR, "IO class name not null terminated\n");
		return -EINVAL;
	}

	/* Make description copy as _cas_cls_rule_create might modify input
	 * string */
	_rule = kstrdup(rule, GFP_KERNEL);
	if (!_rule)
		return -ENOMEM;

	r = _cas_cls_rule_create(cls, part_id, _rule);
	if (IS_ERR(r))
		ret = _cas_cls_rule_err_to_cass_err(PTR_ERR(r));
	else {
		CAS_CLS_DEBUG_MSG("Created rule: %s => %d\n", rule, part_id);
		*cls_rule = r;
		ret = 0;
	}

	kfree(_rule);

	return ret;
}
/* Deinitialize classifier and remove rules - frees every rule, tears down
 * the resolver workqueue and detaches the classifier from the cache. */
void cas_cls_deinit(ocf_cache_t cache)
{
	struct cas_classifier *cls;
	struct cas_cls_rule *r, *tmp;

	cls = cas_get_classifier(cache);
	ENV_BUG_ON(!cls);

	list_for_each_entry_safe(r, tmp, &cls->rules, list) {
		list_del(&r->list);
		_cas_cls_rule_destroy(cls, r);
	}

	destroy_workqueue(cls->wq);
	kfree(cls);
	cas_set_classifier(cache, NULL);

	CAS_CLS_MSG(KERN_INFO, "Deinitialized IO classifier\n");
}
/* Initialize classifier context - allocates the classifier, its rules
 * list, directory-resolver workqueue and lock. Returns the classifier
 * or an ERR_PTR on allocation failure. */
static struct cas_classifier *_cas_cls_init(ocf_cache_t cache)
{
	struct cas_classifier *cls;

	cls = kzalloc(sizeof(*cls), GFP_KERNEL);
	if (!cls)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&cls->rules);

/* alloc_workqueue() is only available on >= 2.6.36; single-threaded
 * equivalent on older kernels. max_active = 1 keeps path resolution
 * serialized. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36)
	cls->wq = alloc_workqueue("kcas_clsd", WQ_UNBOUND | WQ_FREEZABLE, 1);
#else
	cls->wq = create_singlethread_workqueue("kcas_clsd");
#endif
	if (!cls->wq) {
		kfree(cls);
		return ERR_PTR(-ENOMEM);
	}

	rwlock_init(&cls->lock);

	CAS_CLS_MSG(KERN_INFO, "Initialized IO classifier\n");

	return cls;
}
/* Initialize classifier and create rules for existing I/O classes.
 *
 * Fixed: @result was declared 'unsigned' while _cas_cls_rule_init()
 * returns negative errno-style codes; storing a negative error in an
 * unsigned variable and returning it through an int return type relies
 * on implementation-defined conversions. Use a plain int.
 */
int cas_cls_init(ocf_cache_t cache)
{
	struct cas_classifier *cls;
	int result = 0;
	unsigned i;

	cls = _cas_cls_init(cache);
	if (IS_ERR(cls))
		return PTR_ERR(cls);
	cas_set_classifier(cache, cls);

	/* Update rules for all I/O classes except 0 - this is default for all
	 * unclassified I/O */
	for (i = 1; i < OCF_IO_CLASS_MAX; i++) {
		result = _cas_cls_rule_init(cache, i);
		if (result)
			break;
	}

	/* Roll back fully on any failure. */
	if (result)
		cas_cls_deinit(cache);

	return result;
}
/* Determine whether io matches rule - evaluates the rule's conditions in
 * order, combining results with each condition's logical operator, with
 * short-circuiting for "and" and for conditions that set the stop flag. */
static cas_cls_eval_t cas_cls_process_rule(struct cas_classifier *cls,
		struct cas_cls_rule *r, struct cas_cls_io *io,
		ocf_part_id_t *part_id)
{
	struct list_head *item;
	struct cas_cls_condition *c;
	cas_cls_eval_t ret = cas_cls_eval_no, rr;

	CAS_CLS_DEBUG_TRACE(" Processing rule for class %d\n", r->part_id);
	list_for_each(item, &r->conditions) {

		c = list_entry(item, struct cas_cls_condition, list);

		/* Short-circuit: "false AND anything" stays false. */
		if (!ret.yes && c->l_op == cas_cls_logical_and)
			break;

		rr = c->handler->test(cls, c, io, *part_id);
		CAS_CLS_DEBUG_TRACE("  Processing condition %s => %d, stop:%d "
				"(l_op: %d)\n", c->handler->token, rr.yes,
				rr.stop, (int)c->l_op);

		/* Fold this condition's verdict into the running result. */
		ret.yes = (c->l_op == cas_cls_logical_and) ?
			rr.yes && ret.yes :
			rr.yes || ret.yes;
		ret.stop = rr.stop;

		/* A condition (e.g. "done") may force evaluation to end. */
		if (ret.stop)
			break;
	}

	CAS_CLS_DEBUG_TRACE(" Rule %d output => %d stop: %d\n", r->part_id,
			ret.yes, ret.stop);

	return ret;
}
/* Fill in cas_cls_io for given bio - it is assumed that ctx is
 * zeroed upon entry. Extracts the first page of the bio and, when the
 * page is a regular page-cache page, the owning inode. Fields are left
 * NULL whenever the corresponding object cannot be determined. */
static void _cas_cls_get_bio_context(struct bio *bio,
		struct cas_cls_io *ctx)
{
	struct page *page = NULL;

	if (!bio)
		return;
	ctx->bio = bio;

	/* Bio without a data segment (e.g. flush) - nothing to extract. */
	if (!SEGMENT_BVEC(bio_iovec(bio)))
		return;

	page = bio_page(bio);

	if (!page)
		return;
	ctx->page = page;

	/* Anonymous, slab and compound pages have no file mapping. */
	if (PageAnon(page))
		return;

	if (PageSlab(page) || PageCompound(page))
		return;

	if (!page->mapping)
		return;

	ctx->inode = page->mapping->host;
}
/* Determine I/O class for bio - runs every rule (sorted by part_id) over
 * the bio context under the read lock; the LAST matching rule wins unless
 * a rule sets the stop flag. Returns class 0 when nothing matches. */
ocf_part_id_t cas_cls_classify(ocf_cache_t cache, struct bio *bio)
{
	struct cas_classifier *cls;
	struct cas_cls_io io = {};
	struct list_head *item;
	struct cas_cls_rule *r;
	ocf_part_id_t part_id = 0;
	cas_cls_eval_t ret;

	cls = cas_get_classifier(cache);
	ENV_BUG_ON(!cls);

	_cas_cls_get_bio_context(bio, &io);

	read_lock(&cls->lock);
	CAS_CLS_DEBUG_TRACE("%s\n", "Starting processing");
	list_for_each(item, &cls->rules) {
		r = list_entry(item, struct cas_cls_rule, list);
		/* Pass current part_id so "io_class" conditions can chain. */
		ret = cas_cls_process_rule(cls, r, &io, &part_id);
		if (ret.yes)
			part_id = r->part_id;
		if (ret.stop)
			break;
	}
	read_unlock(&cls->lock);

	return part_id;
}

View File

@@ -0,0 +1,33 @@
/*
 * Copyright(c) 2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */

/* Public interface of the per-cache I/O classifier (see classifier.c). */

#ifndef __CLASSIFIER_H__
#define __CLASSIFIER_H__

struct cas_cls_rule;

/* Initialize classifier and create rules for existing I/O classes */
int cas_cls_init(ocf_cache_t cache);

/* Deinitialize classifier and remove rules */
void cas_cls_deinit(ocf_cache_t cache);

/* Allocate and initialize classification rule from its text description.
 * The rule is returned via *cls_rule and must still be applied. */
int cas_cls_rule_create(ocf_cache_t cache,
		ocf_part_id_t part_id, const char* rule,
		struct cas_cls_rule **cls_rule);

/* Deinit classification rule */
void cas_cls_rule_destroy(ocf_cache_t cache, struct cas_cls_rule *r);

/* Bind classification rule to io class (NULL @r removes the binding) */
void cas_cls_rule_apply(ocf_cache_t cache, ocf_part_id_t part_id,
		struct cas_cls_rule *r);

/* Determine I/O class for bio */
ocf_part_id_t cas_cls_classify(ocf_cache_t cache, struct bio *bio);

#endif

View File

@@ -0,0 +1,139 @@
/*
 * Copyright(c) 2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */

/* Internal data structures of the I/O classifier (classifier.c only). */

#ifndef __CLASSIFIER_DEFS_H__
#define __CLASSIFIER_DEFS_H__

/* Rule matches 1:1 with io class. It contains multiple conditions with
 * associated logical operator (and/or) */
struct cas_cls_rule {
	/* Rules list element */
	struct list_head list;

	/* Associated partition id */
	ocf_part_id_t part_id;

	/* Conditions for this rule */
	struct list_head conditions;
};

/* Classifier context - one per cache instance. */
struct cas_classifier {
	/* Rules list head (kept sorted by part_id) */
	struct list_head rules;

	/* Directory inode resolving workqueue */
	struct workqueue_struct *wq;

	/* Lock for rules list (read side: classification fast path) */
	rwlock_t lock;
};

struct cas_cls_condition_handler;

/* cas_cls_condition represents single test (e.g. file_size <= 4K) plus
 * logical operator (and/or) to combine evaluation of this condition with
 * previous conditions within one rule */
struct cas_cls_condition {
	/* Condition handler */
	struct cas_cls_condition_handler *handler;

	/* Conditions list element */
	struct list_head list;

	/* Data specific to this condition instance */
	void *context;

	/* Logical operator to apply to previous conditions evaluation
	 * (enum cas_cls_logical_op value) */
	int l_op;
};

/* Helper structure aggregating I/O data often accessed by condition handlers */
struct cas_cls_io {
	/* bio */
	struct bio *bio;

	/* First page associated with bio (NULL if none) */
	struct page *page;

	/* Inode associated with page (NULL if not file-backed) */
	struct inode *inode;
};

/* Condition evaluation return flags */
typedef struct cas_cls_eval {
	/* Condition matched */
	uint8_t yes : 1;
	/* Stop evaluating further conditions/rules */
	uint8_t stop : 1;
} cas_cls_eval_t;

static const cas_cls_eval_t cas_cls_eval_yes = { .yes = 1 };
static const cas_cls_eval_t cas_cls_eval_no = { };

/* Logical operators */
enum cas_cls_logical_op {
	cas_cls_logical_and = 0,
	cas_cls_logical_or
};

/* Condition handler - abstraction over different kinds of condition checks
 * (e.g. file size, metadata). Does not contain all the data required to
 * evaluate condition (e.g. actual file size value), these are stored in
 * @context member of cas_cls_condition object, provided as input argument to
 * test, ctr and dtr callbacks. */
struct cas_cls_condition_handler {
	/* String representing this condition class */
	const char *token;

	/* Condition test */
	cas_cls_eval_t (*test)(struct cas_classifier *cls,
			struct cas_cls_condition *c, struct cas_cls_io *io,
			ocf_part_id_t part_id);

	/* Condition constructor */
	int (*ctr)(struct cas_classifier *cls, struct cas_cls_condition *c,
			char *data);

	/* Condition destructor (optional - may be NULL) */
	void (*dtr)(struct cas_classifier *cls, struct cas_cls_condition *c);
};

/* Numeric condition numeric operators */
enum cas_cls_numeric_op {
	cas_cls_numeric_eq = 0,
	cas_cls_numeric_ne = 1,
	cas_cls_numeric_lt = 2,
	cas_cls_numeric_gt = 3,
	cas_cls_numeric_le = 4,
	cas_cls_numeric_ge = 5,
};

/* Numeric condition context */
struct cas_cls_numeric {
	/* Arithmetic operator */
	enum cas_cls_numeric_op operator;

	/* Condition operand as unsigned int */
	uint64_t v_u64;
};

/* Directory condition context */
struct cas_cls_directory {
	/* 1 if directory had been resolved */
	int resolved;

	/* Dir path */
	char *pathname;

	/* Resolved inode */
	unsigned long i_ino;

	/* Back pointer to classifier context */
	struct cas_classifier *cls;

	/* Work item associated with resolving dir for this condition */
	struct delayed_work d_work;
};

#endif

482
modules/cas_cache/context.c Normal file
View File

@@ -0,0 +1,482 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#include "context.h"
#include "utils/utils_rpool.h"
#include "utils/utils_data.h"
#include "utils/utils_gc.h"
#include "threads.h"
/* Memory pool for blk_data structures backing BIO vectors */
struct ocf_mpool *cas_bvec_pool;

/* Reserve pool of pre-allocated pages used to back blk_data buffers */
struct cas_reserve_pool *cas_bvec_pages_rpool;

/* Upper bound on the number of pages kept in the reserve pool */
#define CAS_ALLOC_PAGE_LIMIT 1024

/* CAS borrows the PG_private page flag to mark pages it owns */
#define PG_cas PG_private

/* Rate-limit interval for CAS log messages, in jiffies. Parenthesized so
 * the macro expands to a single expression in any surrounding context. */
#define CAS_LOG_RATELIMIT (HZ * 5)

/* High burst limit to ensure cache init logs are printed properly */
#define CAS_LOG_BURST_LIMIT 50
/* Tag @pg as CAS-owned via the borrowed PG_private bit */
static inline void _cas_page_set_priv(struct page *pg)
{
	set_bit(PG_cas, &pg->flags);
}
/* Drop the CAS ownership tag and reset the private cpu hint on @pg */
static inline void _cas_page_clear_priv(struct page *pg)
{
	clear_bit(PG_cas, &pg->flags);
	pg->private = 0;
}
/* Return non-zero when @pg carries the CAS ownership tag */
static inline int _cas_page_test_priv(struct page *pg)
{
	return test_bit(PG_cas, &pg->flags);
}
/* Reserve pool free callback: @item is the page's virtual address */
static void _cas_free_page_rpool(void *allocator_ctx, void *item)
{
	struct page *pg = virt_to_page(item);

	_cas_page_clear_priv(pg);
	__free_page(pg);
}
/* Remember in page->private which cpu's reserve pool @pg came from */
static void _cas_page_set_cpu(struct page *pg, int cpu)
{
	pg->private = cpu;
}
/*
 * Reserve pool allocation callback. Allocates a fresh page, tags it as
 * CAS-owned, records the owning @cpu and returns its virtual address
 * (NULL on allocation failure).
 */
void *_cas_alloc_page_rpool(void *allocator_ctx, int cpu)
{
	struct page *pg;

	pg = alloc_page(GFP_NOIO | __GFP_NORETRY);
	if (!pg)
		return NULL;

	/* A freshly allocated page must never carry the CAS tag already */
	if (_cas_page_test_priv(pg)) {
		printk(KERN_WARNING "CAS private bit is set\n");
		WARN(true, OCF_PREFIX_SHORT" CAS private bit is set\n");
	}

	_cas_page_set_priv(pg);
	_cas_page_set_cpu(pg, cpu);

	return page_address(pg);
}
/* Read back the cpu hint stored by _cas_page_set_cpu() */
static int _cas_page_get_cpu(struct page *pg)
{
	return pg->private;
}
/* *** CONTEXT DATA OPERATIONS *** */
/*
*
*/
/*
 * Allocate a context data buffer backed by @pages pages.
 *
 * Pages are taken from the per-cpu reserve pool when available, falling
 * back to alloc_page() otherwise. When @zalloc is true every page is
 * zero-filled. Returns NULL on failure; any pages allocated before the
 * failure are returned to the reserve pool or freed.
 */
ctx_data_t *__cas_ctx_data_alloc(uint32_t pages, bool zalloc)
{
	struct blk_data *data;
	uint32_t i, j;
	void *page_addr = NULL;
	struct page *page = NULL;
	int cpu;

	data = ocf_mpool_new(cas_bvec_pool, pages);

	if (!data) {
		CAS_PRINT_RL(KERN_ERR "Couldn't allocate BIO vector.\n");
		return NULL;
	}

	data->size = pages;

	for (i = 0; i < pages; ++i) {
		/* Prefer the reserve pool; fall back to the page allocator */
		page_addr = cas_rpool_try_get(cas_bvec_pages_rpool, &cpu);
		if (page_addr) {
			data->vec[i].bv_page = virt_to_page(page_addr);
			_cas_page_set_cpu(data->vec[i].bv_page, cpu);
		} else {
			data->vec[i].bv_page = alloc_page(GFP_NOIO);
		}

		if (!data->vec[i].bv_page)
			break;

		if (zalloc) {
			if (!page_addr) {
				page_addr = page_address(
						data->vec[i].bv_page);
			}
			memset(page_addr, 0, PAGE_SIZE);
		}

		data->vec[i].bv_len = PAGE_SIZE;
		data->vec[i].bv_offset = 0;
	}

	if (i != pages) {
		/* Allocation of page i failed - release pages [0, i).
		 * Fix vs original: iterate with a separate counter @j and
		 * index vec[j] (the original indexed vec[i] - the failed
		 * slot - on every iteration, leaking all the pages that
		 * were actually allocated), and keep @pages intact so the
		 * mpool element is returned to the bucket it came from. */
		for (j = 0; j < i; j++) {
			page = data->vec[j].bv_page;
			if (page && !(_cas_page_test_priv(page) &&
					!cas_rpool_try_put(cas_bvec_pages_rpool,
						page_address(page),
						_cas_page_get_cpu(page)))) {
				__free_page(page);
			}
		}
		ocf_mpool_del(cas_bvec_pool, data, pages);
		data = NULL;
	} else {
		/* Initialize iterator */
		cas_io_iter_init(&data->iter, data->vec, data->size);
	}

	return data;
}
/* Allocate a context data buffer of @pages pages (contents undefined) */
ctx_data_t *cas_ctx_data_alloc(uint32_t pages)
{
	return __cas_ctx_data_alloc(pages, false);
}
/* Allocate a zero-filled context data buffer of @pages pages */
ctx_data_t *cas_ctx_data_zalloc(uint32_t pages)
{
	return __cas_ctx_data_alloc(pages, true);
}
/*
*
*/
/*
 * Release a context data buffer. Each CAS-tagged page is offered back to
 * the reserve pool it was taken from; pages the pool refuses (or pages
 * that came straight from the page allocator) are freed.
 */
void cas_ctx_data_free(ctx_data_t *ctx_data)
{
	struct blk_data *data = ctx_data;
	uint32_t idx;

	if (!data)
		return;

	for (idx = 0; idx < data->size; idx++) {
		struct page *pg = data->vec[idx].bv_page;
		int returned = _cas_page_test_priv(pg) &&
				!cas_rpool_try_put(cas_bvec_pages_rpool,
						page_address(pg),
						_cas_page_get_cpu(pg));

		if (!returned)
			__free_page(pg);
	}

	ocf_mpool_del(cas_bvec_pool, data, data->size);
}
/* OCF mlock hook - no-op: buffers are kernel pages, nothing to pin */
static int _cas_ctx_data_mlock(ctx_data_t *ctx_data)
{
	return 0;
}
/* OCF munlock hook - no-op counterpart of _cas_ctx_data_mlock() */
static void _cas_ctx_data_munlock(ctx_data_t *ctx_data)
{
}
/*
 * Overwrite every page of the data buffer with zeros.
 * NOTE(review): plain memset() is used; for a guaranteed erase consider
 * memzero_explicit() (kernels >= 3.18) so the stores cannot be optimized
 * away - confirm the minimum supported kernel first.
 */
void cas_ctx_data_secure_erase(ctx_data_t *ctx_data)
{
	struct blk_data *data = ctx_data;
	uint32_t i;
	void *ptr;

	for (i = 0; i < data->size; i++) {
		ptr = page_address(data->vec[i].bv_page);
		memset(ptr, 0, PAGE_SIZE);
	}
}
/*
*
*/
/* Copy up to @size bytes from @src's current iterator position into @dst;
 * returns the number of bytes copied */
static uint32_t _cas_ctx_read_data(void *dst, ctx_data_t *src,
		uint32_t size)
{
	struct blk_data *blk = src;

	return cas_io_iter_cpy_to_data(dst, &blk->iter, size);
}
/*
*
*/
/* Copy up to @size bytes from @src into @dst at its current iterator
 * position; returns the number of bytes copied */
static uint32_t _cas_ctx_write_data(ctx_data_t *dst, const void *src,
		uint32_t size)
{
	struct blk_data *blk = dst;

	return cas_io_iter_cpy_from_data(&blk->iter, src, size);
}
/*
*
*/
/* Zero up to @size bytes at @dst's current iterator position; returns
 * the number of bytes zeroed */
static uint32_t _cas_ctx_zero_data(ctx_data_t *dst, uint32_t size)
{
	struct blk_data *blk = dst;

	return cas_io_iter_zero(&blk->iter, size);
}
/*
*
*/
/*
 * Reposition @dst's data iterator. For seek_begin the iterator is first
 * rewound to the start; in both cases it is then advanced by @offset.
 * Returns the number of bytes actually moved.
 *
 * Fix vs original: the seek_begin case fell through into seek_current
 * implicitly (harmless today, but a -Wimplicit-fallthrough trap) - make
 * the break explicit.
 */
static uint32_t _cas_ctx_seek_data(ctx_data_t *dst,
		ctx_data_seek_t seek, uint32_t offset)
{
	struct blk_data *data = dst;

	switch (seek) {
	case ctx_data_seek_begin:
		/* Rewind, then advance by @offset below */
		cas_io_iter_init(&data->iter, data->vec, data->size);
		break;
	case ctx_data_seek_current:
		/* TODO Implement this if needed or remove this from enum */
		break;
	default:
		BUG();
		return 0;
	}

	return cas_io_iter_move(&data->iter, offset);
}
/*
*
*/
/* Copy @bytes bytes from offset @from in @src to offset @to in @dst;
 * returns the number of bytes copied */
static uint64_t _cas_ctx_data_copy(ctx_data_t *dst, ctx_data_t *src,
		uint64_t to, uint64_t from, uint64_t bytes)
{
	struct blk_data *s = src;
	struct blk_data *d = dst;

	return cas_data_cpy(d->vec, d->size, s->vec, s->size,
			to, from, bytes);
}
/* OCF cleaner hook: spawn the cleaner kernel thread */
static int _cas_ctx_cleaner_init(ocf_cleaner_t c)
{
	return cas_create_cleaner_thread(c);
}
/* OCF cleaner hook: stop the cleaner kernel thread */
static void _cas_ctx_cleaner_stop(ocf_cleaner_t c)
{
	cas_stop_cleaner_thread(c);
}
/* OCF metadata updater hook: spawn the updater kernel thread */
static int _cas_ctx_metadata_updater_init(ocf_metadata_updater_t mu)
{
	return cas_create_metadata_updater_thread(mu);
}
/* OCF metadata updater hook: wake the updater thread */
static void _cas_ctx_metadata_updater_kick(ocf_metadata_updater_t mu)
{
	cas_kick_metadata_updater_thread(mu);
}
/* OCF metadata updater hook: stop the updater thread */
static void _cas_ctx_metadata_updater_stop(ocf_metadata_updater_t mu)
{
	cas_stop_metadata_updater_thread(mu);
}
/*
*
*/
/*
 * OCF logger hook: map an OCF log level to a kernel printk level and
 * emit the message. Returns 0 on success, -EINVAL for an out-of-range
 * level, -ENOMEM if the prefixed format string cannot be allocated.
 */
static int _cas_ctx_logger_printf(ocf_logger_t logger, ocf_logger_lvl_t lvl,
		const char *fmt, va_list args)
{
	static const char* level[] = {
		[log_emerg] = KERN_EMERG,
		[log_alert] = KERN_ALERT,
		[log_crit] = KERN_CRIT,
		[log_err] = KERN_ERR,
		[log_warn] = KERN_WARNING,
		[log_notice] = KERN_NOTICE,
		[log_info] = KERN_INFO,
		[log_debug] = KERN_DEBUG,
	};
	char *format;

	/* Fix vs original: bound by the element count, not sizeof(level)
	 * (the byte size - 8 * sizeof(char *) - which let out-of-range
	 * levels index past the array). */
	if (((unsigned)lvl) >= ARRAY_SIZE(level))
		return -EINVAL;

	format = kasprintf(GFP_ATOMIC, "%s%s", level[lvl], fmt);
	if (!format)
		return -ENOMEM;

	vprintk(format, args);
	kfree(format);

	return 0;
}
/*
*
*/
/*
 * OCF rate-limited logging hook. Uses one shared ratelimit state
 * (CAS_LOG_RATELIMIT interval, CAS_LOG_BURST_LIMIT burst) for all
 * callers; result of CAS_RATELIMIT decides whether @func_name's message
 * may be printed.
 */
static int _cas_ctx_logger_printf_rl(ocf_logger_t logger, const char *func_name)
{
	static DEFINE_RATELIMIT_STATE(cas_log_rl, CAS_LOG_RATELIMIT,
			CAS_LOG_BURST_LIMIT);

	if (!func_name)
		return -EINVAL;

	return CAS_RATELIMIT(&cas_log_rl, func_name);
}
/*
*
*/
/* OCF logger hook: dump the current kernel stack trace to the log */
static int _cas_ctx_logger_dump_stack(ocf_logger_t logger)
{
	dump_stack();

	return 0;
}
/* OCF context configuration: binds the Linux kernel implementations of
 * data buffers, cleaner/metadata-updater threads and logging into OCF */
static const struct ocf_ctx_config ctx_cfg = {
	.name = "CAS Linux Kernel",
	.ops = {
		/* Page-backed blk_data buffer operations */
		.data = {
			.alloc = cas_ctx_data_alloc,
			.free = cas_ctx_data_free,
			.mlock = _cas_ctx_data_mlock,
			.munlock = _cas_ctx_data_munlock,
			.read = _cas_ctx_read_data,
			.write = _cas_ctx_write_data,
			.zero = _cas_ctx_zero_data,
			.seek = _cas_ctx_seek_data,
			.copy = _cas_ctx_data_copy,
			.secure_erase = cas_ctx_data_secure_erase,
		},

		/* Cleaner thread lifecycle */
		.cleaner = {
			.init = _cas_ctx_cleaner_init,
			.stop = _cas_ctx_cleaner_stop,
		},

		/* Metadata updater thread lifecycle */
		.metadata_updater = {
			.init = _cas_ctx_metadata_updater_init,
			.kick = _cas_ctx_metadata_updater_kick,
			.stop = _cas_ctx_metadata_updater_stop,
		},

		/* Kernel log backend */
		.logger = {
			.printf = _cas_ctx_logger_printf,
			.printf_rl = _cas_ctx_logger_printf_rl,
			.dump_stack = _cas_ctx_logger_dump_stack,
		},
	},
};
/* *** CONTEXT INITIALIZATION *** */
/*
 * Initialize the CAS OCF context: OCF core, blk_data memory pool, page
 * reserve pool, garbage collector, block and atomic device layers, and
 * the OCF core pool. On any failure everything already set up is torn
 * down in reverse order and a negative errno is returned.
 */
int cas_initialize_context(void)
{
	struct blk_data data;
	int ret;

	ret = ocf_ctx_init(&cas_ctx, &ctx_cfg);
	if (ret < 0)
		return ret;

	/* `data` is used only for its sizeof()s when sizing the pool */
	cas_bvec_pool = ocf_mpool_create(NULL, sizeof(data),
			sizeof(data.vec[0]), GFP_NOIO, 7, "cas_biovec");

	if (!cas_bvec_pool) {
		printk(KERN_ERR "Cannot create BIO vector memory pool\n");
		ret = -ENOMEM;
		goto err_ctx;
	}

	cas_bvec_pages_rpool = cas_rpool_create(CAS_ALLOC_PAGE_LIMIT,
			NULL, PAGE_SIZE, _cas_alloc_page_rpool,
			_cas_free_page_rpool, NULL);
	if (!cas_bvec_pages_rpool) {
		printk(KERN_ERR "Cannot create reserve pool for "
				"BIO vector memory pool\n");
		ret = -ENOMEM;
		goto err_mpool;
	}

	cas_garbage_collector_init();

	ret = block_dev_init();
	if (ret) {
		printk(KERN_ERR "Cannot initialize block device layer\n");
		goto err_rpool;
	}

	ret = atomic_dev_init();
	if (ret) {
		printk(KERN_ERR "Cannot initialize atomic device layer\n");
		goto err_block_dev;
	}

	ocf_mngt_core_pool_init(cas_ctx);

	return 0;

err_block_dev:
	block_dev_deinit();
err_rpool:
	/* Fix vs original: undo cas_garbage_collector_init() here, as
	 * cas_cleanup_context() does on the regular teardown path. */
	cas_garbage_collector_deinit();
	cas_rpool_destroy(cas_bvec_pages_rpool, _cas_free_page_rpool, NULL);
err_mpool:
	ocf_mpool_destroy(cas_bvec_pool);
err_ctx:
	ocf_ctx_exit(cas_ctx);

	return ret;
}
/*
 * Tear down everything set up by cas_initialize_context(), in reverse
 * order of initialization. Returns the result of ocf_ctx_exit().
 */
int cas_cleanup_context(void)
{
	ocf_mngt_core_pool_deinit(cas_ctx);
	block_dev_deinit();
	atomic_dev_deinit();
	cas_garbage_collector_deinit();
	ocf_mpool_destroy(cas_bvec_pool);
	cas_rpool_destroy(cas_bvec_pages_rpool, _cas_free_page_rpool, NULL);

	return ocf_ctx_exit(cas_ctx);
}
/* *** CONTEXT DATA HELPER FUNCTION *** */
/*
*
*/
/* Allocate a blk_data with room for @size bio_vec entries from the
 * shared pool using @flags; returns NULL on failure */
struct blk_data *cas_alloc_blk_data(uint32_t size, gfp_t flags)
{
	struct blk_data *blk = ocf_mpool_new_f(cas_bvec_pool, size, flags);

	if (blk)
		blk->size = size;

	return blk;
}
/*
*
*/
/* Return @data to the shared blk_data pool; NULL is a no-op */
void cas_free_blk_data(struct blk_data *data)
{
	if (data)
		ocf_mpool_del(cas_bvec_pool, data, data->size);
}

View File

@@ -0,0 +1,79 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CONTEXT_H__
#define __CONTEXT_H__

#include "linux_kernel_version.h"

/* Iterator over an array of bio_vec entries */
struct bio_vec_iter {
	/* Underlying bio_vec array */
	struct bio_vec *vec;
	/* Number of entries in @vec */
	uint32_t vec_size;
	/* Index of the current entry */
	uint32_t idx;
	/* NOTE(review): presumably byte offset within the current entry -
	 * confirm against the cas_io_iter_* implementation */
	uint32_t offset;
	/* NOTE(review): presumably bytes remaining in the current entry -
	 * confirm against the cas_io_iter_* implementation */
	uint32_t len;
	/* NOTE(review): appears to point at the current entry - confirm */
	struct bio_vec *ivec;
};

struct blk_data {
	/**
	 * @brief Atomic counter for core device
	 */
	atomic_t master_remaining;

	/**
	 * @brief Core device request context (core private info)
	 */
	void *master_io_req;

	/**
	 * @brief CAS IO with which data is associated
	 */
	struct ocf_io *io;

	/**
	 * @brief List item used for IO splitting
	 */
	struct list_head list;

	/**
	 * @brief Timestamp of start processing request
	 */
	unsigned long long start_time;

	/**
	 * @brief Request data size (number of bio_vec entries in @vec)
	 */
	uint32_t size;

	/**
	 * @brief This field indicates an error for request
	 */
	int error;

	/**
	 * @brief Iterator for accessing data
	 */
	struct bio_vec_iter iter;

	/**
	 * @brief Request data
	 */
	struct bio_vec vec[];
};

/* Allocate/free blk_data from the shared memory pool */
struct blk_data *cas_alloc_blk_data(uint32_t size, gfp_t flags);
void cas_free_blk_data(struct blk_data *data);

/* Allocate/free page-backed OCF context data buffers */
ctx_data_t *cas_ctx_data_alloc(uint32_t pages);
ctx_data_t *cas_ctx_data_zalloc(uint32_t pages);
void cas_ctx_data_free(ctx_data_t *ctx_data);

/* Zero all pages of a context data buffer */
void cas_ctx_data_secure_erase(ctx_data_t *ctx_data);

/* Module-level setup/teardown of the CAS OCF context */
int cas_initialize_context(void);
int cas_cleanup_context(void);

#endif /* __CONTEXT_H__ */

View File

@@ -0,0 +1,80 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include <linux/cdev.h>
#include <linux/fs.h>
#include "linux_kernel_version.h"
#include "service_ui_ioctl.h"
#include "control.h"
#include "cas_cache/cas_cache.h"
/* Aggregates the kernel objects making up the CAS control device */
struct cas_ctrl_device {
	struct cdev cdev;
	struct class *class;
	dev_t dev;
};

/* Single module-wide control device instance */
static struct cas_ctrl_device _control_device;

/* Only ioctl is exposed; all management commands go through it */
static const struct file_operations _ctrl_dev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = cas_service_ioctl_ctrl
};
/*
 * Create the CAS control character device: allocate a chrdev region,
 * register the cdev, create a device class, and finally create the
 * /dev/cas_ctrl node. On any failure everything created so far is
 * released in reverse order and a negative errno is returned.
 */
int __init cas_ctrl_device_init(void)
{
	struct cas_ctrl_device *ctrl = &_control_device;
	struct device *device;
	int result = 0;

	/* Dynamically allocate one device number under the name "cas" */
	result = alloc_chrdev_region(&ctrl->dev, 0, 1, "cas");
	if (result) {
		printk(KERN_ERR "Cannot allocate control chrdev number.\n");
		goto error_alloc_chrdev_region;
	}

	cdev_init(&ctrl->cdev, &_ctrl_dev_fops);
	result = cdev_add(&ctrl->cdev, ctrl->dev, 1);
	if (result) {
		printk(KERN_ERR "Cannot add control chrdev.\n");
		goto error_cdev_add;
	}

	ctrl->class = class_create(THIS_MODULE, "cas");
	if (IS_ERR(ctrl->class)) {
		printk(KERN_ERR "Cannot create control chrdev class.\n");
		result = PTR_ERR(ctrl->class);
		goto error_class_create;
	}

	/* Creates the /dev/cas_ctrl node via udev */
	device = device_create(ctrl->class, NULL, ctrl->dev, NULL,
			"cas_ctrl");
	if (IS_ERR(device)) {
		printk(KERN_ERR "Cannot create control chrdev.\n");
		result = PTR_ERR(device);
		goto error_device_create;
	}

	return result;

	/* Unwind in reverse order of construction */
error_device_create:
	class_destroy(ctrl->class);
error_class_create:
	cdev_del(&ctrl->cdev);
error_cdev_add:
	unregister_chrdev_region(ctrl->dev, 1);
error_alloc_chrdev_region:
	return result;
}
/* Destroy the control device and release all resources created by
 * cas_ctrl_device_init(), in reverse order */
void __exit cas_ctrl_device_deinit(void)
{
	struct cas_ctrl_device *ctrl = &_control_device;

	device_destroy(ctrl->class, ctrl->dev);
	class_destroy(ctrl->class);
	cdev_del(&ctrl->cdev);
	unregister_chrdev_region(ctrl->dev, 1);
}

View File

@@ -0,0 +1,11 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_CONTROL_H__
#define __CAS_CONTROL_H__

/* Create and register the CAS control character device (/dev/cas_ctrl) */
int __init cas_ctrl_device_init(void);
/* Tear down the control device and release its chrdev region */
void __exit cas_ctrl_device_deinit(void);

#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,92 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __LAYER_CACHE_MANAGEMENT_H__
#define __LAYER_CACHE_MANAGEMENT_H__

/* Block device request submission type markers */
#define CAS_BLK_DEV_REQ_TYPE_BIO 1
#define CAS_BLK_DEV_REQ_TYPE_REQ 3

/* --- Cleaning policy management --- */
int cache_mng_set_cleaning_policy(ocf_cache_id_t cache_id, uint32_t type);
int cache_mng_get_cleaning_policy(ocf_cache_id_t cache_id, uint32_t *type);
int cache_mng_set_cleaning_param(ocf_cache_id_t cache_id, ocf_cleaning_t type,
		uint32_t param_id, uint32_t param_value);
int cache_mng_get_cleaning_param(ocf_cache_id_t cache_id, ocf_cleaning_t type,
		uint32_t param_id, uint32_t *param_value);

/* --- Core device management --- */
int cache_mng_add_core_to_cache(struct ocf_mngt_core_config *cfg,
		struct kcas_insert_core *cmd_info);
int cache_mng_remove_core_from_cache(struct kcas_remove_core *cmd);
int cache_mng_reset_core_stats(ocf_cache_id_t cache_id,
		ocf_core_id_t core_id);

/* --- IO class (partition) configuration --- */
int cache_mng_set_partitions(struct kcas_io_classes *cfg);

/* --- Cache instance lifecycle --- */
int cache_mng_exit_instance(ocf_cache_id_t id, int flush);
int cache_mng_prepare_cache_cfg(struct ocf_mngt_cache_config *cfg,
		struct ocf_mngt_cache_device_config *device_cfg,
		struct kcas_start_cache *cmd);
int cache_mng_core_pool_get_paths(struct kcas_core_pool_path *cmd_info);
int cache_mng_core_pool_remove(struct kcas_core_pool_remove *cmd_info);
int cache_mng_cache_check_device(struct kcas_cache_check_device *cmd_info);
int cache_mng_prepare_core_cfg(struct ocf_mngt_core_config *cfg,
		struct kcas_insert_core *cmd_info);
int cache_mng_init_instance(struct ocf_mngt_cache_config *cfg,
		struct ocf_mngt_cache_device_config *device_cfg,
		struct kcas_start_cache *cmd);

/* --- Sequential cutoff configuration --- */
int cache_mng_set_seq_cutoff_threshold(ocf_cache_id_t id, ocf_core_id_t core_id,
		uint32_t thresh);
int cache_mng_set_seq_cutoff_policy(ocf_cache_id_t id, ocf_core_id_t core_id,
		ocf_seq_cutoff_policy policy);
int cache_mng_get_seq_cutoff_threshold(ocf_cache_id_t id, ocf_core_id_t core_id,
		uint32_t *thresh);
int cache_mng_get_seq_cutoff_policy(ocf_cache_id_t id, ocf_core_id_t core_id,
		ocf_seq_cutoff_policy *policy);

/* --- Cache mode / flushing --- */
int cache_mng_set_cache_mode(ocf_cache_id_t id, ocf_cache_mode_t mode,
		uint8_t flush);
int cache_mng_flush_object(ocf_cache_id_t cache_id, ocf_core_id_t core_id);
int cache_mng_flush_device(ocf_cache_id_t id);
ocf_cache_line_t cache_mng_lookup(ocf_cache_t cache,
		ocf_core_id_t core_id, uint64_t core_cacheline);
int cache_mng_list_caches(struct kcas_cache_list *list);
int cache_mng_interrupt_flushing(ocf_cache_id_t id);

/* --- Information / statistics --- */
int cache_mng_get_info(struct kcas_cache_info *info);
int cache_mng_get_io_class_info(struct kcas_io_class *part);
int cache_mng_get_core_info(struct kcas_core_info *info);
void cache_mng_wait_for_rq_finish(ocf_cache_t cache);

/* --- Generic core/cache parameter get/set --- */
int cache_mng_set_core_params(struct kcas_set_core_param *info);
int cache_mng_get_core_params(struct kcas_get_core_param *info);
int cache_mng_set_cache_params(struct kcas_set_cache_param *info);
int cache_mng_get_cache_params(struct kcas_get_cache_param *info);

#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,46 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __LAYER_UPGRADE_H
#define __LAYER_UPGRADE_H

#include "cas_cache/cas_cache.h"

extern bool in_upgrade;

/**
 * @brief Check that CAS is in upgrade state
 * @return true if it is or false if it isn't
 */
bool cas_upgrade_is_in_upgrade(void);

/**
 * @brief Check that caches configuration is stored at casdisk
 * @return 0 if exist
 */
int cas_upgrade_get_configuration(void);

/**
 * @brief Start upgrade in flight procedure, dump configuration,
 *	switch caches to PT and close caches
 * @return result
 */
int cas_upgrade(void);

/**
 * @brief Finish upgrade in new CAS module - restore all caches
 * @return result of restoring
 */
int cas_upgrade_finish(void);

/**
 * @brief Try to parse configuration stored in casdisk
 * @return result of verification
 */
int cas_upgrade_verify(void);

#endif /* __LAYER_UPGRADE_H */

View File

@@ -0,0 +1,624 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __LINUX_KERNEL_VERSION_H__
#define __LINUX_KERNEL_VERSION_H__
/* Libraries. */
#include <linux/types.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/version.h>
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/ioctl.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/swap.h>
#include <linux/thread_info.h>
#include <asm-generic/ioctl.h>
#include <linux/bitops.h>
#include <linux/crc16.h>
#include <linux/crc32.h>
#include <linux/nmi.h>
#include <linux/ratelimit.h>
#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#endif
#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0)
#include <generated/utsrelease.h>
#ifdef UTS_UBUNTU_RELEASE_ABI
#define CAS_UBUNTU
#endif
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
#error Unsupported Linux Kernel Version
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
#define FILE_INODE(file) file->f_inode
#else
#define FILE_INODE(file) file->f_dentry->d_inode
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 76)
#define DENTRY_ALIAS_HEAD(dentry) (dentry)->d_u.d_alias
#define ALIAS_NODE_TO_DENTRY(alias) container_of(alias, struct dentry, d_u.d_alias)
#else
#define DENTRY_ALIAS_HEAD(dentry) (dentry)->d_alias
#define ALIAS_NODE_TO_DENTRY(alias) container_of(alias, struct dentry, d_alias)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
#define ALIAS_NODE_TYPE struct hlist_node
#define DENTRY_LIST_EMPTY(head) hlist_empty(head)
#define INODE_FOR_EACH_DENTRY(pos, head) hlist_for_each(pos, head)
#else
#define DENTRY_LIST_EMPTY(head) list_empty(head)
#define ALIAS_NODE_TYPE struct list_head
#define INODE_FOR_EACH_DENTRY(pos, head) list_for_each(pos, head)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
#define BIO_OP_STATUS(bio) bio->bi_status
#else
#define BIO_OP_STATUS(bio) bio->bi_error
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
#define BIO_ENDIO(BIO, BYTES_DONE, ERROR) \
({ BIO_OP_STATUS(BIO) = ERROR; bio_endio(BIO); })
#else
#define BIO_ENDIO(BIO, BYTES_DONE, ERROR) bio_endio(BIO, ERROR)
#endif
#define REFER_BLOCK_CALLBACK(name) name##_callback
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
#define DECLARE_BLOCK_CALLBACK(name, BIO, BYTES_DONE, ERROR) \
void name##_callback(BIO, ERROR)
#define BLOCK_CALLBACK_INIT(BIO) {; }
#define BLOCK_CALLBACK_RETURN() { return; }
#define BLOCK_CALLBACK_ERROR(BIO, ERROR) ERROR
#else
#define DECLARE_BLOCK_CALLBACK(name, BIO, BYTES_DONE, ERROR) \
void name##_callback(BIO)
#define BLOCK_CALLBACK_INIT(BIO) {; }
#define BLOCK_CALLBACK_RETURN() { return; }
#define BLOCK_CALLBACK_ERROR(BIO, ERROR) BIO_OP_STATUS(BIO)
#endif
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 37)
#define OPEN_BDEV_EXCLUSIVE(PATH, FMODE, HOLDER) \
blkdev_get_by_path(PATH, (FMODE_EXCL | FMODE), HOLDER)
#define CLOSE_BDEV_EXCLUSIVE(BDEV, FMODE) \
blkdev_put(BDEV, (FMODE_EXCL | FMODE))
#else
#define OPEN_BDEV_EXCLUSIVE(PATH, FMODE, HOLDER) \
open_bdev_exclusive(PATH, FMODE, HOLDER)
#define CLOSE_BDEV_EXCLUSIVE(BDEV, FMODE) \
close_bdev_exclusive(BDEV, FMODE)
#endif
#ifdef CAS_UBUNTU
#define LOOKUP_BDEV(PATH) lookup_bdev(PATH, 0)
#else
#define LOOKUP_BDEV(PATH) lookup_bdev(PATH)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined CAS_SLES12SP3
#define BIO_OP_FLAGS_FORMAT "0x%016X"
#define BIO_OP_FLAGS(bio) (bio)->bi_opf
#else
#define BIO_OP_FLAGS_FORMAT "0x%016lX"
#define BIO_OP_FLAGS(bio) (bio)->bi_rw
#endif
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32)
#define BIO_RW_FLAGS ((1U << BIO_RW_UNPLUG) | \
(1U << BIO_RW_NOIDLE) | (1U << BIO_RW_SYNCIO))
#define BIO_SET_RW_FLAGS(bio) BIO_OP_FLAGS((bio)) |= BIO_RW_FLAGS
#else
#define BIO_RW_FLAGS 0
#define BIO_SET_RW_FLAGS(bio)
#endif
#if defined RQF_SOFTBARRIER
#define CHECK_BARRIER(bio) ((BIO_OP_FLAGS(bio) & RQF_SOFTBARRIER) != 0)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 1)
#define CHECK_BARRIER(bio) ((BIO_OP_FLAGS(bio) & REQ_SOFTBARRIER) != 0)
#else
#define CHECK_BARRIER(bio) (bio_rw_flagged((bio), BIO_RW_BARRIER))
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined CAS_SLES12SP3
#define RQ_DATA_DIR(rq) rq_data_dir(rq)
#define RQ_DATA_DIR_WR WRITE
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 34)
#define RQ_DATA_DIR(rq) rq_data_dir(rq)
#define RQ_DATA_DIR_WR REQ_WRITE
#else
#define RQ_DATA_DIR(rq) rq_data_dir(rq)
#define RQ_DATA_DIR_WR WRITE
#endif
#if defined REQ_PREFLUSH
#define CAS_REQ_FLUSH REQ_PREFLUSH
#define CAS_FLUSH_SUPPORTED
#elif defined REQ_FLUSH
#define CAS_REQ_FLUSH REQ_FLUSH
#define CAS_FLUSH_SUPPORTED
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0) || defined CAS_SLES12SP3
/* Kernels >= 4.6 (and SLES12SP3) report write-cache/FUA capability via
 * request queue flags */
#define CHECK_QUEUE_FLUSH(q) test_bit(QUEUE_FLAG_WC, &(q)->queue_flags)
#define CHECK_QUEUE_FUA(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)

/* Configure volatile write cache and FUA support on queue @q */
static inline void cas_set_queue_flush_fua(struct request_queue *q,
		bool flush, bool fua)
{
	blk_queue_write_cache(q, flush, fua);
}

#else
/* Older kernels keep the capability bits in q->flush_flags */
#define CHECK_QUEUE_FLUSH(q) ((q)->flush_flags & CAS_REQ_FLUSH)
#define CHECK_QUEUE_FUA(q) ((q)->flush_flags & REQ_FUA)

/* Configure volatile write cache and FUA support on queue @q */
static inline void cas_set_queue_flush_fua(struct request_queue *q,
		bool flush, bool fua)
{
	unsigned int flags = 0;

	if (flush)
		flags |= CAS_REQ_FLUSH;
	if (fua)
		flags |= REQ_FUA;
	if (flags)
		blk_queue_flush(q, flags);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
#ifdef WRITE_FLUSH
#define RQ_IS_FLUSH(rq) ((rq)->cmd_flags & CAS_REQ_FLUSH)
#ifdef BIO_FLUSH
#define CAS_IS_WRITE_FLUSH(flags) ((flags) & BIO_FLUSH)
#else
#define CAS_IS_WRITE_FLUSH(flags) \
((flags) & CAS_REQ_FLUSH)
#endif
#define OCF_WRITE_FLUSH WRITE_FLUSH
#elif defined REQ_PREFLUSH
#define RQ_IS_FLUSH(rq) ((rq)->cmd_flags & REQ_PREFLUSH)
#define OCF_WRITE_FLUSH (REQ_OP_WRITE | REQ_PREFLUSH)
#define CAS_IS_WRITE_FLUSH(flags) \
(OCF_WRITE_FLUSH == ((flags) & OCF_WRITE_FLUSH))
#else
#define RQ_IS_FLUSH(rq) 0
#define CAS_IS_WRITE_FLUSH(flags) \
(WRITE_BARRIER == ((flags) & WRITE_BARRIER))
#define OCF_WRITE_FLUSH WRITE_BARRIER
#endif /* #ifdef WRITE_FLUSH */
#ifdef WRITE_FLUSH_FUA
#define OCF_WRITE_FLUSH_FUA WRITE_FLUSH_FUA
#ifdef BIO_FUA
#define CAS_IS_WRITE_FLUSH_FUA(flags) \
((BIO_FUA | BIO_FLUSH) == \
((flags) & (BIO_FUA | BIO_FLUSH)))
#else
#define CAS_IS_WRITE_FLUSH_FUA(flags) \
((REQ_FUA | CAS_REQ_FLUSH) == \
((flags) & (REQ_FUA | CAS_REQ_FLUSH)))
#endif
#elif defined REQ_PREFLUSH
#define CAS_IS_WRITE_FLUSH_FUA(flags) \
((REQ_PREFLUSH | REQ_FUA) == \
((flags) & (REQ_PREFLUSH |REQ_FUA)))
#define OCF_WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA)
#else
#define CAS_IS_WRITE_FLUSH_FUA(flags) 0
#define OCF_WRITE_FLUSH_FUA WRITE_BARRIER
#endif /* #ifdef WRITE_FLUSH_FUA */
#ifdef WRITE_FUA
#ifdef BIO_FUA
#define CAS_IS_WRITE_FUA(flags) ((flags) & BIO_FUA)
#else
#define CAS_IS_WRITE_FUA(flags) ((flags) & REQ_FUA)
#endif
#define OCF_WRITE_FUA WRITE_FUA
#elif defined REQ_FUA
#define CAS_IS_WRITE_FUA(flags) ((flags) & REQ_FUA)
#define OCF_WRITE_FUA REQ_FUA
#else
#define CAS_IS_WRITE_FUA(flags) 0
#define OCF_WRITE_FUA WRITE_BARRIER
#endif /* #ifdef WRITE_FUA */
#endif /* #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) */
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 7, 9)
#define DAEMONIZE(name, arg...) daemonize(name, ##arg)
#else
#define DAEMONIZE(name, arg...) do { } while (0)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
#define SET_QUEUE_CHUNK_SECTORS(queue, chunk_size) \
queue->limits.chunk_sectors = chunk_size;
#else
#define SET_QUEUE_CHUNK_SECTORS(queue, chunk_size) {; }
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
#define BIO_BISIZE(bio) bio->bi_size
#define BIO_BIIDX(bio) bio->bi_idx
#define BIO_BISECTOR(bio) bio->bi_sector
#else
#define BIO_BISIZE(bio) bio->bi_iter.bi_size
#define BIO_BISECTOR(bio) bio->bi_iter.bi_sector
#define BIO_BIIDX(bio) bio->bi_iter.bi_idx
#endif
#ifdef CAS_SLES12SP3
#define CAS_IS_DISCARD(bio) \
(((BIO_OP_FLAGS(bio)) & REQ_OP_MASK) == REQ_OP_DISCARD)
#define CAS_BIO_DISCARD \
((REQ_OP_WRITE | REQ_OP_DISCARD))
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
#define CAS_IS_DISCARD(bio) \
(bio_op(bio) == REQ_OP_DISCARD)
#define CAS_BIO_DISCARD \
(REQ_OP_DISCARD)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
#define CAS_IS_DISCARD(bio) \
((BIO_OP_FLAGS(bio)) & REQ_OP_DISCARD)
#define CAS_BIO_DISCARD \
((REQ_OP_WRITE | REQ_OP_DISCARD))
#elif LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
#define CAS_IS_DISCARD(bio) ((BIO_OP_FLAGS(bio)) & REQ_DISCARD)
#define CAS_BIO_DISCARD (REQ_WRITE | REQ_DISCARD)
#else
#define CAS_IS_DISCARD(bio) ((BIO_OP_FLAGS(bio)) & (1 << BIO_RW_DISCARD))
#define CAS_BIO_DISCARD ((1 << BIO_RW) | (1 << BIO_RW_DISCARD))
#endif
#include <linux/mm.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
#include <uapi/asm-generic/mman-common.h>

/* Map an anonymous, private, read/write region of @len bytes */
static inline unsigned long cas_vm_mmap(struct file *file,
		unsigned long addr, unsigned long len)
{
	return vm_mmap(file, addr, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, 0);
}

/* Unmap a region previously mapped with cas_vm_mmap() */
static inline int cas_vm_munmap(unsigned long start, size_t len)
{
	return vm_munmap(start, len);
}

#else
#include <asm-generic/mman-common.h>

/* Kernels <= 2.6.32 lack the vm_mmap()/vm_munmap() wrappers - call the
 * mm primitives directly */
static inline unsigned long cas_vm_mmap(struct file *file,
		unsigned long addr, unsigned long len)
{
	return do_mmap_pgoff(file, addr, len, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, 0);
}

static inline int cas_vm_munmap(unsigned long start, size_t len)
{
	return do_munmap(current->mm, start, len);
}
#endif
/*
* For 8KB process kernel stack check if request is not continous and
* submit each bio as separate request. This prevent nvme driver from
* splitting requests.
* For large requests, nvme splitting causes stack overrun.
*/
#if THREAD_SIZE <= 8192
#define RQ_CHECK_CONTINOUS
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
#define SEGMENT_BVEC(vec) (&(vec))
#else
#define SEGMENT_BVEC(vec) (vec)
#endif
#ifndef SHRT_MIN
#define SHRT_MIN ((s16)-32768)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX ((s16)32767)
#endif
#define ENOTSUP ENOTSUPP
#ifdef RHEL_RELEASE_VERSION
#if RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(7, 3)
#define CAS_RHEL_73
#endif
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || defined CAS_SLES12SP3
/* On these kernels submit_bio() takes no rw argument - fold @rw into the
 * bio's op/flags field before submitting */
static inline blk_qc_t cas_submit_bio(int rw, struct bio *bio)
{
	BIO_OP_FLAGS(bio) |= rw;
	return submit_bio(bio);
}

#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
/* 4.4 - 4.6: submit_bio(rw, bio) returns a blk_qc_t cookie */
static inline blk_qc_t cas_submit_bio(int rw, struct bio *bio)
{
	return submit_bio(rw, bio);
}

#else
/* Pre-4.4: submit_bio() returns void */
static inline void cas_submit_bio(int rw, struct bio *bio)
{
	submit_bio(rw, bio);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
#define cas_blk_rq_set_block_pc(rq) {}
#else
#define cas_blk_rq_set_block_pc(rq) blk_rq_set_block_pc(rq)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
#define cas_blk_queue_bounce(q, bounce_bio) ({})
#else
#define cas_blk_queue_bounce(q, bounce_bio) blk_queue_bounce(q, bounce_bio)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 11)
#define cas_blk_rq_append_bio(rq, bounce_bio) blk_rq_append_bio(rq, &bounce_bio)
#else
#define cas_blk_rq_append_bio(rq, bounce_bio) blk_rq_append_bio(rq, bounce_bio)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || defined CAS_SLES12SP3
/*
 * blk_make_request() was removed in 4.8 - reimplement it: get a request,
 * mark it as a block-PC request, then bounce and append every bio in the
 * chain. Returns the request or an ERR_PTR on failure.
 */
static inline struct request *cas_blk_make_request(struct request_queue *q,
		struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
	if (IS_ERR(rq))
		return rq;

	cas_blk_rq_set_block_pc(rq);
	rq->q = q;

	for_each_bio(bio) {
		struct bio *bounce_bio = bio;
		int ret;

		cas_blk_queue_bounce(q, &bounce_bio);
		ret = cas_blk_rq_append_bio(rq, bounce_bio);
		if (unlikely(ret)) {
			/* Appending failed - give the request back */
			blk_put_request(rq);
			return ERR_PTR(ret);
		}
	}

	return rq;
}

#else
/* Older kernels provide blk_make_request() directly */
static inline struct request *cas_blk_make_request(struct request_queue *q,
		struct bio *bio, gfp_t gfp_mask)
{
	return blk_make_request(q, bio, gfp_mask);
}
#endif
#ifdef CAS_RHEL_73
/*
 * Copy the cache queue's limits onto the exported object's queue, while
 * capping transfer sizes at the core device's limits. RHEL 7.3 carries
 * limits in an extra limits_aux structure that must be preserved and
 * deep-copied.
 */
static inline void cas_copy_queue_limits(struct request_queue *exp_q,
		struct request_queue *cache_q, struct request_queue *core_q)
{
	struct queue_limits_aux *l_aux = exp_q->limits.limits_aux;

	exp_q->limits = cache_q->limits;
	exp_q->limits.limits_aux = l_aux;
	if (exp_q->limits.limits_aux && cache_q->limits.limits_aux)
		*exp_q->limits.limits_aux = *cache_q->limits.limits_aux;

	exp_q->limits.max_sectors = core_q->limits.max_sectors;
	exp_q->limits.max_hw_sectors = core_q->limits.max_hw_sectors;
	exp_q->limits.max_segments = core_q->limits.max_segments;
	exp_q->limits.max_write_same_sectors = 0;

	/*
	 * Workaround for RHEL/CentOS 7.3 bug in kernel.
	 * Merging implementation on blk-mq does not respec virt boundary
	 * restriction and front merges bios with non-zero offsets.
	 * This leads to request with gaps between bios and in consequence
	 * triggers BUG_ON() in nvme driver or silently corrupts data.
	 * To prevent this, disable merging on cache queue if there are
	 * requirements regarding virt boundary (marking bios with REQ_NOMERGE
	 * does not solve this problem).
	 */
	if (queue_virt_boundary(cache_q))
		queue_flag_set(QUEUE_FLAG_NOMERGES, cache_q);
}
#else
/* Copy the cache queue's limits onto the exported object's queue, while
 * capping transfer sizes at the core device's limits */
static inline void cas_copy_queue_limits(struct request_queue *exp_q,
		struct request_queue *cache_q, struct request_queue *core_q)
{
	exp_q->limits = cache_q->limits;

	exp_q->limits.max_sectors = core_q->limits.max_sectors;
	exp_q->limits.max_hw_sectors = core_q->limits.max_hw_sectors;
	exp_q->limits.max_segments = core_q->limits.max_segments;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) || defined CAS_SLES12SP3
	exp_q->limits.max_write_same_sectors = 0;
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) || defined CAS_SLES12SP3
	exp_q->limits.max_write_zeroes_sectors = 0;
#endif
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
#define CAS_GARBAGE_COLLECTOR
#endif
/*
 * Rate-limited printk.  Wrapped in do { } while (0) so the macro is a
 * single statement: the original bare `if (...) printk(...)` form would
 * bind to an enclosing `else` (dangling-else) and could not be used
 * safely as the body of an un-braced if/else at the call site.
 */
#define CAS_PRINT_RL(...) \
	do { \
		if (printk_ratelimit()) \
			printk(__VA_ARGS__); \
	} while (0)
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
/*
 * Pre-3.19 kernels have no generic_start_io_acct()/generic_end_io_acct();
 * replicate the per-partition accounting by hand using part_stat_*().
 * The request_queue argument is unused here but kept so all variants
 * share one signature.
 */
static inline void cas_generic_start_io_acct(struct request_queue *q,
		int rw, unsigned long sectors, struct hd_struct *part)
{
	int cpu = part_stat_lock();

	part_round_stats(cpu, part);
	part_stat_inc(cpu, part, ios[rw]);
	part_stat_add(cpu, part, sectors[rw], sectors);
	part_inc_in_flight(part, rw);

	part_stat_unlock();
}

static inline void cas_generic_end_io_acct(struct request_queue *q,
		int rw, struct hd_struct *part, unsigned long start_time)
{
	/* jiffies-based duration; wraparound is handled by unsigned math */
	unsigned long duration = jiffies - start_time;
	int cpu = part_stat_lock();

	part_stat_add(cpu, part, ticks[rw], duration);
	part_round_stats(cpu, part);
	part_dec_in_flight(part, rw);

	part_stat_unlock();
}
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
/* 3.19..4.13: generic helpers exist but do not take a request_queue. */
static inline void cas_generic_start_io_acct(struct request_queue *q,
		int rw, unsigned long sectors, struct hd_struct *part)
{
	generic_start_io_acct(rw, sectors, part);
}

static inline void cas_generic_end_io_acct(struct request_queue *q,
		int rw, struct hd_struct *part, unsigned long start_time)
{
	generic_end_io_acct(rw, part, start_time);
}
#else
/* 4.14+: generic helpers take the request_queue as first argument. */
static inline void cas_generic_start_io_acct(struct request_queue *q,
		int rw, unsigned long sectors, struct hd_struct *part)
{
	generic_start_io_acct(q, rw, sectors, part);
}

static inline void cas_generic_end_io_acct(struct request_queue *q,
		int rw, struct hd_struct *part, unsigned long start_time)
{
	generic_end_io_acct(q, rw, part, start_time);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
/* 4.14 renamed global_page_state() to global_zone_page_state() and moved
 * the bio's device from bi_bdev to bi_disk. */
static inline unsigned long cas_global_zone_page_state(enum zone_stat_item item)
{
	return global_zone_page_state(item);
}

#define CAS_BIO_SET_DEV(bio, bdev)	bio_set_dev(bio, bdev)
#define CAS_BIO_GET_DEV(bio)		bio->bi_disk
#else
static inline unsigned long cas_global_zone_page_state(enum zone_stat_item item)
{
	return global_page_state(item);
}

#define CAS_BIO_SET_DEV(bio, bdev)	bio->bi_bdev = bdev
#define CAS_BIO_GET_DEV(bio)		bio->bi_bdev->bd_disk
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
/* Pre-2.6.33 __ratelimit() takes no name argument for diagnostics. */
#define CAS_RATELIMIT(state, func_name) __ratelimit(state)
#else
#define CAS_RATELIMIT(state, func_name) ___ratelimit(state, func_name)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)
/* 4.19+: bio_clone() was removed; bio_clone_fast() with a NULL bio_set
 * allocates from the global pool.
 * NOTE(review): this branch defines no CAS_BLK_STATUS_T while both
 * branches below do - confirm it is provided elsewhere for >= 4.19. */
static inline struct bio *cas_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
	return bio_clone_fast(bio, gfp_mask, NULL);
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
/* 4.13..4.18: blk_status_t replaced int error codes in the block layer. */
static inline struct bio *cas_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
	return bio_clone_kmalloc(bio, gfp_mask);
}
#define CAS_BLK_STATUS_T blk_status_t
#else
static inline struct bio *cas_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
	return bio_clone(bio, gfp_mask);
}
#define CAS_BLK_STATUS_T int
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
/*
 * Return true if the request carries regular (filesystem-originated)
 * I/O - read/write/flush/discard - as opposed to driver-internal or
 * passthrough commands.  4.11 removed rq->cmd_type, so classify by
 * the request operation instead.
 */
static inline int is_rq_type_fs(struct request *rq)
{
	switch (req_op(rq)){
	case REQ_OP_READ:
	case REQ_OP_WRITE:
	case REQ_OP_FLUSH:
	case REQ_OP_DISCARD:
		return true;
	default:
		return false;
	}
}
#else
/* Older kernels expose the classification directly via cmd_type. */
static inline int is_rq_type_fs(struct request *rq)
{
	return rq->cmd_type == REQ_TYPE_FS;
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)
#define CAS_SET_DISCARD_ZEROES_DATA(queue_limits, val) ({})
#else
#define CAS_SET_DISCARD_ZEROES_DATA(queue_limits, val) \
queue_limits.discard_zeroes_data = val
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
#define cas_queue_flag_set_unlocked(flag, request_queue) \
blk_queue_flag_set(flag, request_queue)
#else
#define cas_queue_flag_set_unlocked(flag, request_queue) \
queue_flag_set_unlocked(flag, request_queue)
#endif
#endif /* #ifndef __LINUX_KERNEL_VERSION_H__ */

210
modules/cas_cache/main.c Normal file
View File

@@ -0,0 +1,210 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
/* Layer information. */
MODULE_AUTHOR("Intel(R) Corporation");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(CAS_VERSION);
u32 max_writeback_queue_size = 65536;
module_param(max_writeback_queue_size, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(max_writeback_queue_size,
"Max cache writeback queue size (65536)");
u32 writeback_queue_unblock_size = 60000;
module_param(writeback_queue_unblock_size, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(writeback_queue_unblock_size,
"Cache writeback queue size (60000) at which queue "
"is unblocked when blocked");
u32 dry_run;
module_param(dry_run, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(dry_run, "Perform dry run on module load");
u32 use_io_scheduler = 1;
module_param(use_io_scheduler, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(use_io_scheduler,
"Configure how IO shall be handled. "
"0 - in make request function, 1 - in request function");
u32 metadata_layout = ocf_metadata_layout_default;
module_param(metadata_layout, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(metadata_layout, "Metadata layout, 0 - striping, 1 - sequential");
u32 unaligned_io = 1;
module_param(unaligned_io, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(unaligned_io,
"Define how to handle I/O requests unaligned to 4 kiB, "
"0 - apply PT, 1 - handle by cache");
u32 seq_cut_off_mb = 1;
module_param(seq_cut_off_mb, uint, (S_IRUSR | S_IRGRP));
MODULE_PARM_DESC(seq_cut_off_mb,
"Sequential cut off threshold in MiB. 0 - disable");
/* globals */
bool in_upgrade;
ocf_ctx_t cas_ctx;
struct casdsk_functions_mapper casdisk_functions;
struct exported_symbol {
char *name;
unsigned long addr;
};
/*
 * kallsyms_on_each_symbol() callback: record the address of the symbol
 * whose name matches sym->name.  Always returns 0 so iteration visits
 * every symbol (a later duplicate would overwrite the address).
 */
int static cas_find_symbol(void *data, const char *namebuf,
		struct module *module, unsigned long kallsyms_addresses)
{
	struct exported_symbol *sym = data;

	if (strcmp(namebuf, sym->name) == 0)
		sym->addr = kallsyms_addresses;

	return 0;
}

/*
 * Resolve casdisk function `f` via kallsyms and store it in the
 * casdisk_functions mapper.  NOTE: expands to `return -EINVAL;` in the
 * *enclosing* function when the symbol is not found - only use inside
 * an int-returning function.
 */
#define cas_lookup_symbol(f) ({ \
	struct exported_symbol sym = {#f, 0}; \
	kallsyms_on_each_symbol(&cas_find_symbol, &sym); \
	casdisk_functions.f = (void *)sym.addr; \
	if (!casdisk_functions.f) \
		return -EINVAL; \
})
/*
 * Resolve every casdisk interface function via kallsyms into the
 * casdisk_functions mapper.  Each cas_lookup_symbol() returns -EINVAL
 * from this function if a symbol is missing, so reaching the end means
 * the full interface was found.  (The "funtions"/"dettach" spellings
 * match the exported symbol names and cannot be corrected here.)
 */
int static cas_casdisk_lookup_funtions(void)
{
	cas_lookup_symbol(casdsk_disk_dettach);
	cas_lookup_symbol(casdsk_exp_obj_destroy);
	cas_lookup_symbol(casdsk_exp_obj_create);
	cas_lookup_symbol(casdsk_disk_get_queue);
	cas_lookup_symbol(casdsk_store_config);
	cas_lookup_symbol(casdsk_disk_get_blkdev);
	cas_lookup_symbol(casdsk_exp_obj_get_queue);
	cas_lookup_symbol(casdsk_get_version);
	cas_lookup_symbol(casdsk_disk_close);
	cas_lookup_symbol(casdsk_disk_claim);
	cas_lookup_symbol(casdsk_exp_obj_unlock);
	cas_lookup_symbol(casdsk_disk_set_pt);
	cas_lookup_symbol(casdsk_get_stored_config);
	cas_lookup_symbol(casdsk_disk_get_gendisk);
	cas_lookup_symbol(casdsk_disk_attach);
	cas_lookup_symbol(casdsk_disk_set_attached);
	cas_lookup_symbol(casdsk_exp_obj_activate);
	cas_lookup_symbol(casdsk_exp_obj_activated);
	cas_lookup_symbol(casdsk_exp_obj_lock);
	cas_lookup_symbol(casdsk_free_stored_config);
	cas_lookup_symbol(casdsk_disk_open);
	cas_lookup_symbol(casdsk_disk_clear_pt);
	cas_lookup_symbol(casdsk_exp_obj_get_gendisk);
	return 0;
}
/*
 * Module entry point.  Order matters:
 *   1. resolve the casdisk interface and check its version,
 *   2. validate module parameters,
 *   3. initialize the OCF context,
 *   4. handle a stored upgrade configuration (finish or dry-run verify),
 *   5. create the /dev control device.
 * Any failure after context init unwinds through error_cas_ctx_init.
 */
static int __init cas_init_module(void)
{
	int result = 0;

	result = cas_casdisk_lookup_funtions();
	if (result) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Could not find inteldisk functions.\n");
		return result;
	}

	if (casdisk_functions.casdsk_get_version() != CASDSK_IFACE_VERSION) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Incompatible inteldisk module\n");
		return -EINVAL;
	}

	if (!writeback_queue_unblock_size || !max_writeback_queue_size) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Invalid module parameter.\n");
		return -EINVAL;
	}

	if (writeback_queue_unblock_size >= max_writeback_queue_size) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"parameter writeback_queue_unblock_size"
				" must be less than max_writeback_queue_size\n");
		return -EINVAL;
	}

	if (metadata_layout >= ocf_metadata_layout_max) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Invalid value for metadata_layout parameter\n");
		return -EINVAL;
	}

	if (unaligned_io != 0 && unaligned_io != 1) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Invalid value for unaligned_io parameter\n");
		return -EINVAL;
	}

	if (use_io_scheduler != 0 && use_io_scheduler != 1) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Invalid value for use_io_scheduler parameter\n");
		return -EINVAL;
	}

	result = cas_initialize_context();
	if (result) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Cannot initialize cache library\n");
		return result;
	}

	result = cas_upgrade_get_configuration();
	/* NOTE(review): any result other than -KCAS_ERR_NO_STORED_CONF
	 * (including genuine errors) is treated as "stored configuration
	 * present" - confirm other error codes should not abort here. */
	if (-KCAS_ERR_NO_STORED_CONF == result) {
		printk(KERN_INFO OCF_PREFIX_SHORT
				"Not found configuration for upgrade. "
				"Standard module initialization.\n");
	} else {
		if (!dry_run) {
			result = cas_upgrade_finish();
			if (result) {
				printk(KERN_ERR OCF_PREFIX_SHORT
						"Error during finish upgrade, "
						"result: %d\n", result);
				goto error_cas_ctx_init;
			}
		} else {
			/* dry_run: only verify the stored configuration,
			 * do not apply it. */
			result = cas_upgrade_verify();
			if (result) {
				printk(KERN_ERR OCF_PREFIX_SHORT
						"Error during upgrade "
						"verification\n");
				goto error_cas_ctx_init;
			}
		}
	}

	result = cas_ctrl_device_init();
	if (result) {
		printk(KERN_ERR OCF_PREFIX_SHORT
				"Cannot initialize control device\n");
		goto error_cas_ctx_init;
	}

	printk(KERN_INFO "%s Version %s (%s)::Module loaded successfully\n",
		OCF_PREFIX_LONG, CAS_VERSION, CAS_KERNEL);

	return 0;

error_cas_ctx_init:
	cas_cleanup_context();

	return result;
}
module_init(cas_init_module);
static void __exit cas_exit_module(void)
{
cas_ctrl_device_deinit();
cas_cleanup_context();
}
module_exit(cas_exit_module);

284
modules/cas_cache/ocf_env.c Normal file
View File

@@ -0,0 +1,284 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#include "utils/utils_rpool.h"
/* *** ALLOCATOR *** */
#define CAS_ALLOC_ALLOCATOR_LIMIT 256
struct _env_allocator {
/*!< Memory pool ID unique name */
char *name;
/*!< Size of specific item of memory pool */
uint32_t item_size;
/*!< OS handle to memory pool */
struct kmem_cache *kmem_cache;
/*!< Number of currently allocated items in pool */
atomic_t count;
struct cas_reserve_pool *rpool;
};
/*
 * Round an allocation size up to the nearest power of two.
 * Sizes 0, 1 and 2 are returned unchanged.
 */
static inline size_t env_allocator_align(size_t size)
{
	if (size <= 2)
		return size;

	/* 32 - clz(size - 1) is the bit width of (size - 1), so shifting
	 * one left by that amount yields the smallest power of two that
	 * is >= size (equivalent to (1ULL << 32) >> clz(size - 1)). */
	return 1ULL << (32 - __builtin_clz(size - 1));
}
/* Header prepended to every allocation handed out by env_allocator;
 * callers receive a pointer to `data`, not to the item itself. */
struct _env_allocator_item {
	/* GFP flags the item was allocated with; nonzero marks a
	 * reserve-pool candidate on free (see env_allocator_del()) */
	uint32_t flags;
	/* CPU whose reserve pool the item was taken from */
	uint32_t cpu;
	/* caller-visible payload (flexible array member) */
	char data[];
};
/*
 * Allocate a zeroed object from the allocator.  Fast path takes a
 * recycled item from the per-cpu reserve pool; on miss it falls back to
 * kmem_cache_zalloc(GFP_ATOMIC).  Returns a pointer to the payload
 * (item->data), or NULL on allocation failure.
 */
void *env_allocator_new(env_allocator *allocator)
{
	struct _env_allocator_item *item = NULL;
	/* Must be initialized: cas_rpool_try_get() presumably sets 'cpu'
	 * only on success, so the kmem_cache fallback path would otherwise
	 * store an uninitialized value into item->cpu below. */
	int cpu = 0;

	item = cas_rpool_try_get(allocator->rpool, &cpu);
	if (item) {
		/* Recycled item: clear the payload only, the header
		 * (flags/cpu) stays meaningful. */
		memset(item->data, 0, allocator->item_size -
				sizeof(struct _env_allocator_item));
	} else {
		item = kmem_cache_zalloc(allocator->kmem_cache, GFP_ATOMIC);
	}

	if (item) {
		item->cpu = cpu;
		atomic_inc(&allocator->count);
		return &item->data;
	} else {
		return NULL;
	}
}
void *env_allocator_new_rpool(void *allocator_ctx, int cpu)
{
env_allocator *allocator = (env_allocator*) allocator_ctx;
struct _env_allocator_item *item;
item = kmem_cache_zalloc(allocator->kmem_cache, GFP_NOIO |
__GFP_NORETRY);
if (item) {
item->flags = (GFP_NOIO | __GFP_NORETRY);
item->cpu = cpu;
}
return item;
}
void env_allocator_del_rpool(void *allocator_ctx, void *item)
{
env_allocator *allocator = (env_allocator* ) allocator_ctx;
kmem_cache_free(allocator->kmem_cache, item);
}
#define ENV_ALLOCATOR_NAME_MAX 128

/*
 * Create an allocator for fixed-size objects of `size` bytes, backed by
 * a kmem_cache plus a per-cpu reserve pool for atomic-context refills.
 * The internal item size is `size` plus the _env_allocator_item header.
 * On SLAB kernels, if the first kmem_cache would need a multi-page slab
 * for a sub-page item, the item size is rounded to a power of two and
 * cache creation retried once, aiming for single-page slab allocations.
 * Returns the allocator, or NULL on failure (error site is logged as a
 * line number via the `error = __LINE__` convention).
 */
env_allocator *env_allocator_create(uint32_t size, const char *name)
{
	int error = -1;
	bool retry = true;

	env_allocator *allocator = kzalloc(sizeof(*allocator), GFP_KERNEL);
	if (!allocator) {
		error = __LINE__;
		goto err;
	}

	if (size < CAS_RPOOL_MIN_SIZE_ITEM) {
		printk(KERN_ERR "Can not create allocator."
				" Item size is too small.");
		ENV_WARN(true, OCF_PREFIX_SHORT" Can not create allocator."
				" Item size is too small.\n");
		error = __LINE__;
		goto err;
	}

	allocator->item_size = size + sizeof(struct _env_allocator_item);
	if (allocator->item_size > PAGE_SIZE) {
		printk(KERN_WARNING "Creating allocator with item size"
				" greater than 4096B");
		ENV_WARN(true, OCF_PREFIX_SHORT" Creating allocator"
				" with item size greater than 4096B\n");
	}

	allocator->name = kstrdup(name, ENV_MEM_NORMAL);
	if (!allocator->name) {
		error = __LINE__;
		goto err;
	}

	/* Initialize kernel memory cache */
#ifdef CONFIG_SLAB
RETRY:
#else
	/* Non-SLAB allocators (SLUB/SLOB) expose no gfporder - the retry
	 * heuristic below is compiled out, silence the unused variable. */
	(void)retry;
#endif
	allocator->kmem_cache = kmem_cache_create(allocator->name,
			allocator->item_size, 0, 0, NULL);
	if (!allocator->kmem_cache) {
		/* Can not setup kernel memory cache */
		error = __LINE__;
		goto err;
	}

#ifdef CONFIG_SLAB
	if ((allocator->item_size < PAGE_SIZE)
			&& allocator->kmem_cache->gfporder) {
		/* Goal is to have one page allocation */
		if (retry) {
			retry = false;
			kmem_cache_destroy(allocator->kmem_cache);
			allocator->kmem_cache = NULL;
			allocator->item_size = env_allocator_align(allocator->item_size);
			goto RETRY;
		}
	}
#endif

	/* Initialize reserve pool handler per cpu */
	allocator->rpool = cas_rpool_create(CAS_ALLOC_ALLOCATOR_LIMIT,
			allocator->name, allocator->item_size, env_allocator_new_rpool,
			env_allocator_del_rpool, allocator);
	if (!allocator->rpool) {
		error = __LINE__;
		goto err;
	}

	return allocator;

err:
	printk(KERN_ERR "Cannot create memory allocator, ERROR %d", error);
	env_allocator_destroy(allocator);

	return NULL;
}
/*
 * Free an object previously returned by env_allocator_new().
 * Items originally allocated with (GFP_NOIO | __GFP_NORETRY) - i.e.
 * reserve-pool items (see env_allocator_new_rpool()) - are returned to
 * the per-cpu reserve pool if it has room; everything else goes back to
 * the kmem_cache.
 */
void env_allocator_del(env_allocator *allocator, void *obj)
{
	/* `obj` points at item->data; step back to the item header */
	struct _env_allocator_item *item =
			container_of(obj, struct _env_allocator_item, data);

	atomic_dec(&allocator->count);

	if (item->flags == (GFP_NOIO | __GFP_NORETRY) &&
			!cas_rpool_try_put(allocator->rpool, item, item->cpu))
		return;

	kmem_cache_free(allocator->kmem_cache, item);
}
/*
 * Tear down an allocator: drain the reserve pool first (its items live
 * in the kmem_cache), warn if any objects are still outstanding, then
 * destroy the kmem_cache and free the allocator itself.
 * Safe to call with NULL or a partially-constructed allocator (used on
 * the env_allocator_create() error path).
 */
void env_allocator_destroy(env_allocator *allocator)
{
	if (allocator) {
		cas_rpool_destroy(allocator->rpool, env_allocator_del_rpool,
				allocator);
		allocator->rpool = NULL;

		/* Outstanding objects at this point are leaked, not freed */
		if (atomic_read(&allocator->count)) {
			printk(KERN_CRIT "Not all object deallocated\n");
			ENV_WARN(true, OCF_PREFIX_SHORT" Cleanup problem\n");
		}

		if (allocator->kmem_cache)
			kmem_cache_destroy(allocator->kmem_cache);

		kfree(allocator->name);
		kfree(allocator);
	}
}
/* Number of objects currently allocated (and not yet freed) from the
 * allocator. */
uint32_t env_allocator_item_count(env_allocator *allocator)
{
	return atomic_read(&allocator->count);
}
/* True if `base` may be accessed as `align`-byte words: either the
 * architecture tolerates unaligned access or the pointer is aligned. */
static int env_sort_is_aligned(const void *base, int align)
{
	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
		((unsigned long)base & (align - 1)) == 0;
}

/* Word-sized swap specializations for env_sort(); `size` is ignored
 * because the element width is fixed by the function chosen. */
static void env_sort_u32_swap(void *a, void *b, int size)
{
	u32 t = *(u32 *)a;
	*(u32 *)a = *(u32 *)b;
	*(u32 *)b = t;
}

static void env_sort_u64_swap(void *a, void *b, int size)
{
	u64 t = *(u64 *)a;
	*(u64 *)a = *(u64 *)b;
	*(u64 *)b = t;
}
/*
 * Byte-by-byte swap fallback for env_sort(), used when elements are
 * neither 4 nor 8 bytes or the buffer is unaligned.  `size` must be
 * positive (the first byte is swapped before the count is checked).
 */
static void env_sort_generic_swap(void *a, void *b, int size)
{
	char *p = a;
	char *q = b;

	do {
		char tmp = *p;

		*p++ = *q;
		*q++ = tmp;
	} while (--size > 0);
}
/*
 * In-place heapsort of `num` elements of `size` bytes each.
 * `cmp_fn` follows qsort semantics (<0, 0, >0); `swap_fn` may be NULL,
 * in which case a word-sized swap is chosen when the element size and
 * alignment allow, falling back to the byte-wise swap.
 * Heapsort gives O(n log n) worst case with no recursion and no extra
 * memory, at the cost of not being stable.
 */
void env_sort(void *base, size_t num, size_t size,
	int (*cmp_fn)(const void *, const void *),
	void (*swap_fn)(void *, void *, int size))
{
	/* pre-scale counters for performance */
	int64_t i = (num/2 - 1) * size, n = num * size, c, r;

	if (!swap_fn) {
		if (size == 4 && env_sort_is_aligned(base, 4))
			swap_fn = env_sort_u32_swap;
		else if (size == 8 && env_sort_is_aligned(base, 8))
			swap_fn = env_sort_u64_swap;
		else
			swap_fn = env_sort_generic_swap;
	}

	/* heapify: sift down each internal node, last parent first */
	for ( ; i >= 0; i -= size) {
		for (r = i; r * 2 + size < n; r = c) {
			c = r * 2 + size;
			if (c < n - size &&
					cmp_fn(base + c, base + c + size) < 0)
				c += size;
			if (cmp_fn(base + r, base + c) >= 0)
				break;
			swap_fn(base + r, base + c, size);
		}
	}

	/* sort: repeatedly move the max to the end and re-sift the root */
	for (i = n - size; i > 0; i -= size) {
		swap_fn(base, base + i, size);
		for (r = 0; r * 2 + size < i; r = c) {
			c = r * 2 + size;
			if (c < i - size &&
					cmp_fn(base + c, base + c + size) < 0)
				c += size;
			if (cmp_fn(base + r, base + c) >= 0)
				break;
			swap_fn(base + r, base + c, size);
		}
	}
}

584
modules/cas_cache/ocf_env.h Normal file
View File

@@ -0,0 +1,584 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __OCF_ENV_H__
#define __OCF_ENV_H__
#include "linux_kernel_version.h"
#include "utils/utils_gc.h"
#include "ocf/ocf_err.h"
/* linux sector 512-bytes */
#define ENV_SECTOR_SHIFT 9
/* *** MEMORY MANAGEMENT *** */
#define ENV_MEM_NORMAL GFP_KERNEL
#define ENV_MEM_NOIO GFP_NOIO
#define ENV_MEM_ATOMIC GFP_ATOMIC
static inline uint64_t env_get_free_memory(void)
{
return cas_global_zone_page_state(NR_FREE_PAGES) << PAGE_SHIFT;
}
static inline void *env_malloc(size_t size, int flags)
{
return kmalloc(size, flags);
}
static inline void *env_zalloc(size_t size, int flags)
{
return kzalloc(size, flags);
}
static inline void env_free(const void *ptr)
{
kfree(ptr);
}
static inline void *env_vmalloc(size_t size)
{
return vmalloc(size);
}
static inline void *env_vzalloc(size_t size)
{
return vzalloc(size);
}
static inline void env_vfree(const void *ptr)
{
cas_vfree(ptr);
}
/* *** ALLOCATOR *** */
typedef struct _env_allocator env_allocator;
env_allocator *env_allocator_create(uint32_t size, const char *name);
void env_allocator_destroy(env_allocator *allocator);
void *env_allocator_new(env_allocator *allocator);
void env_allocator_del(env_allocator *allocator, void *item);
uint32_t env_allocator_item_count(env_allocator *allocator);
/* *** MUTEX *** */
typedef struct mutex env_mutex;
static inline int env_mutex_init(env_mutex *mutex)
{
mutex_init(mutex);
return 0;
}
static inline void env_mutex_lock(env_mutex *mutex)
{
mutex_lock(mutex);
}
static inline int env_mutex_lock_interruptible(env_mutex *mutex)
{
return mutex_lock_interruptible(mutex) ? -OCF_ERR_INTR : 0;
}
static inline int env_mutex_trylock(env_mutex *mutex)
{
return mutex_trylock(mutex) ? 0 : -OCF_ERR_NO_LOCK;
}
static inline void env_mutex_unlock(env_mutex *mutex)
{
mutex_unlock(mutex);
}
static inline int env_mutex_is_locked(env_mutex *mutex)
{
return mutex_is_locked(mutex);
}
/* *** RECURSIVE MUTEX *** */

/* Recursive mutex built on a plain kernel mutex: `holder` identifies
 * the owning task and `count` tracks the recursion depth.  `holder` is
 * written only while the mutex is held (or by the current holder). */
typedef struct {
	struct mutex mutex;
	atomic_t count;
	struct task_struct *holder;
} env_rmutex;

/* Initialize an unlocked recursive mutex.  Always returns 0. */
static inline int env_rmutex_init(env_rmutex *rmutex)
{
	mutex_init(&rmutex->mutex);
	atomic_set(&rmutex->count, 0);
	rmutex->holder = NULL;
	return 0;
}

/* Acquire the mutex; if the current task already holds it, just bump
 * the recursion count instead of deadlocking. */
static inline void env_rmutex_lock(env_rmutex *rmutex)
{
	if (current == rmutex->holder) {
		atomic_inc(&rmutex->count);
		return;
	}

	mutex_lock(&rmutex->mutex);
	rmutex->holder = current;
	atomic_inc(&rmutex->count);
}

/* As env_rmutex_lock(), but interruptible by a signal.
 * Returns 0 on success or -OCF_ERR_INTR if interrupted. */
static inline int env_rmutex_lock_interruptible(env_rmutex *rmutex)
{
	int result = 0;
	if (current == rmutex->holder) {
		atomic_inc(&rmutex->count);
		return 0;
	}

	result = mutex_lock_interruptible(&rmutex->mutex);
	if (result) {
		/* No lock */
		return -OCF_ERR_INTR;
	}

	rmutex->holder = current;
	atomic_inc(&rmutex->count);

	return 0;
}
/*
 * Try to acquire the recursive mutex without blocking.
 * Returns 0 on success (including re-entry by the current holder), or
 * -OCF_ERR_NO_LOCK when the mutex is held by another task.
 *
 * Bug fix: mutex_trylock() returns 1 on success and 0 on contention,
 * so the check must be negated.  The previous code returned
 * -OCF_ERR_NO_LOCK after *successfully* taking the lock (leaking it),
 * and on contention fell through to claim holder/count without
 * actually holding the mutex.
 */
static inline int env_rmutex_trylock(env_rmutex *rmutex)
{
	if (current == rmutex->holder) {
		atomic_inc(&rmutex->count);
		return 0;
	}

	if (!mutex_trylock(&rmutex->mutex)) {
		/* No lock */
		return -OCF_ERR_NO_LOCK;
	}

	rmutex->holder = current;
	atomic_inc(&rmutex->count);

	return 0;
}
/* Release one level of the recursive mutex; the underlying kernel
 * mutex is released only when the recursion count drops to zero.
 * BUG()s if called by a task that is not the holder. */
static inline void env_rmutex_unlock(env_rmutex *rmutex)
{
	BUG_ON(current != rmutex->holder);

	if (atomic_dec_return(&rmutex->count)) {
		/* still held recursively by the current task */
		return;
	}

	/* clear holder before unlocking so a new owner never sees stale
	 * holder data while owning the mutex */
	rmutex->holder = NULL;
	mutex_unlock(&rmutex->mutex);
}

/* True if the underlying mutex is held (by any task). */
static inline int env_rmutex_is_locked(env_rmutex *rmutex)
{
	return mutex_is_locked(&rmutex->mutex);
}
/* *** RW SEMAPHORE *** */
typedef struct
{
struct rw_semaphore sem;
wait_queue_head_t wq;
} env_rwsem;
static inline int env_rwsem_init(env_rwsem *s)
{
init_rwsem(&s->sem);
init_waitqueue_head(&s->wq);
return 0;
}
static inline void env_rwsem_up_read(env_rwsem *s)
{
up_read(&s->sem);
wake_up_all(&s->wq);
}
static inline void env_rwsem_down_read(env_rwsem *s)
{
down_read(&s->sem);
}
static inline int env_rwsem_down_read_interruptible(env_rwsem *s)
{
return wait_event_interruptible(s->wq,
down_read_trylock(&s->sem)) ? -OCF_ERR_INTR : 0;
}
static inline int env_rwsem_down_read_trylock(env_rwsem *s)
{
return down_read_trylock(&s->sem) ? 0 : -OCF_ERR_NO_LOCK;
}
static inline void env_rwsem_up_write(env_rwsem *s)
{
up_write(&s->sem);
wake_up_all(&s->wq);
}
static inline void env_rwsem_down_write(env_rwsem *s)
{
down_write(&s->sem);
}
static inline int env_rwsem_down_write_interruptible(env_rwsem *s)
{
return wait_event_interruptible(s->wq,
down_write_trylock(&s->sem)) ? -OCF_ERR_INTR : 0;
}
static inline int env_rwsem_down_write_trylock(env_rwsem *s)
{
return down_write_trylock(&s->sem) ? 0 : -OCF_ERR_NO_LOCK;
}
static inline int env_rwsem_is_locked(env_rwsem *s)
{
return rwsem_is_locked(&s->sem);
}
/* *** COMPLETION *** */
typedef struct completion env_completion;
static inline void env_completion_init(env_completion *completion)
{
init_completion(completion);
}
static inline void env_completion_wait(env_completion *completion)
{
wait_for_completion(completion);
}
static inline void env_completion_complete(env_completion *completion)
{
complete(completion);
}
/* *** ATOMIC VARIABLES *** */
typedef atomic_t env_atomic;
typedef atomic64_t env_atomic64;
static inline int env_atomic_read(const env_atomic *a)
{
return atomic_read(a);
}
static inline void env_atomic_set(env_atomic *a, int i)
{
atomic_set(a, i);
}
static inline void env_atomic_add(int i, env_atomic *a)
{
atomic_add(i, a);
}
static inline void env_atomic_sub(int i, env_atomic *a)
{
atomic_sub(i, a);
}
static inline bool env_atomic_sub_and_test(int i, env_atomic *a)
{
return atomic_sub_and_test(i, a);
}
static inline void env_atomic_inc(env_atomic *a)
{
atomic_inc(a);
}
static inline void env_atomic_dec(env_atomic *a)
{
atomic_dec(a);
}
static inline bool env_atomic_dec_and_test(env_atomic *a)
{
return atomic_dec_and_test(a);
}
static inline bool env_atomic_inc_and_test(env_atomic *a)
{
return atomic_inc_and_test(a);
}
static inline int env_atomic_add_return(int i, env_atomic *a)
{
return atomic_add_return(i, a);
}
static inline int env_atomic_sub_return(int i, env_atomic *a)
{
return atomic_sub_return(i, a);
}
static inline int env_atomic_inc_return(env_atomic *a)
{
return atomic_inc_return(a);
}
static inline int env_atomic_dec_return(env_atomic *a)
{
return atomic_dec_return(a);
}
static inline int env_atomic_cmpxchg(env_atomic *a, int old, int new_value)
{
return atomic_cmpxchg(a, old, new_value);
}
static inline int env_atomic_add_unless(env_atomic *a, int i, int u)
{
return atomic_add_unless(a, i, u);
}
static inline u64 env_atomic64_read(const env_atomic64 *a)
{
return atomic64_read(a);
}
static inline void env_atomic64_set(env_atomic64 *a, u64 i)
{
atomic64_set(a, i);
}
static inline void env_atomic64_add(u64 i, env_atomic64 *a)
{
atomic64_add(i, a);
}
static inline void env_atomic64_sub(u64 i, env_atomic64 *a)
{
atomic64_sub(i, a);
}
static inline void env_atomic64_inc(env_atomic64 *a)
{
atomic64_inc(a);
}
static inline void env_atomic64_dec(env_atomic64 *a)
{
atomic64_dec(a);
}
static inline u64 env_atomic64_inc_return(env_atomic64 *a)
{
return atomic64_inc_return(a);
}
static inline u64 env_atomic64_cmpxchg(atomic64_t *a, u64 old, u64 new)
{
return atomic64_cmpxchg(a, old, new);
}
/* *** SPIN LOCKS *** */
typedef spinlock_t env_spinlock;
static inline void env_spinlock_init(env_spinlock *l)
{
spin_lock_init(l);
}
static inline void env_spinlock_lock(env_spinlock *l)
{
spin_lock(l);
}
static inline void env_spinlock_unlock(env_spinlock *l)
{
spin_unlock(l);
}
static inline void env_spinlock_lock_irq(env_spinlock *l)
{
spin_lock_irq(l);
}
static inline void env_spinlock_unlock_irq(env_spinlock *l)
{
spin_unlock_irq(l);
}
#define env_spinlock_lock_irqsave(l, flags) \
spin_lock_irqsave((l), (flags))
#define env_spinlock_unlock_irqrestore(l, flags) \
spin_unlock_irqrestore((l), (flags))
/* *** RW LOCKS *** */
typedef rwlock_t env_rwlock;
static inline void env_rwlock_init(env_rwlock *l)
{
rwlock_init(l);
}
static inline void env_rwlock_read_lock(env_rwlock *l)
{
read_lock(l);
}
static inline void env_rwlock_read_unlock(env_rwlock *l)
{
read_unlock(l);
}
static inline void env_rwlock_write_lock(env_rwlock *l)
{
write_lock(l);
}
static inline void env_rwlock_write_unlock(env_rwlock *l)
{
write_unlock(l);
}
/* *** WAITQUEUE *** */
typedef wait_queue_head_t env_waitqueue;
static inline void env_waitqueue_init(env_waitqueue *w)
{
init_waitqueue_head(w);
}
static inline void env_waitqueue_wake_up(env_waitqueue *w)
{
wake_up(w);
}
#define env_waitqueue_wait(w, condition) \
wait_event_interruptible((w), (condition))
/* *** SCHEDULING *** */
static inline void env_cond_resched(void)
{
cond_resched();
}
/* Nonzero when executing in interrupt (hard or soft IRQ) context.
 * (Also removes a stray second semicolon from the return statement.) */
static inline int env_in_interrupt(void)
{
	return in_interrupt();
}
/* *** TIME *** */
static inline uint64_t env_get_tick_count(void)
{
return jiffies;
}
static inline uint64_t env_ticks_to_msecs(uint64_t j)
{
return jiffies_to_msecs(j);
}
static inline uint64_t env_ticks_to_nsecs(uint64_t j)
{
return jiffies_to_usecs(j) * NSEC_PER_USEC;
}
static inline bool env_time_after(uint64_t a, uint64_t b)
{
return time_after64(a,b);
}
static inline uint64_t env_ticks_to_secs(uint64_t j)
{
return j >> SHIFT_HZ;
}
static inline uint64_t env_secs_to_ticks(uint64_t j)
{
return j << SHIFT_HZ;
}
/* *** BIT OPERATIONS *** */
static inline void env_bit_set(int nr, volatile void *addr)
{
set_bit(nr, addr);
}
static inline void env_bit_clear(int nr, volatile void *addr)
{
clear_bit(nr, addr);
}
static inline int env_bit_test(int nr, const void *addr)
{
return test_bit(nr, addr);
}
static inline void env_msleep(uint64_t n)
{
msleep(n);
}
/* *** STRING OPERATIONS *** */
#define env_memset(dest, dmax, val) ({ \
memset(dest, val, dmax); \
0; \
})
#define env_memcpy(dest, dmax, src, slen) ({ \
memcpy(dest, src, min_t(int, dmax, slen)); \
0; \
})
#define env_memcmp(s1, s1max, s2, s2max, diff) ({ \
*diff = memcmp(s1, s2, min_t(int, s1max, s2max)); \
0; \
})
#define env_strdup kstrdup
#define env_strnlen(s, smax) strnlen(s, smax)
#define env_strncmp strncmp
#define env_strncpy(dest, dmax, src, slen) ({ \
strlcpy(dest, src, min_t(int, dmax, slen)); \
0; \
})
/* *** SORTING *** */
void env_sort(void *base, size_t num, size_t size,
int (*cmp_fn)(const void *, const void *),
void (*swap_fn)(void *, void *, int size));
/* *** CRC *** */
static inline uint32_t env_crc32(uint32_t crc, uint8_t const *data, size_t len)
{
return crc32(crc, data, len);
}
/* *** LOGGING *** */
#define ENV_PRIu64 "llu"
#define ENV_WARN(cond, fmt...) WARN(cond, fmt)
#define ENV_WARN_ON(cond) WARN_ON(cond)
#define ENV_BUG() BUG()
#define ENV_BUG_ON(cond) BUG_ON(cond)
#endif /* __OCF_ENV_H__ */

View File

@@ -0,0 +1,21 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __OCF_ENV_HEADERS_H__
#define __OCF_ENV_HEADERS_H__
#include <linux/types.h>
/* TODO: Move prefix printing to context logger. */
#define OCF_LOGO "Open-CAS"
#define OCF_PREFIX_SHORT "[" OCF_LOGO "] "
#define OCF_PREFIX_LONG "Open Cache Acceleration Software Linux"
#define OCF_VERSION_MAIN CAS_VERSION_MAIN
#define OCF_VERSION_MAJOR CAS_VERSION_MAJOR
#define OCF_VERSION_MINOR CAS_VERSION_MINOR
#endif /* __OCF_ENV_HEADERS_H__ */

View File

@@ -0,0 +1,414 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
/* Mapping from CAS/OCF-internal error codes to standard errno values
 * returned to userspace from the ioctl layer. */
struct {
	int cas_error;
	int std_error;
} static cas_error_code_map[] = {
	/* IOC error mappings*/
	{ OCF_ERR_INVAL,			EINVAL	},
	{ OCF_ERR_INVAL_VOLUME_TYPE,		EINVAL	},
	{ OCF_ERR_INTR,				EINTR	},
	{ OCF_ERR_UNKNOWN,			EINVAL	},
	{ OCF_ERR_TOO_MANY_CACHES,		ENOSPC	},
	{ OCF_ERR_NO_MEM,			ENOMEM	},
	{ OCF_ERR_NO_FREE_RAM,			ENOMEM	},
	{ OCF_ERR_START_CACHE_FAIL,		EFAULT	},
	{ OCF_ERR_CACHE_IN_USE,			EBUSY	},
	{ OCF_ERR_CACHE_NOT_EXIST,		ENODEV	},
	{ OCF_ERR_CACHE_EXIST,			EEXIST	},
	{ OCF_ERR_TOO_MANY_CORES,		ENOSPC	},
	{ OCF_ERR_CORE_NOT_AVAIL,		ENAVAIL	},
	{ OCF_ERR_NOT_OPEN_EXC,			EBUSY	},
	{ OCF_ERR_CACHE_NOT_AVAIL,		ENAVAIL	},
	{ OCF_ERR_IO_CLASS_NOT_EXIST,		ENODEV	},
	{ OCF_ERR_WRITE_CACHE,			EIO	},
	{ OCF_ERR_WRITE_CORE,			EIO	},
	{ OCF_ERR_DIRTY_SHUTDOWN,		EFAULT	},
	{ OCF_ERR_DIRTY_EXISTS,			EFAULT	},
	{ OCF_ERR_FLUSHING_INTERRUPTED,		EINTR	},
	/* CAS kernel error mappings*/
	{ KCAS_ERR_ROOT,			EPERM	},
	{ KCAS_ERR_SYSTEM,			EINVAL	},
	{ KCAS_ERR_BAD_RANGE,			ERANGE	},
	{ KCAS_ERR_DEV_SPACE,			ENOSPC	},
	{ KCAS_ERR_INV_IOCTL,			EINVAL	},
	{ KCAS_ERR_DEV_PENDING,			EBUSY	},
	{ KCAS_ERR_DIRTY_EXISTS_NVME,		EFAULT	},
	{ KCAS_ERR_FILE_EXISTS,			EEXIST	},
	{ KCAS_ERR_IN_UPGRADE,			EFAULT	},
	{ KCAS_ERR_UNALIGNED,			EINVAL	},
	{ KCAS_ERR_NO_STORED_CONF,		EINTR	},
	{ KCAS_ERR_ROLLBACK,			EFAULT	},
	{ KCAS_ERR_NOT_NVME,			ENODEV	},
	{ KCAS_ERR_FORMAT_FAILED,		EFAULT	},
	{ KCAS_ERR_NVME_BAD_FORMAT,		EINVAL	},
	{ KCAS_ERR_CONTAINS_PART,		EINVAL	},
	{ KCAS_ERR_A_PART,			EINVAL	},
	{ KCAS_ERR_REMOVED_DIRTY,		EIO	},
	{ KCAS_ERR_STOPPED_DIRTY,		EIO	},
};

/*******************************************/
/* Helper which change cas-specific error  */
/* codes to kernel generic error codes     */
/*******************************************/
/*
 * Translate a CAS/OCF error code (either sign) into a negative errno.
 * 0 stays 0; codes with no mapping are passed through negated so the
 * caller still gets a negative error.
 */
int map_cas_err_to_generic_code(int cas_error_code)
{
	int i;

	if (cas_error_code == 0)
		return 0; /* No Error */

	/* accept both positive and negative representations */
	cas_error_code = abs(cas_error_code);

	for (i = 0; i < ARRAY_SIZE(cas_error_code_map); i++) {
		if (cas_error_code_map[i].cas_error == cas_error_code)
			return -cas_error_code_map[i].std_error;
	}

	return -cas_error_code;
}
/*
 * Copy an ioctl command structure of `size` bytes from userspace into a
 * freshly vmalloc'd buffer assigned to `cmd_info`.
 * NOTE: expands to `return -ENOMEM;` / `return -EINVAL;` from the
 * *enclosing* function on failure - only usable inside an int/long
 * returning ioctl handler.
 */
#define _GET_CMD_INFO(cmd_info, arg, size) ({ \
	cmd_info = vmalloc(size); \
	if (!cmd_info) \
		return -ENOMEM; \
	if (copy_from_user(cmd_info, (void __user *)arg, size)) { \
		printk(KERN_ALERT "Cannot copy cmd info from user space\n"); \
		vfree(cmd_info); \
		return -EINVAL; \
	} \
})

/* Common case: copy exactly sizeof(*cmd_info) bytes. */
#define GET_CMD_INFO(cmd_info, arg) _GET_CMD_INFO(cmd_info, arg, \
		sizeof(*cmd_info))

/*
 * Copy the command result back to userspace, free the buffer and
 * `return` the errno-mapped result from the enclosing function.
 * The raw (absolute) CAS error code travels in ext_err_code; the
 * function's return value is the generic errno translation.
 */
#define RETURN_CMD_RESULT(cmd_info, arg, result) ({ \
	int ret = result; \
	cmd_info->ext_err_code = abs(result); \
	if (copy_to_user((void __user *)arg, cmd_info, sizeof(*cmd_info))) { \
		printk(KERN_ALERT "Unable to copy response to user\n"); \
		ret = -EFAULT; \
	} \
	vfree(cmd_info); \
	return map_cas_err_to_generic_code(ret); \
})
/* this handles IOctl for /dev/cas */
/*********************************************/
long cas_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
int retval = 0;
if (_IOC_TYPE(cmd) != KCAS_IOCTL_MAGIC)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN)) {
/* Must be root to issue ioctls */
return -EPERM;
}
if (cas_upgrade_is_in_upgrade() &&
cmd != KCAS_IOCTL_CACHE_INFO &&
cmd != KCAS_IOCTL_LIST_CACHE &&
cmd != KCAS_IOCTL_GET_CACHE_COUNT &&
cmd != KCAS_IOCTL_CORE_INFO &&
cmd != KCAS_IOCTL_PARTITION_STATS &&
cmd != KCAS_IOCTL_GET_CAPABILITIES) {
return -EFAULT;
}
switch (cmd) {
case KCAS_IOCTL_START_CACHE: {
struct kcas_start_cache *cmd_info;
struct ocf_mngt_cache_config cfg;
struct ocf_mngt_cache_device_config device_cfg;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_prepare_cache_cfg(&cfg, &device_cfg, cmd_info);
if (retval)
RETURN_CMD_RESULT(cmd_info, arg, retval);
retval = cache_mng_init_instance(&cfg, &device_cfg, cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_STOP_CACHE: {
struct kcas_stop_cache *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_exit_instance(cmd_info->cache_id,
cmd_info->flush_data);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_SET_CACHE_STATE: {
struct kcas_set_cache_state *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_set_cache_mode(cmd_info->cache_id,
cmd_info->caching_mode, cmd_info->flush_data);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_INSERT_CORE: {
struct kcas_insert_core *cmd_info;
struct ocf_mngt_core_config cfg;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_prepare_core_cfg(&cfg, cmd_info);
if (retval)
RETURN_CMD_RESULT(cmd_info, arg, retval);
retval = cache_mng_add_core_to_cache(&cfg, cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_REMOVE_CORE: {
struct kcas_remove_core *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_remove_core_from_cache(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_RESET_STATS: {
struct kcas_reset_stats *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_reset_core_stats(cmd_info->cache_id,
cmd_info->core_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_FLUSH_CACHE: {
struct kcas_flush_cache *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_flush_device(cmd_info->cache_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_INTERRUPT_FLUSHING: {
struct kcas_interrupt_flushing *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_interrupt_flushing(cmd_info->cache_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_FLUSH_CORE: {
struct kcas_flush_core *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_flush_object(cmd_info->cache_id,
cmd_info->core_id);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CACHE_INFO: {
struct kcas_cache_info *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_info(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CORE_INFO: {
struct kcas_core_info *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_core_info(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_PARTITION_STATS: {
struct kcas_io_class *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_io_class_info(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_PARTITION_SET: {
struct kcas_io_classes *cmd_info;
/* copy entire memory from user, including array of
* ocf_io_class_info structs past the end of kcas_io_classes */
_GET_CMD_INFO(cmd_info, arg, KCAS_IO_CLASSES_SIZE);
retval = cache_mng_set_partitions(cmd_info);
/* return just sizeof(struct kcas_io_classes) bytes of data */
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_GET_CACHE_COUNT: {
struct kcas_cache_count *cmd_info;
GET_CMD_INFO(cmd_info, arg);
cmd_info->cache_count = ocf_mngt_cache_get_count(cas_ctx);
RETURN_CMD_RESULT(cmd_info, arg, 0);
}
case KCAS_IOCTL_LIST_CACHE: {
struct kcas_cache_list *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_list_caches(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval > 0 ? 0 : retval);
}
case KCAS_IOCTL_GET_CAPABILITIES: {
struct kcas_capabilites *cmd_info;
GET_CMD_INFO(cmd_info, arg);
memset(cmd_info, 0, sizeof(*cmd_info));
#ifdef CAS_NVME_FULL
cmd_info->nvme_format = 1;
#endif
RETURN_CMD_RESULT(cmd_info, arg, 0);
}
case KCAS_IOCTL_UPGRADE: {
struct kcas_upgrade *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cas_upgrade();
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
#if defined(CAS_NVME_FULL)
case KCAS_IOCTL_NVME_FORMAT: {
struct kcas_nvme_format *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cas_nvme_format_optimal(
cmd_info->device_path_name,
cmd_info->metadata_mode,
cmd_info->force);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
#endif
case KCAS_IOCTL_GET_CORE_POOL_COUNT: {
struct kcas_core_pool_count *cmd_info;
GET_CMD_INFO(cmd_info, arg);
cmd_info->core_pool_count =
ocf_mngt_core_pool_get_count(cas_ctx);
RETURN_CMD_RESULT(cmd_info, arg, 0);
}
case KCAS_IOCTL_GET_CORE_POOL_PATHS: {
struct kcas_core_pool_path *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_core_pool_get_paths(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CORE_POOL_REMOVE: {
struct kcas_core_pool_remove *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_core_pool_remove(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_CACHE_CHECK_DEVICE: {
struct kcas_cache_check_device *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_cache_check_device(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_SET_CORE_PARAM: {
struct kcas_set_core_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_set_core_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_GET_CORE_PARAM: {
struct kcas_get_core_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_core_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_SET_CACHE_PARAM: {
struct kcas_set_cache_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_set_cache_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
case KCAS_IOCTL_GET_CACHE_PARAM: {
struct kcas_get_cache_param *cmd_info;
GET_CMD_INFO(cmd_info, arg);
retval = cache_mng_get_cache_params(cmd_info);
RETURN_CMD_RESULT(cmd_info, arg, retval);
}
default:
return -EINVAL;
}
}

View File

@@ -0,0 +1,15 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#ifndef __SERVICE_UI_IOCTL_H__
#define __SERVICE_UI_IOCTL_H__

/* Opaque forward declaration; full definition lives in the cas_disk module. */
struct casdsk_disk;

/*
 * Ioctl entry point for the CAS control device; dispatches the
 * KCAS_IOCTL_* management commands.
 */
long cas_service_ioctl_ctrl(struct file *filp, unsigned int cmd,
		unsigned long arg);

#endif

281
modules/cas_cache/threads.c Normal file
View File

@@ -0,0 +1,281 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "threads.h"
#include "cas_cache.h"
/* Kernel limits thread (comm) names; keep ours within the same bound. */
#define MAX_THREAD_NAME_SIZE 16

/* Per-thread bookkeeping shared between CAS and its kernel threads. */
struct cas_thread_info {
	atomic_t stop;			/* set to 1 to request thread exit */
	struct completion compl;	/* signals thread start-up and exit */
	struct completion sync_compl;	/* cleaner: completion of one run cycle */
	void *sync_data;		/* cleaner: out-param for sleep interval */
	wait_queue_head_t wq;		/* thread sleeps here waiting for work */
	atomic_t kicked;		/* metadata updater: work-pending flag */
	struct task_struct *thread;	/* the kthread itself */
	char name[MAX_THREAD_NAME_SIZE];	/* formatted thread name */
	bool running;			/* set once _cas_start_thread() ran */
};
/*
 * Main loop of an IO queue kthread: sleeps on info->wq until the OCF queue
 * has pending IO or a stop was requested, then drains the queue via
 * ocf_queue_run(). Exits only when stop is set AND no IO remains pending.
 */
static int _cas_io_queue_thread(void *data)
{
	ocf_queue_t q = data;
	struct cas_thread_info *info;

	BUG_ON(!q);

	/* complete the creation of the thread */
	info = ocf_queue_get_priv(q);
	BUG_ON(!info);

	DAEMONIZE(info->thread->comm);

	complete(&info->compl);

	/* Continue working until signaled to exit. */
	do {
		/* Wait until there are completed read misses from the HDDs,
		 * or a stop.
		 */
		wait_event_interruptible(info->wq, ocf_queue_pending_io(q) ||
				atomic_read(&info->stop));

		ocf_queue_run(q);

	} while (!atomic_read(&info->stop) || ocf_queue_pending_io(q));

	WARN(ocf_queue_pending_io(q), "Still pending IO requests\n");

	/* If we get here, then thread was signalled to terminate.
	 * So, let's complete and exit.
	 */
	complete_and_exit(&info->compl, 0);

	return 0;
}
/*
 * Cleaner completion callback: hand the requested sleep interval (in ms)
 * back to the cleaner thread through sync_data and wake it via sync_compl.
 */
static void _cas_cleaner_complete(ocf_cleaner_t c, uint32_t interval)
{
	struct cas_thread_info *thread_info;

	thread_info = ocf_cleaner_get_priv(c);
	*(uint32_t *)thread_info->sync_data = interval;
	complete(&thread_info->sync_compl);
}
/*
 * Cleaner kthread: runs one cleaning iteration, waits (via sync_compl) for
 * its completion callback to deliver the next sleep interval, then sleeps
 * that many milliseconds or until a stop is requested.
 */
static int _cas_cleaner_thread(void *data)
{
	ocf_cleaner_t c = data;
	ocf_cache_t cache = ocf_cleaner_get_cache(c);
	struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
	struct cas_thread_info *info;
	uint32_t ms;

	BUG_ON(!c);
	ENV_BUG_ON(!cache_priv);

	/* complete the creation of the thread */
	info = ocf_cleaner_get_priv(c);
	BUG_ON(!info);

	DAEMONIZE(info->thread->comm);

	complete(&info->compl);

	info->sync_data = &ms;
	ocf_cleaner_set_cmpl(c, _cas_cleaner_complete);

	do {
		/* Re-arm before each run; completed by _cas_cleaner_complete. */
		init_completion(&info->sync_compl);
		/* Run cleaning on the IO queue of the current CPU. */
		ocf_cleaner_run(c, cache_priv->io_queues[smp_processor_id()]);
		wait_for_completion(&info->sync_compl);
	} while (0 == wait_event_interruptible_timeout(info->wq,
			atomic_read(&info->stop), msecs_to_jiffies(ms)));

	complete_and_exit(&info->compl, 0);

	return 0;
}
/*
 * Metadata updater kthread: repeatedly runs pending metadata updates and
 * sleeps until kicked or asked to stop. The kicked flag is cleared before
 * each run so wake-ups arriving during the run are not lost.
 */
static int _cas_metadata_updater_thread(void *data)
{
	ocf_metadata_updater_t mu = data;
	struct cas_thread_info *info;

	BUG_ON(!mu);

	/* complete the creation of the thread */
	info = ocf_metadata_updater_get_priv(mu);
	BUG_ON(!info);

	DAEMONIZE(info->thread->comm);

	complete(&info->compl);

	do {
		if (atomic_read(&info->stop))
			break;

		atomic_set(&info->kicked, 0);
		/* Non-zero means more work is queued - loop without sleeping. */
		if (ocf_metadata_updater_run(mu))
			continue;

		wait_event_interruptible(info->wq, atomic_read(&info->stop) ||
				atomic_read(&info->kicked));
	} while (true);

	complete_and_exit(&info->compl, 0);

	return 0;
}
/*
 * Allocate a cas_thread_info, format its name from @fmt, and create (but
 * not start) a kthread running @threadfn(@priv). When @cpu is not
 * CAS_CPUS_ALL the thread is bound to that CPU before first wake-up.
 * On success *@pinfo (if non-NULL) receives the descriptor; returns 0 or
 * a negative errno.
 */
static int _cas_create_thread(struct cas_thread_info **pinfo,
		int (*threadfn)(void *), void *priv, int cpu,
		const char *fmt, ...)
{
	struct cas_thread_info *tinfo;
	struct task_struct *task;
	va_list ap;

	tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	atomic_set(&tinfo->stop, 0);
	init_completion(&tinfo->compl);
	init_completion(&tinfo->sync_compl);
	init_waitqueue_head(&tinfo->wq);

	va_start(ap, fmt);
	vsnprintf(tinfo->name, sizeof(tinfo->name), fmt, ap);
	va_end(ap);

	task = kthread_create(threadfn, priv, "%s", tinfo->name);
	if (IS_ERR(task)) {
		kfree(tinfo);
		/* Propagate the kthread_create() error code. */
		return PTR_ERR(task);
	}
	tinfo->thread = task;

	/* Pin to the requested CPU while the thread is still dormant. */
	if (cpu != CAS_CPUS_ALL)
		kthread_bind(task, cpu);

	if (pinfo)
		*pinfo = tinfo;

	return 0;
}
/* Wake a thread created by _cas_create_thread() and wait until it signals
 * (via info->compl) that its initialization finished. */
static void _cas_start_thread(struct cas_thread_info *info)
{
	wake_up_process(info->thread);
	wait_for_completion(&info->compl);
	info->running = true;
	printk(KERN_DEBUG "Thread %s started\n", info->name);
}
/*
 * Request thread exit, wait for it, then free the descriptor. Safe for
 * threads that were created but never started (running == false): the
 * descriptor is simply freed.
 */
static void _cas_stop_thread(struct cas_thread_info *info)
{
	if (info->running && info->thread) {
		/* Re-arm compl: it was consumed by _cas_start_thread(). */
		init_completion(&info->compl);
		atomic_set(&info->stop, 1);
		wake_up(&info->wq);
		wait_for_completion(&info->compl);
		printk(KERN_DEBUG "Thread %s stopped\n", info->name);
	}
	kfree(info);
}
/*
 * Create and start the IO thread serving OCF queue @q, bound to @cpu.
 * The descriptor is stored as the queue's private data. Returns 0 or a
 * negative errno.
 */
int cas_create_queue_thread(ocf_queue_t q, int cpu)
{
	struct cas_thread_info *thread_info;
	int result;

	result = _cas_create_thread(&thread_info, _cas_io_queue_thread, q,
			cpu, "cas_io_%s_%d",
			ocf_cache_get_name(ocf_queue_get_cache(q)), cpu);
	if (result)
		return result;

	ocf_queue_set_priv(q, thread_info);
	_cas_start_thread(thread_info);

	return 0;
}
/* Wake the IO queue thread so it processes newly queued requests. */
void cas_kick_queue_thread(ocf_queue_t q)
{
	struct cas_thread_info *info = ocf_queue_get_priv(q);

	wake_up(&info->wq);
}
/* Detach and stop the IO queue thread; its descriptor is freed. */
void cas_stop_queue_thread(ocf_queue_t q)
{
	struct cas_thread_info *info = ocf_queue_get_priv(q);

	ocf_queue_set_priv(q, NULL);
	_cas_stop_thread(info);
}
/*
 * Create and start the cleaner thread for @c's cache (not CPU-bound).
 * The descriptor is stored as the cleaner's private data. Returns 0 or a
 * negative errno.
 */
int cas_create_cleaner_thread(ocf_cleaner_t c)
{
	struct cas_thread_info *thread_info;
	int result;

	result = _cas_create_thread(&thread_info, _cas_cleaner_thread, c,
			CAS_CPUS_ALL, "cas_clean_%d",
			ocf_cache_get_id(ocf_cleaner_get_cache(c)));
	if (result)
		return result;

	ocf_cleaner_set_priv(c, thread_info);
	_cas_start_thread(thread_info);

	return 0;
}
/* Detach and stop the cleaner thread; its descriptor is freed. */
void cas_stop_cleaner_thread(ocf_cleaner_t c)
{
	struct cas_thread_info *info = ocf_cleaner_get_priv(c);

	ocf_cleaner_set_priv(c, NULL);
	_cas_stop_thread(info);
}
/*
 * Create and start the metadata updater thread for @mu's cache.
 *
 * Fix: thread names are limited to MAX_THREAD_NAME_SIZE (16) bytes (and
 * the kernel comm field is equally small), so the previous format
 * "ocf_metadata_updater_%d" was always truncated by vsnprintf() to
 * "ocf_metadata_up", dropping the cache id and making every metadata
 * updater thread indistinguishable. Use a short prefix so the id fits.
 *
 * Returns 0 or a negative errno.
 */
int cas_create_metadata_updater_thread(ocf_metadata_updater_t mu)
{
	struct cas_thread_info *info;
	int result;

	result = _cas_create_thread(&info, _cas_metadata_updater_thread,
			mu, CAS_CPUS_ALL, "cas_mu_%d",
			ocf_cache_get_id(ocf_metadata_updater_get_cache(mu)));
	if (!result) {
		ocf_metadata_updater_set_priv(mu, info);
		_cas_start_thread(info);
	}
	return result;
}
/* Mark work pending and wake the metadata updater thread. */
void cas_kick_metadata_updater_thread(ocf_metadata_updater_t mu)
{
	struct cas_thread_info *info = ocf_metadata_updater_get_priv(mu);

	atomic_set(&info->kicked, 1);
	wake_up(&info->wq);
}
/* Detach and stop the metadata updater thread; its descriptor is freed. */
void cas_stop_metadata_updater_thread(ocf_metadata_updater_t mu)
{
	struct cas_thread_info *info = ocf_metadata_updater_get_priv(mu);

	ocf_metadata_updater_set_priv(mu, NULL);
	_cas_stop_thread(info);
}

View File

@@ -0,0 +1,26 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#ifndef __THREADS_H__
#define __THREADS_H__

#include "ocf/ocf.h"
#include "linux_kernel_version.h"

/* Passed as the cpu argument when a thread must not be bound to any CPU. */
#define CAS_CPUS_ALL -1

/* IO queue thread: bound to @cpu, runs requests of queue @q. */
int cas_create_queue_thread(ocf_queue_t q, int cpu);
void cas_kick_queue_thread(ocf_queue_t q);
void cas_stop_queue_thread(ocf_queue_t q);

/* Cleaner thread: periodically runs the OCF cleaning policy. */
int cas_create_cleaner_thread(ocf_cleaner_t c);
void cas_stop_cleaner_thread(ocf_cleaner_t c);

/* Metadata updater thread: flushes metadata updates when kicked. */
int cas_create_metadata_updater_thread(ocf_metadata_updater_t mu);
void cas_kick_metadata_updater_thread(ocf_metadata_updater_t mu);
void cas_stop_metadata_updater_thread(ocf_metadata_updater_t mu);

#endif /* __THREADS_H__ */

View File

@@ -0,0 +1,13 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#ifndef __CAS_UTILS_H__
#define __CAS_UTILS_H__

/* Umbrella header pulling in the CAS utility interfaces. */
#include "utils_nvme.h"
#include "utils_properties.h"

#endif /* __CAS_UTILS_H__ */

View File

@@ -0,0 +1,22 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "utils_blk.h"
/*
 * Count the populated partition slots of @bdev's disk. Walks the RCU
 * protected partition table; empty (NULL) slots are not counted.
 */
int cas_blk_get_part_count(struct block_device *bdev)
{
	struct disk_part_tbl *tbl;
	int slot, n = 0;

	rcu_read_lock();
	tbl = rcu_dereference(bdev->bd_disk->part_tbl);
	for (slot = 0; slot < tbl->len; slot++) {
		if (rcu_access_pointer(tbl->part[slot]))
			n++;
	}
	rcu_read_unlock();

	return n;
}

View File

@@ -0,0 +1,14 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#ifndef UTILS_BLK_H_
#define UTILS_BLK_H_

#include <linux/fs.h>
#include <linux/genhd.h>

/* Count non-empty partition slots of @bdev's disk (see utils_blk.c). */
int cas_blk_get_part_count(struct block_device *bdev);

#endif /* UTILS_BLK_H_ */

View File

@@ -0,0 +1,130 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
/**
* This function locates index of IO vec from given vecs array where byte at
* offset is located. When found it returns its index and byte offset within
* this vec.
* @param vecs IO vector array to be searched
* @param vec_num number of items in IO vector array
* @param offset byte offset to be found
* @param offset_in_vec byte offset within found IO vec
* @return vec index if it lies within specified buffer, otherwise -1
*/
static int get_starting_vec(struct bio_vec *vecs, uint64_t vecs_num,
		uint64_t offset, uint64_t *offset_in_vec)
{
	int idx;

	for (idx = 0; idx < vecs_num; idx++) {
		/* Does the requested byte fall inside this vec? */
		if (offset < vecs[idx].bv_len) {
			if (offset_in_vec)
				*offset_in_vec = offset;
			return idx;
		}
		offset -= vecs[idx].bv_len;
	}

	/* Offset lies past the end of the whole vector array. */
	return -1;
}
/*
 * Copy @bytes bytes between two bio_vec arrays, starting at byte offset
 * @to within @dst and @from within @src. Returns the number of bytes
 * actually copied, which is less than @bytes when either vector runs out
 * of space. Pages are accessed via page_address(), so the vectors must
 * reference directly-addressable (lowmem) pages.
 */
uint64_t cas_data_cpy(struct bio_vec *dst, uint64_t dst_num,
		struct bio_vec *src, uint64_t src_num,
		uint64_t to, uint64_t from, uint64_t bytes)
{
	uint64_t i, j, dst_len, src_len, to_copy;
	uint64_t dst_off, src_off;
	uint64_t written = 0;
	int ret;
	void *dst_p, *src_p;
	struct bio_vec *curr_dst, *curr_src;

	/* Locate vec idx and offset in dst vec array */
	ret = get_starting_vec(dst, dst_num, to, &to);
	if (ret < 0) {
		CAS_PRINT_RL(KERN_INFO "llu dst buffer too small "
				"to_offset=%llu bytes=%llu", to, bytes);
		return 0;
	}
	j = ret;

	/* Locate vec idx and offset in src vec array */
	ret = get_starting_vec(src, src_num, from, &from);
	if (ret < 0) {
		CAS_PRINT_RL(KERN_INFO "llu src buffer too small "
				"from_offset=%llu bytes=%llu", from, bytes);
		return 0;
	}
	i = ret;

	/* Cursor state: offsets include each vec's own bv_offset. */
	curr_dst = &dst[j];
	curr_src = &src[i];
	dst_off = curr_dst->bv_offset + to;
	dst_len = curr_dst->bv_len - to;
	src_off = curr_src->bv_offset + from;
	src_len = curr_src->bv_len - from;

	while (written < bytes) {
		dst_p = page_address(curr_dst->bv_page) + dst_off;
		src_p = page_address(curr_src->bv_page) + src_off;

		/* Copy the largest chunk both current vecs can carry. */
		to_copy = src_len > dst_len ? dst_len : src_len;

		/* Prevent from copying too much*/
		if ((written + to_copy) > bytes)
			to_copy = bytes - written;

		memcpy(dst_p, src_p, to_copy);
		written += to_copy;

		if (written == bytes)
			break;

		/* Setup new len and offset. */
		dst_off += to_copy;
		dst_len -= to_copy;

		src_off += to_copy;
		src_len -= to_copy;

		/* Go to next src buffer */
		if (src_len == 0) {
			i++;

			/* Setup new len and offset. */
			if (i < src_num) {
				curr_src = &src[i];
				src_off = curr_src->bv_offset;
				src_len = curr_src->bv_len;
			} else {
				/* Source exhausted - return short count. */
				break;
			}
		}

		/* Go to next dst buffer */
		if (dst_len == 0) {
			j++;

			if (j < dst_num) {
				curr_dst = &dst[j];
				dst_off = curr_dst->bv_offset;
				dst_len = curr_dst->bv_len;
			} else {
				/* Destination exhausted - short count. */
				break;
			}
		}
	}

	if (written != bytes) {
		CAS_PRINT_RL(KERN_INFO "Written bytes not equal requested bytes "
				"(written=%llu; requested=%llu)", written, bytes);
	}

	return written;
}

View File

@@ -0,0 +1,31 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#ifndef UTILS_DATA_H_
#define UTILS_DATA_H_

/**
 * @brief Copy data from a data vector to another one
 *
 * This function copies number of bytes from source IO vector to destination
 * IO vector. It starts copying to specified offset in destination IO vector.
 * If there is not enough space it will return number of bytes that were
 * successfully copied.
 *
 * @param dst destination IO vector
 * @param dst_num size of destination IO vector
 * @param src source IO vector
 * @param src_num size of source IO vector
 * @param to dst offset where write to will start
 * @param from src offset where write from will start
 * @param bytes number of bytes to be copied
 *
 * @return number of bytes written from src to dst
 */
uint64_t cas_data_cpy(struct bio_vec *dst, uint64_t dst_num,
		struct bio_vec *src, uint64_t src_num,
		uint64_t to, uint64_t from, uint64_t bytes);

#endif /* UTILS_DATA_H_ */

View File

@@ -0,0 +1,78 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "utils_gc.h"
#include <linux/vmalloc.h>
#if defined (CAS_GARBAGE_COLLECTOR)

/* Per-CPU deferred-vfree state: a lock-free list of chunks awaiting
 * release and the work item that drains it. */
struct cas_vfree_item {
	struct llist_head list;
	struct work_struct ws;
};

static DEFINE_PER_CPU(struct cas_vfree_item, cas_vfree_item);

/* Chunks queued but not yet freed; expected to be 0 at deinit. */
static atomic_t freed = ATOMIC_INIT(0);
/* Deferred-vfree worker: detach this CPU's pending list and free every
 * entry on it. The chunk's first bytes serve as the llist node. */
static void cas_garbage_collector(struct work_struct *w)
{
	struct cas_vfree_item *vitem =
			container_of(w, struct cas_vfree_item, ws);
	struct llist_node *node, *next;

	for (node = llist_del_all(&vitem->list); node; node = next) {
		next = node->next;
		atomic_dec(&freed);
		vfree(node);
	}
}
/*
 * Queue @addr for deferred vfree() on this CPU's worker. The freed chunk
 * itself stores the llist node, so it must be at least pointer-sized.
 */
void cas_vfree(const void *addr)
{
	struct cas_vfree_item *vitem = this_cpu_ptr(&cas_vfree_item);

	atomic_inc(&freed);
	/* llist_add() returns true when the list was empty - then the
	 * worker is not running/scheduled yet and must be kicked. */
	if (llist_add((struct llist_node *)addr, &vitem->list))
		schedule_work(&vitem->ws);
}
/* Initialize the per-CPU deferred-free lists and their work items. */
void cas_garbage_collector_init(void)
{
	struct cas_vfree_item *vitem;
	int cpu;

	for_each_possible_cpu(cpu) {
		vitem = &per_cpu(cas_vfree_item, cpu);
		init_llist_head(&vitem->list);
		INIT_WORK(&vitem->ws, cas_garbage_collector);
	}
}
/*
 * Wait for every CPU's free work to drain (module unload path). Polls
 * work_pending() with schedule() in between.
 * NOTE(review): flush_work() per item would be the canonical wait here -
 * confirm why polling was chosen.
 */
void cas_garbage_collector_deinit(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct cas_vfree_item *item;

		item = &per_cpu(cas_vfree_item, i);
		while (work_pending(&item->ws))
			schedule();
	}

	WARN(atomic_read(&freed) != 0,
			OCF_PREFIX_SHORT" Not all memory deallocated\n");
}
#else
/* No deferred GC configured: init/deinit are no-ops and cas_vfree()
 * frees synchronously. */
void cas_garbage_collector_init(void) {};

void cas_garbage_collector_deinit(void) {};

void cas_vfree(const void *addr) { vfree(addr); };
#endif

View File

@@ -0,0 +1,16 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#ifndef UTILS_GC_H_
#define UTILS_GC_H_

/* Deferred vfree() machinery; without CAS_GARBAGE_COLLECTOR these fall
 * back to synchronous vfree() (see utils_gc.c). */
void cas_garbage_collector_init(void);

void cas_garbage_collector_deinit(void);

void cas_vfree(const void *addr);

#endif /* UTILS_GC_H_ */

View File

@@ -0,0 +1,583 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#if defined(CAS_NVME_PARTIAL)
#include "cas_cache.h"
#include "utils_nvme.h"
#include "utils_blk.h"
#include <linux/ioctl.h>
#include <linux/file.h>
/*
 * Read the NVMe namespace id backing @bdev via NVME_IOCTL_ID.
 *
 * NSIDs nominally span the full 32-bit range, which would collide with
 * negative error codes, but no real device has that many namespaces, so a
 * negative return is safely treated as an error. This also reliably
 * detects non-NVMe devices, where the ioctl always fails.
 */
int cas_nvme_get_nsid(struct block_device *bdev, unsigned int *nsid)
{
	int id = ioctl_by_bdev(bdev, NVME_IOCTL_ID, (unsigned long)NULL);

	if (id < 0)
		return id;

	*nsid = (unsigned int)id;
	return 0;
}
#define NVME_ID_CNS_NS 0x00
#define NVME_ID_CNS_CTRL 0x01
int cas_nvme_identify_ns(struct block_device *bdev, unsigned int nsid,
struct nvme_id_ns *ns)
{
struct nvme_admin_cmd cmd = { };
unsigned long __user buffer;
int ret = 0;
buffer = cas_vm_mmap(NULL, 0, sizeof(*ns));
if (IS_ERR((void *)buffer))
return PTR_ERR((void *)buffer);
cmd.opcode = nvme_admin_identify;
cmd.nsid = cpu_to_le32(nsid);
cmd.addr = (__u64)buffer;
cmd.data_len = sizeof(*ns);
cmd.cdw10 = NVME_ID_CNS_NS;
ret = ioctl_by_bdev(bdev, NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
if (ret < 0)
goto out;
ret = copy_from_user(ns, (void *)buffer, sizeof(*ns));
if (ret > 0)
ret = -EINVAL;
out:
cas_vm_munmap(buffer, sizeof(*ns));
return ret;
}
/*
 * Identify namespace 1 through the controller character device; used by
 * the whole-controller format path to pick an LBA format.
 *
 * NOTE(review): the function name misspells "controller" but is kept
 * unchanged - it is part of the public interface (declared in a header,
 * called from other files).
 * NOTE(review): nsid is hard-coded to 1, assuming namespace 1 is
 * representative for the whole controller - confirm.
 */
int cas_nvme_identify_ns_contorller(struct file *file, struct nvme_id_ns *ns)
{
	struct nvme_admin_cmd cmd = { };
	unsigned long __user buffer;
	mm_segment_t old_fs;
	int ret = 0;

	buffer = cas_vm_mmap(NULL, 0, sizeof(*ns));
	if (IS_ERR((void *)buffer))
		return PTR_ERR((void *)buffer);

	cmd.opcode = nvme_admin_identify;
	cmd.nsid = 1;
	cmd.addr = (__u64)buffer;
	cmd.data_len = sizeof(*ns);
	cmd.cdw10 = NVME_ID_CNS_NS;

	/* The ioctl validates user pointers; lift that for a kernel call. */
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	ret = file->f_op->unlocked_ioctl(file,
			NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
	set_fs(old_fs);
	if (ret < 0)
		goto out;

	/* copy_from_user() returns bytes NOT copied; >0 means failure. */
	ret = copy_from_user(ns, (void *)buffer, sizeof(*ns));
	if (ret > 0)
		ret = -EINVAL;
out:
	cas_vm_munmap(buffer, sizeof(*ns));
	return ret;
}
#if defined(CAS_NVME_FULL)
#define FORMAT_WORKAROUND_NOT_NEED 0
#define FORMAT_WORKAROUND_NEED 1
static int __cas_nvme_check_fw(struct nvme_id_ctrl *id_ctrl)
{
	/*
	 * Firmware revisions older than 8DV101H0 need the format command
	 * issued twice. Only the last five characters of the revision
	 * string are significant for this comparison.
	 */
	if (strncmp(&id_ctrl->fr[3], "101H0", 5) < 0)
		return FORMAT_WORKAROUND_NEED;

	return FORMAT_WORKAROUND_NOT_NEED;
}
/*
 * Issue Identify Controller (CNS 1) on @bdev and copy the result into
 * @id_ctrl. Needs a userspace-visible bounce buffer for the admin ioctl.
 */
int cas_nvme_identify_ctrl(struct block_device *bdev,
		struct nvme_id_ctrl *id_ctrl)
{
	struct nvme_admin_cmd cmd = { };
	unsigned long __user buffer;
	int ret = 0;

	buffer = cas_vm_mmap(NULL, 0, sizeof(*id_ctrl));
	if (IS_ERR((void *)buffer))
		return PTR_ERR((void *)buffer);

	cmd.opcode = nvme_admin_identify;
	cmd.addr = (__u64)buffer;
	cmd.data_len = sizeof(*id_ctrl);
	cmd.cdw10 = NVME_ID_CNS_CTRL;

	ret = ioctl_by_bdev(bdev, NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
	if (ret < 0)
		goto out;

	/* copy_from_user() returns bytes NOT copied; >0 means failure. */
	ret = copy_from_user(id_ctrl, (void *)buffer, sizeof(*id_ctrl));
	if (ret > 0)
		ret = -EINVAL;
out:
	cas_vm_munmap(buffer, sizeof(*id_ctrl));
	return ret;
}
static int _cas_nvme_format_bdev(struct block_device *bdev, unsigned int nsid,
int lbaf, int ms)
{
struct nvme_admin_cmd cmd = { };
cmd.opcode = nvme_admin_format_nvm;
cmd.nsid = nsid;
cmd.cdw10 = lbaf | ms<<4;
cmd.timeout_ms = 1200000;
return ioctl_by_bdev(bdev, NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
}
/*
 * Issue Identify Controller via the char-device ioctl, writing the result
 * into the caller-provided user-mapped @buffer (0x1000 bytes). The caller
 * copies the data out of @buffer afterwards.
 */
static int _cas_nvme_controller_identify(struct file *character_device_file,
		unsigned long __user buffer)
{
	struct nvme_admin_cmd cmd = { };
	mm_segment_t old_fs;
	int ret;

	old_fs = get_fs();

	cmd.opcode = nvme_admin_identify;
	cmd.nsid = 0;
	cmd.addr = (__u64)buffer;
	/* 1 - identify controller, 0 - identify namespace */
	cmd.cdw10 = 1;
	cmd.data_len = 0x1000;

	/* Ioctl invoked from kernel space - lift the user-pointer check. */
	set_fs(KERNEL_DS);
	ret = character_device_file->f_op->unlocked_ioctl(character_device_file,
			NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
	set_fs(old_fs);

	return ret;
}
/*
 * Send Format NVM to the whole controller (NSID 0xFFFFFFFF), formatting
 * all namespaces at once. @lbaf selects the LBA format; @sbnsupp sets the
 * metadata-settings bit (cdw10 bit 4).
 *
 * NOTE(review): timeout_ms is 120000 (2 min) here while the per-namespace
 * format in _cas_nvme_format_bdev() uses 1200000 (20 min); a whole
 * controller format should hardly need *less* time - possibly a missing
 * zero. Confirm the intended value.
 */
static int _cas_nvme_format_controller(struct file *character_device_file,
		int lbaf, bool sbnsupp)
{
	struct nvme_admin_cmd cmd = { };
	mm_segment_t old_fs;
	int ret;

	old_fs = get_fs();

	/* Send format command to device */
	cmd.opcode = nvme_admin_format_nvm;
	cmd.nsid = 0xFFFFFFFF;
	cmd.cdw10 = lbaf | sbnsupp << 4;
	cmd.timeout_ms = 120000;
	cmd.addr = 0;

	/* Ioctl invoked from kernel space - lift the user-pointer check. */
	set_fs(KERNEL_DS);
	ret = character_device_file->f_op->unlocked_ioctl(character_device_file,
			NVME_IOCTL_ADMIN_CMD, (unsigned long)&cmd);
	set_fs(old_fs);

	return ret;
}
/*
 * Pick the LBA format with 512B data size (ds == 9) and the metadata size
 * required by the mode: 8B metadata when @atomic, none otherwise. The
 * loop bound is inclusive because NVMe's NLBAF field is 0's based (value
 * N means N+1 supported formats). Returns the format index or -EINVAL
 * when no suitable format exists.
 */
static inline int find_lbaf(struct nvme_lbaf *lbaf, int cnt, int atomic)
{
	int ms = atomic ? 8 : 0;
	int i;

	for (i = 0; i <= cnt; ++i)
		if (lbaf[i].ms == ms && lbaf[i].ds == 9)
			return i;

	return -EINVAL;
}
/* Context for the asynchronous metadata probe: the waiter blocks on cmpl
 * until _cas_nvme_probe_cmpl() fills in error/status. */
struct _probe_context
{
	struct completion cmpl;
	struct ocf_metadata_probe_status status;	/* valid iff error == 0 */
	int error;
};
/* Completion callback for ocf_metadata_probe(): record the outcome and
 * wake the waiter blocked on ctx->cmpl. */
static void _cas_nvme_probe_cmpl(void *priv, int error,
		struct ocf_metadata_probe_status *status)
{
	struct _probe_context *ctx = priv;

	ctx->error = error;
	if (!error)
		ctx->status = *status;

	complete(&ctx->cmpl);
}
/*
 * Decide whether @bdev may be formatted for CAS:
 *  - refuse partitions outright (-KCAS_ERR_A_PART);
 *  - refuse disks carrying >1 partition unless @force
 *    (-KCAS_ERR_CONTAINS_PART);
 *  - probe for existing OCF metadata and refuse a dirty shutdown
 *    (-KCAS_ERR_DIRTY_EXISTS_NVME) or a busy cache (-OCF_ERR_NOT_OPEN_EXC)
 *    unless @force.
 * Returns 0 when formatting may proceed.
 */
static int _cas_nvme_preformat_check(struct block_device *bdev, int force)
{
	ocf_volume_t volume;
	struct _probe_context probe_ctx;
	int ret = 0;

	/* Whole-disk check: bd_contains points back to self for whole disks. */
	if (bdev != bdev->bd_contains)
		return -KCAS_ERR_A_PART;

	if (cas_blk_get_part_count(bdev) > 1 && !force)
		return -KCAS_ERR_CONTAINS_PART;

	ret = cas_blk_open_volume_by_bdev(&volume, bdev);
	if (ret == -KCAS_ERR_NVME_BAD_FORMAT) {
		/* Current format is not supported by CAS, so we can be sure
		 * that there is no dirty data. Do format
		 */
		return 0;
	} else if (ret) {
		/* An error occurred, stop processing */
		return ret;
	}

	init_completion(&probe_ctx.cmpl);
	ocf_metadata_probe(cas_ctx, volume, _cas_nvme_probe_cmpl, &probe_ctx);
	if (wait_for_completion_interruptible(&probe_ctx.cmpl)) {
		ocf_volume_close(volume);
		return -OCF_ERR_FLUSHING_INTERRUPTED;
	}

	if (probe_ctx.error == -ENODATA) {
		/* Cache was not detected on this device
		 * NVMe can be formated
		 */
		ret = 0;
	} else if (probe_ctx.error == -EBUSY) {
		/* Device is currently in use as a cache. */
		ret = -OCF_ERR_NOT_OPEN_EXC;
	} else if (probe_ctx.error) {
		/* Some error occurred, we do not have sure about clean cache */
		ret = -KCAS_ERR_FORMAT_FAILED;
	} else {
		/* Check if cache was closed in proper way */
		if (!probe_ctx.status.clean_shutdown ||
				probe_ctx.status.cache_dirty) {
			/* Dirty shutdown */
			ret = -KCAS_ERR_DIRTY_EXISTS_NVME;
		}

		if (force) {
			/* Force overwrites dirty shutdown */
			ret = 0;
		}
	}

	ocf_volume_close(volume);
	return ret;
}
/*
 * Format a single NVMe namespace (identified by its block-device path) to
 * the LBA format required by @metadata_mode: 512B data, plus 8B metadata
 * for atomic mode. The device is opened exclusively; existing partitions
 * or dirty CAS metadata block the format unless @force. A BLKRRPART
 * re-reads the (now empty) partition table afterwards.
 */
static int _cas_nvme_format_namespace_by_path(const char *device_path,
		int metadata_mode, int force)
{
	struct nvme_id_ns *ns;
	struct nvme_id_ctrl *id;
	unsigned int nsid, sbnsupp = 0;
	int best_lbaf = 0;
	int ret = 0;
	struct block_device *bdev;
	char holder[] = "CAS FORMAT\n";

	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return -OCF_ERR_NO_MEM;

	id = kmalloc(sizeof(*id), GFP_KERNEL);
	if (!id) {
		ret = -OCF_ERR_NO_MEM;
		goto out1;
	}

	bdev = OPEN_BDEV_EXCLUSIVE(device_path,
			FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
	if (IS_ERR(bdev)) {
		if (PTR_ERR(bdev) == -EBUSY)
			ret = -OCF_ERR_NOT_OPEN_EXC;
		else
			ret = -OCF_ERR_INVAL_VOLUME_TYPE;
		goto out1;
	}

	/* A failing NVME_IOCTL_ID means this is not an NVMe device. */
	ret = cas_nvme_get_nsid(bdev, &nsid);
	if (ret < 0) {
		ret = -KCAS_ERR_NOT_NVME;
		goto out2;
	}

	ret = _cas_nvme_preformat_check(bdev, force);
	if (ret)
		goto out2;

	ret = cas_nvme_identify_ns(bdev, nsid, ns);
	if (ret < 0) {
		ret = -KCAS_ERR_FORMAT_FAILED;
		goto out2;
	}

	if (metadata_mode == CAS_METADATA_MODE_NORMAL) {
		best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 0);
		sbnsupp = 0;
	} else if (metadata_mode == CAS_METADATA_MODE_ATOMIC) {
		best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 1);
		/* sbnsupp: presumably "separate metadata buffer not
		 * supported" (MC bit 1) - confirm against the NVMe spec. */
		sbnsupp = !(ns->mc & (1<<1));
	}

	if (best_lbaf < 0) {
		ret = -KCAS_ERR_FORMAT_FAILED;
		goto out2;
	}

	ret = cas_nvme_identify_ctrl(bdev, id);
	if (ret < 0) {
		ret = -KCAS_ERR_FORMAT_FAILED;
		goto out2;
	}

	if (__cas_nvme_check_fw(id) == FORMAT_WORKAROUND_NEED) {
		/*
		 * If firmware is older then 8DV101H0 we need do
		 * workaround - make format twice.
		 */
		ret = _cas_nvme_format_bdev(bdev, nsid, best_lbaf, sbnsupp);
		if (ret)
			goto out2;
	}

	ret = _cas_nvme_format_bdev(bdev, nsid, best_lbaf, sbnsupp);
	if (ret)
		goto out2;

	/* Ask the kernel to re-read the wiped partition table. */
	ret = ioctl_by_bdev(bdev, BLKRRPART, (unsigned long)NULL);
out2:
	CLOSE_BDEV_EXCLUSIVE(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
out1:
	kfree(id);
	kfree(ns);
	return ret;
}
/*
 * Resolve the block device of namespace @namespace_number belonging to
 * the NVMe controller char device <major>:<minor>. The namespace's dev
 * number is read from sysfs ("/sys/dev/char/M:m/nvmeXnY/dev", content
 * "major:minor") and the device is opened exclusively.
 *
 * Fix: sscanf() returns the number of successful conversions (0..2, never
 * negative for a short match), so the old `ret < 0` test could not catch
 * a parse failure and garbage could reach MKDEV(). Require exactly two
 * conversions instead.
 *
 * NOTE(review): the sysfs path reuses @minor as the controller instance
 * number (nvme%d) - assumes char-dev minors equal instance ids; verify
 * against the nvme driver on the targeted kernels.
 *
 * Returns 0 and sets *@bdev on success, a negative CAS/OCF error code
 * otherwise.
 */
static int _cas_nvme_get_bdev_from_controller(struct block_device **bdev,
		int major, int minor, int namespace_number)
{
	mm_segment_t old_fs;
	char *sys_path;
	struct file *file;
	char readbuffer[12] = {0};
	char holder[] = "CAS FORMAT\n";
	int ret = 0;

	sys_path = kzalloc(sizeof(char)*MAX_STR_LEN, GFP_KERNEL);
	if (!sys_path)
		return -OCF_ERR_NO_MEM;

	sprintf(sys_path, "/sys/dev/char/%d:%d/nvme%dn%d/dev",
			major, minor, minor, namespace_number);

	file = filp_open(sys_path, O_RDONLY, 0);
	kfree(sys_path);
	if (IS_ERR(file))
		return -KCAS_ERR_FORMAT_FAILED;

	/* Read "major:minor" into a kernel buffer (hence the set_fs dance). */
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	ret = file->f_op->read(file, readbuffer, sizeof(readbuffer),
			&file->f_pos);
	set_fs(old_fs);
	filp_close(file, 0);
	if (ret < 0)
		return -KCAS_ERR_FORMAT_FAILED;

	ret = sscanf(readbuffer, "%d:%d", &major, &minor);
	if (ret != 2)
		return -KCAS_ERR_FORMAT_FAILED;

	*bdev = blkdev_get_by_dev(MKDEV(major, minor),
			FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
	if (IS_ERR(*bdev))
		return -OCF_ERR_INVAL_VOLUME_TYPE;

	return 0;
}
static int _cas_nvme_format_character_device(const char *device_path,
int metadata_mode, int force)
{
mm_segment_t old_fs;
int ret;
struct file *character_device_file = NULL;
struct nvme_id_ctrl *ctrl;
unsigned long __user buffer;
struct kstat *stat;
struct block_device **ndev = NULL;
int i;
struct nvme_id_ns *ns;
int best_lbaf = 0;
int sbnsupp = 0;
ctrl = kzalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
buffer = cas_vm_mmap(NULL, 0, sizeof(*ctrl));
stat = kmalloc(sizeof(struct kstat), GFP_KERNEL);
ns = kmalloc(sizeof(*ns), GFP_KERNEL);
old_fs = get_fs();
if (!ctrl || !buffer || !stat || !ns) {
ret = -OCF_ERR_NO_MEM;
goto out1;
}
character_device_file = filp_open(device_path, O_RDWR | O_EXCL, 0);
if (IS_ERR(character_device_file)) {
ret = -OCF_ERR_INVAL_VOLUME_TYPE;
goto out1;
}
ret = _cas_nvme_controller_identify(character_device_file, buffer);
if (ret < 0) {
ret = KCAS_ERR_FORMAT_FAILED;
goto out1;
}
ret = copy_from_user(ctrl, (void *)buffer, sizeof(*ctrl));
if (ret)
goto out1;
ndev = kmalloc_array(ctrl->nn, sizeof(struct block_device), GFP_KERNEL);
if (!ndev) {
ret = -OCF_ERR_NO_MEM;
goto out1;
}
set_fs(KERNEL_DS);
ret = vfs_stat(device_path, stat);
set_fs(old_fs);
if (ret)
goto out1;
for (i = 1; i <= ctrl->nn; i++) {
ret = _cas_nvme_get_bdev_from_controller(&ndev[i-1],
MAJOR(stat->rdev), MINOR(stat->rdev), i);
if (ret) {
i--;
goto cleanup;
}
ret = _cas_nvme_preformat_check(ndev[i-1], force);
if (ret)
goto cleanup;
}
ret = cas_nvme_identify_ns_contorller(character_device_file, ns);
if (ret)
goto cleanup;
if (metadata_mode == CAS_METADATA_MODE_NORMAL) {
best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 0);
sbnsupp = 0;
} else if (metadata_mode == CAS_METADATA_MODE_ATOMIC) {
best_lbaf = find_lbaf(ns->lbaf, ns->nlbaf, 1);
sbnsupp = !(ns->mc & (1<<1));
}
if (best_lbaf < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto cleanup;
}
if (__cas_nvme_check_fw(ctrl) == FORMAT_WORKAROUND_NEED) {
/*
* If firmware is older then 8DV101H0 we need do
* workaround - make format twice.
*/
ret = _cas_nvme_format_controller(character_device_file,
best_lbaf, sbnsupp);
if (ret < 0) {
ret = -KCAS_ERR_FORMAT_FAILED;
goto cleanup;
}
}
ret = _cas_nvme_format_controller(character_device_file,
best_lbaf, sbnsupp);
if (ret < 0)
ret = -KCAS_ERR_FORMAT_FAILED;
cleanup:
for (i = i-1; i >= 1; i--) {
ret |= ioctl_by_bdev(ndev[i-1], BLKRRPART, (unsigned long)NULL);
blkdev_put(ndev[i-1], FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}
out1:
kfree(ndev);
kfree(ctrl);
kfree(stat);
kfree(ns);
cas_vm_munmap(buffer, sizeof(buffer));
filp_close(character_device_file, 0);
return ret;
}
/*
 * Entry point for "format NVMe for CAS": identify what @device_path is
 * and dispatch to the namespace or (currently disabled) whole-controller
 * format routine.
 */
int cas_nvme_format_optimal(const char *device_path, int metadata_mode,
		int force)
{
	uint8_t type;
	int result;

	result = cas_blk_identify_type(device_path, &type);
	if (result == -OCF_ERR_INVAL_VOLUME_TYPE) {
		/* Identification itself failed - propagate. */
		return result;
	}

	if (type == BLOCK_DEVICE_VOLUME || type == ATOMIC_DEVICE_VOLUME) {
		return _cas_nvme_format_namespace_by_path(device_path,
				metadata_mode, force);
	}

	/*
	 * TODO(rbaldyga): Make it safe with NVMe drives that do not
	 * handle format change properly. Whole-controller format stays
	 * disabled ("&& false") until then.
	 */
	if (type == NVME_CONTROLLER && false) {
		return _cas_nvme_format_character_device(device_path,
				metadata_mode, force);
	}

	return -OCF_ERR_INVAL_VOLUME_TYPE;
}
#endif
#endif

View File

@@ -0,0 +1,38 @@
/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#ifndef UTILS_NVME_H_
#define UTILS_NVME_H_

/* Pull in whichever uapi header carries the NVMe ioctl definitions on
 * this kernel (selected at configure time). */
#if defined(CAS_UAPI_NVME)
#include <uapi/nvme.h>
#endif

#if defined(CAS_UAPI_LINUX_NVME)
#include <uapi/linux/nvme.h>
#endif

#if defined(CAS_UAPI_LINUX_NVME_IOCTL)
#include <uapi/linux/nvme_ioctl.h>
#endif

#if defined(CAS_NVME_PARTIAL)

#include <linux/nvme.h>

/* Resolve the NSID backing @bdev; fails for non-NVMe devices. */
int cas_nvme_get_nsid(struct block_device *bdev, unsigned int *nsid);
/* Identify Namespace data for @nsid of @bdev. */
int cas_nvme_identify_ns(struct block_device *bdev, unsigned int nsid,
		struct nvme_id_ns *ns);

#if defined(CAS_NVME_FULL)

/* Format @device_path to the optimal LBA format for @metadata_mode. */
int cas_nvme_format_optimal(const char *device_path, int metadata_mode,
		int force);

#endif /* CAS_NVME_FULL */

#endif /* CAS_NVME_PARTIAL */

#endif /* UTILS_NVME_H_ */

View File

@@ -0,0 +1,769 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#define INTERNAL_CALL 0
#define EXTERNAL_CALL 1

/* Version tag stored in every serialized properties blob. */
#define CAS_PROPERTIES_VERSION 101

#define VERSION_STR ".version"

/*
 * The difference between a constant and a non-constant entry is stored
 * in the LSB, e.g.:
 * cas_property_string in binary 0000 1010
 * cas_property_string_const in binary 0000 1011
 */
#define CAS_PROP_UNCONST(type) (type & ~CAS_PROPERTIES_CONST)
#define CAS_PROP_CHECK_CONST(type) (type & CAS_PROPERTIES_CONST)

/* Type tags for property entries; _const variants set the LSB. */
enum cas_property_type {
	cas_property_string = 10,
	cas_property_string_const =
			(cas_property_string | CAS_PROPERTIES_CONST),
	cas_property_sint = 16,
	cas_property_sint_const = (cas_property_sint | CAS_PROPERTIES_CONST),
	cas_property_uint = 74,
	cas_property_uint_const = (cas_property_uint | CAS_PROPERTIES_CONST),
};
/* A property set: just the list head of _cas_property entries. */
struct cas_properties {
	struct list_head list;
};

/* A single key/value entry; which union member is valid depends on type. */
struct _cas_property {
	uint8_t type;		/* cas_property_type, possibly with const bit */
	char *key;		/* kmalloc'ed NUL-terminated key */
	struct list_head item;	/* linkage into cas_properties.list */
	union {
		void *value;		/* string type: kmalloc'ed buffer */
		uint64_t value_uint;
		int64_t value_sint;
	};
};
/*
 * Allocate a new properties instance pre-populated with the mandatory
 * ".version" (const) and ".size" (placeholder, non-const) entries.
 *
 * Returns the new instance or an ERR_PTR() on allocation failure.
 */
struct cas_properties *cas_properties_create(void)
{
	struct cas_properties *props;
	int result;

	props = kzalloc(sizeof(*props), GFP_KERNEL);
	if (!props)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&props->list);

	result = cas_properties_add_uint(props, VERSION_STR,
			CAS_PROPERTIES_VERSION, CAS_PROPERTIES_CONST);
	if (result)
		goto error;

	result = cas_properties_add_uint(props, ".size", 0,
			CAS_PROPERTIES_NON_CONST);
	if (result)
		goto error;

	return props;

error:
	/*
	 * Fix: use the full destructor instead of a plain kfree(props).
	 * If the second add failed, the VERSION_STR entry (and its key)
	 * was already on the list and would leak otherwise.
	 */
	cas_properties_destroy(props);
	return ERR_PTR(result);
}
/*
 * Free a properties instance: release every entry (including any
 * string value it owns) and then the container itself.
 */
void cas_properties_destroy(struct cas_properties *props)
{
	struct _cas_property *entry;
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, &props->list) {
		entry = list_entry(pos, struct _cas_property, item);
		list_del(pos);

		/* Only string entries own a separately allocated value. */
		if (CAS_PROP_UNCONST(entry->type) == cas_property_string)
			kfree(entry->value);

		kfree(entry->key);
		kfree(entry);
	}

	kfree(props);
}
/*
 * Compute the serialized footprint of the whole property set:
 * per entry, NUL-terminated key + one type byte + the value bytes.
 * Returns 0 when an entry of unknown type is encountered.
 */
static uint64_t _cas_prop_get_size(struct cas_properties *props)
{
	uint64_t total = 0;
	struct _cas_property *entry;
	struct list_head *pos;

	list_for_each(pos, &props->list) {
		entry = list_entry(pos, struct _cas_property, item);

		total += cas_prop_strnlen(entry->key, MAX_STRING_SIZE) + 1;
		total += sizeof(entry->type);

		switch (CAS_PROP_UNCONST(entry->type)) {
		case cas_property_string:
			total += cas_prop_strnlen(entry->value,
					MAX_STRING_SIZE) + 1;
			break;
		case cas_property_sint:
			total += sizeof(entry->value_sint);
			break;
		case cas_property_uint:
			total += sizeof(entry->value_uint);
			break;
		default:
			/* Unknown entry type - signal failure with 0. */
			return 0;
		}
	}

	return total;
}
/*
 * Append @value (including its NUL terminator) to @buffer at @*offset,
 * advancing the offset. Returns -ENOMEM when it would overrun @size.
 */
static int _cas_prop_serialize_string(char *buffer, const uint64_t size,
		uint64_t *offset, char *value)
{
	uint64_t len = cas_prop_strnlen(value, MAX_STRING_SIZE) + 1;

	if (*offset + len > size)
		return -ENOMEM;

	memcpy(buffer + *offset, value, len);
	*offset += len;

	return 0;
}
/*
 * Read a NUL-terminated string from @buffer at @*offset.
 *
 * On success a kstrdup()'d copy is stored in @*str (the caller owns
 * and frees it) and @*offset is advanced past the terminator.
 * Returns 0 on success, -ENOMEM when the buffer ends before a
 * terminator is found or the duplicate cannot be allocated.
 */
static int _cas_prop_parse_string(const char *buffer, const uint64_t size,
		uint64_t *offset, char **str)
{
	char *tmp_str = NULL;
	uint64_t str_size = 0;

	if (*offset >= size)
		return -ENOMEM;

	/* Length is bounded by the remaining bytes; +1 for the terminator. */
	str_size = cas_prop_strnlen(&buffer[*offset], size - *offset ) + 1;

	if (str_size > size - *offset) {
		/* no null terminator at the end of buffer */
		return -ENOMEM;
	}

	tmp_str = kstrdup(&buffer[*offset], GFP_KERNEL);
	if (!tmp_str)
		return -ENOMEM;

	*offset += str_size;
	*str = tmp_str;

	return 0;
}
/*
 * Serialize a 64-bit integer into @buffer at @*offset, least
 * significant byte first, so the on-disk layout does not depend on
 * host byte order. Advances @*offset per byte written; on -ENOMEM
 * the offset reflects the bytes that fit.
 */
static int _cas_prop_serialize_int(char *buffer, const uint64_t size,
		uint64_t *offset, uint64_t number)
{
	unsigned int shift;

	for (shift = 0; shift < 8 * sizeof(number); shift += 8) {
		if (*offset >= size)
			return -ENOMEM;

		buffer[*offset] = (char)((number >> shift) & 0xFF);
		(*offset)++;
	}

	return 0;
}
/* Serialize an unsigned 64-bit value (LSB-first byte order). */
static int _cas_prop_serialize_uint(char *buffer, const uint64_t size,
		uint64_t *offset, uint64_t number)
{
	return _cas_prop_serialize_int(buffer, size, offset, number);
}

/* Serialize a signed 64-bit value by reusing the unsigned encoder
 * (two's-complement bit pattern is preserved by the cast). */
static int _cas_prop_serialize_sint(char *buffer, const uint64_t size,
		uint64_t *offset, int64_t number)
{
	return _cas_prop_serialize_int(buffer, size, offset, (uint64_t) number);
}
/*
 * Decode a 64-bit integer written LSB-first by
 * _cas_prop_serialize_int(). Advances @*offset per byte consumed and
 * returns -ENOMEM when the buffer ends early (leaving @*number
 * partially assembled, as the original implementation did).
 */
static int _cas_prop_parse_int(const char *buffer,
		const uint64_t size, uint64_t *offset, uint64_t *number)
{
	unsigned int shift;

	*number = 0;

	for (shift = 0; shift < 8 * sizeof(*number); shift += 8) {
		if (*offset >= size)
			return -ENOMEM;

		*number |= ((uint64_t)(buffer[*offset] & 0xFF)) << shift;
		(*offset)++;
	}

	return 0;
}
/* Parse an unsigned 64-bit value (LSB-first byte order). */
static int _cas_prop_parse_uint(const char *buffer,
		const uint64_t size, uint64_t *offset, uint64_t *number)
{
	return _cas_prop_parse_int(buffer, size, offset, number);
}

/* Parse a signed 64-bit value by reusing the unsigned decoder
 * (the raw bit pattern is written through the aliased pointer). */
static int _cas_prop_parse_sint(const char *buffer,
		const uint64_t size, uint64_t *offset, int64_t *number)
{
	return _cas_prop_parse_int(buffer, size, offset, (uint64_t *) number);
}
/*
 * Serialize a single property entry into @buffer at @*offset.
 *
 * Each entry is represented in buffer in order as below
 * (e.g. in case we have entry with integer) :
 *	<----- entry ----->
 *	<- key -><-type-><- integer ->
 *	<- X bytes -><1 byte><- 8 byte ->
 *	|		|	|	|
 *
 * Returns 0 on success, -ENOMEM when @buffer is too small,
 * -EINVAL for an unknown entry type.
 */
static int _cas_prop_serialize(struct _cas_property *entry, void *buffer,
		const uint64_t size, uint64_t *offset)
{
	uint64_t item_size = 0;
	void *item;
	int result = 0;

	if (*offset > size)
		return -ENOMEM;

	/*
	 * First step - serialize key (with its NUL terminator)
	 */
	item_size = cas_prop_strnlen(entry->key, MAX_STRING_SIZE) + 1;
	item = entry->key;

	if ((*offset + item_size) > size)
		return -ENOMEM;

	memcpy(buffer + *offset, item, item_size);
	*offset += item_size;

	/*
	 * Second step - serialize type (one byte, const bit included)
	 */
	item_size = sizeof(entry->type);
	item = &entry->type;

	if ((*offset + item_size) > size)
		return -ENOMEM;

	memcpy(buffer + *offset, item, item_size);
	*offset += item_size;

	/*
	 * Third step - serialize value
	 */
	switch (CAS_PROP_UNCONST(entry->type)) {
	case cas_property_string:
		/* Serialize string */
		result = _cas_prop_serialize_string(buffer, size, offset,
				entry->value);
		break;
	case cas_property_sint:
		/* Serialize signed integer.
		 * NOTE(review): this reads value_uint rather than
		 * value_sint; both alias the same union storage so the
		 * bytes are identical - confirm intent. */
		result = _cas_prop_serialize_sint(buffer, size, offset,
				entry->value_uint);
		break;
	case cas_property_uint:
		/* Serialize unsigned integer */
		result = _cas_prop_serialize_uint(buffer, size, offset,
				entry->value_uint);
		break;
	default:
		result = -EINVAL;
		break;
	}

	return result;
}
/*
 * Serialize all properties into one contiguous vzalloc()'d buffer and
 * fill @caches_serialized_conf with the buffer, its size and a CRC16.
 * On success the caller owns the buffer. Returns 0 or negative errno.
 */
int cas_properties_serialize(struct cas_properties *props,
		struct casdsk_props_conf *caches_serialized_conf)
{
	int result = 0;
	uint64_t offset = 0, size;
	uint16_t crc = 0;
	void *buffer;
	struct list_head *curr;
	struct _cas_property *entry;

	size = _cas_prop_get_size(props);
	if (size == 0)
		return -EINVAL;

	buffer = vzalloc(size);
	if (!buffer)
		return -ENOMEM;

	/*
	 * Update first entry on list - size of buffer.
	 * NOTE(review): ".size" is rewritten as a *const* entry here, so
	 * serializing the same instance a second time would fail inside
	 * cas_properties_add_uint() - confirm single-shot use is intended.
	 */
	result = cas_properties_add_uint(props, ".size", size,
			CAS_PROPERTIES_CONST);
	if (result)
		goto error_after_buffer_allocation;

	/*
	 * Serialize each entry, one by one
	 */
	list_for_each(curr, &props->list) {
		entry = list_entry(curr, struct _cas_property, item);
		result = _cas_prop_serialize(entry, buffer, size, &offset);
		if (result)
			goto error_after_buffer_allocation;
	}

	/* Checksum lets the parser reject corrupted configuration. */
	crc = crc16(0, buffer, size);
	caches_serialized_conf->buffer = buffer;
	caches_serialized_conf->size = size;
	caches_serialized_conf->crc = crc;

	return result;

error_after_buffer_allocation:
	vfree(buffer);
	return result;
}
/*
 * Dump every property (key, type, value) to the kernel log.
 * Debug aid used during upgrade; has no return value.
 */
void cas_properties_print(struct cas_properties *props)
{
	struct list_head *curr;
	struct _cas_property *entry;

	list_for_each(curr, &props->list) {
		entry = list_entry(curr, struct _cas_property, item);
		printk(KERN_DEBUG "[Upgrade] Key: %s", entry->key);

		switch (CAS_PROP_UNCONST(entry->type)) {
		case cas_property_string:
			printk(", string, ");
			printk("Value: %s ", (char *)entry->value);
			break;
		case cas_property_sint:
			/* Fix: signed entries were silently skipped. */
			printk(", sint, ");
			printk("Value: %lld ", entry->value_sint);
			break;
		case cas_property_uint:
			printk(", uint, ");
			printk("Value: %llu ", entry->value_uint);
			/* Fix: missing break - previously fell into the
			 * default branch. */
			break;
		default:
			/* Unknown type - key already printed, skip value. */
			break;
		}
		printk("\n");
	}
}
/*
 * Parse the mandatory leading VERSION_STR entry of a serialized buffer.
 *
 * @trigger INTERNAL_CALL enforces that the buffer was produced by the
 * same interface version; EXTERNAL_CALL only reports it via @version.
 * Returns 0 on success, -EINVAL on malformed input, -EPERM on a
 * version mismatch (internal calls only).
 */
static int _cas_prop_parse_version(const char *buffer, uint64_t *offset,
		uint64_t *version, int trigger)
{
	int result = 0;
	char *key = NULL;
	uint8_t type;

	/* The very first key must literally be VERSION_STR. */
	result = _cas_prop_parse_string(buffer, strlen(VERSION_STR) + 1,
			offset, &key);
	if (result)
		goto error_during_parse_key;

	if (strcmp(VERSION_STR, key)) {
		result = -EINVAL;
		goto error_after_parse_key;
	}

	/* The version entry is always a const unsigned integer. */
	type = buffer[*offset];
	if (cas_property_uint_const != type) {
		result = -EINVAL;
		goto error_after_parse_key;
	}
	*offset += sizeof(type);

	result = _cas_prop_parse_uint(buffer,
			strlen(VERSION_STR) + 1 + sizeof(type) +
			sizeof(*version), offset, version);
	if (result)
		goto error_after_parse_key;

	/*
	 * In case that is external call
	 * we don't need check version.
	 */
	if (trigger == INTERNAL_CALL && *version != CAS_PROPERTIES_VERSION) {
		printk(KERN_ERR "Version of interface using to parse is "
				"different than version used to serialize\n");
		result = -EPERM;
	}

error_after_parse_key:
	kfree(key);
error_during_parse_key:
	return result;
}
/*
 * Report the interface version a serialized buffer was produced with.
 * External entry point: the version is returned without being
 * validated against CAS_PROPERTIES_VERSION.
 */
int cas_properites_parse_version(struct casdsk_props_conf *caches_serialized_conf,
		uint64_t *version)
{
	uint64_t offset = 0;
	char *buffer = (char *) caches_serialized_conf->buffer;

	if (!buffer)
		return -EINVAL;

	return _cas_prop_parse_version(buffer, &offset, version,
			EXTERNAL_CALL);
}
/*
 * Reconstruct a CAS properties instance from a serialized buffer.
 *
 * Validates the CRC16 and the interface version before interpreting
 * any entries. Returns the new instance, or an ERR_PTR() on any
 * failure (the partially built instance is destroyed).
 */
struct cas_properties *
cas_properites_parse(struct casdsk_props_conf *caches_serialized_conf)
{
	struct cas_properties *props;
	char *key = NULL, *value = NULL, *buffer = NULL;
	int result;
	uint8_t type;
	uint64_t uint_value, size = 0, offset = 0, version = 0;
	uint16_t crc;
	int64_t sint_value;
	bool constant = false;

	props = cas_properties_create();
	if (IS_ERR(props))
		return ERR_PTR(-ENOMEM);

	if (!caches_serialized_conf) {
		result = -EINVAL;
		goto error_after_props_allocation;
	}

	buffer = (char *) caches_serialized_conf->buffer;
	if (!buffer) {
		result = -EINVAL;
		goto error_after_props_allocation;
	}

	/* Reject corrupted input before interpreting any of it. */
	size = caches_serialized_conf->size;
	crc = crc16(0, buffer, size);
	if (crc != caches_serialized_conf->crc) {
		printk(KERN_ERR "Cache configuration corrupted");
		result = -EINVAL;
		goto error_after_props_allocation;
	}

	/*
	 * Parse first entry on list - version of interface used to
	 * serialization
	 */
	result = _cas_prop_parse_version(buffer, &offset, &version,
			INTERNAL_CALL);
	if (result)
		goto error_after_props_allocation;

	while (offset < size) {
		/*
		 * Parse key of entry
		 */
		result = _cas_prop_parse_string(buffer, size, &offset, &key);
		if (result)
			goto error_after_props_allocation;

		/*
		 * Parse type of entry
		 */
		if (offset + sizeof(type) > size) {
			/*
			 * Fix: set an error code before bailing out.
			 * Previously result was still 0 here, so the
			 * function returned ERR_PTR(0) - a value that
			 * callers checking IS_ERR() would treat as a
			 * valid pointer.
			 */
			result = -EINVAL;
			kfree(key);
			goto error_after_props_allocation;
		}
		memcpy(&type, buffer + offset, sizeof(type));
		offset += sizeof(type);

		constant = CAS_PROP_CHECK_CONST(type);
		type = CAS_PROP_UNCONST(type);

		switch (type) {
		case cas_property_string:
			/* Parse string */
			result = _cas_prop_parse_string(buffer, size, &offset,
					&value);
			if (result)
				break;
			/*
			 * Add new entry with string to CAS properties instance
			 */
			result |= cas_properties_add_string(props, key, value,
					constant);
			kfree(value);
			break;
		case cas_property_sint:
			/* Parse signed integer */
			result = _cas_prop_parse_sint(buffer, size, &offset,
					&sint_value);
			/* Fix: do not add an entry built from an
			 * uninitialized value when parsing failed
			 * (mirrors the string case above). */
			if (result)
				break;
			/* Add new entry with signed integer to CAS properties
			 * instance
			 */
			result |= cas_properties_add_sint(props, key,
					sint_value, constant);
			break;
		case cas_property_uint:
			/* Parse unsigned integer */
			result = _cas_prop_parse_uint(buffer, size, &offset,
					&uint_value);
			if (result)
				break;
			/* Add new entry with unsigned integer to CAS properties
			 * instance
			 */
			result |= cas_properties_add_uint(props, key,
					uint_value, constant);
			break;
		default:
			result = -EINVAL;
			break;
		}

		/*
		 * In case when we added new entry,
		 * we not need hold key value longer.
		 */
		kfree(key);

		if (result)
			goto error_after_props_allocation;
	}

	return props;

error_after_props_allocation:
	cas_properties_destroy(props);
	return ERR_PTR(result);
}
/*
 * Look up the entry stored under @key.
 * Returns the entry, or ERR_PTR(-ENOENT) when the key is absent.
 */
static struct _cas_property *_cas_prop_find(const struct cas_properties *props,
		const char *key)
{
	struct list_head *pos;
	struct _cas_property *entry;

	/* Linear scan - property lists are expected to be short. */
	list_for_each(pos, &props->list) {
		entry = list_entry(pos, struct _cas_property, item);
		if (!strncmp(entry->key, key, MAX_STRING_SIZE))
			return entry;
	}

	return ERR_PTR(-ENOENT);
}
/*
 * Allocate a zeroed entry and duplicate @key into it. The type and
 * value are left for the caller to fill in.
 * Returns the entry or ERR_PTR(-ENOMEM).
 */
static struct _cas_property *_cas_prop_alloc_entry_key(const char *key)
{
	struct _cas_property *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	entry->key = kstrdup(key, GFP_KERNEL);
	if (!entry->key) {
		kfree(entry);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&entry->item);

	return entry;
}
/*
 * ADD
 */

/*
 * Store an unsigned integer under @key. An existing non-constant
 * unsigned entry is overwritten in place; a constant entry (or one of
 * another type) is rejected with -EINVAL.
 */
int cas_properties_add_uint(struct cas_properties *props, const char *key,
		uint64_t value, bool constant)
{
	struct _cas_property *entry;

	entry = _cas_prop_find(props, key);
	if (!IS_ERR(entry)) {
		/*
		 * A const entry carries a different type value, so this
		 * comparison also rejects updates to const entries.
		 */
		if (entry->type != cas_property_uint)
			return -EINVAL;
	} else {
		/* Key not present yet - append a fresh entry. */
		entry = _cas_prop_alloc_entry_key(key);
		if (IS_ERR(entry))
			return PTR_ERR(entry);
		list_add_tail(&entry->item, &props->list);
	}

	entry->type = constant ? cas_property_uint_const : cas_property_uint;
	entry->value_uint = value;

	return 0;
}
/*
 * Store a signed integer under @key. An existing non-constant signed
 * entry is overwritten in place; a constant entry (or one of another
 * type) is rejected with -EINVAL.
 */
int cas_properties_add_sint(struct cas_properties *props, const char *key,
		int64_t value, bool constant)
{
	struct _cas_property *entry;

	entry = _cas_prop_find(props, key);
	if (!IS_ERR(entry)) {
		/*
		 * A const entry carries a different type value, so this
		 * comparison also rejects updates to const entries.
		 */
		if (entry->type != cas_property_sint)
			return -EINVAL;
	} else {
		/* Key not present yet - append a fresh entry. */
		entry = _cas_prop_alloc_entry_key(key);
		if (IS_ERR(entry))
			return PTR_ERR(entry);
		list_add_tail(&entry->item, &props->list);
	}

	entry->type = constant ? cas_property_sint_const : cas_property_sint;
	entry->value_sint = value;

	return 0;
}
/*
 * Store a copy of @value under @key. An existing non-constant string
 * entry has its old value freed and replaced; a constant entry (or
 * one of another type) is rejected with -EINVAL.
 */
int cas_properties_add_string(struct cas_properties *props, const char *key,
		const char *value, bool constant)
{
	struct _cas_property *entry;
	char *dup;

	/* Duplicate up front so failure leaves the entry untouched. */
	dup = kstrdup(value, GFP_KERNEL);
	if (!dup)
		return -ENOMEM;

	entry = _cas_prop_find(props, key);
	if (IS_ERR(entry)) {
		/* Key not present yet - append a fresh entry. */
		entry = _cas_prop_alloc_entry_key(key);
		if (IS_ERR(entry)) {
			kfree(dup);
			return PTR_ERR(entry);
		}
		list_add_tail(&entry->item, &props->list);
	} else {
		/*
		 * A const entry carries a different type value, so this
		 * comparison also rejects updates to const entries.
		 */
		if (entry->type != cas_property_string) {
			kfree(dup);
			return -EINVAL;
		}
		/* Drop the previous string before installing the new one. */
		kfree(entry->value);
	}

	entry->type = constant ? cas_property_string_const :
			cas_property_string;
	entry->value = dup;

	return 0;
}
/*
 * GET
 */

/*
 * Fetch the unsigned integer stored under @key into @value.
 * Returns 0 on success, -ENOENT when the key is absent, -EINVAL when
 * the entry is of a different type (const and non-const both match).
 */
int cas_properties_get_uint(struct cas_properties *props, const char *key,
		uint64_t *value)
{
	struct _cas_property *entry;

	entry = _cas_prop_find(props, key);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	if (CAS_PROP_UNCONST(entry->type) != cas_property_uint)
		return -EINVAL;

	*value = entry->value_uint;
	return 0;
}
/*
 * Fetch the signed integer stored under @key into @value.
 * Returns 0 on success, -ENOENT when the key is absent, -EINVAL when
 * the entry is of a different type (const and non-const both match).
 */
int cas_properties_get_sint(struct cas_properties *props, const char *key,
		int64_t *value)
{
	struct _cas_property *entry;

	entry = _cas_prop_find(props, key);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	if (CAS_PROP_UNCONST(entry->type) != cas_property_sint)
		return -EINVAL;

	*value = entry->value_sint;
	return 0;
}
/*
 * Copy the string stored under @key into @value (at most @size bytes,
 * NUL terminator included).
 * Returns 0 on success, -ENOENT when the key is absent, -EINVAL on a
 * type mismatch, -ENOMEM when the destination is too small.
 */
int cas_properties_get_string(struct cas_properties *props, const char *key,
		char *value, uint32_t size)
{
	struct _cas_property *entry;
	uint64_t len;

	entry = _cas_prop_find(props, key);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	if (CAS_PROP_UNCONST(entry->type) != cas_property_string)
		return -EINVAL;

	/* Destination must hold the string plus its terminator. */
	len = cas_prop_strnlen(entry->value, MAX_STRING_SIZE);
	if (size < len + 1)
		return -ENOMEM;

	cas_prop_strncpy(value, size, entry->value, len);
	return 0;
}

View File

@@ -0,0 +1,153 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef UTILS_PROPERTIES_H_
#define UTILS_PROPERTIES_H_

#ifdef __KERNEL__
/* Kernel build: map onto the bounded kernel string helpers. */
#define cas_prop_strncpy(dest, dest_size, src, src_size) \
	strlcpy(dest, src, dest_size)
#define cas_prop_strnlen(string, size) strnlen(string, size)
#else
/* Userspace build: libc fallback (note: strlen ignores 'size' here). */
#define cas_prop_strncpy(dest, dest_size, src, src_size) \
	strncpy(dest, src, src_size)
#define cas_prop_strnlen(string, size) strlen(string)
#endif

#include "../../cas_disk/cas_disk.h"

/* Maximum length honored for keys and string values. */
#define MAX_STRING_SIZE 4095

#define CAS_PROPERTIES_NON_CONST false
#define CAS_PROPERTIES_CONST true

/**
 * @file utils_properties.h
 * @brief CAS cache interface for collecting and serializing CAS properties
 */

/**
 * @brief Handle to an instance of CAS properties
 */
struct cas_properties;

/**
 * @brief Initialize an instance of CAS properties
 *
 * @return Handle to the new instance, or an ERR_PTR on failure
 */
struct cas_properties *cas_properties_create(void);

/**
 * @brief De-initialize an instance of CAS properties
 *
 * @param props Handle to the instance to de-initialize
 */
void cas_properties_destroy(struct cas_properties *props);

/**
 * @brief Serialize the given CAS properties instance to a continuous buffer
 *
 * @param props instance of CAS properties
 * @param caches_serialized_conf descriptor receiving buffer, size and CRC
 * @return 0 on success, negative errno otherwise
 */
int cas_properties_serialize(struct cas_properties *props,
		struct casdsk_props_conf *caches_serialized_conf);

/**
 * @brief Parse the first entry of a serialized buffer to get the version of
 * the interface which was used to serialize it
 *
 * @param caches_serialized_conf serialized CAS properties descriptor
 * @param version pointer to memory where the version will be stored
 * @return 0 on success, negative errno otherwise
 */
int cas_properites_parse_version(struct casdsk_props_conf *caches_serialized_conf,
		uint64_t *version);

/**
 * @brief Parse a serialized buffer into a new CAS properties instance
 *
 * @param caches_serialized_conf serialized CAS properties descriptor
 * @return handle to a CAS properties instance, or an ERR_PTR on failure
 */
struct cas_properties *
cas_properites_parse(struct casdsk_props_conf *caches_serialized_conf);

/**
 * @brief Add an unsigned integer to a CAS properties instance
 *
 * @param props CAS properties instance to add the variable to
 * @param key key paired with the variable
 * @param value value of the variable
 * @param private if true the value cannot be updated later
 * @return 0 on success, negative errno otherwise
 */
int cas_properties_add_uint(struct cas_properties *props, const char *key,
		uint64_t value, bool private);

/**
 * @brief Add a signed integer to a CAS properties instance
 *
 * @param props CAS properties instance to add the variable to
 * @param key key paired with the variable
 * @param value value of the variable
 * @param private if true the value cannot be updated later
 * @return 0 on success, negative errno otherwise
 */
int cas_properties_add_sint(struct cas_properties *props, const char *key,
		int64_t value, bool private);

/**
 * @brief Add a string to a CAS properties instance
 *
 * @param props CAS properties instance to add the variable to
 * @param key key paired with the variable
 * @param value value of the variable (copied internally)
 * @param private if true the value cannot be updated later
 * @return 0 on success, negative errno otherwise
 */
int cas_properties_add_string(struct cas_properties *props, const char *key,
		const char *value, bool private);

/**
 * @brief Get an unsigned integer from a CAS properties instance
 *
 * @param props CAS properties instance to query
 * @param key key paired with the variable
 * @param value pointer to memory where the value will be stored
 * @return 0 on success, negative errno otherwise
 */
int cas_properties_get_uint(struct cas_properties *props, const char *key,
		uint64_t *value);

/**
 * @brief Get a signed integer from a CAS properties instance
 *
 * @param props CAS properties instance to query
 * @param key key paired with the variable
 * @param value pointer to memory where the value will be stored
 * @return 0 on success, negative errno otherwise
 */
int cas_properties_get_sint(struct cas_properties *props, const char *key,
		int64_t *value);

/**
 * @brief Get a string from a CAS properties instance
 *
 * @param props CAS properties instance to query
 * @param key key paired with the variable
 * @param value pointer to memory where the value will be copied
 * @param size size of the destination memory
 * @return 0 on success, -ENOENT when the key is absent, -EINVAL on a
 * type mismatch, -ENOMEM when the destination is too small
 * (the previous "0/1/2" description did not match the implementation)
 */
int cas_properties_get_string(struct cas_properties *props, const char *key,
		char *value, uint32_t size);

/* Dump all properties to the kernel log (debug aid). */
void cas_properties_print(struct cas_properties *props);

#endif /* UTILS_PROPERTIES_H_ */

View File

@@ -0,0 +1,262 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "ocf/ocf.h"
#include "utils_rpool.h"
#include "ocf_env.h"
#include "../cas_cache.h"
/* Set to 1 to enable verbose reserve-pool tracing via printk. */
#define CAS_UTILS_RPOOL_DEBUG 0
#if 1 == CAS_UTILS_RPOOL_DEBUG
#define CAS_DEBUG_TRACE() \
	printk(KERN_INFO "[Utils][RPOOL] %s\n", __func__)

#define CAS_DEBUG_MSG(msg) \
	printk(KERN_INFO "[Utils][RPOOL] %s - %s\n", __func__, msg)

#define CAS_DEBUG_PARAM(format, ...) \
	printk(KERN_INFO "[Utils][RPOOL] %s - "format"\n", \
			__func__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif

/* Per-CPU free list of pre-allocated entries, guarded by 'lock'. */
struct _cas_reserve_pool_per_cpu {
	spinlock_t lock;
	struct list_head list;
	atomic_t count;		/* number of entries currently pooled */
};

/* Master handle: one per-CPU pool for each online CPU. */
struct cas_reserve_pool {
	uint32_t limit;		/* max entries kept per CPU */
	char *name;		/* label for debug messages (not owned) */
	uint32_t entry_size;	/* full size of one pooled entry */
	struct _cas_reserve_pool_per_cpu *rpools;
};

/* Context handed to the per-CPU pre-allocation work item. */
struct _cas_rpool_pre_alloc_info {
	struct work_struct ws;
	struct cas_reserve_pool *rpool_master;
	cas_rpool_new rpool_new;	/* allocator callback */
	void *allocator_ctx;
	struct completion cmpl;		/* signaled when the work is done */
	int error;
};

/*
 * Each entry embeds a list_head at its tail (offset entry_size -
 * sizeof(list_head)); these macros convert between an entry pointer
 * and its embedded list item.
 */
#define RPOOL_ITEM_TO_ENTRY(rpool, item) \
	(void *)((unsigned long)item + sizeof(struct list_head) \
			- rpool->entry_size)

#define RPOOL_ENTRY_TO_ITEM(rpool, entry) \
	(struct list_head *)((unsigned long)entry + rpool->entry_size \
			- sizeof(struct list_head))
/*
 * Work item body: pre-allocate 'limit' entries for the CPU this work
 * was scheduled on and push them onto that CPU's free list.
 * Reports -ENOMEM through info->error and always completes info->cmpl.
 */
void _cas_rpool_pre_alloc_do(struct work_struct *ws)
{
	struct _cas_rpool_pre_alloc_info *info =
			container_of(ws, struct _cas_rpool_pre_alloc_info, ws);
	struct cas_reserve_pool *rpool_master = info->rpool_master;
	struct _cas_reserve_pool_per_cpu *current_rpool;
	struct list_head *item;
	void *entry;
	int i, cpu;

	CAS_DEBUG_TRACE();

	/* Runs on the CPU picked by schedule_work_on() in the caller. */
	cpu = smp_processor_id();
	current_rpool = &rpool_master->rpools[cpu];

	for (i = 0; i < rpool_master->limit; i++) {
		entry = info->rpool_new(info->allocator_ctx, cpu);
		if (!entry) {
			info->error = -ENOMEM;
			complete(&info->cmpl);
			return;
		}
		item = RPOOL_ENTRY_TO_ITEM(rpool_master, entry);
		list_add_tail(item, &current_rpool->list);
		atomic_inc(&current_rpool->count);
	}

	CAS_DEBUG_PARAM("Added [%d] pre allocated items to reserve poll [%s]"
			" for cpu %d", atomic_read(&current_rpool->count),
			rpool_master->name, cpu);

	complete(&info->cmpl);
}
/*
 * Schedule the pre-allocation work on @cpu and block until it
 * finishes. Returns the error reported by the work item (0 on
 * success).
 */
int _cas_rpool_pre_alloc_schedule(int cpu,
		struct _cas_rpool_pre_alloc_info *info)
{
	init_completion(&info->cmpl);
	INIT_WORK(&info->ws, _cas_rpool_pre_alloc_do);
	schedule_work_on(cpu, &info->ws);
	/* Yield once so the work gets a chance to run before we block. */
	schedule();

	wait_for_completion(&info->cmpl);
	return info->error;
}
/*
 * Tear down a reserve pool, returning every pooled entry through
 * @rpool_del. Tolerates a partially constructed pool (NULL master or
 * missing per-CPU array), as happens on a create failure path.
 */
void cas_rpool_destroy(struct cas_reserve_pool *rpool_master,
		cas_rpool_del rpool_del, void *allocator_ctx)
{
	int i, cpu_no = num_online_cpus();
	struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
	struct list_head *item = NULL, *next = NULL;
	void *entry;

	CAS_DEBUG_TRACE();

	if (!rpool_master)
		return;

	if (!rpool_master->rpools) {
		kfree(rpool_master);
		return;
	}

	for (i = 0; i < cpu_no; i++) {
		current_rpool = &rpool_master->rpools[i];

		CAS_DEBUG_PARAM("Destroyed reserve poll [%s] for cpu %d",
				rpool_master->name, i);

		if (!atomic_read(&current_rpool->count))
			continue;

		/* No lock taken: no concurrent users at teardown time
		 * (NOTE(review): confirm callers guarantee this). */
		list_for_each_safe(item, next, &current_rpool->list) {
			entry = RPOOL_ITEM_TO_ENTRY(rpool_master, item);
			list_del(item);
			rpool_del(allocator_ctx, entry);
			atomic_dec(&current_rpool->count);
		}

		if (atomic_read(&current_rpool->count)) {
			printk(KERN_CRIT "Not all object from reserve poll"
					"[%s] deallocated\n", rpool_master->name);
			WARN(true, OCF_PREFIX_SHORT" Cleanup problem\n");
		}
	}

	kfree(rpool_master->rpools);

	kfree(rpool_master);
}
/*
 * Create a reserve pool with @limit pre-allocated entries per online
 * CPU. Each entry must be at least CAS_RPOOL_MIN_SIZE_ITEM bytes since
 * a list_head is embedded at its tail. @name is kept by reference, not
 * copied. Returns NULL on any failure (partial state is cleaned up).
 */
struct cas_reserve_pool *cas_rpool_create(uint32_t limit, char *name,
		uint32_t entry_size, cas_rpool_new rpool_new,
		cas_rpool_del rpool_del, void *allocator_ctx)
{
	int i, cpu_no = num_online_cpus();
	struct cas_reserve_pool *rpool_master = NULL;
	struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
	struct _cas_rpool_pre_alloc_info info;

	CAS_DEBUG_TRACE();

	memset(&info, 0, sizeof(info));

	rpool_master = kzalloc(sizeof(*rpool_master), GFP_KERNEL);
	if (!rpool_master)
		goto error;

	rpool_master->rpools = kzalloc(sizeof(*rpool_master->rpools) * cpu_no,
			GFP_KERNEL);
	if (!rpool_master->rpools)
		goto error;

	rpool_master->limit = limit;
	rpool_master->name = name;
	rpool_master->entry_size = entry_size;

	info.rpool_master = rpool_master;
	info.rpool_new = rpool_new;
	info.allocator_ctx = allocator_ctx;

	/* Fill each per-CPU pool on its own CPU via the workqueue. */
	for (i = 0; i < cpu_no; i++) {
		current_rpool = &rpool_master->rpools[i];

		spin_lock_init(&current_rpool->lock);

		INIT_LIST_HEAD(&current_rpool->list);

		if (_cas_rpool_pre_alloc_schedule(i, &info))
			goto error;

		CAS_DEBUG_PARAM("Created reserve poll [%s] for cpu %d",
				rpool_master->name, i);
	}

	return rpool_master;
error:
	/* The destructor tolerates partially initialized state. */
	cas_rpool_destroy(rpool_master, rpool_del, allocator_ctx);
	return NULL;
}
#define LIST_FIRST_ITEM(head) head.next

/*
 * Pop one pre-allocated entry from the current CPU's pool, or return
 * NULL when that pool is empty. The CPU used is reported through @cpu
 * so the entry can later be handed back to the same pool.
 */
void *cas_rpool_try_get(struct cas_reserve_pool *rpool_master, int *cpu)
{
	unsigned long flags;
	struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
	struct list_head *item = NULL;
	void *entry = NULL;

	CAS_DEBUG_TRACE();

	*cpu = smp_processor_id();
	current_rpool = &rpool_master->rpools[*cpu];

	spin_lock_irqsave(&current_rpool->lock, flags);

	if (!list_empty(&current_rpool->list)) {
		item = LIST_FIRST_ITEM(current_rpool->list);
		entry = RPOOL_ITEM_TO_ENTRY(rpool_master, item);
		list_del(item);
		atomic_dec(&current_rpool->count);
	}

	spin_unlock_irqrestore(&current_rpool->lock, flags);

	/* Fix: status tag and pool name were swapped in the argument
	 * list, producing "[name]...[SKIPPED]" instead of
	 * "[OK]...from reserve pool [name]". */
	CAS_DEBUG_PARAM("[%s]Removed item from reserve pool [%s] for cpu [%d], "
			"items in pool %d", item == NULL ? "SKIPPED" : "OK",
			rpool_master->name, *cpu,
			atomic_read(&current_rpool->count));

	return entry;
}
/*
 * Return @entry to the pool of @cpu (the CPU reported by
 * cas_rpool_try_get()). Returns 0 when the entry was pooled, 1 when
 * the pool already holds 'limit' items and the caller must free the
 * entry itself.
 */
int cas_rpool_try_put(struct cas_reserve_pool *rpool_master, void *entry, int cpu)
{
	int ret = 0;
	unsigned long flags;
	struct _cas_reserve_pool_per_cpu *current_rpool = NULL;
	struct list_head *item;

	CAS_DEBUG_TRACE();

	current_rpool = &rpool_master->rpools[cpu];

	spin_lock_irqsave(&current_rpool->lock, flags);

	if (atomic_read(&current_rpool->count) >= rpool_master->limit) {
		ret = 1;
		goto error;
	}

	item = RPOOL_ENTRY_TO_ITEM(rpool_master, entry);
	list_add_tail(item, &current_rpool->list);

	atomic_inc(&current_rpool->count);

error:
	/* Fix: status tag and pool name were swapped in the argument
	 * list, producing "[name]...[SKIPPED]" instead of
	 * "[OK]...to reserve pool [name]". */
	CAS_DEBUG_PARAM("[%s]Added item to reserve pool [%s] for cpu [%d], "
			"items in pool %d", ret == 1 ? "SKIPPED" : "OK",
			rpool_master->name, cpu,
			atomic_read(&current_rpool->count));

	spin_unlock_irqrestore(&current_rpool->lock, flags);

	return ret;
}

View File

@@ -0,0 +1,28 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_RPOOL_H__
#define __CAS_RPOOL_H__

/* Minimum item size: a list_head is embedded at each item's tail. */
#define CAS_RPOOL_MIN_SIZE_ITEM sizeof(struct list_head)

struct cas_reserve_pool;

/* Callback freeing one pooled item. */
typedef void (*cas_rpool_del)(void *allocator_ctx, void *item);
/* Callback allocating one pooled item for the given CPU. */
typedef void *(*cas_rpool_new)(void *allocator_ctx, int cpu);

/* Create a pool with 'limit' pre-allocated items per online CPU. */
struct cas_reserve_pool *cas_rpool_create(uint32_t limit, char *name,
		uint32_t item_size, cas_rpool_new rpool_new,
		cas_rpool_del rpool_del, void *allocator_ctx);

/* Destroy the pool, releasing remaining items through rpool_del. */
void cas_rpool_destroy(struct cas_reserve_pool *rpool,
		cas_rpool_del rpool_del, void *allocator_ctx);

/* Pop an item from the current CPU's pool; NULL when empty. */
void *cas_rpool_try_get(struct cas_reserve_pool *rpool, int *cpu);

/* Return an item to the pool of 'cpu'; 1 when the pool is full. */
int cas_rpool_try_put(struct cas_reserve_pool *rpool, void *item, int cpu);

#endif /* __CAS_RPOOL_H__ */

View File

@@ -0,0 +1,53 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __OBJ_BLK_H__
#define __OBJ_BLK_H__

#include "vol_atomic_dev_bottom.h"
#include "vol_block_dev_bottom.h"
#include "vol_block_dev_top.h"

struct casdsk_disk;

/* Per-volume state for a block-device-backed OCF volume. */
struct bd_object {
	struct casdsk_disk *dsk;

	struct block_device *btm_bd;

	/**
	 * This denotes state of volatile write cache of the device.
	 * This is set to true when:
	 * - opening the device
	 * - when writing to a device without FUA/FLUSH flags
	 * This is set to false when:
	 * - FLUSH request is completed on device.
	 * When it is false
	 * - FLUSH requests from upper layer are NOT passed to the device.
	 */
	atomic_t potentially_dirty;

	uint32_t expobj_valid : 1;
		/*!< Bit indicates that exported object was created */

	uint32_t expobj_locked : 1;
		/*!< Non zero value indicates data exported object is locked */

	uint32_t opened_by_bdev : 1;
		/*!< Opened by supplying bdev manually */

	struct atomic_dev_params atomic_params;
		/*!< NVMe atomic parameters; per its own definition, valid
		 * only when is_atomic_capable is set */

	atomic64_t pending_rqs;
		/*!< This fields describes in flight IO requests */

	struct workqueue_struct *workqueue;
		/*< Workqueue for internally trigerred I/O */
};

/* Retrieve the bd_object stored as the OCF volume's private data. */
static inline struct bd_object *bd_object(ocf_volume_t vol)
{
	return ocf_volume_get_priv(vol);
}

#endif /* __OBJ_BLK_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_ATOMIC_DEV_BOTTOM_H__
#define __VOL_ATOMIC_DEV_BOTTOM_H__
#include "../cas_cache.h"
enum atomic_metadata_mode {
ATOMIC_METADATA_MODE_ELBA,
ATOMIC_METADATA_MODE_SEPBUF,
ATOMIC_METADATA_MODE_NONE,
};
struct atomic_dev_params {
unsigned int nsid;
uint64_t size;
enum atomic_metadata_mode metadata_mode;
unsigned is_mode_optimal : 1;
/* IMPORTANT: If this field is 0, the other fields are invalid! */
unsigned is_atomic_capable : 1;
};
int atomic_dev_init(void);
void atomic_dev_deinit(void);
#endif /* __VOL_ATOMIC_DEV_BOTTOM_H__ */

View File

@@ -0,0 +1,470 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "vol_blk_utils.h"
/*
 * Consume @bytes from the iterator's current vector (caller guarantees
 * they fit) and, when that vector is exhausted, step to the next one.
 * When the whole vector array is drained, the iterator is left with a
 * NULL ivec and zero len/offset.
 */
static void cas_io_iter_advanced(struct bio_vec_iter *iter, uint32_t bytes)
{
	BUG_ON(bytes > iter->len);

	iter->len -= bytes;
	iter->offset += bytes;

	if (iter->len)
		return; /* Current vector still has data left. */

	iter->idx++;
	if (iter->idx >= iter->vec_size) {
		/* End of the vector array - mark iterator as drained. */
		iter->ivec = NULL;
		iter->len = 0;
		iter->offset = 0;
		return;
	}

	iter->ivec = &iter->vec[iter->idx];
	iter->len = iter->ivec->bv_len;
	iter->offset = iter->ivec->bv_offset;
}
/*
 * Copy up to @bytes from the @src iterator to the @dst iterator,
 * advancing both. Returns the number of bytes actually copied, which
 * may be short when either iterator runs out of data/space.
 */
uint32_t cas_io_iter_cpy(struct bio_vec_iter *dst, struct bio_vec_iter *src,
		uint32_t bytes)
{
	uint32_t to_copy, written = 0;
	void *adst, *asrc;

	if (dst->idx >= dst->vec_size)
		return 0;

	BUG_ON(dst->offset + dst->len > PAGE_SIZE);

	if (src->idx >= src->vec_size)
		return 0;

	BUG_ON(src->offset + src->len > PAGE_SIZE);

	while (bytes) {
		/* Per iteration, copy at most to the end of the shorter
		 * of the two current vectors. */
		to_copy = min(dst->len, src->len);
		to_copy = min(to_copy, bytes);
		if (to_copy == 0) {
			/* No more bytes for coping */
			break;
		}

		/* NOTE(review): page_address() assumes mapped (lowmem)
		 * pages - confirm highmem is not a concern here. */
		adst = page_address(dst->ivec->bv_page) + dst->offset;
		asrc = page_address(src->ivec->bv_page) + src->offset;

		memcpy(adst, asrc, to_copy);

		bytes -= to_copy;
		written += to_copy;

		cas_io_iter_advanced(dst, to_copy);
		cas_io_iter_advanced(src, to_copy);
	}

	return written;
}
/*
 * Copy up to @bytes from the flat buffer @src into the @dst iterator,
 * advancing it. Returns the number of bytes actually written (short
 * when the iterator runs out of space).
 */
uint32_t cas_io_iter_cpy_from_data(struct bio_vec_iter *dst,
		const void *src, uint32_t bytes)
{
	uint32_t to_copy, written = 0;
	void *adst;
	const void *asrc;

	if (dst->idx >= dst->vec_size)
		return 0;

	BUG_ON(dst->offset + dst->len > PAGE_SIZE);

	while (bytes) {
		/* Limit each copy to the current destination vector. */
		to_copy = min(dst->len, bytes);
		if (to_copy == 0) {
			/* No more bytes for coping */
			break;
		}

		/* NOTE(review): page_address() assumes a mapped (lowmem)
		 * page - confirm highmem is not a concern here. */
		adst = page_address(dst->ivec->bv_page) + dst->offset;
		asrc = src + written;

		memcpy(adst, asrc, to_copy);

		bytes -= to_copy;
		written += to_copy;

		cas_io_iter_advanced(dst, to_copy);
	}

	return written;
}
/*
 * Copy up to @bytes from the @src iterator into the flat buffer @dst,
 * advancing the iterator. Returns the number of bytes actually copied
 * (short when the iterator runs out of data).
 */
uint32_t cas_io_iter_cpy_to_data(void *dst, struct bio_vec_iter *src,
		uint32_t bytes)
{
	uint32_t to_copy, written = 0;
	void *adst, *asrc;

	BUG_ON(dst == NULL);

	if (src->idx >= src->vec_size)
		return 0;

	BUG_ON(src->offset + src->len > PAGE_SIZE);

	while (bytes) {
		/* Limit each copy to the current source vector. */
		to_copy = min(bytes, src->len);
		if (to_copy == 0) {
			/* No more bytes for coping */
			break;
		}

		adst = dst + written;
		/* NOTE(review): page_address() assumes a mapped (lowmem)
		 * page - confirm highmem is not a concern here. */
		asrc = page_address(src->ivec->bv_page) + src->offset;

		memcpy(adst, asrc, to_copy);

		bytes -= to_copy;
		written += to_copy;

		cas_io_iter_advanced(src, to_copy);
	}

	return written;
}
/*
 * Skip up to @bytes of the iterator without copying anything.
 * Returns the number of bytes actually skipped (short when the
 * iterator drains first).
 */
uint32_t cas_io_iter_move(struct bio_vec_iter *iter, uint32_t bytes)
{
	uint32_t chunk, moved = 0;

	if (iter->idx >= iter->vec_size)
		return 0;

	BUG_ON(iter->offset + iter->len > PAGE_SIZE);

	while (bytes) {
		chunk = min(iter->len, bytes);
		if (!chunk)
			break; /* Iterator drained. */

		bytes -= chunk;
		moved += chunk;

		cas_io_iter_advanced(iter, chunk);
	}

	return moved;
}
/*
 * Zero up to @bytes of the @dst iterator, advancing it.
 * Returns the number of bytes actually zeroed (short when the
 * iterator runs out of space).
 */
uint32_t cas_io_iter_zero(struct bio_vec_iter *dst, uint32_t bytes)
{
	uint32_t to_fill, zeroed = 0;
	void *adst;

	if (dst->idx >= dst->vec_size)
		return 0;

	BUG_ON(dst->offset + dst->len > PAGE_SIZE);

	while (bytes) {
		/*
		 * Fix: clamp each step to the caller's remaining byte
		 * count, like every sibling iterator helper. The old
		 * code used min(dst->len, PAGE_SIZE), which could zero
		 * past the requested range and underflow 'bytes' when
		 * bytes < dst->len.
		 */
		to_fill = min(dst->len, bytes);
		if (to_fill == 0) {
			/* Iterator drained. */
			break;
		}

		adst = page_address(dst->ivec->bv_page) + dst->offset;

		memset(adst, 0, to_fill);

		bytes -= to_fill;
		zeroed += to_fill;

		cas_io_iter_advanced(dst, to_fill);
	}

	return zeroed;
}
/*
*
*/
/*
 * Attach OCF context data (a blk_data) to an OCF io and position the
 * internal BIO-vector iterator at @offset within it.
 * Returns 0 on success, -ENOBUFS when @offset lies beyond the end of
 * the data (the error is also latched in blkio->error).
 */
int cas_blk_io_set_data(struct ocf_io *io,
		ctx_data_t *ctx_data, uint32_t offset)
{
	struct blkio *blkio = cas_io_to_blkio(io);
	struct blk_data *data = ctx_data;

	/* Set BIO vector (IO data) and initialize iterator */
	blkio->data = data;
	if (blkio->data) {
		cas_io_iter_init(&blkio->iter, blkio->data->vec,
				blkio->data->size);

		/* Move into specified offset in BIO vector iterator */
		if (offset != cas_io_iter_move(&blkio->iter, offset)) {
			/* TODO Log message */
			blkio->error = -ENOBUFS;
			return -ENOBUFS;
		}
	}

	return 0;
}
/*
*
*/
ctx_data_t *cas_blk_io_get_data(struct ocf_io *io)
{
struct blkio *blkio = cas_io_to_blkio(io);
return blkio->data;
}
#if defined(CAS_NVME_PARTIAL)
#include "utils/utils_nvme.h"
/*
 * Probe @bdev to decide which CAS volume type should drive it and to
 * gather atomic-write parameters for NVMe namespaces.
 *
 * On success *@type is BLOCK_DEVICE_VOLUME for regular devices (and for
 * anything whose NVMe identify path fails), or ATOMIC_DEVICE_VOLUME for
 * an NVMe namespace formatted with 8B metadata (CAS_NVME_FULL builds
 * only). @atomic_params, when non-NULL, receives the probed parameters.
 * Returns 0 on success, -OCF_ERR_NO_MEM on allocation failure, or
 * -KCAS_ERR_NVME_BAD_FORMAT for unsupported NVMe formats.
 */
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
		uint8_t *type, struct atomic_dev_params *atomic_params)
{
	struct nvme_id_ns *ns;
	unsigned int nsid, selected, ms, ds, pi, elba, sbsupp;
	long long int ret = 0;
	struct atomic_dev_params atomic_params_int = {0};

	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return -OCF_ERR_NO_MEM;

	ret = cas_nvme_get_nsid(bdev, &nsid);
	if (ret < 0) {
		/*
		 * We cannot obtain NSID which means we are not dealing with
		 * NVMe device
		 */
		goto out1;
	}

	ret = cas_nvme_identify_ns(bdev, nsid, ns);
	if (ret < 0) {
		/*
		 * We cannot obtain ns structure which means we ARE dealing with
		 * NVMe device but can not recognize format so let's treat that
		 * device as block device
		 */
		goto out1;
	}

	/* Decode the active LBA format from the identify-namespace data */
	selected = ns->flbas & 0xf;	/* active LBA format index */
	ms = ns->lbaf[selected].ms;	/* metadata size in bytes */
	ds = ns->lbaf[selected].ds;	/* LBA data size, log2(bytes) */
	pi = ns->dps & 0x7;		/* protection information type */
	elba = !!(ns->flbas & (1<<4));	/* metadata at end of LBA */
	sbsupp = !!(ns->mc & (1<<1));	/* separate metadata buffer support */

	atomic_params_int.is_atomic_capable = 1;
	atomic_params_int.nsid = nsid;
	/* NOTE(review): assumes ds >= 9; a smaller LBA data size would
	 * shift by a negative count here - confirm against the formats
	 * accepted below */
	atomic_params_int.size = (ns->nsze << (ds - 9)) * SECTOR_SIZE;

	if (pi != 0) {
		/* We don't support formats which have
		 * enable Protection Information feature.
		 */
		ret = -KCAS_ERR_NVME_BAD_FORMAT;
		goto out2;
	}

	switch (ms) {
	case 0:
		/* NVMe metadata features disabled, so we handle it as
		 * regular block device
		 */
		if (ds != 9 && ds != 12) {
			ret = -KCAS_ERR_NVME_BAD_FORMAT;
			goto out2;
		}

		*type = BLOCK_DEVICE_VOLUME;
		atomic_params_int.metadata_mode = ATOMIC_METADATA_MODE_NONE;
#if !defined(CAS_NVME_FULL)
		/*
		 * Only partial support user can't using
		 * device in atomic mode, so mode is optimal
		 */
		atomic_params_int.is_mode_optimal = 1;
		break;
#else
		if (bdev == bdev->bd_contains) {
			/*
			 * Entire device - format isn't optimal
			 */
			atomic_params_int.is_mode_optimal = 0;
		} else {
			/*
			 * Partition - format is optimal, user can't using
			 * partitions in atomic mode
			 */
			atomic_params_int.is_mode_optimal = 1;
		}
		break;
	/* Note: "case 8" is compiled only in CAS_NVME_FULL builds;
	 * without full NVMe support an 8B-metadata format falls into the
	 * default (bad format) branch below. */
	case 8:
		/* For atomic writes we support only metadata size 8B and
		 * data size 512B
		 */
		if (ds != 9) {
			ret = -KCAS_ERR_NVME_BAD_FORMAT;
			goto out2;
		}

		*type = ATOMIC_DEVICE_VOLUME;
		atomic_params_int.metadata_mode = elba ?
				ATOMIC_METADATA_MODE_ELBA :
				ATOMIC_METADATA_MODE_SEPBUF;
		atomic_params_int.is_mode_optimal = sbsupp ? !elba : 1;
		break;
#endif
	default:
		ret = -KCAS_ERR_NVME_BAD_FORMAT;
	}

	if (atomic_params)
		*atomic_params = atomic_params_int;

	goto out2;
out1:
	/* Not identifiable as NVMe - fall back to a plain block device */
	*type = BLOCK_DEVICE_VOLUME;
	ret = 0;
out2:
	kfree(ns);
	return ret;
}
/*
 * Open @path exclusively (read-only) and run NVMe/block type detection
 * on the underlying block device. Returns the detection result, or
 * -OCF_ERR_NOT_OPEN_EXC when the device cannot be opened exclusively.
 */
static inline int _cas_detect_blk_type(const char *path, uint8_t *type,
		struct atomic_dev_params *atomic_params)
{
	struct block_device *bdev;
	char holder[] = "CAS DETECT\n";
	int result;

	bdev = OPEN_BDEV_EXCLUSIVE(path, FMODE_READ, holder);
	if (IS_ERR(bdev))
		return -OCF_ERR_NOT_OPEN_EXC;

	result = cas_blk_identify_type_by_bdev(bdev, type, atomic_params);
	CLOSE_BDEV_EXCLUSIVE(bdev, FMODE_READ);

	return result;
}
#else
/*
 * Fallback used when the kernel lacks NVMe support: the NVMe format
 * cannot be queried, so naively assume a regular block device with
 * 512B LBAs. Always succeeds.
 */
static inline int _cas_detect_blk_type(const char *path, uint8_t *type,
		struct atomic_dev_params *atomic_params)
{
	*type = BLOCK_DEVICE_VOLUME;

	return 0;
}
/* Stub for kernels without NVMe support: every bdev is treated as a
 * regular block device; @atomic_params is left untouched. */
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
		uint8_t *type, struct atomic_dev_params *atomic_params)
{
	*type = BLOCK_DEVICE_VOLUME;
	return 0;
}
#endif
/*
 * Create and open an OCF volume on top of an already-opened block
 * device handle. The volume type is detected from the bdev;
 * opened_by_bdev marks that the handle's lifetime is owned by the
 * caller. Returns 0 on success or a negative error code.
 */
int cas_blk_open_volume_by_bdev(ocf_volume_t *vol,
		struct block_device *bdev)
{
	struct atomic_dev_params atomic_params = {0};
	struct bd_object *bdobj;
	uint8_t type;
	int result;

	result = cas_blk_identify_type_by_bdev(bdev, &type, &atomic_params);
	if (result)
		return result;

	result = ocf_ctx_volume_create(cas_ctx, vol, NULL, type);
	if (result)
		return result;

	bdobj = bd_object(*vol);
	bdobj->btm_bd = bdev;
	bdobj->opened_by_bdev = true;

	ocf_volume_open(*vol);

	return 0;
}
/* Close, deinitialize and free an OCF volume previously opened by
 * cas_blk_open_volume_by_bdev(). */
void cas_blk_close_volume(ocf_volume_t vol)
{
	ocf_volume_close(vol);
	ocf_volume_deinit(vol);
	env_free(vol);
}
/*
 * Classify the device at @path: character devices are reported as
 * NVME_CONTROLLER, block devices are further probed (NVMe vs regular)
 * via _cas_detect_blk_type(). Returns 0 on success or
 * -OCF_ERR_INVAL_VOLUME_TYPE / a detection error otherwise.
 */
int _cas_blk_identify_type(const char *path, uint8_t *type,
		struct atomic_dev_params *atomic_params)
{
	struct file *file;
	int result = 0;

	file = filp_open(path, O_RDONLY, 0);
	if (IS_ERR(file))
		return -OCF_ERR_INVAL_VOLUME_TYPE;

	if (S_ISBLK(FILE_INODE(file)->i_mode))
		*type = BLOCK_DEVICE_VOLUME;
	else if (S_ISCHR(FILE_INODE(file)->i_mode))
		*type = NVME_CONTROLLER;
	else
		result = -OCF_ERR_INVAL_VOLUME_TYPE;

	filp_close(file, 0);

	if (result)
		return result;

	if (*type != BLOCK_DEVICE_VOLUME)
		return 0;

	/* Block device - refine the type (may switch it to atomic NVMe) */
	result = _cas_detect_blk_type(path, type, atomic_params);

	return result < 0 ? result : 0;
}
/* Identify volume type at @path without collecting atomic parameters. */
int cas_blk_identify_type(const char *path, uint8_t *type)
{
	return _cas_blk_identify_type(path, type, NULL);
}
/* Identify volume type at @path and fill @atomic_params for NVMe. */
int cas_blk_identify_type_atomic(const char *path, uint8_t *type,
		struct atomic_dev_params *atomic_params)
{
	return _cas_blk_identify_type(path, type, atomic_params);
}

View File

@@ -0,0 +1,148 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLK_UTILS_H__
#define __VOL_BLK_UTILS_H__
#include "obj_blk.h"
#include "context.h"
/* True when @flags denote a flush request (plain flush or flush+FUA). */
static inline bool cas_blk_is_flush_io(unsigned long flags)
{
	return ((flags & OCF_WRITE_FLUSH) == OCF_WRITE_FLUSH) ||
			((flags & OCF_WRITE_FLUSH_FUA) == OCF_WRITE_FLUSH_FUA);
}
/* Per-IO context kept in the OCF IO private area for block-device
 * volumes (sized via cas_object_blk_properties.io_priv_size). */
struct blkio {
	/* First/accumulated error of any sub-BIO of this IO (0 = OK) */
	int error;
	/* Outstanding sub-request count; the IO completes when it drops
	 * to zero. (Historical typo for "remaining" - the name is used
	 * across the module, so it is kept as-is.) */
	atomic_t rq_remaning;
	/* NOTE(review): not referenced in the visible code - purpose
	 * unconfirmed */
	atomic_t ref_counter;
	/* Snapshot of the volume's potentially_dirty counter */
	int32_t dirty;
	/* IO direction (OCF_READ / OCF_WRITE) */
	int32_t dir;

	struct blk_data *data; /* IO data buffer */

	/* BIO vector iterator for sending IO */
	struct bio_vec_iter iter;
};
/* Retrieve the blkio context embedded in an OCF IO's private area. */
static inline struct blkio *cas_io_to_blkio(struct ocf_io *io)
{
	return ocf_io_get_priv(io);
}
int cas_blk_io_set_data(struct ocf_io *io, ctx_data_t *data,
uint32_t offset);
ctx_data_t *cas_blk_io_get_data(struct ocf_io *io);
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
uint8_t *type, struct atomic_dev_params *atomic_params);
int cas_blk_open_volume_by_bdev(ocf_volume_t *vol,
struct block_device *bdev);
void cas_blk_close_volume(ocf_volume_t vol);
int cas_blk_identify_type(const char *path, uint8_t *type);
int cas_blk_identify_type_atomic(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params);
/* Position @iter at the start of @vec (an array of @vec_size entries).
 * The first entry's offset/length become the iterator's current view. */
static inline void cas_io_iter_init(struct bio_vec_iter *iter,
		struct bio_vec *vec, uint32_t vec_size)
{
	iter->vec = vec;
	iter->ivec = vec;
	iter->vec_size = vec_size;
	iter->idx = 0;
	iter->offset = vec->bv_offset;
	iter->len = vec->bv_len;
}
/* Position @iter at an explicit (index, offset, length) inside @vec.
 * A warning is raised when @idx lies beyond the vector; the iterator is
 * then left with a NULL current-entry pointer. */
static inline void cas_io_iter_set(struct bio_vec_iter *iter,
		struct bio_vec *vec, uint32_t vec_size,
		uint32_t idx, uint32_t offset, uint32_t len)
{
	iter->vec = vec;
	iter->vec_size = vec_size;
	iter->idx = idx;
	iter->offset = offset;
	iter->len = len;

	if (idx < vec_size) {
		iter->ivec = &vec[idx];
	} else {
		iter->ivec = NULL;
		WARN(1, "Setting offset out of BIO vector");
	}
}
/* Duplicate iterator state from @src into @dst (shallow copy: both end
 * up referencing the same underlying bio_vec array). */
static inline void cas_io_iter_copy_set(struct bio_vec_iter *dst,
		struct bio_vec_iter *src)
{
	/* Struct assignment copies all fields (vec, vec_size, idx,
	 * offset, len, ivec) exactly as the field-by-field version did */
	*dst = *src;
}
/* True while the iterator still has vector entries to consume. */
static inline bool cas_io_iter_is_next(struct bio_vec_iter *iter)
{
	/* TODO UNITTEST */
	return iter->idx < iter->vec_size;
}
/* Number of vector entries already fully consumed by the iterator. */
static inline uint32_t cas_io_iter_size_done(struct bio_vec_iter *iter)
{
	return iter->idx;
	/* TODO UNITTEST */
}
/* Number of vector entries not yet consumed (0 when exhausted). */
static inline uint32_t cas_io_iter_size_left(struct bio_vec_iter *iter)
{
	/* TODO UNITTEST */
	if (iter->idx >= iter->vec_size)
		return 0;

	return iter->vec_size - iter->idx;
}
/* Byte offset within the current vector entry (0 when exhausted). */
static inline uint32_t cas_io_iter_current_offset(struct bio_vec_iter *iter)
{
	/* TODO UNITTEST */
	if (iter->idx >= iter->vec_size)
		return 0;

	return iter->offset;
}
/* Bytes remaining in the current vector entry (0 when exhausted). */
static inline uint32_t cas_io_iter_current_length(struct bio_vec_iter *iter)
{
	/* TODO UNITTEST */
	if (iter->idx >= iter->vec_size)
		return 0;

	return iter->len;
}
/* Page backing the current vector entry (NULL when exhausted). */
static inline struct page *cas_io_iter_current_page(struct bio_vec_iter *iter)
{
	/* TODO UNITTEST */
	if (iter->idx >= iter->vec_size)
		return NULL;

	return iter->ivec->bv_page;
}
uint32_t cas_io_iter_cpy(struct bio_vec_iter *dst, struct bio_vec_iter *src,
uint32_t bytes);
uint32_t cas_io_iter_cpy_from_data(struct bio_vec_iter *dst,
const void *src, uint32_t bytes);
uint32_t cas_io_iter_cpy_to_data(void *dst, struct bio_vec_iter *src,
uint32_t bytes);
uint32_t cas_io_iter_move(struct bio_vec_iter *iter,
uint32_t bytes);
uint32_t cas_io_iter_zero(struct bio_vec_iter *iter, uint32_t bytes);
#endif /* __VOL_BLK_UTILS_H__ */

View File

@@ -0,0 +1,597 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
/* Compile-time switch for verbose IO-path tracing; with 0 the
 * CAS_DEBUG_* macros below expand to nothing (arguments discarded). */
#define CAS_DEBUG_IO 0

#if CAS_DEBUG_IO == 1
#define CAS_DEBUG_TRACE() printk(KERN_DEBUG \
		"[IO] %s:%d\n", __func__, __LINE__)

#define CAS_DEBUG_MSG(msg) printk(KERN_DEBUG \
		"[IO] %s:%d - %s\n", __func__, __LINE__, msg)

#define CAS_DEBUG_PARAM(format, ...) printk(KERN_DEBUG \
		"[IO] %s:%d - "format"\n", __func__, __LINE__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif
/*
 * OCF volume open callback: attach the bottom block device identified
 * by the volume UUID (a device path). During an upgrade the device is
 * already held by cas_disk and is claimed instead of opened.
 * Returns 0 on success or a negative error code.
 */
int block_dev_open_object(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);
	const struct ocf_volume_uuid *uuid = ocf_volume_get_uuid(vol);
	struct casdsk_disk *dsk;

	/* Bdev has been set manually, so there is nothing to do. */
	if (bdobj->opened_by_bdev)
		return 0;

	if (unlikely(cas_upgrade_is_in_upgrade() == true)) {
		dsk = casdisk_functions.casdsk_disk_claim(uuid->data, NULL);
		casdisk_functions.casdsk_disk_set_attached(dsk);
	} else {
		dsk = casdisk_functions.casdsk_disk_open(uuid->data, NULL);
	}

	if (IS_ERR_OR_NULL(dsk)) {
		int error = PTR_ERR(dsk) ?: -EINVAL;

		/* EBUSY maps to OCF's "not opened exclusively" */
		return error == -EBUSY ? -OCF_ERR_NOT_OPEN_EXC : error;
	}

	bdobj->dsk = dsk;
	bdobj->btm_bd = casdisk_functions.casdsk_disk_get_blkdev(dsk);

	return 0;
}
/*
 * OCF volume close callback: release the bottom device, unless the
 * bdev handle is owned by the caller (opened_by_bdev). During an
 * upgrade the device is switched to pass-through and detached instead
 * of being closed.
 */
void block_dev_close_object(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);

	if (bdobj->opened_by_bdev)
		return;

	if (unlikely(cas_upgrade_is_in_upgrade())) {
		casdisk_functions.casdsk_disk_set_pt(bdobj->dsk);
		casdisk_functions.casdsk_disk_dettach(bdobj->dsk);
	} else {
		casdisk_functions.casdsk_disk_close(bdobj->dsk);
	}
}
/* Largest single IO (in bytes) the bottom device's queue accepts. */
unsigned int block_dev_get_max_io_size(ocf_volume_t vol)
{
	struct block_device *bd = bd_object(vol)->btm_bd;

	return queue_max_sectors(bd->bd_disk->queue) << SECTOR_SHIFT;
}
/* Capacity of the bottom device in bytes; whole disks use the gendisk
 * capacity, partitions use their own sector count. */
uint64_t block_dev_get_byte_length(ocf_volume_t vol)
{
	struct block_device *bd = bd_object(vol)->btm_bd;
	uint64_t sectors;

	if (bd->bd_contains == bd)
		sectors = get_capacity(bd->bd_disk);
	else
		sectors = bd->bd_part->nr_sects;

	return sectors << SECTOR_SHIFT;
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 3, 0)
/* Elevator name lookup for kernels <= 3.3 (elevator_type member). */
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
	if (q->elevator->elevator_type == NULL ||
			q->elevator->elevator_type->elevator_name == NULL ||
			q->elevator->elevator_type->elevator_name[0] == 0)
		return NULL;

	return q->elevator->elevator_type->elevator_name;
}
#else
/* Elevator name lookup for kernels > 3.3 (type member). */
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
	if (q->elevator->type == NULL ||
			q->elevator->type->elevator_name == NULL ||
			q->elevator->type->elevator_name[0] == 0)
		return NULL;

	return q->elevator->type->elevator_name;
}
#endif
/*
 * Return the name of the IO scheduler attached to @q, or NULL when the
 * queue has no elevator (or no name is available).
 */
const char *block_dev_get_elevator_name(struct request_queue *q)
{
	if (!q || !q->elevator)
		return NULL;

	return __block_dev_get_elevator_name(q);
}
/*
 * Report whether the device's NVMe format is optimal for the volume
 * type it will be used as. Non-atomic-capable block devices are always
 * considered optimal (returns 1).
 */
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
		uint8_t type)
{
	switch (type) {
	case ATOMIC_DEVICE_VOLUME:
		return atomic_params->is_mode_optimal;
	case BLOCK_DEVICE_VOLUME:
		if (atomic_params->is_atomic_capable)
			return atomic_params->is_mode_optimal;
		break;
	default:
		break;
	}

	return 1;
}
/*
*
*/
static inline struct bio *cas_bd_io_alloc_bio(struct blkio *bdio)
{
struct bio *bio
= bio_alloc(GFP_NOIO, cas_io_iter_size_left(&bdio->iter));
if (bio)
return bio;
if (cas_io_iter_size_left(&bdio->iter) < MAX_LINES_PER_IO) {
/* BIO vector was small, so it was memory
* common problem - NO RAM!!!
*/
return NULL;
}
/* Retry with smaller */
return bio_alloc(GFP_NOIO, MAX_LINES_PER_IO);
}
/*
 * Account one finished sub-request of @io, folding @error into the
 * accumulated error. When the last outstanding sub-request completes,
 * the OCF completion callback fires and the IO reference is dropped.
 */
static void cas_bd_io_end(struct ocf_io *io, int error)
{
	struct blkio *bdio = cas_io_to_blkio(io);

	if (error)
		bdio->error |= error;

	/* Complete only when the final outstanding reference drops */
	if (atomic_dec_return(&bdio->rq_remaning) != 0)
		return;

	CAS_DEBUG_MSG("Completion");

	/* Send completion to caller */
	io->end(io, bdio->error);

	/* Free allocated structures */
	ocf_io_put(io);
}
/*
 * BIO completion callback (kernel-version-independent signature via the
 * DECLARE_BLOCK_CALLBACK compat macro). Updates the volume's dirty
 * accounting for writes, masks -EOPNOTSUPP on discards, then forwards
 * completion to cas_bd_io_end() and releases the BIO.
 */
DECLARE_BLOCK_CALLBACK(cas_bd_io_end, struct bio *bio,
		unsigned int bytes_done, int error)
{
	struct ocf_io *io;
	struct blkio *bdio;
	struct bd_object *bdobj;
	int err;

	BUG_ON(!bio);
	BUG_ON(!bio->bi_private);
	BLOCK_CALLBACK_INIT(bio);
	io = bio->bi_private;
	bdobj = bd_object(io->volume);
	BUG_ON(!bdobj);
	/* Extract the completion status in a kernel-version-neutral way */
	err = BLOCK_CALLBACK_ERROR(bio, error);
	bdio = cas_io_to_blkio(io);
	BUG_ON(!bdio);

	CAS_DEBUG_TRACE();

	if (err)
		goto out;

	if (bdio->dir == OCF_WRITE) {
		/* IO was a write */

		if (!cas_blk_is_flush_io(io->flags)) {
			/* Device cache is dirty, mark it */
			atomic_inc(&bdobj->potentially_dirty);
		} else {
			/* IO flush finished, update potential
			 * dirty state
			 */
			atomic_sub(bdio->dirty, &bdobj->potentially_dirty);
		}
	}
out:
	/* A device that does not support discard reports -EOPNOTSUPP;
	 * treat that as success for discard BIOs */
	if (err == -EOPNOTSUPP && (BIO_OP_FLAGS(bio) & CAS_BIO_DISCARD))
		err = 0;

	cas_bd_io_end(io, err);

	bio_put(bio);
	BLOCK_CALLBACK_RETURN();
}
/*
 * Submit a flush to the bottom device. The flush BIO is only sent when
 * the device has seen writes since the last flush (potentially_dirty)
 * and the queue advertises flush support; otherwise the IO completes
 * immediately. On kernels without REQ_FLUSH support the IO is failed
 * with -ENOTSUPP.
 *
 * NOTE(review): the out path completes with blkio->error, which is only
 * assigned on the error branches here - presumably the private area is
 * zero-initialized by OCF; confirm.
 */
static void block_dev_submit_flush(struct ocf_io *io)
{
#ifdef CAS_FLUSH_SUPPORTED
	struct blkio *blkio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(io->volume);
	struct block_device *bdev = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = NULL;

	/* Snapshot the dirty counter so the completion callback can
	 * subtract exactly what this flush covered */
	blkio->dirty = atomic_read(&bdobj->potentially_dirty);

	/* Prevent races of completing IO */
	atomic_set(&blkio->rq_remaning, 1);

	/* Increase IO reference counter for FLUSH IO */
	ocf_io_get(io);

	if (!blkio->dirty) {
		/* Didn't write anything to underlying disk; no need to
		 * send req_flush
		 */
		goto out;
	}

	if (q == NULL) {
		/* No queue, error */
		blkio->error = -EINVAL;
		goto out;
	}

	if (!CHECK_QUEUE_FLUSH(q)) {
		/* This block device does not support flush, call back */
		atomic_sub(blkio->dirty, &bdobj->potentially_dirty);
		goto out;
	}

	bio = bio_alloc(GFP_NOIO, 0);
	if (bio == NULL) {
		CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for BIO\n");
		blkio->error = -ENOMEM;
		goto out;
	}

	blkio->dir = io->dir;

	bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);
	CAS_BIO_SET_DEV(bio, bdev);
	bio->bi_private = io;

	/* One extra reference for the in-flight flush BIO */
	atomic_inc(&blkio->rq_remaning);
	cas_submit_bio(OCF_WRITE_FLUSH, bio);
out:
	/* Drop the initial reference; completes the IO unless a BIO is
	 * still in flight */
	cas_bd_io_end(io, blkio->error);

#else
	/* Running operating system without support for REQ_FLUSH
	 * (i.e. SLES 11 SP 1) CAS cannot use flushing requests to
	 * handle power-fail safe Write-Back
	 */
	io->end(io, -ENOTSUPP);
	/* on SLES 11 SP 1 powerfail safety can only be achieved
	 * through disabling volatile write cache of disk itself.
	 */
#endif
}
/*
 * Submit a discard for the IO's byte range, split into BIOs that
 * respect the queue's max_discard_sectors, discard granularity and
 * alignment. Devices without discard support complete successfully
 * (discard is advisory). Completion is funneled through
 * cas_bd_io_end() with an initial reference held against races.
 */
void block_dev_submit_discard(struct ocf_io *io)
{
	struct blkio *blkio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(io->volume);
	struct block_device *bd = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bd);
	struct bio *bio = NULL;
	unsigned int max_discard_sectors, granularity, bio_sects;
	int alignment;
	sector_t sects, start, end, tmp;

	/* Prevent races of completing IO */
	atomic_set(&blkio->rq_remaning, 1);

	/* Increase IO reference counter for FLUSH IO */
	ocf_io_get(io);

	if (!q) {
		/* No queue, error */
		blkio->error = -ENXIO;
		goto out;
	}

	if (!blk_queue_discard(q)) {
		/* Discard is not supported by bottom device, send completion
		 * to caller
		 */
		goto out;
	}

	/* Queue limits expressed in 512B sectors; granularity at least 1 */
	granularity = max(q->limits.discard_granularity >> SECTOR_SHIFT, 1U);
	alignment = (bdev_discard_alignment(bd) >> SECTOR_SHIFT) % granularity;
	max_discard_sectors =
		min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT);
	/* Keep each BIO a whole multiple of the discard granularity */
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto out;

	sects = io->bytes >> SECTOR_SHIFT;
	start = io->addr >> SECTOR_SHIFT;

	while (sects) {
		bio = bio_alloc(GFP_NOIO, 1);
		if (!bio) {
			CAS_PRINT_RL(CAS_KERN_ERR "Couldn't allocate memory for BIO\n");
			blkio->error = -ENOMEM;
			break;
		}

		bio_sects = min_t(sector_t, sects, max_discard_sectors);
		end = start + bio_sects;
		tmp = end;
		/* Trim all but the final BIO so it ends on an aligned
		 * granularity boundary */
		if (bio_sects < sects &&
				sector_div(tmp, granularity) != alignment) {
			end = end - alignment;
			sector_div(end, granularity);
			end = end * granularity + alignment;
			bio_sects = end - start;
		}

		CAS_BIO_SET_DEV(bio, bd);
		BIO_BISECTOR(bio) = start;
		BIO_BISIZE(bio) = bio_sects << SECTOR_SHIFT;
		bio->bi_next = NULL;
		bio->bi_private = io;
		bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);

		/* One reference per in-flight BIO */
		atomic_inc(&blkio->rq_remaning);
		cas_submit_bio(CAS_BIO_DISCARD, bio);

		sects -= bio_sects;
		start = end;

		cond_resched();
	}

out:
	/* Drop the initial reference; completes when no BIOs in flight */
	cas_bd_io_end(io, blkio->error);
}
static inline bool cas_bd_io_prepare(int *dir, struct ocf_io *io)
{
struct blkio *bdio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
/* Setup DIR */
bdio->dir = *dir;
/* Save dirty counter */
bdio->dirty = atomic_read(&bdobj->potentially_dirty);
/* Convert CAS direction into kernel values */
switch (bdio->dir) {
case OCF_READ:
*dir = READ;
break;
case OCF_WRITE:
*dir = WRITE;
break;
default:
bdio->error = -EINVAL;
break;
}
if (!io->bytes) {
/* Don not accept empty request */
CAS_PRINT_RL(KERN_ERR "Invalid zero size IO\n");
bdio->error = -EINVAL;
}
if (bdio->error)
return false;
return true;
}
/*
 * Submit an OCF IO to the bottom block device, splitting the data
 * iterator into as many BIOs as needed (each BIO takes pages until
 * bio_add_page() refuses more). Flush requests are diverted to
 * block_dev_submit_flush(). Completion is funneled through
 * cas_bd_io_end() with an initial reference held against races.
 *
 * Fix: the debug trace below referenced bdio->addr / bdio->bytes -
 * fields that do not exist in struct blkio. This only compiled because
 * CAS_DEBUG_IO is 0 and CAS_DEBUG_PARAM discards its arguments; with
 * debugging enabled it was a compile error. Use io->addr / io->bytes.
 */
static void block_dev_submit_io(struct ocf_io *io)
{
	struct blkio *bdio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(io->volume);
	struct bio_vec_iter *iter = &bdio->iter;
	uint64_t addr = io->addr;
	uint32_t bytes = io->bytes;
	int dir = io->dir;

	if (!CAS_IS_WRITE_FLUSH_FUA(io->flags) &&
			CAS_IS_WRITE_FLUSH(io->flags)) {
		CAS_DEBUG_MSG("Flush request");
		/* It is flush requests handle it */
		block_dev_submit_flush(io);
		return;
	}

	CAS_DEBUG_PARAM("Address = %llu, bytes = %u\n", io->addr,
			io->bytes);

	/* Increase IO reference */
	ocf_io_get(io);

	/* Prevent races of completing IO */
	atomic_set(&bdio->rq_remaning, 1);

	if (!cas_bd_io_prepare(&dir, io)) {
		CAS_DEBUG_MSG("Invalid request");
		cas_bd_io_end(io, -EINVAL);
		return;
	}

	while (cas_io_iter_is_next(iter) && bytes) {
		/* Still IO vectors to be sent */

		/* Allocate BIO */
		struct bio *bio = cas_bd_io_alloc_bio(bdio);

		if (!bio) {
			bdio->error = -ENOMEM;
			break;
		}

		/* Setup BIO */
		CAS_BIO_SET_DEV(bio, bdobj->btm_bd);
		BIO_BISECTOR(bio) = addr / SECTOR_SIZE;
		bio->bi_next = NULL;
		bio->bi_private = io;
		BIO_OP_FLAGS(bio) |= io->flags;
		BIO_SET_RW_FLAGS(bio);
		bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);

		/* Add pages */
		while (cas_io_iter_is_next(iter) && bytes) {
			struct page *page = cas_io_iter_current_page(iter);
			uint32_t offset = cas_io_iter_current_offset(iter);
			uint32_t length = cas_io_iter_current_length(iter);
			int added;

			if (length > bytes)
				length = bytes;

			added = bio_add_page(bio, page, length, offset);
			BUG_ON(added < 0);

			if (added == 0) {
				/* No more space in BIO, stop adding pages */
				break;
			}

			/* Update address, bytes sent */
			bytes -= added;
			addr += added;

			/* Update BIO vector iterator */
			if (added != cas_io_iter_move(iter, added)) {
				bdio->error = -ENOBUFS;
				break;
			}
		}

		if (bdio->error == 0) {
			/* Increase IO reference for sending this IO */
			atomic_inc(&bdio->rq_remaning);

			/* Send BIO */
			CAS_DEBUG_MSG("Submit IO");
			cas_submit_bio(dir, bio);
			bio = NULL;
		} else {
			if (bio) {
				bio_put(bio);
				bio = NULL;
			}

			/* ERROR, stop processed */
			break;
		}
	}

	if (bytes && bdio->error == 0) {
		/* Not all bytes sent, mark error */
		bdio->error = -ENOBUFS;
	}

	/* Prevent races of completing IO when
	 * there are still child IOs not being send.
	 */
	cas_bd_io_end(io, 0);
}
/* OCF volume backend description for regular (non-atomic) block
 * devices; registered under BLOCK_DEVICE_VOLUME by block_dev_init(). */
const struct ocf_volume_properties cas_object_blk_properties = {
	.name = "Block Device",
	.io_priv_size = sizeof(struct blkio),
	.volume_priv_size = sizeof(struct bd_object),
	.caps = {
		.atomic_writes = 0, /* Atomic writes not supported */
	},
	.ops = {
		.submit_io = block_dev_submit_io,
		.submit_flush = block_dev_submit_flush,
		.submit_metadata = NULL, /* No metadata channel on plain bdevs */
		.submit_discard = block_dev_submit_discard,
		.open = block_dev_open_object,
		.close = block_dev_close_object,
		.get_max_io_size = block_dev_get_max_io_size,
		.get_length = block_dev_get_byte_length,
	},
	.io_ops = {
		.set_data = cas_blk_io_set_data,
		.get_data = cas_blk_io_get_data,
	},
};
/* Register the block-device volume backend with the CAS OCF context.
 * Returns 0 on success, a negative error code otherwise. */
int block_dev_init(void)
{
	int result = ocf_ctx_register_volume_type(cas_ctx,
			BLOCK_DEVICE_VOLUME, &cas_object_blk_properties);

	/* Only negative results are errors */
	if (result < 0)
		return result;

	return 0;
}
/* Unregister the block-device volume backend (counterpart of
 * block_dev_init()). */
void block_dev_deinit(void)
{
	ocf_ctx_unregister_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME);
}
/*
 * Extract the OCF IO class from @bio if the BIO originated from this
 * backend (recognized by its completion callback). Returns 0 and fills
 * *@io_class on success, -1 for foreign BIOs.
 */
int block_dev_try_get_io_class(struct bio *bio, int *io_class)
{
	struct ocf_io *io;

	/* Only our own BIOs carry an OCF IO in bi_private */
	if (bio->bi_end_io != REFER_BLOCK_CALLBACK(cas_bd_io_end))
		return -1;

	io = bio->bi_private;
	*io_class = io->io_class;

	return 0;
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLOCK_DEV_BOTTOM_H__
#define __VOL_BLOCK_DEV_BOTTOM_H__
#include "../cas_cache.h"
int block_dev_open_object(ocf_volume_t vol);
void block_dev_close_object(ocf_volume_t vol);
const char *block_dev_get_elevator_name(struct request_queue *q);
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
uint8_t type);
int block_dev_try_get_io_class(struct bio *bio, int *io_class);
int block_dev_init(void);
void block_dev_deinit(void);
#endif /* __VOL_BLOCK_DEV_BOTTOM_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,17 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLOCK_DEV_TOP_H__
#define __VOL_BLOCK_DEV_TOP_H__
int block_dev_activate_exported_object(ocf_core_t core);
int block_dev_create_exported_object(ocf_core_t core);
int block_dev_destroy_exported_object(ocf_core_t core);
int block_dev_destroy_all_exported_objects(ocf_cache_t cache);
#endif /* __VOL_BLOCK_DEV_TOP_H__ */

View File

@@ -0,0 +1,2 @@
/home/robert/work/cas/ICAS_Linux/modules/cas_cache/volume/vol_block_dev_top.o-.text-9bd
/home/robert/work/cas/ICAS_Linux/modules/cas_cache/volume/vol_block_dev_top.o-.text-9c4

12
modules/cas_disk/Makefile Normal file
View File

@@ -0,0 +1,12 @@
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
# Shared kernel/version configuration for all CAS kernel modules
include $(M)/config.mk

# Build cas_disk.ko from the object files listed below
obj-m := cas_disk.o
cas_disk-objs = main.o
cas_disk-objs += disk.o
cas_disk-objs += exp_obj.o
cas_disk-objs += sysfs.o

253
modules/cas_disk/cas_disk.h Normal file
View File

@@ -0,0 +1,253 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CASDISK_H__
#define __CASDISK_H__
#include <linux/blkdev.h>
/**
* Version of cas_disk interface
*/
#define CASDSK_IFACE_VERSION 2
struct casdsk_disk;
#define CASDSK_BIO_NOT_HANDLED 0
#define CASDSK_BIO_HANDLED 1
/* Callback table a client module (cas_cache) registers with cas_disk
 * to drive an exported object (top block device). */
struct casdsk_exp_obj_ops {

	/**
	 * @brief Prepare request queue of exported object (top) block device.
	 *	Could be NULL.
	 */
	int (*prepare_queue)(struct casdsk_disk *dsk, struct request_queue *q,
			void *private);

	/**
	 * @brief Cleanup request queue of exported object (top) block device.
	 *	Could be NULL.
	 */
	void (*cleanup_queue)(struct casdsk_disk *dsk, struct request_queue *q,
			void *private);

	/**
	 * @brief Set geometry of exported object (top) block device.
	 *	Could be NULL.
	 */
	int (*set_geometry)(struct casdsk_disk *dsk, void *private);

	/**
	 * @brief make_request_fn of exported object (top) block device.
	 *	Called by cas_disk when cas_disk device is in attached mode.
	 *
	 * @return CASDSK_BIO_HANDLED when bio was handled.
	 *	Otherwise CASDSK_BIO_NOT_HANDLED. In this case bio will be submitted
	 *	to I/O scheduler and should be handled by request_fn.
	 */
	int (*make_request_fn)(struct casdsk_disk *dsk, struct request_queue *q,
			struct bio *bio, void *private);

	/**
	 * @brief request_fn of exported object (top) block device.
	 *	Called by cas_disk when cas_disk device is in attached mode.
	 */
	void (*request_fn)(struct casdsk_disk *dsk, struct request_queue *q,
			void *private);

	/**
	 * @brief prep_rq_fn of exported object (top) block device.
	 *	Called by cas_disk when cas_disk device is in attached mode.
	 */
	int (*prep_rq_fn)(struct casdsk_disk *dsk, struct request_queue *q,
			struct request *rq, void *private);

	/**
	 * @brief ioctl handler of exported object (top) block device.
	 *	Called by cas_disk when cas_disk device is in attached mode.
	 */
	int (*ioctl)(struct casdsk_disk *dsk, unsigned int cmd, unsigned long arg,
			void *private);
};
/**
 * Stored configuration buffer description
 */
struct casdsk_props_conf {
	void *buffer;	/* serialized configuration blob */
	size_t size;	/* size of @buffer in bytes */
	uint16_t crc;	/* CRC of the blob - TODO confirm coverage */
};
/**
* @brief Get version of cas_disk interface
* @return cas_disk interface version
*/
uint32_t casdsk_get_version(void);
/**
* @brief Store configuration buffers in cas_disk
* @param n_blobs Number of configuration buffers
* @param blobs Array of configuration buffers structures
*/
void casdsk_store_config(size_t n_blobs, struct casdsk_props_conf *blobs);
/**
* @brief Get previously stored configuration buffers
* @param blobs Where to store pointer to configuration buffers array
* @return Number of stored configuration buffers
*/
size_t casdsk_get_stored_config(struct casdsk_props_conf **blobs);
/**
* @brief Free resources related to stored configuration buffers
*/
void casdsk_free_stored_config(void);
/**
* @brief Open block device
* @param path Path to block device
* @param private Private data
* @return Pointer to casdsk_disk related to opened block device
*/
struct casdsk_disk *casdsk_disk_open(const char *path, void *private);
/**
 * @brief Claim previously opened block device (held by cas_disk)
* @param path Path to block device
* @param private Private data
* @return Pointer to casdsk_disk structure related to block device, or NULL
* if device is not opened by cas_disk.
*/
struct casdsk_disk *casdsk_disk_claim(const char *path, void *private);
/**
* @brief Close block device and remove from cas_disk
* @param dsk Pointer to casdsk_disk structure related to block device
* which should be closed.
*/
void casdsk_disk_close(struct casdsk_disk *dsk);
/**
* @brief Get block_device structure of bottom block device
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return Pointer to block_device structure of bottom block device
*/
struct block_device *casdsk_disk_get_blkdev(struct casdsk_disk *dsk);
/**
* @brief Get request queue of bottom block device
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return Pointer to reqest_queue structure of bottom block device
*/
struct request_queue *casdsk_disk_get_queue(struct casdsk_disk *dsk);
/**
* @brief Get gendisk structure of bottom block device
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return Pointer to gendisk structure of bottom block device
*/
struct gendisk *casdsk_disk_get_gendisk(struct casdsk_disk *dsk);
/**
* @brief Prepare cas_disk device to switch to pass-through mode
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_disk_set_pt(struct casdsk_disk *dsk);
/**
* @brief Prepare cas_disk device to switch to attached mode
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_disk_set_attached(struct casdsk_disk *dsk);
/**
* @brief Revert cas_disk device back to attached mode
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_disk_clear_pt(struct casdsk_disk *dsk);
/**
 * @brief Detach CAS from cas_disk device
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_disk_dettach(struct casdsk_disk *dsk);
/**
* @brief Attach cas to cas_disk device
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @param owner Pointer to cas module
* @param ops Pointer to structure with callback functions
* @return 0 if success, errno if failure
*/
int casdsk_disk_attach(struct casdsk_disk *dsk, struct module *owner,
struct casdsk_exp_obj_ops *ops);
/**
* @brief Create exported object (top device)
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @param dev_name Name of exported object (top device)
* @param owner Pointer to cas module
* @param ops Pointer to structure with callback functions
* @return 0 if success, errno if failure
*/
int casdsk_exp_obj_create(struct casdsk_disk *dsk, const char *dev_name,
struct module *owner, struct casdsk_exp_obj_ops *ops);
/**
* @brief Get request queue of exported object (top) block device
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return Pointer to reqest_queue structure of top block device
*/
struct request_queue *casdsk_exp_obj_get_queue(struct casdsk_disk *dsk);
/**
* @brief Get gendisk structure of exported object (top) block device
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return Pointer to gendisk structure of top block device
*/
struct gendisk *casdsk_exp_obj_get_gendisk(struct casdsk_disk *dsk);
/**
* @brief Activate exported object (make it visible to OS
* and allow I/O handling)
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_exp_obj_activate(struct casdsk_disk *dsk);
/**
* @brief Check if exported object is active
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return true if exported object is active
*/
bool casdsk_exp_obj_activated(struct casdsk_disk *ds);
/**
* @brief Lock exported object
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_exp_obj_lock(struct casdsk_disk *dsk);
/**
* @brief Unlock exported object
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_exp_obj_unlock(struct casdsk_disk *dsk);
/**
* @brief Destroy exported object
* @param dsk Pointer to casdsk_disk structure related to cas_disk device
* @return 0 if success, errno if failure
*/
int casdsk_exp_obj_destroy(struct casdsk_disk *dsk);
#endif

View File

@@ -0,0 +1,93 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CASDISK_DEFS_H__
#define __CASDISK_DEFS_H__
#include <linux/version.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kobject.h>
#include <cas_version.h>
/* Configuration blobs preserved inside cas_disk (see
 * casdsk_store_config()/casdsk_get_stored_config()). */
struct casdsk_stored_config {
	size_t n_blobs;			/* number of entries in @blobs */
	struct casdsk_props_conf *blobs;	/* array of stored buffers */
};
/* Global state of the cas_disk module (single instance, see
 * casdsk_module below). */
struct casdsk_module {
	/* NOTE(review): presumably protects disk_list and the id/minor
	 * counters - confirm against main.c */
	struct mutex lock;

	struct list_head disk_list;	/* all known casdsk_disk objects */
	uint32_t next_disk_id;		/* id for the next opened disk */
	int disk_major;			/* major number for exported devices */
	int next_minor;			/* next free minor number */

	/* Slab caches for frequently allocated objects */
	struct kmem_cache *disk_cache;
	struct kmem_cache *exp_obj_cache;
	struct kmem_cache *pt_io_ctx_cache;
	struct kmem_cache *pending_rqs_cache;

	struct kobject kobj;		/* sysfs anchor */

	struct casdsk_stored_config config;	/* config kept across reload */
};
extern struct casdsk_module *casdsk_module;
/* prefixes for messages */
#define CASDSK_LOGO "CAS Disk"
#define CASDSK_PREFIX_SHORT "[" CASDSK_LOGO "] "
#define CASDSK_PREFIX_LONG "Cache Acceleration Software Linux"
#define CASDSK_KERN_EMERG KERN_EMERG""CASDSK_PREFIX_SHORT
#define CASDSK_KERN_ALERT KERN_ALERT""CASDSK_PREFIX_SHORT
#define CASDSK_KERN_CRIT KERN_CRIT""CASDSK_PREFIX_SHORT
#define CASDSK_KERN_ERR KERN_ERR""CASDSK_PREFIX_SHORT
#define CASDSK_KERN_WARNING KERN_WARNING""CASDSK_PREFIX_SHORT
#define CASDSK_KERN_NOTICE KERN_NOTICE""CASDSK_PREFIX_SHORT
#define CASDSK_KERN_INFO KERN_INFO""CASDSK_PREFIX_SHORT
#define CASDSK_KERN_DEBUG KERN_DEBUG""CASDSK_PREFIX_SHORT
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 37)
static inline struct block_device *open_bdev_exclusive(const char *path,
fmode_t mode,
void *holder)
{
return blkdev_get_by_path(path, mode | FMODE_EXCL, holder);
}
static inline void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
{
blkdev_put(bdev, mode | FMODE_EXCL);
}
static inline int bd_claim_by_disk(struct block_device *bdev, void *holder,
struct gendisk *disk)
{
return bd_link_disk_holder(bdev, disk);
}
static inline void bd_release_from_disk(struct block_device *bdev,
struct gendisk *disk)
{
return bd_unlink_disk_holder(bdev, disk);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
#define KRETURN(x) ({ return (x); })
#define MAKE_RQ_RET_TYPE blk_qc_t
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 2, 0)
#define KRETURN(x) return
#define MAKE_RQ_RET_TYPE void
#else
#define KRETURN(x) ({ return (x); })
#define MAKE_RQ_RET_TYPE int
#endif
#include "debug.h"
#endif

45
modules/cas_disk/debug.h Normal file
View File

@@ -0,0 +1,45 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CASDISK_DEBUG_H__
#define __CASDISK_DEBUG_H__
#undef CASDSK_DEBUG
#ifdef CASDSK_DEBUG
#define CASDSK_DEBUG_TRACE() \
printk(CASDSK_KERN_INFO "%s\n", __func__)
#define CASDSK_DEBUG_DISK_TRACE(dsk) \
printk(CASDSK_KERN_INFO "[%u] %s\n", dsk->id, __func__)
#define CASDSK_DEBUG_MSG(msg) \
printk(CASDSK_KERN_INFO "%s - %s\n", __func__, msg)
#define CASDSK_DEBUG_PARAM(format, ...) \
printk(CASDSK_KERN_INFO "%s - "format"\n", \
__func__, ##__VA_ARGS__)
#define CASDSK_DEBUG_DISK(dsk, format, ...) \
printk(CASDSK_KERN_INFO "[%u] %s - "format"\n", \
dsk->id, \
__func__, ##__VA_ARGS__)
#define CASDSK_DEBUG_ERROR(error, ...) \
CASDSK_DEBUG_PARAM("ERROR(%d) "error, __LINE__, ##__VA_ARGS__)
#define CASDSK_DEBUG_DISK_ERROR(dsk, error, ...) \
CASDSK_DEBUG_DISK(dsk, "ERROR(%d) "error, __LINE__, ##__VA_ARGS__)
#else
#define CASDSK_DEBUG_TRACE()
#define CASDSK_DEBUG_DISK_TRACE(dsk)
#define CASDSK_DEBUG_MSG(msg)
#define CASDSK_DEBUG_PARAM(format, ...)
#define CASDSK_DEBUG_DISK(dsk, format, ...)
#define CASDSK_DEBUG_ERROR(error, ...)
#define CASDSK_DEBUG_DISK_ERROR(dsk, error, ...)
#endif
#endif

452
modules/cas_disk/disk.c Normal file
View File

@@ -0,0 +1,452 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/delay.h>
#include "cas_disk_defs.h"
#include "cas_cache.h"
#include "disk.h"
#include "exp_obj.h"
#include "sysfs.h"
/* Mode used for the exclusive open of the bottom device. */
#define CASDSK_DISK_OPEN_FMODE (FMODE_READ | FMODE_WRITE)

/* Human-readable names of the CASDSK_MODE_* states (sysfs "mode" file). */
static const char * const _casdsk_disk_modes[] = {
	[CASDSK_MODE_UNKNOWN] = "unknown",
	[CASDSK_MODE_PT] = "pass-through",
	[CASDSK_MODE_ATTACHED] = "attached",
	[CASDSK_MODE_TRANS_TO_PT] = "attached -> pass-through",
	[CASDSK_MODE_TRANS_TO_ATTACHED] = "pass-through -> attached"
};
/*
 * kobject release callback for struct casdsk_disk: frees the duplicated
 * device path and returns the descriptor to its slab cache.  Runs when
 * the final reference to the disk's kobject is dropped.
 */
static void _casdsk_disk_release(struct kobject *kobj)
{
	struct casdsk_disk *disk;

	BUG_ON(!kobj);

	disk = casdsk_kobj_to_disk(kobj);
	BUG_ON(!disk);

	CASDSK_DEBUG_DISK_TRACE(disk);

	kfree(disk->path);
	kmem_cache_free(casdsk_module->disk_cache, disk);
}
/* sysfs show handler: prints the disk's current mode as a string. */
static ssize_t _casdsk_disk_mode_show(struct kobject *kobj, char *page)
{
	struct casdsk_disk *dsk = casdsk_kobj_to_disk(kobj);

	CASDSK_DEBUG_DISK_TRACE(dsk);

	return scnprintf(page, PAGE_SIZE, "%s",
			 _casdsk_disk_modes[atomic_read(&dsk->mode)]);
}

/* Read-only "mode" attribute exposed under the disk's kobject. */
static struct casdsk_attribute _casdsk_disk_mode_attr =
	__ATTR(mode, S_IRUGO, _casdsk_disk_mode_show, NULL);

static struct attribute *_casdsk_disk_attrs[] = {
	&_casdsk_disk_mode_attr.attr,
	NULL
};

/* kobject type of a casdsk_disk; release frees the descriptor. */
static struct kobj_type casdsk_disk_ktype = {
	.release = _casdsk_disk_release,
	.sysfs_ops = &casdsk_sysfs_ops,
	.default_attrs = _casdsk_disk_attrs
};
/*
 * Disk-subsystem module init: registers the "cas" block major number and
 * creates the slab cache for struct casdsk_disk.
 * Returns 0 on success, negative errno otherwise.
 */
int __init casdsk_init_disks(void)
{
	CASDSK_DEBUG_TRACE();

	casdsk_module->next_disk_id = 1;
	INIT_LIST_HEAD(&casdsk_module->disk_list);

	casdsk_module->disk_major = register_blkdev(casdsk_module->disk_major,
						    "cas");
	if (casdsk_module->disk_major <= 0) {
		CASDSK_DEBUG_ERROR("Cannot allocate major number");
		return -EINVAL;
	}
	CASDSK_DEBUG_PARAM("Allocated major number: %d", casdsk_module->disk_major);

	casdsk_module->disk_cache =
		kmem_cache_create("casdsk_disk", sizeof(struct casdsk_disk),
				  0, 0, NULL);
	if (!casdsk_module->disk_cache) {
		/* undo the major registration on failure */
		unregister_blkdev(casdsk_module->disk_major, "cas");
		return -ENOMEM;
	}

	return 0;
}

/* Module-exit counterpart of casdsk_init_disks(). */
void casdsk_deinit_disks(void)
{
	CASDSK_DEBUG_TRACE();

	kmem_cache_destroy(casdsk_module->disk_cache);
	unregister_blkdev(casdsk_module->disk_major, "cas");
}

/* Register the disk's kobject as "cas<id>" under the bottom device. */
static int _casdsk_disk_init_kobject(struct casdsk_disk *dsk)
{
	int result = 0;

	kobject_init(&dsk->kobj, &casdsk_disk_ktype);
	result = kobject_add(&dsk->kobj, &disk_to_dev(dsk->bd->bd_disk)->kobj,
			     "cas%d", dsk->id);
	if (result)
		CASDSK_DEBUG_DISK_ERROR(dsk, "Cannot register kobject");

	return result;
}
/*
 * Open the block device at @path exclusively and register it as a new
 * casdsk_disk (mode starts as UNKNOWN).  @private is the caller's context.
 * Returns the new disk or ERR_PTR() on failure.
 */
struct casdsk_disk *casdsk_disk_open(const char *path, void *private)
{
	struct casdsk_disk *dsk;
	int result = 0;

	BUG_ON(!path);

	CASDSK_DEBUG_TRACE();

	dsk = kmem_cache_zalloc(casdsk_module->disk_cache, GFP_KERNEL);
	if (!dsk) {
		CASDSK_DEBUG_ERROR("Cannot allocate memory");
		result = -ENOMEM;
		goto error_kmem;
	}
	mutex_init(&dsk->lock);

	/* keep a private copy of the path for casdsk_disk_claim() lookups */
	dsk->path = kstrdup(path, GFP_KERNEL);
	if (!dsk->path) {
		result = -ENOMEM;
		goto error_kstrdup;
	}

	atomic_set(&dsk->mode, CASDSK_MODE_UNKNOWN);

	dsk->bd = open_bdev_exclusive(path, CASDSK_DISK_OPEN_FMODE, dsk);
	if (IS_ERR(dsk->bd)) {
		CASDSK_DEBUG_ERROR("Cannot open exclusive");
		result = PTR_ERR(dsk->bd);
		goto error_open_bdev;
	}

	dsk->private = private;

	mutex_lock(&casdsk_module->lock);
	dsk->id = casdsk_module->next_disk_id++;
	list_add(&dsk->list, &casdsk_module->disk_list);
	mutex_unlock(&casdsk_module->lock);

	result = _casdsk_disk_init_kobject(dsk);
	if (result)
		goto error_kobject;

	CASDSK_DEBUG_DISK(dsk, "Created (%p)", dsk);

	return dsk;

	/* unwind in reverse order of construction */
error_kobject:
	mutex_lock(&casdsk_module->lock);
	list_del(&dsk->list);
	mutex_unlock(&casdsk_module->lock);
	close_bdev_exclusive(dsk->bd, CASDSK_DISK_OPEN_FMODE);
error_open_bdev:
	kfree(dsk->path);
error_kstrdup:
	kmem_cache_free(casdsk_module->disk_cache, dsk);
error_kmem:
	return ERR_PTR(result);
}
EXPORT_SYMBOL(casdsk_disk_open);

/* Hand the disk over to a new owner by replacing its private context. */
static void _casdsk_disk_claim(struct casdsk_disk *dsk, void *private)
{
	dsk->private = private;
}
struct casdsk_disk *casdsk_disk_claim(const char *path, void *private)
{
struct list_head *item;
struct casdsk_disk *dsk = NULL;
BUG_ON(!path);
mutex_lock(&casdsk_module->lock);
list_for_each(item, &casdsk_module->disk_list) {
dsk = list_entry(item, struct casdsk_disk, list);
if (strncmp(path, dsk->path, PATH_MAX) == 0) {
_casdsk_disk_claim(dsk, private);
mutex_unlock(&casdsk_module->lock);
return dsk;
}
}
mutex_unlock(&casdsk_module->lock);
return NULL;
}
EXPORT_SYMBOL(casdsk_disk_claim);
/* Common teardown: release the bottom bdev, exp_obj and the disk kobject. */
static void __casdsk_disk_close(struct casdsk_disk *dsk)
{
	close_bdev_exclusive(dsk->bd, CASDSK_DISK_OPEN_FMODE);

	casdsk_exp_obj_free(dsk);

	/* final put frees dsk via _casdsk_disk_release() */
	kobject_put(&dsk->kobj);
}

/* Unlink the disk from the module list and tear it down. */
void casdsk_disk_close(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	BUG_ON(!dsk->bd);

	CASDSK_DEBUG_DISK(dsk, "Destroying (%p)", dsk);

	mutex_lock(&casdsk_module->lock);
	list_del(&dsk->list);
	mutex_unlock(&casdsk_module->lock);

	__casdsk_disk_close(dsk);
}
EXPORT_SYMBOL(casdsk_disk_close);

/*
 * Module-exit path: force every remaining disk into SHUTDOWN mode,
 * destroy its exported object (if any) and close it.
 */
void __exit casdsk_disk_shutdown_all(void)
{
	struct list_head *item, *n;
	struct casdsk_disk *dsk;

	CASDSK_DEBUG_TRACE();

	mutex_lock(&casdsk_module->lock);
	list_for_each_safe(item, n, &casdsk_module->disk_list) {
		dsk = list_entry(item, struct casdsk_disk, list);
		list_del(item);

		casdsk_disk_lock(dsk);

		/* only pass-through or unknown disks may remain at exit */
		BUG_ON(!casdsk_disk_is_pt(dsk) && !casdsk_disk_is_unknown(dsk));

		if (casdsk_disk_is_pt(dsk)) {
			atomic_set(&dsk->mode, CASDSK_MODE_TRANS_TO_SHUTDOWN);
			casdsk_exp_obj_prepare_shutdown(dsk);
		}
		atomic_set(&dsk->mode, CASDSK_MODE_SHUTDOWN);

		if (dsk->exp_obj) {
			casdsk_exp_obj_lock(dsk);
			casdsk_exp_obj_destroy(dsk);
			casdsk_exp_obj_unlock(dsk);
		}

		casdsk_disk_unlock(dsk);

		__casdsk_disk_close(dsk);
	}
	mutex_unlock(&casdsk_module->lock);
}

/* Accessor: bottom (backing) block device of @dsk. */
struct block_device *casdsk_disk_get_blkdev(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	return dsk->bd;
}
EXPORT_SYMBOL(casdsk_disk_get_blkdev);

/* Accessor: gendisk of the bottom block device. */
struct gendisk *casdsk_disk_get_gendisk(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	BUG_ON(!dsk->bd);
	return dsk->bd->bd_disk;
}
EXPORT_SYMBOL(casdsk_disk_get_gendisk);

/* Accessor: request queue of the containing (whole) bottom device. */
struct request_queue *casdsk_disk_get_queue(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	BUG_ON(!dsk->bd);
	BUG_ON(!dsk->bd->bd_contains);
	BUG_ON(!dsk->bd->bd_contains->bd_disk);
	return dsk->bd->bd_contains->bd_disk->queue;
}
EXPORT_SYMBOL(casdsk_disk_get_queue);
/*
 * Reserve @count consecutive minor numbers for an exported object.
 * Returns the first reserved minor, or -1 once the minor number space
 * (1 << MINORBITS) would be exceeded.  Minors are never recycled.
 */
int casdsk_disk_allocate_minors(int count)
{
	int first = -1;

	mutex_lock(&casdsk_module->lock);
	if (casdsk_module->next_minor + count <= (1 << MINORBITS)) {
		first = casdsk_module->next_minor;
		casdsk_module->next_minor += count;
	}
	mutex_unlock(&casdsk_module->lock);

	return first;
}
/* Enter the attached -> pass-through transition and quiesce the queue. */
static inline int __casdsk_disk_set_pt(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	atomic_set(&dsk->mode, CASDSK_MODE_TRANS_TO_PT);
	casdsk_exp_obj_prepare_pt(dsk);
	return 0;
}
/*
 * Begin the transition of an attached exported object to pass-through
 * mode.  No-op (returns 0) for disks without an exported object.
 *
 * Fix: assert @dsk before dereferencing it, matching the sibling entry
 * points (casdsk_disk_set_attached/casdsk_disk_dettach) which already do.
 */
int casdsk_disk_set_pt(struct casdsk_disk *dsk)
{
	int result;

	BUG_ON(!dsk);
	CASDSK_DEBUG_DISK_TRACE(dsk);

	if (!dsk->exp_obj)
		return 0;

	casdsk_disk_lock(dsk);
	result = __casdsk_disk_set_pt(dsk);
	casdsk_disk_unlock(dsk);

	return result;
}
EXPORT_SYMBOL(casdsk_disk_set_pt);
/* Enter the pass-through -> attached transition and wait out PT I/O. */
static inline int __casdsk_disk_set_attached(struct casdsk_disk *dsk)
{
	atomic_set(&dsk->mode, CASDSK_MODE_TRANS_TO_ATTACHED);
	casdsk_exp_obj_prepare_attached(dsk);
	return 0;
}

/* Begin transition to attached mode; no-op without an exported object. */
int casdsk_disk_set_attached(struct casdsk_disk *dsk)
{
	int result;

	BUG_ON(!dsk);
	CASDSK_DEBUG_DISK_TRACE(dsk);

	if (!dsk->exp_obj)
		return 0;

	casdsk_disk_lock(dsk);
	result = __casdsk_disk_set_attached(dsk);
	casdsk_disk_unlock(dsk);

	return result;
}
EXPORT_SYMBOL(casdsk_disk_set_attached);

/* Abort a transition to pass-through: fall back to attached mode. */
static inline int __casdsk_disk_clear_pt(struct casdsk_disk *dsk)
{
	BUG_ON(atomic_read(&dsk->mode) != CASDSK_MODE_TRANS_TO_PT);
	atomic_set(&dsk->mode, CASDSK_MODE_ATTACHED);
	return 0;
}

/* Cancel a pending pass-through transition; no-op without an exp_obj. */
int casdsk_disk_clear_pt(struct casdsk_disk *dsk)
{
	int result;

	BUG_ON(!dsk);
	CASDSK_DEBUG_DISK_TRACE(dsk);

	if (!dsk->exp_obj)
		return 0;

	casdsk_disk_lock(dsk);
	result = __casdsk_disk_clear_pt(dsk);
	casdsk_disk_unlock(dsk);

	return result;
}
EXPORT_SYMBOL(casdsk_disk_clear_pt);

/*
 * Complete the transition to pass-through mode and release the owner's
 * ops; on failure the disk falls back to attached mode.
 * (Name keeps the historical "dettach" spelling used across the API.)
 */
static inline int __casdsk_disk_dettach(struct casdsk_disk *dsk)
{
	int result;

	BUG_ON(atomic_read(&dsk->mode) != CASDSK_MODE_TRANS_TO_PT);

	atomic_set(&dsk->mode, CASDSK_MODE_PT);

	result = casdsk_exp_obj_dettach(dsk);
	if (result) {
		atomic_set(&dsk->mode, CASDSK_MODE_ATTACHED);
		return result;
	}

	return 0;
}

/* Finish detaching the owner from @dsk; no-op without an exp_obj. */
int casdsk_disk_dettach(struct casdsk_disk *dsk)
{
	int result;

	BUG_ON(!dsk);
	CASDSK_DEBUG_DISK_TRACE(dsk);

	if (!dsk->exp_obj)
		return 0;

	casdsk_disk_lock(dsk);
	result = __casdsk_disk_dettach(dsk);
	casdsk_disk_unlock(dsk);

	return result;
}
EXPORT_SYMBOL(casdsk_disk_dettach);

/*
 * Complete the transition to attached mode by binding @owner/@ops; on
 * failure the disk falls back to pass-through mode.
 */
static inline int __casdsk_disk_attach(struct casdsk_disk *dsk,
		struct module *owner, struct casdsk_exp_obj_ops *ops)
{
	int result;

	BUG_ON(!ops);
	BUG_ON(atomic_read(&dsk->mode) != CASDSK_MODE_TRANS_TO_ATTACHED);

	result = casdsk_exp_obj_attach(dsk, owner, ops);
	if (result) {
		atomic_set(&dsk->mode, CASDSK_MODE_PT);
		return result;
	}

	atomic_set(&dsk->mode, CASDSK_MODE_ATTACHED);

	return 0;
}
/*
 * Attach @owner/@ops to the disk's exported object, completing a
 * transition to attached mode.  No-op for disks without an exp_obj.
 *
 * Fix: assert @dsk before dereferencing it, matching the sibling entry
 * points (casdsk_disk_set_attached/casdsk_disk_dettach) which already do.
 */
int casdsk_disk_attach(struct casdsk_disk *dsk, struct module *owner,
		struct casdsk_exp_obj_ops *ops)
{
	int result;

	BUG_ON(!dsk);
	CASDSK_DEBUG_DISK_TRACE(dsk);

	if (!dsk->exp_obj)
		return 0;

	casdsk_disk_lock(dsk);
	result = __casdsk_disk_attach(dsk, owner, ops);
	casdsk_disk_unlock(dsk);

	return result;
}
EXPORT_SYMBOL(casdsk_disk_attach);

96
modules/cas_disk/disk.h Normal file
View File

@@ -0,0 +1,96 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CASDISK_DISK_H__
#define __CASDISK_DISK_H__
#include <linux/kobject.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
struct casdsk_exp_obj;

/*
 * Disk mode state machine.  PT and ATTACHED are the stable states; each
 * CASDSK_MODE_TRANS_* value pairs the *source* state with the TRANSITION
 * bit while a switch is in progress.
 */
#define CASDSK_MODE_UNKNOWN 0
#define CASDSK_MODE_PT (1 << 0)
#define CASDSK_MODE_ATTACHED (1 << 1)
#define CASDSK_MODE_SHUTDOWN (1 << 2)
#define CASDSK_MODE_TRANSITION (1 << 3)
#define CASDSK_MODE_TRANS_TO_ATTACHED (CASDSK_MODE_PT | CASDSK_MODE_TRANSITION)
#define CASDSK_MODE_TRANS_TO_PT (CASDSK_MODE_ATTACHED | \
		CASDSK_MODE_TRANSITION)
#define CASDSK_MODE_TRANS_TO_SHUTDOWN (CASDSK_MODE_SHUTDOWN | \
		CASDSK_MODE_TRANSITION)

/* One cas_disk-managed bottom block device. */
struct casdsk_disk {
	uint32_t id;		/* sequential id; names the "cas<id>" kobject */
	atomic_t mode;		/* CASDSK_MODE_* state */
	char *path;		/* duplicated device path, used for claim lookup */

	struct mutex lock;	/* serializes mode transitions */

	struct block_device *bd;	/* bottom (backing) block device */

	int gd_flags;		/* saved bottom-gendisk flags (restored later) */
	int gd_minors;		/* saved bottom-gendisk minor count */

	struct casdsk_exp_obj *exp_obj;	/* exported object, NULL if none */
	struct kobject kobj;
	struct list_head list;		/* entry in casdsk_module->disk_list */

	void *private;			/* owner's context pointer */
};

int __init casdsk_init_disks(void);
void casdsk_deinit_disks(void);

void __exit casdsk_disk_shutdown_all(void);

int casdsk_disk_allocate_minors(int count);

/* Serialize mode transitions on @dsk. */
static inline void casdsk_disk_lock(struct casdsk_disk *dsk)
{
	mutex_lock(&dsk->lock);
}

static inline void casdsk_disk_unlock(struct casdsk_disk *dsk)
{
	mutex_unlock(&dsk->lock);
}

static inline struct casdsk_disk *casdsk_kobj_to_disk(struct kobject *kobj)
{
	return container_of(kobj, struct casdsk_disk, kobj);
}

/*
 * Mode predicates.  Because the TRANS_* values carry the source-state
 * bit, a disk in transition still satisfies its source-state predicate.
 */
static inline bool casdsk_disk_in_transition(struct casdsk_disk *dsk)
{
	return (atomic_read(&dsk->mode) & CASDSK_MODE_TRANSITION) ==
			CASDSK_MODE_TRANSITION;
}

static inline bool casdsk_disk_is_attached(struct casdsk_disk *dsk)
{
	return (atomic_read(&dsk->mode) & CASDSK_MODE_ATTACHED) ==
			CASDSK_MODE_ATTACHED;
}

static inline bool casdsk_disk_is_pt(struct casdsk_disk *dsk)
{
	return (atomic_read(&dsk->mode) & CASDSK_MODE_PT) == CASDSK_MODE_PT;
}

static inline bool casdsk_disk_is_shutdown(struct casdsk_disk *dsk)
{
	return (atomic_read(&dsk->mode) & CASDSK_MODE_SHUTDOWN) ==
			CASDSK_MODE_SHUTDOWN;
}

static inline bool casdsk_disk_is_unknown(struct casdsk_disk *dsk)
{
	return atomic_read(&dsk->mode) == CASDSK_MODE_UNKNOWN;
}
#endif

842
modules/cas_disk/exp_obj.c Normal file
View File

@@ -0,0 +1,842 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/blkpg.h>
#include <linux/elevator.h>
#include "cas_disk_defs.h"
#include "cas_disk.h"
#include "disk.h"
#include "exp_obj.h"
#include "linux_kernel_version.h"
#define CASDSK_DEV_MINORS 16

/* Lower bound applied to the pending_rqs cache object size. */
#define KMEM_CACHE_MIN_SIZE sizeof(void *)

/*
 * Create the slab caches used by exported objects: exp_obj descriptors,
 * per-CPU pending-request counter arrays, and pass-through I/O contexts.
 * Returns 0 on success, -ENOMEM on any allocation failure.
 */
int __init casdsk_init_exp_objs(void)
{
	int ncpus;

	CASDSK_DEBUG_TRACE();

	casdsk_module->exp_obj_cache = kmem_cache_create("casdsk_exp_obj",
			sizeof(struct casdsk_exp_obj), 0, 0, NULL);
	if (!casdsk_module->exp_obj_cache)
		goto error_exp_obj_cache;

	/* one atomic_t per online CPU; NOTE(review): assumes the CPU count
	 * stays stable for the module lifetime (no hotplug handling here) */
	ncpus = num_online_cpus();
	casdsk_module->pending_rqs_cache =
		kmem_cache_create("casdsk_exp_obj_pending_rqs",
			((sizeof(atomic_t) * ncpus) < KMEM_CACHE_MIN_SIZE) ?
			KMEM_CACHE_MIN_SIZE : (sizeof(atomic_t) * ncpus),
			0, 0, NULL);
	if (!casdsk_module->pending_rqs_cache)
		goto error_pending_rqs_cache;

	casdsk_module->pt_io_ctx_cache =
		kmem_cache_create("casdsk_exp_obj_pt_io_ctx",
			sizeof(struct casdsk_exp_obj_pt_io_ctx),
			0, 0, NULL);
	if (!casdsk_module->pt_io_ctx_cache)
		goto error_pt_io_ctx_cache;

	return 0;

error_pt_io_ctx_cache:
	kmem_cache_destroy(casdsk_module->pending_rqs_cache);
error_pending_rqs_cache:
	kmem_cache_destroy(casdsk_module->exp_obj_cache);
error_exp_obj_cache:
	return -ENOMEM;
}

/* Destroy the slab caches created by casdsk_init_exp_objs(). */
void casdsk_deinit_exp_objs(void)
{
	CASDSK_DEBUG_TRACE();

	kmem_cache_destroy(casdsk_module->pt_io_ctx_cache);
	kmem_cache_destroy(casdsk_module->pending_rqs_cache);
	kmem_cache_destroy(casdsk_module->exp_obj_cache);
}
/*
 * Request-prepare hook of the exported queue: delegates to the attached
 * ops->prep_rq_fn when provided, otherwise accepts the request.
 * (Fix: stray double semicolon removed from the local declaration.)
 */
static int _casdsk_exp_obj_prep_rq_fn(struct request_queue *q, struct request *rq)
{
	struct casdsk_disk *dsk;

	BUG_ON(!q);
	BUG_ON(!q->queuedata);
	dsk = q->queuedata;
	BUG_ON(!dsk->exp_obj);

	if (likely(dsk->exp_obj->ops && dsk->exp_obj->ops->prep_rq_fn))
		return dsk->exp_obj->ops->prep_rq_fn(dsk, q, rq, dsk->private);
	else
		return BLKPREP_OK;
}
/*
 * request_fn of the exported queue, used in attached mode: delegates to
 * the owner's ops->request_fn.
 */
static void _casdsk_exp_obj_request_fn(struct request_queue *q)
{
	struct casdsk_disk *dsk;
	struct request *rq;

	BUG_ON(!q);
	BUG_ON(!q->queuedata);
	dsk = q->queuedata;
	BUG_ON(!dsk);
	BUG_ON(!dsk->exp_obj);

	if (likely(dsk->exp_obj->ops && dsk->exp_obj->ops->request_fn)) {
		dsk->exp_obj->ops->request_fn(dsk, q, dsk->private);
	} else {
		/*
		 * request_fn() is required, as we can't do any default
		 * action in attached mode. In PT mode we handle all bios
		 * directly in make_request_fn(), so request_fn() will not
		 * be called.
		 */
		rq = blk_peek_request(q);
		BUG_ON(rq);
	}
}

/*
 * Attached-mode bio path: offer the bio to ops->make_request_fn, falling
 * back to the queue's original make_request_fn when it is not handled.
 */
static inline void _casdsk_exp_obj_handle_bio_att(struct casdsk_disk *dsk,
		struct request_queue *q,
		struct bio *bio)
{
	int status = CASDSK_BIO_NOT_HANDLED;

	if (likely(dsk->exp_obj->ops->make_request_fn))
		status = dsk->exp_obj->ops->
				make_request_fn(dsk, q, bio, dsk->private);

	if (status == CASDSK_BIO_NOT_HANDLED)
		dsk->exp_obj->mk_rq_fn(q, bio);
}

/*
 * Completion callback of a pass-through clone: complete the original bio
 * with the clone's status, drop the in-flight counter, free the clone
 * and its context.
 */
DECLARE_BLOCK_CALLBACK(_casdsk_exp_obj_bio_pt_io, struct bio *bio,
		unsigned int bytes_done, int error)
{
	struct casdsk_exp_obj_pt_io_ctx *io;

	BUG_ON(!bio);
	BLOCK_CALLBACK_INIT(bio);
	io = bio->bi_private;
	BUG_ON(!io);

	BIO_ENDIO(io->bio, BIO_BISIZE(io->bio),
			BLOCK_CALLBACK_ERROR(bio, error));

	/* counter underflow would mean unbalanced accounting */
	if (atomic_dec_return(&io->dsk->exp_obj->pt_ios) < 0)
		BUG();

	bio_put(bio);
	kmem_cache_free(casdsk_module->pt_io_ctx_cache, io);

	BLOCK_CALLBACK_RETURN();
}

/*
 * Pass-through bio path: clone the bio and submit the clone to the
 * bottom device; the original completes from the clone's end_io.
 */
static inline void _casdsk_exp_obj_handle_bio_pt(struct casdsk_disk *dsk,
		struct request_queue *q,
		struct bio *bio)
{
	struct bio *cloned_bio;
	struct casdsk_exp_obj_pt_io_ctx *io;

	/* GFP_ATOMIC: called from the bio submission path */
	io = kmem_cache_zalloc(casdsk_module->pt_io_ctx_cache, GFP_ATOMIC);
	if (!io) {
		BIO_ENDIO(bio, BIO_BISIZE(bio), -ENOMEM);
		return;
	}

	cloned_bio = cas_bio_clone(bio, GFP_ATOMIC);
	if (!cloned_bio) {
		kmem_cache_free(casdsk_module->pt_io_ctx_cache, io);
		BIO_ENDIO(bio, BIO_BISIZE(bio), -ENOMEM);
		return;
	}

	io->bio = bio;
	io->dsk = dsk;

	atomic_inc(&dsk->exp_obj->pt_ios);

	CAS_BIO_SET_DEV(cloned_bio, casdsk_disk_get_blkdev(dsk));
	cloned_bio->bi_private = io;
	cloned_bio->bi_end_io = REFER_BLOCK_CALLBACK(_casdsk_exp_obj_bio_pt_io);
	cas_submit_bio(BIO_OP_FLAGS(cloned_bio), cloned_bio);
}

/* Route a bio according to the disk's current mode. */
static inline void _casdsk_exp_obj_handle_bio(struct casdsk_disk *dsk,
		struct request_queue *q,
		struct bio *bio)
{
	if (likely(casdsk_disk_is_attached(dsk)))
		_casdsk_exp_obj_handle_bio_att(dsk, q, bio);
	else if (casdsk_disk_is_pt(dsk))
		_casdsk_exp_obj_handle_bio_pt(dsk, q, bio);
	else if (casdsk_disk_is_shutdown(dsk))
		/* device is going away: fail new I/O immediately */
		BIO_ENDIO(bio, BIO_BISIZE(bio), -EIO);
	else
		BUG();
}
/*
 * Drop the per-CPU pending-request counter taken by
 * _casdsk_exp_obj_begin_rq().  (Fix: removed `return` of a void
 * expression — a C constraint violation in a void function.)
 */
static inline void _casdsk_exp_obj_end_rq(struct casdsk_disk *dsk, unsigned int cpu)
{
	atomic_dec(&dsk->exp_obj->pending_rqs[cpu]);
}
/*
 * Account an incoming bio on the current CPU's pending counter, spinning
 * (with io_schedule) while the disk is mid mode-transition so transition
 * code observes a stable in-flight count.  Returns the CPU whose counter
 * was incremented, to be passed to _casdsk_exp_obj_end_rq().
 */
static inline unsigned int _casdsk_exp_obj_begin_rq(struct casdsk_disk *dsk)
{
	unsigned int cpu;

	BUG_ON(!dsk);

retry:
	while (unlikely(casdsk_disk_in_transition(dsk)))
		io_schedule();

	cpu = smp_processor_id();
	atomic_inc(&dsk->exp_obj->pending_rqs[cpu]);

	if (unlikely(casdsk_disk_in_transition(dsk))) {
		/*
		 * If we are in transition state, decrement pending rqs counter
		 * and retry bio processing
		 */
		_casdsk_exp_obj_end_rq(dsk, cpu);
		goto retry;
	}

	return cpu;
}

/* make_request_fn of the exported device: account and dispatch each bio. */
static MAKE_RQ_RET_TYPE _casdsk_exp_obj_make_rq_fn(struct request_queue *q,
		struct bio *bio)
{
	struct casdsk_disk *dsk;
	unsigned int cpu;

	BUG_ON(!bio);
	BUG_ON(!q);
	BUG_ON(!q->queuedata);
	dsk = q->queuedata;

	cpu = _casdsk_exp_obj_begin_rq(dsk);

	_casdsk_exp_obj_handle_bio(dsk, q, bio);

	_casdsk_exp_obj_end_rq(dsk, cpu);

	KRETURN(0);
}

/* Return the partition number of any partition of @bd, or 0 if none. */
static int _casdsk_get_next_part_no(struct block_device *bd)
{
	int part_no = 0;
	struct gendisk *disk = bd->bd_disk;
	struct disk_part_iter piter;
	struct hd_struct *part;

	mutex_lock(&bd->bd_mutex);
	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
	while ((part = disk_part_iter_next(&piter))) {
		/* any single remaining partition is enough */
		part_no = part->partno;
		break;
	}
	disk_part_iter_exit(&piter);
	mutex_unlock(&bd->bd_mutex);

	return part_no;
}

/* Hide all partitions of @bd by deleting them via the BLKPG ioctl. */
static int _casdsk_del_partitions(struct block_device *bd)
{
	int result = 0;
	int part_no;
	struct blkpg_partition bpart;
	struct blkpg_ioctl_arg barg;

	memset(&bpart, 0, sizeof(struct blkpg_partition));
	memset(&barg, 0, sizeof(struct blkpg_ioctl_arg));
	/* kernel buffer handed to an ioctl expecting a __user pointer */
	barg.data = (void __force __user *) &bpart;
	barg.op = BLKPG_DEL_PARTITION;

	while ((part_no = _casdsk_get_next_part_no(bd))) {
		bpart.pno = part_no;
		result = ioctl_by_bdev(bd, BLKPG, (unsigned long) &barg);
		if (result == 0) {
			printk(CASDSK_KERN_INFO "Partition %d on %s hidden\n",
				part_no, bd->bd_disk->disk_name);
		} else {
			printk(CASDSK_KERN_ERR "Error(%d) hiding the partition %d on %s\n",
				result, part_no, bd->bd_disk->disk_name);
			break;
		}
	}

	return result;
}

/* gendisk flags taken over from / restored to the bottom device. */
#ifdef GENHD_FL_NO_PART_SCAN
static int _casdsk_flags = GENHD_FL_NO_PART_SCAN | GENHD_FL_EXT_DEVT;
#else
static int _casdsk_flags = GENHD_FL_EXT_DEVT;
#endif
static int _casdsk_exp_obj_hide_parts(struct casdsk_disk *dsk)
{
struct block_device *bd = casdsk_disk_get_blkdev(dsk);
struct gendisk *gdsk = casdsk_disk_get_gendisk(dsk);
if (bd != bd->bd_contains)
/* It is partition, no more job required */
return 0;
if (disk_max_parts(dsk->bd->bd_disk) > 1) {
if (_casdsk_del_partitions(bd)) {
printk(CASDSK_KERN_ERR "Error deleting a partition on thedevice %s\n",
gdsk->disk_name);
/* Try restore previous partitions by rescaning */
ioctl_by_bdev(bd, BLKRRPART, (unsigned long) NULL);
return -EINVAL;
}
}
/* Save original flags and minors */
dsk->gd_flags = gdsk->flags & _casdsk_flags;
dsk->gd_minors = gdsk->minors;
/* Setup disk of bottom device as not partitioned device */
gdsk->flags &= ~_casdsk_flags;
gdsk->minors = 1;
/* Rescan partitions */
ioctl_by_bdev(bd, BLKRRPART, (unsigned long) NULL);
return 0;
}
/*
 * Pick major/minor numbers and flags for the exported gendisk @gd.  A
 * whole bottom device donates its partition count (after its partitions
 * are hidden); a bottom partition gets a single minor and no flags.
 */
static int _casdsk_exp_obj_set_dev_t(struct casdsk_disk *dsk, struct gendisk *gd)
{
	int flags;
	int minors = disk_max_parts(casdsk_disk_get_gendisk(dsk));
	struct block_device *bdev;

	bdev = casdsk_disk_get_blkdev(dsk);
	BUG_ON(!bdev);

	if (bdev->bd_contains != bdev) {
		/* bottom object is a partition */
		minors = 1;
		flags = 0;
	} else {
		if (_casdsk_exp_obj_hide_parts(dsk))
			return -EINVAL;
		flags = dsk->gd_flags;
	}

	gd->first_minor = casdsk_disk_allocate_minors(minors);
	if (gd->first_minor < 0) {
		CASDSK_DEBUG_DISK_ERROR(dsk, "Cannot allocate %d minors", minors);
		return -EINVAL;
	}
	gd->minors = minors;

	gd->major = casdsk_module->disk_major;
	gd->flags |= flags;

	return 0;
}

/* Undo _casdsk_exp_obj_set_dev_t(): restore flags/minors and rescan. */
static void _casdsk_exp_obj_clear_dev_t(struct casdsk_disk *dsk)
{
	struct block_device *bdev = casdsk_disk_get_blkdev(dsk);
	struct gendisk *gdsk = casdsk_disk_get_gendisk(dsk);

	if (bdev->bd_contains == bdev) {
		/* Restore previous configuration of bottom disk */
		gdsk->minors = dsk->gd_minors;
		gdsk->flags |= dsk->gd_flags;
		ioctl_by_bdev(bdev, BLKRRPART, (unsigned long) NULL);
	}
}

/* The exported device installs no custom block_device operations. */
static const struct block_device_operations _casdsk_exp_obj_ops = {
	.owner = THIS_MODULE,
};

/* Allocate the exp_obj plus its per-CPU pending-request counter array. */
static int casdsk_exp_obj_alloc(struct casdsk_disk *dsk)
{
	struct casdsk_exp_obj *exp_obj;
	int result;

	BUG_ON(!dsk);
	BUG_ON(dsk->exp_obj);

	CASDSK_DEBUG_DISK_TRACE(dsk);

	exp_obj = kmem_cache_zalloc(casdsk_module->exp_obj_cache, GFP_KERNEL);
	if (!exp_obj) {
		CASDSK_DEBUG_ERROR("Cannot allocate memory");
		result = -ENOMEM;
		goto error_exp_obj_alloc;
	}

	exp_obj->pending_rqs = kmem_cache_zalloc(casdsk_module->pending_rqs_cache,
			GFP_KERNEL);
	if (!exp_obj->pending_rqs) {
		result = -ENOMEM;
		goto error_pending_rqs_alloc;
	}

	dsk->exp_obj = exp_obj;

	return 0;

error_pending_rqs_alloc:
	kmem_cache_free(casdsk_module->exp_obj_cache, exp_obj);
error_exp_obj_alloc:
	return result;
}

/*
 * Drop the disk's reference to its exp_obj; the object itself is freed
 * from the kobject release callback when the last reference goes away.
 */
void casdsk_exp_obj_free(struct casdsk_disk *dsk)
{
	struct casdsk_exp_obj *exp_obj;

	CASDSK_DEBUG_DISK_TRACE(dsk);

	exp_obj = dsk->exp_obj;
	if (!exp_obj)
		return;

	kobject_put(&exp_obj->kobj);
	dsk->exp_obj = NULL;
}

/* Free everything owned by @exp_obj, including the descriptor itself. */
static void __casdsk_exp_obj_release(struct casdsk_exp_obj *exp_obj)
{
	kfree(exp_obj->dev_name);
	kmem_cache_free(casdsk_module->pending_rqs_cache, exp_obj->pending_rqs);
	kmem_cache_free(casdsk_module->exp_obj_cache, exp_obj);
}

/* kobject release callback: free the exp_obj, then drop the owner module. */
static void _casdsk_exp_obj_release(struct kobject *kobj)
{
	struct casdsk_exp_obj *exp_obj;
	struct module *owner;

	BUG_ON(!kobj);

	exp_obj = casdsk_kobj_to_exp_obj(kobj);
	BUG_ON(!exp_obj);

	CASDSK_DEBUG_TRACE();

	/* read the owner before the descriptor is freed */
	owner = exp_obj->owner;

	__casdsk_exp_obj_release(exp_obj);

	if (owner)
		module_put(owner);
}

static struct kobj_type casdsk_exp_obj_ktype = {
	.release = _casdsk_exp_obj_release
};

/* Register the exp_obj kobject, named after the device, under the disk. */
static int _casdsk_exp_obj_init_kobject(struct casdsk_disk *dsk)
{
	int result = 0;
	struct casdsk_exp_obj *exp_obj = dsk->exp_obj;

	kobject_init(&exp_obj->kobj, &casdsk_exp_obj_ktype);
	result = kobject_add(&exp_obj->kobj, &dsk->kobj,
			"%s", exp_obj->dev_name);
	if (result)
		CASDSK_DEBUG_DISK_ERROR(dsk, "Cannot register kobject");

	return result;
}
/*
 * Create (but do not yet activate) an exported block device named
 * @dev_name on top of @dsk: allocates the exp_obj, takes a reference on
 * @owner, sets up the gendisk and request queue, and installs cas_disk's
 * prep_rq/make_request dispatchers.  Returns 0 or a negative errno.
 */
int casdsk_exp_obj_create(struct casdsk_disk *dsk, const char *dev_name,
		struct module *owner, struct casdsk_exp_obj_ops *ops)
{
	struct casdsk_exp_obj *exp_obj;
	struct request_queue *queue;
	struct gendisk *gd;
	int result = 0;

	BUG_ON(!owner);
	BUG_ON(!dsk);
	BUG_ON(!ops);
	BUG_ON(dsk->exp_obj);

	CASDSK_DEBUG_DISK_TRACE(dsk);

	if (strlen(dev_name) >= DISK_NAME_LEN)
		return -EINVAL;

	result = casdsk_exp_obj_alloc(dsk);
	if (result)
		goto error_alloc;

	exp_obj = dsk->exp_obj;

	exp_obj->dev_name = kstrdup(dev_name, GFP_KERNEL);
	if (!exp_obj->dev_name) {
		/* kobject not initialized yet — free the descriptor directly */
		__casdsk_exp_obj_release(exp_obj);
		result = -ENOMEM;
		goto error_strdup;
	}

	result = _casdsk_exp_obj_init_kobject(dsk);
	if (result) {
		__casdsk_exp_obj_release(exp_obj);
		goto error_kobject;
	}

	if (!try_module_get(owner)) {
		CASDSK_DEBUG_DISK_ERROR(dsk, "Cannot get reference to module");
		result = -ENAVAIL;
		goto error_module;
	}
	exp_obj->owner = owner;
	exp_obj->ops = ops;

	gd = alloc_disk(1);
	if (!gd) {
		result = -ENOMEM;
		goto error_alloc_disk;
	}
	exp_obj->gd = gd;

	result = _casdsk_exp_obj_set_dev_t(dsk, gd);
	if (result)
		goto error_dev_t;

	spin_lock_init(&exp_obj->rq_lock);

	queue = blk_init_queue(_casdsk_exp_obj_request_fn, &exp_obj->rq_lock);
	if (!queue) {
		result = -ENOMEM;
		goto error_init_queue;
	}
	BUG_ON(queue->queuedata);
	queue->queuedata = dsk;
	exp_obj->queue = queue;

	gd->fops = &_casdsk_exp_obj_ops;
	gd->queue = queue;
	gd->private_data = dsk;
	strlcpy(gd->disk_name, exp_obj->dev_name, sizeof(gd->disk_name));

	if (exp_obj->ops->prepare_queue) {
		result = exp_obj->ops->prepare_queue(dsk, queue, dsk->private);
		if (result)
			goto error_prepare_queue;
	}

	blk_queue_prep_rq(queue, _casdsk_exp_obj_prep_rq_fn);

	/* remember the queue's original make_request_fn: it is the fallback
	 * used when ops->make_request_fn reports CASDSK_BIO_NOT_HANDLED */
	dsk->exp_obj->mk_rq_fn = queue->make_request_fn;
	blk_queue_make_request(queue, _casdsk_exp_obj_make_rq_fn);

	if (exp_obj->ops->set_geometry) {
		result = exp_obj->ops->set_geometry(dsk, dsk->private);
		if (result)
			goto error_set_geometry;
	}

	return 0;

	/* unwind in reverse order of construction */
error_set_geometry:
	if (exp_obj->ops->cleanup_queue)
		exp_obj->ops->cleanup_queue(dsk, queue, dsk->private);
error_prepare_queue:
	blk_cleanup_queue(queue);
error_init_queue:
	_casdsk_exp_obj_clear_dev_t(dsk);
error_dev_t:
	put_disk(gd);
error_alloc_disk:
	module_put(owner);
	dsk->exp_obj->owner = NULL;
error_module:
	casdsk_exp_obj_free(dsk);
error_kobject:
error_strdup:
	dsk->exp_obj = NULL;
error_alloc:
	return result;
}
EXPORT_SYMBOL(casdsk_exp_obj_create);
/* Accessor: request queue of the exported object. */
struct request_queue *casdsk_exp_obj_get_queue(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	BUG_ON(!dsk->exp_obj);
	return dsk->exp_obj->queue;
}
EXPORT_SYMBOL(casdsk_exp_obj_get_queue);

/* Accessor: gendisk of the exported object. */
struct gendisk *casdsk_exp_obj_get_gendisk(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	BUG_ON(!dsk->exp_obj);
	return dsk->exp_obj->gd;
}
EXPORT_SYMBOL(casdsk_exp_obj_get_gendisk);
/*
 * Best-effort check whether a file exists at @path, by attempting to
 * open it read-only.  An open failure is treated as "does not exist".
 */
static bool _casdsk_exp_obj_exists(const char *path)
{
	struct file *f = filp_open(path, O_RDONLY, 0);

	if (!f || IS_ERR(f)) {
		/*failed to open file - it is safe to assume,
		 * it does not exist
		 */
		return false;
	}

	filp_close(f, NULL);
	return true;
}
/*
 * Activate a created exported object: refuse (-EEXIST) if /dev/<name>
 * already exists, publish the gendisk, claim the bottom device and link
 * the sysfs "blockdev" entry.  Returns 0 or a negative errno.
 */
int casdsk_exp_obj_activate(struct casdsk_disk *dsk)
{
	char *path;
	int result;

	BUG_ON(!dsk);
	BUG_ON(!dsk->exp_obj);
	BUG_ON(!dsk->exp_obj->gd);
	BUG_ON(dsk->exp_obj->activated);

	CASDSK_DEBUG_DISK_TRACE(dsk);

	path = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!path)
		return -ENOMEM;

	snprintf(path, PATH_MAX, "/dev/%s", dsk->exp_obj->dev_name);
	if (_casdsk_exp_obj_exists(path)) {
		printk(CASDSK_KERN_ERR "Could not activate exported object, "
				"because file %s exists.\n", path);
		kfree(path);
		return -EEXIST;
	}
	kfree(path);

	dsk->exp_obj->activated = true;
	atomic_set(&dsk->mode, CASDSK_MODE_ATTACHED);
	add_disk(dsk->exp_obj->gd);

	result = bd_claim_by_disk(dsk->bd, dsk, dsk->exp_obj->gd);
	if (result)
		goto error_bd_claim;

	result = sysfs_create_link(&dsk->exp_obj->kobj,
			&disk_to_dev(dsk->exp_obj->gd)->kobj,
			"blockdev");
	if (result)
		goto error_sysfs_link;

	CASDSK_DEBUG_DISK(dsk, "Activated exp object %s", dsk->exp_obj->dev_name);

	return 0;

error_sysfs_link:
	bd_release_from_disk(dsk->bd, dsk->exp_obj->gd);
error_bd_claim:
	del_gendisk(dsk->exp_obj->gd);
	dsk->exp_obj->activated = false;
	return result;
}
EXPORT_SYMBOL(casdsk_exp_obj_activate);

/* True once the exported object has been published via add_disk(). */
bool casdsk_exp_obj_activated(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	return dsk->exp_obj->activated;
}
EXPORT_SYMBOL(casdsk_exp_obj_activated);

/*
 * Grab the exported bdev and hold its bd_mutex; refuses with -EBUSY when
 * the device is currently open.  Paired with casdsk_exp_obj_unlock().
 */
int casdsk_exp_obj_lock(struct casdsk_disk *dsk)
{
	struct casdsk_exp_obj *exp_obj;

	BUG_ON(!dsk);
	BUG_ON(!dsk->exp_obj);

	CASDSK_DEBUG_DISK_TRACE(dsk);

	exp_obj = dsk->exp_obj;

	exp_obj->locked_bd = bdget_disk(exp_obj->gd, 0);
	if (!exp_obj->locked_bd)
		return -ENAVAIL;

	mutex_lock(&exp_obj->locked_bd->bd_mutex);

	if (exp_obj->locked_bd->bd_openers) {
		printk(CASDSK_KERN_DEBUG "Device %s in use (openers=%d). Refuse to stop\n",
				exp_obj->locked_bd->bd_disk->disk_name,
				exp_obj->locked_bd->bd_openers);
		casdsk_exp_obj_unlock(dsk);
		return -EBUSY;
	}

	return 0;
}
EXPORT_SYMBOL(casdsk_exp_obj_lock);

/* Release the bd_mutex and the bdev reference taken by ..._lock(). */
int casdsk_exp_obj_unlock(struct casdsk_disk *dsk)
{
	BUG_ON(!dsk);
	BUG_ON(!dsk->exp_obj);
	BUG_ON(!dsk->exp_obj->locked_bd);

	CASDSK_DEBUG_DISK_TRACE(dsk);

	mutex_unlock(&dsk->exp_obj->locked_bd->bd_mutex);
	bdput(dsk->exp_obj->locked_bd);
	dsk->exp_obj->locked_bd = NULL;

	return 0;
}
EXPORT_SYMBOL(casdsk_exp_obj_unlock);

/*
 * Tear down the exported object (caller must hold it via
 * casdsk_exp_obj_lock): unpublish the gendisk, restore the bottom
 * device's configuration and clean up the request queue.
 */
int casdsk_exp_obj_destroy(struct casdsk_disk *dsk)
{
	struct casdsk_exp_obj *exp_obj;

	BUG_ON(!dsk);
	BUG_ON(!dsk->exp_obj);
	BUG_ON(!dsk->exp_obj->locked_bd);

	CASDSK_DEBUG_DISK_TRACE(dsk);

	exp_obj = dsk->exp_obj;

	if (casdsk_exp_obj_activated(dsk)) {
		sysfs_remove_link(&exp_obj->kobj, "blockdev");
		bd_release_from_disk(dsk->bd, exp_obj->gd);
		_casdsk_exp_obj_clear_dev_t(dsk);
		del_gendisk(exp_obj->gd);
	}

	if (exp_obj->queue)
		blk_cleanup_queue(exp_obj->queue);

	atomic_set(&dsk->mode, CASDSK_MODE_UNKNOWN);

	put_disk(exp_obj->gd);

	return 0;
}
EXPORT_SYMBOL(casdsk_exp_obj_destroy);

/* Unbind owner/ops from the exported object, dropping the module ref. */
int casdsk_exp_obj_dettach(struct casdsk_disk *dsk)
{
	module_put(dsk->exp_obj->owner);
	dsk->exp_obj->owner = NULL;
	dsk->exp_obj->ops = NULL;

	return 0;
}

/* Bind new owner/ops to the exported object, taking a module reference. */
int casdsk_exp_obj_attach(struct casdsk_disk *dsk, struct module *owner,
		struct casdsk_exp_obj_ops *ops)
{
	if (!try_module_get(owner)) {
		CASDSK_DEBUG_DISK_ERROR(dsk, "Cannot get reference to module");
		return -ENAVAIL;
	}

	dsk->exp_obj->owner = owner;
	dsk->exp_obj->ops = ops;

	return 0;
}

/* Poll until every per-CPU pending-request counter drains to zero. */
static void _casdsk_exp_obj_wait_for_pending_rqs(struct casdsk_disk *dsk)
{
	int i, ncpus;
	struct casdsk_exp_obj *exp_obj = dsk->exp_obj;

	ncpus = num_online_cpus();
	for (i = 0; i < ncpus; i++)
		while (atomic_read(&exp_obj->pending_rqs[i]))
			schedule();
}
/*
 * Force the elevator to dispatch everything it holds.  The elevator API
 * moved between kernel versions, hence the three variants below.
 */
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 3, 0)
static void _casdsk_exp_obj_drain_elevator(struct request_queue *q)
{
	if (q->elevator && q->elevator->elevator_type)
		while (q->elevator->elevator_type->ops.
				elevator_dispatch_fn(q, 1))
			;
}
#elif LINUX_VERSION_CODE <= KERNEL_VERSION(4, 10, 0)
static void _casdsk_exp_obj_drain_elevator(struct request_queue *q)
{
	if (q->elevator && q->elevator->type)
		while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
			;
}
#else
static void _casdsk_exp_obj_drain_elevator(struct request_queue *q)
{
	if (q->elevator && q->elevator->type)
		while (q->elevator->type->ops.sq.elevator_dispatch_fn(q, 1))
			;
}
#endif

/* Drain the elevator, then run and sync the exported queue. */
static void _casdsk_exp_obj_flush_queue(struct casdsk_disk *dsk)
{
	struct casdsk_exp_obj *exp_obj = dsk->exp_obj;
	struct request_queue *q = exp_obj->queue;

	spin_lock_irq(q->queue_lock);
	_casdsk_exp_obj_drain_elevator(q);
	spin_unlock_irq(q->queue_lock);

	blk_run_queue(q);
	blk_sync_queue(q);
}

/* Quiesce before switching to pass-through mode. */
void casdsk_exp_obj_prepare_pt(struct casdsk_disk *dsk)
{
	_casdsk_exp_obj_wait_for_pending_rqs(dsk);
	_casdsk_exp_obj_flush_queue(dsk);
}

/* Quiesce before switching to attached mode: wait out pass-through I/O. */
void casdsk_exp_obj_prepare_attached(struct casdsk_disk *dsk)
{
	_casdsk_exp_obj_wait_for_pending_rqs(dsk);

	while (atomic_read(&dsk->exp_obj->pt_ios))
		schedule_timeout(msecs_to_jiffies(200));
}

/* Quiesce before shutdown: wait out pass-through I/O. */
void casdsk_exp_obj_prepare_shutdown(struct casdsk_disk *dsk)
{
	_casdsk_exp_obj_wait_for_pending_rqs(dsk);

	while (atomic_read(&dsk->exp_obj->pt_ios))
		schedule_timeout(msecs_to_jiffies(200));
}

View File

@@ -0,0 +1,59 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CASDISK_EXP_OBJ_H__
#define __CASDISK_EXP_OBJ_H__
#include <linux/kobject.h>
#include <linux/fs.h>
struct casdsk_disk;
/* Context of a single pass-through I/O issued against an exported object. */
struct casdsk_exp_obj_pt_io_ctx {
	struct casdsk_disk *dsk;
	struct bio *bio;
};
/* Runtime state of a block device exported on top of a cas_disk disk. */
struct casdsk_exp_obj {
	/* gendisk and request queue of the exported block device */
	struct gendisk *gd;
	struct request_queue *queue;
	/* NOTE(review): presumably guards request handling for this
	 * object — confirm against the lock users in exp_obj.c */
	spinlock_t rq_lock;
	/* block device held open/locked by this exported object, if any */
	struct block_device *locked_bd;
	/* module pinned by casdsk_exp_obj_attach(); ops supplied by it */
	struct module *owner;
	bool activated;
	struct casdsk_exp_obj_ops *ops;
	/* original make_request function of the queue */
	make_request_fn *mk_rq_fn;
	const char *dev_name;
	struct kobject kobj;
	/* count of in-flight pass-through I/Os (see prepare_* helpers) */
	atomic_t pt_ios;
	/* per-CPU counters of pending requests, drained before mode switch */
	atomic_t *pending_rqs;
};
int __init casdsk_init_exp_objs(void);
void casdsk_deinit_exp_objs(void);
void casdsk_exp_obj_free(struct casdsk_disk *dsk);
/* NOTE(review): "dettach" [sic] — name is part of the module API. */
int casdsk_exp_obj_dettach(struct casdsk_disk *dsk);
int casdsk_exp_obj_attach(struct casdsk_disk *dsk, struct module *owner,
		struct casdsk_exp_obj_ops *ops);
void casdsk_exp_obj_prepare_pt(struct casdsk_disk *dsk);
void casdsk_exp_obj_prepare_attached(struct casdsk_disk *dsk);
void casdsk_exp_obj_prepare_shutdown(struct casdsk_disk *dsk);
/* Map an embedded kobject back to its owning casdsk_exp_obj. */
static inline struct casdsk_exp_obj *casdsk_kobj_to_exp_obj(struct kobject *kobj)
{
	return container_of(kobj, struct casdsk_exp_obj, kobj);
}
#endif

View File

@@ -0,0 +1,2 @@
/home/robert/work/cas/ICAS_Linux/modules/cas_disk/exp_obj.o-.text-f20
/home/robert/work/cas/ICAS_Linux/modules/cas_disk/exp_obj.o-.text-f27

165
modules/cas_disk/main.c Normal file
View File

@@ -0,0 +1,165 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include "cas_disk_defs.h"
#include "cas_disk.h"
#include "disk.h"
#include "exp_obj.h"
#include "sysfs.h"
/* Layer information. */
MODULE_AUTHOR("Intel(R) Corporation");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(CAS_VERSION);
/* cas_disk <-> cas_cache interface version, exposed as a read-only
 * module parameter (owner/group readable). */
static int iface_version = CASDSK_IFACE_VERSION;
module_param(iface_version, int, (S_IRUSR | S_IRGRP));
/* Non-zero while a stored upgrade configuration is held: set in
 * casdsk_store_config(), cleared in casdsk_free_stored_config(). */
static int upgrade_in_progress = 0;
module_param(upgrade_in_progress, int, (S_IRUSR | S_IRGRP));
/* Global module state, allocated in casdsk_init_module(). */
struct casdsk_module *casdsk_module;
/* Return the cas_disk <-> cas_cache interface version. */
uint32_t casdsk_get_version(void)
{
	return CASDSK_IFACE_VERSION;
}
EXPORT_SYMBOL(casdsk_get_version);
/*
 * Release a stored configuration: free each blob's buffer, then the
 * blob array itself, and reset the bookkeeping fields. No-op when no
 * configuration is stored.
 */
static void _casdsk_module_free_config(struct casdsk_module *mod)
{
	int i;

	if (!mod->config.blobs)
		return;

	for (i = 0; i < mod->config.n_blobs; i++)
		vfree(mod->config.blobs[i].buffer);

	kfree(mod->config.blobs);
	mod->config.blobs = NULL;
	mod->config.n_blobs = 0;
}
/*
 * Store upgrade configuration blobs for an in-flight upgrade, taking
 * ownership of @blobs (it is later freed by _casdsk_module_free_config).
 * Any previously stored configuration is released first, and the
 * upgrade_in_progress flag is raised.
 */
void casdsk_store_config(size_t n_blobs, struct casdsk_props_conf *blobs)
{
	upgrade_in_progress = 1;
	_casdsk_module_free_config(casdsk_module);
	casdsk_module->config.blobs = blobs;
	casdsk_module->config.n_blobs = n_blobs;
}
EXPORT_SYMBOL(casdsk_store_config);
/*
 * Fetch the stored upgrade configuration.
 * @blobs: out-pointer set to the stored blob array (may be NULL when
 *         nothing is stored). Ownership stays with the module.
 * Returns the number of stored blobs.
 */
size_t casdsk_get_stored_config(struct casdsk_props_conf **blobs)
{
	BUG_ON(!blobs);
	*blobs = casdsk_module->config.blobs;
	return casdsk_module->config.n_blobs;
}
EXPORT_SYMBOL(casdsk_get_stored_config);
/* Free the stored upgrade configuration and clear the upgrade flag. */
void casdsk_free_stored_config(void)
{
	CASDSK_DEBUG_TRACE();
	_casdsk_module_free_config(casdsk_module);
	upgrade_in_progress = 0;
}
EXPORT_SYMBOL(casdsk_free_stored_config);
/*
 * kobject release callback: runs when the last reference to the module
 * kobject is dropped (see casdsk_deinit_kobjects); frees the stored
 * configuration and the module state itself.
 */
static void _casdsk_module_release(struct kobject *kobj)
{
	struct casdsk_module *mod;
	CASDSK_DEBUG_TRACE();
	BUG_ON(!kobj);
	mod = container_of(kobj, struct casdsk_module, kobj);
	BUG_ON(!mod);
	_casdsk_module_free_config(mod);
	kfree(mod);
}
/* kobject type for the module-level kobject; only release is needed. */
static struct kobj_type _casdsk_module_ktype = {
	.release = _casdsk_module_release,
};
/*
 * Register the module-level kobject under /sys/kernel/cas_disk.
 * Returns 0 on success or the kobject_add() error code.
 */
static int __init casdsk_init_kobjects(void)
{
	int ret;

	CASDSK_DEBUG_TRACE();

	kobject_init(&casdsk_module->kobj, &_casdsk_module_ktype);
	ret = kobject_add(&casdsk_module->kobj, kernel_kobj, "cas_disk");
	if (ret)
		CASDSK_DEBUG_ERROR("Cannot register kobject");

	return ret;
}
/*
 * Module entry point: allocate the global state, then initialize the
 * exported-object subsystem, the disk subsystem and the sysfs kobject.
 * On any failure, the parts already initialized are torn down in
 * reverse order via the goto chain below.
 */
static int __init casdsk_init_module(void)
{
	int result = 0;
	casdsk_module = kzalloc(sizeof(*casdsk_module), GFP_KERNEL);
	if (!casdsk_module) {
		result = -ENOMEM;
		goto error_kmalloc;
	}
	mutex_init(&casdsk_module->lock);
	/* Hold the module lock across the whole multi-step init. */
	mutex_lock(&casdsk_module->lock);
	result = casdsk_init_exp_objs();
	if (result)
		goto error_init_exp_objs;
	result = casdsk_init_disks();
	if (result)
		goto error_init_disks;
	result = casdsk_init_kobjects();
	if (result)
		goto error_kobjects;
	mutex_unlock(&casdsk_module->lock);
	printk(CASDSK_KERN_INFO "%s Version %s (%s)::Module loaded successfully\n",
		CASDSK_LOGO, CAS_VERSION, CAS_KERNEL);
	return result;
	/* Error unwind: reverse order of the successful steps above. */
error_kobjects:
	casdsk_deinit_disks();
error_init_disks:
	casdsk_deinit_exp_objs();
error_init_exp_objs:
	mutex_unlock(&casdsk_module->lock);
	kfree(casdsk_module);
error_kmalloc:
	return result;
}
module_init(casdsk_init_module);
/*
 * Drop the module kobject reference; the final put triggers
 * _casdsk_module_release(), which frees the module state.
 */
static void __exit casdsk_deinit_kobjects(void)
{
	kobject_put(&casdsk_module->kobj);
}
/*
 * Module exit point: shut down all disks, then tear subsystems down in
 * reverse order of initialization.
 */
static void __exit casdsk_exit_module(void)
{
	casdsk_disk_shutdown_all();
	casdsk_deinit_disks();
	casdsk_deinit_exp_objs();
	casdsk_deinit_kobjects();
}
module_exit(casdsk_exit_module);

35
modules/cas_disk/sysfs.c Normal file
View File

@@ -0,0 +1,35 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_disk_defs.h"
#include "sysfs.h"
/*
 * Generic sysfs 'show' dispatcher: route the read to the casdsk
 * attribute's show callback, or fail with -EIO when none is set.
 */
static ssize_t _casdsk_sysfs_show(struct kobject *kobj, struct attribute *attr,
		char *page)
{
	struct casdsk_attribute *casdsk_attr =
			container_of(attr, struct casdsk_attribute, attr);

	return casdsk_attr->show ? casdsk_attr->show(kobj, page) : -EIO;
}
/*
 * Generic sysfs 'store' dispatcher: route the write to the casdsk
 * attribute's store callback, or fail with -EIO when none is set.
 */
static ssize_t _casdsk_sysfs_store(struct kobject *kobj, struct attribute *attr,
		const char *buf, size_t len)
{
	struct casdsk_attribute *casdsk_attr =
			container_of(attr, struct casdsk_attribute, attr);

	return casdsk_attr->store ? casdsk_attr->store(kobj, buf, len) : -EIO;
}
/* sysfs operations shared by all cas_disk kobjects; both hooks dispatch
 * through struct casdsk_attribute callbacks. */
const struct sysfs_ops casdsk_sysfs_ops = {
	.show = _casdsk_sysfs_show,
	.store = _casdsk_sysfs_store
};

21
modules/cas_disk/sysfs.h Normal file
View File

@@ -0,0 +1,21 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CASDISK_SYSFS_H__
#define __CASDISK_SYSFS_H__
#include <linux/kobject.h>
#include <linux/sysfs.h>
struct casdsk_disk;
/*
 * Generic sysfs attribute with casdsk-specific show/store callbacks,
 * dispatched by casdsk_sysfs_ops (sysfs.c). Either callback may be
 * NULL, in which case the corresponding operation fails with -EIO.
 */
struct casdsk_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kobject *kobj, char *page);
	ssize_t (*store)(struct kobject *kobj, const char *buf, size_t len);
};
extern const struct sysfs_ops casdsk_sysfs_ops;
#endif

94
modules/config.mk Normal file
View File

@@ -0,0 +1,94 @@
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
# Generated version file inside the module tree ($(M) is the external
# module directory passed to kbuild).
VERSION_FILE := $(M)/CAS_VERSION
# NOTE(review): the recipe runs CAS_VERSION_GEN relative to make's CWD
# and the script writes ./CAS_VERSION, while the target is
# $(M)/CAS_VERSION — confirm both resolve to the same directory when
# invoked through kbuild.
$(VERSION_FILE):
	./CAS_VERSION_GEN
# check_cflag: expands to 1 when gcc accepts the flag passed in $(1),
# else 0. Deliberately recursive ('=') so $(1) is substituted per call.
check_cflag=$(shell echo "" | \
	gcc -c -xc ${1} -o /dev/null - 2>/dev/null; \
	if [ $$? -eq 0 ]; then echo 1; else echo 0; fi; )
# Pull in the generated version variables; '-' tolerates a missing file
# on the first run (the $(VERSION_FILE) rule will create it).
-include $(VERSION_FILE)
EXTRA_CFLAGS += -DCAS_VERSION_MAIN=$(CAS_VERSION_MAIN)
EXTRA_CFLAGS += -DCAS_VERSION_MAJOR=$(CAS_VERSION_MAJOR)
EXTRA_CFLAGS += -DCAS_VERSION_MINOR=$(CAS_VERSION_MINOR)
EXTRA_CFLAGS += -DCAS_BUILD_NO=\"$(CAS_BUILD_NO)\"
# Define CAS_BUILD_FLAG only when the build flag is non-empty
# (ifneq replaces the original ifeq with an empty 'then' branch).
ifneq ($(strip $(CAS_BUILD_FLAG)),)
EXTRA_CFLAGS += -DCAS_BUILD_FLAG=\"$(CAS_BUILD_FLAG)\"
endif
# Common hardened compile flags and include paths for all CAS objects.
EXTRA_CFLAGS += -O2 -D_FORTIFY_SOURCE=2 -Wformat -Wformat-security
EXTRA_CFLAGS += -I$(M)
EXTRA_CFLAGS += -I$(M)/cas_cache
EXTRA_CFLAGS += -I$(M)/include
EXTRA_CFLAGS += -DCAS_KERNEL=\"$(KERNELRELEASE)\"
# check_header: expands to 1 when <$(1)> can be compiled against by gcc,
# else 0. Recursive ('=') on purpose: $(1) is substituted per call.
check_header=$(shell echo "\#include <${1}>" | \
	gcc -c -xc -o /dev/null - 2>/dev/null; \
	if [ $$? -eq 0 ]; then echo 1; else echo 0; fi; )
INCDIR = $(PWD)/include
# Assume no full NVMe support until the probes below say otherwise.
NVME_FULL = 0
# SLES detection: /etc/SuSE-release exists only on SUSE systems.
SLES ?= $(shell cat /etc/SuSE-release 2>/dev/null)
ifneq ($(SLES),)
EXTRA_CFLAGS += -DCAS_UAPI_LINUX_NVME_IOCTL
EXTRA_CFLAGS += -DCAS_NVME_PARTIAL
EXTRA_CFLAGS += -DCAS_SLES
# Turn VERSION="12-SP3" from /etc/os-release into e.g. "12SP3".
SLES_VERSION := $(shell cat /etc/os-release |\
	sed -n 's/VERSION="\([0-9]\+\)-\(.\+\)"/\1\2/p')
EXTRA_CFLAGS += -DCAS_SLES$(SLES_VERSION)
INCDIR = ""
endif
# Probe which NVMe uapi headers this kernel provides and define the
# matching feature macros.
ifeq ($(call check_header,$(INCDIR)/uapi/nvme.h), 1)
EXTRA_CFLAGS += -DCAS_UAPI_NVME_IOCTL
EXTRA_CFLAGS += -DCAS_UAPI_NVME
EXTRA_CFLAGS += -DCAS_NVME_PARTIAL
endif
ifeq ($(call check_header,$(INCDIR)/uapi/linux/nvme.h), 1)
EXTRA_CFLAGS += -DCAS_UAPI_LINUX_NVME
EXTRA_CFLAGS += -DCAS_NVME_PARTIAL
endif
ifeq ($(call check_header,$(INCDIR)/uapi/linux/nvme_ioctl.h), 1)
EXTRA_CFLAGS += -DCAS_UAPI_LINUX_NVME_IOCTL
EXTRA_CFLAGS += -DCAS_NVME_PARTIAL
# Full NVMe support is only enabled on RHEL/CentOS 7.4 and 7.5.
ifeq ($(shell cat /etc/redhat-release 2>/dev/null | grep "\(Red Hat\|CentOS\) [a-zA-Z ]* 7\.[45]" | wc -l), 1)
NVME_FULL = 1
endif
endif
# Major/minor components of the running kernel release ("X.Y.z-...").
# ':=' evaluates the $(shell ...) once at parse time instead of
# re-forking a shell on every expansion; the value cannot change
# during the build, so the simple flavour is both correct and cheaper.
KERNEL_VERSION := $(shell echo $(KERNELRELEASE) | cut -d'.' -f1)
KERNEL_MAJOR := $(shell echo $(KERNELRELEASE) | cut -d'.' -f2)
# Full NVMe support only exists up to kernel 4.11.
# NOTE(review): this expr only handles 4.x kernels — confirm intended
# behaviour for 5.x and later releases.
ifeq ($(shell expr $(KERNEL_VERSION) \>= 4 \& $(KERNEL_MAJOR) \> 11),1)
NVME_FULL = 0
endif
ifeq ($(NVME_FULL),1)
EXTRA_CFLAGS += -DCAS_NVME_FULL
endif
EXTRA_CFLAGS += -Werror
# Hardened link flags: non-executable stack, read-only relocations.
EXTRA_LDFLAGS += -z noexecstack -z relro -z now
# workaround for missing objtool in kernel devel package
ifeq ($(shell expr $(KERNEL_VERSION) == 4 \& $(KERNEL_MAJOR) == 14),1)
ifeq ($(CONFIG_STACK_VALIDATION), y)
OBJTOOL=$(shell [ -f tools/objtool/objtool ] && echo "y")
ifneq ($(OBJTOOL), y)
CONFIG_STACK_VALIDATION=
endif
endif
endif
# Optional per-tree extra settings; '-' tolerates a missing file.
-include $(M)/extra.mk

23
modules/extra.mk Normal file
View File

@@ -0,0 +1,23 @@
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
# When included from kbuild (KERNELRELEASE set) only adjust compile
# flags; otherwise expose OCF source-synchronization helper targets.
ifneq ($(KERNELRELEASE),)
ifeq ($(CAS_EXT_EXP),1)
EXTRA_CFLAGS += -DWI_AVAILABLE
endif
else #KERNELRELEASE
# sync/distsync are commands, not files — declare them phony so a file
# of the same name can never shadow them.
.PHONY: sync distsync
# Copy OCF headers and sources into this module tree.
sync:
	@cd $(OCFDIR) && $(MAKE) inc O=$(PWD)
	@cd $(OCFDIR) && $(MAKE) src O=$(PWD)/cas_cache
# Remove previously synchronized OCF files.
distsync:
	@cd $(OCFDIR) && $(MAKE) distclean O=$(PWD)
	@cd $(OCFDIR) && $(MAKE) distclean O=$(PWD)/cas_cache
endif

View File

@@ -0,0 +1,572 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_IOCTL_CODES_H__
#define __CAS_IOCTL_CODES_H__
/**
* @file
* @brief Main file for ioctl interface between kernel module and userspace component.
*
* This file contains IOCTL commands, structured passed as parameters to said commands
* and documentation of CAS specific extended error codes (that are a bit more verbose than
* standard errno)
*/
#include "ocf/ocf.h"
#include <linux/limits.h>
/**
* Max path, string size
*/
#define MAX_STR_LEN PATH_MAX
/**
* Max size of elevator name (including null terminator)
*/
#define MAX_ELEVATOR_NAME 16
/** \cond SKIP_IN_DOC */
#define CACHE_LIST_ID_LIMIT 20
#define INVALID_FLUSH_PARAM -1
/** \endcond */
#define CACHE_INIT_NEW 0 /**< initialize new metadata from fresh start */
#define CACHE_INIT_LOAD 1 /**< load existing metadata */
struct kcas_start_cache {
/**
	 * id of newly inserted cache (in range 1-OCF_CACHE_ID_MAX).
*/
ocf_cache_id_t cache_id;
/**
* cache initialization mode
* valid choices are:
* * CACHE_INIT_NEW
* * CACHE_INIT_LOAD
*/
uint8_t init_cache;
char cache_path_name[MAX_STR_LEN]; /**< path to an ssd*/
/**
* caching mode for new cache instance
* valid choices are:
* * WRITE_THROUGH
* * WRITE_BACK
* * WRITE_AROUND
* * PASS_THROUGH
*/
ocf_cache_mode_t caching_mode;
/**
* eviction policy to be used for newely configured cache instance.
*/
ocf_eviction_t eviction_policy;
uint8_t flush_data; /**< should data be flushed? */
/**
* cache line size
*/
ocf_cache_line_size_t line_size;
uint8_t force; /**< should force option be used? */
uint64_t min_free_ram; /**< Minimum free RAM memory for cache metadata */
uint8_t metadata_mode_optimal; /**< Current metadata mode is optimal */
char cache_elevator[MAX_ELEVATOR_NAME];
int ext_err_code;
};
struct kcas_stop_cache {
ocf_cache_id_t cache_id; /**< id of cache to be stopped */
uint8_t flush_data; /**< should data be flushed? */
int ext_err_code;
};
struct kcas_set_cache_state {
ocf_cache_id_t cache_id; /**< id of cache for which state should be set */
/**
* caching mode for new cache instance
* valid choices are:
* * WRITE_THROUGH
* * WRITE_BACK
* * WRITE_AROUND
* * PASS_THROUGH
*/
ocf_cache_mode_t caching_mode;
uint8_t flush_data; /**< should data be flushed? */
int ext_err_code;
};
struct kcas_insert_core {
ocf_cache_id_t cache_id; /**< id of an running cache */
ocf_core_id_t core_id; /**< id of newely inserted core object */
char core_path_name[MAX_STR_LEN]; /**< path to a core object */
bool try_add; /**< add core to pool if cache isn't present */
bool update_path; /**< provide alternative path for core device */
int ext_err_code;
};
struct kcas_remove_core {
ocf_cache_id_t cache_id; /**< id of an running cache */
ocf_core_id_t core_id; /**< id core object to be removed */
bool force_no_flush; /**< remove core without flushing */
bool detach; /**< detach core without removing it from cache metadata */
int ext_err_code;
};
struct kcas_reset_stats {
ocf_cache_id_t cache_id; /**< id of an running cache */
ocf_core_id_t core_id; /**< id core object to be removed */
int ext_err_code;
};
struct kcas_flush_cache {
ocf_cache_id_t cache_id; /**< id of an running cache */
int ext_err_code;
};
struct kcas_interrupt_flushing {
ocf_cache_id_t cache_id; /**< id of an running cache */
int ext_err_code;
};
struct kcas_flush_core {
ocf_cache_id_t cache_id; /**< id of an running cache */
ocf_core_id_t core_id; /**< id core object to be removed */
int ext_err_code;
};
struct kcas_cache_info {
/** id of a cache */
ocf_cache_id_t cache_id;
/** path to caching device */
char cache_path_name[MAX_STR_LEN];
/**
* IDs of cores associated with this cache.
*/
ocf_core_id_t core_id[OCF_CORE_MAX];
struct ocf_cache_info info;
uint8_t metadata_mode; /**< metadata mode (normal/atomic) */
int ext_err_code;
};
struct kcas_core_info {
/** Path name to underlying device */
char core_path_name[MAX_STR_LEN];
/** Cache id */
ocf_cache_id_t cache_id;
/** Core id */
ocf_core_id_t core_id;
/** CAS statistics of core */
struct ocf_stats_core stats;
ocf_core_state_t state;
int ext_err_code;
};
struct kcas_core_pool_path {
/** Handler to tab with cores path*/
char *core_path_tab;
/** Number of cores in core pool */
int core_pool_count;
int ext_err_code;
};
struct kcas_cache_count {
/** Number of running caches */
int cache_count;
int ext_err_code;
};
struct kcas_core_pool_count {
/** Number of cores in core pool */
int core_pool_count;
int ext_err_code;
};
/**
* IO class info and statistics
*/
struct kcas_io_class {
/** Cache ID */
ocf_cache_id_t cache_id;
/** Core ID */
ocf_core_id_t core_id;
/** IO class id for which info will be retrieved */
uint32_t class_id;
/** IO class info */
struct ocf_io_class_info info;
/** Flag indicating if partition counters should be fetched. */
uint8_t get_stats;
/** IO class statistics */
struct ocf_stats_io_class stats;
int ext_err_code;
};
/**
* IO class settings
*/
struct kcas_io_classes {
/** Cache ID */
ocf_cache_id_t cache_id;
int ext_err_code;
/** IO class info */
struct ocf_io_class_info info[];
};
#define KCAS_IO_CLASSES_SIZE (sizeof(struct kcas_io_classes) \
+ OCF_IO_CLASS_MAX * sizeof(struct ocf_io_class_info))
/**
* structure in which result of KCAS_IOCTL_LIST_CACHE is supplied from kernel module.
*/
struct kcas_cache_list {
/** starting position in dev list for getting cache id */
uint32_t id_position;
/** requested number of ids and returned in response cmd */
uint32_t in_out_num;
/** array with cache list and its properties */
ocf_cache_id_t cache_id_tab[CACHE_LIST_ID_LIMIT];
int ext_err_code;
};
/**
* CAS capabilities.
*/
/* NOTE(review): the name keeps a historical misspelling ("capabilites");
 * it is part of the userspace ioctl ABI and must not be renamed. */
struct kcas_capabilites {
	uint8_t nvme_format : 1;
	/**< NVMe format support */
	int ext_err_code;
};
/** Parameters of KCAS_IOCTL_UPGRADE (start upgrade-in-flight). */
struct kcas_upgrade {
	int ext_err_code;
};
/**
* Format NVMe namespace.
*/
#define CAS_METADATA_MODE_NORMAL 0
#define CAS_METADATA_MODE_ATOMIC 1
#define CAS_METADATA_MODE_INVALID 255
struct kcas_nvme_format {
char device_path_name[MAX_STR_LEN]; /**< path to NVMe device*/
int metadata_mode; /**< selected metadata mode */
int force;
int ext_err_code;
};
struct kcas_core_pool_remove {
char core_path_name[MAX_STR_LEN]; /**< path to a core object */
int ext_err_code;
};
struct kcas_cache_check_device {
char path_name[MAX_STR_LEN]; /**< path to a device */
bool is_cache_device;
bool clean_shutdown;
bool cache_dirty;
bool format_atomic;
int ext_err_code;
};
enum kcas_core_param_id {
core_param_seq_cutoff_threshold,
core_param_seq_cutoff_policy,
core_param_id_max,
};
struct kcas_set_core_param {
ocf_cache_id_t cache_id;
ocf_core_id_t core_id;
enum kcas_core_param_id param_id;
uint32_t param_value;
int ext_err_code;
};
struct kcas_get_core_param {
ocf_cache_id_t cache_id;
ocf_core_id_t core_id;
enum kcas_core_param_id param_id;
uint32_t param_value;
int ext_err_code;
};
enum kcas_cache_param_id {
cache_param_cleaning_policy_type,
cache_param_cleaning_alru_wake_up_time,
cache_param_cleaning_alru_stale_buffer_time,
cache_param_cleaning_alru_flush_max_buffers,
cache_param_cleaning_alru_activity_threshold,
cache_param_cleaning_acp_wake_up_time,
cache_param_cleaning_acp_flush_max_buffers,
cache_param_id_max,
};
struct kcas_set_cache_param {
ocf_cache_id_t cache_id;
enum kcas_cache_param_id param_id;
uint32_t param_value;
int ext_err_code;
};
struct kcas_get_cache_param {
ocf_cache_id_t cache_id;
enum kcas_cache_param_id param_id;
uint32_t param_value;
int ext_err_code;
};
/*******************************************************************************
* CODE * NAME * STATUS *
*******************************************************************************
* 1 * KCAS_IOCTL_START_CACHE * DEPRECATED *
* 2 * KCAS_IOCTL_STOP_CACHE * OK *
* 3 * KCAS_IOCTL_SET_CACHE_STATE * OK *
* 4 * KCAS_IOCTL_INSERT_CORE * DEPRECATED *
* 5 * KCAS_IOCTL_REMOVE_CORE * DEPRECATED *
* 6 * KCAS_IOCTL_RESET_STATS * OK *
* 7 * KCAS_IOCTL_SET_CLEANING_PARAMETERS * DEPRECATED *
* 8 * KCAS_IOCTL_GET_CLEANING_PARAMETERS * DEPRECATED *
* 9 * KCAS_IOCTL_FLUSH_CACHE * OK *
* 10 * KCAS_IOCTL_INTERRUPT_FLUSHING * OK *
* 11 * KCAS_IOCTL_FLUSH_CORE * OK *
* 12 * KCAS_IOCTL_CACHE_INFO * DEPRECATED *
* 13 * KCAS_IOCTL_CORE_INFO * DEPRECATED *
* 14 * KCAS_IOCTL_PARTITION_STATS * OK *
* 15 * KCAS_IOCTL_PARTITION_SET * OK *
* 16 * KCAS_IOCTL_GET_CACHE_COUNT * OK *
* 17 * KCAS_IOCTL_LIST_CACHE * OK *
* 18 * KCAS_IOCTL_GET_CAPABILITIES * OK *
* 19 * KCAS_IOCTL_UPGRADE * OK *
* 20 * KCAS_IOCTL_NVME_FORMAT * OK *
* 21 * KCAS_IOCTL_START_CACHE * OK *
* 22 * KCAS_IOCTL_INSERT_CORE * OK *
* 23 * KCAS_IOCTL_REMOVE_CORE * OK *
* 24 * KCAS_IOCTL_CACHE_INFO * OK *
* 25 * KCAS_IOCTL_CORE_INFO * OK *
* 26 * KCAS_IOCTL_GET_CORE_POOL_COUNT * OK *
* 27 * KCAS_IOCTL_GET_CORE_POOL_PATHS * OK *
* 28 * KCAS_IOCTL_CORE_POOL_REMOVE * OK *
* 29 * KCAS_IOCTL_CACHE_CHECK_DEVICE * OK *
* 30 * KCAS_IOCTL_SET_CORE_PARAM * OK *
* 31 * KCAS_IOCTL_GET_CORE_PARAM * OK *
* 32 * KCAS_IOCTL_SET_CACHE_PARAM * OK *
* 33 * KCAS_IOCTL_GET_CACHE_PARAM * OK *
*******************************************************************************
*/
/** \cond SKIP_IN_DOC */
#define KCAS_IOCTL_MAGIC (0xBA)
/** \endcond */
/** Stop cache with or without flushing dirty data */
#define KCAS_IOCTL_STOP_CACHE _IOWR(KCAS_IOCTL_MAGIC, 2, struct kcas_stop_cache)
/** Set cache mode (write back, write through etc... */
#define KCAS_IOCTL_SET_CACHE_STATE _IOR(KCAS_IOCTL_MAGIC, 3, struct kcas_set_cache_state)
/** Reset statistic counters for given cache object */
#define KCAS_IOCTL_RESET_STATS _IOR(KCAS_IOCTL_MAGIC, 6, struct kcas_reset_stats)
/** Flush dirty data from a running cache instance that
 * is or was running in write-back mode */
#define KCAS_IOCTL_FLUSH_CACHE _IOWR(KCAS_IOCTL_MAGIC, 9, struct kcas_flush_cache)
/** Interrupt dirty block flushing operation */
#define KCAS_IOCTL_INTERRUPT_FLUSHING _IOWR(KCAS_IOCTL_MAGIC, 10, struct kcas_interrupt_flushing)
/* Flush dirty data from an running core object
* that is or was running in write-back mode */
#define KCAS_IOCTL_FLUSH_CORE _IOR(KCAS_IOCTL_MAGIC, 11, struct kcas_flush_core)
/** Retrieving partition status for specified cache id and partition id */
#define KCAS_IOCTL_PARTITION_STATS _IOWR(KCAS_IOCTL_MAGIC, 14, struct kcas_io_class)
/** Configure partitions for specified cache id */
#define KCAS_IOCTL_PARTITION_SET _IOWR(KCAS_IOCTL_MAGIC, 15, struct kcas_io_classes)
/** Obtain number of valid cache ids within running open cas instance */
#define KCAS_IOCTL_GET_CACHE_COUNT _IOR(KCAS_IOCTL_MAGIC, 16, struct kcas_cache_count)
/** List valid cache ids within Open CAS module */
#define KCAS_IOCTL_LIST_CACHE _IOWR(KCAS_IOCTL_MAGIC, 17, struct kcas_cache_list)
/** Provides capabilities of the installed Open CAS module */
#define KCAS_IOCTL_GET_CAPABILITIES _IOWR(KCAS_IOCTL_MAGIC, 18, struct kcas_capabilites)
/** Start upgrade in flight procedure */
#define KCAS_IOCTL_UPGRADE _IOR(KCAS_IOCTL_MAGIC, 19, struct kcas_upgrade)
/** Format NVMe namespace to support selected metadata mode */
#define KCAS_IOCTL_NVME_FORMAT _IOWR(KCAS_IOCTL_MAGIC, 20, struct kcas_nvme_format)
/** Start new cache instance, load cache or recover cache */
#define KCAS_IOCTL_START_CACHE _IOWR(KCAS_IOCTL_MAGIC, 21, struct kcas_start_cache)
/** Add core object to an running cache instance */
#define KCAS_IOCTL_INSERT_CORE _IOWR(KCAS_IOCTL_MAGIC, 22, struct kcas_insert_core)
/** Remove core object from an running cache instance */
#define KCAS_IOCTL_REMOVE_CORE _IOR(KCAS_IOCTL_MAGIC, 23, struct kcas_remove_core)
/** Retrieve properties of a running cache instance (incl. mode etc.) */
#define KCAS_IOCTL_CACHE_INFO _IOWR(KCAS_IOCTL_MAGIC, 24, struct kcas_cache_info)
/** Retrieve statistics of a given core object */
#define KCAS_IOCTL_CORE_INFO _IOWR(KCAS_IOCTL_MAGIC, 25, struct kcas_core_info)
/** Get core pool count */
#define KCAS_IOCTL_GET_CORE_POOL_COUNT _IOR(KCAS_IOCTL_MAGIC, 26, struct kcas_core_pool_count)
/** Get paths of devices which are in the core pool */
#define KCAS_IOCTL_GET_CORE_POOL_PATHS _IOWR(KCAS_IOCTL_MAGIC, 27, struct kcas_core_pool_path)
/** Remove device from core pool */
#define KCAS_IOCTL_CORE_POOL_REMOVE _IOWR(KCAS_IOCTL_MAGIC, 28, struct kcas_core_pool_remove)
/** Check if given device is initialized cache device */
#define KCAS_IOCTL_CACHE_CHECK_DEVICE _IOWR(KCAS_IOCTL_MAGIC, 29, struct kcas_cache_check_device)
/** Set various core runtime parameters */
#define KCAS_IOCTL_SET_CORE_PARAM _IOW(KCAS_IOCTL_MAGIC, 30, struct kcas_set_core_param)
/** Get various core runtime parameters */
#define KCAS_IOCTL_GET_CORE_PARAM _IOW(KCAS_IOCTL_MAGIC, 31, struct kcas_get_core_param)
/** Set various cache runtime parameters */
#define KCAS_IOCTL_SET_CACHE_PARAM _IOW(KCAS_IOCTL_MAGIC, 32, struct kcas_set_cache_param)
/** Get various cache runtime parameters */
#define KCAS_IOCTL_GET_CACHE_PARAM _IOW(KCAS_IOCTL_MAGIC, 33, struct kcas_get_cache_param)
/**
* Extended kernel CAS error codes
*/
enum kcas_error {
/** Must be root */
KCAS_ERR_ROOT = 2000000,
/** System Error */
KCAS_ERR_SYSTEM,
/** Range parameters are invalid */
KCAS_ERR_BAD_RANGE,
/** Illegal range, out of device space */
KCAS_ERR_DEV_SPACE,
/** Invalid ioctl */
KCAS_ERR_INV_IOCTL,
/** Device opens or mount are pending to this cache */
KCAS_ERR_DEV_PENDING,
/** NVMe Cache device contains dirty data. */
KCAS_ERR_DIRTY_EXISTS_NVME,
/** Could not create exported object because file in /dev directory
* exists
*/
KCAS_ERR_FILE_EXISTS,
/** CAS is under upgrade */
KCAS_ERR_IN_UPGRADE,
/** Cache device sector size is greater than core device %s sector size
*/
KCAS_ERR_UNALIGNED,
/** No caches configuration for upgrade in flight */
KCAS_ERR_NO_STORED_CONF,
/** Cannot roll-back previous configuration */
KCAS_ERR_ROLLBACK,
/** Device is not NVMe */
KCAS_ERR_NOT_NVME,
/** Failed to format NVMe device */
KCAS_ERR_FORMAT_FAILED,
/** NVMe is formatted to unsupported format */
KCAS_ERR_NVME_BAD_FORMAT,
/** Device contains partitions */
KCAS_ERR_CONTAINS_PART,
/** Given device is a partition */
KCAS_ERR_A_PART,
/** Core has been removed with flush error */
KCAS_ERR_REMOVED_DIRTY,
/** Cache has been stopped, but it may contain dirty data */
KCAS_ERR_STOPPED_DIRTY,
/** Core pool is not empty */
KCAS_ERR_CORE_POOL_NOT_EMPTY,
/** No caching device is attached */
KCAS_ERR_NO_CACHE_ATTACHED,
/** Invalid syntax of classification rule */
KCAS_ERR_CLS_RULE_INVALID_SYNTAX,
/** Condition token does not identify any known condition */
KCAS_ERR_CLS_RULE_UNKNOWN_CONDITION,
};
#endif

View File

@@ -0,0 +1,30 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __CAS_VERSION_H__
#define __CAS_VERSION_H__
/* The build number must be supplied by the build system
 * (see CAS_VERSION_GEN / config.mk). */
#if !defined(CAS_BUILD_NO)
#error "You must define build number for version"
#endif
/* Two-step stringification so macro arguments are expanded before
 * being turned into string literals. */
#define STR_PREP(x) #x
#define PR_STR(x) STR_PREP(x)
/* NOTE(review): unconditionally prepends "0", so a one-digit component
 * renders as "0N" but a two-digit one as "0NN" (e.g. 19 -> "019") —
 * confirm this matches the intended version string format. */
#define FMT_VERSION(x) "0" PR_STR(x)
/* Optional "-FLAG" suffix appended to the version string. */
#ifdef CAS_BUILD_FLAG
#define CAS_VERSION_FLAG "-"CAS_BUILD_FLAG
#else
#define CAS_VERSION_FLAG ""
#endif
/* Full version string: MAIN.MAJOR.MINOR.BUILD[-FLAG]. */
#define CAS_VERSION \
	FMT_VERSION(CAS_VERSION_MAIN) "." \
	FMT_VERSION(CAS_VERSION_MAJOR) "." \
	FMT_VERSION(CAS_VERSION_MINOR) "." \
	CAS_BUILD_NO \
	CAS_VERSION_FLAG
#endif

1422
modules/tags Normal file

File diff suppressed because it is too large Load Diff