Initial commit

Signed-off-by: Robert Baldyga <robert.baldyga@intel.com>
This commit is contained in:
Robert Baldyga
2019-03-29 08:39:34 +01:00
commit 94e8ca09e0
140 changed files with 37144 additions and 0 deletions

View File

@@ -0,0 +1,53 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __OBJ_BLK_H__
#define __OBJ_BLK_H__
#include "vol_atomic_dev_bottom.h"
#include "vol_block_dev_bottom.h"
#include "vol_block_dev_top.h"
struct casdsk_disk;
struct bd_object {
struct casdsk_disk *dsk;
struct block_device *btm_bd;
/**
* This denotes state of volatile write cache of the device.
* This is set to true when:
* - opening the device
* - when writing to a device without FUA/FLUSH flags
* This is set to false when:
* - FLUSH request is completed on device.
* When it is false
* - FLUSH requests from upper layer are NOT passed to the device.
*/
atomic_t potentially_dirty;
uint32_t expobj_valid : 1;
/*!< Bit indicates that exported object was created */
uint32_t expobj_locked : 1;
/*!< Non zero value indicates data exported object is locked */
uint32_t opened_by_bdev : 1;
/*!< Opened by supplying bdev manually */
struct atomic_dev_params atomic_params;
atomic64_t pending_rqs;
/*!< This fields describes in flight IO requests */
struct workqueue_struct *workqueue;
/*< Workqueue for internally trigerred I/O */
};
static inline struct bd_object *bd_object(ocf_volume_t vol)
{
return ocf_volume_get_priv(vol);
}
#endif /* __OBJ_BLK_H__ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,31 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_ATOMIC_DEV_BOTTOM_H__
#define __VOL_ATOMIC_DEV_BOTTOM_H__
#include "../cas_cache.h"
enum atomic_metadata_mode {
ATOMIC_METADATA_MODE_ELBA,
ATOMIC_METADATA_MODE_SEPBUF,
ATOMIC_METADATA_MODE_NONE,
};
struct atomic_dev_params {
unsigned int nsid;
uint64_t size;
enum atomic_metadata_mode metadata_mode;
unsigned is_mode_optimal : 1;
/* IMPORTANT: If this field is 0, the other fields are invalid! */
unsigned is_atomic_capable : 1;
};
int atomic_dev_init(void);
void atomic_dev_deinit(void);
#endif /* __VOL_ATOMIC_DEV_BOTTOM_H__ */

View File

@@ -0,0 +1,470 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "vol_blk_utils.h"
static void cas_io_iter_advanced(struct bio_vec_iter *iter, uint32_t bytes)
{
BUG_ON(bytes > iter->len);
iter->len -= bytes;
iter->offset += bytes;
if (iter->len) {
/* Still in this item, bytes to be processed */
return;
}
/* Move to next item in data vector */
iter->idx++;
if (iter->idx < iter->vec_size) {
iter->ivec = &iter->vec[iter->idx];
iter->len = iter->ivec->bv_len;
iter->offset = iter->ivec->bv_offset;
} else {
iter->ivec = NULL;
iter->len = 0;
iter->offset = 0;
}
}
uint32_t cas_io_iter_cpy(struct bio_vec_iter *dst, struct bio_vec_iter *src,
uint32_t bytes)
{
uint32_t to_copy, written = 0;
void *adst, *asrc;
if (dst->idx >= dst->vec_size)
return 0;
BUG_ON(dst->offset + dst->len > PAGE_SIZE);
if (src->idx >= src->vec_size)
return 0;
BUG_ON(src->offset + src->len > PAGE_SIZE);
while (bytes) {
to_copy = min(dst->len, src->len);
to_copy = min(to_copy, bytes);
if (to_copy == 0) {
/* No more bytes for coping */
break;
}
adst = page_address(dst->ivec->bv_page) + dst->offset;
asrc = page_address(src->ivec->bv_page) + src->offset;
memcpy(adst, asrc, to_copy);
bytes -= to_copy;
written += to_copy;
cas_io_iter_advanced(dst, to_copy);
cas_io_iter_advanced(src, to_copy);
}
return written;
}
uint32_t cas_io_iter_cpy_from_data(struct bio_vec_iter *dst,
const void *src, uint32_t bytes)
{
uint32_t to_copy, written = 0;
void *adst;
const void *asrc;
if (dst->idx >= dst->vec_size)
return 0;
BUG_ON(dst->offset + dst->len > PAGE_SIZE);
while (bytes) {
to_copy = min(dst->len, bytes);
if (to_copy == 0) {
/* No more bytes for coping */
break;
}
adst = page_address(dst->ivec->bv_page) + dst->offset;
asrc = src + written;
memcpy(adst, asrc, to_copy);
bytes -= to_copy;
written += to_copy;
cas_io_iter_advanced(dst, to_copy);
}
return written;
}
uint32_t cas_io_iter_cpy_to_data(void *dst, struct bio_vec_iter *src,
uint32_t bytes)
{
uint32_t to_copy, written = 0;
void *adst, *asrc;
BUG_ON(dst == NULL);
if (src->idx >= src->vec_size)
return 0;
BUG_ON(src->offset + src->len > PAGE_SIZE);
while (bytes) {
to_copy = min(bytes, src->len);
if (to_copy == 0) {
/* No more bytes for coping */
break;
}
adst = dst + written;
asrc = page_address(src->ivec->bv_page) + src->offset;
memcpy(adst, asrc, to_copy);
bytes -= to_copy;
written += to_copy;
cas_io_iter_advanced(src, to_copy);
}
return written;
}
uint32_t cas_io_iter_move(struct bio_vec_iter *iter, uint32_t bytes)
{
uint32_t to_move, moved = 0;
if (iter->idx >= iter->vec_size)
return 0;
BUG_ON(iter->offset + iter->len > PAGE_SIZE);
while (bytes) {
to_move = min(iter->len, bytes);
if (to_move == 0) {
/* No more bytes for coping */
break;
}
bytes -= to_move;
moved += to_move;
cas_io_iter_advanced(iter, to_move);
}
return moved;
}
uint32_t cas_io_iter_zero(struct bio_vec_iter *dst, uint32_t bytes)
{
uint32_t to_fill, zeroed = 0;
void *adst;
if (dst->idx >= dst->vec_size)
return 0;
BUG_ON(dst->offset + dst->len > PAGE_SIZE);
while (bytes) {
to_fill = min(dst->len, (typeof(dst->len))PAGE_SIZE);
if (to_fill == 0) {
/* No more bytes for coping */
break;
}
adst = page_address(dst->ivec->bv_page) + dst->offset;
memset(adst, 0, to_fill);
bytes -= to_fill;
zeroed += to_fill;
cas_io_iter_advanced(dst, to_fill);
}
return zeroed;
}
/*
*
*/
int cas_blk_io_set_data(struct ocf_io *io,
ctx_data_t *ctx_data, uint32_t offset)
{
struct blkio *blkio = cas_io_to_blkio(io);
struct blk_data *data = ctx_data;
/* Set BIO vector (IO data) and initialize iterator */
blkio->data = data;
if (blkio->data) {
cas_io_iter_init(&blkio->iter, blkio->data->vec,
blkio->data->size);
/* Move into specified offset in BIO vector iterator */
if (offset != cas_io_iter_move(&blkio->iter, offset)) {
/* TODO Log message */
blkio->error = -ENOBUFS;
return -ENOBUFS;
}
}
return 0;
}
/*
*
*/
ctx_data_t *cas_blk_io_get_data(struct ocf_io *io)
{
struct blkio *blkio = cas_io_to_blkio(io);
return blkio->data;
}
#if defined(CAS_NVME_PARTIAL)
#include "utils/utils_nvme.h"
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
uint8_t *type, struct atomic_dev_params *atomic_params)
{
struct nvme_id_ns *ns;
unsigned int nsid, selected, ms, ds, pi, elba, sbsupp;
long long int ret = 0;
struct atomic_dev_params atomic_params_int = {0};
ns = kmalloc(sizeof(*ns), GFP_KERNEL);
if (!ns)
return -OCF_ERR_NO_MEM;
ret = cas_nvme_get_nsid(bdev, &nsid);
if (ret < 0) {
/*
* We cannot obtain NSID which means we are not dealing with
* NVMe device
*/
goto out1;
}
ret = cas_nvme_identify_ns(bdev, nsid, ns);
if (ret < 0) {
/*
* We cannot obtain ns structure which means we ARE dealing with
* NVMe device but can not recognize format so let's treat that
* device as block device
*/
goto out1;
}
selected = ns->flbas & 0xf;
ms = ns->lbaf[selected].ms;
ds = ns->lbaf[selected].ds;
pi = ns->dps & 0x7;
elba = !!(ns->flbas & (1<<4));
sbsupp = !!(ns->mc & (1<<1));
atomic_params_int.is_atomic_capable = 1;
atomic_params_int.nsid = nsid;
atomic_params_int.size = (ns->nsze << (ds - 9)) * SECTOR_SIZE;
if (pi != 0) {
/* We don't support formats which have
* enable Protection Information feature.
*/
ret = -KCAS_ERR_NVME_BAD_FORMAT;
goto out2;
}
switch (ms) {
case 0:
/* NVMe metadata features disabled, so we handle it as
* regular block device
*/
if (ds != 9 && ds != 12) {
ret = -KCAS_ERR_NVME_BAD_FORMAT;
goto out2;
}
*type = BLOCK_DEVICE_VOLUME;
atomic_params_int.metadata_mode = ATOMIC_METADATA_MODE_NONE;
#if !defined(CAS_NVME_FULL)
/*
* Only partial support user can't using
* device in atomic mode, so mode is optimal
*/
atomic_params_int.is_mode_optimal = 1;
break;
#else
if (bdev == bdev->bd_contains) {
/*
* Entire device - format isn't optimal
*/
atomic_params_int.is_mode_optimal = 0;
} else {
/*
* Partition - format is optimal, user can't using
* partitions in atomic mode
*/
atomic_params_int.is_mode_optimal = 1;
}
break;
case 8:
/* For atomic writes we support only metadata size 8B and
* data size 512B
*/
if (ds != 9) {
ret = -KCAS_ERR_NVME_BAD_FORMAT;
goto out2;
}
*type = ATOMIC_DEVICE_VOLUME;
atomic_params_int.metadata_mode = elba ?
ATOMIC_METADATA_MODE_ELBA :
ATOMIC_METADATA_MODE_SEPBUF;
atomic_params_int.is_mode_optimal = sbsupp ? !elba : 1;
break;
#endif
default:
ret = -KCAS_ERR_NVME_BAD_FORMAT;
}
if (atomic_params)
*atomic_params = atomic_params_int;
goto out2;
out1:
*type = BLOCK_DEVICE_VOLUME;
ret = 0;
out2:
kfree(ns);
return ret;
}
static inline int _cas_detect_blk_type(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
int ret;
struct block_device *bdev;
char holder[] = "CAS DETECT\n";
bdev = OPEN_BDEV_EXCLUSIVE(path, FMODE_READ, holder);
if (IS_ERR(bdev))
return -OCF_ERR_NOT_OPEN_EXC;
ret = cas_blk_identify_type_by_bdev(bdev, type, atomic_params);
CLOSE_BDEV_EXCLUSIVE(bdev, FMODE_READ);
return ret;
}
#else
static inline int _cas_detect_blk_type(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
/*
* NVMe is not supported with given kernel version, so we
* have no way to figure out what the current NVMe format
* is. In this situation we make a naive assumption that
* it's formatted to LBA size 512B, and try to treat it
* as regular block device.
*/
*type = BLOCK_DEVICE_VOLUME;
return 0;
}
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
uint8_t *type, struct atomic_dev_params *atomic_params)
{
*type = BLOCK_DEVICE_VOLUME;
return 0;
}
#endif
int cas_blk_open_volume_by_bdev(ocf_volume_t *vol,
struct block_device *bdev)
{
struct atomic_dev_params atomic_params = {0};
struct bd_object *bdobj;
uint8_t type;
int ret;
ret = cas_blk_identify_type_by_bdev(bdev, &type, &atomic_params);
if (ret)
goto err;
ret = ocf_ctx_volume_create(cas_ctx, vol, NULL, type);
if (ret)
goto err;
bdobj = bd_object(*vol);
bdobj->btm_bd = bdev;
bdobj->opened_by_bdev = true;
ocf_volume_open(*vol);
return 0;
err:
return ret;
}
void cas_blk_close_volume(ocf_volume_t vol)
{
ocf_volume_close(vol);
ocf_volume_deinit(vol);
env_free(vol);
}
int _cas_blk_identify_type(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
struct file *file;
int result = 0;
file = filp_open(path, O_RDONLY, 0);
if (IS_ERR(file))
return -OCF_ERR_INVAL_VOLUME_TYPE;
if (S_ISBLK(FILE_INODE(file)->i_mode))
*type = BLOCK_DEVICE_VOLUME;
else if (S_ISCHR(FILE_INODE(file)->i_mode))
*type = NVME_CONTROLLER;
else
result = -OCF_ERR_INVAL_VOLUME_TYPE;
filp_close(file, 0);
if (result)
return result;
if (*type == BLOCK_DEVICE_VOLUME) {
result = _cas_detect_blk_type(path, type, atomic_params);
if (result < 0)
return result;
}
return 0;
}
int cas_blk_identify_type(const char *path, uint8_t *type)
{
return _cas_blk_identify_type(path, type, NULL);
}
int cas_blk_identify_type_atomic(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params)
{
return _cas_blk_identify_type(path, type, atomic_params);
}

View File

@@ -0,0 +1,148 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLK_UTILS_H__
#define __VOL_BLK_UTILS_H__
#include "obj_blk.h"
#include "context.h"
static inline bool cas_blk_is_flush_io(unsigned long flags)
{
if ((flags & OCF_WRITE_FLUSH) == OCF_WRITE_FLUSH)
return true;
if ((flags & OCF_WRITE_FLUSH_FUA) == OCF_WRITE_FLUSH_FUA)
return true;
return false;
}
struct blkio {
int error;
atomic_t rq_remaning;
atomic_t ref_counter;
int32_t dirty;
int32_t dir;
struct blk_data *data; /* IO data buffer */
/* BIO vector iterator for sending IO */
struct bio_vec_iter iter;
};
static inline struct blkio *cas_io_to_blkio(struct ocf_io *io)
{
return ocf_io_get_priv(io);
}
int cas_blk_io_set_data(struct ocf_io *io, ctx_data_t *data,
uint32_t offset);
ctx_data_t *cas_blk_io_get_data(struct ocf_io *io);
int cas_blk_identify_type_by_bdev(struct block_device *bdev,
uint8_t *type, struct atomic_dev_params *atomic_params);
int cas_blk_open_volume_by_bdev(ocf_volume_t *vol,
struct block_device *bdev);
void cas_blk_close_volume(ocf_volume_t vol);
int cas_blk_identify_type(const char *path, uint8_t *type);
int cas_blk_identify_type_atomic(const char *path, uint8_t *type,
struct atomic_dev_params *atomic_params);
static inline void cas_io_iter_init(struct bio_vec_iter *iter,
struct bio_vec *vec, uint32_t vec_size)
{
iter->vec = iter->ivec = vec;
iter->vec_size = vec_size;
iter->idx = 0;
iter->offset = vec->bv_offset;
iter->len = vec->bv_len;
}
static inline void cas_io_iter_set(struct bio_vec_iter *iter,
struct bio_vec *vec, uint32_t vec_size,
uint32_t idx, uint32_t offset, uint32_t len)
{
iter->vec = vec;
iter->vec_size = vec_size;
iter->idx = idx;
iter->offset = offset;
iter->len = len;
if (iter->idx < vec_size) {
iter->ivec = &vec[iter->idx];
} else {
iter->ivec = NULL;
WARN(1, "Setting offset out of BIO vector");
}
}
static inline void cas_io_iter_copy_set(struct bio_vec_iter *dst,
struct bio_vec_iter *src)
{
dst->vec = src->vec;
dst->vec_size = src->vec_size;
dst->idx = src->idx;
dst->offset = src->offset;
dst->len = src->len;
dst->ivec = src->ivec;
}
static inline bool cas_io_iter_is_next(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? true : false;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_size_done(struct bio_vec_iter *iter)
{
return iter->idx;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_size_left(struct bio_vec_iter *iter)
{
if (iter->idx < iter->vec_size)
return iter->vec_size - iter->idx;
return 0;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_current_offset(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? iter->offset : 0;
/* TODO UNITTEST */
}
static inline uint32_t cas_io_iter_current_length(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? iter->len : 0;
/* TODO UNITTEST */
}
static inline struct page *cas_io_iter_current_page(struct bio_vec_iter *iter)
{
return iter->idx < iter->vec_size ? iter->ivec->bv_page : NULL;
/* TODO UNITTEST */
}
uint32_t cas_io_iter_cpy(struct bio_vec_iter *dst, struct bio_vec_iter *src,
uint32_t bytes);
uint32_t cas_io_iter_cpy_from_data(struct bio_vec_iter *dst,
const void *src, uint32_t bytes);
uint32_t cas_io_iter_cpy_to_data(void *dst, struct bio_vec_iter *src,
uint32_t bytes);
uint32_t cas_io_iter_move(struct bio_vec_iter *iter,
uint32_t bytes);
uint32_t cas_io_iter_zero(struct bio_vec_iter *iter, uint32_t bytes);
#endif /* __VOL_BLK_UTILS_H__ */

View File

@@ -0,0 +1,597 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#include "cas_cache.h"
#define CAS_DEBUG_IO 0
#if CAS_DEBUG_IO == 1
#define CAS_DEBUG_TRACE() printk(KERN_DEBUG \
"[IO] %s:%d\n", __func__, __LINE__)
#define CAS_DEBUG_MSG(msg) printk(KERN_DEBUG \
"[IO] %s:%d - %s\n", __func__, __LINE__, msg)
#define CAS_DEBUG_PARAM(format, ...) printk(KERN_DEBUG \
"[IO] %s:%d - "format"\n", __func__, __LINE__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif
int block_dev_open_object(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
const struct ocf_volume_uuid *uuid = ocf_volume_get_uuid(vol);
struct casdsk_disk *dsk;
if (bdobj->opened_by_bdev) {
/* Bdev has beed set manually, so there is nothing to do. */
return 0;
}
if (unlikely(true == cas_upgrade_is_in_upgrade())) {
dsk = casdisk_functions.casdsk_disk_claim(uuid->data, NULL);
casdisk_functions.casdsk_disk_set_attached(dsk);
} else {
dsk = casdisk_functions.casdsk_disk_open(uuid->data, NULL);
}
if (IS_ERR_OR_NULL(dsk)) {
int error = PTR_ERR(dsk) ?: -EINVAL;
if (error == -EBUSY)
error = -OCF_ERR_NOT_OPEN_EXC;
return error;
}
bdobj->dsk = dsk;
bdobj->btm_bd = casdisk_functions.casdsk_disk_get_blkdev(dsk);
return 0;
}
void block_dev_close_object(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
if (bdobj->opened_by_bdev)
return;
if (likely(!cas_upgrade_is_in_upgrade())) {
casdisk_functions.casdsk_disk_close(bdobj->dsk);
} else {
casdisk_functions.casdsk_disk_set_pt(bdobj->dsk);
casdisk_functions.casdsk_disk_dettach(bdobj->dsk);
}
}
unsigned int block_dev_get_max_io_size(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
struct block_device *bd = bdobj->btm_bd;
return queue_max_sectors(bd->bd_disk->queue) << SECTOR_SHIFT;
}
uint64_t block_dev_get_byte_length(ocf_volume_t vol)
{
struct bd_object *bdobj = bd_object(vol);
struct block_device *bd = bdobj->btm_bd;
uint64_t sector_length;
sector_length = (bd->bd_contains == bd) ?
get_capacity(bd->bd_disk) :
bd->bd_part->nr_sects;
return sector_length << SECTOR_SHIFT;
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 3, 0)
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
if (q->elevator->elevator_type == NULL)
return NULL;
if (q->elevator->elevator_type->elevator_name == NULL)
return NULL;
if (q->elevator->elevator_type->elevator_name[0] == 0)
return NULL;
return q->elevator->elevator_type->elevator_name;
}
#else
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
if (q->elevator->type == NULL)
return NULL;
if (q->elevator->type->elevator_name == NULL)
return NULL;
if (q->elevator->type->elevator_name[0] == 0)
return NULL;
return q->elevator->type->elevator_name;
}
#endif
/*
*
*/
const char *block_dev_get_elevator_name(struct request_queue *q)
{
if (!q)
return NULL;
if (q->elevator == NULL)
return NULL;
return __block_dev_get_elevator_name(q);
}
/*
*
*/
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
uint8_t type)
{
if (type == BLOCK_DEVICE_VOLUME) {
if (atomic_params->is_atomic_capable)
return atomic_params->is_mode_optimal;
} else if (type == ATOMIC_DEVICE_VOLUME) {
return atomic_params->is_mode_optimal;
}
return 1;
}
/*
*
*/
static inline struct bio *cas_bd_io_alloc_bio(struct blkio *bdio)
{
struct bio *bio
= bio_alloc(GFP_NOIO, cas_io_iter_size_left(&bdio->iter));
if (bio)
return bio;
if (cas_io_iter_size_left(&bdio->iter) < MAX_LINES_PER_IO) {
/* BIO vector was small, so it was memory
* common problem - NO RAM!!!
*/
return NULL;
}
/* Retry with smaller */
return bio_alloc(GFP_NOIO, MAX_LINES_PER_IO);
}
/*
*
*/
static void cas_bd_io_end(struct ocf_io *io, int error)
{
struct blkio *bdio = cas_io_to_blkio(io);
if (error)
bdio->error |= error;
if (atomic_dec_return(&bdio->rq_remaning))
return;
CAS_DEBUG_MSG("Completion");
/* Send completion to caller */
io->end(io, bdio->error);
/* Free allocated structures */
ocf_io_put(io);
}
/*
*
*/
DECLARE_BLOCK_CALLBACK(cas_bd_io_end, struct bio *bio,
unsigned int bytes_done, int error)
{
struct ocf_io *io;
struct blkio *bdio;
struct bd_object *bdobj;
int err;
BUG_ON(!bio);
BUG_ON(!bio->bi_private);
BLOCK_CALLBACK_INIT(bio);
io = bio->bi_private;
bdobj = bd_object(io->volume);
BUG_ON(!bdobj);
err = BLOCK_CALLBACK_ERROR(bio, error);
bdio = cas_io_to_blkio(io);
BUG_ON(!bdio);
CAS_DEBUG_TRACE();
if (err)
goto out;
if (bdio->dir == OCF_WRITE) {
/* IO was a write */
if (!cas_blk_is_flush_io(io->flags)) {
/* Device cache is dirty, mark it */
atomic_inc(&bdobj->potentially_dirty);
} else {
/* IO flush finished, update potential
* dirty state
*/
atomic_sub(bdio->dirty, &bdobj->potentially_dirty);
}
}
out:
if (err == -EOPNOTSUPP && (BIO_OP_FLAGS(bio) & CAS_BIO_DISCARD))
err = 0;
cas_bd_io_end(io, err);
bio_put(bio);
BLOCK_CALLBACK_RETURN();
}
static void block_dev_submit_flush(struct ocf_io *io)
{
#ifdef CAS_FLUSH_SUPPORTED
struct blkio *blkio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
struct block_device *bdev = bdobj->btm_bd;
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = NULL;
blkio->dirty = atomic_read(&bdobj->potentially_dirty);
/* Prevent races of completing IO */
atomic_set(&blkio->rq_remaning, 1);
/* Increase IO reference counter for FLUSH IO */
ocf_io_get(io);
if (!blkio->dirty) {
/* Didn't write anything to underlying disk; no need to
* send req_flush
*/
goto out;
}
if (q == NULL) {
/* No queue, error */
blkio->error = -EINVAL;
goto out;
}
if (!CHECK_QUEUE_FLUSH(q)) {
/* This block device does not support flush, call back */
atomic_sub(blkio->dirty, &bdobj->potentially_dirty);
goto out;
}
bio = bio_alloc(GFP_NOIO, 0);
if (bio == NULL) {
CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for BIO\n");
blkio->error = -ENOMEM;
goto out;
}
blkio->dir = io->dir;
bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);
CAS_BIO_SET_DEV(bio, bdev);
bio->bi_private = io;
atomic_inc(&blkio->rq_remaning);
cas_submit_bio(OCF_WRITE_FLUSH, bio);
out:
cas_bd_io_end(io, blkio->error);
#else
/* Running operating system without support for REQ_FLUSH
* (i.e. SLES 11 SP 1) CAS cannot use flushing requests to
* handle power-fail safe Write-Back
*/
io->end(io, -ENOTSUPP);
/* on SLES 11 SP 1 powerfail safety can only be achieved
* through disabling volatile write cache of disk itself.
*/
#endif
}
void block_dev_submit_discard(struct ocf_io *io)
{
struct blkio *blkio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
struct block_device *bd = bdobj->btm_bd;
struct request_queue *q = bdev_get_queue(bd);
struct bio *bio = NULL;
unsigned int max_discard_sectors, granularity, bio_sects;
int alignment;
sector_t sects, start, end, tmp;
/* Prevent races of completing IO */
atomic_set(&blkio->rq_remaning, 1);
/* Increase IO reference counter for FLUSH IO */
ocf_io_get(io);
if (!q) {
/* No queue, error */
blkio->error = -ENXIO;
goto out;
}
if (!blk_queue_discard(q)) {
/* Discard is not supported by bottom device, send completion
* to caller
*/
goto out;
}
granularity = max(q->limits.discard_granularity >> SECTOR_SHIFT, 1U);
alignment = (bdev_discard_alignment(bd) >> SECTOR_SHIFT) % granularity;
max_discard_sectors =
min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT);
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors))
goto out;
sects = io->bytes >> SECTOR_SHIFT;
start = io->addr >> SECTOR_SHIFT;
while (sects) {
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
CAS_PRINT_RL(CAS_KERN_ERR "Couldn't allocate memory for BIO\n");
blkio->error = -ENOMEM;
break;
}
bio_sects = min_t(sector_t, sects, max_discard_sectors);
end = start + bio_sects;
tmp = end;
if (bio_sects < sects &&
sector_div(tmp, granularity) != alignment) {
end = end - alignment;
sector_div(end, granularity);
end = end * granularity + alignment;
bio_sects = end - start;
}
CAS_BIO_SET_DEV(bio, bd);
BIO_BISECTOR(bio) = start;
BIO_BISIZE(bio) = bio_sects << SECTOR_SHIFT;
bio->bi_next = NULL;
bio->bi_private = io;
bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);
atomic_inc(&blkio->rq_remaning);
cas_submit_bio(CAS_BIO_DISCARD, bio);
sects -= bio_sects;
start = end;
cond_resched();
}
out:
cas_bd_io_end(io, blkio->error);
}
static inline bool cas_bd_io_prepare(int *dir, struct ocf_io *io)
{
struct blkio *bdio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
/* Setup DIR */
bdio->dir = *dir;
/* Save dirty counter */
bdio->dirty = atomic_read(&bdobj->potentially_dirty);
/* Convert CAS direction into kernel values */
switch (bdio->dir) {
case OCF_READ:
*dir = READ;
break;
case OCF_WRITE:
*dir = WRITE;
break;
default:
bdio->error = -EINVAL;
break;
}
if (!io->bytes) {
/* Don not accept empty request */
CAS_PRINT_RL(KERN_ERR "Invalid zero size IO\n");
bdio->error = -EINVAL;
}
if (bdio->error)
return false;
return true;
}
/*
*
*/
static void block_dev_submit_io(struct ocf_io *io)
{
struct blkio *bdio = cas_io_to_blkio(io);
struct bd_object *bdobj = bd_object(io->volume);
struct bio_vec_iter *iter = &bdio->iter;
uint64_t addr = io->addr;
uint32_t bytes = io->bytes;
int dir = io->dir;
if (!CAS_IS_WRITE_FLUSH_FUA(io->flags) &&
CAS_IS_WRITE_FLUSH(io->flags)) {
CAS_DEBUG_MSG("Flush request");
/* It is flush requests handle it */
block_dev_submit_flush(io);
return;
}
CAS_DEBUG_PARAM("Address = %llu, bytes = %u\n", bdio->addr,
bdio->bytes);
/* Increase IO reference */
ocf_io_get(io);
/* Prevent races of completing IO */
atomic_set(&bdio->rq_remaning, 1);
if (!cas_bd_io_prepare(&dir, io)) {
CAS_DEBUG_MSG("Invalid request");
cas_bd_io_end(io, -EINVAL);
return;
}
while (cas_io_iter_is_next(iter) && bytes) {
/* Still IO vectors to be sent */
/* Allocate BIO */
struct bio *bio = cas_bd_io_alloc_bio(bdio);
if (!bio) {
bdio->error = -ENOMEM;
break;
}
/* Setup BIO */
CAS_BIO_SET_DEV(bio, bdobj->btm_bd);
BIO_BISECTOR(bio) = addr / SECTOR_SIZE;
bio->bi_next = NULL;
bio->bi_private = io;
BIO_OP_FLAGS(bio) |= io->flags;
BIO_SET_RW_FLAGS(bio);
bio->bi_end_io = REFER_BLOCK_CALLBACK(cas_bd_io_end);
/* Add pages */
while (cas_io_iter_is_next(iter) && bytes) {
struct page *page = cas_io_iter_current_page(iter);
uint32_t offset = cas_io_iter_current_offset(iter);
uint32_t length = cas_io_iter_current_length(iter);
int added;
if (length > bytes)
length = bytes;
added = bio_add_page(bio, page, length, offset);
BUG_ON(added < 0);
if (added == 0) {
/* No more space in BIO, stop adding pages */
break;
}
/* Update address, bytes sent */
bytes -= added;
addr += added;
/* Update BIO vector iterator */
if (added != cas_io_iter_move(iter, added)) {
bdio->error = -ENOBUFS;
break;
}
}
if (bdio->error == 0) {
/* Increase IO reference for sending this IO */
atomic_inc(&bdio->rq_remaning);
/* Send BIO */
CAS_DEBUG_MSG("Submit IO");
cas_submit_bio(dir, bio);
bio = NULL;
} else {
if (bio) {
bio_put(bio);
bio = NULL;
}
/* ERROR, stop processed */
break;
}
}
if (bytes && bdio->error == 0) {
/* Not all bytes sent, mark error */
bdio->error = -ENOBUFS;
}
/* Prevent races of completing IO when
* there are still child IOs not being send.
*/
cas_bd_io_end(io, 0);
}
const struct ocf_volume_properties cas_object_blk_properties = {
.name = "Block Device",
.io_priv_size = sizeof(struct blkio),
.volume_priv_size = sizeof(struct bd_object),
.caps = {
.atomic_writes = 0, /* Atomic writes not supported */
},
.ops = {
.submit_io = block_dev_submit_io,
.submit_flush = block_dev_submit_flush,
.submit_metadata = NULL,
.submit_discard = block_dev_submit_discard,
.open = block_dev_open_object,
.close = block_dev_close_object,
.get_max_io_size = block_dev_get_max_io_size,
.get_length = block_dev_get_byte_length,
},
.io_ops = {
.set_data = cas_blk_io_set_data,
.get_data = cas_blk_io_get_data,
},
};
int block_dev_init(void)
{
int ret;
ret = ocf_ctx_register_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME,
&cas_object_blk_properties);
if (ret < 0)
return ret;
return 0;
}
void block_dev_deinit(void)
{
ocf_ctx_unregister_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME);
}
int block_dev_try_get_io_class(struct bio *bio, int *io_class)
{
struct ocf_io *io;
if (bio->bi_end_io != REFER_BLOCK_CALLBACK(cas_bd_io_end))
return -1;
io = bio->bi_private;
*io_class = io->io_class;
return 0;
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLOCK_DEV_BOTTOM_H__
#define __VOL_BLOCK_DEV_BOTTOM_H__
#include "../cas_cache.h"
int block_dev_open_object(ocf_volume_t vol);
void block_dev_close_object(ocf_volume_t vol);
const char *block_dev_get_elevator_name(struct request_queue *q);
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
uint8_t type);
int block_dev_try_get_io_class(struct bio *bio, int *io_class);
int block_dev_init(void);
void block_dev_deinit(void);
#endif /* __VOL_BLOCK_DEV_BOTTOM_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,17 @@
/*
* Copyright(c) 2012-2019 Intel Corporation
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#ifndef __VOL_BLOCK_DEV_TOP_H__
#define __VOL_BLOCK_DEV_TOP_H__
int block_dev_activate_exported_object(ocf_core_t core);
int block_dev_create_exported_object(ocf_core_t core);
int block_dev_destroy_exported_object(ocf_core_t core);
int block_dev_destroy_all_exported_objects(ocf_cache_t cache);
#endif /* __VOL_BLOCK_DEV_TOP_H__ */

View File

@@ -0,0 +1,2 @@
/home/robert/work/cas/ICAS_Linux/modules/cas_cache/volume/vol_block_dev_top.o-.text-9bd
/home/robert/work/cas/ICAS_Linux/modules/cas_cache/volume/vol_block_dev_top.o-.text-9c4