open-cas-linux/modules/cas_cache/volume/vol_atomic_dev_bottom.c

/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#include "cas_cache.h"
#include "utils/utils_mpool.h"
#if defined(CAS_NVME_FULL)
#include <linux/nvme.h>
#include <linux/blk-mq.h>
#define CAS_DEBUG_IO_ATOMIC 0
#if 1 == CAS_DEBUG_IO_ATOMIC
#define CAS_DEBUG_TRACE() printk(KERN_DEBUG \
"[IO][ATOMIC] %s:%d\n", __func__, __LINE__)
#define CAS_DEBUG_MSG(msg) printk(KERN_DEBUG \
"[IO][ATOMIC] %s:%d - %s\n", __func__, __LINE__, msg)
#define CAS_DEBUG_PARAM(format, ...) printk(KERN_DEBUG \
"[IO][ATOMIC] %s:%d - "format"\n", __func__, \
__LINE__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif
#define ADMIN_TIMEOUT (60 * HZ)
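
/*
 * Per-atom IO descriptor. A single OCF IO is split into one or more
 * cas_atomic_io "atoms"; the first entry of the array acts as the master
 * and tracks completion of all sub-IOs.
 */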
struct cas_atomic_io {
	ocf_volume_t volume;

	struct cas_atomic_io *master;
	atomic_t req_remaining;
	atomic_t potential_dirty;
	uint32_t count;

	uint64_t addr;
	uint32_t bytes;
	uint32_t start;
	uint32_t end;

	int error;
	unsigned dir:1;
	unsigned metadata:1;
	unsigned discard:1;
	unsigned long flags;

	ocf_end_io_t cmpl_fn;
	void *cmpl_context;

	struct blk_data *data;
	uint32_t bvec_size;

	struct nvme_command cmd;
	struct bio *bio;
	struct request *request;

	struct bio_vec_iter iter;
};

static struct cas_mpool *atomic_io_allocator;

static inline uint32_t cas_atomic_max_io_sectors(void)
{
	/* TODO Take into account max IO size of bottom device */
	return 128 * KiB / (SECTOR_SIZE + OCF_ATOMIC_METADATA_SIZE);
}

static inline uint32_t cas_atomic_size_of(uint32_t size)
{
	BUG_ON(size % SECTOR_SIZE);

	return size + (size / SECTOR_SIZE * OCF_ATOMIC_METADATA_SIZE);
}
static void cas_atomic_dealloc(struct cas_atomic_io *atomics)
{
	uint32_t i;

	for (i = 0; i < atomics->count; i++) {
		struct cas_atomic_io *this = &atomics[i];

		if (this->request && !IS_ERR(this->request)) {
			blk_mq_free_request(this->request);
			this->request = NULL;
		}

		if (this->bio)
			bio_put(this->bio);

		if (this->data) {
			cas_ctx_data_secure_erase(this->data);
			cas_ctx_data_free(this->data);
		}
	}

	cas_mpool_del(atomic_io_allocator, atomics, atomics->count);
}
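
/*
 * Split an OCF IO into an array of atoms, each covering at most
 * cas_atomic_max_io_sectors() of data. For regular writes the range may be
 * widened to cover invalid sectors directly before and after the request;
 * the padded area is written as zeros. The first atom doubles as the
 * master that tracks completion of all sub-IOs.
 */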
static struct cas_atomic_io *cas_atomic_alloc(int dir, struct ocf_io *io,
		bool write_zero)
{
	/* Get max size of IO */
	const uint32_t max_io_size = cas_atomic_max_io_sectors()
			* SECTOR_SIZE;

	/* Get number of IOs to be issued */
	uint32_t ios_count;
	ocf_cache_t cache = ocf_volume_get_cache(ocf_io_get_volume(io));

	uint64_t addr = io->addr;
	uint32_t i, bytes = io->bytes;
	uint32_t increase_sectors_start = 0, increase_sectors_end = 0;
	struct cas_atomic_io *atoms;

	if (dir == OCF_WRITE && !write_zero) {
		/* TODO: this logic is probably no longer required */
		BUG_ON(!cache);

		increase_sectors_start =
			ocf_metadata_check_invalid_before(cache, addr);

		increase_sectors_end =
			ocf_metadata_check_invalid_after(cache, addr,
					io->bytes);

		increase_sectors_start *= 512;
		increase_sectors_end *= 512;

		if (increase_sectors_start) {
			bytes += increase_sectors_start;
			addr -= increase_sectors_start;
		}

		if (increase_sectors_end)
			bytes += increase_sectors_end;
	}

	/* Get number of IOs to be issued */
	ios_count = DIV_ROUND_UP(bytes, max_io_size);

	atoms = cas_mpool_new(atomic_io_allocator, ios_count);
	if (!atoms)
		return NULL;

	CAS_DEBUG_PARAM("Addr = %llu, bytes = %u", io->addr, io->bytes);

	/* setup master IO */
	atomic_set(&atoms->req_remaining, ios_count);
	atoms->count = ios_count;
	atoms->cmpl_fn = io->end;
	atoms->cmpl_context = io;

	for (i = 0; i < ios_count; i++) {
		struct cas_atomic_io *this = &atoms[i];

		this->master = atoms;
		this->addr = addr;
		this->bytes = min(bytes, max_io_size);
		this->dir = dir;
		this->flags = io->flags;
		this->volume = ocf_io_get_volume(io);

		CAS_DEBUG_PARAM("Sub-atomic IO (%u), Addr = %llu, bytes = %u",
				i, this->addr, this->bytes);

		addr += this->bytes;
		bytes -= this->bytes;

		/* Allocate BIO data vector with pages */
		this->bvec_size = cas_atomic_size_of(this->bytes);
		this->bvec_size = DIV_ROUND_UP(this->bvec_size, PAGE_SIZE);

		if (write_zero || increase_sectors_start ||
				increase_sectors_end)
			this->data = cas_ctx_data_zalloc(this->bvec_size);
		else
			this->data = cas_ctx_data_alloc(this->bvec_size);

		if (!this->data)
			goto cas_atomic_alloc_ERROR;

		/* Set length of last page */
		this->data->vec[this->bvec_size - 1].bv_len =
			cas_atomic_size_of(this->bytes) % PAGE_SIZE;

		CAS_DEBUG_PARAM("Sub-atomic IO (%u), BIO vector size = %u, "
				"alignment %u", i, this->bvec_size,
				this->data->vec[this->bvec_size - 1].bv_len);

		this->start = min(this->bytes, increase_sectors_start);
		increase_sectors_start -= this->start;
	}

	BUG_ON(bytes);

	for (i = ios_count; i && increase_sectors_end; i--) {
		struct cas_atomic_io *this = &atoms[i - 1];

		this->end = min(this->bytes, increase_sectors_end);
		increase_sectors_end -= this->end;
	}

	return atoms;

cas_atomic_alloc_ERROR:
	if (atoms)
		cas_atomic_dealloc(atoms);

	return NULL;
}
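
/*
 * Read completion: de-interleave the (data + metadata) buffer returned by
 * the device, copying data sectors into the caller's buffer and skipping
 * the per-sector metadata.
 */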
static int cas_atomic_rd_complete(struct cas_atomic_io *atom)
{
	struct bio_vec_iter *dst, src;
	uint32_t copied;
	const uint32_t size = OCF_ATOMIC_METADATA_SIZE;

	uint32_t bytes = atom->bytes;

	CAS_DEBUG_TRACE();

	/* Initialize iterators */
	cas_io_iter_init(&src, atom->data->vec, atom->bvec_size);
	dst = &atom->iter;

	BUG_ON(bytes % SECTOR_SIZE);
	BUG_ON(size != OCF_ATOMIC_METADATA_SIZE);

	copied = 0;
	while (bytes) {
		/* Copy data */
		copied += cas_io_iter_cpy(dst, &src, SECTOR_SIZE);

		/* Omit metadata */
		copied += cas_io_iter_move(&src, size);

		bytes -= SECTOR_SIZE;
	}

	/* Validate that the proper number of bytes was copied */
	if (copied != cas_atomic_size_of(atom->bytes)) {
		CAS_DEBUG_PARAM("ERROR, copied %u, expected = %u",
				copied, cas_atomic_size_of(atom->bytes));

		/* Metadata and data copying problem */
		return -EIO;
	}

	return 0;
}

static int cas_atomic_rd_metadata_complete(struct cas_atomic_io *atom)
{
	struct bio_vec_iter *dst, src;
	uint32_t copied;
	const uint32_t size = OCF_ATOMIC_METADATA_SIZE;

	uint32_t bytes = atom->bytes;

	CAS_DEBUG_TRACE();

	/* Initialize iterators */
	cas_io_iter_init(&src, atom->data->vec, atom->bvec_size);
	dst = &atom->iter;

	BUG_ON(bytes % SECTOR_SIZE);
	BUG_ON(size != OCF_ATOMIC_METADATA_SIZE);

	copied = 0;
	while (bytes) {
		/* Skip data */
		copied += cas_io_iter_move(&src, SECTOR_SIZE);

		/* Copy metadata */
		copied += cas_io_iter_cpy(dst, &src, size);

		bytes -= SECTOR_SIZE;
	}

	/* Validate that the proper number of bytes was copied */
	if (copied != cas_atomic_size_of(atom->bytes)) {
		CAS_DEBUG_PARAM("ERROR, copied %u, expected = %u",
				copied, cas_atomic_size_of(atom->bytes));

		/* Metadata and data copying problem */
		return -EIO;
	}

	return 0;
}
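
/*
 * Read preparation: remember the position in the caller's data vector
 * where this atom's data will land on completion, then advance the shared
 * iterator past it for the next atom.
 */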
static int cas_atomic_rd_prepare(struct ocf_io *io,
		struct cas_atomic_io *atom)
{
	struct blkio *blkio = cas_io_to_blkio(io);
	uint32_t moved;

	/* Store the BIO vector iterator, so that when the read completes
	 * we know where to place the data
	 */
	cas_io_iter_copy_set(&atom->iter, &blkio->iter);

	/* Move iterator for next IO */
	moved = cas_io_iter_move(&blkio->iter, atom->bytes);

	/* Validate that there is enough space in the BIO data vector to do
	 * the read
	 */
	if (moved != atom->bytes) {
		CAS_DEBUG_PARAM("ERROR, moved %u, expected = %u",
				moved, cas_atomic_size_of(atom->bytes));
		return -EINVAL;
	}

	return 0;
}
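
/*
 * Write preparation: interleave the caller's data with the OCF atomic
 * metadata entry of each sector into the atom's buffer. Sectors added only
 * to pad the request (atom->start/atom->end) keep their zero-filled
 * contents.
 */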
static int cas_atomic_wr_prepare(struct ocf_io *io,
		struct cas_atomic_io *atom)
{
	struct blkio *blkio = cas_io_to_blkio(io);
	ocf_cache_t cache;
	struct ocf_atomic_metadata metadata;
	struct bio_vec_iter dst, src;
	uint32_t copied, added;

	uint64_t addr = atom->addr;
	uint32_t bytes = atom->bytes;

	cache = ocf_volume_get_cache(ocf_io_get_volume(io));

	/* Initialize iterators */
	cas_io_iter_init(&dst, atom->data->vec, atom->bvec_size);
	cas_io_iter_copy_set(&src, &blkio->iter);

	BUG_ON(!cache);
	BUG_ON(bytes % SECTOR_SIZE);

	copied = 0;
	if (atom->start) {
		added = cas_atomic_size_of(atom->start);
		cas_io_iter_move(&dst, added);

		bytes -= atom->start;
		copied = added;

		addr += atom->start;
	}

	if (atom->end) {
		added = cas_atomic_size_of(atom->end);
		bytes -= atom->end;
		copied += added;
	}

	BUG_ON(sizeof(metadata) != OCF_ATOMIC_METADATA_SIZE);

	while (bytes) {
		/* Get metadata */
		if (ocf_metadata_get_atomic_entry(cache, addr, &metadata))
			break;

		/* Copy data */
		copied += cas_io_iter_cpy(&dst, &src, SECTOR_SIZE);

		/* Copy metadata */
		copied += cas_io_iter_cpy_from_data(&dst, &metadata,
				sizeof(metadata));

		bytes -= SECTOR_SIZE;
		addr += SECTOR_SIZE;
	}

	cas_io_iter_move(&blkio->iter, atom->bytes - (atom->start + atom->end));

	/* Validate that the proper number of bytes was copied */
	if (copied != cas_atomic_size_of(atom->bytes)) {
		CAS_DEBUG_PARAM("ERROR, copied %u, expected = %u",
				copied, cas_atomic_size_of(atom->bytes));

		/* Metadata and data copying problem */
		return -EINVAL;
	}

	return 0;
}
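
/*
 * Metadata-read preparation: as cas_atomic_rd_prepare(), but the caller's
 * buffer receives only OCF_ATOMIC_METADATA_SIZE bytes per sector of the
 * request.
 */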
static int cas_atomic_rd_metadata_prepare(struct ocf_io *io,
		struct cas_atomic_io *atom)
{
	struct blkio *blkio = cas_io_to_blkio(io);
	uint32_t moved;

	BUG_ON(io->dir != OCF_READ);

	atom->metadata = true;

	/* Store the BIO vector iterator, so that when the read completes
	 * we know where to place the metadata
	 */
	cas_io_iter_copy_set(&atom->iter, &blkio->iter);

	/* Move iterator for next IO */
	moved = cas_io_iter_move(&blkio->iter, (atom->bytes / SECTOR_SIZE) *
			OCF_ATOMIC_METADATA_SIZE);

	/* Validate that the iterator was moved by the proper number of bytes */
	if (moved != (atom->bytes / SECTOR_SIZE) *
			OCF_ATOMIC_METADATA_SIZE) {
		CAS_DEBUG_PARAM("ERROR, moved %u, expected = %u",
				moved, cas_atomic_size_of(atom->bytes));

		/* Metadata and data copying problem */
		return -EIO;
	}

	return 0;
}
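
/*
 * Drop one reference on the master's req_remaining counter; when the last
 * sub-IO completes, call the original completion and free the atom array.
 */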
static void cas_atomic_end_atom(struct cas_atomic_io *atom, int error)
{
	struct cas_atomic_io *master = atom->master;
	struct ocf_io *io = master->cmpl_context;

	if (error)
		master->error |= error;

	if (atomic_dec_return(&master->req_remaining))
		return;

	CAS_DEBUG_MSG("Completion");

	/* Send completion to caller */
	master->cmpl_fn(io, master->error);

	/* Free allocated structures */
	cas_atomic_dealloc(master);
	ocf_io_put(io);
}
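
/*
 * BIO completion callback for a single atom. Reads de-interleave data (or
 * metadata) into the caller's buffer; completed writes update the volume's
 * potentially-dirty counter. The BIO is then released and the master is
 * notified.
 */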
static CAS_DECLARE_BLOCK_CALLBACK(cas_atomic_fire_atom, struct bio *bio,
		unsigned int bytes, int error)
{
	int err;
	struct cas_atomic_io *atom;
	struct bd_object *bdobj;

	BUG_ON(!bio);
	BUG_ON(!bio->bi_private);
	err = CAS_BLOCK_CALLBACK_ERROR(bio, error);
	atom = bio->bi_private;
	BUG_ON(!atom->master);
	bdobj = bd_object(atom->volume);

	CAS_DEBUG_PARAM("BIO result = %d", CAS_BLOCK_CALLBACK_ERROR(bio, error));

	if (err != 0)
		goto out;

	if (atom->discard)
		goto out;

	if (atom->metadata) {
		if (cas_atomic_rd_metadata_complete(atom))
			atom->master->error = -EIO;
		goto out;
	}

	switch (atom->dir) {
	case OCF_READ:
		if (cas_atomic_rd_complete(atom))
			atom->master->error = -EIO;
		break;
	case OCF_WRITE:
		if (!cas_blk_is_flush_io(atom->flags)) {
			atomic_inc(&bdobj->potentially_dirty);
		} else {
			/* IO flush finished, update potential
			 * dirty state
			 */
			atomic_sub(atomic_read(&atom->potential_dirty),
					&bdobj->potentially_dirty);
		}
		break;
	}

out:
	/* Free BIO, not needed any more */
	BUG_ON(bio != atom->bio);
	bio_put(bio);
	atom->bio = NULL;

	cas_atomic_end_atom(atom, err);
}
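
/*
 * Fill an NVMe read/write command for the given request: namespace id,
 * starting LBA taken from the BIO sector and a 0's based sector count,
 * then attach the command to the request as driver-private data.
 */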
static void _cas_atomic_setup_cmd(
	ocf_volume_t volume,
	struct request *req,
	struct bio *bio,
	uint64_t bytes,
	int dir,
	void *end_io_data,
	struct nvme_command *cmd)
{
	struct bd_object *bdobj = bd_object(volume);
	unsigned int ns_id = bdobj->atomic_params.nsid;
	unsigned long *cmd_addr = blk_mq_rq_to_pdu(req);

	cmd->rw.opcode = (dir == OCF_WRITE) ? nvme_cmd_write : nvme_cmd_read;
	cmd->rw.nsid = cpu_to_le32(ns_id);
	cmd->rw.slba = cpu_to_le64(CAS_BIO_BISECTOR(bio));
	cmd->rw.length = cpu_to_le16((bytes / SECTOR_SIZE) - 1);
	cmd->rw.control = cpu_to_le16(NVME_RW_LR);

	req->cmd_type = REQ_TYPE_DRV_PRIV;
	req->cmd_flags |= REQ_FAILFAST_DRIVER;

	*cmd_addr = (unsigned long)cmd;

	req->timeout = ADMIN_TIMEOUT; /* TODO Use timeout for regular IO */

	req->cmd = (unsigned char *) cmd;
	req->cmd_len = sizeof(*cmd);
	req->special = NULL;
	req->end_io_data = end_io_data;
}

static void cas_atomic_setup_cmd(int dir, struct cas_atomic_io *atom)
{
	_cas_atomic_setup_cmd(atom->volume, atom->request, atom->bio,
			atom->bytes, dir, atom, &atom->cmd);
}

static void cas_atomic_end_request(struct request *request, int error)
{
	struct cas_atomic_io *atom;

	BUG_ON(!request);
	atom = request->end_io_data;

	/* Free the request, it is not needed any more */
	BUG_ON(atom->request != request);
	blk_mq_free_request(request);
	atom->request = NULL;

	CAS_DEBUG_PARAM("RQ result = %d", error);

	cas_atomic_end_atom(atom, error);
}
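
/*
 * Build and submit one atom: allocate a BIO over the atom's data vector,
 * wrap it in a request, set up the NVMe command and execute it without
 * waiting. Completion arrives for both the BIO and the request, hence the
 * extra req_remaining reference taken before submission.
 */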
static void cas_atomic_fire_atom(int dir, struct ocf_io *io,
		struct cas_atomic_io *atom)
{
	struct bd_object *bdobj = bd_object(atom->volume);
	struct block_device *bdev = bdobj->btm_bd;
	struct request_queue *queue = bdev_get_queue(bdev);

	struct bio *bio;
	struct bio_vec *bvec;
	uint32_t i;

	/* Allocate BIO */
	bio = atom->bio = bio_alloc(GFP_NOIO, atom->bvec_size);
	if (!bio)
		goto _fire_atom_ERROR;

	/* Setup BIO */
	bio->bi_bdev = bdev;
	CAS_BIO_BISECTOR(bio) = atom->addr / SECTOR_SIZE;
	bio->bi_next = NULL;
	bio->bi_private = atom;
	CAS_BIO_OP_FLAGS(bio) |= io->flags;
	bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_atomic_fire_atom);

	/* Add pages to the BIO */
	bvec = atom->data->vec;
	for (i = 0; i < atom->bvec_size; i++, bvec++) {
		int added = bio_add_pc_page(queue, bio,
				bvec->bv_page, bvec->bv_len, bvec->bv_offset);

		if (added != bvec->bv_len) {
			/* Oops, a problem, cannot add page to the BIO */
			goto _fire_atom_ERROR;
		}
	}

	/* Allocate request */
	atom->request = cas_blk_make_request(queue, atom->bio, GFP_NOIO);
	if (IS_ERR(atom->request)) {
		atom->request = NULL;
		goto _fire_atom_ERROR;
	}

	/* Setup command */
	cas_atomic_setup_cmd(dir, atom);

	/* Additional completion for request */
	atomic_inc(&atom->master->req_remaining);

	/* Send requests (NVME atomic command) */
	blk_execute_rq_nowait(queue, NULL, atom->request, 0,
			cas_atomic_end_request);

	return;

_fire_atom_ERROR:
	CAS_DEBUG_MSG("ERROR");
	cas_atomic_end_atom(atom, -EIO);
}

static void cas_atomic_fire_atoms(int dir, struct ocf_io *io,
		struct cas_atomic_io *atoms)
{
	uint32_t i;

	ocf_io_get(io);

	for (i = 0; i < atoms->count; i++) {
		struct cas_atomic_io *this = &atoms[i];

		CAS_DEBUG_PARAM("Fire(%u), Addr = %llu, bytes = %u",
				i, this->addr, this->bytes);

		cas_atomic_fire_atom(dir, io, this);
	}
}

typedef int (*cas_prepare_atom_pfn_t)(struct ocf_io *io,
		struct cas_atomic_io *atoms);

static int cas_atomic_prepare_atoms(struct ocf_io *io,
		cas_prepare_atom_pfn_t prepare,
		struct cas_atomic_io *atoms)
{
	int i;
	int result = 0;

	if (!prepare)
		return 0;

	for (i = 0; i < atoms->count; i++) {
		struct cas_atomic_io *this = &atoms[i];

		CAS_DEBUG_PARAM("Sub-atomic IO preparation(%u), Addr = %llu, "
				"bytes = %u, dir = %d", i, this->addr,
				this->bytes, io->dir);

		result |= prepare(io, this);
	}

	return result;
}
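
/*
 * Top-level submission path: allocate atoms for the IO, run the optional
 * prepare step for each, then fire them all. The extra req_remaining
 * reference taken before firing is dropped right afterwards, so the
 * completion cannot run before every atom has been submitted.
 */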
static void cas_atomic_fire_io(struct ocf_io *io,
		cas_prepare_atom_pfn_t prepare,
		bool write_zero)
{
	int dir = io->dir;

	/* Create atomic IOs context, mainly allocations */
	struct cas_atomic_io *atoms = cas_atomic_alloc(dir, io, write_zero);

	if (!atoms) {
		CAS_DEBUG_MSG("Memory allocation ERROR");
		goto _submit_io_ERROR;
	}

	/* Prepare IOs, mainly copying data */
	if (cas_atomic_prepare_atoms(io, prepare, atoms)) {
		CAS_DEBUG_MSG("Preparation ERROR");
		goto _submit_io_ERROR;
	}

	/* Send IO */
	atomic_inc(&atoms->req_remaining);
	cas_atomic_fire_atoms(dir, io, atoms);
	cas_atomic_end_atom(atoms, 0);

	return;

_submit_io_ERROR:
	if (atoms)
		cas_atomic_dealloc(atoms);

	io->end(io, -EIO);
}
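
/*
 * Issue an NVMe flush as a driver-private request on the volume's queue;
 * completion is routed through cas_atomic_end_request() like any other
 * atom.
 */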
static void cas_atomic_submit_flush_bio(struct cas_atomic_io *atom)
{
	struct request *req = atom->request;
	struct bd_object *bdobj = bd_object(atom->volume);
	unsigned int ns_id = bdobj->atomic_params.nsid;
	struct nvme_command *cmd = &atom->cmd;
	unsigned long *cmd_addr = blk_mq_rq_to_pdu(req);

	cmd->rw.opcode = nvme_cmd_flush;
	cmd->rw.nsid = cpu_to_le32(ns_id);

	*cmd_addr = (unsigned long)cmd;

	req->cmd_type = REQ_TYPE_DRV_PRIV;
	req->timeout = ADMIN_TIMEOUT;

	req->cmd = (unsigned char *) cmd;
	req->cmd_len = sizeof(*cmd);
	req->special = NULL;
	req->end_io_data = atom;

	/* Additional completion for request */
	atomic_inc(&atom->master->req_remaining);

	/* Send NVMe flush command */
	blk_execute_rq_nowait(req->q, NULL, req, 0, cas_atomic_end_request);
}
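
/*
 * Issue an NVMe Dataset Management (deallocate) command covering the
 * discarded range. A single DSM range descriptor is allocated and attached
 * as the request payload; its LBA and length come from the discard BIO.
 */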
static int cas_atomic_submit_discard_bio(struct cas_atomic_io *atom)
{
	struct request *req = atom->request;
	struct nvme_command *cmd = &atom->cmd;
	struct bd_object *bdobj = bd_object(atom->volume);
	unsigned int ns_id = bdobj->atomic_params.nsid;
	struct nvme_dsm_range *nvm_discard;
	struct page *page;
	int offset;
	unsigned long *cmd_addr = blk_mq_rq_to_pdu(req);

	nvm_discard = kmalloc(sizeof(*nvm_discard), GFP_NOIO);
	if (!nvm_discard)
		return -ENOMEM;

	nvm_discard->cattr = cpu_to_le32(0);
	nvm_discard->nlb = cpu_to_le32(CAS_BIO_BISIZE(atom->bio) >> SECTOR_SHIFT);
	nvm_discard->slba = cpu_to_le64(CAS_BIO_BISECTOR(atom->bio));

	cmd->dsm.opcode = nvme_cmd_dsm;
	cmd->dsm.nsid = cpu_to_le32(ns_id);
	cmd->dsm.nr = 0;
	cmd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);

	req->completion_data = nvm_discard;
	page = virt_to_page(nvm_discard);
	offset = offset_in_page(nvm_discard);
	blk_add_request_payload(req, page, offset, sizeof(*nvm_discard));

	req->__sector = CAS_BIO_BISECTOR(atom->bio);
	req->__data_len = CAS_BIO_BISIZE(atom->bio);
	req->ioprio = bio_prio(atom->bio);

	req->timeout = ADMIN_TIMEOUT;
	req->end_io_data = atom;
	req->cmd_type = REQ_TYPE_DRV_PRIV;
	req->cmd_flags = CAS_BIO_DISCARD;
	req->errors = 0;

	*cmd_addr = (unsigned long)cmd;

	/* Additional completion for request */
	atomic_inc(&atom->master->req_remaining);

	/* Send NVMe discard (DSM) command */
	blk_execute_rq_nowait(req->q, NULL, req, 0, cas_atomic_end_request);

	return 0;
}
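
/*
 * Common setup for single-atom "special" requests (flush, discard): the
 * atom is its own master and owns a bare BIO, with no data pages attached,
 * that is later wrapped in a request.
 */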
static int cas_atomic_special_req_prepare(struct cas_atomic_io *atom,
		struct ocf_io *io)
{
	struct bd_object *bdobj = bd_object(ocf_io_get_volume(io));
	struct block_device *bdev = bdobj->btm_bd;

	CAS_DEBUG_TRACE();

	atom->master = atom;
	atom->count = 1;
	atom->cmpl_fn = io->end;
	atom->cmpl_context = io;
	atom->volume = ocf_io_get_volume(io);
	atom->flags = io->flags;
	atomic_set(&atom->req_remaining, 1);

	atom->bio = bio_alloc(GFP_NOIO, 1);
	if (!atom->bio) {
		CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for bio\n");
		return -ENOMEM;
	}

	atom->bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_atomic_fire_atom);
	atom->bio->bi_bdev = bdev;
	atom->bio->bi_private = atom;

	return 0;
}
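
/*
 * Entry point for discard requests on an atomic volume: build a special
 * atom, describe the discarded range in its BIO, wrap it in a request and
 * submit the NVMe DSM command.
 */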
void cas_atomic_submit_discard(struct ocf_io *io)
{
	struct bd_object *bdobj = bd_object(ocf_io_get_volume(io));
	struct block_device *bdev = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bdev);
	int result = 0;

	struct cas_atomic_io *atom = NULL;
	struct blkio *blkio = cas_io_to_blkio(io);

	CAS_DEBUG_TRACE();

	if (!q) {
		/* No queue, error */
		io->end(io, -EINVAL);
		return;
	}

	/* Allocate and setup control structure. */
	atom = cas_mpool_new(atomic_io_allocator, 1);
	if (!atom) {
		CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for IO ctrl\n");
		io->end(io, -ENOMEM);
		return;
	}

	result = cas_atomic_special_req_prepare(atom, io);
	if (result) {
		blkio->error = result;
		goto out;
	}

	/* Increase IO reference counter for discard IO */
	ocf_io_get(io);

	/* Set up specific fields */
	atom->discard = true;
	CAS_BIO_OP_FLAGS(atom->bio) = CAS_BIO_DISCARD;
	CAS_BIO_BISECTOR(atom->bio) = io->addr / SECTOR_SIZE;
	CAS_BIO_BISIZE(atom->bio) = io->bytes;

	atom->request = cas_blk_make_request(q, atom->bio, GFP_NOIO);
	if (IS_ERR(atom->request)) {
		blkio->error = PTR_ERR(atom->request);
		goto out;
	}

	atomic_inc(&atom->req_remaining);

	result = cas_atomic_submit_discard_bio(atom);
	if (result)
		blkio->error = result;

out:
	cas_atomic_end_atom(atom, blkio->error);
}
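
/*
 * Entry point for flush requests. If nothing has been written since the
 * last flush (potentially_dirty is zero), or the queue does not advertise
 * flush support, complete immediately; otherwise submit an NVMe flush and
 * let its completion clear the dirty counter.
 */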
void cas_atomic_submit_flush(struct ocf_io *io)
{
#ifdef CAS_FLUSH_SUPPORTED
	struct bd_object *bdobj = bd_object(ocf_io_get_volume(io));
	struct block_device *bdev = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bdev);
	int result = 0;
	struct cas_atomic_io *atom = NULL;
	struct blkio *blkio = cas_io_to_blkio(io);

	CAS_DEBUG_TRACE();

	blkio->dirty = atomic_read(&bdobj->potentially_dirty);

	if (!blkio->dirty) {
		/* Didn't write anything to the underlying disk;
		 * no need to send a flush request
		 */
		io->end(io, 0);
		return;
	}

	if (!q) {
		io->end(io, -EINVAL);
		return;
	}

	if (!CAS_CHECK_QUEUE_FLUSH(q)) {
		/* This block device does not support flush */
		atomic_sub(blkio->dirty, &bdobj->potentially_dirty);
		io->end(io, 0);
		return;
	}

	/* Allocate and setup control structure. */
	atom = cas_mpool_new(atomic_io_allocator, 1);
	if (!atom) {
		CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for IO ctrl\n");
		io->end(io, -ENOMEM);
		return;
	}

	/* Increase IO reference counter for FLUSH IO */
	ocf_io_get(io);

	result = cas_atomic_special_req_prepare(atom, io);
	if (result) {
		CAS_PRINT_RL(CAS_KERN_ERR "Couldn't allocate memory for BIO\n");
		blkio->error = -ENOMEM;
		goto out;
	}

	/* Set up specific fields */
	atom->dir = OCF_WRITE;
	atomic_set(&atom->potential_dirty, blkio->dirty);

	atom->request = cas_blk_make_request(q, atom->bio, GFP_NOIO);
	if (IS_ERR(atom->request)) {
		blkio->error = PTR_ERR(atom->request);
		goto out;
	}

	atomic_inc(&atom->req_remaining);

	cas_atomic_submit_flush_bio(atom);

out:
	cas_atomic_end_atom(atom, blkio->error);
#else
	/* On an operating system without REQ_FLUSH support
	 * (e.g. SLES 11 SP 1) CAS cannot use flush requests to provide
	 * power-fail safe Write-Back
	 */
	struct blkio *bdio = cas_io_to_blkio(io);

	io->end(io, -EINVAL);

	/* On SLES 11 SP 1 power-fail safety can only be achieved by
	 * disabling the volatile write cache of the disk itself.
	 */
#endif
}
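
/*
 * Main submit_io entry point: requests marked as a flush (but not
 * FLUSH+FUA) are routed to the flush path; everything else is split into
 * atoms and fired as NVMe read or write commands.
 */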
void cas_atomic_submit_io(struct ocf_io *io)
{
	CAS_DEBUG_TRACE();

	if (!CAS_IS_WRITE_FLUSH_FUA(io->flags) &&
			CAS_IS_WRITE_FLUSH(io->flags)) {
		/* FLUSH */
		cas_atomic_submit_flush(io);
		return;
	}

	if (unlikely(!io->bytes)) {
		CAS_PRINT_RL(KERN_ERR "Zero length request\n");
		io->end(io, -EINVAL);
		return;
	}

	cas_atomic_fire_io(io, io->dir == OCF_READ ? cas_atomic_rd_prepare :
			cas_atomic_wr_prepare, false);
}

void cas_atomic_submit_metadata(struct ocf_io *io)
{
	BUG_ON(io->dir != OCF_READ);

	CAS_DEBUG_TRACE();

	if (unlikely(!io->bytes)) {
		CAS_PRINT_RL(CAS_KERN_ERR "Zero length request\n");
		io->end(io, -EINVAL);
		return;
	}

	cas_atomic_fire_io(io, cas_atomic_rd_metadata_prepare, false);
}

unsigned int cas_atomic_get_max_io_size(ocf_volume_t volume)
{
	struct block_device *bd;

	if (!volume)
		return 0;

	bd = bd_object(volume)->btm_bd;
	if (!bd->bd_disk)
		return 0;

	return queue_max_sectors(bd->bd_disk->queue) << SECTOR_SHIFT;
}

void cas_atomic_close_object(ocf_volume_t volume)
{
	struct bd_object *bdobj = bd_object(volume);

	if (bdobj->workqueue)
		destroy_workqueue(bdobj->workqueue);

	block_dev_close_object(volume);
}

int cas_atomic_open_object(ocf_volume_t volume, void *volume_params)
{
	int result;
	struct bd_object *bdobj = NULL;

	if (!volume_params)
		return -EINVAL;

	result = block_dev_open_object(volume, volume_params);
	if (result)
		return result;

	bdobj = bd_object(volume);

	memcpy(&bdobj->atomic_params, volume_params,
			sizeof(bdobj->atomic_params));

	bdobj->workqueue = create_workqueue("CAS_AT_ZER");
	if (!bdobj->workqueue) {
		cas_atomic_close_object(volume);
		result = -ENOMEM;
		goto end;
	}

end:
	return result;
}

uint64_t cas_atomic_get_length(ocf_volume_t volume)
{
	struct bd_object *bdobj = bd_object(volume);

	return bdobj->atomic_params.size;
}

/* context to keep track of write_zero progress across child IOs */
struct cas_atomic_write_zero_ctx
{
	struct ocf_io *sub_io;
	struct ocf_io *original_io;
	struct work_struct cmpl_work;
	unsigned step_size;
};

static void _cas_atomic_write_zeroes_end(struct cas_atomic_write_zero_ctx *ctx,
		int error)
{
	struct ocf_io *io = ctx->original_io;

	/* end master io */
	io->end(io, error);
	ocf_io_put(io);

	/* cleanup context */
	ocf_io_put(ctx->sub_io);
	kfree(ctx);
}

/* atomic write zeroes I/O completion */
static void _cas_atomic_write_zeroes_step_cmpl(struct ocf_io *io, int error)
{
	struct cas_atomic_write_zero_ctx *ctx = io->priv1;
	struct bd_object *bdobj = bd_object(ocf_io_get_volume(io));
	const unsigned bytes_processed = (io->addr - ctx->original_io->addr)
			+ io->bytes;
	const unsigned bytes_left = ctx->original_io->bytes - bytes_processed;

	BUG_ON(io->bytes > ctx->step_size);

	/* update I/O address and size */
	io->addr += io->bytes;
	io->bytes = min(bytes_left, ctx->step_size);

	if (!bytes_left || error) {
		_cas_atomic_write_zeroes_end(ctx, error);
	} else {
		/* submit next IO from work context */
		queue_work(bdobj->workqueue, &ctx->cmpl_work);
	}
}

/* work routine to schedule next portion of write zero I/O */
void _cas_atomic_write_zeroes_work(struct work_struct *work)
{
	struct cas_atomic_write_zero_ctx *ctx = container_of(work,
			struct cas_atomic_write_zero_ctx, cmpl_work);

	cas_atomic_fire_io(ctx->sub_io, NULL, true);
}
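
/*
 * Entry point for write-zeroes: the range is processed in steps of up to
 * eight maximum-size atoms. Each step is a zero-filled write sub-IO, and
 * the next step is scheduled from the completion via the volume's
 * workqueue.
 */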
void cas_atomic_submit_write_zeroes(struct ocf_io *io)
{
	/* send 8 atoms in each I/O */
	const unsigned step_size = min(cas_atomic_max_io_sectors()
			* SECTOR_SIZE * 8, io->bytes);
	struct cas_atomic_write_zero_ctx *ctx = NULL;
	int result = 0;

	if (unlikely(!io->bytes)) {
		CAS_PRINT_RL(CAS_KERN_ERR "Zero length request\n");
		result = -EINVAL;
		goto error;
	}

	ctx = kmalloc(sizeof(*ctx), GFP_NOIO);
	if (!ctx) {
		result = -ENOMEM;
		goto error;
	}

	ctx->sub_io = ocf_volume_new_io(ocf_io_get_volume(io), io->io_queue,
			io->addr, min(io->bytes, step_size),
			OCF_WRITE, 0, 0);
	if (!ctx->sub_io) {
		result = -ENOMEM;
		goto error_after_ctx;
	}

	/* set up context */
	ctx->step_size = step_size;
	ctx->original_io = io;
	INIT_WORK(&ctx->cmpl_work, _cas_atomic_write_zeroes_work);

	/* get reference to original io */
	ocf_io_get(io);

	/* set up sub-io */
	ocf_io_set_cmpl(ctx->sub_io, ctx, NULL, _cas_atomic_write_zeroes_step_cmpl);

	cas_atomic_fire_io(ctx->sub_io, NULL, true);

	return;

error_after_ctx:
	kfree(ctx);

error:
	io->end(io, result);
}

static void atomic_dev_deinit(void)
{
	if (atomic_io_allocator) {
		cas_mpool_destroy(atomic_io_allocator);
		atomic_io_allocator = NULL;
	}
}

const struct ocf_volume_properties cas_object_atomic_properties = {
	.name = "Atomic_Writes_NVMe",
	.io_priv_size = sizeof(struct blkio),
	.volume_priv_size = sizeof(struct bd_object),
	.caps = {
		.atomic_writes = 1,
	},
	.ops = {
		.submit_io = cas_atomic_submit_io,
		.submit_flush = cas_atomic_submit_flush,
		.submit_discard = cas_atomic_submit_discard,
		.submit_metadata = cas_atomic_submit_metadata,
		.submit_write_zeroes = cas_atomic_submit_write_zeroes,
		.open = cas_atomic_open_object,
		.close = cas_atomic_close_object,
		.get_max_io_size = cas_atomic_get_max_io_size,
		.get_length = cas_atomic_get_length,
	},
	.io_ops = {
		.set_data = cas_blk_io_set_data,
		.get_data = cas_blk_io_get_data,
	},
	.deinit = atomic_dev_deinit,
};

int atomic_dev_init(void)
{
	int ret;

	ret = ocf_ctx_register_volume_type(cas_ctx, ATOMIC_DEVICE_VOLUME,
			&cas_object_atomic_properties);
	if (ret < 0)
		return -EINVAL;

	atomic_io_allocator = cas_mpool_create(0, sizeof(struct cas_atomic_io),
			GFP_NOIO, 1, "cas_atomic_io");
	if (!atomic_io_allocator) {
		ocf_ctx_unregister_volume_type(cas_ctx, ATOMIC_DEVICE_VOLUME);
		return -ENOMEM;
	}

	return 0;
}

#else

int atomic_dev_init(void)
{
	return 0;
}

#endif