Lab tasks
- Download the SPDK source code, then build and install it
- Run the NVMe hello_world example program
- Study the basic principles of SPDK by reading the hello_world source
- Modify hello_world to perform ZNS command I/O (write and read)
Lab procedure
Downloading the SPDK source
Cloning directly from GitHub is error-prone, so download from a domestic mirror instead:
git clone https://gitee.com/openstorage_1/spdk_obj.git
Use ls to check the downloaded contents.
To match the commands in the lecture slides, rename the directory to spdk.
Installing dependencies
First enter the spdk directory, then run scripts/pkgdep.sh:
cd spdk
sudo scripts/pkgdep.sh --all
The required dependencies are then installed automatically. When that finishes, run
./configure
make
The build fails at this point because the clone only contains the top-level repository; the git submodules it depends on were not downloaded. Running the command suggested in the error message (typically git submodule update --init) fetches the missing submodules and makes the project complete.
After fetching the submodules, run ./configure and make again. make may appear to hang with no output for a long time; pressing Ctrl+C to interrupt it shows that it really is working, it just prints very little.
Errors during make
A classmate pointed out that this happens when the VM was launched with the -cpu host option removed, so the guest lacks the required CPU instruction-set extensions. Restart the VM with the full command:
qemu-system-x86_64 --enable-kvm -cpu host -name znslab -m 2G -smp 4 -hda ./ubuntu.qcow2 -net user,hostfwd=tcp:127.0.0.1:7777-:22,hostfwd=tcp:127.0.0.1:2222-:2000 -net nic -drive file=./znsssd.qcow2,id=mynvme,format=qcow2,if=none -device nvme,serial=baz,id=nvme2 -device nvme-ns,id=ns2,drive=mynvme,nsid=2,logical_block_size=4096,physical_block_size=4096,zoned=true,zoned.zone_size=131072,zoned.zone_capacity=131072,zoned.max_open=0,zoned.max_active=0,bus=nvme2 -nographic
Unit tests
./test/unit/unittest.sh
Error: segmentation fault
According to https://review.spdk.io/gerrit/c/spdk/spdk/+/4372, this can happen when the system CUnit is too old, so the affected tests were never built. Try building and installing CUnit manually:
wget https://udomain.dl.sourceforge.net/project/cunit/CUnit/2.1-3/CUnit-2.1-3.tar.bz2
sudo apt install automake
sudo apt install libtool
tar -jxvf CUnit-2.1-3.tar.bz2
cd CUnit-2.1-3
aclocal
autoheader
libtoolize
automake --add-missing
autoconf
automake
./configure
make
sudo make install
Then re-run ./configure and make, and run the unit tests again.
Even if some unit tests still do not pass, it does not seem to matter much for this lab.
After that, install SPDK:
sudo make install
Hugepage allocation and device unbinding
Before running SPDK, some hugepages must be allocated and the NVMe device must be unbound from its original kernel driver:
sudo scripts/setup.sh
To rebind the device to the kernel driver, just run
sudo scripts/setup.sh reset
Since the VM is short on memory, allocate a smaller hugepage pool: sudo HUGEMEM=1024 scripts/setup.sh
Running hello_world
sudo ./build/examples/hello_world
Analyzing the source code
Path: spdk/examples/nvme/hello_world/hello_world.c
Analysis: this example uses SPDK to write the string "Hello world!" to the SSD and read it back. The key functions are the following (a simplified sketch of how they fit together is given after the list):
- main
- hello_world
- write_complete
- read_complete
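In outline: main() initializes the SPDK environment and calls spdk_nvme_probe(), whose probe_cb/attach_cb callbacks register every active namespace; hello_world() allocates an I/O queue pair per namespace, writes "Hello world!" into a DMA buffer, submits a write, and polls for its completion; write_complete() then issues a read of the same LBA, and read_complete() prints the buffer. The helper below is not part of hello_world.c; it is a minimal sketch of that submit-then-poll pattern (the names write_one_block_and_wait and done_cb are made up here, and error handling is trimmed):

#include <stdbool.h>
#include <stdio.h>
#include "spdk/nvme.h"

static bool g_done;

/* Completion callback: record that the I/O finished and report any error. */
static void
done_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
    if (spdk_nvme_cpl_is_error(cpl)) {
        fprintf(stderr, "I/O failed: %s\n",
                spdk_nvme_cpl_get_status_string(&cpl->status));
    }
    g_done = true;
}

/* Submit one 1-LBA write at LBA 0 and poll the queue pair until it completes.
 * buf must be DMA-able memory obtained from spdk_zmalloc(). */
static int
write_one_block_and_wait(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buf)
{
    int rc;

    g_done = false;
    rc = spdk_nvme_ns_cmd_write(ns, qpair, buf,
                                0 /* start LBA */, 1 /* LBA count */,
                                done_cb, NULL, 0 /* io_flags */);
    if (rc != 0) {
        return rc;
    }
    /* SPDK's NVMe driver is polled-mode: completions are only reaped when
     * the application calls spdk_nvme_qpair_process_completions(). */
    while (!g_done) {
        spdk_nvme_qpair_process_completions(qpair, 0);
    }
    return 0;
}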
Modifying hello_world.c for ZNS SSD read/write
Setting up a shared folder
To edit the files conveniently, it helps to set up a shared folder between the host and the VM; see
https://miracle24.site/other/cs-exp-zns-1/
https://www.cnblogs.com/alone153/p/15779775.html
Note: the following packages need to be installed first:
sudo apt install libattr1-dev
sudo apt install libcap-ng-dev
Then rebuild QEMU with virtfs support enabled:
cd qemu-7.1.0
./configure --enable-kvm --enable-virtfs
make
qemu-system-x86_64 --enable-kvm -cpu host -name znslab -m 2G -smp 4 -hda ./ubuntu.qcow2 -net user,hostfwd=tcp:127.0.0.1:7777-:22,hostfwd=tcp:127.0.0.1:2222-:2000 -net nic -drive file=./znsssd.qcow2,id=mynvme,format=qcow2,if=none -device nvme,serial=baz,id=nvme2 -device nvme-ns,id=ns2,drive=mynvme,nsid=2,logical_block_size=4096,physical_block_size=4096,zoned=true,zoned.zone_size=131072,zoned.zone_capacity=131072,zoned.max_open=0,zoned.max_active=0,bus=nvme2 -nographic -fsdev local,id=fsdev0,path=./work/,security_model=none -device virtio-9p-pci,id=fs0,fsdev=fsdev0,mount_tag=hostshare
After starting the VM, mount the shared directory inside the guest:
sudo mount hostshare -t 9p ./work
Move the spdk folder into work so that its contents can also be edited from the host.
Writing the read/write program
Goal: write the string "This is a test log of the second lab" to the emulated ZNS SSD device (znsssd.qcow2), then read it back.
The ZNS driver interface functions are declared in include/spdk/nvme_zns.h; the main calls used in this lab are listed below.
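(The prototypes below are reproduced from the SPDK nvme_zns.h header as I recall it; exact signatures can vary slightly between SPDK releases, so check the local header.)

uint64_t spdk_nvme_zns_ns_get_num_zones(struct spdk_nvme_ns *ns);
uint64_t spdk_nvme_zns_ns_get_zone_size(struct spdk_nvme_ns *ns);
uint32_t spdk_nvme_zns_ctrlr_get_max_zone_append_size(const struct spdk_nvme_ctrlr *ctrlr);
int spdk_nvme_zns_reset_zone(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
                             uint64_t slba, bool select_all,
                             spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int spdk_nvme_zns_report_zones(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
                               void *payload, uint32_t payload_size, uint64_t slba,
                               enum spdk_nvme_zns_zra_report_opts report_opts, bool partial_report,
                               spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int spdk_nvme_zns_zone_append(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
                              void *buffer, uint64_t zslba, uint32_t lba_count,
                              spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags);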
Source file: lab2_code.c
Overview: the program mainly rewrites the hello_world function and renames it lab2func. It first prints some information about the NVMe ZNS namespace, then writes the string "This is a test log of the second lab\n" into the buffer sequence.buf, appends the buffer at the starting LBA of the first zone, and finally reads the data back into a buffer and prints it.
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/vmd.h"
#include "spdk/nvme_zns.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/log.h"
static int64_t g_zone_report_limit = 8;
struct ctrlr_entry {
struct spdk_nvme_ctrlr *ctrlr;
TAILQ_ENTRY(ctrlr_entry) link;
char name[1024];
};
struct ns_entry {
struct spdk_nvme_ctrlr *ctrlr;
struct spdk_nvme_ns *ns;
TAILQ_ENTRY(ns_entry) link;
struct spdk_nvme_qpair *qpair;
};
static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers);
static TAILQ_HEAD(, ns_entry) g_namespaces = TAILQ_HEAD_INITIALIZER(g_namespaces);
static struct spdk_nvme_transport_id g_trid = {};
static bool g_vmd = false;
static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
struct ns_entry *entry;
if (!spdk_nvme_ns_is_active(ns)) {
return;
}
entry = malloc(sizeof(struct ns_entry));
if (entry == NULL) {
perror("ns_entry malloc");
exit(1);
}
entry->ctrlr = ctrlr;
entry->ns = ns;
TAILQ_INSERT_TAIL(&g_namespaces, entry, link);
printf(" Namespace ID: %d size: %juGB\n", spdk_nvme_ns_get_id(ns),
spdk_nvme_ns_get_size(ns) / 1000000000);
}
struct lab2_sequence {
struct ns_entry *ns_entry;
char *buf;
unsigned using_cmb_io;
int is_completed;
};
static void
read_complete(void *arg, const struct spdk_nvme_cpl *completion)
{
struct lab2_sequence *sequence = arg;
/* Assume the I/O was successful */
sequence->is_completed = 1;
/* See if an error occurred. If so, display information
* about it, and set completion value so that I/O
* caller is aware that an error occurred.
*/
if (spdk_nvme_cpl_is_error(completion)) {
spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair, (struct spdk_nvme_cpl *)completion);
fprintf(stderr, "I/O error status: %s\n", spdk_nvme_cpl_get_status_string(&completion->status));
fprintf(stderr, "Read I/O failed, aborting run\n");
sequence->is_completed = 2;
exit(1);
}
/*
* The read I/O has completed. Print the contents of the
* buffer, free the buffer, then mark the sequence as
* completed. This will trigger the hello_world() function
* to exit its polling loop.
*/
printf("%s", sequence->buf);
spdk_free(sequence->buf);
}
static void
write_complete(void *arg, const struct spdk_nvme_cpl *completion)
{
struct lab2_sequence *sequence = arg;
struct ns_entry *ns_entry = sequence->ns_entry;
int rc;
/* See if an error occurred. If so, display information
* about it, and set completion value so that I/O
* caller is aware that an error occurred.
*/
if (spdk_nvme_cpl_is_error(completion)) {
spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair, (struct spdk_nvme_cpl *)completion);
fprintf(stderr, "I/O error status: %s\n", spdk_nvme_cpl_get_status_string(&completion->status));
fprintf(stderr, "Write I/O failed, aborting run\n");
sequence->is_completed = 2;
exit(1);
}
/*
* The write I/O has completed. Free the buffer associated with
* the write I/O and allocate a new zeroed buffer for reading
* the data back from the NVMe namespace.
*/
if (sequence->using_cmb_io) {
spdk_nvme_ctrlr_unmap_cmb(ns_entry->ctrlr);
} else {
spdk_free(sequence->buf);
}
sequence->buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, sequence->buf,
0, /* LBA start */
1, /* number of LBAs */
read_complete, (void *)sequence, 0);
if (rc != 0) {
fprintf(stderr, "starting read I/O failed\n");
exit(1);
}
}
static void
reset_zone_complete(void *arg, const struct spdk_nvme_cpl *completion)
{
struct lab2_sequence *sequence = arg;
/* Assume the I/O was successful */
sequence->is_completed = 1;
/* See if an error occurred. If so, display information
* about it, and set completion value so that I/O
* caller is aware that an error occurred.
*/
if (spdk_nvme_cpl_is_error(completion)) {
spdk_nvme_qpair_print_completion(sequence->ns_entry->qpair, (struct spdk_nvme_cpl *)completion);
fprintf(stderr, "I/O error status: %s\n", spdk_nvme_cpl_get_status_string(&completion->status));
fprintf(stderr, "Reset zone I/O failed, aborting run\n");
sequence->is_completed = 2;
exit(1);
}
}
static void
reset_zone_and_wait_for_completion(struct lab2_sequence *sequence)
{
if (spdk_nvme_zns_reset_zone(sequence->ns_entry->ns, sequence->ns_entry->qpair,
0, /* starting LBA of the zone to reset */
false, /* don't reset all zones */
reset_zone_complete,
sequence)) {
fprintf(stderr, "starting reset zone I/O failed\n");
exit(1);
}
while (!sequence->is_completed) {
spdk_nvme_qpair_process_completions(sequence->ns_entry->qpair, 0);
}
sequence->is_completed = 0;
}
bool io_completed;
static void check_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
if (spdk_nvme_cpl_is_error(cpl))
{
printf("I/O Option Failed\n");
}
io_completed = true;
}
static void
lab2func(void)
{
struct ns_entry *ns_entry;
struct lab2_sequence sequence;
int rc;
size_t sz;
TAILQ_FOREACH(ns_entry, &g_namespaces, link) {
ns_entry->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, NULL, 0);
if (ns_entry->qpair == NULL) {
printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n");
return;
}
/*
 * Allocate a 4 KiB write buffer: use the controller memory buffer (CMB)
 * for I/O if it is available, otherwise fall back to host DMA memory.
 */
sequence.using_cmb_io = 1;
sequence.buf = spdk_nvme_ctrlr_map_cmb(ns_entry->ctrlr, &sz);
if (sequence.buf == NULL || sz < 0x1000) {
sequence.using_cmb_io = 0;
sequence.buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
}
if (sequence.buf == NULL) {
printf("ERROR: write buffer allocation failed\n");
return;
}
if (sequence.using_cmb_io) {
printf("INFO: using controller memory buffer for IO\n");
} else {
printf("INFO: using host memory buffer for IO\n");
}
sequence.is_completed = 0;
sequence.ns_entry = ns_entry;
/*
* If the namespace is a Zoned Namespace, rather than a regular
* NVM namespace, we need to reset the first zone, before we
* write to it. This not needed for regular NVM namespaces.
*/
if (spdk_nvme_ns_get_csi(ns_entry->ns) == SPDK_NVME_CSI_ZNS) {
reset_zone_and_wait_for_completion(&sequence);
}
// Print information about the NVMe ZNS namespace
printf("------------ NVMe ZNS SSD information ------------\n");
uint64_t num_zones = spdk_nvme_zns_ns_get_num_zones(ns_entry->ns);
printf("NVMe namespace has %lu zones\n", num_zones);
const struct spdk_nvme_ns_data *ref_ns_data = spdk_nvme_ns_get_data(ns_entry->ns);
printf("NVMe namespace LBA size: %lu\n",ref_ns_data->nsze);
const struct spdk_nvme_zns_ns_data *ref_ns_zns_data = spdk_nvme_zns_ns_get_data(ns_entry->ns);
printf("NVMe namespace zone size: %lu(%lu*%lu)\n", spdk_nvme_zns_ns_get_zone_size(ns_entry->ns),
ref_ns_zns_data->lbafe->zsze, ref_ns_data->nsze);
printf("NVMe namespace zone append size limit: %u\n",spdk_nvme_zns_ctrlr_get_max_zone_append_size(ns_entry->ns));
printf("------------------输出结束------------------\n");
/*
 * Fill the write buffer with the test string.
 */
printf("Writing the string into the write buffer\n");
snprintf(sequence.buf, 0x1000, "%s", "This is a test log of the second lab\n");
printf("Finished writing the string into the write buffer\n");
/* Obtain the descriptor of the first zone (first_zone_info) */
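/*
 * Layout of the buffer returned by Report Zones: a struct
 * spdk_nvme_zns_zone_report header, followed by one descriptor per zone;
 * each descriptor occupies sizeof(struct spdk_nvme_zns_zone_desc) plus
 * zdes * 64 bytes of optional zone descriptor extension. zd_index below
 * is therefore the byte offset of descriptor 0, i.e. the first zone.
 */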
struct spdk_nvme_zns_zone_desc *first_zone_info;
uint32_t zds, zrs, zd_index;
size_t zdes = 0;
uint8_t *report_buf;
size_t report_bufsize;
zrs = sizeof(struct spdk_nvme_zns_zone_report);
zds = sizeof(struct spdk_nvme_zns_zone_desc);
zdes = ref_ns_zns_data->lbafe[ref_ns_data->flbas.format].zdes * 64;
zd_index = zrs + 0 * (zds + zdes);
report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns_entry->ns);
report_buf = calloc(1, report_bufsize);
if (!report_buf) {
printf("Zone report allocation failed!\n");
exit(1);
}
/* Issue the Report Zones command, then poll until its completion callback
 * has run before parsing the returned descriptors. */
io_completed = false;
rc = spdk_nvme_zns_report_zones(ns_entry->ns, ns_entry->qpair, report_buf, report_bufsize,
0, SPDK_NVME_ZRA_LIST_ALL, true,
check_complete, NULL);
if (rc) {
fprintf(stderr, "Report zones failed\n");
exit(1);
}
while (!io_completed) {
spdk_nvme_qpair_process_completions(ns_entry->qpair, 0);
}
first_zone_info = (struct spdk_nvme_zns_zone_desc *)(report_buf + zd_index);
/* Append the write buffer to the first zone (at its starting LBA, zslba)
 * and wait for the I/O to complete. */
io_completed = false;
rc = spdk_nvme_zns_zone_append(ns_entry->ns, ns_entry->qpair, sequence.buf,
first_zone_info->zslba, 1, check_complete, NULL, 0);
if (rc != 0) {
fprintf(stderr, "starting zone append I/O failed\n");
exit(1);
}
while (!io_completed) {
spdk_nvme_qpair_process_completions(ns_entry->qpair, 0);
}
printf("Zone append completed\nNow reading the data back from the SSD\n");
/* Read the data back from the SSD. */
/* Now that the write is done, free the write buffer and allocate a fresh zeroed one. */
if (sequence.using_cmb_io) {
spdk_nvme_ctrlr_unmap_cmb(ns_entry->ctrlr);
} else {
spdk_free(sequence.buf);
}
sequence.buf = spdk_zmalloc(0x1000, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
/* Read one LBA starting at the first zone's start LBA */
io_completed = false;
rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, sequence.buf, first_zone_info->zslba, 1, check_complete, NULL, 0);
if (rc != 0) {
fprintf(stderr, "starting read I/O failed\n");
exit(1);
}
while (!io_completed) {
spdk_nvme_qpair_process_completions(ns_entry->qpair, 0);
}
printf("读出数据为: %s\n", sequence.buf);
spdk_free(sequence.buf);
free(report_buf);
spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair);
}
}
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
struct spdk_nvme_ctrlr_opts *opts)
{
printf("Attaching to %s\n", trid->traddr);
return true;
}
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
int nsid;
struct ctrlr_entry *entry;
struct spdk_nvme_ns *ns;
const struct spdk_nvme_ctrlr_data *cdata;
entry = malloc(sizeof(struct ctrlr_entry));
if (entry == NULL) {
perror("ctrlr_entry malloc");
exit(1);
}
printf("Attached to %s\n", trid->traddr);
/*
* spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe
* controller. During initialization, the IDENTIFY data for the
* controller is read using an NVMe admin command, and that data
* can be retrieved using spdk_nvme_ctrlr_get_data() to get
* detailed information on the controller. Refer to the NVMe
* specification for more details on IDENTIFY for NVMe controllers.
*/
cdata = spdk_nvme_ctrlr_get_data(ctrlr);
snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
entry->ctrlr = ctrlr;
TAILQ_INSERT_TAIL(&g_controllers, entry, link);
/*
* Each controller has one or more namespaces. An NVMe namespace is basically
* equivalent to a SCSI LUN. The controller's IDENTIFY data tells us how
* many namespaces exist on the controller. For Intel(R) P3X00 controllers,
* it will just be one namespace.
*
* Note that in NVMe, namespace IDs start at 1, not 0.
*/
for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
if (ns == NULL) {
continue;
}
register_ns(ctrlr, ns);
}
}
void
spdk_nvme_detach_poll(struct spdk_nvme_detach_ctx *detach_ctx)
{
while (detach_ctx && spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
;
}
}
static void
cleanup(void)
{
struct ns_entry *ns_entry, *tmp_ns_entry;
struct ctrlr_entry *ctrlr_entry, *tmp_ctrlr_entry;
struct spdk_nvme_detach_ctx *detach_ctx = NULL;
TAILQ_FOREACH_SAFE(ns_entry, &g_namespaces, link, tmp_ns_entry) {
TAILQ_REMOVE(&g_namespaces, ns_entry, link);
free(ns_entry);
}
TAILQ_FOREACH_SAFE(ctrlr_entry, &g_controllers, link, tmp_ctrlr_entry) {
TAILQ_REMOVE(&g_controllers, ctrlr_entry, link);
spdk_nvme_detach_async(ctrlr_entry->ctrlr, &detach_ctx);
free(ctrlr_entry);
}
if (detach_ctx) {
spdk_nvme_detach_poll(detach_ctx);
}
}
static void
usage(const char *program_name)
{
printf("%s [options]", program_name);
printf("\t\n");
printf("options:\n");
printf("\t[-d DPDK huge memory size in MB]\n");
printf("\t[-g use single file descriptor for DPDK memory segments]\n");
printf("\t[-i shared memory group ID]\n");
printf("\t[-r remote NVMe over Fabrics target address]\n");
printf("\t[-V enumerate VMD]\n");
#ifdef DEBUG
printf("\t[-L enable debug logging]\n");
#else
printf("\t[-L enable debug logging (flag disabled, must reconfigure with --enable-debug)]\n");
#endif
}
static int
parse_args(int argc, char **argv, struct spdk_env_opts *env_opts)
{
int op, rc;
spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE);
snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
while ((op = getopt(argc, argv, "d:gi:r:L:V")) != -1) {
switch (op) {
case 'V':
g_vmd = true;
break;
case 'i':
env_opts->shm_id = spdk_strtol(optarg, 10);
if (env_opts->shm_id < 0) {
fprintf(stderr, "Invalid shared memory ID\n");
return env_opts->shm_id;
}
break;
case 'g':
env_opts->hugepage_single_segments = true;
break;
case 'r':
if (spdk_nvme_transport_id_parse(&g_trid, optarg) != 0) {
fprintf(stderr, "Error parsing transport address\n");
return 1;
}
break;
case 'd':
env_opts->mem_size = spdk_strtol(optarg, 10);
if (env_opts->mem_size < 0) {
fprintf(stderr, "Invalid DPDK memory size\n");
return env_opts->mem_size;
}
break;
case 'L':
rc = spdk_log_set_flag(optarg);
if (rc < 0) {
fprintf(stderr, "unknown flag\n");
usage(argv[0]);
exit(EXIT_FAILURE);
}
#ifdef DEBUG
spdk_log_set_print_level(SPDK_LOG_DEBUG);
#endif
break;
default:
usage(argv[0]);
return 1;
}
}
return 0;
}
int
main(int argc, char **argv)
{
int rc;
struct spdk_env_opts opts;
/*
* SPDK relies on an abstraction around the local environment
* named env that handles memory allocation and PCI device operations.
* This library must be initialized first.
*
*/
spdk_env_opts_init(&opts);
rc = parse_args(argc, argv, &opts);
if (rc != 0) {
return rc;
}
opts.name = "lab2";
if (spdk_env_init(&opts) < 0) {
fprintf(stderr, "Unable to initialize SPDK env\n");
return 1;
}
printf("Initializing NVMe Controllers\n");
if (g_vmd && spdk_vmd_init()) {
fprintf(stderr, "Failed to initialize VMD."
" Some NVMe devices can be unavailable.\n");
}
/*
* Start the SPDK NVMe enumeration process. probe_cb will be called
* for each NVMe controller found, giving our application a choice on
* whether to attach to each controller. attach_cb will then be
* called for each controller after the SPDK NVMe driver has completed
* initializing the controller we chose to attach.
*/
rc = spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL);
if (rc != 0) {
fprintf(stderr, "spdk_nvme_probe() failed\n");
rc = 1;
goto exit;
}
if (TAILQ_EMPTY(&g_controllers)) {
fprintf(stderr, "no NVMe controllers found\n");
rc = 1;
goto exit;
}
printf("Initialization complete.\n");
// hello_world();
lab2func();
cleanup();
if (g_vmd) {
spdk_vmd_fini();
}
exit:
cleanup();
spdk_env_fini();
return rc;
}
Makefile
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2015 Intel Corporation.
# All rights reserved.
#
SPDK_ROOT_DIR := /home/cwj/work/spdk
APP = lab2_code
include $(SPDK_ROOT_DIR)/mk/nvme.libtest.mk
run: all
@ rm -f lab2_code.d lab2_code.o
@ sudo ./lab2_code
Running the program
Put the source file and the Makefile in the shared folder, then rebuild and run:
make
sudo HUGEMEM=1024 scripts/setup.sh
sudo ./lab2_code
A "missing separator" error may appear; this is because recipe lines in a Makefile must start with a tab character, not spaces.
The following error may also appear:
EAL: FATAL: Cannot use IOVA as 'PA' since physical addresses are not available
EAL: Cannot use IOVA as 'PA' since physical addresses are not available
This happens when the program is not run with administrator privileges; run it as sudo ./lab2_code instead.
The output then shows the string that was written and read back.