Skip to content

Commit f6c23e2

Browse files
fuporovvStack authored and FedorUporovVstack committed
zvol: Enable zvol threading functionality on FreeBSD
Make zvol I/O request processing asynchronous on the FreeBSD side. Clone the zvol threading logic and the required module parameters from the Linux side. Share the zvol threadpool creation/destruction logic between Linux and FreeBSD. Use the OS physio routines in async mode when a zvol is exported in cdev mode. Disable the volthreading ZFS property by default on FreeBSD. Signed-off-by: Fedor Uporov <[email protected]>
1 parent 3862ebb commit f6c23e2

File tree

8 files changed

+257
-129
lines changed

8 files changed

+257
-129
lines changed

include/sys/zvol_impl.h

+29-1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,30 @@ typedef struct zvol_state {
6060
boolean_t zv_threading; /* volthreading property */
6161
} zvol_state_t;
6262

63+
/*
64+
* zvol taskqs
65+
*/
66+
/*
 * Set of taskqs that zvol requests are dispatched to. The FreeBSD strategy
 * path picks one entry with (hash % tqs_cnt) and dispatches to
 * tqs_taskq[index].
 */
typedef struct zv_taskq {
67+
uint_t tqs_cnt; /* number of entries in tqs_taskq */
68+
taskq_t **tqs_taskq; /* array of taskq pointers, indexed 0..tqs_cnt-1 */
69+
} zv_taskq_t;
70+
71+
/*
 * Description of a single zvol I/O request. It is passed by pointer to the
 * strategy implementation and by value to zv_request_task_create().
 */
typedef struct zv_request_stack {
72+
zvol_state_t *zv; /* zvol the request is addressed to */
73+
struct bio *bio; /* bio being processed */
74+
struct request *rq; /* NOTE(review): not set by the FreeBSD strategy path; presumably used by the Linux request path only - confirm */
75+
} zv_request_t;
76+
77+
/*
 * A zv_request_t bundled with the taskq entry used to dispatch it.
 * Created by zv_request_task_create() and released with
 * zv_request_task_free().
 */
typedef struct zv_request_task {
78+
zv_request_t zvr; /* copy of the request handed to the taskq callback */
79+
taskq_ent_t ent; /* entry passed to taskq_dispatch_ent() */
80+
} zv_request_task_t;
81+
82+
/*
83+
* Switch taskq at multiple of 512 MB offset. This can be set to a lower value
84+
* to utilize more threads for small files but may affect prefetch hits.
85+
*/
86+
#define ZVOL_TASKQ_OFFSET_SHIFT 29 /* offsets are shifted right by 29 bits (2^29 bytes = 512 MiB) */
6387

6488
extern krwlock_t zvol_state_lock;
6589
#define ZVOL_HT_SIZE 1024
@@ -70,6 +94,8 @@ extern zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE];
7094
extern unsigned int zvol_volmode;
7195
extern unsigned int zvol_inhibit_dev;
7296

97+
extern zv_taskq_t zvol_taskqs;
98+
7399
/*
74100
* platform independent functions exported to platform code
75101
*/
@@ -86,14 +112,16 @@ void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
86112
uint64_t size, boolean_t commit);
87113
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
88114
struct lwb *lwb, zio_t *zio);
89-
int zvol_init_impl(void);
115+
int zvol_init_impl(uint32_t ncpus, uint32_t num_taskqs, uint32_t zvol_threads);
90116
void zvol_fini_impl(void);
91117
void zvol_wait_close(zvol_state_t *zv);
92118
int zvol_clone_range(zvol_state_handle_t *, uint64_t,
93119
zvol_state_handle_t *, uint64_t, uint64_t);
94120
void zvol_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
95121
uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
96122
size_t nbps);
123+
zv_request_task_t *zv_request_task_create(zv_request_t zvr);
124+
void zv_request_task_free(zv_request_task_t *task);
97125

98126
/*
99127
* platform dependent functions exported to platform independent code

module/os/freebsd/zfs/zvol_os.c

+83-8
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
#include <geom/geom.h>
100100
#include <sys/zvol.h>
101101
#include <sys/zvol_impl.h>
102+
#include <cityhash.h>
102103

103104
#include "zfs_namecheck.h"
104105

@@ -146,8 +147,19 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME");
146147
SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &zvol_volmode, 0,
147148
"Expose as GEOM providers (1), device files (2) or neither");
148149
static boolean_t zpool_on_zvol = B_FALSE;
150+
static unsigned int zvol_threads = 0;
151+
static unsigned int zvol_num_taskqs = 0;
152+
static unsigned int zvol_request_sync = 1;
149153
SYSCTL_INT(_vfs_zfs_vol, OID_AUTO, recursive, CTLFLAG_RWTUN, &zpool_on_zvol, 0,
150154
"Allow zpools to use zvols as vdevs (DANGEROUS)");
155+
SYSCTL_UINT(_vfs_zfs_vol, OID_AUTO, threads, CTLFLAG_RWTUN, &zvol_threads, 0,
156+
"Number of threads for I/O requests. Set to 0 to use all active CPUs");
157+
SYSCTL_UINT(_vfs_zfs_vol, OID_AUTO, taskqs, CTLFLAG_RWTUN, &zvol_num_taskqs, 0,
158+
"Number of zvol taskqs");
159+
SYSCTL_UINT(_vfs_zfs_vol, OID_AUTO, sync, CTLFLAG_RWTUN, &zvol_request_sync, 0,
160+
"Synchronously handle bio requests");
161+
162+
/*
 * Empty placeholder definition of struct request for this file; the shared
 * zv_request_t in sys/zvol_impl.h carries a `struct request *rq` member.
 * NOTE(review): an empty struct body is a GNU extension rather than ISO C,
 * and a pointer member should only need an incomplete type - confirm whether
 * a forward declaration (or no declaration at all) would be sufficient.
 */
struct request {};
151163

152164
/*
153165
* Toggle unmap functionality.
@@ -660,9 +672,10 @@ zvol_cdev_kqfilter(struct cdev *dev, struct knote *kn)
660672
}
661673

662674
static void
663-
zvol_geom_bio_strategy(struct bio *bp)
675+
zvol_strategy_impl(zv_request_t *zvr)
664676
{
665677
zvol_state_t *zv;
678+
struct bio *bp;
666679
uint64_t off, volsize;
667680
size_t resid;
668681
char *addr;
@@ -673,11 +686,8 @@ zvol_geom_bio_strategy(struct bio *bp)
673686
boolean_t is_dumpified;
674687
boolean_t commit;
675688

676-
if (bp->bio_to)
677-
zv = bp->bio_to->private;
678-
else
679-
zv = bp->bio_dev->si_drv2;
680-
689+
bp = zvr->bio;
690+
zv = zvr->zv;
681691
if (zv == NULL) {
682692
error = SET_ERROR(ENXIO);
683693
goto out;
@@ -813,6 +823,58 @@ zvol_geom_bio_strategy(struct bio *bp)
813823
biofinish(bp, NULL, error);
814824
}
815825

826+
/*
 * Taskq callback for an asynchronously dispatched zvol request.
 *
 * arg is the zv_request_task_t handed to taskq_dispatch_ent(). The embedded
 * request is processed by zvol_strategy_impl() and the task is then released
 * with zv_request_task_free().
 */
static void
827+
zvol_strategy_task(void *arg)
828+
{
829+
zv_request_task_t *task = arg;
830+
831+
zvol_strategy_impl(&task->zvr);
832+
/* The task was created by the dispatcher; this callback owns its release. */
zv_request_task_free(task);
833+
}
834+
835+
/*
 * Strategy entry point for a bio.
 *
 * Resolves the zvol the bio is addressed to, then either processes the
 * request inline through zvol_strategy_impl() or wraps it in a
 * zv_request_task_t and dispatches it to one of the zvol taskqs, where
 * zvol_strategy_task() runs it.
 *
 * If no zvol is attached, the bio is completed with ENXIO.
 */
static void
836+
zvol_geom_bio_strategy(struct bio *bp)
837+
{
838+
zv_taskq_t *ztqs = &zvol_taskqs;
839+
zv_request_task_t *task;
840+
zvol_state_t *zv;
841+
uint64_t taskq_hash;
842+
uint32_t tq_idx;
843+
int error;
844+
845+
/*
 * A bio with bio_to set carries the zvol in bio_to->private; otherwise it
 * is taken from bio_dev->si_drv2.
 */
if (bp->bio_to)
846+
zv = bp->bio_to->private;
847+
else
848+
zv = bp->bio_dev->si_drv2;
849+
850+
if (zv == NULL) {
851+
error = SET_ERROR(ENXIO);
852+
/* Complete the bio with the routine matching how zv was looked up. */
if (bp->bio_to)
853+
g_io_deliver(bp, error);
854+
else
855+
biofinish(bp, NULL, error);
856+
857+
return;
858+
}
859+
860+
/* Members not named here (rq) are zero-initialized. */
zv_request_t zvr = {
861+
.zv = zv,
862+
.bio = bp,
863+
};
864+
865+
/*
 * Inline path: taken when zvol_request_sync is set or the zvol has
 * threading disabled. zvr lives on this stack frame, which is sufficient
 * because zvol_strategy_impl() is called before returning.
 */
if (zvol_request_sync || zv->zv_threading == B_FALSE) {
866+
zvol_strategy_impl(&zvr);
867+
return;
868+
}
869+
870+
/*
 * Taskq path: the taskq is selected by hashing the zvol pointer, the
 * current CPU and the bio offset shifted by ZVOL_TASKQ_OFFSET_SHIFT.
 * NOTE(review): the modulo assumes tqs_cnt is non-zero - confirm that
 * zvol_init_impl() guarantees at least one taskq.
 */
taskq_hash = cityhash3((uintptr_t)zv, curcpu, bp->bio_offset >>
871+
ZVOL_TASKQ_OFFSET_SHIFT);
872+
tq_idx = taskq_hash % ztqs->tqs_cnt;
873+
/* zvr is passed by value; zvol_strategy_task() frees the task when done. */
task = zv_request_task_create(zvr);
874+
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx], zvol_strategy_task, task,
875+
0, &task->ent);
876+
}
877+
816878
/*
817879
* Character device mode implementation
818880
*/
@@ -839,6 +901,9 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag)
839901
(zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
840902
return (SET_ERROR(EIO));
841903

904+
if (!zvol_request_sync && !zv->zv_threading)
905+
return (physread(dev, uio_s, ioflag));
906+
842907
rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER);
843908
ssize_t start_resid = zfs_uio_resid(&uio);
844909
lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio),
@@ -886,6 +951,9 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag)
886951
(zfs_uio_offset(&uio) < 0 || zfs_uio_offset(&uio) > volsize))
887952
return (SET_ERROR(EIO));
888953

954+
if (!zvol_request_sync && !zv->zv_threading)
955+
return (physwrite(dev, uio_s, ioflag));
956+
889957
ssize_t start_resid = zfs_uio_resid(&uio);
890958
commit = (ioflag & IO_SYNC) ||
891959
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
@@ -1385,6 +1453,7 @@ zvol_os_create_minor(const char *name)
13851453
uint64_t volsize;
13861454
uint64_t volmode, hash;
13871455
int error;
1456+
uint64_t volthreading;
13881457
bool replayed_zil = B_FALSE;
13891458

13901459
ZFS_LOG(1, "Creating ZVOL %s...", name);
@@ -1479,6 +1548,13 @@ zvol_os_create_minor(const char *name)
14791548
zv->zv_volsize = volsize;
14801549
zv->zv_objset = os;
14811550

1551+
/* Default */
1552+
zv->zv_threading = B_FALSE;
1553+
error = dsl_prop_get_integer(name, "volthreading", &volthreading,
1554+
NULL);
1555+
if (error == 0)
1556+
zv->zv_threading = volthreading;
1557+
14821558
ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
14831559
error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
14841560
if (error)
@@ -1606,8 +1682,7 @@ zvol_busy(void)
16061682
int
16071683
zvol_init(void)
16081684
{
1609-
zvol_init_impl();
1610-
return (0);
1685+
return (zvol_init_impl(mp_ncpus, zvol_num_taskqs, zvol_threads));
16111686
}
16121687

16131688
void

module/os/linux/zfs/zvol_os.c

+7-118
Original file line numberDiff line numberDiff line change
@@ -117,45 +117,8 @@ struct zvol_state_os {
117117
boolean_t use_blk_mq;
118118
};
119119

120-
typedef struct zv_taskq {
121-
uint_t tqs_cnt;
122-
taskq_t **tqs_taskq;
123-
} zv_taskq_t;
124-
static zv_taskq_t zvol_taskqs;
125120
static struct ida zvol_ida;
126121

127-
typedef struct zv_request_stack {
128-
zvol_state_t *zv;
129-
struct bio *bio;
130-
struct request *rq;
131-
} zv_request_t;
132-
133-
typedef struct zv_work {
134-
struct request *rq;
135-
struct work_struct work;
136-
} zv_work_t;
137-
138-
typedef struct zv_request_task {
139-
zv_request_t zvr;
140-
taskq_ent_t ent;
141-
} zv_request_task_t;
142-
143-
static zv_request_task_t *
144-
zv_request_task_create(zv_request_t zvr)
145-
{
146-
zv_request_task_t *task;
147-
task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
148-
taskq_init_ent(&task->ent);
149-
task->zvr = zvr;
150-
return (task);
151-
}
152-
153-
static void
154-
zv_request_task_free(zv_request_task_t *task)
155-
{
156-
kmem_free(task, sizeof (*task));
157-
}
158-
159122
/*
160123
* This is called when a new block multiqueue request comes in. A request
161124
* contains one or more BIOs.
@@ -1793,59 +1756,17 @@ zvol_init(void)
17931756
{
17941757
int error;
17951758

1796-
/*
1797-
* zvol_threads is the module param the user passes in.
1798-
*
1799-
* zvol_actual_threads is what we use internally, since the user can
1800-
* pass zvol_thread = 0 to mean "use all the CPUs" (the default).
1801-
*/
1802-
static unsigned int zvol_actual_threads;
1759+
ida_init(&zvol_ida);
18031760

1804-
if (zvol_threads == 0) {
1805-
/*
1806-
* See dde9380a1 for why 32 was chosen here. This should
1807-
* probably be refined to be some multiple of the number
1808-
* of CPUs.
1809-
*/
1810-
zvol_actual_threads = MAX(num_online_cpus(), 32);
1811-
} else {
1812-
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
1761+
error = zvol_init_impl(num_online_cpus(), zvol_num_taskqs,
1762+
zvol_threads);
1763+
if (error) {
1764+
printk(KERN_INFO "ZFS: zvol_init_impl() failed %d\n", error);
1765+
return (error);
18131766
}
18141767

1815-
/*
1816-
* Use atleast 32 zvol_threads but for many core system,
1817-
* prefer 6 threads per taskq, but no more taskqs
1818-
* than threads in them on large systems.
1819-
*
1820-
* taskq total
1821-
* cpus taskqs threads threads
1822-
* ------- ------- ------- -------
1823-
* 1 1 32 32
1824-
* 2 1 32 32
1825-
* 4 1 32 32
1826-
* 8 2 16 32
1827-
* 16 3 11 33
1828-
* 32 5 7 35
1829-
* 64 8 8 64
1830-
* 128 11 12 132
1831-
* 256 16 16 256
1832-
*/
1833-
zv_taskq_t *ztqs = &zvol_taskqs;
1834-
uint_t num_tqs = MIN(num_online_cpus(), zvol_num_taskqs);
1835-
if (num_tqs == 0) {
1836-
num_tqs = 1 + num_online_cpus() / 6;
1837-
while (num_tqs * num_tqs > zvol_actual_threads)
1838-
num_tqs--;
1839-
}
1840-
uint_t per_tq_thread = zvol_actual_threads / num_tqs;
1841-
if (per_tq_thread * num_tqs < zvol_actual_threads)
1842-
per_tq_thread++;
1843-
ztqs->tqs_cnt = num_tqs;
1844-
ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP);
18451768
error = register_blkdev(zvol_major, ZVOL_DRIVER);
18461769
if (error) {
1847-
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * sizeof (taskq_t *));
1848-
ztqs->tqs_taskq = NULL;
18491770
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
18501771
return (error);
18511772
}
@@ -1864,47 +1785,15 @@ zvol_init(void)
18641785
1024);
18651786
}
18661787

1867-
for (uint_t i = 0; i < num_tqs; i++) {
1868-
char name[32];
1869-
(void) snprintf(name, sizeof (name), "%s_tq-%u",
1870-
ZVOL_DRIVER, i);
1871-
ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread,
1872-
maxclsyspri, per_tq_thread, INT_MAX,
1873-
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
1874-
if (ztqs->tqs_taskq[i] == NULL) {
1875-
for (int j = i - 1; j >= 0; j--)
1876-
taskq_destroy(ztqs->tqs_taskq[j]);
1877-
unregister_blkdev(zvol_major, ZVOL_DRIVER);
1878-
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
1879-
sizeof (taskq_t *));
1880-
ztqs->tqs_taskq = NULL;
1881-
return (-ENOMEM);
1882-
}
1883-
}
1884-
1885-
zvol_init_impl();
1886-
ida_init(&zvol_ida);
18871788
return (0);
18881789
}
18891790

18901791
void
18911792
zvol_fini(void)
18921793
{
1893-
zv_taskq_t *ztqs = &zvol_taskqs;
1894-
zvol_fini_impl();
18951794
unregister_blkdev(zvol_major, ZVOL_DRIVER);
18961795

1897-
if (ztqs->tqs_taskq == NULL) {
1898-
ASSERT3U(ztqs->tqs_cnt, ==, 0);
1899-
} else {
1900-
for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
1901-
ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
1902-
taskq_destroy(ztqs->tqs_taskq[i]);
1903-
}
1904-
kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
1905-
sizeof (taskq_t *));
1906-
ztqs->tqs_taskq = NULL;
1907-
}
1796+
zvol_fini_impl();
19081797

19091798
ida_destroy(&zvol_ida);
19101799
}

module/zcommon/zfs_prop.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -643,7 +643,12 @@ zfs_prop_init(void)
643643
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_FALSE,
644644
sfeatures);
645645
zprop_register_index(ZFS_PROP_VOLTHREADING, "volthreading",
646-
1, PROP_DEFAULT, ZFS_TYPE_VOLUME, "on | off", "zvol threading",
646+
#ifdef __FreeBSD__
647+
0,
648+
#else
649+
1,
650+
#endif
651+
PROP_DEFAULT, ZFS_TYPE_VOLUME, "on | off", "zvol threading",
647652
boolean_table, sfeatures);
648653
zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
649654
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",

0 commit comments

Comments
 (0)