@@ -432 +437 @@
 	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&spa->spa_sync_bplist.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
 
+	for (int t = 0; t < TXG_SIZE; t++)
+		bplist_init(&spa->spa_free_bplist[t]);
+	bplist_init(&spa->spa_deferred_bplist);
+
 	(void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
 	spa->spa_state = POOL_STATE_UNINITIALIZED;
 	spa->spa_freeze_txg = UINT64_MAX;
 	spa->spa_final_txg = UINT64_MAX;
+	spa->spa_load_max_txg = UINT64_MAX;
 
 	refcount_create(&spa->spa_refcount);
 	spa_config_lock_init(spa);
 
 	avl_add(&spa_namespace_avl, spa);
 
-	mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
-
 	/*
 	 * Set the alternate root, if there is one.
@@ -467 +476 @@
 	    offsetof(spa_config_dirent_t, scd_link));
 
 	dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP);
-	dp->scd_path = spa_strdup(spa_config_path);
+	dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path);
 	list_insert_head(&spa->spa_config_list, dp);
 
+	if (config != NULL)
+		VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
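Setting scd_path to NULL for altroot imports keeps temporarily imported pools out of the cachefile. A minimal sketch of the consumer side, assuming the sync code walks spa_config_list and treats a NULL path as "nothing to write" (the loop below is illustrative, not the actual spa_config_sync()):

	spa_config_dirent_t *dp;

	for (dp = list_head(&spa->spa_config_list); dp != NULL;
	    dp = list_next(&spa->spa_config_list, dp)) {
		if (dp->scd_path == NULL)
			continue;	/* altroot import: no cachefile update */
		/* ... otherwise write this pool's config nvlist to dp->scd_path ... */
	}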
@@ -517 +535 @@
 	mutex_destroy(&spa->spa_scrub_lock);
 	mutex_destroy(&spa->spa_errlog_lock);
 	mutex_destroy(&spa->spa_errlist_lock);
-	mutex_destroy(&spa->spa_sync_bplist.bpl_lock);
 	mutex_destroy(&spa->spa_history_lock);
 	mutex_destroy(&spa->spa_props_lock);
 	mutex_destroy(&spa->spa_suspend_lock);
+	mutex_destroy(&spa->spa_vdev_top_lock);
 
 	kmem_free(spa, sizeof (spa_t));
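The bplists initialized in spa_add() presumably get matching teardown in spa_remove(); that part of the changeset is not visible in this excerpt. A sketch of what the teardown would look like, assuming a bplist_fini() counterpart to bplist_init():

	for (int t = 0; t < TXG_SIZE; t++)
		bplist_fini(&spa->spa_free_bplist[t]);
	bplist_fini(&spa->spa_deferred_bplist);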
@@ -835 +847 @@
 uint64_t
 spa_vdev_enter(spa_t *spa)
 {
+	mutex_enter(&spa->spa_vdev_top_lock);
 	mutex_enter(&spa_namespace_lock);
+	return (spa_vdev_config_enter(spa));
+}
+
+/*
+ * Internal implementation for spa_vdev_enter().  Used when a vdev
+ * operation requires multiple syncs (e.g. removing a device) while
+ * keeping the spa_namespace_lock held.
+ */
+uint64_t
+spa_vdev_config_enter(spa_t *spa)
+{
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
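The enter path is now split so that an operation spanning several txgs can repeatedly sync without dropping spa_namespace_lock (device removal is the motivating case). A sketch of the multi-sync calling pattern, assuming the caller already holds the namespace lock and uses the conventional FTAG tag; the operation body is elided:

	mutex_enter(&spa_namespace_lock);

	uint64_t txg = spa_vdev_config_enter(spa);
	/* ... stage one of the operation ... */
	(void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);

	txg = spa_vdev_config_enter(spa);	/* namespace lock never dropped */
	/* ... stage two ... */
	(void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);

	mutex_exit(&spa_namespace_lock);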
@@ -844 +869 @@
 /*
- * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
- * locking of spa_vdev_enter(), we also want to make sure the transactions have
- * synced to disk, and then update the global configuration cache with the new
- * information.
+ * Used in combination with spa_vdev_config_enter() to allow the syncing
+ * of multiple transactions without releasing the spa_namespace_lock.
 */
 int
-spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
+spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
 {
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
 	int config_changed = B_FALSE;
 
 	ASSERT(txg > spa_last_synced_txg(spa));
@@ -867 +892 @@
 	if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
 		dsl_pool_scrub_restart(spa->spa_dsl_pool);
 		config_changed = B_TRUE;
+		spa->spa_config_generation++;
 	}
 
+	/*
+	 * Verify the metaslab classes.
+	 */
+	ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
+	ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
+
 	spa_config_exit(spa, SCL_ALL, spa);
 
+	/*
+	 * Panic the system if the specified tag requires it.  This
+	 * is useful for ensuring that configurations are updated
+	 * transactionally.
+	 */
+	if (zio_injection_enabled)
+		zio_handle_panic_injection(spa, tag, 0);
+
 	/*
 	 * Note: this txg_wait_synced() is important because it ensures
 	 * that there won't be more than one config change per txg.
 	 * This allows us to use the txg as the generation number.
 	 */
@@ -892 +932 @@
 	if (config_changed)
 		spa_config_sync(spa, B_FALSE, B_TRUE);
+
+	return (error);
+}
+
+/*
+ * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
+ * locking of spa_vdev_enter(), we also want to make sure the transactions have
+ * synced to disk, and then update the global configuration cache with the new
+ * information.
+ */
+int
+spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
+{
+	spa_vdev_config_exit(spa, vd, txg, error, FTAG);
 	mutex_exit(&spa_namespace_lock);
+	mutex_exit(&spa->spa_vdev_top_lock);
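For single-txg operations the wrapper pair is unchanged from a caller's point of view, apart from the new top-level lock. A sketch of the conventional usage, modeled on callers such as spa_vdev_add() (the function name below is hypothetical):

	int
	example_vdev_op(spa_t *spa, vdev_t *vd)
	{
		uint64_t txg = spa_vdev_enter(spa);	/* vdev_top_lock, namespace_lock, SCL_ALL */

		/* ... modify the vdev tree here ... */

		return (spa_vdev_exit(spa, vd, txg, 0));	/* waits for txg, syncs cache, drops locks */
	}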
@@ -901 +953 @@
 /*
  * Lock the given spa_t for the purpose of changing vdev state.
  */
 void
-spa_vdev_state_enter(spa_t *spa)
+spa_vdev_state_enter(spa_t *spa, int oplocks)
 {
-	spa_config_enter(spa, SCL_STATE_ALL, spa, RW_WRITER);
+	int locks = SCL_STATE_ALL | oplocks;
+
+	spa_config_enter(spa, locks, spa, RW_WRITER);
+	spa->spa_vdev_locks = locks;
 }
 
 int
 spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
 {
-	if (vd != NULL)
+	boolean_t config_changed = B_FALSE;
+
+	if (vd != NULL || error == 0)
+		vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev,
+		    0, 0, B_FALSE);
+
+	if (vd != NULL) {
 		vdev_state_dirty(vd->vdev_top);
+		config_changed = B_TRUE;
+		spa->spa_config_generation++;
+	}
 
-	spa_config_exit(spa, SCL_STATE_ALL, spa);
+	ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL);
+	spa_config_exit(spa, spa->spa_vdev_locks, spa);
 
 	/*
 	 * If anything changed, wait for it to sync.  This ensures that,
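Callers now declare any extra config locks they need for the duration of the state change, and spa_vdev_state_exit() releases exactly what was taken. A sketch of a caller, assuming SCL_NONE (no extra locks) is defined alongside the other SCL_* flags and is the common case:

	spa_vdev_state_enter(spa, SCL_NONE);	/* plain state change */
	/* ... set the vdev's state ... */
	return (spa_vdev_state_exit(spa, vd, 0));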
@@ -1055 +1129 @@
 	return (r % range);
 }
+
+uint64_t
+spa_generate_guid(spa_t *spa)
+{
+	uint64_t guid = spa_get_random(-1ULL);
+
+	if (spa != NULL) {
+		while (guid == 0 || spa_guid_exists(spa_guid(spa), guid))
+			guid = spa_get_random(-1ULL);
+	} else {
+		while (guid == 0 || spa_guid_exists(guid, 0))
+			guid = spa_get_random(-1ULL);
+	}
+
+	return (guid);
+}
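The NULL check selects which namespace the new guid is validated against: pass a pool to draw a guid for a new vdev inside it, or NULL to draw a guid for a new pool (spa_guid_exists(guid, 0) checks pool guids themselves). For example:

	uint64_t pool_guid = spa_generate_guid(NULL);	/* creating a new pool */
	uint64_t vdev_guid = spa_generate_guid(spa);	/* adding a vdev to spa */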
@@ -1058 +1148 @@
 void
-sprintf_blkptr(char *buf, int len, const blkptr_t *bp)
+sprintf_blkptr(char *buf, const blkptr_t *bp)
 {
-	int d;
-
-	if (bp == NULL) {
-		(void) snprintf(buf, len, "<NULL>");
-		return;
-	}
-
-	if (BP_IS_HOLE(bp)) {
-		(void) snprintf(buf, len, "<hole>");
-		return;
-	}
-
-	(void) snprintf(buf, len, "[L%llu %s] %llxL/%llxP ",
-	    (u_longlong_t)BP_GET_LEVEL(bp),
-	    dmu_ot[BP_GET_TYPE(bp)].ot_name,
-	    (u_longlong_t)BP_GET_LSIZE(bp),
-	    (u_longlong_t)BP_GET_PSIZE(bp));
-
-	for (d = 0; d < BP_GET_NDVAS(bp); d++) {
-		const dva_t *dva = &bp->blk_dva[d];
-		(void) snprintf(buf + strlen(buf), len - strlen(buf),
-		    "DVA[%d]=<%llu:%llx:%llx> ", d,
-		    (u_longlong_t)DVA_GET_VDEV(dva),
-		    (u_longlong_t)DVA_GET_OFFSET(dva),
-		    (u_longlong_t)DVA_GET_ASIZE(dva));
-	}
-
-	(void) snprintf(buf + strlen(buf), len - strlen(buf),
-	    "%s %s %s %s birth=%llu fill=%llu cksum=%llx:%llx:%llx:%llx",
-	    zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
-	    zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
-	    BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE",
-	    BP_IS_GANG(bp) ? "gang" : "contiguous",
-	    (u_longlong_t)bp->blk_birth,
-	    (u_longlong_t)bp->blk_fill,
-	    (u_longlong_t)bp->blk_cksum.zc_word[0],
-	    (u_longlong_t)bp->blk_cksum.zc_word[1],
-	    (u_longlong_t)bp->blk_cksum.zc_word[2],
-	    (u_longlong_t)bp->blk_cksum.zc_word[3]);
+	char *type = NULL;
+	char *checksum = NULL;
+	char *compress = NULL;
+
+	if (bp != NULL) {
+		type = dmu_ot[BP_GET_TYPE(bp)].ot_name;
+		checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
+		compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
+	}
+
+	SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress);
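Dropping the len parameter means every caller must supply a buffer of a guaranteed minimum size; the assumption here is that the BP_SPRINTF_LEN constant from sys/spa.h is what callers size their buffers with, and that the SPRINTF_BLKPTR macro handles a NULL bp itself:

	char blkbuf[BP_SPRINTF_LEN];

	sprintf_blkptr(blkbuf, bp);
	/* blkbuf now holds the one-line rendering of *bp */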
@@ -1202 +1264 @@
 	return (spa->spa_first_txg);
 }
+
+uint64_t
+spa_syncing_txg(spa_t *spa)
+{
+	return (spa->spa_syncing_txg);
+}
 
 pool_state_t
 spa_state(spa_t *spa)
 {
 	return (spa->spa_state);
 }
+
+spa_load_state_t
+spa_load_state(spa_t *spa)
+{
+	return (spa->spa_load_state);
+}
 
 uint64_t
 spa_freeze_txg(spa_t *spa)
 {
 	return (spa->spa_freeze_txg);
 }
 
-/*
- * Return how much space is allocated in the pool (i.e. sum of all asize)
- */
-uint64_t
-spa_get_alloc(spa_t *spa)
-{
-	return (spa->spa_root_vdev->vdev_stat.vs_alloc);
-}
-
-/*
- * Return how much (raid-z inflated) space there is in the pool.
- */
-uint64_t
-spa_get_space(spa_t *spa)
-{
-	return (spa->spa_root_vdev->vdev_stat.vs_space);
-}
-
-/*
- * Return the amount of raid-z-deflated space in the pool.
- */
-uint64_t
-spa_get_dspace(spa_t *spa)
-{
-	if (spa->spa_deflate)
-		return (spa->spa_root_vdev->vdev_stat.vs_dspace);
-	else
-		return (spa->spa_root_vdev->vdev_stat.vs_space);
-}
-
 uint64_t
 spa_get_asize(spa_t *spa, uint64_t lsize)
 {
 	/*
-	 * For now, the worst case is 512-byte RAID-Z blocks, in which
-	 * case the space requirement is exactly 2x; so just assume that.
-	 * Add to this the fact that we can have up to 3 DVAs per bp, and
-	 * we have to multiply by a total of 6x.
+	 * The worst case is single-sector max-parity RAID-Z blocks, in which
+	 * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
+	 * times the size; so just assume that.  Add to this the fact that
+	 * we can have up to 3 DVAs per bp, and one more factor of 2 because
+	 * the block may be dittoed with up to 3 DVAs by ddt_sync().
 	 */
-	return (lsize * 6);
+	return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2);
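Plugging in the constants (VDEV_RAIDZ_MAXPARITY is 3, SPA_DVAS_PER_BP is 3), the worst-case inflation is (3 + 1) * 3 * 2 = 24x the logical size, up from the old flat 6x:

	/* lsize = 4 KB  =>  spa_get_asize() now reserves 96 KB worst case: */
	/* 4096 * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 = 4096 * 24 = 98304 */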
@@ -1258 +1303 @@
 }
+
+uint64_t
+spa_get_dspace(spa_t *spa)
+{
+	return (spa->spa_dspace);
+}
+
+void
+spa_update_dspace(spa_t *spa)
+{
+	spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) +
+	    ddt_get_dedup_dspace(spa);
+}
@@ -1295 +1371 @@
 uint64_t
-bp_get_dasize(spa_t *spa, const blkptr_t *bp)
+dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
 {
-	int i;
-	uint64_t sz = 0;
+	uint64_t asize = DVA_GET_ASIZE(dva);
+	uint64_t dsize = asize;
 
-	if (!spa->spa_deflate)
-		return (BP_GET_ASIZE(bp));
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
 
+	if (asize != 0 && spa->spa_deflate) {
+		vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva));
+		dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio;
+	}
+
+	return (dsize);
+}
+
+uint64_t
+bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp)
+{
+	uint64_t dsize = 0;
+
+	for (int d = 0; d < SPA_DVAS_PER_BP; d++)
+		dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
+
+	return (dsize);
+}
+
+uint64_t
+bp_get_dsize(spa_t *spa, const blkptr_t *bp)
+{
+	uint64_t dsize = 0;
+
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
-	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
-		vdev_t *vd =
-		    vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[i]));
-		if (vd)
-			sz += (DVA_GET_ASIZE(&bp->blk_dva[i]) >>
-			    SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio;
-	}
+
+	for (int d = 0; d < SPA_DVAS_PER_BP; d++)
+		dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
+
 	spa_config_exit(spa, SCL_VDEV, FTAG);
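The _sync variants assume the caller already holds the config lock (dva_get_dsize_sync() asserts it), while bp_get_dsize() takes SCL_VDEV around the same loop itself. A sketch of the intended division of labor (variable names are illustrative):

	/* syncing context: SCL_ALL (or at least SCL_VDEV) already held */
	uint64_t freed = bp_get_dsize_sync(spa, bp);

	/* open context: no config lock held, so use the locking variant */
	uint64_t charged = bp_get_dsize(spa, bp);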