/*
 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "lvm-string.h"
#include "toolcontext.h"
/* FIXME: remove RAID_METADATA_AREA_LEN macro after defining 'raid_log_extents' */
#define RAID_METADATA_AREA_LEN 1

/* FIXME These ended up getting used differently from first intended.  Refactor. */
#define A_CONTIGUOUS		0x01
#define A_CLING			0x02
#define A_CLING_BY_TAGS		0x04
#define A_CLING_TO_ALLOCED	0x08	/* Only for ALLOC_NORMAL */
#define A_CAN_SPLIT		0x10
/*
 * Constant parameters during a single allocation attempt.
 */
struct alloc_parms {
	alloc_policy_t alloc;
	unsigned flags;		/* Holds A_* */
	struct lv_segment *prev_lvseg;
	uint32_t extents_still_needed;
};

/*
 * Holds varying state of each allocation attempt.
 */
struct alloc_state {
	struct pv_area_used *areas;
	uint32_t areas_size;
	uint32_t log_area_count_still_needed;	/* Number of areas still needing to be allocated for the log */
	uint32_t allocated;	/* Total number of extents allocated so far */
};
int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
				  struct lv_segment *seg)
{
	struct seg_list *sl;

	dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
		if (sl->seg == seg) {
			sl->count++;
			return 1;
		}
	}

	log_very_verbose("Adding %s:%" PRIu32 " as a user of %s",
			 seg->lv->name, seg->le, lv->name);

	if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) {
		log_error("Failed to allocate segment list");
		return 0;
	}

	sl->count = 1;
	sl->seg = seg;
	dm_list_add(&lv->segs_using_this_lv, &sl->list);

	return 1;
}

int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
				       struct lv_segment *seg)
{
	struct seg_list *sl;

	dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
		if (sl->seg != seg)
			continue;
		if (sl->count > 1)
			sl->count--;
		else {
			log_very_verbose("%s:%" PRIu32 " is no longer a user "
					 "of %s", seg->lv->name, seg->le,
					 lv->name);
			dm_list_del(&sl->list);
		}
		return 1;
	}

	return 0;
}
/*
 * This is a function specialized for the common case where there is
 * only one segment which uses the LV.
 * e.g. the LV is a layer inserted by insert_layer_for_lv().
 *
 * In general, walk through lv->segs_using_this_lv.
 */
struct lv_segment *get_only_segment_using_this_lv(struct logical_volume *lv)
{
	struct seg_list *sl;

	if (dm_list_size(&lv->segs_using_this_lv) != 1) {
		log_error("%s is expected to have only one segment using it, "
			  "while it has %d", lv->name,
			  dm_list_size(&lv->segs_using_this_lv));
		return NULL;
	}

	dm_list_iterate_items(sl, &lv->segs_using_this_lv)
		break;	/* first item */

	if (sl->count != 1) {
		log_error("%s is expected to have only one segment using it, "
			  "while %s:%" PRIu32 " uses it %d times",
			  lv->name, sl->seg->lv->name, sl->seg->le, sl->count);
		return NULL;
	}

	return sl->seg;
}
/*
 * PVs used by a segment of an LV
 */
struct seg_pvs {
	struct dm_list list;

	struct dm_list pvs;	/* struct pv_list */

	uint32_t le;
	uint32_t len;
};

static struct seg_pvs *_find_seg_pvs_by_le(struct dm_list *list, uint32_t le)
{
	struct seg_pvs *spvs;

	dm_list_iterate_items(spvs, list)
		if (le >= spvs->le && le < spvs->le + spvs->len)
			return spvs;

	return NULL;
}

/*
 * Find first unused LV number.
 */
uint32_t find_free_lvnum(struct logical_volume *lv)
{
	int lvnum_used[MAX_RESTRICTED_LVS + 1];
	uint32_t i = 0;
	struct lv_list *lvl;
	int lvnum;

	memset(&lvnum_used, 0, sizeof(lvnum_used));

	dm_list_iterate_items(lvl, &lv->vg->lvs) {
		lvnum = lvnum_from_lvid(&lvl->lv->lvid);
		if (lvnum <= MAX_RESTRICTED_LVS)
			lvnum_used[lvnum] = 1;
	}

	while (lvnum_used[i])
		i++;

	/* FIXME What if none are free? */

	return i;
}
/*
 * All lv_segments get created here.
 */
struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
				    struct logical_volume *lv,
				    uint32_t le, uint32_t len,
				    uint64_t status,
				    uint32_t stripe_size,
				    struct logical_volume *log_lv,
				    struct logical_volume *thin_pool_lv,
				    uint32_t area_count,
				    uint32_t area_len,
				    uint32_t chunk_size,
				    uint32_t region_size,
				    uint32_t extents_copied,
				    struct lv_segment *pvmove_source_seg)
{
	struct lv_segment *seg;
	struct dm_pool *mem = lv->vg->vgmem;
	uint32_t areas_sz = area_count * sizeof(*seg->areas);

	if (!segtype) {
		log_error(INTERNAL_ERROR "alloc_lv_segment: Missing segtype.");
		return NULL;
	}

	if (!(seg = dm_pool_zalloc(mem, sizeof(*seg))))
		return_NULL;

	if (!(seg->areas = dm_pool_zalloc(mem, areas_sz))) {
		dm_pool_free(mem, seg);
		return_NULL;
	}

	if (segtype_is_raid(segtype) &&
	    !(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
		dm_pool_free(mem, seg); /* frees everything alloced since seg */
		return_NULL;
	}

	seg->segtype = segtype;
	seg->lv = lv;
	seg->le = le;
	seg->len = len;
	seg->status = status;
	seg->stripe_size = stripe_size;
	seg->area_count = area_count;
	seg->area_len = area_len;
	seg->chunk_size = chunk_size;
	seg->region_size = region_size;
	seg->extents_copied = extents_copied;
	seg->pvmove_source_seg = pvmove_source_seg;
	dm_list_init(&seg->tags);
	dm_list_init(&seg->thin_messages);

	if (thin_pool_lv) {
		/* If thin_pool_lv is itself a thin volume, a thin snapshot is being created */
		if (lv_is_thin_volume(thin_pool_lv)) {
			seg->transaction_id = first_seg(first_seg(thin_pool_lv)->pool_lv)->transaction_id;
			if (!attach_pool_lv(seg, first_seg(thin_pool_lv)->pool_lv, thin_pool_lv))
				return_NULL;
		} else {
			seg->transaction_id = first_seg(thin_pool_lv)->transaction_id;
			if (!attach_pool_lv(seg, thin_pool_lv, NULL))
				return_NULL;
		}
	}

	if (log_lv && !attach_mirror_log(seg, log_lv))
		return_NULL;

	return seg;
}
struct lv_segment *alloc_snapshot_seg(struct logical_volume *lv,
				      uint64_t status, uint32_t old_le_count)
{
	struct lv_segment *seg;
	const struct segment_type *segtype;

	segtype = get_segtype_from_string(lv->vg->cmd, "snapshot");
	if (!segtype) {
		log_error("Failed to find snapshot segtype");
		return NULL;
	}

	if (!(seg = alloc_lv_segment(segtype, lv, old_le_count,
				     lv->le_count - old_le_count, status, 0,
				     NULL, NULL, 0, lv->le_count - old_le_count,
				     0, 0, 0, NULL))) {
		log_error("Couldn't allocate new snapshot segment.");
		return NULL;
	}

	dm_list_add(&lv->segments, &seg->list);
	lv->status |= VIRTUAL;

	return seg;
}
int release_lv_segment_area(struct lv_segment *seg, uint32_t s,
			    uint32_t area_reduction)
{
	if (seg_type(seg, s) == AREA_UNASSIGNED)
		return 1;

	if (seg_type(seg, s) == AREA_PV) {
		if (!release_pv_segment(seg_pvseg(seg, s), area_reduction))
			return_0;
		if (seg->area_len == area_reduction)
			seg_type(seg, s) = AREA_UNASSIGNED;
		return 1;
	}

	if ((seg_lv(seg, s)->status & MIRROR_IMAGE) ||
	    (seg_lv(seg, s)->status & THIN_POOL_DATA)) {
		if (!lv_reduce(seg_lv(seg, s), area_reduction))
			return_0; /* FIXME: any upper level reporting */
		return 1;
	}

	if (seg_lv(seg, s)->status & RAID_IMAGE) {
		/*
		 * FIXME: Use lv_reduce not lv_remove
		 *  We use lv_remove for now, because I haven't figured out
		 *  why lv_reduce won't remove the LV.
		lv_reduce(seg_lv(seg, s), area_reduction);
		*/
		if (area_reduction != seg->area_len) {
			log_error("Unable to reduce RAID LV - operation not implemented.");
			return 0;
		}
		if (!lv_remove(seg_lv(seg, s))) {
			log_error("Failed to remove RAID image %s",
				  seg_lv(seg, s)->name);
			return 0;
		}

		/* Remove metadata area if image has been removed */
		if (area_reduction == seg->area_len) {
			if (!lv_reduce(seg_metalv(seg, s),
				       seg_metalv(seg, s)->le_count)) {
				log_error("Failed to remove RAID meta-device %s",
					  seg_metalv(seg, s)->name);
				return 0;
			}
		}
		return 1;
	}

	if (area_reduction == seg->area_len) {
		log_very_verbose("Remove %s:%" PRIu32 "[%" PRIu32 "] from "
				 "the top of LV %s:%" PRIu32,
				 seg->lv->name, seg->le, s,
				 seg_lv(seg, s)->name, seg_le(seg, s));

		remove_seg_from_segs_using_this_lv(seg_lv(seg, s), seg);
		seg_lv(seg, s) = NULL;
		seg_le(seg, s) = 0;
		seg_type(seg, s) = AREA_UNASSIGNED;
	}

	return 1;
}
/*
 * Move a segment area from one segment to another
 */
int move_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to,
			 struct lv_segment *seg_from, uint32_t area_from)
{
	struct physical_volume *pv;
	struct logical_volume *lv;
	uint32_t pe, le;

	switch (seg_type(seg_from, area_from)) {
	case AREA_PV:
		pv = seg_pv(seg_from, area_from);
		pe = seg_pe(seg_from, area_from);

		if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
			return_0;

		if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
			return_0;

		if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe))
			return_0;

		break;

	case AREA_LV:
		lv = seg_lv(seg_from, area_from);
		le = seg_le(seg_from, area_from);

		if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
			return_0;

		if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
			return_0;

		if (!set_lv_segment_area_lv(seg_to, area_to, lv, le, 0))
			return_0;

		break;

	case AREA_UNASSIGNED:
		if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
			return_0;
	}

	return 1;
}
/*
 * Link part of a PV to an LV segment.
 */
int set_lv_segment_area_pv(struct lv_segment *seg, uint32_t area_num,
			   struct physical_volume *pv, uint32_t pe)
{
	seg->areas[area_num].type = AREA_PV;

	if (!(seg_pvseg(seg, area_num) =
	      assign_peg_to_lvseg(pv, pe, seg->area_len, seg, area_num)))
		return_0;

	return 1;
}

/*
 * Link one LV segment to another.  Assumes sizes already match.
 */
int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
			   struct logical_volume *lv, uint32_t le,
			   uint64_t status)
{
	log_very_verbose("Stack %s:%" PRIu32 "[%" PRIu32 "] on LV %s:%" PRIu32,
			 seg->lv->name, seg->le, area_num, lv->name, le);

	if (status & RAID_META) {
		seg->meta_areas[area_num].type = AREA_LV;
		seg_metalv(seg, area_num) = lv;
		if (le) {
			log_error(INTERNAL_ERROR "Meta le != 0");
			return 0;
		}
		seg_metale(seg, area_num) = 0;
	} else {
		seg->areas[area_num].type = AREA_LV;
		seg_lv(seg, area_num) = lv;
		seg_le(seg, area_num) = le;
	}
	lv->status |= status;

	if (!add_seg_to_segs_using_this_lv(lv, seg))
		return_0;

	return 1;
}
/*
 * Prepare for adding parallel areas to an existing segment.
 */
static int _lv_segment_add_areas(struct logical_volume *lv,
				 struct lv_segment *seg,
				 uint32_t new_area_count)
{
	struct lv_segment_area *newareas;
	uint32_t areas_sz = new_area_count * sizeof(*newareas);

	if (!(newareas = dm_pool_zalloc(lv->vg->cmd->mem, areas_sz)))
		return_0;

	memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));

	seg->areas = newareas;
	seg->area_count = new_area_count;

	return 1;
}
/*
 * Reduce the size of an lv_segment.  New size can be zero.
 */
static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
{
	uint32_t area_reduction, s;

	/* Caller must ensure exact divisibility */
	if (seg_is_striped(seg)) {
		if (reduction % seg->area_count) {
			log_error("Segment extent reduction %" PRIu32
				  " not divisible by #stripes %" PRIu32,
				  reduction, seg->area_count);
			return 0;
		}
		area_reduction = (reduction / seg->area_count);
	} else
		area_reduction = reduction;

	for (s = 0; s < seg->area_count; s++)
		if (!release_lv_segment_area(seg, s, area_reduction))
			return_0;

	seg->len -= reduction;
	seg->area_len -= area_reduction;

	return 1;
}
/*
 * Entry point for all LV reductions in size.
 */
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
	struct lv_segment *seg;
	uint32_t count = extents;
	uint32_t reduction;

	dm_list_iterate_back_items(seg, &lv->segments) {
		if (!count)
			break;

		if (seg->len <= count) {
			/* remove this segment completely */
			/* FIXME Check this is safe */
			if (seg->log_lv && !lv_remove(seg->log_lv))
				return_0;

			if (seg->metadata_lv && !lv_remove(seg->metadata_lv))
				return_0;

			if (seg->pool_lv)
				if (!detach_pool_lv(seg))
					return_0;

			dm_list_del(&seg->list);
			reduction = seg->len;
		} else
			reduction = count;

		if (!_lv_segment_reduce(seg, reduction))
			return_0;
		count -= reduction;
	}

	lv->le_count -= extents;
	lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;

	if (!delete)
		return 1;

	/* Remove the LV if it is now empty */
	if (!lv->le_count && !unlink_lv_from_vg(lv))
		return_0;
	else if (lv->vg->fid->fmt->ops->lv_setup &&
		 !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
		return_0;

	return 1;
}
int lv_empty(struct logical_volume *lv)
{
	return _lv_reduce(lv, lv->le_count, 0);
}

/*
 * Empty an LV and add error segment.
 */
int replace_lv_with_error_segment(struct logical_volume *lv)
{
	uint32_t len = lv->le_count;

	if (len && !lv_empty(lv))
		return_0;

	/* Minimum size required for a table. */
	if (!len)
		len = 1;

	/*
	 * Since we are replacing whatever was there with
	 * an error segment, we should also clear any flags
	 * that suggest it is anything other than "error".
	 */
	lv->status &= ~(MIRRORED|PVMOVE);

	/* FIXME: Should we bug if we find a log_lv attached? */

	if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, "error"), NULL))
		return_0;

	return 1;
}
/*
 * Remove given number of extents from LV.
 */
int lv_reduce(struct logical_volume *lv, uint32_t extents)
{
	return _lv_reduce(lv, extents, 1);
}

/*
 * Completely remove an LV.
 */
int lv_remove(struct logical_volume *lv)
{
	if (!lv_reduce(lv, lv->le_count))
		return_0;

	return 1;
}

/*
 * A set of contiguous physical extents allocated
 */
struct alloced_area {
	struct dm_list list;

	struct physical_volume *pv;
	uint32_t pe;
	uint32_t len;
};
/*
 * Details of an allocation attempt
 */
struct alloc_handle {
	struct cmd_context *cmd;
	struct dm_pool *mem;

	alloc_policy_t alloc;		/* Overall policy */
	uint32_t new_extents;		/* Number of new extents required */
	uint32_t area_count;		/* Number of parallel areas */
	uint32_t parity_count;		/* Adds to area_count, but not area_multiple */
	uint32_t area_multiple;		/* seg->len = area_len * area_multiple */
	uint32_t log_area_count;	/* Number of parallel logs */
	uint32_t metadata_area_count;	/* Number of parallel metadata areas */
	uint32_t log_len;		/* Length of log/metadata_area */
	uint32_t region_size;		/* Mirror region size */
	uint32_t total_area_len;	/* Total number of parallel extents */

	unsigned maximise_cling;
	unsigned mirror_logs_separate;	/* Force mirror logs on separate PVs? */

	/*
	 * RAID devices require a metadata area that accompanies each
	 * device.  During initial creation, it is best to look for space
	 * that is new_extents + log_len and then split that between two
	 * allocated areas when found.  'alloc_and_split_meta' indicates
	 * that this is the desired dynamic.
	 */
	unsigned alloc_and_split_meta;

	const struct dm_config_node *cling_tag_list_cn;

	struct dm_list *parallel_areas;	/* PVs to avoid */

	/*
	 * Contains area_count lists of areas allocated to data stripes
	 * followed by log_area_count lists of areas allocated to log stripes.
	 */
	struct dm_list alloced_areas[0];
};
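
/*
 * Example of the layout above: with area_count 3 data stripes and
 * log_area_count 1, alloced_areas[0..2] hold the struct alloced_area
 * lists for the three data stripes and alloced_areas[3] holds the
 * areas allocated to the single log.
 */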
static uint32_t _calc_area_multiple(const struct segment_type *segtype,
				    const uint32_t area_count, const uint32_t stripes)
{
	if (!area_count)
		return 1;

	/* Striped */
	if (segtype_is_striped(segtype))
		return area_count;

	/* Mirrored stripes */
	if (stripes)
		return stripes;

	/* Mirrored */
	return 1;
}
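
/*
 * Illustrative values: a 3-way striped segment has area_multiple 3, so
 * each area covers seg->len / 3 extents; a 2-way mirror has
 * area_multiple 1, so every image covers all seg->len extents.
 */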
/*
 * Returns log device size in extents, algorithm from kernel code
 */
static uint32_t mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
{
	size_t area_size, bitset_size, log_size, region_count;

	area_size = area_len * pe_size;
	region_count = dm_div_up(area_size, region_size);

	/* Work out how many "unsigned long"s we need to hold the bitset. */
	bitset_size = dm_round_up(region_count, sizeof(uint32_t) << BYTE_SHIFT);
	bitset_size >>= BYTE_SHIFT;

	/* Log device holds both header and bitset. */
	log_size = dm_round_up((MIRROR_LOG_OFFSET << SECTOR_SHIFT) + bitset_size, 1 << SECTOR_SHIFT);
	log_size >>= SECTOR_SHIFT;
	log_size = dm_div_up(log_size, pe_size);

	/*
	 * Kernel requires a mirror to be at least 1 region large.  So,
	 * if our mirror log is itself a mirror, it must be at least
	 * 1 region large.  This restriction may not be necessary for
	 * non-mirrored logs, but we apply the rule anyway.
	 *
	 * (The other option is to make the region size of the log
	 * mirror smaller than the mirror it is acting as a log for,
	 * but that really complicates things.  It's much easier to
	 * keep the region_size the same for both.)
	 */
	return (log_size > (region_size / pe_size)) ? log_size :
		(region_size / pe_size);
}
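
/*
 * Worked example (assuming a MIRROR_LOG_OFFSET of 2 sectors): a mirror
 * of 25600 4MiB extents (pe_size 8192 sectors) with region_size 1024
 * sectors (512KiB) has 204800 regions, so the bitset needs 204800 bits
 * = 25600 bytes; adding the 1KiB header and rounding up to a whole
 * sector gives 52 sectors, which fits in a single extent.  The
 * one-region minimum (1024 / 8192, rounding down to 0 extents) does
 * not raise this, so one extent is returned.
 */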
/*
 * Preparation for a specific allocation attempt
 * stripes and mirrors refer to the parallel areas used for data.
 * If log_area_count > 1 it is always mirrored (not striped).
 */
static struct alloc_handle *_alloc_init(struct cmd_context *cmd,
					struct dm_pool *mem,
					const struct segment_type *segtype,
					alloc_policy_t alloc,
					uint32_t new_extents,
					uint32_t mirrors,
					uint32_t stripes,
					uint32_t metadata_area_count,
					uint32_t extent_size,
					uint32_t region_size,
					struct dm_list *parallel_areas)
{
	struct alloc_handle *ah;
	uint32_t s, area_count, alloc_count, parity_count;
	size_t size = 0;

	/* FIXME Caller should ensure this */
	if (mirrors && !stripes)
		stripes = 1;

	if (segtype_is_virtual(segtype))
		area_count = 0;
	else if (mirrors > 1)
		area_count = mirrors * stripes;
	else
		area_count = stripes;
	/*
	 * It is a requirement that RAID 4/5/6 are created with a number of
	 * stripes that is greater than the number of parity devices.  (e.g.
	 * RAID4/5 must have at least 2 stripes and RAID6 must have at least
	 * 3.)  It is also a constraint that, when replacing individual devices
	 * in a RAID 4/5/6 array, no more devices can be replaced than
	 * there are parity devices.  (Otherwise, there would not be enough
	 * redundancy to maintain the array.)  Understanding these two
	 * constraints allows us to infer whether the caller of this function
	 * is intending to allocate an entire array or just replacement
	 * component devices.  In the former case, we must account for the
	 * necessary parity_count.  In the latter case, we do not need to
	 * account for the extra parity devices because the array already
	 * exists and they only want replacement drives.
	 */
	parity_count = (area_count <= segtype->parity_devs) ? 0 :
		segtype->parity_devs;
	alloc_count = area_count + parity_count;
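
	/*
	 * For example, creating a 4-stripe RAID6 LV gives area_count 4 >
	 * parity_devs 2, so parity_count is 2 and alloc_count starts at 6;
	 * replacing a single image in an existing RAID5 array gives
	 * area_count 1 <= parity_devs 1, so parity_count stays 0.
	 */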
	if (segtype_is_raid(segtype) && metadata_area_count)
		/* RAID has a meta area for each device */
		alloc_count *= 2;
	else
		/* mirrors specify their exact log count */
		alloc_count += metadata_area_count;

	size = sizeof(*ah);
	size += sizeof(ah->alloced_areas[0]) * alloc_count;

	if (!(ah = dm_pool_zalloc(mem, size))) {
		log_error("allocation handle allocation failed");
		return NULL;
	}

	ah->cmd = cmd;

	if (segtype_is_virtual(segtype))
		return ah;

	if (!(area_count + metadata_area_count)) {
		log_error(INTERNAL_ERROR "_alloc_init called for non-virtual segment with no disk space.");
		return NULL;
	}

	if (!(ah->mem = dm_pool_create("allocation", 1024))) {
		log_error("allocation pool creation failed");
		return NULL;
	}

	if (mirrors || stripes)
		ah->new_extents = new_extents;
	else
		ah->new_extents = 0;

	ah->area_count = area_count;
	ah->parity_count = parity_count;
	ah->region_size = region_size;
	ah->alloc = alloc;
	ah->area_multiple = _calc_area_multiple(segtype, area_count, stripes);
	ah->mirror_logs_separate = find_config_tree_bool(cmd, "allocation/mirror_logs_require_separate_pvs",
							 DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS);
	if (segtype_is_raid(segtype)) {
		if (metadata_area_count) {
			if (metadata_area_count != area_count)
				log_error(INTERNAL_ERROR
					  "Bad metadata_area_count");
			ah->metadata_area_count = area_count;
			ah->alloc_and_split_meta = 1;

			ah->log_len = RAID_METADATA_AREA_LEN;

			/*
			 * We need 'log_len' extents for each
			 * RAID device's metadata_area
			 */
			ah->new_extents += (ah->log_len * ah->area_multiple);
		} else {
			ah->log_area_count = 0;
			ah->log_len = 0;
		}
	} else if (segtype_is_thin_pool(segtype)) {
		ah->log_area_count = metadata_area_count;
		/* thin_pool uses region_size to pass metadata size in extents */
		ah->log_len = ah->region_size;
		ah->region_size = 0;
		ah->mirror_logs_separate =
			find_config_tree_bool(cmd, "allocation/thin_pool_metadata_require_separate_pvs",
					      DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS);
	} else {
		ah->log_area_count = metadata_area_count;
		ah->log_len = !metadata_area_count ? 0 :
			mirror_log_extents(ah->region_size, extent_size,
					   new_extents / ah->area_multiple);
	}

	for (s = 0; s < alloc_count; s++)
		dm_list_init(&ah->alloced_areas[s]);

	ah->parallel_areas = parallel_areas;

	ah->cling_tag_list_cn = find_config_tree_node(cmd, "allocation/cling_tag_list");

	ah->maximise_cling = find_config_tree_bool(cmd, "allocation/maximise_cling", DEFAULT_MAXIMISE_CLING);

	return ah;
}
void alloc_destroy(struct alloc_handle *ah)
{
	if (ah->mem)
		dm_pool_destroy(ah->mem);
}
/* Is there enough total space or should we give up immediately? */
static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
				uint32_t allocated, uint32_t extents_still_needed)
{
	uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
	uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
	uint32_t metadata_extents_needed = ah->metadata_area_count * RAID_METADATA_AREA_LEN; /* One each */
	uint32_t total_extents_needed = area_extents_needed + parity_extents_needed + metadata_extents_needed;
	uint32_t free_pes = pv_maps_size(pvms);

	if (total_extents_needed > free_pes) {
		log_error("Insufficient free space: %" PRIu32 " extents needed,"
			  " but only %" PRIu32 " available",
			  total_extents_needed, free_pes);
		return 0;
	}

	return 1;
}
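
/*
 * For example, with 100 extents still needed on a 2-stripe RAID5
 * (area_count 2, parity_count 1, area_multiple 2), the check above
 * requires 100 data extents plus 50 parity extents, plus one extent
 * per metadata area.
 */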
/* For striped mirrors, all the areas are counted, through the mirror layer */
static uint32_t _stripes_per_mimage(struct lv_segment *seg)
{
	struct lv_segment *last_lvseg;

	if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
		last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
		if (seg_is_striped(last_lvseg))
			return last_lvseg->area_count;
	}

	return 1;
}
static void _init_alloc_parms(struct alloc_handle *ah, struct alloc_parms *alloc_parms, alloc_policy_t alloc,
			      struct lv_segment *prev_lvseg, unsigned can_split,
			      uint32_t allocated, uint32_t extents_still_needed)
{
	alloc_parms->alloc = alloc;
	alloc_parms->prev_lvseg = prev_lvseg;
	alloc_parms->flags = 0;
	alloc_parms->extents_still_needed = extents_still_needed;

	/* Are there any preceding segments we must follow on from? */
	if (alloc_parms->prev_lvseg) {
		if (alloc_parms->alloc == ALLOC_CONTIGUOUS)
			alloc_parms->flags |= A_CONTIGUOUS;
		else if (alloc_parms->alloc == ALLOC_CLING)
			alloc_parms->flags |= A_CLING;
		else if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS) {
			alloc_parms->flags |= A_CLING;
			alloc_parms->flags |= A_CLING_BY_TAGS;
		}
	}

	/*
	 * For normal allocations, if any extents have already been found
	 * for allocation, prefer to place further extents on the same disks as
	 * have already been used.
	 */
	if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL && allocated != alloc_parms->extents_still_needed)
		alloc_parms->flags |= A_CLING_TO_ALLOCED;

	if (can_split)
		alloc_parms->flags |= A_CAN_SPLIT;
}
static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas)
{
	struct seg_pvs *spvs;
	struct pv_list *pvl;
	char *pvnames;

	if (!parallel_areas)
		return 1;

	dm_list_iterate_items(spvs, parallel_areas) {
		if (!dm_pool_begin_object(mem, 256)) {
			log_error("dm_pool_begin_object failed");
			return 0;
		}

		dm_list_iterate_items(pvl, &spvs->pvs) {
			if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
				log_error("dm_pool_grow_object failed");
				dm_pool_abandon_object(mem);
				return 0;
			}
			if (!dm_pool_grow_object(mem, " ", 1)) {
				log_error("dm_pool_grow_object failed");
				dm_pool_abandon_object(mem);
				return 0;
			}
		}

		if (!dm_pool_grow_object(mem, "\0", 1)) {
			log_error("dm_pool_grow_object failed");
			dm_pool_abandon_object(mem);
			return 0;
		}

		pvnames = dm_pool_end_object(mem);
		log_debug("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
			  spvs->le, spvs->len, pvnames);
		dm_pool_free(mem, pvnames);
	}

	return 1;
}
static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
				  uint32_t area_count,
				  uint32_t stripe_size,
				  const struct segment_type *segtype,
				  struct alloced_area *aa,
				  uint32_t region_size)
{
	uint32_t s, extents, area_multiple;
	struct lv_segment *seg;

	area_multiple = _calc_area_multiple(segtype, area_count, 0);

	if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count,
				     aa[0].len * area_multiple,
				     status, stripe_size, NULL, NULL,
				     area_count,
				     aa[0].len, 0u, region_size, 0u, NULL))) {
		log_error("Couldn't allocate new LV segment.");
		return 0;
	}

	for (s = 0; s < area_count; s++)
		if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe))
			return_0;

	dm_list_add(&lv->segments, &seg->list);

	extents = aa[0].len * area_multiple;
	lv->le_count += extents;
	lv->size += (uint64_t) extents * lv->vg->extent_size;

	if (segtype_is_mirrored(segtype))
		lv->status |= MIRRORED;

	return 1;
}
static int _setup_alloced_segments(struct logical_volume *lv,
				   struct dm_list *alloced_areas,
				   uint32_t area_count,
				   uint64_t status,
				   uint32_t stripe_size,
				   const struct segment_type *segtype,
				   uint32_t region_size)
{
	struct alloced_area *aa;

	dm_list_iterate_items(aa, &alloced_areas[0]) {
		if (!_setup_alloced_segment(lv, status, area_count,
					    stripe_size, segtype, aa,
					    region_size))
			return_0;
	}

	return 1;
}
/*
 * This function takes a list of pv_areas and adds them to allocated_areas.
 * If the complete area is not needed then it gets split.
 * The part used is removed from the pv_map so it can't be allocated twice.
 */
static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate,
				struct alloc_state *alloc_state, uint32_t ix_log_offset)
{
	uint32_t area_len, len;
	uint32_t s;
	uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
	uint32_t total_area_count;
	struct alloced_area *aa;
	struct pv_area *pva;

	total_area_count = ah->area_count + alloc_state->log_area_count_still_needed;
	total_area_count += ah->parity_count;
	if (!total_area_count) {
		log_error(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
		return 1;
	}

	area_len = max_to_allocate / ah->area_multiple;

	/* Reduce area_len to the smallest of the areas */
	for (s = 0; s < ah->area_count + ah->parity_count; s++)
		if (area_len > alloc_state->areas[s].used)
			area_len = alloc_state->areas[s].used;

	len = (ah->alloc_and_split_meta) ? total_area_count * 2 : total_area_count;
	len *= sizeof(*aa);
	if (!(aa = dm_pool_alloc(ah->mem, len))) {
		log_error("alloced_area allocation failed");
		return 0;
	}

	/*
	 * Areas consist of area_count areas for data stripes, then
	 * ix_log_skip areas to skip, then log_area_count areas to use for the
	 * log, then some areas too small for the log.
	 */
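	/*
	 * Illustrative layout with 2 data stripes, 1 parity area and 1 log,
	 * where some candidate areas were too small for the log:
	 *
	 *   [ data0 | data1 | parity0 | skipped... | log0 | too small... ]
	 *
	 * ix_log_skip counts the "skipped" slots between the data/parity
	 * areas and the first log area.
	 */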
	len = area_len;
	for (s = 0; s < total_area_count; s++) {
		if (s == (ah->area_count + ah->parity_count)) {
			ix_log_skip = ix_log_offset - ah->area_count;
			len = ah->log_len;
		}

		pva = alloc_state->areas[s + ix_log_skip].pva;
		if (ah->alloc_and_split_meta) {
			/*
			 * The metadata area goes at the front of the allocated
			 * space for now, but could easily go at the end (or
			 * middle!).
			 *
			 * Even though we split these two from the same
			 * allocation, we store the images at the beginning
			 * of the areas array and the metadata at the end.
			 */
			s += ah->area_count + ah->parity_count;
			aa[s].pv = pva->map->pv;
			aa[s].pe = pva->start;
			aa[s].len = ah->log_len;

			log_debug("Allocating parallel metadata area %" PRIu32
				  " on %s start PE %" PRIu32
				  " length %" PRIu32 ".",
				  (s - (ah->area_count + ah->parity_count)),
				  pv_dev_name(aa[s].pv), aa[s].pe,
				  ah->log_len);

			consume_pv_area(pva, ah->log_len);
			dm_list_add(&ah->alloced_areas[s], &aa[s].list);
			s -= ah->area_count + ah->parity_count;
		}
		aa[s].pv = pva->map->pv;
		aa[s].pe = pva->start;
		aa[s].len = (ah->alloc_and_split_meta) ? len - ah->log_len : len;

		log_debug("Allocating parallel area %" PRIu32
			  " on %s start PE %" PRIu32 " length %" PRIu32 ".",
			  s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);

		consume_pv_area(pva, aa[s].len);

		dm_list_add(&ah->alloced_areas[s], &aa[s].list);
	}

	/* Only need to alloc metadata from the first batch */
	ah->alloc_and_split_meta = 0;

	ah->total_area_len += area_len;

	alloc_state->allocated += area_len * ah->area_multiple;

	return 1;
}
/*
 * Call fn for each AREA_PV used by the LV segment at lv:le of length *max_seg_len.
 * If any constituent area contains more than one segment, max_seg_len is
 * reduced to cover only the first.
 * fn should return 0 on error, 1 to continue scanning or >1 to terminate without error.
 * In the last case, this function passes on the return code.
 */
static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
			uint32_t le, uint32_t len, struct lv_segment *seg,
			uint32_t *max_seg_len,
			uint32_t first_area, uint32_t max_areas,
			int top_level_area_index,
			int only_single_area_segments,
			int (*fn)(struct cmd_context *cmd,
				  struct pv_segment *peg, uint32_t s,
				  void *data),
			void *data)
{
	uint32_t s;
	uint32_t remaining_seg_len, area_len, area_multiple;
	uint32_t stripes_per_mimage = 1;
	int r = 1;

	if (!seg && !(seg = find_seg_by_le(lv, le))) {
		log_error("Failed to find segment for %s extent %" PRIu32,
			  lv->name, le);
		return 0;
	}

	/* Remaining logical length of segment */
	remaining_seg_len = seg->len - (le - seg->le);

	if (remaining_seg_len > len)
		remaining_seg_len = len;

	if (max_seg_len && *max_seg_len > remaining_seg_len)
		*max_seg_len = remaining_seg_len;

	area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
	area_len = remaining_seg_len / area_multiple ? : 1;

	/* For striped mirrors, all the areas are counted, through the mirror layer */
	if (top_level_area_index == -1)
		stripes_per_mimage = _stripes_per_mimage(seg);

	for (s = first_area;
	     s < seg->area_count && (!max_areas || s <= max_areas);
	     s++) {
		if (seg_type(seg, s) == AREA_LV) {
			if (!(r = _for_each_pv(cmd, seg_lv(seg, s),
					       seg_le(seg, s) +
					       (le - seg->le) / area_multiple,
					       area_len, NULL, max_seg_len, 0,
					       (stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U,
					       (top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage),
					       only_single_area_segments, fn,
					       data)))
				stack;
		} else if (seg_type(seg, s) == AREA_PV)
			if (!(r = fn(cmd, seg_pvseg(seg, s), top_level_area_index != -1 ? (uint32_t) top_level_area_index + s : s, data)))
				stack;
		if (r != 1)
			return r;
	}

	/* FIXME only_single_area_segments used as workaround to skip log LV - needs new param? */
	if (!only_single_area_segments && seg_is_mirrored(seg) && seg->log_lv) {
		if (!(r = _for_each_pv(cmd, seg->log_lv, 0, seg->log_lv->le_count, NULL,
				       NULL, 0, 0, 0, only_single_area_segments,
				       fn, data)))
			stack;
		if (r != 1)
			return r;
	}

	/* FIXME Add snapshot cow LVs etc. */

	return 1;
}
/* Descending order of area size, so the biggest sorts first */
static int _comp_area(const void *l, const void *r)
{
	const struct pv_area_used *lhs = (const struct pv_area_used *) l;
	const struct pv_area_used *rhs = (const struct pv_area_used *) r;

	if (lhs->used < rhs->used)
		return 1;
	else if (lhs->used > rhs->used)
		return -1;

	return 0;
}
/*
 * Search for pvseg that matches condition
 */
struct pv_match {
	int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);

	struct pv_area_used *areas;
	struct pv_area *pva;
	uint32_t areas_size;
	const struct dm_config_node *cling_tag_list_cn;
	int s;	/* Area index of match */
};

/*
 * Is PV area on the same PV?
 */
static int _is_same_pv(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
	if (pvseg->pv != pva->map->pv)
		return 0;

	return 1;
}
/*
 * Does PV area have a tag listed in allocation/cling_tag_list that
 * matches a tag of the PV of the existing segment?
 */
static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
{
	const struct dm_config_value *cv;
	const char *str;
	const char *tag_matched;

	for (cv = pvmatch->cling_tag_list_cn->v; cv; cv = cv->next) {
		if (cv->type != DM_CFG_STRING) {
			log_error("Ignoring invalid string in config file entry "
				  "allocation/cling_tag_list");
			continue;
		}
		str = cv->v.str;
		if (!*str) {
			log_error("Ignoring empty string in config file entry "
				  "allocation/cling_tag_list");
			continue;
		}
		if (*str != '@') {
			log_error("Ignoring string not starting with @ in config file entry "
				  "allocation/cling_tag_list: %s", str);
			continue;
		}
		str++;
		if (!*str) {
			log_error("Ignoring empty tag in config file entry "
				  "allocation/cling_tag_list");
			continue;
		}

		/* Wildcard matches any tag against any tag. */
		if (!strcmp(str, "*")) {
			if (!str_list_match_list(&pvseg->pv->tags, &pva->map->pv->tags, &tag_matched))
				continue;
			else {
				log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
					  tag_matched, pv_dev_name(pvseg->pv), pv_dev_name(pva->map->pv));
				return 1;
			}
		}

		if (!str_list_match_item(&pvseg->pv->tags, str) ||
		    !str_list_match_item(&pva->map->pv->tags, str))
			continue;
		else {
			log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
				  str, pv_dev_name(pvseg->pv), pv_dev_name(pva->map->pv));
			return 1;
		}
	}

	return 0;
}
/*
 * Is PV area contiguous to PV segment?
 */
static int _is_contiguous(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
	if (pvseg->pv != pva->map->pv)
		return 0;

	if (pvseg->pe + pvseg->len != pva->start)
		return 0;

	return 1;
}

static void _reserve_area(struct pv_area_used *area_used, struct pv_area *pva, uint32_t required,
			  uint32_t ix_pva, uint32_t unreserved)
{
	log_debug("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
		  " length %" PRIu32 " leaving %" PRIu32 ".",
		  area_used->pva ? "Changing   " : "Considering",
		  ix_pva - 1, area_used->pva ? "to" : "as",
		  dev_name(pva->map->pv->dev), pva->start, required, unreserved);

	area_used->pva = pva;
	area_used->used = required;
}
static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
			 struct pv_segment *pvseg, uint32_t s,
			 void *data)
{
	struct pv_match *pvmatch = data;

	if (pvmatch->areas[s].pva)
		return 1;	/* Area already assigned */

	if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
		return 1;	/* Continue */

	if (s >= pvmatch->areas_size)
		return 1;

	/*
	 * Only used for cling and contiguous policies (which only make one allocation per PV)
	 * so it's safe to say all the available space is used.
	 */
	_reserve_area(&pvmatch->areas[s], pvmatch->pva, pvmatch->pva->count, s + 1, 0);

	return 2;	/* Finished */
}
/*
 * Is pva on same PV as any existing areas?
 */
static int _check_cling(struct alloc_handle *ah,
			const struct dm_config_node *cling_tag_list_cn,
			struct lv_segment *prev_lvseg, struct pv_area *pva,
			struct alloc_state *alloc_state)
{
	struct pv_match pvmatch;
	int r;
	uint32_t le, len;

	pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
	pvmatch.areas = alloc_state->areas;
	pvmatch.areas_size = alloc_state->areas_size;
	pvmatch.pva = pva;
	pvmatch.cling_tag_list_cn = cling_tag_list_cn;

	if (ah->maximise_cling) {
		/* Check entire LV */
		le = 0;
		len = prev_lvseg->le + prev_lvseg->len;
	} else {
		/* Only check 1 LE at end of previous LV segment */
		le = prev_lvseg->le + prev_lvseg->len - 1;
		len = 1;
	}

	/* FIXME Cope with stacks by flattening */
	if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL,
			       0, 0, -1, 1,
			       _is_condition, &pvmatch)))
		stack;

	if (r != 2)
		return 0;

	return 1;
}
/*
 * Is pva contiguous to any existing areas or on the same PV?
 */
static int _check_contiguous(struct cmd_context *cmd,
			     struct lv_segment *prev_lvseg, struct pv_area *pva,
			     struct alloc_state *alloc_state)
{
	struct pv_match pvmatch;
	int r;

	pvmatch.condition = _is_contiguous;
	pvmatch.areas = alloc_state->areas;
	pvmatch.areas_size = alloc_state->areas_size;
	pvmatch.pva = pva;
	pvmatch.cling_tag_list_cn = NULL;

	/* FIXME Cope with stacks by flattening */
	if (!(r = _for_each_pv(cmd, prev_lvseg->lv,
			       prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL,
			       0, 0, -1, 1,
			       _is_condition, &pvmatch)))
		stack;

	if (r != 2)
		return 0;

	return 1;
}
/*
 * Is pva on same PV as any areas already used in this allocation attempt?
 */
static int _check_cling_to_alloced(struct alloc_handle *ah, struct pv_area *pva, struct alloc_state *alloc_state)
{
	unsigned s;
	struct alloced_area *aa;

	/*
	 * Ignore log areas.  They are always allocated whole as part of the
	 * first allocation.  If they aren't yet set, we know we've nothing to do.
	 */
	if (alloc_state->log_area_count_still_needed)
		return 0;

	for (s = 0; s < ah->area_count; s++) {
		if (alloc_state->areas[s].pva)
			continue;	/* Area already assigned */
		dm_list_iterate_items(aa, &ah->alloced_areas[s]) {
			if (pva->map->pv == aa[0].pv) {
				_reserve_area(&alloc_state->areas[s], pva, pva->count, s + 1, 0);
				return 1;
			}
		}
	}

	return 0;
}

static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs)
{
	struct pv_list *pvl;

	dm_list_iterate_items(pvl, parallel_pvs)
		if (pv == pvl->pv)
			return 1;

	return 0;
}
/*
 * Decide whether or not to try allocation from supplied area pva.
 * alloc_state->areas may get modified.
 */
static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint32_t still_needed,
			     const struct alloc_parms *alloc_parms, struct alloc_state *alloc_state,
			     unsigned already_found_one, unsigned iteration_count, unsigned log_iteration_count)
{
	unsigned s;

	/* Skip fully-reserved areas (which are not currently removed from the list). */
	if (!pva->unreserved)
		return NEXT_AREA;

	/* FIXME Should this test be removed? */
	if (iteration_count)
		/*
		 * Don't use an area twice.
		 */
		for (s = 0; s < alloc_state->areas_size; s++)
			if (alloc_state->areas[s].pva == pva)
				return NEXT_AREA;

	/* If maximise_cling is set, perform several checks, otherwise perform exactly one. */
	if (!iteration_count && !log_iteration_count && alloc_parms->flags & (A_CONTIGUOUS | A_CLING | A_CLING_TO_ALLOCED)) {
		/* Contiguous? */
		if (((alloc_parms->flags & A_CONTIGUOUS) || ah->maximise_cling) &&
		    alloc_parms->prev_lvseg && _check_contiguous(ah->cmd, alloc_parms->prev_lvseg, pva, alloc_state))
			return PREFERRED;

		/* Try next area on same PV if looking for contiguous space */
		if (alloc_parms->flags & A_CONTIGUOUS)
			return NEXT_AREA;

		/* Cling_to_alloced? */
		if ((alloc_parms->flags & A_CLING_TO_ALLOCED) &&
		    _check_cling_to_alloced(ah, pva, alloc_state))
			return PREFERRED;

		/* Cling? */
		if (!(alloc_parms->flags & A_CLING_BY_TAGS) &&
		    alloc_parms->prev_lvseg && _check_cling(ah, NULL, alloc_parms->prev_lvseg, pva, alloc_state))
			/* If this PV is suitable, use this first area */
			return PREFERRED;

		if (!ah->maximise_cling && !(alloc_parms->flags & A_CLING_BY_TAGS))
			return NEXT_PV;

		/* Cling_by_tags? */
		if ((alloc_parms->flags & (A_CLING_BY_TAGS | A_CLING_TO_ALLOCED)) && ah->cling_tag_list_cn &&
		    alloc_parms->prev_lvseg && _check_cling(ah, ah->cling_tag_list_cn, alloc_parms->prev_lvseg, pva, alloc_state))
			return PREFERRED;

		if (alloc_parms->flags & A_CLING_BY_TAGS)
			return NEXT_PV;

		/* All areas on this PV give same result so pointless checking more */
		return NEXT_PV;
	}

	/* Normal/Anywhere */

	/* Is it big enough on its own? */
	if (pva->unreserved * ah->area_multiple < still_needed &&
	    ((!(alloc_parms->flags & A_CAN_SPLIT) && !ah->log_area_count) ||
	     (already_found_one && alloc_parms->alloc != ALLOC_ANYWHERE)))
		return NEXT_PV;

	return USE_AREA;
}
/*
 * Decide how many extents we're trying to obtain from a given area.
 * Removes the extents from further consideration.
 */
static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *pva, unsigned ix_pva, uint32_t max_to_allocate, alloc_policy_t alloc)
{
	uint32_t required = max_to_allocate / ah->area_multiple;

	/*
	 * Update amount unreserved - effectively splitting an area
	 * into two or more parts.  If the whole stripe doesn't fit,
	 * reduce amount we're looking for.
	 */
	if (alloc == ALLOC_ANYWHERE) {
		if (ix_pva - 1 >= ah->area_count)
			required = ah->log_len;
	} else if (required < ah->log_len)
		required = ah->log_len;

	if (required >= pva->unreserved) {
		required = pva->unreserved;
		pva->unreserved = 0;
	} else {
		pva->unreserved -= required;
		reinsert_changed_pv_area(pva);
	}

	return required;
}
static int _reserve_required_area(struct alloc_handle *ah, uint32_t max_to_allocate,
				  unsigned ix_pva, struct pv_area *pva,
				  struct alloc_state *alloc_state, alloc_policy_t alloc)
{
	uint32_t required = _calc_required_extents(ah, pva, ix_pva, max_to_allocate, alloc);
	uint32_t s;

	/* Expand areas array if needed after an area was split. */
	if (ix_pva > alloc_state->areas_size) {
		alloc_state->areas_size *= 2;
		if (!(alloc_state->areas = dm_realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
			log_error("Memory reallocation for parallel areas failed.");
			return 0;
		}
		for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
			alloc_state->areas[s].pva = NULL;
	}

	_reserve_area(&alloc_state->areas[ix_pva - 1], pva, required, ix_pva, pva->unreserved);

	return 1;
}

static void _clear_areas(struct alloc_state *alloc_state)
{
	unsigned s;

	for (s = 0; s < alloc_state->areas_size; s++)
		alloc_state->areas[s].pva = NULL;
}
static void _reset_unreserved(struct dm_list *pvms)
{
	struct pv_map *pvm;
	struct pv_area *pva;

	dm_list_iterate_items(pvm, pvms)
		dm_list_iterate_items(pva, &pvm->areas)
			if (pva->unreserved != pva->count) {
				pva->unreserved = pva->count;
				reinsert_changed_pv_area(pva);
			}
}
static void _report_needed_allocation_space(struct alloc_handle *ah,
					    struct alloc_state *alloc_state)
{
	const char *metadata_type;
	uint32_t parallel_areas_count, parallel_area_size;
	uint32_t metadata_count, metadata_size;

	parallel_area_size = (ah->new_extents - alloc_state->allocated) / ah->area_multiple -
		((ah->alloc_and_split_meta) ? ah->log_len : 0);

	parallel_areas_count = ah->area_count + ah->parity_count;

	metadata_size = ah->log_len;
	if (ah->alloc_and_split_meta) {
		metadata_type = "RAID metadata area";
		metadata_count = parallel_areas_count;
	} else {
		metadata_type = "mirror log";
		metadata_count = alloc_state->log_area_count_still_needed;
	}

	log_debug("Still need %" PRIu32 " total extents:",
		  parallel_area_size * parallel_areas_count + metadata_size * metadata_count);
	log_debug("  %" PRIu32 " (%" PRIu32 " data/%" PRIu32
		  " parity) parallel areas of %" PRIu32 " extents each",
		  parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size);
	log_debug("  %" PRIu32 " %ss of %" PRIu32 " extents each",
		  metadata_count, metadata_type, metadata_size);
}
/*
 * Returns 1 regardless of whether any space was found, except on error.
 */
static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc_parms *alloc_parms,
				     struct dm_list *pvms, struct alloc_state *alloc_state,
				     struct dm_list *parallel_pvs, uint32_t max_to_allocate)
{
	unsigned ix = 0;
	unsigned last_ix;
	struct pv_map *pvm;
	struct pv_area *pva;
	unsigned preferred_count = 0;
	unsigned already_found_one;
	unsigned ix_offset = 0;	/* Offset for non-preferred allocations */
	unsigned ix_log_offset;	/* Offset to start of areas to use for log */
	unsigned too_small_for_log_count;	/* How many too small for log? */
	unsigned iteration_count = 0;	/* cling_to_alloced may need 2 iterations */
	unsigned log_iteration_count = 0;	/* extra iteration for logs on data devices */
	struct alloced_area *aa;
	uint32_t s;
	uint32_t devices_needed = ah->area_count + ah->parity_count;

	/* ix_offset holds the number of parallel allocations that must be contiguous/cling */
	if (alloc_parms->flags & (A_CONTIGUOUS | A_CLING) && alloc_parms->prev_lvseg)
		ix_offset = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;

	if (alloc_parms->flags & A_CLING_TO_ALLOCED)
		ix_offset = ah->area_count;

	if (alloc_parms->alloc == ALLOC_NORMAL)
		log_debug("Cling_to_allocated is %sset",
			  alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");

	_clear_areas(alloc_state);
	_reset_unreserved(pvms);

	_report_needed_allocation_space(ah, alloc_state);

	/* ix holds the number of areas found on other PVs */
	do {
		if (log_iteration_count) {
			log_debug("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed);
		} else if (iteration_count)
			log_debug("Filled %u out of %u preferred areas so far.", preferred_count, ix_offset);
		/*
		 * Provide for escape from the loop if no progress is made.
		 * This should not happen: ALLOC_ANYWHERE should be able to use
		 * all available space.  (If there aren't enough extents, the code
		 * should not reach this point.)
		 */
		last_ix = ix;

		/*
		 * Put the smallest area of each PV that is at least the
		 * size we need into areas array.  If there isn't one
		 * that fits completely and we're allowed more than one
		 * LV segment, then take the largest remaining instead.
		 */
		dm_list_iterate_items(pvm, pvms) {
			/* PV-level checks */
			if (dm_list_empty(&pvm->areas))
				continue;	/* Next PV */

			if (alloc_parms->alloc != ALLOC_ANYWHERE) {
				/* Don't allocate onto the log PVs */
				if (ah->log_area_count)
					dm_list_iterate_items(aa, &ah->alloced_areas[ah->area_count])
						for (s = 0; s < ah->log_area_count; s++)
							if (aa[s].pv == pvm->pv)
								goto next_pv;

				/* FIXME Split into log and non-log parallel_pvs and only check the log ones if log_iteration? */
				/* (I've temporarily disabled the check.) */
				/* Avoid PVs used by existing parallel areas */
				if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs))
					goto next_pv;

				/*
				 * Avoid PVs already set aside for log.
				 * We only reach here if there were enough PVs for the main areas but
				 * not enough for the logs.
				 */
				if (log_iteration_count) {
					for (s = devices_needed; s < ix + ix_offset; s++)
						if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
							goto next_pv;
				/* On a second pass, avoid PVs already used in an uncommitted area */
				} else if (iteration_count)
					for (s = 0; s < devices_needed; s++)
						if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
							goto next_pv;
			}

			already_found_one = 0;
			/* First area in each list is the largest */
			dm_list_iterate_items(pva, &pvm->areas) {
				/*
				 * There are two types of allocations, which can't be mixed at present.
				 * PREFERRED are stored immediately in a specific parallel slot.
				 * USE_AREA are stored for later, then sorted and chosen from.
				 */
				switch(_check_pva(ah, pva, max_to_allocate, alloc_parms,
						  alloc_state, already_found_one, iteration_count, log_iteration_count)) {

				case PREFERRED:
					preferred_count++;
					/* Fall through */

				case NEXT_PV:
					goto next_pv;

				case NEXT_AREA:
					continue;

				case USE_AREA:
					/*
					 * Except with ALLOC_ANYWHERE, replace first area with this
					 * one which is smaller but still big enough.
					 */
					if (!already_found_one ||
					    alloc_parms->alloc == ALLOC_ANYWHERE) {
						ix++;
						already_found_one = 1;
					}

					/* Reserve required amount of pva */
					if (!_reserve_required_area(ah, max_to_allocate, ix + ix_offset,
								    pva, alloc_state, alloc_parms->alloc))
						return_0;
				}
			}

		next_pv:
			/* With ALLOC_ANYWHERE we ignore further PVs once we have at least enough areas */
			/* With cling and contiguous we stop if we found a match for *all* the areas */
			/* FIXME Rename these variables! */
			if ((alloc_parms->alloc == ALLOC_ANYWHERE &&
			     ix + ix_offset >= devices_needed + alloc_state->log_area_count_still_needed) ||
			    (preferred_count == ix_offset &&
			     (ix_offset == devices_needed + alloc_state->log_area_count_still_needed)))
				break;
		}
	} while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < devices_needed + alloc_state->log_area_count_still_needed) ||
		 /* With cling_to_alloced, if there were gaps in the preferred areas, have a second iteration */
		 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count &&
		  (preferred_count < ix_offset || alloc_state->log_area_count_still_needed) &&
		  (alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) ||
		 /* Extra iteration needed to fill log areas on PVs already used? */
		 (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == ix_offset && !ah->mirror_logs_separate &&
		  (ix + preferred_count >= devices_needed) &&
		  (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) && !log_iteration_count++));
	if (preferred_count < ix_offset && !(alloc_parms->flags & A_CLING_TO_ALLOCED))
		return 1;

	if (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed)
		return 1;

	/* Sort the areas so we allocate from the biggest */
	if (log_iteration_count) {
		if (ix > devices_needed + 1) {
			log_debug("Sorting %u log areas", ix - devices_needed);
			qsort(alloc_state->areas + devices_needed, ix - devices_needed, sizeof(*alloc_state->areas),
			      _comp_area);
		}
	} else if (ix > 1) {
		log_debug("Sorting %u areas", ix);
		qsort(alloc_state->areas + ix_offset, ix, sizeof(*alloc_state->areas),
		      _comp_area);
	}

	/* If there are gaps in our preferred areas, fill them from the sorted part of the array */
	if (preferred_count && preferred_count != ix_offset) {
		for (s = 0; s < devices_needed; s++)
			if (!alloc_state->areas[s].pva) {
				alloc_state->areas[s].pva = alloc_state->areas[ix_offset].pva;
				alloc_state->areas[s].used = alloc_state->areas[ix_offset].used;
				alloc_state->areas[ix_offset++].pva = NULL;
			}
	}

	/*
	 * First time around, if there's a log, allocate it on the
	 * smallest device that has space for it.
	 */
	too_small_for_log_count = 0;
	ix_log_offset = 0;

	/* FIXME This logic is due to its heritage and can be simplified! */
	if (alloc_state->log_area_count_still_needed) {
		/* How many areas are too small for the log? */
		while (too_small_for_log_count < ix_offset + ix &&
		       (*(alloc_state->areas + ix_offset + ix - 1 -
			  too_small_for_log_count)).used < ah->log_len)
			too_small_for_log_count++;
		ix_log_offset = ix_offset + ix - too_small_for_log_count - ah->log_area_count;
	}

	if (ix + ix_offset < devices_needed +
	    (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
				    too_small_for_log_count : 0))
		return 1;

	/*
	 * Finally add the space identified to the list of areas to be used.
	 */
	if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset))
		return_0;

	/*
	 * Log is always allocated first time.
	 */
	alloc_state->log_area_count_still_needed = 0;

	return 1;
}
/*
 * Choose sets of parallel areas to use, respecting any constraints
 * supplied in alloc_parms.
 */
static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, struct alloc_parms *alloc_parms,
						   struct dm_list *pvms, struct alloc_state *alloc_state)
{
	uint32_t max_tmp;
	uint32_t max_to_allocate;	/* Maximum extents to allocate this time */
	uint32_t old_allocated;
	uint32_t next_le;
	struct seg_pvs *spvs;
	struct dm_list *parallel_pvs;

	/* FIXME This algorithm needs a lot of cleaning up! */
	/* FIXME anywhere doesn't find all space yet */
	do {
		parallel_pvs = NULL;
		max_to_allocate = alloc_parms->extents_still_needed - alloc_state->allocated;

		/*
		 * If there are existing parallel PVs, avoid them and reduce
		 * the maximum we can allocate in one go accordingly.
		 */
		if (ah->parallel_areas) {
			next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) + alloc_state->allocated / ah->area_multiple;
			dm_list_iterate_items(spvs, ah->parallel_areas) {
				if (next_le >= spvs->le + spvs->len)
					continue;

				max_tmp = max_to_allocate +
					alloc_state->allocated;

				/*
				 * Because a request that groups metadata and
				 * data together will be split, we must adjust
				 * the comparison accordingly.
				 */
				if (ah->alloc_and_split_meta)
					max_tmp -= ah->log_len;
				if (max_tmp > (spvs->le + spvs->len) * ah->area_multiple) {
					max_to_allocate = (spvs->le + spvs->len) * ah->area_multiple - alloc_state->allocated;
					max_to_allocate += ah->alloc_and_split_meta ? ah->log_len : 0;
				}
				parallel_pvs = &spvs->pvs;
				break;
			}
		}

		old_allocated = alloc_state->allocated;

		if (!_find_some_parallel_space(ah, alloc_parms, pvms, alloc_state, parallel_pvs, max_to_allocate))
			return_0;

		/*
		 * If we didn't allocate anything this time and had
		 * A_CLING_TO_ALLOCED set, try again without it.
		 *
		 * For ALLOC_NORMAL, if we did allocate something without the
		 * flag set, set it and continue so that further allocations
		 * remain on the same disks where possible.
		 */
		if (old_allocated == alloc_state->allocated) {
			if (alloc_parms->flags & A_CLING_TO_ALLOCED)
				alloc_parms->flags &= ~A_CLING_TO_ALLOCED;
			else
				break;	/* Give up */
		} else if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL &&
			   !(alloc_parms->flags & A_CLING_TO_ALLOCED))
			alloc_parms->flags |= A_CLING_TO_ALLOCED;
	} while ((alloc_parms->alloc != ALLOC_CONTIGUOUS) && alloc_state->allocated != alloc_parms->extents_still_needed && (alloc_parms->flags & A_CAN_SPLIT));

	return 1;
}
/*
 * Allocate several segments, each the same size, in parallel.
 * If mirrored_pv and mirrored_pe are supplied, it is used as
 * the first area, and additional areas are allocated parallel to it.
 */
static int _allocate(struct alloc_handle *ah,
		     struct volume_group *vg,
		     struct logical_volume *lv,
		     unsigned can_split,
		     struct dm_list *allocatable_pvs)
{
	uint32_t old_allocated;
	struct lv_segment *prev_lvseg = NULL;
	int r = 0;
	struct dm_list *pvms;
	alloc_policy_t alloc;
	struct alloc_parms alloc_parms;
	struct alloc_state alloc_state;

	alloc_state.allocated = lv ? lv->le_count : 0;

	if (alloc_state.allocated >= ah->new_extents && !ah->log_area_count) {
		log_error("_allocate called with no work to do!");
		return 1;
	}

	if (ah->area_multiple > 1 &&
	    (ah->new_extents - alloc_state.allocated) % ah->area_multiple) {
		log_error("Number of extents requested (%d) needs to be divisible by %d.",
			  ah->new_extents - alloc_state.allocated,
			  ah->area_multiple);
		return 0;
	}

	alloc_state.log_area_count_still_needed = ah->log_area_count;

	if (ah->alloc == ALLOC_CONTIGUOUS)
		can_split = 0;

	if (lv && !dm_list_empty(&lv->segments))
		prev_lvseg = dm_list_item(dm_list_last(&lv->segments),
					  struct lv_segment);
	/*
	 * Build the sets of available areas on the pv's.
	 */
	if (!(pvms = create_pv_maps(ah->mem, vg, allocatable_pvs)))
		return_0;

	if (!_log_parallel_areas(ah->mem, ah->parallel_areas))
		stack;

	alloc_state.areas_size = dm_list_size(pvms);
	if (alloc_state.areas_size &&
	    alloc_state.areas_size < (ah->area_count + ah->parity_count + ah->log_area_count)) {
		if (ah->alloc != ALLOC_ANYWHERE && ah->mirror_logs_separate) {
			log_error("Not enough PVs with free space available "
				  "for parallel allocation.");
			log_error("Consider --alloc anywhere if desperate.");
			return 0;
		}
		alloc_state.areas_size = ah->area_count + ah->parity_count + ah->log_area_count;
	}

	/* Upper bound if none of the PVs in prev_lvseg is in pvms */
	/* FIXME Work size out properly */
	if (prev_lvseg)
		alloc_state.areas_size += _stripes_per_mimage(prev_lvseg) * prev_lvseg->area_count;

	/* Allocate an array of pv_areas to hold the largest space on each PV */
	if (!(alloc_state.areas = dm_malloc(sizeof(*alloc_state.areas) * alloc_state.areas_size))) {
		log_error("Couldn't allocate areas array.");
		return 0;
	}
	/*
	 * cling includes implicit cling_by_tags
	 * but it does nothing unless the lvm.conf setting is present.
	 */
	if (ah->alloc == ALLOC_CLING)
		ah->alloc = ALLOC_CLING_BY_TAGS;
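
	/*
	 * Assuming the usual alloc_policy_t ordering, the loop below works
	 * from the strictest policy to the loosest - contiguous, cling,
	 * cling_by_tags, normal, anywhere - and stops as soon as the
	 * request is satisfied or the requested policy itself has been
	 * tried.
	 */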
	/* Attempt each defined allocation policy in turn */
	for (alloc = ALLOC_CONTIGUOUS; alloc < ALLOC_INHERIT; alloc++) {
		/* Skip cling_by_tags if no list defined */
		if (alloc == ALLOC_CLING_BY_TAGS && !ah->cling_tag_list_cn)
			continue;
		old_allocated = alloc_state.allocated;
		log_debug("Trying allocation using %s policy.", get_alloc_string(alloc));

		if (!_sufficient_pes_free(ah, pvms, alloc_state.allocated, ah->new_extents))
			goto_out;

		_init_alloc_parms(ah, &alloc_parms, alloc, prev_lvseg,
				  can_split, alloc_state.allocated,
				  ah->new_extents);

		if (!_find_max_parallel_space_for_one_policy(ah, &alloc_parms, pvms, &alloc_state))
			goto_out;

		if ((alloc_state.allocated == ah->new_extents && !alloc_state.log_area_count_still_needed) || (ah->alloc == alloc) ||
		    (!can_split && (alloc_state.allocated != old_allocated)))
			break;
	}

	if (alloc_state.allocated != ah->new_extents) {
		log_error("Insufficient suitable %sallocatable extents "
			  "for logical volume %s: %u more required",
			  can_split ? "" : "contiguous ",
			  lv ? lv->name : "",
			  (ah->new_extents - alloc_state.allocated) * ah->area_count
			  / ah->area_multiple);
		goto out;
	}

	if (alloc_state.log_area_count_still_needed) {
		log_error("Insufficient free space for log allocation "
			  "for logical volume %s.",
			  lv ? lv->name : "");
		goto out;
	}

	r = 1;

      out:
	dm_free(alloc_state.areas);
	return r;
}
int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status,
			   uint32_t extents, const struct segment_type *segtype,
			   const char *thin_pool_name)
{
	struct lv_segment *seg;
	struct logical_volume *thin_pool_lv = NULL;
	struct lv_list *lvl;
	uint32_t size;
	if (thin_pool_name) {
		if (!(lvl = find_lv_in_vg(lv->vg, thin_pool_name))) {
			log_error("Unable to find existing pool LV %s in VG %s.",
				  thin_pool_name, lv->vg->name);
			return 0;
		}
		thin_pool_lv = lvl->lv;
		size = first_seg(thin_pool_lv)->chunk_size;
		if (lv->vg->extent_size < size) {
			/* Align extents on chunk boundary size */
			size = ((uint64_t)lv->vg->extent_size * extents + size - 1) /
				size * size / lv->vg->extent_size;
			if (size != extents) {
				log_print("Rounding size (%d extents) up to chunk boundary "
					  "size (%d extents).", extents, size);
				extents = size;
			}
		}
	}
	if (!dm_list_empty(&lv->segments) &&
	    (seg = last_seg(lv)) && (seg->segtype == segtype)) {
		seg->area_len += extents;
		seg->len += extents;
	} else {
		if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents,
					     status, 0, NULL, thin_pool_lv, 0,
					     extents, 0, 0, 0, NULL))) {
			log_error("Couldn't allocate new zero segment.");
			return 0;
		}
		lv->status |= VIRTUAL;
		dm_list_add(&lv->segments, &seg->list);
	}

	lv->le_count += extents;
	lv->size += (uint64_t) extents * lv->vg->extent_size;

	return 1;
}
2124
* Entry point for all extent allocations.
2126
struct alloc_handle *allocate_extents(struct volume_group *vg,
2127
struct logical_volume *lv,
2128
const struct segment_type *segtype,
2130
uint32_t mirrors, uint32_t log_count,
2131
uint32_t region_size, uint32_t extents,
2132
struct dm_list *allocatable_pvs,
2133
alloc_policy_t alloc,
2134
struct dm_list *parallel_areas)
2136
struct alloc_handle *ah;
2137
uint32_t new_extents;
2139
if (segtype_is_virtual(segtype)) {
2140
log_error("allocate_extents does not handle virtual segments");
2144
if (!allocatable_pvs) {
2145
log_error(INTERNAL_ERROR "Missing allocatable pvs.");
2149
if (vg->fid->fmt->ops->segtype_supported &&
2150
!vg->fid->fmt->ops->segtype_supported(vg->fid, segtype)) {
2151
log_error("Metadata format (%s) does not support required "
2152
"LV segment type (%s).", vg->fid->fmt->name,
2154
log_error("Consider changing the metadata format by running "
2159
if (alloc == ALLOC_INHERIT)
2162
new_extents = (lv ? lv->le_count : 0) + extents;
2163
if (!(ah = _alloc_init(vg->cmd, vg->cmd->mem, segtype, alloc,
2164
new_extents, mirrors, stripes, log_count,
2165
vg->extent_size, region_size,
2169
if (!_allocate(ah, vg, lv, 1, allocatable_pvs)) {
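/*
 * A minimal sketch of the expected calling sequence (cf. lv_extend()
 * below); error handling and cleanup on failure are omitted:
 *
 *     struct alloc_handle *ah;
 *
 *     if (!(ah = allocate_extents(vg, lv, segtype, stripes, mirrors,
 *                                 log_count, region_size, extents,
 *                                 allocatable_pvs, alloc, NULL)))
 *         return 0;
 *     if (!lv_add_segment(ah, 0, ah->area_count, lv, segtype,
 *                         stripe_size, 0, 0))
 *         r = 0;
 *     alloc_destroy(ah);
 */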
/*
 * Add new segments to an LV from supplied list of areas.
 */
int lv_add_segment(struct alloc_handle *ah,
                   uint32_t first_area, uint32_t num_areas,
                   struct logical_volume *lv,
                   const struct segment_type *segtype,
                   uint32_t stripe_size,
                   uint32_t region_size)
        log_error("Missing segtype in lv_add_segment().");

    if (segtype_is_virtual(segtype)) {
        log_error("lv_add_segment cannot handle virtual segments");

    if ((status & MIRROR_LOG) && dm_list_size(&lv->segments)) {
        log_error("Log segments can only be added to an empty LV");

    if (!_setup_alloced_segments(lv, &ah->alloced_areas[first_area],
                                 stripe_size, segtype,

    if ((segtype->flags & SEG_CAN_SPLIT) && !lv_merge_segments(lv)) {
        log_error("Couldn't merge segments after extending "

    if (lv->vg->fid->fmt->ops->lv_setup &&
        !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))

/*
 * The "mirror" segment type doesn't support splitting.
 * So, when adding mirrors to a linear LV segment, first split it,
 * then convert it to "mirror" and add areas.
 */
static struct lv_segment *_convert_seg_to_mirror(struct lv_segment *seg,
                                                 uint32_t region_size,
                                                 struct logical_volume *log_lv)
    struct lv_segment *newseg;

    if (!seg_is_striped(seg)) {
        log_error("Can't convert non-striped segment to mirrored.");

    if (seg->area_count > 1) {
        log_error("Can't convert striped segment with multiple areas "

    if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, "mirror"),
                                    seg->lv, seg->le, seg->len,
                                    seg->status, seg->stripe_size,
                                    seg->area_count, seg->area_len,
                                    seg->chunk_size, region_size,
                                    seg->extents_copied, NULL))) {
        log_error("Couldn't allocate converted LV segment");

    for (s = 0; s < seg->area_count; s++)
        if (!move_lv_segment_area(newseg, s, seg, s))

    seg->pvmove_source_seg = NULL; /* Not maintained after allocation */

    dm_list_add(&seg->list, &newseg->list);
    dm_list_del(&seg->list);
/*
 * Add new areas to mirrored segments
 */
int lv_add_mirror_areas(struct alloc_handle *ah,
                        struct logical_volume *lv, uint32_t le,
                        uint32_t region_size)
    struct alloced_area *aa;
    struct lv_segment *seg;
    uint32_t current_le = le;
    uint32_t s, old_area_count, new_area_count;

    dm_list_iterate_items(aa, &ah->alloced_areas[0]) {
        if (!(seg = find_seg_by_le(lv, current_le))) {
            log_error("Failed to find segment for %s extent %"
                      PRIu32, lv->name, current_le);

        /* Allocator assures aa[0].len <= seg->area_len */
        if (aa[0].len < seg->area_len) {
            if (!lv_split_segment(lv, seg->le + aa[0].len)) {
                log_error("Failed to split segment at %s "
                          "extent %" PRIu32, lv->name, le);

        if (!seg_is_mirrored(seg) &&
            (!(seg = _convert_seg_to_mirror(seg, region_size, NULL))))

        old_area_count = seg->area_count;
        new_area_count = old_area_count + ah->area_count;

        if (!_lv_segment_add_areas(lv, seg, new_area_count))

        for (s = 0; s < ah->area_count; s++) {
            if (!set_lv_segment_area_pv(seg, s + old_area_count,
                                        aa[s].pv, aa[s].pe))

        current_le += seg->area_len;

    lv->status |= MIRRORED;

    if (lv->vg->fid->fmt->ops->lv_setup &&
        !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
/*
 * Add mirror image LVs to mirrored segments
 */
int lv_add_mirror_lvs(struct logical_volume *lv,
                      struct logical_volume **sub_lvs,
                      uint32_t num_extra_areas,
                      uint64_t status, uint32_t region_size)
    struct lv_segment *seg;
    uint32_t old_area_count, new_area_count;
    struct segment_type *mirror_segtype;

    seg = first_seg(lv);

    if (dm_list_size(&lv->segments) != 1 || seg_type(seg, 0) != AREA_LV) {
        log_error("Mirror layer must be inserted before adding mirrors");

    mirror_segtype = get_segtype_from_string(lv->vg->cmd, "mirror");
    if (seg->segtype != mirror_segtype)
        if (!(seg = _convert_seg_to_mirror(seg, region_size, NULL)))

    if (region_size && region_size != seg->region_size) {
        log_error("Conflicting region_size");

    old_area_count = seg->area_count;
    new_area_count = old_area_count + num_extra_areas;

    if (!_lv_segment_add_areas(lv, seg, new_area_count)) {
        log_error("Failed to allocate widened LV segment for %s.",

    for (m = 0; m < old_area_count; m++)
        seg_lv(seg, m)->status |= status;

    for (m = old_area_count; m < new_area_count; m++) {
        if (!set_lv_segment_area_lv(seg, m, sub_lvs[m - old_area_count],

        lv_set_hidden(sub_lvs[m - old_area_count]);

    lv->status |= MIRRORED;

/*
 * Turn an empty LV into a mirror log.
 *
 * FIXME: Mirrored logs are built inefficiently.
 * A mirrored log currently uses the same layout that a mirror
 * LV uses. The mirror layer sits on top of AREA_LVs which form the
 * legs, rather than on AREA_PVs. This is done to allow re-use of the
 * various mirror functions to also handle the mirrored LV that makes
 * up the log.
 *
 * If we used AREA_PVs under the mirror layer of a log, we could
 * assemble it all at once by calling 'lv_add_segment' with the
 * appropriate segtype (mirror/stripe), like this:
 *     lv_add_segment(ah, ah->area_count, ah->log_area_count,
 *                    log_lv, segtype, 0, MIRROR_LOG, 0);
 *
 * For now, we use the same mechanism to build a mirrored log as we
 * do for building a mirrored LV: 1) create initial LV, 2) add a
 * mirror layer, and 3) add the remaining copy LVs
 */
int lv_add_log_segment(struct alloc_handle *ah, uint32_t first_area,
                       struct logical_volume *log_lv, uint64_t status)
    return lv_add_segment(ah, ah->area_count + first_area, 1, log_lv,
                          get_segtype_from_string(log_lv->vg->cmd,
static int _lv_insert_empty_sublvs(struct logical_volume *lv,
                                   const struct segment_type *segtype,
                                   uint32_t stripe_size, uint32_t region_size,
    struct logical_volume *sub_lv;
    uint64_t sub_lv_status = 0;
    const char *layer_name;
    size_t len = strlen(lv->name) + 32;
    struct lv_segment *mapseg;

    if (lv->le_count || !dm_list_empty(&lv->segments)) {
        log_error(INTERNAL_ERROR
                  "Non-empty LV passed to _lv_insert_empty_sublvs");

    if (segtype_is_raid(segtype)) {
        sub_lv_status = RAID_IMAGE;
        layer_name = "rimage";
    } else if (segtype_is_mirrored(segtype)) {
        lv->status |= MIRRORED;
        sub_lv_status = MIRROR_IMAGE;
        layer_name = "mimage";

    /*
     * First, create our top-level segment for our top-level LV
     */
    if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, lv->status,
                                    stripe_size, NULL, NULL,
                                    devices, 0, 0, region_size, 0, NULL))) {
        log_error("Failed to create mapping segment for %s", lv->name);

    /*
     * Next, create all of our sub LVs and link them in.
     */
    for (i = 0; i < devices; i++) {
        if (dm_snprintf(img_name, len, "%s_%s_%u",
                        lv->name, layer_name, i) < 0)
        if (dm_snprintf(img_name, len, "%s_%s",
                        lv->name, layer_name) < 0)

        /* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
        if (!(sub_lv = lv_create_empty(img_name, NULL,
                                       LVM_READ | LVM_WRITE,
                                       lv->alloc, lv->vg)))
        if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, sub_lv_status))

        /* Metadata LVs for raid */
        if (segtype_is_raid(segtype)) {
            if (dm_snprintf(img_name, len, "%s_rmeta_%u", lv->name, i) < 0)

            /* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
            if (!(sub_lv = lv_create_empty(img_name, NULL,
                                           LVM_READ | LVM_WRITE,
                                           lv->alloc, lv->vg)))
            if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, RAID_META))

    dm_list_add(&lv->segments, &mapseg->list);
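    /*
     * Example of the resulting layout, with hypothetical names: creating
     * a 2-way RAID1 LV "lvol0" yields hidden sub LVs lvol0_rimage_0 and
     * lvol0_rimage_1 plus lvol0_rmeta_0 and lvol0_rmeta_1, all linked in
     * as areas of the single top-level mapping segment; a mirror instead
     * uses lvol0_mimage_N sub LVs and no *_rmeta_* LVs.
     */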
static int _lv_extend_layered_lv(struct alloc_handle *ah,
                                 struct logical_volume *lv,
                                 uint32_t extents, uint32_t first_area,
                                 uint32_t stripes, uint32_t stripe_size)
    const struct segment_type *segtype;
    struct logical_volume *sub_lv, *meta_lv;
    struct lv_segment *seg;
    int clear_metadata = 0;

    segtype = get_segtype_from_string(lv->vg->cmd, "striped");

    /*
     * The component devices of a "striped" LV all go in the same
     * LV. However, RAID has an LV for each device - making the
     * 'stripes' and 'stripe_size' parameters meaningless.
     */
    if (seg_is_raid(first_seg(lv))) {

    seg = first_seg(lv);
    for (fa = first_area, s = 0; s < seg->area_count; s++) {
        if (is_temporary_mirror_layer(seg_lv(seg, s))) {
            if (!_lv_extend_layered_lv(ah, seg_lv(seg, s), extents,
                                       fa, stripes, stripe_size))

            fa += lv_mirror_count(seg_lv(seg, s));

        sub_lv = seg_lv(seg, s);
        if (!lv_add_segment(ah, fa, stripes, sub_lv, segtype,
                            stripe_size, sub_lv->status, 0)) {
            log_error("Aborting. Failed to extend %s in %s.",
                      sub_lv->name, lv->name);

        /* Extend metadata LVs only on initial creation */
        if (seg_is_raid(seg) && !lv->le_count) {
            if (!seg->meta_areas) {
                log_error("No meta_areas for RAID type");

            meta_lv = seg_metalv(seg, s);
            if (!lv_add_segment(ah, fa + seg->area_count, 1,
                                meta_lv, segtype, 0,
                                meta_lv->status, 0)) {
                log_error("Failed to extend %s in %s.",
                          meta_lv->name, lv->name);

            lv_set_visible(meta_lv);

    if (clear_metadata) {
        /*
         * We must clear the metadata areas upon creation.
         */
        if (!vg_write(lv->vg) || !vg_commit(lv->vg))

        for (s = 0; s < seg->area_count; s++) {
            meta_lv = seg_metalv(seg, s);

            if (!activate_lv(meta_lv->vg->cmd, meta_lv)) {
                log_error("Failed to activate %s/%s for clearing",
                          meta_lv->vg->name, meta_lv->name);

            log_verbose("Clearing metadata area of %s/%s",
                        meta_lv->vg->name, meta_lv->name);
            /*
             * Rather than wiping meta_lv->size, we can simply wipe the
             * first sector ('1') to remove the superblock of any
             * previous RAID devices. It is much quicker.
             */
            if (!set_lv(meta_lv->vg->cmd, meta_lv, 1, 0)) {
                log_error("Failed to zero %s/%s",
                          meta_lv->vg->name, meta_lv->name);

            if (!deactivate_lv(meta_lv->vg->cmd, meta_lv)) {
                log_error("Failed to deactivate %s/%s",
                          meta_lv->vg->name, meta_lv->name);

            lv_set_hidden(meta_lv);

    seg->area_len += extents;
    seg->len += extents;
    lv->le_count += extents;
    lv->size += (uint64_t) extents * lv->vg->extent_size;
/*
 * Entry point for single-step LV allocation + extension.
 */
int lv_extend(struct logical_volume *lv,
              const struct segment_type *segtype,
              uint32_t stripes, uint32_t stripe_size,
              uint32_t mirrors, uint32_t region_size,
              uint32_t extents, const char *thin_pool_name,
              struct dm_list *allocatable_pvs, alloc_policy_t alloc)
    struct alloc_handle *ah;
    uint32_t sub_lv_count;

    log_very_verbose("Extending segment type, %s", segtype->name);

    if (segtype_is_virtual(segtype))
        return lv_add_virtual_segment(lv, 0u, extents, segtype, thin_pool_name);

    if (!lv->le_count && segtype_is_thin_pool(segtype)) {
        /* Thin pool allocation treats its metadata device like a mirror log. */
        /* FIXME Allow pool and data on same device with NORMAL */
        /* FIXME Support striped metadata pool */
    } else if (segtype_is_raid(segtype) && !lv->le_count)
        log_count = mirrors * stripes;
    /* FIXME log_count should be 1 for mirrors */

    if (!(ah = allocate_extents(lv->vg, lv, segtype, stripes, mirrors,
                                log_count, region_size, extents,
                                allocatable_pvs, alloc, NULL)))

    if (segtype_is_thin_pool(segtype)) {
        if (!lv->le_count) {
            if (!(r = extend_pool(lv, segtype, ah, stripes, stripe_size)))
        } else if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
                                               stripes, stripe_size)))
    } else if (!segtype_is_mirrored(segtype) && !segtype_is_raid(segtype)) {
        if (!(r = lv_add_segment(ah, 0, ah->area_count, lv, segtype,
                                 stripe_size, 0u, 0)))

        /*
         * For RAID, all the devices are AREA_LV.
         * However, for 'mirror on stripe' using non-RAID targets,
         * the mirror legs are AREA_LV while the stripes underneath
         * are AREA_PV.
         */
        if (segtype_is_raid(segtype))
            sub_lv_count = mirrors * stripes + segtype->parity_devs;
        else
            sub_lv_count = mirrors;
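        /*
         * Example with hypothetical geometry: a raid5 LV created with
         * stripes = 3 and mirrors = 1 has parity_devs = 1, giving
         * sub_lv_count = 1 * 3 + 1 = 4 image sub LVs, whereas a 2-way
         * non-RAID mirror gives sub_lv_count = 2, the stripes being
         * hidden inside each mirror leg.
         */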
        if (!lv->le_count &&
            !(r = _lv_insert_empty_sublvs(lv, segtype, stripe_size,
                                          region_size, sub_lv_count))) {
            log_error("Failed to insert layer for %s", lv->name);

        if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
                                        stripes, stripe_size)))

        /*
         * If we are expanding an existing mirror, we can skip the
         * resync of the extension if the LV is currently in-sync
         * and the LV has the LV_NOTSYNCED flag set.
         */
        if ((lv->le_count != extents) &&
            segtype_is_mirrored(segtype) &&
            (lv->status & LV_NOTSYNCED)) {
            percent_t sync_percent = PERCENT_INVALID;

            if (!lv_is_active(lv)) {
                log_print("%s/%s is not active."
                          " Unable to get sync percent.",
                          lv->vg->name, lv->name);
                if (yes_no_prompt("Do full resync of extended "
                                  "portion of %s/%s? [y/n]: ",
                                  lv->vg->name, lv->name) == 'y')

            if (!(r = lv_mirror_percent(lv->vg->cmd, lv, 0,
                                        &sync_percent, NULL))) {
                log_error("Failed to get sync percent for %s/%s",
                          lv->vg->name, lv->name);
            } else if (sync_percent == PERCENT_100) {
                log_verbose("Skipping initial resync for "
                            "extended portion of %s/%s",
                            lv->vg->name, lv->name);
                init_mirror_in_sync(1);
                lv->status |= LV_NOTSYNCED;
            } else {
                log_error("%s/%s cannot be extended while"
                          " it is recovering.",
                          lv->vg->name, lv->name);
/*
 * Minimal LV renaming function.
 * The metadata transaction should be committed by the caller.
 * Assumes new_name is allocated from the cmd->mem pool.
 */
static int _rename_single_lv(struct logical_volume *lv, char *new_name)
    struct volume_group *vg = lv->vg;

    if (find_lv_in_vg(vg, new_name)) {
        log_error("Logical volume \"%s\" already exists in "
                  "volume group \"%s\"", new_name, vg->name);

    if (lv->status & LOCKED) {
        log_error("Cannot rename locked LV %s", lv->name);

    lv->name = new_name;

/*
 * 'lv_name_old' and 'lv_name_new' are the old and new names of the main LV.
 */
static int _rename_sub_lv(struct cmd_context *cmd,
                          struct logical_volume *lv,
                          const char *lv_name_old, const char *lv_name_new)
    /*
     * A sub LV name starts with lv_name_old + '_'.
     * The suffix follows lv_name_old and includes '_'.
     */
    len = strlen(lv_name_old);
    if (strncmp(lv->name, lv_name_old, len) || lv->name[len] != '_') {
        log_error("Cannot rename \"%s\": name format not recognized "
                  "for internal LV \"%s\"",
                  lv_name_old, lv->name);

    suffix = lv->name + len;

    /*
     * Compose a new name for the sub LV:
     * e.g. the new name is "lvol1_mlog"
     *      if the sub LV is "lvol0_mlog" and
     *      the new name for the main LV is "lvol1".
     */
    len = strlen(lv_name_new) + strlen(suffix) + 1;
    new_name = dm_pool_alloc(cmd->mem, len);
    if (!new_name) {
        log_error("Failed to allocate space for new name");

    if (dm_snprintf(new_name, len, "%s%s", lv_name_new, suffix) < 0) {
        log_error("Failed to create new name");

    return _rename_single_lv(lv, new_name);

/* Callback for for_each_sub_lv */
static int _rename_cb(struct cmd_context *cmd, struct logical_volume *lv,
    struct lv_names *lv_names = (struct lv_names *) data;

    return _rename_sub_lv(cmd, lv, lv_names->old, lv_names->new);
/*
 * Loop down sub LVs and call fn for each.
 * fn is responsible for logging the necessary information on failure.
 */
int for_each_sub_lv(struct cmd_context *cmd, struct logical_volume *lv,
                    int (*fn)(struct cmd_context *cmd,
                              struct logical_volume *lv, void *data),
    struct logical_volume *org;
    struct lv_segment *seg;

    if (lv_is_cow(lv) && lv_is_virtual_origin(org = origin_from_cow(lv))) {
        if (!fn(cmd, org, data))
        if (!for_each_sub_lv(cmd, org, fn, data))

    dm_list_iterate_items(seg, &lv->segments) {
        if (!fn(cmd, seg->log_lv, data))
        if (!for_each_sub_lv(cmd, seg->log_lv, fn, data))

        if (!fn(cmd, seg->pool_lv, data))
        if (!for_each_sub_lv(cmd, seg->pool_lv, fn, data))

        if (seg->metadata_lv) {
            if (!fn(cmd, seg->metadata_lv, data))
            if (!for_each_sub_lv(cmd, seg->metadata_lv, fn, data))

        for (s = 0; s < seg->area_count; s++) {
            if (seg_type(seg, s) != AREA_LV)
            if (!fn(cmd, seg_lv(seg, s), data))
            if (!for_each_sub_lv(cmd, seg_lv(seg, s), fn, data))

        if (!seg_is_raid(seg))

        /* RAID has meta_areas */
        for (s = 0; s < seg->area_count; s++) {
            if (seg_metatype(seg, s) != AREA_LV)
            if (!fn(cmd, seg_metalv(seg, s), data))
            if (!for_each_sub_lv(cmd, seg_metalv(seg, s), fn, data))
/*
 * Core of LV renaming routine.
 * VG must be locked by caller.
 */
int lv_rename(struct cmd_context *cmd, struct logical_volume *lv,
              const char *new_name)
    struct volume_group *vg = lv->vg;
    struct lv_names lv_names;
    DM_LIST_INIT(lvs_changed);
    struct lv_list lvl, lvl2, *lvlp;

    /* Rename is not allowed on sub LVs */
    if (!lv_is_visible(lv)) {
        log_error("Cannot rename internal LV \"%s\".", lv->name);

    if (find_lv_in_vg(vg, new_name)) {
        log_error("Logical volume \"%s\" already exists in "
                  "volume group \"%s\"", new_name, vg->name);

    if (lv->status & LOCKED) {
        log_error("Cannot rename locked LV %s", lv->name);

    /* Rename sub LVs */
    lv_names.old = lv->name;
    lv_names.new = new_name;
    if (!for_each_sub_lv(cmd, lv, _rename_cb, (void *) &lv_names))

    /* Rename main LV */
    if (!(lv->name = dm_pool_strdup(cmd->mem, new_name))) {
        log_error("Failed to allocate space for new name");

    dm_list_add(&lvs_changed, &lvl.list);

    /* Rename active virtual origin too */
    if (lv_is_cow(lv) && lv_is_virtual_origin(lvl2.lv = origin_from_cow(lv)))
        dm_list_add_h(&lvs_changed, &lvl2.list);

    log_verbose("Writing out updated volume group");

    if (!suspend_lvs(cmd, &lvs_changed, vg))

    if (!(r = vg_commit(vg)))

    /*
     * FIXME: resume LVs in reverse order to prevent memory
     * lock imbalance when resuming virtual snapshot origin
     * (resume of snapshot resumes origin too)
     */
    dm_list_iterate_back_items(lvlp, &lvs_changed)
        if (!resume_lv(cmd, lvlp->lv))
char *generate_lv_name(struct volume_group *vg, const char *format,
                       char *buffer, size_t len)
    struct lv_list *lvl;

    dm_list_iterate_items(lvl, &vg->lvs) {
        if (sscanf(lvl->lv->name, format, &i) != 1)

    if (dm_snprintf(buffer, len, format, high + 1) < 0)
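/*
 * Example with hypothetical VG contents: given format "lvol%d" and
 * existing LVs lvol0 and lvol3, the highest index parsed by sscanf()
 * is 3, so the buffer receives "lvol4".
 */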
int vg_max_lv_reached(struct volume_group *vg)
    if (vg->max_lv > vg_visible_lvs(vg))

    log_verbose("Maximum number of logical volumes (%u) reached "
                "in volume group %s", vg->max_lv, vg->name);

struct logical_volume *alloc_lv(struct dm_pool *mem)
    struct logical_volume *lv;

    if (!(lv = dm_pool_zalloc(mem, sizeof(*lv)))) {
        log_error("Unable to allocate logical volume structure");

    lv->snapshot = NULL;
    dm_list_init(&lv->snapshot_segs);
    dm_list_init(&lv->segments);
    dm_list_init(&lv->tags);
    dm_list_init(&lv->segs_using_this_lv);
    dm_list_init(&lv->rsites);

/*
 * Create a new empty LV.
 */
struct logical_volume *lv_create_empty(const char *name,
                                       alloc_policy_t alloc,
                                       struct volume_group *vg)
    struct format_instance *fi = vg->fid;
    struct logical_volume *lv;
    char dname[NAME_LEN];

    if (vg_max_lv_reached(vg))

    if (strstr(name, "%d") &&
        !(name = generate_lv_name(vg, name, dname, sizeof(dname)))) {
        log_error("Failed to generate unique name for the new "
    } else if (find_lv_in_vg(vg, name)) {
        log_error("Unable to create LV %s in Volume Group %s: "
                  "name already in use.", name, vg->name);

    log_verbose("Creating logical volume %s", name);

    if (!(lv = alloc_lv(vg->vgmem)))

    if (!(lv->name = dm_pool_strdup(vg->vgmem, name)))

    lv->status = status;
    lv->read_ahead = vg->cmd->default_settings.read_ahead;
    lv->size = UINT64_C(0);

    if (!link_lv_to_vg(vg, lv))

    if (!lv_set_creation(lv, NULL, 0))

    if (fi->fmt->ops->lv_setup && !fi->fmt->ops->lv_setup(fi, lv))

    dm_pool_free(vg->vgmem, lv);
static int _add_pvs(struct cmd_context *cmd, struct pv_segment *peg,
                    uint32_t s __attribute__((unused)), void *data)
    struct seg_pvs *spvs = (struct seg_pvs *) data;
    struct pv_list *pvl;

    /* Don't add again if it's already on the list. */
    if (find_pv_in_pv_list(&spvs->pvs, peg->pv))

    if (!(pvl = dm_pool_alloc(cmd->mem, sizeof(*pvl)))) {
        log_error("pv_list allocation failed");

    dm_list_add(&spvs->pvs, &pvl->list);

/*
 * Construct a dm_list of segments of LVs showing which PVs they use.
 * For pvmove we use the *parent* LV so we can pick up stripes & existing mirrors etc.
 */
struct dm_list *build_parallel_areas_from_lv(struct logical_volume *lv,
                                             unsigned use_pvmove_parent_lv)
    struct cmd_context *cmd = lv->vg->cmd;
    struct dm_list *parallel_areas;
    struct seg_pvs *spvs;
    uint32_t current_le = 0;
    uint32_t raid_multiple;
    struct lv_segment *seg = first_seg(lv);

    if (!(parallel_areas = dm_pool_alloc(cmd->mem, sizeof(*parallel_areas)))) {
        log_error("parallel_areas allocation failed");

    dm_list_init(parallel_areas);

    do {
        if (!(spvs = dm_pool_zalloc(cmd->mem, sizeof(*spvs)))) {
            log_error("allocation failed");

        dm_list_init(&spvs->pvs);

        spvs->le = current_le;
        spvs->len = lv->le_count - current_le;

        dm_list_add(parallel_areas, &spvs->list);

        if (use_pvmove_parent_lv && !(seg = find_seg_by_le(lv, current_le))) {
            log_error("Failed to find segment for %s extent %" PRIu32,
                      lv->name, current_le);

        /* Find next segment end */
        /* FIXME Unnecessary nesting! */
        if (!_for_each_pv(cmd, use_pvmove_parent_lv ? seg->pvmove_source_seg->lv : lv,
                          use_pvmove_parent_lv ? seg->pvmove_source_seg->le : current_le,
                          use_pvmove_parent_lv ? spvs->len * _calc_area_multiple(seg->pvmove_source_seg->segtype, seg->pvmove_source_seg->area_count, 0) : spvs->len,
                          use_pvmove_parent_lv ? seg->pvmove_source_seg : NULL,
                          0, 0, -1, 0, _add_pvs, (void *) spvs))

        current_le = spvs->le + spvs->len;
        raid_multiple = (seg->segtype->parity_devs) ?
            seg->area_count - seg->segtype->parity_devs : 1;
    } while ((current_le * raid_multiple) < lv->le_count);
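    /*
     * Example of the raid_multiple correction, with hypothetical
     * geometry: a raid6 segment with area_count = 5 and parity_devs = 2
     * gives raid_multiple = 3, because only 3 of the 5 areas hold data
     * and the LV's le_count counts data extents only.
     */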
    /* FIXME Merge adjacent segments with identical PV lists (avoids need for contiguous allocation attempts between successful allocations) */

    return parallel_areas;

int link_lv_to_vg(struct volume_group *vg, struct logical_volume *lv)
    struct lv_list *lvl;

    if (vg_max_lv_reached(vg))

    if (!(lvl = dm_pool_zalloc(vg->vgmem, sizeof(*lvl))))

    dm_list_add(&vg->lvs, &lvl->list);

int unlink_lv_from_vg(struct logical_volume *lv)
    struct lv_list *lvl;

    if (!(lvl = find_lv_in_vg(lv->vg, lv->name)))

    dm_list_del(&lvl->list);

void lv_set_visible(struct logical_volume *lv)
    if (lv_is_visible(lv))

    lv->status |= VISIBLE_LV;

    log_debug("LV %s in VG %s is now visible.", lv->name, lv->vg->name);

void lv_set_hidden(struct logical_volume *lv)
    if (!lv_is_visible(lv))

    lv->status &= ~VISIBLE_LV;

    log_debug("LV %s in VG %s is now hidden.", lv->name, lv->vg->name);
int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
                     const force_t force)
    struct volume_group *vg;
    struct logical_volume *format1_origin = NULL;
    int format1_reload_required = 0;
    struct logical_volume *pool_lv = NULL;

    if (!vg_check_status(vg, LVM_WRITE))

    if (lv_is_origin(lv)) {
        log_error("Can't remove logical volume \"%s\" under snapshot",

    if (lv->status & MIRROR_IMAGE) {
        log_error("Can't remove logical volume %s used by a mirror",

    if (lv->status & MIRROR_LOG) {
        log_error("Can't remove logical volume %s used as mirror log",

    if (lv->status & (RAID_META | RAID_IMAGE)) {
        log_error("Can't remove logical volume %s used as RAID device",

    if (lv_is_thin_pool_data(lv) || lv_is_thin_pool_metadata(lv)) {
        log_error("Can't remove logical volume %s used by a thin pool.",
    } else if (lv_is_thin_volume(lv))
        pool_lv = first_seg(lv)->pool_lv;

    if (lv->status & LOCKED) {
        log_error("Can't remove locked LV %s", lv->name);

    /* FIXME Ensure not referred to by other existing LVs */

    if (lv_info(cmd, lv, 0, &info, 1, 0)) {
        if (!lv_check_not_in_use(cmd, lv, &info))

        if ((force == PROMPT) &&
            lv_is_visible(lv) &&
            yes_no_prompt("Do you really want to remove active "
                          "%slogical volume %s? [y/n]: ",
                          vg_is_clustered(vg) ? "clustered " : "",
            log_error("Logical volume %s not removed", lv->name);

    if (lv_is_cow(lv)) {
        /* Old format1 code */
        if (!(lv->vg->fid->fmt->features & FMT_MDAS))
            format1_origin = origin_from_cow(lv);

        log_verbose("Removing snapshot %s", lv->name);
        /* vg_remove_snapshot() will preload origin/former snapshots */
        if (!vg_remove_snapshot(lv))

    /* FIXME Review and fix the snapshot error paths! */
    if (!deactivate_lv(cmd, lv)) {
        log_error("Unable to deactivate logical volume \"%s\"",

    /* Clear thin pool stacked messages */
    if (pool_lv && !pool_has_message(first_seg(pool_lv), lv, 0) &&
        !update_pool_lv(pool_lv, 1)) {
        log_error("Failed to update thin pool %s.", pool_lv->name);

    visible = lv_is_visible(lv);

    log_verbose("Releasing logical volume \"%s\"", lv->name);
    if (!lv_remove(lv)) {
        log_error("Error releasing logical volume \"%s\"", lv->name);

    /*
     * Old format1 code: If no snapshots are left, reload without -real.
     */
    if (format1_origin && !lv_is_origin(format1_origin)) {
        log_warn("WARNING: Support for snapshots with old LVM1-style metadata is deprecated.");
        log_warn("WARNING: Please use lvconvert to update to lvm2 metadata at your convenience.");
        format1_reload_required = 1;

    /* store it on disks */

    if (format1_reload_required && !suspend_lv(cmd, format1_origin))
        log_error("Failed to refresh %s without snapshot.", format1_origin->name);

    if (format1_reload_required && !resume_lv(cmd, format1_origin)) {
        log_error("Failed to resume %s.", format1_origin->name);

    /* Release unneeded blocks in thin pool */
    /* TODO: defer when multiple LVs released at once */
    if (pool_lv && !update_pool_lv(pool_lv, 1)) {
        log_error("Failed to update thin pool %s.", pool_lv->name);

    log_print("Logical volume \"%s\" successfully removed", lv->name);
/*
 * Remove an LV and its dependencies - LV leaf nodes should be removed first.
 */
int lv_remove_with_dependencies(struct cmd_context *cmd, struct logical_volume *lv,
                                const force_t force, unsigned level)
    percent_t snap_percent;
    struct dm_list *snh, *snht;
    struct seg_list *sl, *tsl;

    if (lv_is_cow(lv)) {
        /*
         * A merging snapshot cannot be removed directly unless
         * it has been invalidated or failed merge removal is requested.
         */
        if (lv_is_merging_cow(lv) && !level) {
            if (lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) &&
                info.exists && info.live_table) {
                if (!lv_snapshot_percent(lv, &snap_percent)) {
                    log_error("Failed to obtain merging snapshot progress percentage for logical volume %s.",

                if ((snap_percent != PERCENT_INVALID) &&
                    (snap_percent != PERCENT_MERGE_FAILED)) {
                    log_error("Can't remove merging snapshot logical volume \"%s\"",
                } else if ((snap_percent == PERCENT_MERGE_FAILED) &&
                           (force == PROMPT) &&
                           yes_no_prompt("Removing snapshot \"%s\" that failed to merge may leave origin \"%s\" inconsistent. "
                                         "Proceed? [y/n]: ", lv->name, origin_from_cow(lv)->name) == 'n') {
                    log_error("Logical volume %s not removed.", lv->name);

    if (lv_is_origin(lv)) {
        /* Remove snapshot LVs first */
        if ((force == PROMPT) &&
            /* Active snapshot already needs to confirm each active LV */
            !lv_is_active(lv) &&
            yes_no_prompt("Removing origin %s will also remove %u "
                          "snapshot(s). Proceed? [y/n]: ",
                          lv->name, lv->origin_count) == 'n') {
            log_error("Logical volume %s not removed.", lv->name);

        dm_list_iterate_safe(snh, snht, &lv->snapshot_segs)
            if (!lv_remove_with_dependencies(cmd, dm_list_struct_base(snh, struct lv_segment,

    if (lv_is_used_thin_pool(lv)) {
        /* Remove thin LVs first */
        if ((force == PROMPT) &&
            yes_no_prompt("Removing pool %s will also remove %u "
                          "thin volume(s). OK? [y/n]: ", lv->name,
                          /* Note: Snapshots not included */
                          dm_list_size(&lv->segs_using_this_lv)) == 'n') {
            log_error("Logical volume %s not removed.", lv->name);

        dm_list_iterate_items_safe(sl, tsl, &lv->segs_using_this_lv)
            if (!lv_remove_with_dependencies(cmd, sl->seg->lv,

    return lv_remove_single(cmd, lv, force);
/*
 * insert_layer_for_segments_on_pv() inserts a layer segment for a segment area.
 * However, layer modification could split the underlying layer segment.
 * This function splits the parent area as needed to keep the 1:1 relationship
 * between the parent area and the underlying layer segment.
 * Since the layer LV might have other layers below, build_parallel_areas()
 * is used to find the lowest-level segment boundaries.
 */
static int _split_parent_area(struct lv_segment *seg, uint32_t s,
                              struct dm_list *layer_seg_pvs)
    uint32_t parent_area_len, parent_le, layer_le;
    uint32_t area_multiple;
    struct seg_pvs *spvs;

    if (seg_is_striped(seg))
        area_multiple = seg->area_count;

    parent_area_len = seg->area_len;
    parent_le = seg->le;
    layer_le = seg_le(seg, s);

    while (parent_area_len > 0) {
        /* Find the layer segment pointed at */
        if (!(spvs = _find_seg_pvs_by_le(layer_seg_pvs, layer_le))) {
            log_error("layer segment for %s:%" PRIu32 " not found",
                      seg->lv->name, parent_le);

        if (spvs->le != layer_le) {
            log_error("Incompatible layer boundary: "
                      "%s:%" PRIu32 "[%" PRIu32 "] on %s:%" PRIu32,
                      seg->lv->name, parent_le, s,
                      seg_lv(seg, s)->name, layer_le);

        if (spvs->len < parent_area_len) {
            parent_le += spvs->len * area_multiple;
            if (!lv_split_segment(seg->lv, parent_le))

        parent_area_len -= spvs->len;
        layer_le += spvs->len;
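    /*
     * Worked example with hypothetical geometry: if the parent segment
     * is striped across 3 areas (area_multiple = 3) and the layer LV
     * below changes composition after 10 layer extents, the parent must
     * be split at parent_le + 10 * 3, so that each parent area keeps a
     * 1:1 mapping onto a single layer segment.
     */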
/*
 * Split the parent LV segments if the layer LV below them is split.
 */
int split_parent_segments_for_layer(struct cmd_context *cmd,
                                    struct logical_volume *layer_lv)
    struct lv_list *lvl;
    struct logical_volume *parent_lv;
    struct lv_segment *seg;
    struct dm_list *parallel_areas;

    if (!(parallel_areas = build_parallel_areas_from_lv(layer_lv, 0)))

    /* Loop through all LVs except itself */
    dm_list_iterate_items(lvl, &layer_lv->vg->lvs) {
        parent_lv = lvl->lv;
        if (parent_lv == layer_lv)

        /* Find all segments that point at the layer LV */
        dm_list_iterate_items(seg, &parent_lv->segments) {
            for (s = 0; s < seg->area_count; s++) {
                if (seg_type(seg, s) != AREA_LV ||
                    seg_lv(seg, s) != layer_lv)

                if (!_split_parent_area(seg, s, parallel_areas))
/* Remove a layer from the LV */
int remove_layers_for_segments(struct cmd_context *cmd,
                               struct logical_volume *lv,
                               struct logical_volume *layer_lv,
                               uint64_t status_mask, struct dm_list *lvs_changed)
    struct lv_segment *seg, *lseg;
    struct lv_list *lvl;

    log_very_verbose("Removing layer %s for segments of %s",
                     layer_lv->name, lv->name);

    /* Find all segments that point at the temporary mirror */
    dm_list_iterate_items(seg, &lv->segments) {
        for (s = 0; s < seg->area_count; s++) {
            if (seg_type(seg, s) != AREA_LV ||
                seg_lv(seg, s) != layer_lv)

            /* Find the layer segment pointed at */
            if (!(lseg = find_seg_by_le(layer_lv, seg_le(seg, s)))) {
                log_error("Layer segment not found: %s:%" PRIu32,
                          layer_lv->name, seg_le(seg, s));

            /* Check the segment params are compatible */
            if (!seg_is_striped(lseg) || lseg->area_count != 1) {
                log_error("Layer is not linear: %s:%" PRIu32,
                          layer_lv->name, lseg->le);

            if ((lseg->status & status_mask) != status_mask) {
                log_error("Layer status does not match: "
                          "%s:%" PRIu32 " status: 0x%" PRIx64 "/0x%" PRIx64,
                          layer_lv->name, lseg->le,
                          lseg->status, status_mask);

            if (lseg->le != seg_le(seg, s) ||
                lseg->area_len != seg->area_len) {
                log_error("Layer boundary mismatch: "
                          "%s:%" PRIu32 "-%" PRIu32 " on "
                          "%" PRIu32 "-%" PRIu32 " / ",
                          lv->name, seg->le, seg->area_len,
                          layer_lv->name, seg_le(seg, s),
                          lseg->le, lseg->area_len);

            if (!move_lv_segment_area(seg, s, lseg, 0))

            /* Replace mirror with error segment */
            if (!(lseg->segtype =
                  get_segtype_from_string(lv->vg->cmd, "error"))) {
                log_error("Missing error segtype");

            lseg->area_count = 0;

            /* First time, add LV to list of LVs affected */
            if (!lv_changed && lvs_changed) {
                if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
                    log_error("lv_list alloc failed");

                dm_list_add(lvs_changed, &lvl->list);

    if (lv_changed && !lv_merge_segments(lv))
/* Remove a layer */
int remove_layers_for_segments_all(struct cmd_context *cmd,
                                   struct logical_volume *layer_lv,
                                   uint64_t status_mask,
                                   struct dm_list *lvs_changed)
    struct lv_list *lvl;
    struct logical_volume *lv1;

    /* Loop through all LVs except the temporary mirror */
    dm_list_iterate_items(lvl, &layer_lv->vg->lvs) {
        if (lv1 == layer_lv)

        if (!remove_layers_for_segments(cmd, lv1, layer_lv,
                                        status_mask, lvs_changed))

    if (!lv_empty(layer_lv))

int move_lv_segments(struct logical_volume *lv_to,
                     struct logical_volume *lv_from,
                     uint64_t set_status, uint64_t reset_status)
    struct lv_segment *seg;

    dm_list_iterate_items(seg, &lv_to->segments)
        log_error("Can't move snapshot segment.");

    dm_list_init(&lv_to->segments);
    dm_list_splice(&lv_to->segments, &lv_from->segments);

    dm_list_iterate_items(seg, &lv_to->segments) {
        seg->status &= ~reset_status;
        seg->status |= set_status;

    lv_to->le_count = lv_from->le_count;
    lv_to->size = lv_from->size;

    lv_from->le_count = 0;
/* Remove a layer from the LV */
int remove_layer_from_lv(struct logical_volume *lv,
                         struct logical_volume *layer_lv)
    struct logical_volume *parent;
    struct lv_segment *parent_seg;
    struct segment_type *segtype;

    log_very_verbose("Removing layer %s for %s", layer_lv->name, lv->name);

    if (!(parent_seg = get_only_segment_using_this_lv(layer_lv))) {
        log_error("Failed to find layer %s in %s",
                  layer_lv->name, lv->name);

    parent = parent_seg->lv;

    /*
     * Before removal, the layer should be cleaned up,
     * i.e. additional segments and areas should have been removed.
     */
    if (dm_list_size(&parent->segments) != 1 ||
        parent_seg->area_count != 1 ||
        seg_type(parent_seg, 0) != AREA_LV ||
        layer_lv != seg_lv(parent_seg, 0) ||
        parent->le_count != layer_lv->le_count)

    if (!lv_empty(parent))

    if (!move_lv_segments(parent, layer_lv, 0, 0))

    /* Replace the empty layer with an error segment */
    segtype = get_segtype_from_string(lv->vg->cmd, "error");
    if (!lv_add_virtual_segment(layer_lv, 0, parent->le_count, segtype, NULL))
/*
 * Create and insert a linear LV "above" lv_where.
 * After the insertion, a new LV named lv_where->name + suffix is created
 * and all segments of lv_where are moved to the new LV.
 * lv_where will have a single segment which maps linearly to the new LV.
 */
struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
                                           struct logical_volume *lv_where,
                                           const char *layer_suffix)
    struct logical_volume *layer_lv;
    struct segment_type *segtype;
    struct lv_segment *mapseg;
    unsigned exclusive = 0;

    /* Create an empty layer LV */
    len = strlen(lv_where->name) + 32;
    if (!(name = alloca(len))) {
        log_error("layer name allocation failed. "
                  "Remove new LV and retry.");

    if (dm_snprintf(name, len, "%s%s", lv_where->name, layer_suffix) < 0) {
        log_error("layer name allocation failed. "
                  "Remove new LV and retry.");

    if (!(layer_lv = lv_create_empty(name, NULL, LVM_READ | LVM_WRITE,
                                     ALLOC_INHERIT, lv_where->vg))) {
        log_error("Creation of layer LV failed");

    if (lv_is_active_exclusive_locally(lv_where))

    if (lv_is_active(lv_where) && strstr(name, "_mimagetmp")) {
        log_very_verbose("Creating transient LV %s for mirror conversion in VG %s.", name, lv_where->vg->name);

        segtype = get_segtype_from_string(cmd, "error");

        if (!lv_add_virtual_segment(layer_lv, 0, lv_where->le_count, segtype, NULL)) {
            log_error("Creation of transient LV %s for mirror conversion in VG %s failed.", name, lv_where->vg->name);

        if (!vg_write(lv_where->vg)) {
            log_error("Failed to write intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);

        if (!vg_commit(lv_where->vg)) {
            log_error("Failed to commit intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
            vg_revert(lv_where->vg);

        if (exclusive)
            r = activate_lv_excl(cmd, layer_lv);
        else
            r = activate_lv(cmd, layer_lv);

        if (!r) {
            log_error("Failed to resume transient LV"
                      " %s for mirror conversion in VG %s.",
                      name, lv_where->vg->name);

    log_very_verbose("Inserting layer %s for %s",
                     layer_lv->name, lv_where->name);

    if (!move_lv_segments(layer_lv, lv_where, 0, 0))

    if (!(segtype = get_segtype_from_string(cmd, "striped")))

    /* Allocate a new linear segment */
    if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count,
                                    status, 0, NULL, NULL, 1, layer_lv->le_count,

    /* Map the new segment to the original underlying area */
    if (!set_lv_segment_area_lv(mapseg, 0, layer_lv, 0, 0))

    /* Add the new segment to lv_where */
    dm_list_add(&lv_where->segments, &mapseg->list);
    lv_where->le_count = layer_lv->le_count;
    lv_where->size = (uint64_t) lv_where->le_count * lv_where->vg->extent_size;
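/*
 * Example with hypothetical names: inserting a layer with suffix
 * "_mimagetmp" above lvol0 moves all of lvol0's segments into a new
 * hidden LV lvol0_mimagetmp and leaves lvol0 with a single striped
 * segment whose only area is that new sub LV.
 */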
/*
 * Extend and insert a linear layer LV beneath the source segment area.
 */
static int _extend_layer_lv_for_segment(struct logical_volume *layer_lv,
                                        struct lv_segment *seg, uint32_t s,
    struct lv_segment *mapseg;
    struct segment_type *segtype;
    struct physical_volume *src_pv = seg_pv(seg, s);
    uint32_t src_pe = seg_pe(seg, s);

    if (seg_type(seg, s) != AREA_PV && seg_type(seg, s) != AREA_LV)

    if (!(segtype = get_segtype_from_string(layer_lv->vg->cmd, "striped")))

    /* FIXME Incomplete message? Needs more context */
    log_very_verbose("Inserting %s:%" PRIu32 "-%" PRIu32 " of %s/%s",
                     pv_dev_name(src_pv),
                     src_pe, src_pe + seg->area_len - 1,
                     seg->lv->vg->name, seg->lv->name);

    /* Allocate a new segment */
    if (!(mapseg = alloc_lv_segment(segtype, layer_lv, layer_lv->le_count,
                                    seg->area_len, status, 0,
                                    NULL, NULL, 1, seg->area_len, 0, 0, 0, seg)))

    /* Map the new segment to the original underlying area */
    if (!move_lv_segment_area(mapseg, 0, seg, s))

    /* Add the new segment to the layer LV */
    dm_list_add(&layer_lv->segments, &mapseg->list);
    layer_lv->le_count += seg->area_len;
    layer_lv->size += (uint64_t) seg->area_len * layer_lv->vg->extent_size;

    /* Map the original area to the new segment */
    if (!set_lv_segment_area_lv(seg, s, layer_lv, mapseg->le, 0))
/*
 * Match the segment area to PEs in the pvl
 * (the segment area boundary should be aligned to PE ranges by
 * _adjust_layer_segments() so that there is no partial overlap.)
 */
static int _match_seg_area_to_pe_range(struct lv_segment *seg, uint32_t s,
                                       struct pv_list *pvl)
    struct pe_range *per;
    uint32_t pe_start, per_end;

    if (seg_type(seg, s) != AREA_PV || seg_dev(seg, s) != pvl->pv->dev)

    pe_start = seg_pe(seg, s);

    /* Do these PEs match any of the PE ranges in pvl? */
    dm_list_iterate_items(per, pvl->pe_ranges) {
        per_end = per->start + per->count - 1;

        if ((pe_start < per->start) || (pe_start > per_end))

        /* FIXME Missing context in this message - add LV/seg details */
        log_debug("Matched PE range %s:%" PRIu32 "-%" PRIu32 " against "
                  "%s %" PRIu32 " len %" PRIu32, dev_name(pvl->pv->dev),
                  per->start, per_end, dev_name(seg_dev(seg, s)),
                  seg_pe(seg, s), seg->area_len);

/*
 * For each segment in lv_where that uses a PV in pvl directly,
 * split the segment if it spans more than one underlying PV.
 */
static int _align_segment_boundary_to_pe_range(struct logical_volume *lv_where,
                                               struct pv_list *pvl)
    struct lv_segment *seg;
    struct pe_range *per;
    uint32_t pe_start, pe_end, per_end, stripe_multiplier, s;

    /* Split LV segments to match PE ranges */
    dm_list_iterate_items(seg, &lv_where->segments) {
        for (s = 0; s < seg->area_count; s++) {
            if (seg_type(seg, s) != AREA_PV ||
                seg_dev(seg, s) != pvl->pv->dev)

            /* Do these PEs overlap this PE range? */
            dm_list_iterate_items(per, pvl->pe_ranges) {
                pe_start = seg_pe(seg, s);
                pe_end = pe_start + seg->area_len - 1;
                per_end = per->start + per->count - 1;

                if ((pe_end < per->start) ||
                    (pe_start > per_end))

                if (seg_is_striped(seg))
                    stripe_multiplier = seg->area_count;
                else
                    stripe_multiplier = 1;

                if ((per->start != pe_start &&
                     per->start > pe_start) &&
                    !lv_split_segment(lv_where, seg->le +
                                      (per->start - pe_start) *

                if ((per_end != pe_end &&
                     per_end < pe_end) &&
                    !lv_split_segment(lv_where, seg->le +
                                      (per_end - pe_start + 1) *
/*
 * Scan lv_where for segments on a PV in pvl, and for each one found
 * append a linear segment to layer_lv and insert it between the two.
 *
 * If pvl is empty, a layer is placed under the whole of lv_where.
 * If the layer is inserted, lv_where is added to lvs_changed.
 */
int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
                                    struct logical_volume *lv_where,
                                    struct logical_volume *layer_lv,
                                    struct pv_list *pvl,
                                    struct dm_list *lvs_changed)
    struct lv_segment *seg;
    struct lv_list *lvl;

    log_very_verbose("Inserting layer %s for segments of %s on %s",
                     layer_lv->name, lv_where->name,
                     pvl ? pv_dev_name(pvl->pv) : "any");

    if (!_align_segment_boundary_to_pe_range(lv_where, pvl))

    /* Work through all segments on the supplied PV */
    dm_list_iterate_items(seg, &lv_where->segments) {
        for (s = 0; s < seg->area_count; s++) {
            if (!_match_seg_area_to_pe_range(seg, s, pvl))

            /* First time, add LV to list of LVs affected */
            if (!lv_used && lvs_changed) {
                if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
                    log_error("lv_list alloc failed");

                dm_list_add(lvs_changed, &lvl->list);

            if (!_extend_layer_lv_for_segment(layer_lv, seg, s,
                log_error("Failed to insert segment in layer "
                          "LV %s under %s:%" PRIu32 "-%" PRIu32,
                          layer_lv->name, lv_where->name,
                          seg->le, seg->le + seg->len);
/*
 * Initialize the LV with 'value'.
 */
int set_lv(struct cmd_context *cmd, struct logical_volume *lv,
           uint64_t sectors, int value)
    /*
     * <clausen> also, more than 4k
     * <clausen> say, reiserfs puts it's superblock 32k in, IIRC
     * <ejt_> k, I'll drop a fixme to that effect
     *        (I know the device is at least 4k, but not 32k)
     */
    if (!(name = dm_pool_alloc(cmd->mem, PATH_MAX))) {
        log_error("Name allocation failed - device not cleared");

    if (dm_snprintf(name, PATH_MAX, "%s%s/%s", cmd->dev_dir,
                    lv->vg->name, lv->name) < 0) {
        log_error("Name too long - device not cleared (%s)", lv->name);

    sync_local_dev_names(cmd); /* Wait until devices are available */

    log_verbose("Clearing start of logical volume \"%s\"", lv->name);

    if (!(dev = dev_cache_get(name, NULL))) {
        log_error("%s: not found: device not cleared", name);

    if (!dev_open_quiet(dev))

    if (!sectors)
        sectors = UINT64_C(4096) >> SECTOR_SHIFT;
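    /*
     * With the usual SECTOR_SHIFT of 9 (512-byte sectors) this defaults
     * to UINT64_C(4096) >> 9 = 8 sectors, i.e. the first 4KiB of the
     * device is wiped.
     */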
    if (sectors > lv->size)

    if (!dev_set(dev, UINT64_C(0), (size_t) sectors << SECTOR_SHIFT, value))

    if (!dev_close_immediate(dev))
static struct logical_volume *_create_virtual_origin(struct cmd_context *cmd,
                                                     struct volume_group *vg,
                                                     const char *lv_name,
                                                     uint32_t permission,
                                                     uint64_t voriginextents)
    const struct segment_type *segtype;
    struct logical_volume *lv;

    if (!(segtype = get_segtype_from_string(cmd, "zero"))) {
        log_error("Zero segment type for virtual origin not found");

    len = strlen(lv_name) + 32;
    if (!(vorigin_name = alloca(len)) ||
        dm_snprintf(vorigin_name, len, "%s_vorigin", lv_name) < 0) {
        log_error("Virtual origin name allocation failed.");

    if (!(lv = lv_create_empty(vorigin_name, NULL, permission,
                               ALLOC_INHERIT, vg)))

    if (!lv_extend(lv, segtype, 1, 0, 1, 0, voriginextents,
                   NULL, NULL, ALLOC_INHERIT))

    /* store vg on disk(s) */
    if (!vg_write(vg) || !vg_commit(vg))
/*
 * If lp->thin OR lp->activate is AY*, activate the pool if not already active.
 * If lp->thin, create a thin LV within the pool - as a snapshot if lp->snapshot.
 * If lp->activate is AY*, activate it.
 * If lp->activate was AN* and the pool was originally inactive, deactivate it.
 */
static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, struct lvcreate_params *lp,
                                               const char *new_lv_name)
    struct cmd_context *cmd = vg->cmd;
    uint64_t status = UINT64_C(0);
    struct logical_volume *lv, *org = NULL;
    struct logical_volume *pool_lv;
    struct lv_list *lvl;
    int origin_active = 0;

    if (new_lv_name && find_lv_in_vg(vg, new_lv_name)) {
        log_error("Logical volume \"%s\" already exists in "
                  "volume group \"%s\"", new_lv_name, lp->vg_name);

    if (vg_max_lv_reached(vg)) {
        log_error("Maximum number of logical volumes (%u) reached "
                  "in volume group %s", vg->max_lv, vg->name);

    if ((segtype_is_mirrored(lp->segtype) ||
         segtype_is_raid(lp->segtype) || segtype_is_thin(lp->segtype)) &&
        !(vg->fid->fmt->features & FMT_SEGMENTS)) {
        log_error("Metadata does not support %s segments.",

    if (lp->read_ahead != DM_READ_AHEAD_AUTO &&
        lp->read_ahead != DM_READ_AHEAD_NONE &&
        (vg->fid->fmt->features & FMT_RESTRICTED_READAHEAD) &&
        (lp->read_ahead < 2 || lp->read_ahead > 120)) {
        log_error("Metadata only supports readahead values between 2 and 120.");

    if (lp->stripe_size > vg->extent_size) {
        log_error("Reducing requested stripe size %s to maximum, "
                  "physical extent size %s",
                  display_size(cmd, (uint64_t) lp->stripe_size),
                  display_size(cmd, (uint64_t) vg->extent_size));
        lp->stripe_size = vg->extent_size;

    /* Need to check the vg's format to verify this - the cmd format isn't set up properly yet */
    if (lp->stripes > 1 &&
        !(vg->fid->fmt->features & FMT_UNLIMITED_STRIPESIZE) &&
        (lp->stripe_size > STRIPE_SIZE_MAX)) {
        log_error("Stripe size may not exceed %s",
                  display_size(cmd, (uint64_t) STRIPE_SIZE_MAX));

    if ((size_rest = lp->extents % lp->stripes)) {
        log_print("Rounding size (%d extents) up to stripe boundary "
                  "size (%d extents)", lp->extents,
                  lp->extents - size_rest + lp->stripes);
        lp->extents = lp->extents - size_rest + lp->stripes;
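        /*
         * Worked example of the rounding above, with a hypothetical
         * request: for lp->extents = 10 and lp->stripes = 3,
         * size_rest = 1, so the size becomes 10 - 1 + 3 = 12 extents,
         * the next multiple of the stripe count.
         */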
    /* Does the LV need to be zeroed? Thin handles this as a per-pool in-kernel setting. */
    if (lp->zero && !segtype_is_thin(lp->segtype) && !activation()) {
        log_error("Can't wipe start of new LV without using "
                  "device-mapper kernel driver");

    status |= lp->permission | VISIBLE_LV;

    if (lp->snapshot && lp->thin) {
        if (!(org = find_lv(vg, lp->origin))) {
            log_error("Couldn't find origin volume '%s'.",

        if (org->status & LOCKED) {
            log_error("Snapshots of locked devices are not supported.");

        lp->voriginextents = org->le_count;
    } else if (lp->snapshot) {
        if (!activation()) {
            log_error("Can't create snapshot without using "
                      "device-mapper kernel driver");

        status |= LVM_WRITE;

        if (lp->voriginsize)

            if (!(org = find_lv(vg, lp->origin))) {
                log_error("Couldn't find origin volume '%s'.",

            if (lv_is_virtual_origin(org)) {
                log_error("Can't share virtual origins. "
                          "Use --virtualsize.");

            if (lv_is_cow(org)) {
                log_error("Snapshots of snapshots are not "

            if (org->status & LOCKED) {
                log_error("Snapshots of locked devices are not "

            if (lv_is_merging_origin(org)) {
                log_error("Snapshots of an origin that has a "
                          "merging snapshot are not supported");

            if (lv_is_thin_type(org) && !lv_is_thin_volume(org)) {
                log_error("Snapshots of thin pool %sdevices "
                          "are not supported.",
                          lv_is_thin_pool_data(org) ? "data " :
                          lv_is_thin_pool_metadata(org) ?

            if ((org->status & MIRROR_IMAGE) ||
                (org->status & MIRROR_LOG)) {
                log_error("Snapshots of mirror %ss "
                          "are not supported",
                          (org->status & MIRROR_LOG) ?

            if (!lv_info(cmd, org, 0, &info, 0, 0)) {
                log_error("Check for existence of active snapshot "
                          "origin '%s' failed.", org->name);

            origin_active = info.exists;

            if (vg_is_clustered(vg) &&
                !lv_is_active_exclusive_locally(org)) {
                log_error("%s must be active exclusively to"
                          " create snapshot", org->name);
    if (!seg_is_thin_volume(lp) && !lp->extents) {
        log_error("Unable to create new logical volume with no extents");

    if (seg_is_thin_pool(lp) &&
        ((uint64_t) lp->extents * vg->extent_size < lp->chunk_size)) {
        log_error("Unable to create thin pool smaller than 1 chunk.");

    if (lp->snapshot && !lp->thin &&
        ((uint64_t) lp->extents * vg->extent_size < 2 * lp->chunk_size)) {
        log_error("Unable to create a snapshot smaller than 2 chunks.");

    if (!seg_is_virtual(lp) &&
        vg->free_count < lp->extents) {
        log_error("Volume group \"%s\" has insufficient free space "
                  "(%u extents): %u required.",
                  vg->name, vg->free_count, lp->extents);

    if (lp->stripes > dm_list_size(lp->pvh) && lp->alloc != ALLOC_ANYWHERE) {
        log_error("Number of stripes (%u) must not exceed "
                  "number of physical volumes (%d)", lp->stripes,
                  dm_list_size(lp->pvh));

    if (!activation() &&
        (seg_is_mirrored(lp) ||
         seg_is_thin_pool(lp))) {
        /*
         * FIXME: For thin pool add some code to allow delayed
         * initialization of empty thin pool volume.
         * i.e. using some LV flag, fake message,...
         * and testing for metadata pool header signature?
         */
        log_error("Can't create %s without using "
                  "device-mapper kernel driver.",
                  segtype_is_raid(lp->segtype) ? lp->segtype->name :
                  segtype_is_mirrored(lp->segtype) ? "mirror" :
                  "thin pool volume");

    /* The snapshot segment gets created later */
    if (lp->snapshot && !lp->thin &&
        !(lp->segtype = get_segtype_from_string(cmd, "striped")))

    if (!dm_list_empty(&lp->tags)) {
        if (!(vg->fid->fmt->features & FMT_TAGS)) {
            log_error("Volume group %s does not support tags",

    if (seg_is_thin_volume(lp) &&
        ((lp->activate == CHANGE_AY) ||
         (lp->activate == CHANGE_AE) ||
         (lp->activate == CHANGE_ALY))) {
        /* Ensure all stacked messages are submitted */
        if (!(lvl = find_lv_in_vg(vg, lp->pool))) {
            log_error("Unable to find existing pool LV %s in VG %s.",
                      lp->pool, vg->name);

        if (!update_pool_lv(lvl->lv, 1))

    if (segtype_is_mirrored(lp->segtype) || segtype_is_raid(lp->segtype)) {
        init_mirror_in_sync(lp->nosync);

        if (lp->nosync) {
            log_warn("WARNING: New %s won't be synchronised. "
                     "Don't read what you didn't write!",

            status |= LV_NOTSYNCED;

        lp->region_size = adjusted_mirror_region_size(vg->extent_size,

    if (!(lv = lv_create_empty(new_lv_name ? : "lvol%d", NULL,
                               status, lp->alloc, vg)))
    if (lp->read_ahead != lv->read_ahead) {
        log_verbose("Setting read ahead sectors");
        lv->read_ahead = lp->read_ahead;

    if (!seg_is_thin_pool(lp) && lp->minor >= 0) {
        lv->major = lp->major;
        lv->minor = lp->minor;
        lv->status |= FIXED_MINOR;
        log_verbose("Setting device number to (%d, %d)", lv->major,

    dm_list_splice(&lv->tags, &lp->tags);

    if (!lv_extend(lv, lp->segtype,
                   lp->stripes, lp->stripe_size,
                   seg_is_thin_pool(lp) ? lp->poolmetadataextents : lp->region_size,
                   seg_is_thin_volume(lp) ? lp->voriginextents : lp->extents,
                   seg_is_thin_volume(lp) ? (org ? org->name : lp->pool) : NULL,
                   lp->pvh, lp->alloc))

    if (seg_is_thin_pool(lp)) {
        first_seg(lv)->zero_new_blocks = lp->zero ? 1 : 0;
        first_seg(lv)->chunk_size = lp->chunk_size;
        /* FIXME: use lowwatermark via lvm.conf global for all thinpools ? */
        first_seg(lv)->low_water_mark = 0;
    } else if (seg_is_thin_volume(lp)) {
        pool_lv = first_seg(lv)->pool_lv;

        if (!(first_seg(lv)->device_id =
              get_free_pool_device_id(first_seg(pool_lv)))) {

        if (!attach_pool_message(first_seg(pool_lv),
                                 DM_THIN_MESSAGE_CREATE_THIN, lv, 0, 0)) {

    /* FIXME Log allocation and attachment should have happened inside lv_extend. */
    if (lp->log_count &&
        !seg_is_raid(first_seg(lv)) && seg_is_mirrored(first_seg(lv))) {
        if (!add_mirror_log(cmd, lv, lp->log_count,
                            first_seg(lv)->region_size,
                            lp->pvh, lp->alloc)) {

    /* store vg on disk(s) */
    if (!vg_write(vg) || !vg_commit(vg))

    log_verbose("Test mode: Skipping activation and zeroing.");

    if (seg_is_thin(lp)) {
        /* For snapshot, suspend active thin origin first */
        if (org && lv_is_active(org)) {
            if (!pool_below_threshold(first_seg(first_seg(org)->pool_lv))) {
                log_error("Cannot create thin snapshot. Pool %s/%s is filled "
                          "over the autoextend threshold.",
                          org->vg->name, first_seg(org)->pool_lv->name);

            if (!suspend_lv_origin(cmd, org)) {
                log_error("Failed to suspend thin snapshot origin %s/%s.",
                          org->vg->name, org->name);

            if (!resume_lv_origin(cmd, org)) { /* deptree updates thin-pool */
                log_error("Failed to resume thin snapshot origin %s/%s.",
                          org->vg->name, org->name);

            /* At this point remove pool messages, snapshot is active */
            if (!update_pool_lv(first_seg(org)->pool_lv, 0)) {
                goto deactivate_and_revert_new_lv;
        if (((lp->activate == CHANGE_AY) ||
             (lp->activate == CHANGE_AE) ||
             (lp->activate == CHANGE_ALY))) {
            /* At this point send message to kernel thin mda */
            pool_lv = lv_is_thin_pool(lv) ? lv : first_seg(lv)->pool_lv;
            if (!update_pool_lv(pool_lv, 1)) {
                goto deactivate_and_revert_new_lv;

            if (!activate_lv_excl(cmd, lv)) {
                log_error("Aborting. Failed to activate thin %s.",
                goto deactivate_and_revert_new_lv;

    } else if (lp->snapshot) {
        if (!activate_lv_excl(cmd, lv)) {
            log_error("Aborting. Failed to activate snapshot "
                      "exception store.");

    } else if ((lp->activate == CHANGE_AY && !activate_lv(cmd, lv)) ||
               (lp->activate == CHANGE_AE && !activate_lv_excl(cmd, lv)) ||
               (lp->activate == CHANGE_ALY && !activate_lv_local(cmd, lv))) {
        log_error("Failed to activate new LV.");
        goto deactivate_and_revert_new_lv;

    if (!seg_is_thin(lp) && !lp->zero && !lp->snapshot)
        log_warn("WARNING: \"%s\" not zeroed", lv->name);
    else if ((!seg_is_thin(lp) ||
              (lv_is_thin_volume(lv) &&
               !first_seg(first_seg(lv)->pool_lv)->zero_new_blocks)) &&
             !set_lv(cmd, lv, UINT64_C(0), 0)) {
        log_error("Aborting. Failed to wipe %s.",
                  lp->snapshot ? "snapshot exception store" :
        goto deactivate_and_revert_new_lv;

    if (lp->snapshot && !lp->thin) {
        /* Reset permission after zeroing */
        if (!(lp->permission & LVM_WRITE))
            lv->status &= ~LVM_WRITE;

        /* COW area must be deactivated if origin is not active */
        if (!origin_active && !deactivate_lv(cmd, lv)) {
            log_error("Aborting. Couldn't deactivate snapshot "
                      "COW area. Manual intervention required.");

        /* A virtual origin must be activated explicitly. */
        if (lp->voriginsize &&
            (!(org = _create_virtual_origin(cmd, vg, lv->name,
                                            lp->voriginextents)) ||
             !activate_lv_excl(cmd, org))) {
            log_error("Couldn't create virtual origin for LV %s",
            if (org && !lv_remove(org))
            goto deactivate_and_revert_new_lv;

        /* cow LV remains active and becomes snapshot LV */
        if (!vg_add_snapshot(org, lv, NULL,
                             org->le_count, lp->chunk_size)) {
            log_error("Couldn't create snapshot.");
            goto deactivate_and_revert_new_lv;

        /* store vg on disk(s) */

        if (!suspend_lv(cmd, org)) {
            log_error("Failed to suspend origin %s", org->name);

        if (!resume_lv(cmd, org)) {
            log_error("Problem reactivating origin %s", org->name);
    /* FIXME out of sequence */

deactivate_and_revert_new_lv:
    if (!deactivate_lv(cmd, lv)) {
        log_error("Unable to deactivate failed new LV. "
                  "Manual intervention required.");

    /* FIXME Better to revert to backup of metadata? */
    if (!lv_remove(lv) || !vg_write(vg) || !vg_commit(vg))
        log_error("Manual intervention may be required to remove "
                  "abandoned LV(s) before retrying.");

int lv_create_single(struct volume_group *vg,
                     struct lvcreate_params *lp)
    struct logical_volume *lv;

    /* Create thin pool first if necessary */
    if (lp->create_thin_pool) {
        if (!seg_is_thin_pool(lp) &&
            !(lp->segtype = get_segtype_from_string(vg->cmd, "thin-pool")))

        if (!(lv = _lv_create_an_lv(vg, lp, lp->pool)))

        lp->pool = lv->name;

        if (!(lp->segtype = get_segtype_from_string(vg->cmd, "thin")))

    if (!(lv = _lv_create_an_lv(vg, lp, lp->lv_name)))

    log_print("Logical volume \"%s\" created", lv->name);