~ubuntu-branches/ubuntu/precise/mdadm/precise-proposed

« back to all changes in this revision

Viewing changes to Grow.c

  • Committer: Package Import Robot
  • Author(s): Surbhi Palande
  • Date: 2010-09-30 17:46:19 UTC
  • mfrom: (1.4.1 experimental) (1.1.25 sid)
  • Revision ID: package-import@ubuntu.com-20100930174619-txqppxj5vhrrvlhq
Tags: 3.1.4-1+8efb9d1ubuntu1
* Merge from debian unstable. (LP: #603582) 
* Remaining changes
  - Assemble.c, config.c: upgraded to the mdadm-3.1.4 version of these files
    from Debian.
  - debian/control: we need udev and util-linux in the right versions. We
    also remove the build dependencies on quilt and docbook-to-man, as
    neither is used in Ubuntu's mdadm.
  - debian/initramfs/hook: kept Ubuntu's version for handling the absence
    of active raid arrays in <initramfs>/etc/mdadm/mdadm.conf
  - debian/initramfs/script.local-top.DEBIAN, debian/mdadm-startall,
    debian/mdadm.raid.DEBIAN: removed. udev does its job now instead.
  - debian/mdadm-startall.sgml, debian/mdadm-startall.8: documentation of
    unused startall script
  - debian/mdadm.config, debian/mdadm.postinst - let udev do the handling
    instead. Resolved merge conflict by keeping Ubuntu's version.
  - debian/rules: kept debian's switch to using dh_lintian
  - debian/mdadm.links, debian/mdadm.manpages: dropped because these are
    not used in Ubuntu. Also dropped the build-dep on docbook-to-man.
  - debian/mdadm.postinst, debian/mdadm.config, initramfs/init-premount:
    boot-degraded enablement; maintain udev starting of RAID devices;
    init-premount hook script for the initramfs, to provide information at
    boot
  - debian/mkconf.in is the older mkconf. Kept Ubuntu's version.
  - debian/rules: Kept Ubuntu's version for installing apport hooks, for not
    installing the unused startall script, and for adding a udev rule
    corresponding to mdadm.
  - debian/install-rc, check.d/_numbers, check.d/root_on_raid: Ubuntu partman
    installer changes
  - debian/presubj: Dropped this unused bug reporting file. Instead,
    source_mdadm.py acts as an apport hook for bug handling.
  - rename debian/mdadm.vol_id.udev to debian/mdadm.mdadm-blkid.udev so that
    the rules file ends up with a more reasonable name

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*
2
2
 * mdadm - manage Linux "md" devices aka RAID arrays.
3
3
 *
4
 
 * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
 
4
 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5
5
 *
6
6
 *
7
7
 *    This program is free software; you can redistribute it and/or modify
19
19
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20
20
 *
21
21
 *    Author: Neil Brown
22
 
 *    Email: <neilb@cse.unsw.edu.au>
23
 
 *    Paper: Neil Brown
24
 
 *           School of Computer Science and Engineering
25
 
 *           The University of New South Wales
26
 
 *           Sydney, 2052
27
 
 *           Australia
 
22
 *    Email: <neilb@suse.de>
28
23
 */
29
24
#include        "mdadm.h"
30
25
#include        "dlink.h"
 
26
#include        <sys/mman.h>
31
27
 
32
28
#if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
33
29
#error no endian defined
35
31
#include        "md_u.h"
36
32
#include        "md_p.h"
37
33
 
 
34
#ifndef offsetof
 
35
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
 
36
#endif
 
37
 
38
38
int Grow_Add_device(char *devname, int fd, char *newdev)
39
39
{
40
40
        /* Add a device to an active array.
69
69
                return 1;
70
70
        }
71
71
 
72
 
        nfd = open(newdev, O_RDWR|O_EXCL);
 
72
        nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
73
73
        if (nfd < 0) {
74
74
                fprintf(stderr, Name ": cannot open %s\n", newdev);
75
75
                return 1;
288
288
                return 1;
289
289
        } else if (strcmp(file, "internal") == 0) {
290
290
                int d;
 
291
                if (st->ss->add_internal_bitmap == NULL) {
 
292
                        fprintf(stderr, Name ": Internal bitmaps not supported "
 
293
                                "with %s metadata\n", st->ss->name);
 
294
                        return 1;
 
295
                }
291
296
                for (d=0; d< st->max_devs; d++) {
292
297
                        mdu_disk_info_t disk;
293
298
                        char *dv;
381
386
/*
382
387
 * When reshaping an array we might need to backup some data.
383
388
 * This is written to all spares with a 'super_block' describing it.
384
 
 * The superblock goes 1K form the end of the used space on the
 
389
 * The superblock goes 4K from the end of the used space on the
385
390
 * device.
386
391
 * It if written after the backup is complete.
387
392
 * It has the following structure.
388
393
 */
389
394
 
390
 
struct mdp_backup_super {
391
 
        char    magic[16];  /* md_backup_data-1 */
 
395
static struct mdp_backup_super {
 
396
        char    magic[16];  /* md_backup_data-1 or -2 */
392
397
        __u8    set_uuid[16];
393
398
        __u64   mtime;
394
399
        /* start/sizes in 512byte sectors */
395
 
        __u64   devstart;
 
400
        __u64   devstart;       /* address on backup device/file of data */
396
401
        __u64   arraystart;
397
402
        __u64   length;
398
403
        __u32   sb_csum;        /* csum of preceeding bytes. */
399
 
};
 
404
        __u32   pad1;
 
405
        __u64   devstart2;      /* offset in to data of second section */
 
406
        __u64   arraystart2;
 
407
        __u64   length2;
 
408
        __u32   sb_csum2;       /* csum of preceeding bytes. */
 
409
        __u8 pad[512-68-32];
 
410
} __attribute__((aligned(512))) bsb, bsb2;
400
411
 
401
 
int bsb_csum(char *buf, int len)
 
412
__u32 bsb_csum(char *buf, int len)
402
413
{
403
414
        int i;
404
415
        int csum = 0;
407
418
        return __cpu_to_le32(csum);
408
419
}
409
420
 
 
421
static int child_grow(int afd, struct mdinfo *sra, unsigned long blocks,
 
422
                      int *fds, unsigned long long *offsets,
 
423
                      int disks, int chunk, int level, int layout, int data,
 
424
                      int dests, int *destfd, unsigned long long *destoffsets);
 
425
static int child_shrink(int afd, struct mdinfo *sra, unsigned long blocks,
 
426
                        int *fds, unsigned long long *offsets,
 
427
                        int disks, int chunk, int level, int layout, int data,
 
428
                        int dests, int *destfd, unsigned long long *destoffsets);
 
429
static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
 
430
                           int *fds, unsigned long long *offsets,
 
431
                           unsigned long long start,
 
432
                           int disks, int chunk, int level, int layout, int data,
 
433
                           int dests, int *destfd, unsigned long long *destoffsets);
 
434
 
 
435
int freeze_array(struct mdinfo *sra)
 
436
{
 
437
        /* Try to freeze resync on this array.
 
438
         * Return -1 if the array is busy,
 
439
         * return 0 if this kernel doesn't support 'frozen'
 
440
         * return 1 if it worked.
 
441
         */
 
442
        char buf[20];
 
443
        if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0)
 
444
                return 0;
 
445
        if (strcmp(buf, "idle\n") != 0 &&
 
446
            strcmp(buf, "frozen\n") != 0)
 
447
                return -1;
 
448
        if (sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0)
 
449
                return 0;
 
450
        return 1;
 
451
}
 
452
 
 
453
void unfreeze_array(struct mdinfo *sra, int frozen)
 
454
{
 
455
        /* If 'frozen' is 1, unfreeze the array */
 
456
        if (frozen > 0)
 
457
                sysfs_set_str(sra, NULL, "sync_action", "idle");
 
458
}
 
459
 
 
460
void wait_reshape(struct mdinfo *sra)
 
461
{
 
462
        int fd = sysfs_get_fd(sra, NULL, "sync_action");
 
463
        char action[20];
 
464
 
 
465
        do {
 
466
                fd_set rfds;
 
467
                FD_ZERO(&rfds);
 
468
                FD_SET(fd, &rfds);
 
469
                select(fd+1, NULL, NULL, &rfds, NULL);
 
470
                
 
471
                if (sysfs_fd_get_str(fd, action, 20) < 0) {
 
472
                        close(fd);
 
473
                        return;
 
474
                }
 
475
        } while  (strncmp(action, "reshape", 7) == 0);
 
476
}
 
477
                        
 
478
                
410
479
int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
411
480
                 long long size,
412
 
                 int level, int layout, int chunksize, int raid_disks)
 
481
                 int level, char *layout_str, int chunksize, int raid_disks)
413
482
{
414
483
        /* Make some changes in the shape of an array.
415
484
         * The kernel must support the change.
416
 
         * Different reshapes have subtly different meaning for different
417
 
         * levels, so we need to check the current state of the array
418
 
         * and go from there.
 
485
         *
 
486
         * There are three different changes.  Each can trigger
 
487
         * a resync or recovery so we freeze that until we have
 
488
         * requested everything (if kernel supports freezing - 2.6.30).
 
489
         * The steps are:
 
490
         *  - change size (i.e. component_size)
 
491
         *  - change level
 
492
         *  - change layout/chunksize/ndisks
 
493
         *
 
494
         * The last can require a reshape.  It is different on different
 
495
         * levels so we need to check the level before actioning it.
 
496
         * Some times the level change needs to be requested after the
 
497
         * reshape (e.g. raid6->raid5, raid5->raid0)
 
498
         *
419
499
         */
420
 
        struct mdu_array_info_s array;
 
500
        struct mdu_array_info_s array, orig;
421
501
        char *c;
422
 
 
423
 
        struct mdp_backup_super bsb;
 
502
        int rv = 0;
424
503
        struct supertype *st;
425
504
 
426
 
        int nlevel, olevel;
427
505
        int nchunk, ochunk;
428
506
        int nlayout, olayout;
429
507
        int ndisks, odisks;
430
 
        int ndata, odata;
431
 
        unsigned long long nstripe, ostripe, last_block;
 
508
        unsigned int ndata, odata;
 
509
        int orig_level = UnSet;
 
510
        char alt_layout[40];
432
511
        int *fdlist;
433
512
        unsigned long long *offsets;
434
 
        int d, i, spares;
 
513
        int d, i;
435
514
        int nrdisks;
436
515
        int err;
 
516
        int frozen;
 
517
        unsigned long a,b, blocks, stripes;
 
518
        unsigned long cache;
 
519
        unsigned long long array_size;
 
520
        int changed = 0;
 
521
        int done;
437
522
 
438
523
        struct mdinfo *sra;
439
524
        struct mdinfo *sd;
443
528
                        devname);
444
529
                return 1;
445
530
        }
 
531
 
 
532
        if (size >= 0 &&
 
533
            (chunksize || level!= UnSet || layout_str || raid_disks)) {
 
534
                fprintf(stderr, Name ": cannot change component size at the same time "
 
535
                        "as other changes.\n"
 
536
                        "   Change size first, then check data is intact before "
 
537
                        "making other changes.\n");
 
538
                return 1;
 
539
        }
 
540
 
 
541
        if (raid_disks && raid_disks < array.raid_disks && array.level > 1 &&
 
542
            get_linux_version() < 2006032 &&
 
543
            !check_env("MDADM_FORCE_FEWER")) {
 
544
                fprintf(stderr, Name ": reducing the number of devices is not safe before Linux 2.6.32\n"
 
545
                        "       Please use a newer kernel\n");
 
546
                return 1;
 
547
        }
 
548
        sra = sysfs_read(fd, 0, GET_LEVEL);
 
549
        if (sra)
 
550
                frozen = freeze_array(sra);
 
551
        else {
 
552
                fprintf(stderr, Name ": failed to read sysfs parameters for %s\n",
 
553
                        devname);
 
554
                return 1;
 
555
        }
 
556
        if (frozen < 0) {
 
557
                fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
 
558
                        " be reshaped\n", devname);
 
559
                return 1;
 
560
        }
 
561
 
 
562
        /* ========= set size =============== */
 
563
        if (size >= 0 && (size == 0 || size != array.size)) {
 
564
                array.size = size;
 
565
                if (array.size != size) {
 
566
                        /* got truncated to 32bit, write to
 
567
                         * component_size instead
 
568
                         */
 
569
                        if (sra)
 
570
                                rv = sysfs_set_num(sra, NULL,
 
571
                                                   "component_size", size);
 
572
                        else
 
573
                                rv = -1;
 
574
                } else
 
575
                        rv = ioctl(fd, SET_ARRAY_INFO, &array);
 
576
                if (rv != 0) {
 
577
                        int err = errno;
 
578
                        fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
 
579
                                devname, strerror(err));
 
580
                        if (err == EBUSY && 
 
581
                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
 
582
                                fprintf(stderr, "       Bitmap must be removed before size can be changed\n");
 
583
                        rv = 1;
 
584
                        goto release;
 
585
                }
 
586
                ioctl(fd, GET_ARRAY_INFO, &array);
 
587
                size = get_component_size(fd)/2;
 
588
                if (size == 0)
 
589
                        size = array.size;
 
590
                if (!quiet)
 
591
                        fprintf(stderr, Name ": component size of %s has been set to %lluK\n",
 
592
                                devname, size);
 
593
                changed = 1;
 
594
        } else {
 
595
                size = get_component_size(fd)/2;
 
596
                if (size == 0)
 
597
                        size = array.size;
 
598
        }
 
599
 
 
600
        /* ======= set level =========== */
 
601
        if (level != UnSet && level != array.level) {
 
602
                /* Trying to change the level.
 
603
                 * We might need to change layout first and schedule a
 
604
                 * level change for later.
 
605
                 * Level changes that can happen immediately are:
 
606
                 * 0->4,5,6  1->5  4->5,6  5->1,6
 
607
                 * Level changes that need a layout change first are:
 
608
                 * 6->5,4,0 : need a -6 layout, or parity-last
 
609
                 * 5->4,0   : need parity-last
 
610
                 */
 
611
                if ((array.level == 6 || array.level == 5) &&
 
612
                    (level == 5 || level == 4 || level == 0)) {
 
613
                        /* Don't change level yet, but choose intermediate
 
614
                         * layout
 
615
                         */
 
616
                        if (level == 5) {
 
617
                                if (layout_str == NULL)
 
618
                                        switch (array.layout) {
 
619
                                        case ALGORITHM_LEFT_ASYMMETRIC:
 
620
                                        case ALGORITHM_LEFT_ASYMMETRIC_6:
 
621
                                        case ALGORITHM_ROTATING_N_RESTART:
 
622
                                                layout_str = "left-asymmetric-6";
 
623
                                                break;
 
624
                                        case ALGORITHM_LEFT_SYMMETRIC:
 
625
                                        case ALGORITHM_LEFT_SYMMETRIC_6:
 
626
                                        case ALGORITHM_ROTATING_N_CONTINUE:
 
627
                                                layout_str = "left-symmetric-6";
 
628
                                                break;
 
629
                                        case ALGORITHM_RIGHT_ASYMMETRIC:
 
630
                                        case ALGORITHM_RIGHT_ASYMMETRIC_6:
 
631
                                        case ALGORITHM_ROTATING_ZERO_RESTART:
 
632
                                                layout_str = "right-asymmetric-6";
 
633
                                                break;
 
634
                                        case ALGORITHM_RIGHT_SYMMETRIC:
 
635
                                        case ALGORITHM_RIGHT_SYMMETRIC_6:
 
636
                                                layout_str = "right-symmetric-6";
 
637
                                                break;
 
638
                                        case ALGORITHM_PARITY_0:
 
639
                                        case ALGORITHM_PARITY_0_6:
 
640
                                                layout_str = "parity-first-6";
 
641
                                                break;
 
642
                                        case ALGORITHM_PARITY_N:
 
643
                                                layout_str = "parity-last";
 
644
                                                break;
 
645
                                        default:
 
646
                                                fprintf(stderr, Name ": %s: cannot"
 
647
                                                        "convert layout to RAID5 equivalent\n",
 
648
                                                        devname);
 
649
                                                rv = 1;
 
650
                                                goto release;
 
651
                                        }
 
652
                                else {
 
653
                                        int l = map_name(r5layout, layout_str);
 
654
                                        if (l == UnSet) {
 
655
                                                fprintf(stderr, Name ": %s: layout '%s' not recognised\n",
 
656
                                                        devname, layout_str);
 
657
                                                rv = 1;
 
658
                                                goto release;
 
659
                                        }
 
660
                                        if (l != ALGORITHM_PARITY_N) {
 
661
                                                /* need the -6 version */
 
662
                                                char *ls = map_num(r5layout, l);
 
663
                                                strcat(strcpy(alt_layout, ls),
 
664
                                                       "-6");
 
665
                                                layout_str = alt_layout;
 
666
                                        }
 
667
                                }
 
668
                                if (raid_disks)
 
669
                                        /* The final raid6->raid5 conversion
 
670
                                         * will reduce the number of disks,
 
671
                                         * so now we need to aim higher
 
672
                                         */
 
673
                                        raid_disks++;
 
674
                        } else
 
675
                                layout_str = "parity-last";
 
676
                } else {
 
677
                        c = map_num(pers, level);
 
678
                        if (c == NULL) {
 
679
                                rv = 1;/* not possible */
 
680
                                goto release;
 
681
                        }
 
682
                        err = sysfs_set_str(sra, NULL, "level", c);
 
683
                        if (err) {
 
684
                                err = errno;
 
685
                                fprintf(stderr, Name ": %s: could not set level to %s\n",
 
686
                                        devname, c);
 
687
                                if (err == EBUSY && 
 
688
                                    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
 
689
                                        fprintf(stderr, "       Bitmap must be removed before level can be changed\n");
 
690
                                rv = 1;
 
691
                                goto release;
 
692
                        }
 
693
                        orig = array;
 
694
                        orig_level = orig.level;
 
695
                        ioctl(fd, GET_ARRAY_INFO, &array);
 
696
                        if (layout_str == NULL &&
 
697
                            orig.level == 5 && level == 6 &&
 
698
                            array.layout != orig.layout)
 
699
                                layout_str = map_num(r5layout, orig.layout);
 
700
                        if (!quiet)
 
701
                                fprintf(stderr, Name " level of %s changed to %s\n",
 
702
                                        devname, c);
 
703
                        changed = 1;
 
704
                }
 
705
        }
 
706
 
 
707
        /* ========= set shape (chunk_size / layout / ndisks)  ============== */
 
708
        /* Check if layout change is a no-op */
 
709
        if (layout_str) switch(array.level) {
 
710
        case 5:
 
711
                if (array.layout == map_name(r5layout, layout_str))
 
712
                        layout_str = NULL;
 
713
                break;
 
714
        case 6:
 
715
                if (layout_str == NULL &&
 
716
                    ((chunksize && chunksize * 1024 != array.chunk_size) ||
 
717
                     (raid_disks && raid_disks != array.raid_disks)) &&
 
718
                    array.layout >= 16) {
 
719
                        fprintf(stderr, Name
 
720
                                ": %s has a non-standard layout.  If you wish to preserve this\n"
 
721
                                "      during the reshape, please specify --layout=preserve\n"
 
722
                                "      If you want to change it, specify a layout or use --layout=normalise\n",
 
723
                                devname);
 
724
                        rv = 1;
 
725
                        goto release;
 
726
                }
 
727
                if (strcmp(layout_str, "normalise") == 0 ||
 
728
                    strcmp(layout_str, "normalize") == 0) {
 
729
                        char *hyphen;
 
730
                        strcpy(alt_layout, map_num(r6layout, array.layout));
 
731
                        hyphen = strrchr(alt_layout, '-');
 
732
                        if (hyphen && strcmp(hyphen, "-6") == 0) {
 
733
                                *hyphen = 0;
 
734
                                layout_str = alt_layout;
 
735
                        }
 
736
                }
 
737
 
 
738
                if (array.layout == map_name(r6layout, layout_str))
 
739
                        layout_str = NULL;
 
740
                if (layout_str && strcmp(layout_str, "preserve") == 0)
 
741
                        layout_str = NULL;
 
742
                break;
 
743
        }
 
744
        if (layout_str == NULL
 
745
            && (chunksize == 0 || chunksize*1024 == array.chunk_size)
 
746
            && (raid_disks == 0 || raid_disks == array.raid_disks)) {
 
747
                rv = 0;
 
748
                if (level != UnSet && level != array.level) {
 
749
                        /* Looks like this level change doesn't need
 
750
                         * a reshape after all.
 
751
                         */
 
752
                        c = map_num(pers, level);
 
753
                        if (c) {
 
754
                                rv = sysfs_set_str(sra, NULL, "level", c);
 
755
                                if (rv) {
 
756
                                        int err = errno;
 
757
                                        fprintf(stderr, Name ": %s: could not set level to %s\n",
 
758
                                                devname, c);
 
759
                                        if (err == EBUSY && 
 
760
                                            (array.state & (1<<MD_SB_BITMAP_PRESENT)))
 
761
                                                fprintf(stderr, "       Bitmap must be removed before level can be changed\n");
 
762
                                        rv = 1;
 
763
                                }
 
764
                        }
 
765
                } else if (!changed && !quiet)
 
766
                        fprintf(stderr, Name ": %s: no change requested\n",
 
767
                                devname);
 
768
                goto release;
 
769
        }
 
770
 
446
771
        c = map_num(pers, array.level);
447
772
        if (c == NULL) c = "-unknown-";
448
773
        switch(array.level) {
449
774
        default: /* raid0, linear, multipath cannot be reconfigured */
450
775
                fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
451
776
                        c, devname);
452
 
                return 1;
 
777
                rv = 1;
 
778
                break;
453
779
 
454
780
        case LEVEL_FAULTY: /* only 'layout' change is permitted */
455
781
 
456
 
                if (size >= 0) {
457
 
                        fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
458
 
                                devname);
459
 
                        return 1;
460
 
                }
461
 
                if (level != UnSet && level != LEVEL_FAULTY) {
462
 
                        fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
463
 
                                devname);
464
 
                        return 1;
465
 
                }
466
782
                if (chunksize  || raid_disks) {
467
783
                        fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
468
784
                                devname);
469
 
                        return 1;
 
785
                        rv = 1;
 
786
                        break;
470
787
                }
471
 
                if (layout == UnSet)
472
 
                        return 0; /* nothing to do.... */
 
788
                if (layout_str == NULL)
 
789
                        break; /* nothing to do.... */
473
790
 
474
 
                array.layout = layout;
 
791
                array.layout = parse_layout_faulty(layout_str);
 
792
                if (array.layout < 0) {
 
793
                        fprintf(stderr, Name ": %s: layout %s not understood for 'faulty' array\n",
 
794
                                devname, layout_str);
 
795
                        rv = 1;
 
796
                        break;
 
797
                }
475
798
                if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
476
799
                        fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
477
800
                                devname, strerror(errno));
478
 
                        return 1;
479
 
                }
480
 
                if (!quiet)
 
801
                        rv = 1;
 
802
                } else if (!quiet)
481
803
                        printf("layout for %s set to %d\n", devname, array.layout);
482
 
                return 0;
483
 
 
484
 
        case 1: /* raid_disks and size can each be changed.  They are independant */
485
 
 
486
 
                if (level != UnSet && level != 1) {
487
 
                        fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
488
 
                                devname);
489
 
                        return 1;
490
 
                }
491
 
                if (chunksize || layout != UnSet) {
492
 
                        fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
493
 
                                devname);
494
 
                        return 1;
495
 
                }
496
 
 
497
 
                /* Each can trigger a resync/recovery which will block the
498
 
                 * other from happening.  Later we could block
499
 
                 * resync for the duration via 'sync_action'...
500
 
                 */
 
804
                break;
 
805
 
 
806
        case 1: /* only raid_disks can each be changed. */
 
807
 
 
808
                if (chunksize || layout_str != NULL) {
 
809
                        fprintf(stderr, Name ": %s: Cannot change chunk size or layout for a RAID1 array.\n",
 
810
                                devname);
 
811
                        rv = 1;
 
812
                        break;
 
813
                }
501
814
                if (raid_disks > 0) {
502
815
                        array.raid_disks = raid_disks;
503
816
                        if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
504
817
                                fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
505
818
                                        devname, strerror(errno));
506
 
                                return 1;
507
 
                        }
508
 
                }
509
 
                if (size >= 0) {
510
 
                        array.size = size;
511
 
                        if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
512
 
                                fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
513
 
                                        devname, strerror(errno));
514
 
                                return 1;
515
 
                        }
516
 
                }
517
 
                return 0;
 
819
                                rv = 1;
 
820
                        }
 
821
                }
 
822
                break;
518
823
 
519
824
        case 4:
520
825
        case 5:
521
826
        case 6:
 
827
 
 
828
                /*
 
829
                 * layout/chunksize/raid_disks can be changed
 
830
                 * though the kernel may not support it all.
 
831
                 */
522
832
                st = super_by_fd(fd);
523
833
 
524
 
                /* size can be changed independently.
525
 
                 * layout/chunksize/raid_disks/level can be changed
526
 
                 * though the kernel may not support it all.
527
 
                 * If 'suspend_lo' is not present in devfs, then
528
 
                 * these cannot be changed.
529
 
                 */
530
 
                if (size >= 0) {
531
 
                        /* Cannot change other details as well.. */
532
 
                        if (layout != UnSet ||
533
 
                            chunksize != 0 ||
534
 
                            raid_disks != 0 ||
535
 
                            level != UnSet) {
536
 
                                fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
537
 
                                        devname, c);
538
 
                                return 1;
539
 
                        }
540
 
                        array.size = size;
541
 
                        if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
542
 
                                fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
543
 
                                        devname, strerror(errno));
544
 
                                return 1;
545
 
                        }
546
 
                        return 0;
547
 
                }
548
 
                /* Ok, just change the shape. This can be awkward.
549
 
                 *  There are three possibilities.
550
 
                 * 1/ The array will shrink.  We don't support this
551
 
                 *    possibility.  Maybe one day...
552
 
                 * 2/ The array will not change size.  This is easy enough
553
 
                 *    to do, but not reliably.  If the process is aborted
554
 
                 *    the array *will* be corrupted.  So maybe we can allow
555
 
                 *    this but only if the user is really certain.  e.g.
556
 
                 *    --really-risk-everything
557
 
                 * 3/ The array will grow. This can be reliably achieved.
 
834
                /*
 
835
                 * There are three possibilities.
 
836
                 * 1/ The array will shrink.
 
837
                 *    We need to ensure the reshape will pause before reaching
 
838
                 *    the 'critical section'.  We also need to fork and wait for
 
839
                 *    that to happen.  When it does we 
 
840
                 *       suspend/backup/complete/unfreeze
 
841
                 *
 
842
                 * 2/ The array will not change size.
 
843
                 *    This requires that we keep a backup of a sliding window
 
844
                 *    so that we can restore data after a crash.  So we need
 
845
                 *    to fork and monitor progress.
 
846
                 *
 
847
                 * 3/ The array will grow. This is relatively easy.
558
848
                 *    However the kernel's restripe routines will cheerfully
559
849
                 *    overwrite some early data before it is safe.  So we
560
850
                 *    need to make a backup of the early parts of the array
561
851
                 *    and be ready to restore it if rebuild aborts very early.
562
852
                 *
563
 
                 *    We backup data by writing it to all spares (there must be
564
 
                 *    at least 1, so even raid6->raid5 requires a spare to be
565
 
                 *    present).
 
853
                 *    We backup data by writing it to one spare, or to a
 
854
                 *    file which was given on command line.
566
855
                 *
 
856
                 *    [FOLLOWING IS OLD AND PARTLY WRONG]
567
857
                 *    So: we enumerate the devices in the array and
568
858
                 *    make sure we can open all of them.
569
859
                 *    Then we freeze the early part of the array and
573
863
                 *    and finally invalidate the copied data and unfreeze the
574
864
                 *    start of the array.
575
865
                 *
576
 
                 *    Before we can do this we need to decide:
577
 
                 *     - will the array grow?  Just calculate size
578
 
                 *     - how much needs to be saved: count stripes.
579
 
                 *     - where to save data... good question.
580
 
                 *
 
866
                 * In each case, we first make sure that storage is available
 
867
                 * for the required backup.
 
868
                 * Then we:
 
869
                 *   -  request the shape change.
 
870
                 *   -  fork to handle backup etc.
581
871
                 */
582
 
                nlevel = olevel = array.level;
583
872
                nchunk = ochunk = array.chunk_size;
584
873
                nlayout = olayout = array.layout;
585
874
                ndisks = odisks = array.raid_disks;
586
875
 
587
 
                if (level != UnSet) nlevel = level;
588
 
                if (chunksize) nchunk = chunksize;
589
 
                if (layout != UnSet) nlayout = layout;
 
876
                if (chunksize) {
 
877
                        nchunk = chunksize * 1024;
 
878
                        if (size % chunksize) {
 
879
                                fprintf(stderr, Name ": component size %lluK is not"
 
880
                                        " a multiple of chunksize %dK\n",
 
881
                                        size, chunksize);
 
882
                                break;
 
883
                        }
 
884
                }
 
885
                if (layout_str != NULL)
 
886
                        switch(array.level) {
 
887
                        case 4: /* ignore layout */
 
888
                                break;
 
889
                        case 5:
 
890
                                nlayout = map_name(r5layout, layout_str);
 
891
                                if (nlayout == UnSet) {
 
892
                                        fprintf(stderr, Name ": layout %s not understood for raid5.\n",
 
893
                                                layout_str);
 
894
                                        rv = 1;
 
895
                                        goto release;
 
896
                                }
 
897
                                break;
 
898
 
 
899
                        case 6:
 
900
                                nlayout = map_name(r6layout, layout_str);
 
901
                                if (nlayout == UnSet) {
 
902
                                        fprintf(stderr, Name ": layout %s not understood for raid6.\n",
 
903
                                                layout_str);
 
904
                                        rv = 1;
 
905
                                        goto release;
 
906
                                }
 
907
                                break;
 
908
                        }
590
909
                if (raid_disks) ndisks = raid_disks;
591
910
 
592
911
                odata = odisks-1;
593
 
                if (olevel == 6) odata--; /* number of data disks */
594
912
                ndata = ndisks-1;
595
 
                if (nlevel == 6) ndata--;
596
 
 
597
 
                if (ndata < odata) {
598
 
                        fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
599
 
                                devname);
600
 
                        return 1;
601
 
                }
602
 
                if (ndata == odata) {
603
 
                        fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
604
 
                                devname);
605
 
                        return 1;
606
 
                }
607
 
                /* Well, it is growing... so how much do we need to backup.
608
 
                 * Need to backup a full number of new-stripes, such that the
609
 
                 * last one does not over-write any place that it would be read
610
 
                 * from
 
913
                if (array.level == 6) {
 
914
                        odata--; /* number of data disks */
 
915
                        ndata--;
 
916
                }
 
917
 
 
918
                if (odata == ndata &&
 
919
                    get_linux_version() < 2006032) {
 
920
                        fprintf(stderr, Name ": in-place reshape is not safe before 2.6.32, sorry.\n");
 
921
                        break;
 
922
                }
 
923
 
 
924
                /* Check that we can hold all the data */
 
925
                get_dev_size(fd, NULL, &array_size);
 
926
                if (ndata * (unsigned long long)size < (array_size/1024)) {
 
927
                        fprintf(stderr, Name ": this change will reduce the size of the array.\n"
 
928
                                "       use --grow --array-size first to truncate array.\n"
 
929
                                "       e.g. mdadm --grow %s --array-size %llu\n",
 
930
                                devname, ndata * size);
 
931
                        rv = 1;
 
932
                        break;
 
933
                }
 
934
 
 
935
                /* So how much do we need to backup.
 
936
                 * We need an amount of data which is both a whole number of
 
937
                 * old stripes and a whole number of new stripes.
 
938
                 * So LCM for (chunksize*datadisks).
611
939
                 */
612
 
                nstripe = ostripe = 0;
613
 
                while (nstripe >= ostripe) {
614
 
                        nstripe += nchunk/512;
615
 
                        last_block = nstripe * ndata;
616
 
                        ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
 
940
                a = (ochunk/512) * odata;
 
941
                b = (nchunk/512) * ndata;
 
942
                /* Find GCD */
 
943
                while (a != b) {
 
944
                        if (a < b)
 
945
                                b -= a;
 
946
                        if (b < a)
 
947
                                a -= b;
617
948
                }
618
 
                printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
 
949
                /* LCM == product / GCD */
 
950
                blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a;
619
951
 
 
952
                sysfs_free(sra);
620
953
                sra = sysfs_read(fd, 0,
621
954
                                 GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
622
955
                                 GET_CACHE);
 
956
 
623
957
                if (!sra) {
624
958
                        fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
625
959
                                devname);
626
 
                        return 1;
 
960
                        rv = 1;
 
961
                        break;
627
962
                }
628
963
 
629
 
                if (last_block >= sra->component_size/2) {
 
964
                if (ndata == odata) {
 
965
                        /* Make 'blocks' bigger for better throughput, but
 
966
                         * not so big that we reject it below.
 
967
                         * Try for 16 megabytes
 
968
                         */
 
969
                        while (blocks * 32 < sra->component_size &&
 
970
                               blocks < 16*1024*2)
 
971
                               blocks *= 2;
 
972
                } else
 
973
                        fprintf(stderr, Name ": Need to backup %luK of critical "
 
974
                                "section..\n", blocks/2);
 
975
 
 
976
                if (blocks >= sra->component_size/2) {
630
977
                        fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
631
978
                                devname);
632
 
                        return 1;
633
 
                }
634
 
                if (sra->array.spare_disks == 0 && backup_file == NULL) {
635
 
                        fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
636
 
                                devname);
637
 
                        return 1;
638
 
                }
639
 
 
640
 
                nrdisks = array.nr_disks + sra->array.spare_disks;
 
979
                        rv = 1;
 
980
                        break;
 
981
                }
 
982
                nrdisks = array.raid_disks + sra->array.spare_disks;
641
983
                /* Now we need to open all these devices so we can read/write.
642
984
                 */
643
985
                fdlist = malloc((1+nrdisks) * sizeof(int));
644
986
                offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
645
987
                if (!fdlist || !offsets) {
646
988
                        fprintf(stderr, Name ": malloc failed: grow aborted\n");
647
 
                        return 1;
 
989
                        rv = 1;
 
990
                        break;
648
991
                }
649
992
                for (d=0; d <= nrdisks; d++)
650
993
                        fdlist[d] = -1;
657
1000
                                                   sd->disk.minor, 1);
658
1001
                                fdlist[sd->disk.raid_disk]
659
1002
                                        = dev_open(dn, O_RDONLY);
660
 
                                offsets[sd->disk.raid_disk] = sd->data_offset;
 
1003
                                offsets[sd->disk.raid_disk] = sd->data_offset*512;
661
1004
                                if (fdlist[sd->disk.raid_disk] < 0) {
662
1005
                                        fprintf(stderr, Name ": %s: cannot open component %s\n",
663
1006
                                                devname, dn?dn:"-unknown-");
664
 
                                        goto abort;
 
1007
                                        rv = 1;
 
1008
                                        goto release;
665
1009
                                }
666
 
                        } else {
 
1010
                        } else if (backup_file == NULL) {
667
1011
                                /* spare */
668
1012
                                char *dn = map_dev(sd->disk.major,
669
1013
                                                   sd->disk.minor, 1);
670
1014
                                fdlist[d] = dev_open(dn, O_RDWR);
671
 
                                offsets[d] = sd->data_offset;
 
1015
                                offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512;
672
1016
                                if (fdlist[d]<0) {
673
1017
                                        fprintf(stderr, Name ": %s: cannot open component %s\n",
674
1018
                                                devname, dn?dn:"-unknown");
675
 
                                        goto abort;
 
1019
                                        rv = 1;
 
1020
                                        goto release;
676
1021
                                }
677
1022
                                d++;
678
1023
                        }
679
1024
                }
680
 
                for (i=0 ; i<array.raid_disks; i++)
681
 
                        if (fdlist[i] < 0) {
682
 
                                fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
683
 
                                        " --grow aborted\n", devname, i);
684
 
                                goto abort;
685
 
                        }
686
 
                spares = sra->array.spare_disks;
687
 
                if (backup_file) {
688
 
                        fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, 0600);
 
1025
                if (backup_file == NULL) {
 
1026
                        if (ndata <= odata) {
 
1027
                                fprintf(stderr, Name ": %s: Cannot grow - need backup-file\n",
 
1028
                                        devname);
 
1029
                                rv = 1;
 
1030
                                break;
 
1031
                        } else if (sra->array.spare_disks == 0) {
 
1032
                                fprintf(stderr, Name ": %s: Cannot grow - need a spare or "
 
1033
                                        "backup-file to backup critical section\n",
 
1034
                                        devname);
 
1035
                                rv = 1;
 
1036
                                break;
 
1037
                        }
 
1038
                        if (d == array.raid_disks) {
 
1039
                                fprintf(stderr, Name ": %s: No spare device for backup\n",
 
1040
                                        devname);
 
1041
                                rv = 1;
 
1042
                                break;
 
1043
                        }
 
1044
                } else {
 
1045
                        /* need to check backup file is large enough */
 
1046
                        char buf[512];
 
1047
                        fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL,
 
1048
                                     S_IRUSR | S_IWUSR);
 
1049
                        offsets[d] = 8 * 512;
689
1050
                        if (fdlist[d] < 0) {
690
1051
                                fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
691
1052
                                        devname, backup_file, strerror(errno));
692
 
                                goto abort;
693
 
                        }
694
 
                        offsets[d] = 8;
 
1053
                                rv = 1;
 
1054
                                break;
 
1055
                        }
 
1056
                        memset(buf, 0, 512);
 
1057
                        for (i=0; i < (signed)blocks + 1 ; i++) {
 
1058
                                if (write(fdlist[d], buf, 512) != 512) {
 
1059
                                        fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
 
1060
                                                devname, backup_file, strerror(errno));
 
1061
                                        rv = 1;
 
1062
                                        break;
 
1063
                                }
 
1064
                        }
 
1065
                        if (fsync(fdlist[d]) != 0) {
 
1066
                                fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
 
1067
                                        devname, backup_file, strerror(errno));
 
1068
                                rv = 1;
 
1069
                                break;
 
1070
                        }
695
1071
                        d++;
696
 
                        spares++;
697
 
                }
698
 
                if (fdlist[array.raid_disks] < 0) {
699
 
                        fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
700
 
                                devname);
701
 
                        goto abort;
702
 
                }
703
 
 
 
1072
                }
 
1073
 
 
1074
                /* lastly, check that the internal stripe cache is
 
1075
                 * large enough, or it won't work.
 
1076
                 */
 
1077
                
 
1078
                cache = (nchunk < ochunk) ? ochunk : nchunk;
 
1079
                cache = cache * 4 / 4096;
 
1080
                if (cache < blocks / 8 / odisks + 16)
 
1081
                        /* Make it big enough to hold 'blocks' */
 
1082
                        cache = blocks / 8 / odisks + 16;
 
1083
                if (sra->cache_size < cache)
 
1084
                        sysfs_set_num(sra, NULL, "stripe_cache_size",
 
1085
                                      cache+1);
 
1086
                /* Right, everything seems fine. Let's kick things off.
 
1087
                 * If only changing raid_disks, use ioctl, else use
 
1088
                 * sysfs.
 
1089
                 */
 
1090
                if (ochunk == nchunk && olayout == nlayout) {
 
1091
                        array.raid_disks = ndisks;
 
1092
                        if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
 
1093
                                int err = errno;
 
1094
                                rv = 1;
 
1095
                                fprintf(stderr, Name ": Cannot set device shape for %s: %s\n",
 
1096
                                        devname, strerror(errno));
 
1097
                                if (ndisks < odisks &&
 
1098
                                    get_linux_version() < 2006030)
 
1099
                                        fprintf(stderr, Name ": linux 2.6.30 or later required\n");
 
1100
                                if (err == EBUSY && 
 
1101
                                    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
 
1102
                                        fprintf(stderr, "       Bitmap must be removed before shape can be changed\n");
 
1103
 
 
1104
                                break;
 
1105
                        }
 
1106
                } else {
 
1107
                        /* set them all just in case some old 'new_*' value
 
1108
                         * persists from some earlier problem
 
1109
                         */
 
1110
                        int err = err; /* only used if rv==1, and always set if
 
1111
                                        * rv==1, so initialisation not needed,
 
1112
                                        * despite gcc warning
 
1113
                                        */
 
1114
                        if (sysfs_set_num(sra, NULL, "chunk_size", nchunk) < 0)
 
1115
                                rv = 1, err = errno;
 
1116
                        if (!rv && sysfs_set_num(sra, NULL, "layout", nlayout) < 0)
 
1117
                                rv = 1, err = errno;
 
1118
                        if (!rv && sysfs_set_num(sra, NULL, "raid_disks", ndisks) < 0)
 
1119
                                rv = 1, err = errno;
 
1120
                        if (rv) {
 
1121
                                fprintf(stderr, Name ": Cannot set device shape for %s\n",
 
1122
                                        devname);
 
1123
                                if (get_linux_version() < 2006030)
 
1124
                                        fprintf(stderr, Name ": linux 2.6.30 or later required\n");
 
1125
                                if (err == EBUSY && 
 
1126
                                    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
 
1127
                                        fprintf(stderr, "       Bitmap must be removed before shape can be changed\n");
 
1128
                                break;
 
1129
                        }
 
1130
                }
 
1131
 
 
1132
                if (ndisks == 2 && odisks == 2) {
 
1133
                        /* No reshape is needed in this trivial case */
 
1134
                        rv = 0;
 
1135
                        break;
 
1136
                }
 
1137
 
 
1138
                /* set up the backup-super-block.  This requires the
 
1139
                 * uuid from the array.
 
1140
                 */
704
1141
                /* Find a superblock */
705
 
                if (st->ss->load_super(st, fdlist[0], NULL)) {
 
1142
                for (sd = sra->devs; sd; sd = sd->next) {
 
1143
                        char *dn;
 
1144
                        int devfd;
 
1145
                        int ok;
 
1146
                        if (sd->disk.state & (1<<MD_DISK_FAULTY))
 
1147
                                continue;
 
1148
                        dn = map_dev(sd->disk.major, sd->disk.minor, 1);
 
1149
                        devfd = dev_open(dn, O_RDONLY);
 
1150
                        if (devfd < 0)
 
1151
                                continue;
 
1152
                        ok = st->ss->load_super(st, devfd, NULL);
 
1153
                        close(devfd);
 
1154
                        if (ok >= 0)
 
1155
                                break;
 
1156
                }
 
1157
                if (!sd) {
706
1158
                        fprintf(stderr, Name ": %s: Cannot find a superblock\n",
707
1159
                                devname);
708
 
                        goto abort;
 
1160
                        rv = 1;
 
1161
                        break;
709
1162
                }
710
1163
 
711
 
 
 
1164
                memset(&bsb, 0, 512);
712
1165
                memcpy(bsb.magic, "md_backup_data-1", 16);
713
1166
                st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
714
1167
                bsb.mtime = __cpu_to_le64(time(0));
715
 
                bsb.arraystart = 0;
716
 
                bsb.length = __cpu_to_le64(last_block);
 
1168
                bsb.devstart2 = blocks;
 
1169
                stripes = blocks / (ochunk/512) / odata;
 
1170
                /* Now we just need to kick off the reshape and watch, while
 
1171
                 * handling backups of the data...
 
1172
                 * This is all done by a forked background process.
 
1173
                 */
 
1174
                switch(fork()) {
 
1175
                case 0:
 
1176
                        close(fd);
 
1177
                        if (check_env("MDADM_GROW_VERIFY"))
 
1178
                                fd = open(devname, O_RDONLY | O_DIRECT);
 
1179
                        else
 
1180
                                fd = -1;
 
1181
                        mlockall(MCL_FUTURE);
717
1182
 
718
 
                /* Decide offset for the backup, llseek the spares, and write
719
 
                 * a leading superblock 4K earlier.
720
 
                 */
721
 
                for (i=array.raid_disks; i<d; i++) {
722
 
                        char buf[4096];
723
 
                        if (i==d-1 && backup_file) {
724
 
                                /* This is the backup file */
725
 
                                offsets[i] = 8;
726
 
                        } else
727
 
                                offsets[i] += sra->component_size - last_block - 8;
728
 
                        if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
729
 
                            != (offsets[i]<<9) - 4096) {
730
 
                                fprintf(stderr, Name ": could not seek...\n");
731
 
                                goto abort;
732
 
                        }
733
 
                        memset(buf, 0, sizeof(buf));
734
 
                        bsb.devstart = __cpu_to_le64(offsets[i]);
735
 
                        bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
736
 
                        memcpy(buf, &bsb, sizeof(bsb));
737
 
                        if (write(fdlist[i], buf, 4096) != 4096) {
738
 
                                fprintf(stderr, Name ": could not write leading superblock\n");
739
 
                                goto abort;
740
 
                        }
741
 
                }
742
 
                array.level = nlevel;
743
 
                array.raid_disks = ndisks;
744
 
                array.chunk_size = nchunk;
745
 
                array.layout = nlayout;
746
 
                if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
747
 
                        if (errno == ENOSPC) {
748
 
                                /* stripe cache is not big enough.
749
 
                                 * It needs to be 4 times the chunk size,
750
 
                                 * and we assume pagesize is 4K
 
1183
                        if (odata < ndata)
 
1184
                                done = child_grow(fd, sra, stripes,
 
1185
                                                  fdlist, offsets,
 
1186
                                                  odisks, ochunk, array.level, olayout, odata,
 
1187
                                                  d - odisks, fdlist+odisks, offsets+odisks);
 
1188
                        else if (odata > ndata)
 
1189
                                done = child_shrink(fd, sra, stripes,
 
1190
                                                    fdlist, offsets,
 
1191
                                                    odisks, ochunk, array.level, olayout, odata,
 
1192
                                                    d - odisks, fdlist+odisks, offsets+odisks);
 
1193
                        else
 
1194
                                done = child_same_size(fd, sra, stripes,
 
1195
                                                       fdlist, offsets,
 
1196
                                                       0,
 
1197
                                                       odisks, ochunk, array.level, olayout, odata,
 
1198
                                                       d - odisks, fdlist+odisks, offsets+odisks);
 
1199
                        if (backup_file && done)
 
1200
                                unlink(backup_file);
 
1201
                        if (level != UnSet && level != array.level) {
 
1202
                                /* We need to wait for the reshape to finish
 
1203
                                 * (which will have happened unless odata < ndata)
 
1204
                                 * and then set the level
751
1205
                                 */
752
 
                                if (sra->cache_size < 4 * (nchunk/4096)) {
753
 
                                        sysfs_set_num(sra, NULL,
754
 
                                                      "stripe_cache_size",
755
 
                                                      4 * (nchunk/4096) +1);
756
 
                                        if (ioctl(fd, SET_ARRAY_INFO,
757
 
                                                  &array) == 0)
758
 
                                                goto ok;
759
 
                                }
760
 
                        }
761
 
                        fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
762
 
                                devname, strerror(errno));
763
 
                        goto abort;
764
 
                }
765
 
                ok: ;
766
 
 
767
 
                /* suspend the relevant region */
768
 
                sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
769
 
                if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
770
 
                    sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
771
 
                        fprintf(stderr, Name ": %s: failed to suspend device.\n",
772
 
                                devname);
773
 
                        goto abort_resume;
774
 
                }
775
 
 
776
 
 
777
 
                err = save_stripes(fdlist, offsets,
778
 
                                   odisks, ochunk, olevel, olayout,
779
 
                                   spares, fdlist+odisks,
780
 
                                   0ULL, last_block*512);
781
 
 
782
 
                /* abort if there was an error */
783
 
                if (err < 0) {
784
 
                        fprintf(stderr, Name ": %s: failed to save critical region\n",
785
 
                                devname);
786
 
                        goto abort_resume;
787
 
                }
788
 
 
789
 
                for (i=odisks; i<d ; i++) {
790
 
                        bsb.devstart = __cpu_to_le64(offsets[i]);
791
 
                        bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
792
 
                        if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
793
 
                            write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
794
 
                            fsync(fdlist[i]) != 0) {
795
 
                                fprintf(stderr, Name ": %s: fail to save metadata for critical region backups.\n",
796
 
                                        devname);
797
 
                                goto abort_resume;
798
 
                        }
799
 
                }
800
 
 
801
 
                /* start the reshape happening */
802
 
                if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
803
 
                        fprintf(stderr, Name ": %s: failed to initiate reshape\n",
804
 
                                devname);
805
 
                        goto abort_resume;
806
 
                }
807
 
                /* wait for reshape to pass the critical region */
808
 
                while(1) {
809
 
                        unsigned long long comp;
810
 
                        if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
811
 
                                sleep(5);
812
 
                                break;
813
 
                        }
814
 
                        if (comp >= nstripe)
815
 
                                break;
816
 
                        sleep(1);
817
 
                }
818
 
 
819
 
                /* invalidate superblocks */
820
 
                memset(&bsb, 0, sizeof(bsb));
821
 
                for (i=odisks; i<d ; i++) {
822
 
                        lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
823
 
                        if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
824
 
                                fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
825
 
                                        devname, i);
826
 
                        }
827
 
                }
828
 
 
829
 
                /* unsuspend. */
830
 
                sysfs_set_num(sra, NULL, "suspend_lo", last_block);
831
 
 
832
 
                for (i=0; i<d; i++)
833
 
                        if (fdlist[i] >= 0)
834
 
                                close(fdlist[i]);
835
 
                free(fdlist);
836
 
                free(offsets);
837
 
                if (backup_file)
838
 
                        unlink(backup_file);
839
 
 
840
 
                printf(Name ": ... critical section passed.\n");
 
1206
 
 
1207
                                c = map_num(pers, level);
 
1208
                                if (c == NULL)
 
1209
                                        exit(0);/* not possible */
 
1210
 
 
1211
                                if (odata < ndata)
 
1212
                                        wait_reshape(sra);
 
1213
                                err = sysfs_set_str(sra, NULL, "level", c);
 
1214
                                if (err)
 
1215
                                        fprintf(stderr, Name ": %s: could not set level to %s\n",
 
1216
                                                devname, c);
 
1217
                        }
 
1218
                        exit(0);
 
1219
                case -1:
 
1220
                        fprintf(stderr, Name ": Cannot run child to monitor reshape: %s\n",
 
1221
                                strerror(errno));
 
1222
                        rv = 1;
 
1223
                        break;
 
1224
                default:
 
1225
                        /* The child will take care of unfreezing the array */
 
1226
                        frozen = 0;
 
1227
                        break;
 
1228
                }
841
1229
                break;
842
 
        }
843
 
        return 0;
844
 
 
845
 
 
846
 
 abort_resume:
847
 
        sysfs_set_num(sra, NULL, "suspend_lo", last_block);
848
 
 abort:
849
 
        for (i=0; i<array.nr_disks; i++)
850
 
                if (fdlist[i] >= 0)
851
 
                        close(fdlist[i]);
852
 
        free(fdlist);
853
 
        free(offsets);
854
 
        if (backup_file)
855
 
                unlink(backup_file);
856
 
        return 1;
857
 
 
 
1230
 
 
1231
        }
 
1232
 
 
1233
 release:
 
1234
        if (rv && orig_level != UnSet && sra) {
 
1235
                c = map_num(pers, orig_level);
 
1236
                if (c && sysfs_set_str(sra, NULL, "level", c) == 0)
 
1237
                        fprintf(stderr, Name ": aborting level change\n");
 
1238
        }
 
1239
        if (sra)
 
1240
                unfreeze_array(sra, frozen);
 
1241
        return rv;
 
1242
}
 
1243
 
 
1244
/*
 
1245
 * We run a child process in the background which performs the following
 
1246
 * steps:
 
1247
 *   - wait for resync to reach a certain point
 
1248
 *   - suspend io to the following section
 
1249
 *   - backup that section
 
1250
 *   - allow resync to proceed further
 
1251
 *   - resume io
 
1252
 *   - discard the backup.
 
1253
 *
 
1254
 * These are combined in slightly different ways in the three cases.
 
1255
 * Grow:
 
1256
 *   - suspend/backup/allow/wait/resume/discard
 
1257
 * Shrink:
 
1258
 *   - allow/wait/suspend/backup/allow/wait/resume/discard
 
1259
 * same-size:
 
1260
 *   - wait/resume/discard/suspend/backup/allow
 
1261
 *
 
1262
 * suspend/backup/allow always come together
 
1263
 * wait/resume/discard do too.
 
1264
 * For the same-size case we have two backups to improve flow.
 
1265
 * 
 
1266
 */
 
1267
 
 
1268
/* FIXME return status is never checked */
 
1269
int grow_backup(struct mdinfo *sra,
 
1270
                unsigned long long offset, /* per device */
 
1271
                unsigned long stripes, /* per device */
 
1272
                int *sources, unsigned long long *offsets,
 
1273
                int disks, int chunk, int level, int layout,
 
1274
                int dests, int *destfd, unsigned long long *destoffsets,
 
1275
                int part, int *degraded,
 
1276
                char *buf)
 
1277
{
 
1278
        /* Backup 'blocks' sectors at 'offset' on each device of the array,
 
1279
         * to storage 'destfd' (offset 'destoffsets'), after first
 
1280
         * suspending IO.  Then allow resync to continue
 
1281
         * over the suspended section.
 
1282
         * Use part 'part' of the backup-super-block.
 
1283
         */
 
1284
        int odata = disks;
 
1285
        int rv = 0;
 
1286
        int i;
 
1287
        unsigned long long ll;
 
1288
        int new_degraded;
 
1289
        //printf("offset %llu\n", offset);
 
1290
        if (level >= 4)
 
1291
                odata--;
 
1292
        if (level == 6)
 
1293
                odata--;
 
1294
        sysfs_set_num(sra, NULL, "suspend_hi", (offset + stripes * (chunk/512)) * odata);
 
1295
        /* Check that array hasn't become degraded, else we might backup the wrong data */
 
1296
        sysfs_get_ll(sra, NULL, "degraded", &ll);
 
1297
        new_degraded = (int)ll;
 
1298
        if (new_degraded != *degraded) {
 
1299
                /* check each device to ensure it is still working */
 
1300
                struct mdinfo *sd;
 
1301
                for (sd = sra->devs ; sd ; sd = sd->next) {
 
1302
                        if (sd->disk.state & (1<<MD_DISK_FAULTY))
 
1303
                                continue;
 
1304
                        if (sd->disk.state & (1<<MD_DISK_SYNC)) {
 
1305
                                char sbuf[20];
 
1306
                                if (sysfs_get_str(sra, sd, "state", sbuf, 20) < 0 ||
 
1307
                                    strstr(sbuf, "faulty") ||
 
1308
                                    strstr(sbuf, "in_sync") == NULL) {
 
1309
                                        /* this device is dead */
 
1310
                                        sd->disk.state = (1<<MD_DISK_FAULTY);
 
1311
                                        if (sd->disk.raid_disk >= 0 &&
 
1312
                                            sources[sd->disk.raid_disk] >= 0) {
 
1313
                                                close(sources[sd->disk.raid_disk]);
 
1314
                                                sources[sd->disk.raid_disk] = -1;
 
1315
                                        }
 
1316
                                }
 
1317
                        }
 
1318
                }
 
1319
                *degraded = new_degraded;
 
1320
        }
 
1321
        if (part) {
 
1322
                bsb.arraystart2 = __cpu_to_le64(offset * odata);
 
1323
                bsb.length2 = __cpu_to_le64(stripes * (chunk/512) * odata);
 
1324
        } else {
 
1325
                bsb.arraystart = __cpu_to_le64(offset * odata);
 
1326
                bsb.length = __cpu_to_le64(stripes * (chunk/512) * odata);
 
1327
        }
 
1328
        if (part)
 
1329
                bsb.magic[15] = '2';
 
1330
        for (i = 0; i < dests; i++)
 
1331
                if (part)
 
1332
                        lseek64(destfd[i], destoffsets[i] + __le64_to_cpu(bsb.devstart2)*512, 0);
 
1333
                else
 
1334
                        lseek64(destfd[i], destoffsets[i], 0);
 
1335
 
 
1336
        rv = save_stripes(sources, offsets, 
 
1337
                          disks, chunk, level, layout,
 
1338
                          dests, destfd,
 
1339
                          offset*512*odata, stripes * chunk * odata,
 
1340
                          buf);
 
1341
 
 
1342
        if (rv)
 
1343
                return rv;
 
1344
        bsb.mtime = __cpu_to_le64(time(0));
 
1345
        for (i = 0; i < dests; i++) {
 
1346
                bsb.devstart = __cpu_to_le64(destoffsets[i]/512);
 
1347
 
 
1348
                bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
 
1349
                if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
 
1350
                        bsb.sb_csum2 = bsb_csum((char*)&bsb,
 
1351
                                                ((char*)&bsb.sb_csum2)-((char*)&bsb));
 
1352
 
 
1353
                rv = -1;
 
1354
                if ((unsigned long long)lseek64(destfd[i], destoffsets[i] - 4096, 0)
 
1355
                    != destoffsets[i] - 4096)
 
1356
                        break;
 
1357
                if (write(destfd[i], &bsb, 512) != 512)
 
1358
                        break;
 
1359
                if (destoffsets[i] > 4096) {
 
1360
                        if ((unsigned long long)lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0) !=
 
1361
                            destoffsets[i]+stripes*chunk*odata)
 
1362
                                break;
 
1363
                        if (write(destfd[i], &bsb, 512) != 512)
 
1364
                                break;
 
1365
                }
 
1366
                fsync(destfd[i]);
 
1367
                rv = 0;
 
1368
        }
 
1369
 
 
1370
        return rv;
 
1371
}
 
1372
 
 
1373
/* in 2.6.30, the value reported by sync_completed can be
 
1374
 * less than it should be by one stripe.
 
1375
 * This only happens when reshape hits sync_max and pauses.
 
1376
 * So allow wait_backup to either extend sync_max further
 
1377
 * than strictly necessary, or return before the
 
1378
 * sync has got quite as far as we would really like.
 
1379
 * This is what 'blocks2' is for.
 
1380
 * The various callers give appropriate values so that
 
1381
 * everything works.
 
1382
 */
 
1383
/* FIXME return value is often ignored */
 
1384
int wait_backup(struct mdinfo *sra,
 
1385
                unsigned long long offset, /* per device */
 
1386
                unsigned long long blocks, /* per device */
 
1387
                unsigned long long blocks2, /* per device - hack */
 
1388
                int dests, int *destfd, unsigned long long *destoffsets,
 
1389
                int part)
 
1390
{
 
1391
        /* Wait for resync to pass the section that was backed up
 
1392
         * then erase the backup and allow IO
 
1393
         */
 
1394
        int fd = sysfs_get_fd(sra, NULL, "sync_completed");
 
1395
        unsigned long long completed;
 
1396
        int i;
 
1397
        int rv;
 
1398
 
 
1399
        if (fd < 0)
 
1400
                return -1;
 
1401
        sysfs_set_num(sra, NULL, "sync_max", offset + blocks + blocks2);
 
1402
        if (offset == 0)
 
1403
                sysfs_set_str(sra, NULL, "sync_action", "reshape");
 
1404
        do {
 
1405
                char action[20];
 
1406
                fd_set rfds;
 
1407
                FD_ZERO(&rfds);
 
1408
                FD_SET(fd, &rfds);
 
1409
                select(fd+1, NULL, NULL, &rfds, NULL);
 
1410
                if (sysfs_fd_get_ll(fd, &completed) < 0) {
 
1411
                        close(fd);
 
1412
                        return -1;
 
1413
                }
 
1414
                if (sysfs_get_str(sra, NULL, "sync_action",
 
1415
                                  action, 20) > 0 &&
 
1416
                    strncmp(action, "reshape", 7) != 0)
 
1417
                        break;
 
1418
        } while (completed < offset + blocks);
 
1419
        close(fd);
 
1420
 
 
1421
        if (part) {
 
1422
                bsb.arraystart2 = __cpu_to_le64(0);
 
1423
                bsb.length2 = __cpu_to_le64(0);
 
1424
        } else {
 
1425
                bsb.arraystart = __cpu_to_le64(0);
 
1426
                bsb.length = __cpu_to_le64(0);
 
1427
        }
 
1428
        bsb.mtime = __cpu_to_le64(time(0));
 
1429
        rv = 0;
 
1430
        for (i = 0; i < dests; i++) {
 
1431
                bsb.devstart = __cpu_to_le64(destoffsets[i]/512);
 
1432
                bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
 
1433
                if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
 
1434
                        bsb.sb_csum2 = bsb_csum((char*)&bsb,
 
1435
                                                ((char*)&bsb.sb_csum2)-((char*)&bsb));
 
1436
                if ((unsigned long long)lseek64(destfd[i], destoffsets[i]-4096, 0) !=
 
1437
                    destoffsets[i]-4096)
 
1438
                        rv = -1;
 
1439
                if (rv == 0 && 
 
1440
                    write(destfd[i], &bsb, 512) != 512)
 
1441
                        rv = -1;
 
1442
                fsync(destfd[i]);
 
1443
        }
 
1444
        return rv;
 
1445
}
 
1446
 
 
1447
/* Write 'msg' and a newline to stderr (fd 2) and terminate the process.
 * The exit status is 2 when both writes completed in full and 1 when
 * either of them did not.
 */
static void fail(char *msg)
{
        size_t msglen = strlen(msg);
        int write_failed = 0;

        if (write(2, msg, msglen) != (int)msglen)
                write_failed = 1;
        if (write(2, "\n", 1) != 1)
                write_failed = 1;

        if (write_failed)
                exit(1);
        exit(2);
}
 
1454
 
 
1455
static char *abuf, *bbuf;
 
1456
static unsigned long long abuflen;
 
1457
static void validate(int afd, int bfd, unsigned long long offset)
 
1458
{
 
1459
        /* check that the data in the backup against the array.
 
1460
         * This is only used for regression testing and should not
 
1461
         * be used while the array is active
 
1462
         */
 
1463
        if (afd < 0)
 
1464
                return;
 
1465
        lseek64(bfd, offset - 4096, 0);
 
1466
        if (read(bfd, &bsb2, 512) != 512)
 
1467
                fail("cannot read bsb");
 
1468
        if (bsb2.sb_csum != bsb_csum((char*)&bsb2,
 
1469
                                     ((char*)&bsb2.sb_csum)-((char*)&bsb2)))
 
1470
                fail("first csum bad");
 
1471
        if (memcmp(bsb2.magic, "md_backup_data", 14) != 0)
 
1472
                fail("magic is bad");
 
1473
        if (memcmp(bsb2.magic, "md_backup_data-2", 16) == 0 &&
 
1474
            bsb2.sb_csum2 != bsb_csum((char*)&bsb2,
 
1475
                                     ((char*)&bsb2.sb_csum2)-((char*)&bsb2)))
 
1476
                fail("second csum bad");
 
1477
 
 
1478
        if (__le64_to_cpu(bsb2.devstart)*512 != offset)
 
1479
                fail("devstart is wrong");
 
1480
 
 
1481
        if (bsb2.length) {
 
1482
                unsigned long long len = __le64_to_cpu(bsb2.length)*512;
 
1483
 
 
1484
                if (abuflen < len) {
 
1485
                        free(abuf);
 
1486
                        free(bbuf);
 
1487
                        abuflen = len;
 
1488
                        if (posix_memalign((void**)&abuf, 4096, abuflen) ||
 
1489
                            posix_memalign((void**)&bbuf, 4096, abuflen)) {
 
1490
                                abuflen = 0;
 
1491
                                /* just stop validating on mem-alloc failure */
 
1492
                                return;
 
1493
                        }
 
1494
                }
 
1495
 
 
1496
                lseek64(bfd, offset, 0);
 
1497
                if ((unsigned long long)read(bfd, bbuf, len) != len) {
 
1498
                        //printf("len %llu\n", len);
 
1499
                        fail("read first backup failed");
 
1500
                }
 
1501
                lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0);
 
1502
                if ((unsigned long long)read(afd, abuf, len) != len)
 
1503
                        fail("read first from array failed");
 
1504
                if (memcmp(bbuf, abuf, len) != 0) {
 
1505
                        #if 0
 
1506
                        int i;
 
1507
                        printf("offset=%llu len=%llu\n",
 
1508
                               (unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len);
 
1509
                        for (i=0; i<len; i++)
 
1510
                                if (bbuf[i] != abuf[i]) {
 
1511
                                        printf("first diff byte %d\n", i);
 
1512
                                        break;
 
1513
                                }
 
1514
                        #endif
 
1515
                        fail("data1 compare failed");
 
1516
                }
 
1517
        }
 
1518
        if (bsb2.length2) {
 
1519
                unsigned long long len = __le64_to_cpu(bsb2.length2)*512;
 
1520
 
 
1521
                if (abuflen < len) {
 
1522
                        free(abuf);
 
1523
                        free(bbuf);
 
1524
                        abuflen = len;
 
1525
                        abuf = malloc(abuflen);
 
1526
                        bbuf = malloc(abuflen);
 
1527
                }
 
1528
 
 
1529
                lseek64(bfd, offset+__le64_to_cpu(bsb2.devstart2)*512, 0);
 
1530
                if ((unsigned long long)read(bfd, bbuf, len) != len)
 
1531
                        fail("read second backup failed");
 
1532
                lseek64(afd, __le64_to_cpu(bsb2.arraystart2)*512, 0);
 
1533
                if ((unsigned long long)read(afd, abuf, len) != len)
 
1534
                        fail("read second from array failed");
 
1535
                if (memcmp(bbuf, abuf, len) != 0)
 
1536
                        fail("data2 compare failed");
 
1537
        }
 
1538
}
 
1539
 
 
1540
/* Grow case: suspend/backup/allow/wait/resume/discard for the first
 * 'stripes' chunks of every device.
 * Returns 0 without touching the array when the bounce buffer cannot be
 * allocated, and 1 once the sequence has been run.
 */
static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
                      int *fds, unsigned long long *offsets,
                      int disks, int chunk, int level, int layout, int data,
                      int dests, int *destfd, unsigned long long *destoffsets)
{
        /* size of the backed-up region, in sectors per device */
        unsigned long sectors = stripes * (chunk / 512);
        int degraded = 0;
        char *buf;

        if (posix_memalign((void**)&buf, 4096, disks * chunk) != 0) {
                /* Don't start the 'reshape' */
                return 0;
        }

        sysfs_set_num(sra, NULL, "suspend_hi", 0);
        sysfs_set_num(sra, NULL, "suspend_lo", 0);

        grow_backup(sra, 0, stripes,
                    fds, offsets, disks, chunk, level, layout,
                    dests, destfd, destoffsets,
                    0, &degraded, buf);
        validate(afd, destfd[0], destoffsets[0]);

        wait_backup(sra, 0, sectors, sectors,
                    dests, destfd, destoffsets,
                    0);

        /* resume IO: move suspend_lo past the region that was backed up */
        sysfs_set_num(sra, NULL, "suspend_lo", sectors * data);
        free(buf);

        /* FIXME this should probably be numeric */
        sysfs_set_str(sra, NULL, "sync_max", "max");
        return 1;
}
 
1567
 
 
1568
static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
 
1569
                        int *fds, unsigned long long *offsets,
 
1570
                        int disks, int chunk, int level, int layout, int data,
 
1571
                        int dests, int *destfd, unsigned long long *destoffsets)
 
1572
{
 
1573
        char *buf;
 
1574
        unsigned long long start;
 
1575
        int rv;
 
1576
        int degraded = 0;
 
1577
 
 
1578
        if (posix_memalign((void**)&buf, 4096, disks * chunk))
 
1579
                return 0;
 
1580
        start = sra->component_size - stripes * (chunk/512);
 
1581
        sysfs_set_num(sra, NULL, "sync_max", start);
 
1582
        sysfs_set_str(sra, NULL, "sync_action", "reshape");
 
1583
        sysfs_set_num(sra, NULL, "suspend_lo", 0);
 
1584
        sysfs_set_num(sra, NULL, "suspend_hi", 0);
 
1585
        rv = wait_backup(sra, 0, start - stripes * (chunk/512), stripes * (chunk/512),
 
1586
                         dests, destfd, destoffsets, 0);
 
1587
        if (rv < 0)
 
1588
                return 0;
 
1589
        grow_backup(sra, 0, stripes,
 
1590
                    fds, offsets,
 
1591
                    disks, chunk, level, layout,
 
1592
                    dests, destfd, destoffsets,
 
1593
                    0, &degraded, buf);
 
1594
        validate(afd, destfd[0], destoffsets[0]);
 
1595
        wait_backup(sra, start, stripes*(chunk/512), 0,
 
1596
                    dests, destfd, destoffsets, 0);
 
1597
        sysfs_set_num(sra, NULL, "suspend_lo", (stripes * (chunk/512)) * data);
 
1598
        free(buf);
 
1599
        /* FIXME this should probably be numeric */
 
1600
        sysfs_set_str(sra, NULL, "sync_max", "max");
 
1601
        return 1;
 
1602
}
 
1603
 
 
1604
static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
 
1605
                           int *fds, unsigned long long *offsets,
 
1606
                           unsigned long long start,
 
1607
                           int disks, int chunk, int level, int layout, int data,
 
1608
                           int dests, int *destfd, unsigned long long *destoffsets)
 
1609
{
 
1610
        unsigned long long size;
 
1611
        unsigned long tailstripes = stripes;
 
1612
        int part;
 
1613
        char *buf;
 
1614
        unsigned long long speed;
 
1615
        int degraded = 0;
 
1616
 
 
1617
 
 
1618
        if (posix_memalign((void**)&buf, 4096, disks * chunk))
 
1619
                return 0;
 
1620
 
 
1621
        sysfs_set_num(sra, NULL, "suspend_lo", 0);
 
1622
        sysfs_set_num(sra, NULL, "suspend_hi", 0);
 
1623
 
 
1624
        sysfs_get_ll(sra, NULL, "sync_speed_min", &speed);
 
1625
        sysfs_set_num(sra, NULL, "sync_speed_min", 200000);
 
1626
 
 
1627
        grow_backup(sra, start, stripes,
 
1628
                    fds, offsets,
 
1629
                    disks, chunk, level, layout,
 
1630
                    dests, destfd, destoffsets,
 
1631
                    0, &degraded, buf);
 
1632
        grow_backup(sra, (start + stripes) * (chunk/512), stripes,
 
1633
                    fds, offsets,
 
1634
                    disks, chunk, level, layout,
 
1635
                    dests, destfd, destoffsets,
 
1636
                    1, &degraded, buf);
 
1637
        validate(afd, destfd[0], destoffsets[0]);
 
1638
        part = 0;
 
1639
        start += stripes * 2; /* where to read next */
 
1640
        size = sra->component_size / (chunk/512);
 
1641
        while (start < size) {
 
1642
                if (wait_backup(sra, (start-stripes*2)*(chunk/512),
 
1643
                                stripes*(chunk/512), 0,
 
1644
                                dests, destfd, destoffsets,
 
1645
                                part) < 0)
 
1646
                        return 0;
 
1647
                sysfs_set_num(sra, NULL, "suspend_lo", start*(chunk/512) * data);
 
1648
                if (start + stripes > size)
 
1649
                        tailstripes = (size - start);
 
1650
 
 
1651
                grow_backup(sra, start*(chunk/512), tailstripes,
 
1652
                            fds, offsets,
 
1653
                            disks, chunk, level, layout,
 
1654
                            dests, destfd, destoffsets,
 
1655
                            part, &degraded, buf);
 
1656
                start += stripes;
 
1657
                part = 1 - part;
 
1658
                validate(afd, destfd[0], destoffsets[0]);
 
1659
        }
 
1660
        if (wait_backup(sra, (start-stripes*2) * (chunk/512), stripes * (chunk/512), 0,
 
1661
                        dests, destfd, destoffsets,
 
1662
                        part) < 0)
 
1663
                return 0;
 
1664
        sysfs_set_num(sra, NULL, "suspend_lo", ((start-stripes)*(chunk/512)) * data);
 
1665
        wait_backup(sra, (start-stripes) * (chunk/512), tailstripes * (chunk/512), 0,
 
1666
                    dests, destfd, destoffsets,
 
1667
                    1-part);
 
1668
        sysfs_set_num(sra, NULL, "suspend_lo", (size*(chunk/512)) * data);
 
1669
        sysfs_set_num(sra, NULL, "sync_speed_min", speed);
 
1670
        free(buf);
 
1671
        return 1;
858
1672
}
859
1673
 
860
1674
/*
862
1676
 * write that data into the array and update the super blocks with
863
1677
 * the new reshape_progress
864
1678
 */
865
 
int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
 
1679
int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt,
 
1680
                 char *backup_file, int verbose)
866
1681
{
867
1682
        int i, j;
868
1683
        int old_disks;
869
1684
        unsigned long long *offsets;
870
 
        unsigned long long  nstripe, ostripe, last_block;
 
1685
        unsigned long long  nstripe, ostripe;
871
1686
        int ndata, odata;
872
1687
 
873
 
        if (info->delta_disks < 0)
874
 
                return 1; /* cannot handle a shrink */
875
 
        if (info->new_level != info->array.level ||
876
 
            info->new_layout != info->array.layout ||
877
 
            info->new_chunk != info->array.chunk_size)
878
 
                return 1; /* Can only handle change in disks */
 
1688
        if (info->new_level != info->array.level)
 
1689
                return 1; /* Cannot handle level changes (they are instantaneous) */
 
1690
 
 
1691
        odata = info->array.raid_disks - info->delta_disks - 1;
 
1692
        if (info->array.level == 6) odata--; /* number of data disks */
 
1693
        ndata = info->array.raid_disks - 1;
 
1694
        if (info->new_level == 6) ndata--;
879
1695
 
880
1696
        old_disks = info->array.raid_disks - info->delta_disks;
881
1697
 
 
1698
        if (info->delta_disks <= 0)
 
1699
                /* Didn't grow, so the backup file must have
 
1700
                 * been used
 
1701
                 */
 
1702
                old_disks = cnt;
882
1703
        for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
883
1704
                struct mdinfo dinfo;
884
 
                struct mdp_backup_super bsb;
885
 
                char buf[4096];
886
1705
                int fd;
 
1706
                int bsbsize;
 
1707
                char *devname, namebuf[20];
887
1708
 
888
1709
                /* This was a spare and may have some saved data on it.
889
1710
                 * Load the superblock, find and load the
894
1715
                 */
895
1716
                if (i == old_disks-1) {
896
1717
                        fd = open(backup_file, O_RDONLY);
897
 
                        if (fd<0)
 
1718
                        if (fd<0) {
 
1719
                                fprintf(stderr, Name ": backup file %s inaccessible: %s\n",
 
1720
                                        backup_file, strerror(errno));
898
1721
                                continue;
 
1722
                        }
 
1723
                        devname = backup_file;
899
1724
                } else {
900
1725
                        fd = fdlist[i];
901
1726
                        if (fd < 0)
908
1733
 
909
1734
                        if (lseek64(fd,
910
1735
                                    (dinfo.data_offset + dinfo.component_size - 8) <<9,
911
 
                                    0) < 0)
 
1736
                                    0) < 0) {
 
1737
                                fprintf(stderr, Name ": Cannot seek on device %d\n", i);
912
1738
                                continue; /* Cannot seek */
 
1739
                        }
 
1740
                        sprintf(namebuf, "device-%d", i);
 
1741
                        devname = namebuf;
913
1742
                }
914
 
                if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
 
1743
                if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb)) {
 
1744
                        if (verbose)
 
1745
                                fprintf(stderr, Name ": Cannot read from %s\n", devname);
915
1746
                        continue; /* Cannot read */
916
 
                if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
 
1747
                }
 
1748
                if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0 &&
 
1749
                    memcmp(bsb.magic, "md_backup_data-2", 16) != 0) {
 
1750
                        if (verbose)
 
1751
                                fprintf(stderr, Name ": No backup metadata on %s\n", devname);
917
1752
                        continue;
918
 
                if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
 
1753
                }
 
1754
                if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb))) {
 
1755
                        if (verbose)
 
1756
                                fprintf(stderr, Name ": Bad backup-metadata checksum on %s\n", devname);
919
1757
                        continue; /* bad checksum */
920
 
                if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
 
1758
                }
 
1759
                if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0 &&
 
1760
                    bsb.sb_csum2 != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum2)-((char*)&bsb))) {
 
1761
                        if (verbose)
 
1762
                                fprintf(stderr, Name ": Bad backup-metadata checksum2 on %s\n", devname);
 
1763
                        continue; /* Bad second checksum */
 
1764
                }
 
1765
                if (memcmp(bsb.set_uuid,info->uuid, 16) != 0) {
 
1766
                        if (verbose)
 
1767
                                fprintf(stderr, Name ": Wrong uuid on backup-metadata on %s\n", devname);
921
1768
                        continue; /* Wrong uuid */
922
 
 
923
 
                if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
924
 
                    info->array.utime < __le64_to_cpu(bsb.mtime))
925
 
                        continue; /* time stamp is too bad */
926
 
 
927
 
                if (__le64_to_cpu(bsb.arraystart) != 0)
928
 
                        continue; /* Can only handle backup from start of array */
929
 
                if (__le64_to_cpu(bsb.length) <
930
 
                    info->reshape_progress)
931
 
                        continue; /* No new data here */
932
 
 
933
 
                if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
 
1769
                }
 
1770
 
 
1771
                /* array utime and backup-mtime should be updated at much the same time, but it seems that
 
1772
                 * sometimes they aren't... So allow considerable flexability in matching, and allow
 
1773
                 * this test to be overridden by an environment variable.
 
1774
                 */
 
1775
                if (info->array.utime > (int)__le64_to_cpu(bsb.mtime) + 2*60*60 ||
 
1776
                    info->array.utime < (int)__le64_to_cpu(bsb.mtime) - 10*60) {
 
1777
                        if (check_env("MDADM_GROW_ALLOW_OLD")) {
 
1778
                                fprintf(stderr, Name ": accepting backup with timestamp %lu "
 
1779
                                        "for array with timestamp %lu\n",
 
1780
                                        (unsigned long)__le64_to_cpu(bsb.mtime),
 
1781
                                        (unsigned long)info->array.utime);
 
1782
                        } else {
 
1783
                                if (verbose)
 
1784
                                        fprintf(stderr, Name ": too-old timestamp on "
 
1785
                                                "backup-metadata on %s\n", devname);
 
1786
                                continue; /* time stamp is too bad */
 
1787
                        }
 
1788
                }
 
1789
 
 
1790
                if (bsb.magic[15] == '1') {
 
1791
                if (info->delta_disks >= 0) {
 
1792
                        /* reshape_progress is increasing */
 
1793
                        if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
 
1794
                            info->reshape_progress) {
 
1795
                        nonew:
 
1796
                                if (verbose)
 
1797
                                        fprintf(stderr, Name ": backup-metadata found on %s but is not needed\n", devname);
 
1798
                                continue; /* No new data here */
 
1799
                        }
 
1800
                } else {
 
1801
                        /* reshape_progress is decreasing */
 
1802
                        if (__le64_to_cpu(bsb.arraystart) >=
 
1803
                            info->reshape_progress)
 
1804
                                goto nonew; /* No new data here */
 
1805
                }
 
1806
                } else {
 
1807
                if (info->delta_disks >= 0) {
 
1808
                        /* reshape_progress is increasing */
 
1809
                        if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
 
1810
                            info->reshape_progress &&
 
1811
                            __le64_to_cpu(bsb.arraystart2) + __le64_to_cpu(bsb.length2) <
 
1812
                            info->reshape_progress)
 
1813
                                goto nonew; /* No new data here */
 
1814
                } else {
 
1815
                        /* reshape_progress is decreasing */
 
1816
                        if (__le64_to_cpu(bsb.arraystart) >=
 
1817
                            info->reshape_progress &&
 
1818
                            __le64_to_cpu(bsb.arraystart2) >=
 
1819
                            info->reshape_progress)
 
1820
                                goto nonew; /* No new data here */
 
1821
                }
 
1822
                }
 
1823
                if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0) {
 
1824
                second_fail:
 
1825
                        if (verbose)
 
1826
                                fprintf(stderr, Name ": Failed to verify secondary backup-metadata block on %s\n",
 
1827
                                        devname);
934
1828
                        continue; /* Cannot seek */
 
1829
                }
935
1830
                /* There should be a duplicate backup superblock 4k before here */
936
1831
                if (lseek64(fd, -4096, 1) < 0 ||
937
 
                    read(fd, buf, 4096) != 4096 ||
938
 
                    memcmp(buf, &bsb, sizeof(bsb)) != 0)
939
 
                        continue; /* Cannot find leading superblock */
 
1832
                    read(fd, &bsb2, sizeof(bsb2)) != sizeof(bsb2))
 
1833
                        goto second_fail; /* Cannot find leading superblock */
 
1834
                if (bsb.magic[15] == '1')
 
1835
                        bsbsize = offsetof(struct mdp_backup_super, pad1);
 
1836
                else
 
1837
                        bsbsize = offsetof(struct mdp_backup_super, pad);
 
1838
                if (memcmp(&bsb2, &bsb, bsbsize) != 0)
 
1839
                        goto second_fail; /* Cannot find leading superblock */
940
1840
 
941
1841
                /* Now need the data offsets for all devices. */
942
1842
                offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
948
1848
                                continue;
949
1849
                        st->ss->getinfo_super(st, &dinfo);
950
1850
                        st->ss->free_super(st);
951
 
                        offsets[j] = dinfo.data_offset;
 
1851
                        offsets[j] = dinfo.data_offset * 512;
952
1852
                }
953
1853
                printf(Name ": restoring critical section\n");
954
1854
 
958
1858
                                    info->new_level,
959
1859
                                    info->new_layout,
960
1860
                                    fd, __le64_to_cpu(bsb.devstart)*512,
961
 
                                    0, __le64_to_cpu(bsb.length)*512)) {
962
 
                        /* didn't succeed, so giveup */
963
 
                        return 1;
964
 
                }
 
1861
                                    __le64_to_cpu(bsb.arraystart)*512,
 
1862
                                    __le64_to_cpu(bsb.length)*512)) {
 
1863
                        /* didn't succeed, so giveup */
 
1864
                        if (verbose)
 
1865
                                fprintf(stderr, Name ": Error restoring backup from %s\n",
 
1866
                                        devname);
 
1867
                        return 1;
 
1868
                }
 
1869
                
 
1870
                if (bsb.magic[15] == '2' &&
 
1871
                    restore_stripes(fdlist, offsets,
 
1872
                                    info->array.raid_disks,
 
1873
                                    info->new_chunk,
 
1874
                                    info->new_level,
 
1875
                                    info->new_layout,
 
1876
                                    fd, __le64_to_cpu(bsb.devstart)*512 +
 
1877
                                    __le64_to_cpu(bsb.devstart2)*512,
 
1878
                                    __le64_to_cpu(bsb.arraystart2)*512,
 
1879
                                    __le64_to_cpu(bsb.length2)*512)) {
 
1880
                        /* didn't succeed, so giveup */
 
1881
                        if (verbose)
 
1882
                                fprintf(stderr, Name ": Error restoring second backup from %s\n",
 
1883
                                        devname);
 
1884
                        return 1;
 
1885
                }
 
1886
 
965
1887
 
966
1888
                /* Ok, so the data is restored. Let's update those superblocks. */
967
1889
 
 
1890
                if (info->delta_disks >= 0) {
 
1891
                        info->reshape_progress = __le64_to_cpu(bsb.arraystart) +
 
1892
                                __le64_to_cpu(bsb.length);
 
1893
                        if (bsb.magic[15] == '2') {
 
1894
                                unsigned long long p2 = __le64_to_cpu(bsb.arraystart2) +
 
1895
                                        __le64_to_cpu(bsb.length2);
 
1896
                                if (p2 > info->reshape_progress)
 
1897
                                        info->reshape_progress = p2;
 
1898
                        }
 
1899
                } else {
 
1900
                        info->reshape_progress = __le64_to_cpu(bsb.arraystart);
 
1901
                        if (bsb.magic[15] == '2') {
 
1902
                                unsigned long long p2 = __le64_to_cpu(bsb.arraystart2);
 
1903
                                if (p2 < info->reshape_progress)
 
1904
                                        info->reshape_progress = p2;
 
1905
                        }
 
1906
                }
968
1907
                for (j=0; j<info->array.raid_disks; j++) {
969
1908
                        if (fdlist[j] < 0) continue;
970
1909
                        if (st->ss->load_super(st, fdlist[j], NULL))
971
1910
                                continue;
972
1911
                        st->ss->getinfo_super(st, &dinfo);
973
 
                        dinfo.reshape_progress = __le64_to_cpu(bsb.length);
 
1912
                        dinfo.reshape_progress = info->reshape_progress;
974
1913
                        st->ss->update_super(st, &dinfo,
975
1914
                                             "_reshape_progress",
976
1915
                                             NULL,0, 0, NULL);
977
1916
                        st->ss->store_super(st, fdlist[j]);
978
1917
                        st->ss->free_super(st);
979
1918
                }
980
 
 
981
 
                /* And we are done! */
982
1919
                return 0;
983
1920
        }
984
1921
        /* Didn't find any backup data, try to see if any
985
1922
         * was needed.
986
1923
         */
987
 
        nstripe = ostripe = 0;
988
 
        odata = info->array.raid_disks - info->delta_disks - 1;
989
 
        if (info->array.level == 6) odata--; /* number of data disks */
990
 
        ndata = info->array.raid_disks - 1;
991
 
        if (info->new_level == 6) ndata--;
992
 
        last_block = 0;
993
 
        while (nstripe >= ostripe) {
994
 
                nstripe += info->new_chunk / 512;
995
 
                last_block = nstripe * ndata;
996
 
                ostripe = last_block / odata / (info->array.chunk_size/512) *
997
 
                        (info->array.chunk_size/512);
998
 
        }
999
 
 
1000
 
        if (info->reshape_progress >= last_block)
1001
 
                return 0;
 
1924
        if (info->delta_disks < 0) {
 
1925
                /* When shrinking, the critical section is at the end.
 
1926
                 * So see if we are before the critical section.
 
1927
                 */
 
1928
                unsigned long long first_block;
 
1929
                nstripe = ostripe = 0;
 
1930
                first_block = 0;
 
1931
                while (ostripe >= nstripe) {
 
1932
                        ostripe += info->array.chunk_size / 512;
 
1933
                        first_block = ostripe * odata;
 
1934
                        nstripe = first_block / ndata / (info->new_chunk/512) *
 
1935
                                (info->new_chunk/512);
 
1936
                }
 
1937
 
 
1938
                if (info->reshape_progress >= first_block)
 
1939
                        return 0;
 
1940
        }
 
1941
        if (info->delta_disks > 0) {
 
1942
                /* See if we are beyond the critical section. */
 
1943
                unsigned long long last_block;
 
1944
                nstripe = ostripe = 0;
 
1945
                last_block = 0;
 
1946
                while (nstripe >= ostripe) {
 
1947
                        nstripe += info->new_chunk / 512;
 
1948
                        last_block = nstripe * ndata;
 
1949
                        ostripe = last_block / odata / (info->array.chunk_size/512) *
 
1950
                                (info->array.chunk_size/512);
 
1951
                }
 
1952
 
 
1953
                if (info->reshape_progress >= last_block)
 
1954
                        return 0;
 
1955
        }
1002
1956
        /* needed to recover critical section! */
 
1957
        if (verbose)
 
1958
                fprintf(stderr, Name ": Failed to find backup of critical section\n");
1003
1959
        return 1;
1004
1960
}
 
1961
 
 
1962
int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
 
1963
                  char *backup_file)
 
1964
{
 
1965
        /* Array is assembled and ready to be started, but
 
1966
         * monitoring is probably required.
 
1967
         * So:
 
1968
         *   - start read-only
 
1969
         *   - set upper bound for resync
 
1970
         *   - initialise the 'suspend' boundaries
 
1971
         *   - switch to read-write
 
1972
         *   - fork and continue monitoring
 
1973
         */
 
1974
        int err;
 
1975
        int backup_list[1];
 
1976
        unsigned long long backup_offsets[1];
 
1977
        int odisks, ndisks, ochunk, nchunk,odata,ndata;
 
1978
        unsigned long a,b,blocks,stripes;
 
1979
        int backup_fd;
 
1980
        int *fds;
 
1981
        unsigned long long *offsets;
 
1982
        int d;
 
1983
        struct mdinfo *sra, *sd;
 
1984
        int rv;
 
1985
        unsigned long cache;
 
1986
        int done = 0;
 
1987
 
 
1988
        err = sysfs_set_str(info, NULL, "array_state", "readonly");
 
1989
        if (err)
 
1990
                return err;
 
1991
 
 
1992
        /* make sure reshape doesn't progress until we are ready */
 
1993
        sysfs_set_str(info, NULL, "sync_max", "0");
 
1994
        sysfs_set_str(info, NULL, "array_state", "active"); /* FIXME or clean */
 
1995
 
 
1996
        sra = sysfs_read(-1, devname2devnum(info->sys_name),
 
1997
                         GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
 
1998
                         GET_CACHE);
 
1999
        if (!sra)
 
2000
                return 1;
 
2001
 
 
2002
        /* ndisks is not growing, so raid_disks is old and +delta is new */
 
2003
        odisks = info->array.raid_disks;
 
2004
        ndisks = odisks + info->delta_disks;
 
2005
        odata = odisks - 1;
 
2006
        ndata = ndisks - 1;
 
2007
        if (info->array.level == 6) {
 
2008
                odata--;
 
2009
                ndata--;
 
2010
        }
 
2011
        ochunk = info->array.chunk_size;
 
2012
        nchunk = info->new_chunk;
 
2013
 
 
2014
        a = (ochunk/512) * odata;
 
2015
        b = (nchunk/512) * ndata;
 
2016
        /* Find GCD */
 
2017
        while (a != b) {
 
2018
                if (a < b)
 
2019
                        b -= a;
 
2020
                if (b < a)
 
2021
                        a -= b;
 
2022
        }
 
2023
        /* LCM == product / GCD */
 
2024
        blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a;
 
2025
 
 
2026
        if (ndata == odata)
 
2027
                while (blocks * 32 < sra->component_size &&
 
2028
                       blocks < 16*1024*2)
 
2029
                        blocks *= 2;
 
2030
        stripes = blocks / (info->array.chunk_size/512) / odata;
 
2031
 
 
2032
        /* check that the internal stripe cache is
 
2033
         * large enough, or it won't work.
 
2034
         */
 
2035
        cache = (nchunk < ochunk) ? ochunk : nchunk;
 
2036
        cache = cache * 4 / 4096;
 
2037
        if (cache < blocks / 8 / odisks + 16)
 
2038
                /* Make it big enough to hold 'blocks' */
 
2039
                cache = blocks / 8 / odisks + 16;
 
2040
        if (sra->cache_size < cache)
 
2041
                sysfs_set_num(sra, NULL, "stripe_cache_size",
 
2042
                              cache+1);
 
2043
 
 
2044
        memset(&bsb, 0, 512);
 
2045
        memcpy(bsb.magic, "md_backup_data-1", 16);
 
2046
        memcpy(&bsb.set_uuid, info->uuid, 16);
 
2047
        bsb.mtime = __cpu_to_le64(time(0));
 
2048
        bsb.devstart2 = blocks;
 
2049
 
 
2050
        backup_fd = open(backup_file, O_RDWR|O_CREAT, S_IRUSR | S_IWUSR);
 
2051
        backup_list[0] = backup_fd;
 
2052
        backup_offsets[0] = 8 * 512;
 
2053
        fds = malloc(odisks * sizeof(fds[0]));
 
2054
        offsets = malloc(odisks * sizeof(offsets[0]));
 
2055
        for (d=0; d<odisks; d++)
 
2056
                fds[d] = -1;
 
2057
 
 
2058
        for (sd = sra->devs; sd; sd = sd->next) {
 
2059
                if (sd->disk.state & (1<<MD_DISK_FAULTY))
 
2060
                        continue;
 
2061
                if (sd->disk.state & (1<<MD_DISK_SYNC)) {
 
2062
                        char *dn = map_dev(sd->disk.major,
 
2063
                                           sd->disk.minor, 1);
 
2064
                        fds[sd->disk.raid_disk]
 
2065
                                = dev_open(dn, O_RDONLY);
 
2066
                        offsets[sd->disk.raid_disk] = sd->data_offset*512;
 
2067
                        if (fds[sd->disk.raid_disk] < 0) {
 
2068
                                fprintf(stderr, Name ": %s: cannot open component %s\n",
 
2069
                                        info->sys_name, dn?dn:"-unknown-");
 
2070
                                rv = 1;
 
2071
                                goto release;
 
2072
                        }
 
2073
                        free(dn);
 
2074
                }
 
2075
        }
 
2076
 
 
2077
        switch(fork()) {
 
2078
        case 0:
 
2079
                close(mdfd);
 
2080
                mlockall(MCL_FUTURE);
 
2081
                if (info->delta_disks < 0)
 
2082
                        done = child_shrink(-1, info, stripes,
 
2083
                                            fds, offsets,
 
2084
                                            info->array.raid_disks,
 
2085
                                            info->array.chunk_size,
 
2086
                                            info->array.level, info->array.layout,
 
2087
                                            odata,
 
2088
                                            1, backup_list, backup_offsets);
 
2089
                else if (info->delta_disks == 0) {
 
2090
                        /* The 'start' is a per-device stripe number.
 
2091
                         * reshape_progress is a per-array sector number.
 
2092
                         * So divide by ndata * chunk_size
 
2093
                         */
 
2094
                        unsigned long long start = info->reshape_progress / ndata;
 
2095
                        start /= (info->array.chunk_size/512);
 
2096
                        done = child_same_size(-1, info, stripes,
 
2097
                                               fds, offsets,
 
2098
                                               start,
 
2099
                                               info->array.raid_disks,
 
2100
                                               info->array.chunk_size,
 
2101
                                               info->array.level, info->array.layout,
 
2102
                                               odata,
 
2103
                                               1, backup_list, backup_offsets);
 
2104
                }
 
2105
                if (backup_file && done)
 
2106
                        unlink(backup_file);
 
2107
                /* FIXME should I intuit a level change */
 
2108
                exit(0);
 
2109
        case -1:
 
2110
                fprintf(stderr, Name ": Cannot run child to continue monitoring reshape: %s\n",
 
2111
                        strerror(errno));
 
2112
                return 1;
 
2113
        default:
 
2114
                break;
 
2115
        }
 
2116
release:
 
2117
        return 0;
 
2118
}
 
2119
 
 
2120