487
811
static struct buf *getdirtybuf(struct buf *, struct mtx *, int);
488
812
static void clear_remove(struct thread *);
489
813
static void clear_inodedeps(struct thread *);
814
static void unlinked_inodedep(struct mount *, struct inodedep *);
815
static void clear_unlinked_inodedep(struct inodedep *);
816
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
490
817
static int flush_pagedep_deps(struct vnode *, struct mount *,
491
818
struct diraddhd *);
492
static int flush_inodedep_deps(struct mount *, ino_t);
819
static int free_pagedep(struct pagedep *);
820
static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t);
821
static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
493
822
static int flush_deplist(struct allocdirectlst *, int, int *);
823
static int sync_cgs(struct mount *, int);
494
824
static int handle_written_filepage(struct pagedep *, struct buf *);
825
static int handle_written_sbdep(struct sbdep *, struct buf *);
826
static void initiate_write_sbdep(struct sbdep *);
495
827
static void diradd_inode_written(struct diradd *, struct inodedep *);
828
static int handle_written_indirdep(struct indirdep *, struct buf *,
496
830
static int handle_written_inodeblock(struct inodedep *, struct buf *);
497
static void handle_allocdirect_partdone(struct allocdirect *);
831
static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *,
833
static int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
834
static void handle_written_jaddref(struct jaddref *);
835
static void handle_written_jremref(struct jremref *);
836
static void handle_written_jseg(struct jseg *, struct buf *);
837
static void handle_written_jnewblk(struct jnewblk *);
838
static void handle_written_jblkdep(struct jblkdep *);
839
static void handle_written_jfreefrag(struct jfreefrag *);
840
static void complete_jseg(struct jseg *);
841
static void complete_jsegs(struct jseg *);
842
static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *);
843
static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *);
844
static void jremref_write(struct jremref *, struct jseg *, uint8_t *);
845
static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *);
846
static void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *);
847
static void jfsync_write(struct jfsync *, struct jseg *, uint8_t *data);
848
static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *);
849
static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *);
850
static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *);
851
static inline void inoref_write(struct inoref *, struct jseg *,
853
static void handle_allocdirect_partdone(struct allocdirect *,
855
static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *,
857
static void indirdep_complete(struct indirdep *);
858
static int indirblk_lookup(struct mount *, ufs2_daddr_t);
859
static void indirblk_insert(struct freework *);
860
static void indirblk_remove(struct freework *);
498
861
static void handle_allocindir_partdone(struct allocindir *);
499
862
static void initiate_write_filepage(struct pagedep *, struct buf *);
863
static void initiate_write_indirdep(struct indirdep*, struct buf *);
500
864
static void handle_written_mkdir(struct mkdir *, int);
865
static int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *,
867
static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *);
501
868
static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
502
869
static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
503
870
static void handle_workitem_freefile(struct freefile *);
504
static void handle_workitem_remove(struct dirrem *, struct vnode *);
871
static int handle_workitem_remove(struct dirrem *, int);
505
872
static struct dirrem *newdirrem(struct buf *, struct inode *,
506
873
struct inode *, int, struct dirrem **);
507
static void free_diradd(struct diradd *);
508
static void free_allocindir(struct allocindir *, struct inodedep *);
874
static struct indirdep *indirdep_lookup(struct mount *, struct inode *,
876
static void cancel_indirdep(struct indirdep *, struct buf *,
878
static void free_indirdep(struct indirdep *);
879
static void free_diradd(struct diradd *, struct workhead *);
880
static void merge_diradd(struct inodedep *, struct diradd *);
881
static void complete_diradd(struct diradd *);
882
static struct diradd *diradd_lookup(struct pagedep *, int);
883
static struct jremref *cancel_diradd_dotdot(struct inode *, struct dirrem *,
885
static struct jremref *cancel_mkdir_dotdot(struct inode *, struct dirrem *,
887
static void cancel_diradd(struct diradd *, struct dirrem *, struct jremref *,
888
struct jremref *, struct jremref *);
889
static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *,
891
static void cancel_allocindir(struct allocindir *, struct buf *bp,
892
struct freeblks *, int);
893
static int setup_trunc_indir(struct freeblks *, struct inode *,
894
ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t);
895
static void complete_trunc_indir(struct freework *);
896
static void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *,
898
static void complete_mkdir(struct mkdir *);
509
899
static void free_newdirblk(struct newdirblk *);
510
static int indir_trunc(struct freeblks *, ufs2_daddr_t, int, ufs_lbn_t,
512
static void deallocate_dependencies(struct buf *, struct inodedep *);
513
static void free_allocdirect(struct allocdirectlst *,
514
struct allocdirect *, int);
900
static void free_jremref(struct jremref *);
901
static void free_jaddref(struct jaddref *);
902
static void free_jsegdep(struct jsegdep *);
903
static void free_jsegs(struct jblocks *);
904
static void rele_jseg(struct jseg *);
905
static void free_jseg(struct jseg *, struct jblocks *);
906
static void free_jnewblk(struct jnewblk *);
907
static void free_jblkdep(struct jblkdep *);
908
static void free_jfreefrag(struct jfreefrag *);
909
static void free_freedep(struct freedep *);
910
static void journal_jremref(struct dirrem *, struct jremref *,
912
static void cancel_jnewblk(struct jnewblk *, struct workhead *);
913
static int cancel_jaddref(struct jaddref *, struct inodedep *,
915
static void cancel_jfreefrag(struct jfreefrag *);
916
static inline void setup_freedirect(struct freeblks *, struct inode *,
918
static inline void setup_freeext(struct freeblks *, struct inode *, int, int);
919
static inline void setup_freeindir(struct freeblks *, struct inode *, int,
921
static inline struct freeblks *newfreeblks(struct mount *, struct inode *);
922
static void freeblks_free(struct ufsmount *, struct freeblks *, int);
923
static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t);
924
ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
925
static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int);
926
static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t,
928
static void trunc_pages(struct inode *, off_t, ufs2_daddr_t, int);
929
static int cancel_pagedep(struct pagedep *, struct freeblks *, int);
930
static int deallocate_dependencies(struct buf *, struct freeblks *, int);
931
static void newblk_freefrag(struct newblk*);
932
static void free_newblk(struct newblk *);
933
static void cancel_allocdirect(struct allocdirectlst *,
934
struct allocdirect *, struct freeblks *);
515
935
static int check_inode_unwritten(struct inodedep *);
516
936
static int free_inodedep(struct inodedep *);
517
static void handle_workitem_freeblocks(struct freeblks *, int);
937
static void freework_freeblock(struct freework *);
938
static void freework_enqueue(struct freework *);
939
static int handle_workitem_freeblocks(struct freeblks *, int);
940
static int handle_complete_freeblocks(struct freeblks *, int);
941
static void handle_workitem_indirblk(struct freework *);
942
static void handle_written_freework(struct freework *);
518
943
static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
519
static void setup_allocindir_phase2(struct buf *, struct inode *,
520
struct allocindir *);
944
static struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
946
static struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *,
947
struct inodedep *, struct allocindir *, ufs_lbn_t);
521
948
static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
949
ufs2_daddr_t, ufs_lbn_t);
523
950
static void handle_workitem_freefrag(struct freefrag *);
524
static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long);
951
static struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long,
525
953
static void allocdirect_merge(struct allocdirectlst *,
526
954
struct allocdirect *, struct allocdirect *);
527
static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *);
528
static int newblk_find(struct newblk_hashhead *, struct fs *, ufs2_daddr_t,
530
static int newblk_lookup(struct fs *, ufs2_daddr_t, int, struct newblk **);
955
static struct freefrag *allocindir_merge(struct allocindir *,
956
struct allocindir *);
957
static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int,
958
struct bmsafemap **);
959
static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *,
960
int cg, struct bmsafemap *);
961
static int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t,
962
int, struct newblk **);
963
static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **);
531
964
static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
532
965
struct inodedep **);
533
966
static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
534
static int pagedep_lookup(struct inode *, ufs_lbn_t, int, struct pagedep **);
967
static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
968
int, struct pagedep **);
535
969
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
536
970
struct mount *mp, int, struct pagedep **);
537
971
static void pause_timer(void *);
538
972
static int request_cleanup(struct mount *, int);
539
static int process_worklist_item(struct mount *, int);
540
static void add_to_worklist(struct worklist *);
973
static int process_worklist_item(struct mount *, int, int);
974
static void process_removes(struct vnode *);
975
static void process_truncates(struct vnode *);
976
static void jwork_move(struct workhead *, struct workhead *);
977
static void jwork_insert(struct workhead *, struct jsegdep *);
978
static void add_to_worklist(struct worklist *, int);
979
static void wake_worklist(struct worklist *);
980
static void wait_worklist(struct worklist *, char *);
981
static void remove_from_worklist(struct worklist *);
541
982
static void softdep_flush(void);
983
static void softdep_flushjournal(struct mount *);
542
984
static int softdep_speedup(void);
985
static void worklist_speedup(void);
986
static int journal_mount(struct mount *, struct fs *, struct ucred *);
987
static void journal_unmount(struct mount *);
988
static int journal_space(struct ufsmount *, int);
989
static void journal_suspend(struct ufsmount *);
990
static int journal_unsuspend(struct ufsmount *ump);
991
static void softdep_prelink(struct vnode *, struct vnode *);
992
static void add_to_journal(struct worklist *);
993
static void remove_from_journal(struct worklist *);
994
static void softdep_process_journal(struct mount *, struct worklist *, int);
995
static struct jremref *newjremref(struct dirrem *, struct inode *,
996
struct inode *ip, off_t, nlink_t);
997
static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t,
999
static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
1001
static inline struct jsegdep *inoref_jseg(struct inoref *);
1002
static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
1003
static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
1005
static struct jtrunc *newjtrunc(struct freeblks *, off_t, int);
1006
static void move_newblock_dep(struct jaddref *, struct inodedep *);
1007
static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t);
1008
static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *,
1009
ufs2_daddr_t, long, ufs_lbn_t);
1010
static struct freework *newfreework(struct ufsmount *, struct freeblks *,
1011
struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int);
1012
static int jwait(struct worklist *, int);
1013
static struct inodedep *inodedep_lookup_ip(struct inode *);
1014
static int bmsafemap_backgroundwrite(struct bmsafemap *, struct buf *);
1015
static struct freefile *handle_bufwait(struct inodedep *, struct workhead *);
1016
static void handle_jwork(struct workhead *);
1017
static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *,
1019
static struct jblocks *jblocks_create(void);
1020
static ufs2_daddr_t jblocks_alloc(struct jblocks *, int, int *);
1021
static void jblocks_free(struct jblocks *, struct mount *, int);
1022
static void jblocks_destroy(struct jblocks *);
1023
static void jblocks_add(struct jblocks *, ufs2_daddr_t, int);
545
1026
* Exported softdep operations.
2479
mp->mnt_flag &= ~MNT_SOFTDEP;
2480
if (MOUNTEDSUJ(mp) == 0) {
2484
mp->mnt_flag &= ~MNT_SUJ;
2486
journal_unmount(mp);
2489
static struct jblocks *
2490
jblocks_create(void)
2492
struct jblocks *jblocks;
2494
jblocks = malloc(sizeof(*jblocks), M_JBLOCKS, M_WAITOK | M_ZERO);
2495
TAILQ_INIT(&jblocks->jb_segs);
2496
jblocks->jb_avail = 10;
2497
jblocks->jb_extent = malloc(sizeof(struct jextent) * jblocks->jb_avail,
2498
M_JBLOCKS, M_WAITOK | M_ZERO);
2504
jblocks_alloc(jblocks, bytes, actual)
2505
struct jblocks *jblocks;
2510
struct jextent *jext;
2514
blocks = bytes / DEV_BSIZE;
2515
jext = &jblocks->jb_extent[jblocks->jb_head];
2516
freecnt = jext->je_blocks - jblocks->jb_off;
2518
jblocks->jb_off = 0;
2519
if (++jblocks->jb_head > jblocks->jb_used)
2520
jblocks->jb_head = 0;
2521
jext = &jblocks->jb_extent[jblocks->jb_head];
2522
freecnt = jext->je_blocks;
2524
if (freecnt > blocks)
2526
*actual = freecnt * DEV_BSIZE;
2527
daddr = jext->je_daddr + jblocks->jb_off;
2528
jblocks->jb_off += freecnt;
2529
jblocks->jb_free -= freecnt;
2535
jblocks_free(jblocks, mp, bytes)
2536
struct jblocks *jblocks;
2541
jblocks->jb_free += bytes / DEV_BSIZE;
2542
if (jblocks->jb_suspended)
2548
jblocks_destroy(jblocks)
2549
struct jblocks *jblocks;
2552
if (jblocks->jb_extent)
2553
free(jblocks->jb_extent, M_JBLOCKS);
2554
free(jblocks, M_JBLOCKS);
2558
jblocks_add(jblocks, daddr, blocks)
2559
struct jblocks *jblocks;
2563
struct jextent *jext;
2565
jblocks->jb_blocks += blocks;
2566
jblocks->jb_free += blocks;
2567
jext = &jblocks->jb_extent[jblocks->jb_used];
2568
/* Adding the first block. */
2569
if (jext->je_daddr == 0) {
2570
jext->je_daddr = daddr;
2571
jext->je_blocks = blocks;
2574
/* Extending the last extent. */
2575
if (jext->je_daddr + jext->je_blocks == daddr) {
2576
jext->je_blocks += blocks;
2579
/* Adding a new extent. */
2580
if (++jblocks->jb_used == jblocks->jb_avail) {
2581
jblocks->jb_avail *= 2;
2582
jext = malloc(sizeof(struct jextent) * jblocks->jb_avail,
2583
M_JBLOCKS, M_WAITOK | M_ZERO);
2584
memcpy(jext, jblocks->jb_extent,
2585
sizeof(struct jextent) * jblocks->jb_used);
2586
free(jblocks->jb_extent, M_JBLOCKS);
2587
jblocks->jb_extent = jext;
2589
jext = &jblocks->jb_extent[jblocks->jb_used];
2590
jext->je_daddr = daddr;
2591
jext->je_blocks = blocks;
2596
softdep_journal_lookup(mp, vpp)
2600
struct componentname cnp;
2605
error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp);
2608
bzero(&cnp, sizeof(cnp));
2609
cnp.cn_nameiop = LOOKUP;
2610
cnp.cn_flags = ISLASTCN;
2611
cnp.cn_thread = curthread;
2612
cnp.cn_cred = curthread->td_ucred;
2613
cnp.cn_pnbuf = SUJ_FILE;
2614
cnp.cn_nameptr = SUJ_FILE;
2615
cnp.cn_namelen = strlen(SUJ_FILE);
2616
error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal);
2620
error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp);
2625
* Open and verify the journal file.
2628
journal_mount(mp, fs, cred)
2633
struct jblocks *jblocks;
2641
error = softdep_journal_lookup(mp, &vp);
2643
printf("Failed to find journal. Use tunefs to create one\n");
2647
if (ip->i_size < SUJ_MIN) {
2651
bcount = lblkno(fs, ip->i_size); /* Only use whole blocks. */
2652
jblocks = jblocks_create();
2653
for (i = 0; i < bcount; i++) {
2654
error = ufs_bmaparray(vp, i, &blkno, NULL, NULL, NULL);
2657
jblocks_add(jblocks, blkno, fsbtodb(fs, fs->fs_frag));
2660
jblocks_destroy(jblocks);
2663
jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */
2664
jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */
2665
VFSTOUFS(mp)->softdep_jblocks = jblocks;
2669
mp->mnt_flag |= MNT_SUJ;
2670
mp->mnt_flag &= ~MNT_SOFTDEP;
2673
* Only validate the journal contents if the
2674
* filesystem is clean, otherwise we write the logs
2675
* but they'll never be used. If the filesystem was
2676
* still dirty when we mounted it the journal is
2677
* invalid and a new journal can only be valid if it
2678
* starts from a clean mount.
2681
DIP_SET(ip, i_modrev, fs->fs_mtime);
2682
ip->i_flags |= IN_MODIFIED;
2694
struct ufsmount *ump;
2697
if (ump->softdep_jblocks)
2698
jblocks_destroy(ump->softdep_jblocks);
2699
ump->softdep_jblocks = NULL;
2703
* Called when a journal record is ready to be written. Space is allocated
2704
* and the journal entry is created when the journal is flushed to stable
2709
struct worklist *wk;
2711
struct ufsmount *ump;
2713
mtx_assert(&lk, MA_OWNED);
2714
ump = VFSTOUFS(wk->wk_mp);
2715
if (wk->wk_state & ONWORKLIST)
2716
panic("add_to_journal: %s(0x%X) already on list",
2717
TYPENAME(wk->wk_type), wk->wk_state);
2718
wk->wk_state |= ONWORKLIST | DEPCOMPLETE;
2719
if (LIST_EMPTY(&ump->softdep_journal_pending)) {
2720
ump->softdep_jblocks->jb_age = ticks;
2721
LIST_INSERT_HEAD(&ump->softdep_journal_pending, wk, wk_list);
2723
LIST_INSERT_AFTER(ump->softdep_journal_tail, wk, wk_list);
2724
ump->softdep_journal_tail = wk;
2725
ump->softdep_on_journal += 1;
2729
* Remove an arbitrary item from the journal worklist while maintaining the tail
2730
* pointer. This happens when a new operation obviates the need to
2731
* journal an old operation.
2734
remove_from_journal(wk)
2735
struct worklist *wk;
2737
struct ufsmount *ump;
2739
mtx_assert(&lk, MA_OWNED);
2740
ump = VFSTOUFS(wk->wk_mp);
2743
struct worklist *wkn;
2745
LIST_FOREACH(wkn, &ump->softdep_journal_pending, wk_list)
2749
panic("remove_from_journal: %p is not in journal", wk);
2753
* We emulate a TAILQ to save space in most structures which do not
2754
* require TAILQ semantics. Here we must update the tail position
2755
* when removing the tail which is not the final entry. This works
2756
* only if the worklist linkage is at the beginning of the structure.
2758
if (ump->softdep_journal_tail == wk)
2759
ump->softdep_journal_tail =
2760
(struct worklist *)wk->wk_list.le_prev;
2762
WORKLIST_REMOVE(wk);
2763
ump->softdep_on_journal -= 1;
2767
* Check for journal space as well as dependency limits so the prelink
2768
* code can throttle both journaled and non-journaled filesystems.
2769
* Threshold is 0 for low and 1 for min.
2772
journal_space(ump, thresh)
2773
struct ufsmount *ump;
2776
struct jblocks *jblocks;
2779
jblocks = ump->softdep_jblocks;
2780
if (jblocks == NULL)
2783
* We use a tighter restriction here to prevent request_cleanup()
2784
* running in threads from running into locks we currently hold.
2786
if (dep_current[D_INODEDEP] > (max_softdeps / 10) * 9)
2789
thresh = jblocks->jb_min;
2791
thresh = jblocks->jb_low;
2792
avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE;
2793
avail = jblocks->jb_free - avail;
2795
return (avail > thresh);
2799
journal_suspend(ump)
2800
struct ufsmount *ump;
2802
struct jblocks *jblocks;
2806
jblocks = ump->softdep_jblocks;
2808
if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
2810
mp->mnt_kern_flag |= MNTK_SUSPEND;
2811
mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc);
2813
jblocks->jb_suspended = 1;
2818
journal_unsuspend(struct ufsmount *ump)
2820
struct jblocks *jblocks;
2824
jblocks = ump->softdep_jblocks;
2826
if (jblocks != NULL && jblocks->jb_suspended &&
2827
journal_space(ump, jblocks->jb_min)) {
2828
jblocks->jb_suspended = 0;
2830
mp->mnt_susp_owner = curthread;
2831
vfs_write_resume(mp);
2839
* Called before any allocation function to be certain that there is
2840
* sufficient space in the journal prior to creating any new records.
2841
* Since in the case of block allocation we may have multiple locked
2842
* buffers at the time of the actual allocation we can not block
2843
* when the journal records are created. Doing so would create a deadlock
2844
* if any of these buffers needed to be flushed to reclaim space. Instead
2845
* we require a sufficiently large amount of available space such that
2846
* each thread in the system could have passed this allocation check and
2847
* still have sufficient free space. With 20% of a minimum journal size
2848
* of 1MB we have 6553 records available.
2851
softdep_prealloc(vp, waitok)
2855
struct ufsmount *ump;
2858
* Nothing to do if we are not running journaled soft updates.
2859
* If we currently hold the snapshot lock, we must avoid handling
2860
* other resources that could cause deadlock.
2862
if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)))
2864
ump = VFSTOUFS(vp->v_mount);
2866
if (journal_space(ump, 0)) {
2872
if (waitok == MNT_NOWAIT)
2875
* Attempt to sync this vnode once to flush any journal
2876
* work attached to it.
2878
if ((curthread->td_pflags & TDP_COWINPROGRESS) == 0)
2879
ffs_syncvnode(vp, waitok, 0);
2881
process_removes(vp);
2882
process_truncates(vp);
2883
if (journal_space(ump, 0) == 0) {
2885
if (journal_space(ump, 1) == 0)
2886
journal_suspend(ump);
2894
* Before adjusting a link count on a vnode verify that we have sufficient
2895
* journal space. If not, process operations that depend on the currently
2896
* locked pair of vnodes to try to flush space as the syncer, buf daemon,
2897
* and softdep flush threads can not acquire these locks to reclaim space.
2900
softdep_prelink(dvp, vp)
2904
struct ufsmount *ump;
2906
ump = VFSTOUFS(dvp->v_mount);
2907
mtx_assert(&lk, MA_OWNED);
2909
* Nothing to do if we have sufficient journal space.
2910
* If we currently hold the snapshot lock, we must avoid
2911
* handling other resources that could cause deadlock.
2913
if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
2918
ffs_syncvnode(vp, MNT_NOWAIT, 0);
2919
ffs_syncvnode(dvp, MNT_WAIT, 0);
2921
/* Process vp before dvp as it may create .. removes. */
2923
process_removes(vp);
2924
process_truncates(vp);
2926
process_removes(dvp);
2927
process_truncates(dvp);
2929
process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
2930
if (journal_space(ump, 0) == 0) {
2932
if (journal_space(ump, 1) == 0)
2933
journal_suspend(ump);
2938
jseg_write(ump, jseg, data)
2939
struct ufsmount *ump;
2943
struct jsegrec *rec;
2945
rec = (struct jsegrec *)data;
2946
rec->jsr_seq = jseg->js_seq;
2947
rec->jsr_oldest = jseg->js_oldseq;
2948
rec->jsr_cnt = jseg->js_cnt;
2949
rec->jsr_blocks = jseg->js_size / ump->um_devvp->v_bufobj.bo_bsize;
2951
rec->jsr_time = ump->um_fs->fs_mtime;
2955
inoref_write(inoref, jseg, rec)
2956
struct inoref *inoref;
2958
struct jrefrec *rec;
2961
inoref->if_jsegdep->jd_seg = jseg;
2962
rec->jr_ino = inoref->if_ino;
2963
rec->jr_parent = inoref->if_parent;
2964
rec->jr_nlink = inoref->if_nlink;
2965
rec->jr_mode = inoref->if_mode;
2966
rec->jr_diroff = inoref->if_diroff;
2970
jaddref_write(jaddref, jseg, data)
2971
struct jaddref *jaddref;
2975
struct jrefrec *rec;
2977
rec = (struct jrefrec *)data;
2978
rec->jr_op = JOP_ADDREF;
2979
inoref_write(&jaddref->ja_ref, jseg, rec);
2983
jremref_write(jremref, jseg, data)
2984
struct jremref *jremref;
2988
struct jrefrec *rec;
2990
rec = (struct jrefrec *)data;
2991
rec->jr_op = JOP_REMREF;
2992
inoref_write(&jremref->jr_ref, jseg, rec);
2996
jmvref_write(jmvref, jseg, data)
2997
struct jmvref *jmvref;
3003
rec = (struct jmvrec *)data;
3004
rec->jm_op = JOP_MVREF;
3005
rec->jm_ino = jmvref->jm_ino;
3006
rec->jm_parent = jmvref->jm_parent;
3007
rec->jm_oldoff = jmvref->jm_oldoff;
3008
rec->jm_newoff = jmvref->jm_newoff;
3012
jnewblk_write(jnewblk, jseg, data)
3013
struct jnewblk *jnewblk;
3017
struct jblkrec *rec;
3019
jnewblk->jn_jsegdep->jd_seg = jseg;
3020
rec = (struct jblkrec *)data;
3021
rec->jb_op = JOP_NEWBLK;
3022
rec->jb_ino = jnewblk->jn_ino;
3023
rec->jb_blkno = jnewblk->jn_blkno;
3024
rec->jb_lbn = jnewblk->jn_lbn;
3025
rec->jb_frags = jnewblk->jn_frags;
3026
rec->jb_oldfrags = jnewblk->jn_oldfrags;
3030
jfreeblk_write(jfreeblk, jseg, data)
3031
struct jfreeblk *jfreeblk;
3035
struct jblkrec *rec;
3037
jfreeblk->jf_dep.jb_jsegdep->jd_seg = jseg;
3038
rec = (struct jblkrec *)data;
3039
rec->jb_op = JOP_FREEBLK;
3040
rec->jb_ino = jfreeblk->jf_ino;
3041
rec->jb_blkno = jfreeblk->jf_blkno;
3042
rec->jb_lbn = jfreeblk->jf_lbn;
3043
rec->jb_frags = jfreeblk->jf_frags;
3044
rec->jb_oldfrags = 0;
3048
jfreefrag_write(jfreefrag, jseg, data)
3049
struct jfreefrag *jfreefrag;
3053
struct jblkrec *rec;
3055
jfreefrag->fr_jsegdep->jd_seg = jseg;
3056
rec = (struct jblkrec *)data;
3057
rec->jb_op = JOP_FREEBLK;
3058
rec->jb_ino = jfreefrag->fr_ino;
3059
rec->jb_blkno = jfreefrag->fr_blkno;
3060
rec->jb_lbn = jfreefrag->fr_lbn;
3061
rec->jb_frags = jfreefrag->fr_frags;
3062
rec->jb_oldfrags = 0;
3066
jtrunc_write(jtrunc, jseg, data)
3067
struct jtrunc *jtrunc;
3071
struct jtrncrec *rec;
3073
jtrunc->jt_dep.jb_jsegdep->jd_seg = jseg;
3074
rec = (struct jtrncrec *)data;
3075
rec->jt_op = JOP_TRUNC;
3076
rec->jt_ino = jtrunc->jt_ino;
3077
rec->jt_size = jtrunc->jt_size;
3078
rec->jt_extsize = jtrunc->jt_extsize;
3082
jfsync_write(jfsync, jseg, data)
3083
struct jfsync *jfsync;
3087
struct jtrncrec *rec;
3089
rec = (struct jtrncrec *)data;
3090
rec->jt_op = JOP_SYNC;
3091
rec->jt_ino = jfsync->jfs_ino;
3092
rec->jt_size = jfsync->jfs_size;
3093
rec->jt_extsize = jfsync->jfs_extsize;
3097
softdep_flushjournal(mp)
3100
struct jblocks *jblocks;
3101
struct ufsmount *ump;
3103
if (MOUNTEDSUJ(mp) == 0)
3106
jblocks = ump->softdep_jblocks;
3108
while (ump->softdep_on_journal) {
3109
jblocks->jb_needseg = 1;
3110
softdep_process_journal(mp, NULL, MNT_WAIT);
3115
static void softdep_synchronize_completed(struct bio *);
3116
static void softdep_synchronize(struct bio *, struct ufsmount *, void *);
3119
softdep_synchronize_completed(bp)
3122
struct jseg *oldest;
3126
* caller1 marks the last segment written before we issued the
3127
* synchronize cache.
3129
jseg = bp->bio_caller1;
3133
* Mark all the journal entries waiting on the synchronize cache
3134
* as completed so they may continue on.
3136
while (jseg != NULL && (jseg->js_state & COMPLETE) == 0) {
3137
jseg->js_state |= COMPLETE;
3139
jseg = TAILQ_PREV(jseg, jseglst, js_next);
3142
* Restart deferred journal entry processing from the oldest
3146
complete_jsegs(oldest);
3153
* Send BIO_FLUSH/SYNCHRONIZE CACHE to the device to enforce write ordering
3154
* barriers. The journal must be written prior to any blocks that depend
3155
* on it and the journal can not be released until the blocks have been
3156
* written. This code handles both barriers simultaneously.
3159
softdep_synchronize(bp, ump, caller1)
3161
struct ufsmount *ump;
3165
bp->bio_cmd = BIO_FLUSH;
3166
bp->bio_flags |= BIO_ORDERED;
3167
bp->bio_data = NULL;
3168
bp->bio_offset = ump->um_cp->provider->mediasize;
3170
bp->bio_done = softdep_synchronize_completed;
3171
bp->bio_caller1 = caller1;
3173
(struct g_consumer *)ump->um_devvp->v_bufobj.bo_private);
3177
* Flush some journal records to disk.
3180
softdep_process_journal(mp, needwk, flags)
3182
struct worklist *needwk;
3185
struct jblocks *jblocks;
3186
struct ufsmount *ump;
3187
struct worklist *wk;
3195
int jrecmin; /* Minimum records per block. */
3196
int jrecmax; /* Maximum records per block. */
3202
if (MOUNTEDSUJ(mp) == 0)
3204
shouldflush = softdep_flushcache;
3209
jblocks = ump->softdep_jblocks;
3210
devbsize = ump->um_devvp->v_bufobj.bo_bsize;
3212
* We write anywhere between a disk block and fs block. The upper
3213
* bound is picked to prevent buffer cache fragmentation and limit
3214
* processing time per I/O.
3216
jrecmin = (devbsize / JREC_SIZE) - 1; /* -1 for seg header */
3217
jrecmax = (fs->fs_bsize / devbsize) * jrecmin;
3220
cnt = ump->softdep_on_journal;
3222
* Criteria for writing a segment:
3223
* 1) We have a full block.
3224
* 2) We're called from jwait() and haven't found the
3226
* 3) Always write if needseg is set.
3227
* 4) If we are called from process_worklist and have
3228
* not yet written anything we write a partial block
3229
* to enforce a 1 second maximum latency on journal
3232
if (cnt < (jrecmax - 1) && needwk == NULL &&
3233
jblocks->jb_needseg == 0 && (segwritten || cnt == 0))
3237
* Verify some free journal space. softdep_prealloc() should
3238
* guarantee that we don't run out so this is indicative of
3239
* a problem with the flow control. Try to recover
3240
* gracefully in any event.
3242
while (jblocks->jb_free == 0) {
3243
if (flags != MNT_WAIT)
3245
printf("softdep: Out of journal space!\n");
3247
msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
3250
jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
3251
workitem_alloc(&jseg->js_list, D_JSEG, mp);
3252
LIST_INIT(&jseg->js_entries);
3253
LIST_INIT(&jseg->js_indirs);
3254
jseg->js_state = ATTACHED;
3255
if (shouldflush == 0)
3256
jseg->js_state |= COMPLETE;
3257
else if (bio == NULL)
3258
bio = g_alloc_bio();
3259
jseg->js_jblocks = jblocks;
3260
bp = geteblk(fs->fs_bsize, 0);
3263
* If there was a race while we were allocating the block
3264
* and jseg, the entry we care about was likely written.
3265
* We bail out in both the WAIT and NOWAIT case and assume
3266
* the caller will loop if the entry it cares about is
3269
cnt = ump->softdep_on_journal;
3270
if (cnt + jblocks->jb_needseg == 0 || jblocks->jb_free == 0) {
3271
bp->b_flags |= B_INVAL | B_NOCACHE;
3272
WORKITEM_FREE(jseg, D_JSEG);
3279
* Calculate the disk block size required for the available
3280
* records rounded to the min size.
3284
else if (cnt < jrecmax)
3285
size = howmany(cnt, jrecmin) * devbsize;
3287
size = fs->fs_bsize;
3289
* Allocate a disk block for this journal data and account
3290
* for truncation of the requested size if enough contiguous
3291
* space was not available.
3293
bp->b_blkno = jblocks_alloc(jblocks, size, &size);
3294
bp->b_lblkno = bp->b_blkno;
3295
bp->b_offset = bp->b_blkno * DEV_BSIZE;
3296
bp->b_bcount = size;
3297
bp->b_bufobj = &ump->um_devvp->v_bufobj;
3298
bp->b_flags &= ~B_INVAL;
3299
bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
3301
* Initialize our jseg with cnt records. Assign the next
3302
* sequence number to it and link it in-order.
3304
cnt = MIN(cnt, (size / devbsize) * jrecmin);
3307
jseg->js_refs = cnt + 1; /* Self ref. */
3308
jseg->js_size = size;
3309
jseg->js_seq = jblocks->jb_nextseq++;
3310
if (jblocks->jb_oldestseg == NULL)
3311
jblocks->jb_oldestseg = jseg;
3312
jseg->js_oldseq = jblocks->jb_oldestseg->js_seq;
3313
TAILQ_INSERT_TAIL(&jblocks->jb_segs, jseg, js_next);
3314
if (jblocks->jb_writeseg == NULL)
3315
jblocks->jb_writeseg = jseg;
3317
* Start filling in records from the pending list.
3321
while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
3325
/* Place a segment header on every device block. */
3326
if ((off % devbsize) == 0) {
3327
jseg_write(ump, jseg, data);
3329
data = bp->b_data + off;
3333
remove_from_journal(wk);
3334
wk->wk_state |= INPROGRESS;
3335
WORKLIST_INSERT(&jseg->js_entries, wk);
3336
switch (wk->wk_type) {
3338
jaddref_write(WK_JADDREF(wk), jseg, data);
3341
jremref_write(WK_JREMREF(wk), jseg, data);
3344
jmvref_write(WK_JMVREF(wk), jseg, data);
3347
jnewblk_write(WK_JNEWBLK(wk), jseg, data);
3350
jfreeblk_write(WK_JFREEBLK(wk), jseg, data);
3353
jfreefrag_write(WK_JFREEFRAG(wk), jseg, data);
3356
jtrunc_write(WK_JTRUNC(wk), jseg, data);
3359
jfsync_write(WK_JFSYNC(wk), jseg, data);
3362
panic("process_journal: Unknown type %s",
3363
TYPENAME(wk->wk_type));
3367
data = bp->b_data + off;
3371
* Write this one buffer and continue.
3374
jblocks->jb_needseg = 0;
3375
WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
3377
BO_LOCK(bp->b_bufobj);
3378
bgetvp(ump->um_devvp, bp);
3379
BO_UNLOCK(bp->b_bufobj);
3381
* We only do the blocking wait once we find the journal
3382
* entry we're looking for.
3384
if (needwk == NULL && flags == MNT_WAIT)
3391
* If we wrote a segment issue a synchronize cache so the journal
3392
* is reflected on disk before the data is written. Since reclaiming
3393
* journal space also requires writing a journal record this
3394
* process also enforces a barrier before reclamation.
3396
if (segwritten && shouldflush) {
3397
softdep_synchronize(bio, ump,
3398
TAILQ_LAST(&jblocks->jb_segs, jseglst));
3402
* If we've suspended the filesystem because we ran out of journal
3403
* space either try to sync it here to make some progress or
3404
* unsuspend it if we already have.
3406
if (flags == 0 && jblocks->jb_suspended) {
3407
if (journal_unsuspend(ump))
3410
VFS_SYNC(mp, MNT_NOWAIT);
3411
ffs_sbupdate(ump, MNT_WAIT, 0);
3417
* Complete a jseg, allowing all dependencies awaiting journal writes
3418
* to proceed. Each journal dependency also attaches a jsegdep to dependent
3419
* structures so that the journal segment can be freed to reclaim space.
3425
struct worklist *wk;
3426
struct jmvref *jmvref;
3432
while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) {
3433
WORKLIST_REMOVE(wk);
3434
waiting = wk->wk_state & IOWAITING;
3435
wk->wk_state &= ~(INPROGRESS | IOWAITING);
3436
wk->wk_state |= COMPLETE;
3437
KASSERT(i++ < jseg->js_cnt,
3438
("handle_written_jseg: overflow %d >= %d",
3439
i - 1, jseg->js_cnt));
3440
switch (wk->wk_type) {
3442
handle_written_jaddref(WK_JADDREF(wk));
3445
handle_written_jremref(WK_JREMREF(wk));
3448
rele_jseg(jseg); /* No jsegdep. */
3449
jmvref = WK_JMVREF(wk);
3450
LIST_REMOVE(jmvref, jm_deps);
3451
if ((jmvref->jm_pagedep->pd_state & ONWORKLIST) == 0)
3452
free_pagedep(jmvref->jm_pagedep);
3453
WORKITEM_FREE(jmvref, D_JMVREF);
3456
handle_written_jnewblk(WK_JNEWBLK(wk));
3459
handle_written_jblkdep(&WK_JFREEBLK(wk)->jf_dep);
3462
handle_written_jblkdep(&WK_JTRUNC(wk)->jt_dep);
3465
rele_jseg(jseg); /* No jsegdep. */
3466
WORKITEM_FREE(wk, D_JFSYNC);
3469
handle_written_jfreefrag(WK_JFREEFRAG(wk));
3472
panic("handle_written_jseg: Unknown type %s",
3473
TYPENAME(wk->wk_type));
3479
/* Release the self reference so the structure may be freed. */
3484
* Determine which jsegs are ready for completion processing. Waits for
3485
* synchronize cache to complete as well as forcing in-order completion
3486
* of journal entries.
3489
complete_jsegs(jseg)
3492
struct jblocks *jblocks;
3495
jblocks = jseg->js_jblocks;
3497
* Don't allow out of order completions. If this isn't the first
3498
* block wait for it to write before we're done.
3500
if (jseg != jblocks->jb_writeseg)
3502
/* Iterate through available jsegs processing their entries. */
3503
while (jseg && (jseg->js_state & ALLCOMPLETE) == ALLCOMPLETE) {
3504
jblocks->jb_oldestwrseq = jseg->js_oldseq;
3505
jsegn = TAILQ_NEXT(jseg, js_next);
3506
complete_jseg(jseg);
3509
jblocks->jb_writeseg = jseg;
3511
* Attempt to free jsegs now that oldestwrseq may have advanced.
3513
free_jsegs(jblocks);
3517
* Mark a jseg as DEPCOMPLETE and throw away the buffer. Attempt to handle
3518
* the final completions.
3521
handle_written_jseg(jseg, bp)
3526
if (jseg->js_refs == 0)
3527
panic("handle_written_jseg: No self-reference on %p", jseg);
3528
jseg->js_state |= DEPCOMPLETE;
3530
* We'll never need this buffer again, set flags so it will be
3533
bp->b_flags |= B_INVAL | B_NOCACHE;
3534
complete_jsegs(jseg);
3537
static inline struct jsegdep *
3539
struct inoref *inoref;
3541
struct jsegdep *jsegdep;
3543
jsegdep = inoref->if_jsegdep;
3544
inoref->if_jsegdep = NULL;
3550
* Called once a jremref has made it to stable store. The jremref is marked
3551
* complete and we attempt to free it. Any pagedep writes sleeping while waiting
3552
* for the jremref to complete will be awoken by free_jremref.
3555
handle_written_jremref(jremref)
3556
struct jremref *jremref;
3558
struct inodedep *inodedep;
3559
struct jsegdep *jsegdep;
3560
struct dirrem *dirrem;
3562
/* Grab the jsegdep. */
3563
jsegdep = inoref_jseg(&jremref->jr_ref);
3565
* Remove us from the inoref list.
3567
if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino,
3569
panic("handle_written_jremref: Lost inodedep");
3570
TAILQ_REMOVE(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps);
3572
* Complete the dirrem.
3574
dirrem = jremref->jr_dirrem;
3575
jremref->jr_dirrem = NULL;
3576
LIST_REMOVE(jremref, jr_deps);
3577
jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT;
3578
jwork_insert(&dirrem->dm_jwork, jsegdep);
3579
if (LIST_EMPTY(&dirrem->dm_jremrefhd) &&
3580
(dirrem->dm_state & COMPLETE) != 0)
3581
add_to_worklist(&dirrem->dm_list, 0);
3582
free_jremref(jremref);
3586
* Called once a jaddref has made it to stable store. The dependency is
3587
* marked complete and any dependent structures are added to the inode
3588
* bufwait list to be completed as soon as it is written. If a bitmap write
3589
* depends on this entry we move the inode into the inodedephd of the
3590
* bmsafemap dependency and attempt to remove the jaddref from the bmsafemap.
3593
handle_written_jaddref(jaddref)
3594
struct jaddref *jaddref;
3596
struct jsegdep *jsegdep;
3597
struct inodedep *inodedep;
3598
struct diradd *diradd;
3599
struct mkdir *mkdir;
3601
/* Grab the jsegdep. */
3602
jsegdep = inoref_jseg(&jaddref->ja_ref);
3605
if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
3607
panic("handle_written_jaddref: Lost inodedep.");
3608
if (jaddref->ja_diradd == NULL)
3609
panic("handle_written_jaddref: No dependency");
3610
if (jaddref->ja_diradd->da_list.wk_type == D_DIRADD) {
3611
diradd = jaddref->ja_diradd;
3612
WORKLIST_INSERT(&inodedep->id_bufwait, &diradd->da_list);
3613
} else if (jaddref->ja_state & MKDIR_PARENT) {
3614
mkdir = jaddref->ja_mkdir;
3615
WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir->md_list);
3616
} else if (jaddref->ja_state & MKDIR_BODY)
3617
mkdir = jaddref->ja_mkdir;
3619
panic("handle_written_jaddref: Unknown dependency %p",
3620
jaddref->ja_diradd);
3621
jaddref->ja_diradd = NULL; /* also clears ja_mkdir */
3623
* Remove us from the inode list.
3625
TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps);
3627
* The mkdir may be waiting on the jaddref to clear before freeing.
3630
KASSERT(mkdir->md_list.wk_type == D_MKDIR,
3631
("handle_written_jaddref: Incorrect type for mkdir %s",
3632
TYPENAME(mkdir->md_list.wk_type)));
3633
mkdir->md_jaddref = NULL;
3634
diradd = mkdir->md_diradd;
3635
mkdir->md_state |= DEPCOMPLETE;
3636
complete_mkdir(mkdir);
3638
jwork_insert(&diradd->da_jwork, jsegdep);
3639
if (jaddref->ja_state & NEWBLOCK) {
3640
inodedep->id_state |= ONDEPLIST;
3641
LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd,
3644
free_jaddref(jaddref);
3648
* Called once a jnewblk journal is written. The allocdirect or allocindir
3649
* is placed in the bmsafemap to await notification of a written bitmap. If
3650
* the operation was canceled we add the segdep to the appropriate
3651
* dependency to free the journal space once the canceling operation
3655
handle_written_jnewblk(jnewblk)
3656
struct jnewblk *jnewblk;
3658
struct bmsafemap *bmsafemap;
3659
struct freefrag *freefrag;
3660
struct freework *freework;
3661
struct jsegdep *jsegdep;
3662
struct newblk *newblk;
3664
/* Grab the jsegdep. */
3665
jsegdep = jnewblk->jn_jsegdep;
3666
jnewblk->jn_jsegdep = NULL;
3667
if (jnewblk->jn_dep == NULL)
3668
panic("handle_written_jnewblk: No dependency for the segdep.");
3669
switch (jnewblk->jn_dep->wk_type) {
3674
* Add the written block to the bmsafemap so it can
3675
* be notified when the bitmap is on disk.
3677
newblk = WK_NEWBLK(jnewblk->jn_dep);
3678
newblk->nb_jnewblk = NULL;
3679
if ((newblk->nb_state & GOINGAWAY) == 0) {
3680
bmsafemap = newblk->nb_bmsafemap;
3681
newblk->nb_state |= ONDEPLIST;
3682
LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk,
3685
jwork_insert(&newblk->nb_jwork, jsegdep);
3689
* A newblock being removed by a freefrag when replaced by
3692
freefrag = WK_FREEFRAG(jnewblk->jn_dep);
3693
freefrag->ff_jdep = NULL;
3694
jwork_insert(&freefrag->ff_jwork, jsegdep);
3698
* A direct block was removed by truncate.
3700
freework = WK_FREEWORK(jnewblk->jn_dep);
3701
freework->fw_jnewblk = NULL;
3702
jwork_insert(&freework->fw_freeblks->fb_jwork, jsegdep);
3705
panic("handle_written_jnewblk: Unknown type %d.",
3706
jnewblk->jn_dep->wk_type);
3708
jnewblk->jn_dep = NULL;
3709
free_jnewblk(jnewblk);
3713
* Cancel a jfreefrag that won't be needed, probably due to colliding with
3714
* an in-flight allocation that has not yet been committed. Divorce us
3715
* from the freefrag and mark it DEPCOMPLETE so that it may be added
3719
cancel_jfreefrag(jfreefrag)
3720
struct jfreefrag *jfreefrag;
3722
struct freefrag *freefrag;
3724
if (jfreefrag->fr_jsegdep) {
3725
free_jsegdep(jfreefrag->fr_jsegdep);
3726
jfreefrag->fr_jsegdep = NULL;
3728
freefrag = jfreefrag->fr_freefrag;
3729
jfreefrag->fr_freefrag = NULL;
3730
free_jfreefrag(jfreefrag);
3731
freefrag->ff_state |= DEPCOMPLETE;
3732
CTR1(KTR_SUJ, "cancel_jfreefrag: blkno %jd", freefrag->ff_blkno);
3736
* Free a jfreefrag when the parent freefrag is rendered obsolete.
3739
free_jfreefrag(jfreefrag)
3740
struct jfreefrag *jfreefrag;
3743
if (jfreefrag->fr_state & INPROGRESS)
3744
WORKLIST_REMOVE(&jfreefrag->fr_list);
3745
else if (jfreefrag->fr_state & ONWORKLIST)
3746
remove_from_journal(&jfreefrag->fr_list);
3747
if (jfreefrag->fr_freefrag != NULL)
3748
panic("free_jfreefrag: Still attached to a freefrag.");
3749
WORKITEM_FREE(jfreefrag, D_JFREEFRAG);
3753
* Called when the journal write for a jfreefrag completes. The parent
3754
* freefrag is added to the worklist if this completes its dependencies.
3757
handle_written_jfreefrag(jfreefrag)
3758
struct jfreefrag *jfreefrag;
3760
struct jsegdep *jsegdep;
3761
struct freefrag *freefrag;
3763
/* Grab the jsegdep. */
3764
jsegdep = jfreefrag->fr_jsegdep;
3765
jfreefrag->fr_jsegdep = NULL;
3766
freefrag = jfreefrag->fr_freefrag;
3767
if (freefrag == NULL)
3768
panic("handle_written_jfreefrag: No freefrag.");
3769
freefrag->ff_state |= DEPCOMPLETE;
3770
freefrag->ff_jdep = NULL;
3771
jwork_insert(&freefrag->ff_jwork, jsegdep);
3772
if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE)
3773
add_to_worklist(&freefrag->ff_list, 0);
3774
jfreefrag->fr_freefrag = NULL;
3775
free_jfreefrag(jfreefrag);
3779
* Called when the journal write for a jfreeblk completes. The jfreeblk
3780
* is removed from the freeblks list of pending journal writes and the
3781
* jsegdep is moved to the freeblks jwork to be completed when all blocks
3782
* have been reclaimed.
3785
handle_written_jblkdep(jblkdep)
3786
struct jblkdep *jblkdep;
3788
struct freeblks *freeblks;
3789
struct jsegdep *jsegdep;
3791
/* Grab the jsegdep. */
3792
jsegdep = jblkdep->jb_jsegdep;
3793
jblkdep->jb_jsegdep = NULL;
3794
freeblks = jblkdep->jb_freeblks;
3795
LIST_REMOVE(jblkdep, jb_deps);
3796
jwork_insert(&freeblks->fb_jwork, jsegdep);
3798
* If the freeblks is all journaled, we can add it to the worklist.
3800
if (LIST_EMPTY(&freeblks->fb_jblkdephd) &&
3801
(freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
3802
add_to_worklist(&freeblks->fb_list, WK_NODELAY);
3804
free_jblkdep(jblkdep);
3807
static struct jsegdep *
3808
newjsegdep(struct worklist *wk)
3810
struct jsegdep *jsegdep;
3812
jsegdep = malloc(sizeof(*jsegdep), M_JSEGDEP, M_SOFTDEP_FLAGS);
3813
workitem_alloc(&jsegdep->jd_list, D_JSEGDEP, wk->wk_mp);
3814
jsegdep->jd_seg = NULL;
3819
static struct jmvref *
3820
newjmvref(dp, ino, oldoff, newoff)
3826
struct jmvref *jmvref;
3828
jmvref = malloc(sizeof(*jmvref), M_JMVREF, M_SOFTDEP_FLAGS);
3829
workitem_alloc(&jmvref->jm_list, D_JMVREF, UFSTOVFS(dp->i_ump));
3830
jmvref->jm_list.wk_state = ATTACHED | DEPCOMPLETE;
3831
jmvref->jm_parent = dp->i_number;
3832
jmvref->jm_ino = ino;
3833
jmvref->jm_oldoff = oldoff;
3834
jmvref->jm_newoff = newoff;
3840
* Allocate a new jremref that tracks the removal of ip from dp with the
3841
* directory entry offset of diroff. Mark the entry as ATTACHED and
3842
* DEPCOMPLETE as we have all the information required for the journal write
3843
* and the directory has already been removed from the buffer. The caller
3844
* is responsible for linking the jremref into the pagedep and adding it
3845
* to the journal to write. The MKDIR_PARENT flag is set if we're doing
3846
* a DOTDOT addition so handle_workitem_remove() can properly assign
3847
* the jsegdep when we're done.
3849
static struct jremref *
3850
newjremref(struct dirrem *dirrem, struct inode *dp, struct inode *ip,
3851
off_t diroff, nlink_t nlink)
3853
struct jremref *jremref;
3855
jremref = malloc(sizeof(*jremref), M_JREMREF, M_SOFTDEP_FLAGS);
3856
workitem_alloc(&jremref->jr_list, D_JREMREF, UFSTOVFS(dp->i_ump));
3857
jremref->jr_state = ATTACHED;
3858
newinoref(&jremref->jr_ref, ip->i_number, dp->i_number, diroff,
3860
jremref->jr_dirrem = dirrem;
3866
newinoref(struct inoref *inoref, ino_t ino, ino_t parent, off_t diroff,
3867
nlink_t nlink, uint16_t mode)
3870
inoref->if_jsegdep = newjsegdep(&inoref->if_list);
3871
inoref->if_diroff = diroff;
3872
inoref->if_ino = ino;
3873
inoref->if_parent = parent;
3874
inoref->if_nlink = nlink;
3875
inoref->if_mode = mode;
3879
* Allocate a new jaddref to track the addition of ino to dp at diroff. The
3880
* directory offset may not be known until later. The caller is responsible
3881
* for adding the entry to the journal when this information is available. nlink
3882
* should be the link count prior to the addition and mode is only required
3883
* to have the correct FMT.
3885
static struct jaddref *
3886
newjaddref(struct inode *dp, ino_t ino, off_t diroff, int16_t nlink,
3889
struct jaddref *jaddref;
3891
jaddref = malloc(sizeof(*jaddref), M_JADDREF, M_SOFTDEP_FLAGS);
3892
workitem_alloc(&jaddref->ja_list, D_JADDREF, UFSTOVFS(dp->i_ump));
3893
jaddref->ja_state = ATTACHED;
3894
jaddref->ja_mkdir = NULL;
3895
newinoref(&jaddref->ja_ref, ino, dp->i_number, diroff, nlink, mode);
3901
* Create a new free dependency for a freework. The caller is responsible
3902
* for adjusting the reference count when it has the lock held. The freedep
3903
* will track an outstanding bitmap write that will ultimately clear the
3904
* freework to continue.
3906
static struct freedep *
3907
newfreedep(struct freework *freework)
3909
struct freedep *freedep;
3911
freedep = malloc(sizeof(*freedep), M_FREEDEP, M_SOFTDEP_FLAGS);
3912
workitem_alloc(&freedep->fd_list, D_FREEDEP, freework->fw_list.wk_mp);
3913
freedep->fd_freework = freework;
3919
* Free a freedep structure once the buffer it is linked to is written. If
3920
* this is the last reference to the freework schedule it for completion.
3923
free_freedep(freedep)
3924
struct freedep *freedep;
3926
struct freework *freework;
3928
freework = freedep->fd_freework;
3929
freework->fw_freeblks->fb_cgwait--;
3930
if (--freework->fw_ref == 0)
3931
freework_enqueue(freework);
3932
WORKITEM_FREE(freedep, D_FREEDEP);
3936
* Allocate a new freework structure that may be a level in an indirect
3937
* when parent is not NULL or a top level block when it is. The top level
3938
* freework structures are allocated without lk held and before the freeblks
3939
* is visible outside of softdep_setup_freeblocks().
3941
static struct freework *
3942
newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal)
3943
struct ufsmount *ump;
3944
struct freeblks *freeblks;
3945
struct freework *parent;
3952
struct freework *freework;
3954
freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS);
3955
workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp);
3956
freework->fw_state = ATTACHED;
3957
freework->fw_jnewblk = NULL;
3958
freework->fw_freeblks = freeblks;
3959
freework->fw_parent = parent;
3960
freework->fw_lbn = lbn;
3961
freework->fw_blkno = nb;
3962
freework->fw_frags = frags;
3963
freework->fw_indir = NULL;
3964
freework->fw_ref = (MOUNTEDSUJ(UFSTOVFS(ump)) == 0 || lbn >= -NXADDR)
3965
? 0 : NINDIR(ump->um_fs) + 1;
3966
freework->fw_start = freework->fw_off = off;
3968
newjfreeblk(freeblks, lbn, nb, frags);
3969
if (parent == NULL) {
3971
WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
3980
* Eliminate a jfreeblk for a block that does not need journaling.
3983
cancel_jfreeblk(freeblks, blkno)
3984
struct freeblks *freeblks;
3987
struct jfreeblk *jfreeblk;
3988
struct jblkdep *jblkdep;
3990
LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) {
3991
if (jblkdep->jb_list.wk_type != D_JFREEBLK)
3993
jfreeblk = WK_JFREEBLK(&jblkdep->jb_list);
3994
if (jfreeblk->jf_blkno == blkno)
3997
if (jblkdep == NULL)
3999
CTR1(KTR_SUJ, "cancel_jfreeblk: blkno %jd", blkno);
4000
free_jsegdep(jblkdep->jb_jsegdep);
4001
LIST_REMOVE(jblkdep, jb_deps);
4002
WORKITEM_FREE(jfreeblk, D_JFREEBLK);
4006
* Allocate a new jfreeblk to journal a top level block pointer when truncating
4007
* a file. The caller must add this to the worklist when lk is held.
4009
static struct jfreeblk *
4010
newjfreeblk(freeblks, lbn, blkno, frags)
4011
struct freeblks *freeblks;
4016
struct jfreeblk *jfreeblk;
4018
jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS);
4019
workitem_alloc(&jfreeblk->jf_dep.jb_list, D_JFREEBLK,
4020
freeblks->fb_list.wk_mp);
4021
jfreeblk->jf_dep.jb_jsegdep = newjsegdep(&jfreeblk->jf_dep.jb_list);
4022
jfreeblk->jf_dep.jb_freeblks = freeblks;
4023
jfreeblk->jf_ino = freeblks->fb_inum;
4024
jfreeblk->jf_lbn = lbn;
4025
jfreeblk->jf_blkno = blkno;
4026
jfreeblk->jf_frags = frags;
4027
LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jfreeblk->jf_dep, jb_deps);
4033
* Allocate a new jtrunc to track a partial truncation.
4035
static struct jtrunc *
4036
newjtrunc(freeblks, size, extsize)
4037
struct freeblks *freeblks;
4041
struct jtrunc *jtrunc;
4043
jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS);
4044
workitem_alloc(&jtrunc->jt_dep.jb_list, D_JTRUNC,
4045
freeblks->fb_list.wk_mp);
4046
jtrunc->jt_dep.jb_jsegdep = newjsegdep(&jtrunc->jt_dep.jb_list);
4047
jtrunc->jt_dep.jb_freeblks = freeblks;
4048
jtrunc->jt_ino = freeblks->fb_inum;
4049
jtrunc->jt_size = size;
4050
jtrunc->jt_extsize = extsize;
4051
LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jtrunc->jt_dep, jb_deps);
4057
* If we're canceling a new bitmap we have to search for another ref
4058
* to move into the bmsafemap dep. This might be better expressed
4059
* with another structure.
4062
move_newblock_dep(jaddref, inodedep)
4063
struct jaddref *jaddref;
4064
struct inodedep *inodedep;
4066
struct inoref *inoref;
4067
struct jaddref *jaddrefn;
4070
for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
4071
inoref = TAILQ_NEXT(inoref, if_deps)) {
4072
if ((jaddref->ja_state & NEWBLOCK) &&
4073
inoref->if_list.wk_type == D_JADDREF) {
4074
jaddrefn = (struct jaddref *)inoref;
4078
if (jaddrefn == NULL)
4080
jaddrefn->ja_state &= ~(ATTACHED | UNDONE);
4081
jaddrefn->ja_state |= jaddref->ja_state &
4082
(ATTACHED | UNDONE | NEWBLOCK);
4083
jaddref->ja_state &= ~(ATTACHED | UNDONE | NEWBLOCK);
4084
jaddref->ja_state |= ATTACHED;
4085
LIST_REMOVE(jaddref, ja_bmdeps);
4086
LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_jaddrefhd, jaddrefn,
4091
* Cancel a jaddref either before it has been written or while it is being
4092
* written. This happens when a link is removed before the add reaches
4093
* the disk. The jaddref dependency is kept linked into the bmsafemap
4094
* and inode to prevent the link count or bitmap from reaching the disk
4095
* until handle_workitem_remove() re-adjusts the counts and bitmaps as
4098
* Returns 1 if the canceled addref requires journaling of the remove and
4102
cancel_jaddref(jaddref, inodedep, wkhd)
4103
struct jaddref *jaddref;
4104
struct inodedep *inodedep;
4105
struct workhead *wkhd;
4107
struct inoref *inoref;
4108
struct jsegdep *jsegdep;
4111
KASSERT((jaddref->ja_state & COMPLETE) == 0,
4112
("cancel_jaddref: Canceling complete jaddref"));
4113
if (jaddref->ja_state & (INPROGRESS | COMPLETE))
4117
if (inodedep == NULL)
4118
if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
4120
panic("cancel_jaddref: Lost inodedep");
4122
* We must adjust the nlink of any reference operation that follows
4123
* us so that it is consistent with the in-memory reference. This
4124
* ensures that inode nlink rollbacks always have the correct link.
4127
for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
4128
inoref = TAILQ_NEXT(inoref, if_deps)) {
4129
if (inoref->if_state & GOINGAWAY)
4134
jsegdep = inoref_jseg(&jaddref->ja_ref);
4135
if (jaddref->ja_state & NEWBLOCK)
4136
move_newblock_dep(jaddref, inodedep);
4137
wake_worklist(&jaddref->ja_list);
4138
jaddref->ja_mkdir = NULL;
4139
if (jaddref->ja_state & INPROGRESS) {
4140
jaddref->ja_state &= ~INPROGRESS;
4141
WORKLIST_REMOVE(&jaddref->ja_list);
4142
jwork_insert(wkhd, jsegdep);
4144
free_jsegdep(jsegdep);
4145
if (jaddref->ja_state & DEPCOMPLETE)
4146
remove_from_journal(&jaddref->ja_list);
4148
jaddref->ja_state |= (GOINGAWAY | DEPCOMPLETE);
4150
* Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove
4151
* can arrange for them to be freed with the bitmap. Otherwise we
4152
* no longer need this addref attached to the inoreflst and it
4153
* will incorrectly adjust nlink if we leave it.
4155
if ((jaddref->ja_state & NEWBLOCK) == 0) {
4156
TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref,
4158
jaddref->ja_state |= COMPLETE;
4159
free_jaddref(jaddref);
4163
* Leave the head of the list for jsegdeps for fast merging.
4165
if (LIST_FIRST(wkhd) != NULL) {
4166
jaddref->ja_state |= ONWORKLIST;
4167
LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jaddref->ja_list, wk_list);
4169
WORKLIST_INSERT(wkhd, &jaddref->ja_list);
4175
* Attempt to free a jaddref structure when some work completes. This
4176
* should only succeed once the entry is written and all dependencies have
4180
free_jaddref(jaddref)
4181
struct jaddref *jaddref;
4184
if ((jaddref->ja_state & ALLCOMPLETE) != ALLCOMPLETE)
4186
if (jaddref->ja_ref.if_jsegdep)
4187
panic("free_jaddref: segdep attached to jaddref %p(0x%X)\n",
4188
jaddref, jaddref->ja_state);
4189
if (jaddref->ja_state & NEWBLOCK)
4190
LIST_REMOVE(jaddref, ja_bmdeps);
4191
if (jaddref->ja_state & (INPROGRESS | ONWORKLIST))
4192
panic("free_jaddref: Bad state %p(0x%X)",
4193
jaddref, jaddref->ja_state);
4194
if (jaddref->ja_mkdir != NULL)
4195
panic("free_jaddref: Work pending, 0x%X\n", jaddref->ja_state);
4196
WORKITEM_FREE(jaddref, D_JADDREF);
4200
* Free a jremref structure once it has been written or discarded.
4203
free_jremref(jremref)
4204
struct jremref *jremref;
4207
if (jremref->jr_ref.if_jsegdep)
4208
free_jsegdep(jremref->jr_ref.if_jsegdep);
4209
if (jremref->jr_state & INPROGRESS)
4210
panic("free_jremref: IO still pending");
4211
WORKITEM_FREE(jremref, D_JREMREF);
4215
* Free a jnewblk structure.
4218
free_jnewblk(jnewblk)
4219
struct jnewblk *jnewblk;
4222
if ((jnewblk->jn_state & ALLCOMPLETE) != ALLCOMPLETE)
4224
LIST_REMOVE(jnewblk, jn_deps);
4225
if (jnewblk->jn_dep != NULL)
4226
panic("free_jnewblk: Dependency still attached.");
4227
WORKITEM_FREE(jnewblk, D_JNEWBLK);
4231
* Cancel a jnewblk which has been made redundant by frag extension.
4234
cancel_jnewblk(jnewblk, wkhd)
4235
struct jnewblk *jnewblk;
4236
struct workhead *wkhd;
4238
struct jsegdep *jsegdep;
4240
CTR1(KTR_SUJ, "cancel_jnewblk: blkno %jd", jnewblk->jn_blkno);
4241
jsegdep = jnewblk->jn_jsegdep;
4242
if (jnewblk->jn_jsegdep == NULL || jnewblk->jn_dep == NULL)
4243
panic("cancel_jnewblk: Invalid state");
4244
jnewblk->jn_jsegdep = NULL;
4245
jnewblk->jn_dep = NULL;
4246
jnewblk->jn_state |= GOINGAWAY;
4247
if (jnewblk->jn_state & INPROGRESS) {
4248
jnewblk->jn_state &= ~INPROGRESS;
4249
WORKLIST_REMOVE(&jnewblk->jn_list);
4250
jwork_insert(wkhd, jsegdep);
4252
free_jsegdep(jsegdep);
4253
remove_from_journal(&jnewblk->jn_list);
4255
wake_worklist(&jnewblk->jn_list);
4256
WORKLIST_INSERT(wkhd, &jnewblk->jn_list);
4260
free_jblkdep(jblkdep)
4261
struct jblkdep *jblkdep;
4264
if (jblkdep->jb_list.wk_type == D_JFREEBLK)
4265
WORKITEM_FREE(jblkdep, D_JFREEBLK);
4266
else if (jblkdep->jb_list.wk_type == D_JTRUNC)
4267
WORKITEM_FREE(jblkdep, D_JTRUNC);
4269
panic("free_jblkdep: Unexpected type %s",
4270
TYPENAME(jblkdep->jb_list.wk_type));
4274
* Free a single jseg once it is no longer referenced in memory or on
4275
* disk. Reclaim journal blocks and dependencies waiting for the segment
4279
free_jseg(jseg, jblocks)
4281
struct jblocks *jblocks;
4283
struct freework *freework;
4286
* Free freework structures that were lingering to indicate freed
4287
* indirect blocks that forced journal write ordering on reallocate.
4289
while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL)
4290
indirblk_remove(freework);
4291
if (jblocks->jb_oldestseg == jseg)
4292
jblocks->jb_oldestseg = TAILQ_NEXT(jseg, js_next);
4293
TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next);
4294
jblocks_free(jblocks, jseg->js_list.wk_mp, jseg->js_size);
4295
KASSERT(LIST_EMPTY(&jseg->js_entries),
4296
("free_jseg: Freed jseg has valid entries."));
4297
WORKITEM_FREE(jseg, D_JSEG);
4301
* Free all jsegs that meet the criteria for being reclaimed and update
4306
struct jblocks *jblocks;
4311
* Free only those jsegs which have none allocated before them to
4312
* preserve the journal space ordering.
4314
while ((jseg = TAILQ_FIRST(&jblocks->jb_segs)) != NULL) {
4316
* Only reclaim space when nothing depends on this journal
4317
* set and another set has written that it is no longer
4320
if (jseg->js_refs != 0) {
4321
jblocks->jb_oldestseg = jseg;
4324
if ((jseg->js_state & ALLCOMPLETE) != ALLCOMPLETE)
4326
if (jseg->js_seq > jblocks->jb_oldestwrseq)
4329
* We can free jsegs that didn't write entries when
4330
* oldestwrseq == js_seq.
4332
if (jseg->js_seq == jblocks->jb_oldestwrseq &&
4335
free_jseg(jseg, jblocks);
4338
* If we exited the loop above we still must discover the
4339
* oldest valid segment.
4342
for (jseg = jblocks->jb_oldestseg; jseg != NULL;
4343
jseg = TAILQ_NEXT(jseg, js_next))
4344
if (jseg->js_refs != 0)
4346
jblocks->jb_oldestseg = jseg;
4348
* The journal has no valid records but some jsegs may still be
4349
* waiting on oldestwrseq to advance. We force a small record
4350
* out to permit these lingering records to be reclaimed.
4352
if (jblocks->jb_oldestseg == NULL && !TAILQ_EMPTY(&jblocks->jb_segs))
4353
jblocks->jb_needseg = 1;
4357
* Release one reference to a jseg and free it if the count reaches 0. This
4358
* should eventually reclaim journal space as well.
4365
KASSERT(jseg->js_refs > 0,
4366
("free_jseg: Invalid refcnt %d", jseg->js_refs));
4367
if (--jseg->js_refs != 0)
4369
free_jsegs(jseg->js_jblocks);
4373
* Release a jsegdep and decrement the jseg count.
4376
free_jsegdep(jsegdep)
4377
struct jsegdep *jsegdep;
4380
if (jsegdep->jd_seg)
4381
rele_jseg(jsegdep->jd_seg);
4382
WORKITEM_FREE(jsegdep, D_JSEGDEP);
4386
* Wait for a journal item to make it to disk. Initiate journal processing
4391
struct worklist *wk;
4396
* Blocking journal waits cause slow synchronous behavior. Record
4397
* stats on the frequency of these blocking operations.
4399
if (waitfor == MNT_WAIT) {
4400
stat_journal_wait++;
4401
switch (wk->wk_type) {
4404
stat_jwait_filepage++;
4408
stat_jwait_freeblks++;
4411
stat_jwait_newblk++;
4421
* If IO has not started we process the journal. We can't mark the
4422
* worklist item as IOWAITING because we drop the lock while
4423
* processing the journal and the worklist entry may be freed after
4424
* this point. The caller may call back in and re-issue the request.
4426
if ((wk->wk_state & INPROGRESS) == 0) {
4427
softdep_process_journal(wk->wk_mp, wk, waitfor);
4428
if (waitfor != MNT_WAIT)
4432
if (waitfor != MNT_WAIT)
4434
wait_worklist(wk, "jwait");
4439
* Look up an inodedep based on an inode pointer and set the nlinkdelta as
4440
* appropriate. This is a convenience function to reduce duplicate code
4441
* for the setup and revert functions below.
4443
static struct inodedep *
4444
inodedep_lookup_ip(ip)
4447
struct inodedep *inodedep;
4450
KASSERT(ip->i_nlink >= ip->i_effnlink,
4451
("inodedep_lookup_ip: bad delta"));
4453
if (IS_SNAPSHOT(ip))
4455
(void) inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags,
4457
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
4458
KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked"));
4464
* Called prior to creating a new inode and linking it to a directory. The
4465
* jaddref structure must already be allocated by softdep_setup_inomapdep
4466
* and it is discovered here so we can initialize the mode and update
4470
softdep_setup_create(dp, ip)
4474
struct inodedep *inodedep;
4475
struct jaddref *jaddref;
4478
KASSERT(ip->i_nlink == 1,
4479
("softdep_setup_create: Invalid link count."));
4482
inodedep = inodedep_lookup_ip(ip);
4483
if (DOINGSUJ(dvp)) {
4484
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
4486
KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
4487
("softdep_setup_create: No addref structure present."));
4489
softdep_prelink(dvp, NULL);
4494
* Create a jaddref structure to track the addition of a DOTDOT link when
4495
* we are reparenting an inode as part of a rename. This jaddref will be
4496
* found by softdep_setup_directory_change. Adjusts nlinkdelta for
4497
* non-journaling softdep.
4500
softdep_setup_dotdot_link(dp, ip)
4504
struct inodedep *inodedep;
4505
struct jaddref *jaddref;
4513
* We don't set MKDIR_PARENT as this is not tied to a mkdir and
4514
* is used as a normal link would be.
4517
jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET,
4518
dp->i_effnlink - 1, dp->i_mode);
4520
inodedep = inodedep_lookup_ip(dp);
4522
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
4524
softdep_prelink(dvp, ITOV(ip));
4529
* Create a jaddref structure to track a new link to an inode. The directory
4530
* offset is not known until softdep_setup_directory_add or
4531
* softdep_setup_directory_change. Adjusts nlinkdelta for non-journaling
4535
softdep_setup_link(dp, ip)
4539
struct inodedep *inodedep;
4540
struct jaddref *jaddref;
4546
jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1,
4549
inodedep = inodedep_lookup_ip(ip);
4551
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
4553
softdep_prelink(dvp, ITOV(ip));
4558
* Called to create the jaddref structures to track . and .. references as
4559
* well as look up and further initialize the incomplete jaddref created
4560
* by softdep_setup_inomapdep when the inode was allocated. Adjusts
4561
* nlinkdelta for non-journaling softdep.
4564
softdep_setup_mkdir(dp, ip)
4568
struct inodedep *inodedep;
4569
struct jaddref *dotdotaddref;
4570
struct jaddref *dotaddref;
4571
struct jaddref *jaddref;
4575
dotaddref = dotdotaddref = NULL;
4576
if (DOINGSUJ(dvp)) {
4577
dotaddref = newjaddref(ip, ip->i_number, DOT_OFFSET, 1,
4579
dotaddref->ja_state |= MKDIR_BODY;
4580
dotdotaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET,
4581
dp->i_effnlink - 1, dp->i_mode);
4582
dotdotaddref->ja_state |= MKDIR_PARENT;
4585
inodedep = inodedep_lookup_ip(ip);
4586
if (DOINGSUJ(dvp)) {
4587
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
4589
KASSERT(jaddref != NULL,
4590
("softdep_setup_mkdir: No addref structure present."));
4591
KASSERT(jaddref->ja_parent == dp->i_number,
4592
("softdep_setup_mkdir: bad parent %d",
4593
jaddref->ja_parent));
4594
TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref,
4597
inodedep = inodedep_lookup_ip(dp);
4599
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst,
4600
&dotdotaddref->ja_ref, if_deps);
4601
softdep_prelink(ITOV(dp), NULL);
4606
* Called to track nlinkdelta of the inode and parent directories prior to
4607
* unlinking a directory.
4610
softdep_setup_rmdir(dp, ip)
4618
(void) inodedep_lookup_ip(ip);
4619
(void) inodedep_lookup_ip(dp);
4620
softdep_prelink(dvp, ITOV(ip));
4625
* Called to track nlinkdelta of the inode and parent directories prior to
4629
softdep_setup_unlink(dp, ip)
4637
(void) inodedep_lookup_ip(ip);
4638
(void) inodedep_lookup_ip(dp);
4639
softdep_prelink(dvp, ITOV(ip));
4644
* Called to release the journal structures created by a failed non-directory
4645
* creation. Adjusts nlinkdelta for non-journaling softdep.
4648
softdep_revert_create(dp, ip)
4652
struct inodedep *inodedep;
4653
struct jaddref *jaddref;
4658
inodedep = inodedep_lookup_ip(ip);
4659
if (DOINGSUJ(dvp)) {
4660
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
4662
KASSERT(jaddref->ja_parent == dp->i_number,
4663
("softdep_revert_create: addref parent mismatch"));
4664
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
4670
* Called to release the journal structures created by a failed dotdot link
4671
* creation. Adjusts nlinkdelta for non-journaling softdep.
4674
softdep_revert_dotdot_link(dp, ip)
4678
struct inodedep *inodedep;
4679
struct jaddref *jaddref;
4684
inodedep = inodedep_lookup_ip(dp);
4685
if (DOINGSUJ(dvp)) {
4686
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
4688
KASSERT(jaddref->ja_parent == ip->i_number,
4689
("softdep_revert_dotdot_link: addref parent mismatch"));
4690
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
4696
* Called to release the journal structures created by a failed link
4697
* addition. Adjusts nlinkdelta for non-journaling softdep.
4700
softdep_revert_link(dp, ip)
4704
struct inodedep *inodedep;
4705
struct jaddref *jaddref;
4710
inodedep = inodedep_lookup_ip(ip);
4711
if (DOINGSUJ(dvp)) {
4712
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
4714
KASSERT(jaddref->ja_parent == dp->i_number,
4715
("softdep_revert_link: addref parent mismatch"));
4716
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
4722
* Called to release the journal structures created by a failed mkdir
4723
* attempt. Adjusts nlinkdelta for non-journaling softdep.
4726
softdep_revert_mkdir(dp, ip)
4730
struct inodedep *inodedep;
4731
struct jaddref *jaddref;
4732
struct jaddref *dotaddref;
4738
inodedep = inodedep_lookup_ip(dp);
4739
if (DOINGSUJ(dvp)) {
4740
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
4742
KASSERT(jaddref->ja_parent == ip->i_number,
4743
("softdep_revert_mkdir: dotdot addref parent mismatch"));
4744
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
4746
inodedep = inodedep_lookup_ip(ip);
4747
if (DOINGSUJ(dvp)) {
4748
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
4750
KASSERT(jaddref->ja_parent == dp->i_number,
4751
("softdep_revert_mkdir: addref parent mismatch"));
4752
dotaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref,
4753
inoreflst, if_deps);
4754
cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
4755
KASSERT(dotaddref->ja_parent == ip->i_number,
4756
("softdep_revert_mkdir: dot addref parent mismatch"));
4757
cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait);
4763
* Called to correct nlinkdelta after a failed rmdir.
4766
softdep_revert_rmdir(dp, ip)
4772
(void) inodedep_lookup_ip(ip);
4773
(void) inodedep_lookup_ip(dp);
1497
4778
* Protecting the freemaps (or bitmaps).
2039
5671
int ptrno; /* offset of pointer in indirect block */
2040
5672
ufs2_daddr_t newblkno; /* disk block number being added */
5674
struct inodedep *inodedep;
2042
5675
struct allocindir *aip;
5680
"softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d",
5681
ip->i_number, newblkno, ptrno);
5682
lbn = nbp->b_lblkno;
2044
5683
ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
2045
aip = newallocindir(ip, ptrno, newblkno, 0);
2047
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
2048
setup_allocindir_phase2(bp, ip, aip);
5684
aip = newallocindir(ip, ptrno, newblkno, 0, lbn);
5686
if (IS_SNAPSHOT(ip))
5688
inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, dflags, &inodedep);
5689
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
5690
if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn))
5691
panic("softdep_setup_allocindir_meta: Block already existed");
2049
5692
FREE_LOCK(&lk);
2053
* Called to finish the allocation of the "aip" allocated
2054
* by one of the two routines above.
2057
setup_allocindir_phase2(bp, ip, aip)
2058
struct buf *bp; /* in-memory copy of the indirect block */
2059
struct inode *ip; /* inode for file being extended */
2060
struct allocindir *aip; /* allocindir allocated by the above routines */
2062
struct worklist *wk;
5696
indirdep_complete(indirdep)
5697
struct indirdep *indirdep;
5699
struct allocindir *aip;
5701
LIST_REMOVE(indirdep, ir_next);
5702
indirdep->ir_state |= DEPCOMPLETE;
5704
while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) {
5705
LIST_REMOVE(aip, ai_next);
5706
free_newblk(&aip->ai_block);
5709
* If this indirdep is not attached to a buf it was simply waiting
5710
* on completion to clear completehd. free_indirdep() asserts
5711
* that nothing is dangling.
5713
if ((indirdep->ir_state & ONWORKLIST) == 0)
5714
free_indirdep(indirdep);
5717
static struct indirdep *
5718
indirdep_lookup(mp, ip, bp)
2063
5723
struct indirdep *indirdep, *newindirdep;
2064
struct bmsafemap *bmsafemap;
2065
struct allocindir *oldaip;
2066
struct freefrag *freefrag;
2067
5724
struct newblk *newblk;
5725
struct worklist *wk;
2068
5727
ufs2_daddr_t blkno;
2070
5729
mtx_assert(&lk, MA_OWNED);
2071
if (bp->b_lblkno >= 0)
2072
panic("setup_allocindir_phase2: not indir blk");
2073
for (indirdep = NULL, newindirdep = NULL; ; ) {
2074
5734
LIST_FOREACH(wk, &bp->b_dep, wk_list) {
2075
5735
if (wk->wk_type != D_INDIRDEP)
2077
5737
indirdep = WK_INDIRDEP(wk);
2080
if (indirdep == NULL && newindirdep) {
2081
indirdep = newindirdep;
2082
WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
2086
if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
2088
panic("setup_allocindir: lost block");
2089
if (newblk->nb_state == DEPCOMPLETE) {
2090
aip->ai_state |= DEPCOMPLETE;
2093
bmsafemap = newblk->nb_bmsafemap;
2094
aip->ai_buf = bmsafemap->sm_buf;
2095
LIST_REMOVE(newblk, nb_deps);
2096
LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
2099
LIST_REMOVE(newblk, nb_hash);
2100
free(newblk, M_NEWBLK);
2101
aip->ai_indirdep = indirdep;
2103
* Check to see if there is an existing dependency
2104
* for this block. If there is, merge the old
2105
* dependency into the new one.
2107
if (aip->ai_oldblkno == 0)
2111
LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
2112
if (oldaip->ai_offset == aip->ai_offset)
2115
if (oldaip != NULL) {
2116
if (oldaip->ai_newblkno != aip->ai_oldblkno)
2117
panic("setup_allocindir_phase2: blkno");
2118
aip->ai_oldblkno = oldaip->ai_oldblkno;
2119
freefrag = aip->ai_freefrag;
2120
aip->ai_freefrag = oldaip->ai_freefrag;
2121
oldaip->ai_freefrag = NULL;
2122
free_allocindir(oldaip, NULL);
2124
LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
2125
if (ip->i_ump->um_fstype == UFS1)
2126
((ufs1_daddr_t *)indirdep->ir_savebp->b_data)
2127
[aip->ai_offset] = aip->ai_oldblkno;
2129
((ufs2_daddr_t *)indirdep->ir_savebp->b_data)
2130
[aip->ai_offset] = aip->ai_oldblkno;
2132
if (freefrag != NULL)
2133
handle_workitem_freefrag(freefrag);
2137
newindirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE;
2138
brelse(newindirdep->ir_savebp);
2140
WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP);
5740
/* Found on the buffer worklist, no new structure to free. */
5741
if (indirdep != NULL && newindirdep == NULL)
5743
if (indirdep != NULL && newindirdep != NULL)
5744
panic("indirdep_lookup: simultaneous create");
5745
/* None found on the buffer and a new structure is ready. */
5746
if (indirdep == NULL && newindirdep != NULL)
5748
/* None found and no new structure available. */
2149
5750
newindirdep = malloc(sizeof(struct indirdep),
2150
M_INDIRDEP, M_SOFTDEP_FLAGS);
2151
workitem_alloc(&newindirdep->ir_list, D_INDIRDEP,
2152
UFSTOVFS(ip->i_ump));
5751
M_INDIRDEP, M_SOFTDEP_FLAGS);
5752
workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp);
2153
5753
newindirdep->ir_state = ATTACHED;
2154
5754
if (ip->i_ump->um_fstype == UFS1)
2155
5755
newindirdep->ir_state |= UFS1FMT;
5756
TAILQ_INIT(&newindirdep->ir_trunc);
5757
newindirdep->ir_saveddata = NULL;
2156
5758
LIST_INIT(&newindirdep->ir_deplisthd);
2157
5759
LIST_INIT(&newindirdep->ir_donehd);
5760
LIST_INIT(&newindirdep->ir_writehd);
5761
LIST_INIT(&newindirdep->ir_completehd);
2158
5762
if (bp->b_blkno == bp->b_lblkno) {
2159
5763
ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp,
2161
5765
bp->b_blkno = blkno;
5767
newindirdep->ir_freeblks = NULL;
2163
5768
newindirdep->ir_savebp =
2164
5769
getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0);
5770
newindirdep->ir_bp = bp;
2165
5771
BUF_KERNPROC(newindirdep->ir_savebp);
2166
5772
bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
2167
5773
ACQUIRE_LOCK(&lk);
5775
indirdep = newindirdep;
5776
WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
5778
* If the block is not yet allocated we don't set DEPCOMPLETE so
5779
* that we don't free dependencies until the pointers are valid.
5780
* This could search b_dep for D_ALLOCDIRECT/D_ALLOCINDIR rather
5781
* than using the hash.
5783
if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk))
5784
LIST_INSERT_HEAD(&newblk->nb_indirdeps, indirdep, ir_next);
5786
indirdep->ir_state |= DEPCOMPLETE;
5791
* Called to finish the allocation of the "aip" allocated
5792
* by one of the two routines above.
5794
static struct freefrag *
5795
setup_allocindir_phase2(bp, ip, inodedep, aip, lbn)
5796
struct buf *bp; /* in-memory copy of the indirect block */
5797
struct inode *ip; /* inode for file being extended */
5798
struct inodedep *inodedep; /* Inodedep for ip */
5799
struct allocindir *aip; /* allocindir allocated by the above routines */
5800
ufs_lbn_t lbn; /* Logical block number for this block. */
5803
struct indirdep *indirdep;
5804
struct allocindir *oldaip;
5805
struct freefrag *freefrag;
5808
mtx_assert(&lk, MA_OWNED);
5809
mp = UFSTOVFS(ip->i_ump);
5811
if (bp->b_lblkno >= 0)
5812
panic("setup_allocindir_phase2: not indir blk");
5813
KASSERT(aip->ai_offset >= 0 && aip->ai_offset < NINDIR(fs),
5814
("setup_allocindir_phase2: Bad offset %d", aip->ai_offset));
5815
indirdep = indirdep_lookup(mp, ip, bp);
5816
KASSERT(indirdep->ir_savebp != NULL,
5817
("setup_allocindir_phase2 NULL ir_savebp"));
5818
aip->ai_indirdep = indirdep;
5820
* Check for an unwritten dependency for this indirect offset. If
5821
* there is, merge the old dependency into the new one. This happens
5822
* as a result of reallocblk only.
5825
if (aip->ai_oldblkno != 0) {
5826
LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) {
5827
if (oldaip->ai_offset == aip->ai_offset) {
5828
freefrag = allocindir_merge(aip, oldaip);
5832
LIST_FOREACH(oldaip, &indirdep->ir_donehd, ai_next) {
5833
if (oldaip->ai_offset == aip->ai_offset) {
5834
freefrag = allocindir_merge(aip, oldaip);
5840
LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
5845
* Merge two allocindirs which refer to the same block. Move newblock
5846
* dependencies and setup the freefrags appropriately.
5848
static struct freefrag *
5849
allocindir_merge(aip, oldaip)
5850
struct allocindir *aip;
5851
struct allocindir *oldaip;
5853
struct freefrag *freefrag;
5854
struct worklist *wk;
5856
if (oldaip->ai_newblkno != aip->ai_oldblkno)
5857
panic("allocindir_merge: blkno");
5858
aip->ai_oldblkno = oldaip->ai_oldblkno;
5859
freefrag = aip->ai_freefrag;
5860
aip->ai_freefrag = oldaip->ai_freefrag;
5861
oldaip->ai_freefrag = NULL;
5862
KASSERT(freefrag != NULL, ("setup_allocindir_phase2: No freefrag"));
5864
* If we are tracking a new directory-block allocation,
5865
* move it from the old allocindir to the new allocindir.
5867
if ((wk = LIST_FIRST(&oldaip->ai_newdirblk)) != NULL) {
5868
WORKLIST_REMOVE(wk);
5869
if (!LIST_EMPTY(&oldaip->ai_newdirblk))
5870
panic("allocindir_merge: extra newdirblk");
5871
WORKLIST_INSERT(&aip->ai_newdirblk, wk);
5874
* We can skip journaling for this freefrag and just complete
5875
* any pending journal work for the allocindir that is being
5876
* removed after the freefrag completes.
5878
if (freefrag->ff_jdep)
5879
cancel_jfreefrag(WK_JFREEFRAG(freefrag->ff_jdep));
5880
LIST_REMOVE(oldaip, ai_next);
5881
freefrag->ff_jdep = (struct worklist *)cancel_newblk(&oldaip->ai_block,
5882
&freefrag->ff_list, &freefrag->ff_jwork);
5883
free_newblk(&oldaip->ai_block);
5889
setup_freedirect(freeblks, ip, i, needj)
5890
struct freeblks *freeblks;
5898
blkno = DIP(ip, i_db[i]);
5901
DIP_SET(ip, i_db[i], 0);
5902
frags = sblksize(ip->i_fs, ip->i_size, i);
5903
frags = numfrags(ip->i_fs, frags);
5904
newfreework(ip->i_ump, freeblks, NULL, i, blkno, frags, 0, needj);
5908
setup_freeext(freeblks, ip, i, needj)
5909
struct freeblks *freeblks;
5917
blkno = ip->i_din2->di_extb[i];
5920
ip->i_din2->di_extb[i] = 0;
5921
frags = sblksize(ip->i_fs, ip->i_din2->di_extsize, i);
5922
frags = numfrags(ip->i_fs, frags);
5923
newfreework(ip->i_ump, freeblks, NULL, -1 - i, blkno, frags, 0, needj);
5927
setup_freeindir(freeblks, ip, i, lbn, needj)
5928
struct freeblks *freeblks;
5936
blkno = DIP(ip, i_ib[i]);
5939
DIP_SET(ip, i_ib[i], 0);
5940
newfreework(ip->i_ump, freeblks, NULL, lbn, blkno, ip->i_fs->fs_frag,
5944
static inline struct freeblks *
5949
struct freeblks *freeblks;
5951
freeblks = malloc(sizeof(struct freeblks),
5952
M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO);
5953
workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp);
5954
LIST_INIT(&freeblks->fb_jblkdephd);
5955
LIST_INIT(&freeblks->fb_jwork);
5956
freeblks->fb_ref = 0;
5957
freeblks->fb_cgwait = 0;
5958
freeblks->fb_state = ATTACHED;
5959
freeblks->fb_uid = ip->i_uid;
5960
freeblks->fb_inum = ip->i_number;
5961
freeblks->fb_vtype = ITOV(ip)->v_type;
5962
freeblks->fb_modrev = DIP(ip, i_modrev);
5963
freeblks->fb_devvp = ip->i_devvp;
5964
freeblks->fb_chkcnt = 0;
5965
freeblks->fb_len = 0;
5971
trunc_indirdep(indirdep, freeblks, bp, off)
5972
struct indirdep *indirdep;
5973
struct freeblks *freeblks;
5977
struct allocindir *aip, *aipn;
5980
* The first set of allocindirs won't be in savedbp.
5982
LIST_FOREACH_SAFE(aip, &indirdep->ir_deplisthd, ai_next, aipn)
5983
if (aip->ai_offset > off)
5984
cancel_allocindir(aip, bp, freeblks, 1);
5985
LIST_FOREACH_SAFE(aip, &indirdep->ir_donehd, ai_next, aipn)
5986
if (aip->ai_offset > off)
5987
cancel_allocindir(aip, bp, freeblks, 1);
5989
* These will exist in savedbp.
5991
LIST_FOREACH_SAFE(aip, &indirdep->ir_writehd, ai_next, aipn)
5992
if (aip->ai_offset > off)
5993
cancel_allocindir(aip, NULL, freeblks, 0);
5994
LIST_FOREACH_SAFE(aip, &indirdep->ir_completehd, ai_next, aipn)
5995
if (aip->ai_offset > off)
5996
cancel_allocindir(aip, NULL, freeblks, 0);
6000
* Follow the chain of indirects down to lastlbn creating a freework
6001
* structure for each. This will be used to start indir_trunc() at
6002
* the right offset and create the journal records for the partial
6003
* truncation. A second step will handle the truncated dependencies.
6006
setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno)
6007
struct freeblks *freeblks;
6013
struct indirdep *indirdep;
6014
struct indirdep *indirn;
6015
struct freework *freework;
6016
struct newblk *newblk;
6030
mp = freeblks->fb_list.wk_mp;
6031
bp = getblk(ITOV(ip), lbn, mp->mnt_stat.f_iosize, 0, 0, 0);
6032
if ((bp->b_flags & B_CACHE) == 0) {
6033
bp->b_blkno = blkptrtodb(VFSTOUFS(mp), blkno);
6034
bp->b_iocmd = BIO_READ;
6035
bp->b_flags &= ~B_INVAL;
6036
bp->b_ioflags &= ~BIO_ERROR;
6037
vfs_busy_pages(bp, 0);
6038
bp->b_iooffset = dbtob(bp->b_blkno);
6040
curthread->td_ru.ru_inblock++;
6041
error = bufwait(bp);
6047
level = lbn_level(lbn);
6048
lbnadd = lbn_offset(ip->i_fs, level);
6050
* Compute the offset of the last block we want to keep. Store
6051
* in the freework the first block we want to completely free.
6053
off = (lastlbn - -(lbn + level)) / lbnadd;
6054
if (off + 1 == NINDIR(ip->i_fs))
6056
freework = newfreework(ip->i_ump, freeblks, NULL, lbn, blkno, 0, off+1,
6059
* Link the freework into the indirdep. This will prevent any new
6060
* allocations from proceeding until we are finished with the
6061
* truncate and the block is written.
6064
indirdep = indirdep_lookup(mp, ip, bp);
6065
if (indirdep->ir_freeblks)
6066
panic("setup_trunc_indir: indirdep already truncated.");
6067
TAILQ_INSERT_TAIL(&indirdep->ir_trunc, freework, fw_next);
6068
freework->fw_indir = indirdep;
6070
* Cancel any allocindirs that will not make it to disk.
6071
* We have to do this for all copies of the indirdep that
6072
* live on this newblk.
6074
if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
6075
newblk_lookup(mp, dbtofsb(ip->i_fs, bp->b_blkno), 0, &newblk);
6076
LIST_FOREACH(indirn, &newblk->nb_indirdeps, ir_next)
6077
trunc_indirdep(indirn, freeblks, bp, off);
6079
trunc_indirdep(indirdep, freeblks, bp, off);
6082
* Creation is protected by the buf lock. The saveddata is only
6083
* needed if a full truncation follows a partial truncation but it
6084
* is difficult to allocate in that case so we fetch it anyway.
6086
if (indirdep->ir_saveddata == NULL)
6087
indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP,
6090
/* Fetch the blkno of the child and the zero start offset. */
6091
if (ip->i_ump->um_fstype == UFS1) {
6092
blkno = ((ufs1_daddr_t *)bp->b_data)[off];
6093
start = (uint8_t *)&((ufs1_daddr_t *)bp->b_data)[off+1];
6095
blkno = ((ufs2_daddr_t *)bp->b_data)[off];
6096
start = (uint8_t *)&((ufs2_daddr_t *)bp->b_data)[off+1];
6099
/* Zero the truncated pointers. */
6100
end = bp->b_data + bp->b_bcount;
6101
bzero(start, end - start);
6107
lbn++; /* adjust level */
6108
lbn -= (off * lbnadd);
6109
return setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno);
6113
* Complete the partial truncation of an indirect block setup by
6114
* setup_trunc_indir(). This zeros the truncated pointers in the saved
6115
* copy and writes them to disk before the freeblks is allowed to complete.
6118
complete_trunc_indir(freework)
6119
struct freework *freework;
6121
struct freework *fwn;
6122
struct indirdep *indirdep;
6127
indirdep = freework->fw_indir;
6129
bp = indirdep->ir_bp;
6130
/* See if the block was discarded. */
6133
/* Inline part of getdirtybuf(). We don't want bremfree. */
6134
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0)
6137
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, &lk) == 0)
6141
mtx_assert(&lk, MA_OWNED);
6142
freework->fw_state |= DEPCOMPLETE;
6143
TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next);
6145
* Zero the pointers in the saved copy.
6147
if (indirdep->ir_state & UFS1FMT)
6148
start = sizeof(ufs1_daddr_t);
6150
start = sizeof(ufs2_daddr_t);
6151
start *= freework->fw_start;
6152
count = indirdep->ir_savebp->b_bcount - start;
6153
start += (uintptr_t)indirdep->ir_savebp->b_data;
6154
bzero((char *)start, count);
6156
* We need to start the next truncation in the list if it has not
6159
fwn = TAILQ_FIRST(&indirdep->ir_trunc);
6161
if (fwn->fw_freeblks == indirdep->ir_freeblks)
6162
TAILQ_REMOVE(&indirdep->ir_trunc, fwn, fw_next);
6163
if ((fwn->fw_state & ONWORKLIST) == 0)
6164
freework_enqueue(fwn);
6167
* If bp is NULL the block was fully truncated, restore
6168
* the saved block list otherwise free it if it is no
6171
if (TAILQ_EMPTY(&indirdep->ir_trunc)) {
6173
bcopy(indirdep->ir_saveddata,
6174
indirdep->ir_savebp->b_data,
6175
indirdep->ir_savebp->b_bcount);
6176
free(indirdep->ir_saveddata, M_INDIRDEP);
6177
indirdep->ir_saveddata = NULL;
6180
* When bp is NULL there is a full truncation pending. We
6181
* must wait for this full truncation to be journaled before
6182
* we can release this freework because the disk pointers will
6183
* never be written as zero.
6186
if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd))
6187
handle_written_freework(freework);
6189
WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
6190
&freework->fw_list);
6192
/* Complete when the real copy is written. */
6193
WORKLIST_INSERT(&bp->b_dep, &freework->fw_list);
6199
* Calculate the number of blocks we are going to release where datablocks
6200
* is the current total and length is the new file size.
6203
blkcount(fs, datablocks, length)
6205
ufs2_daddr_t datablocks;
6208
off_t totblks, numblks;
6211
numblks = howmany(length, fs->fs_bsize);
6212
if (numblks <= NDADDR) {
6213
totblks = howmany(length, fs->fs_fsize);
6216
totblks = blkstofrags(fs, numblks);
6219
* Count all single, then double, then triple indirects required.
6220
* Subtracting one indirect's worth of blocks for each pass
6221
* acknowledges one of each pointed to by the inode.
6224
totblks += blkstofrags(fs, howmany(numblks, NINDIR(fs)));
6225
numblks -= NINDIR(fs);
6228
numblks = howmany(numblks, NINDIR(fs));
6231
totblks = fsbtodb(fs, totblks);
6233
* Handle sparse files. We can't reclaim more blocks than the inode
6234
* references. We will correct it later in handle_complete_freeblks()
6235
* when we know the real count.
6237
if (totblks > datablocks)
6239
return (datablocks - totblks);
6243
* Handle freeblocks for journaled softupdate filesystems.
6245
* Contrary to normal softupdates, we must preserve the block pointers in
6246
* indirects until their subordinates are free. This is to avoid journaling
6247
* every block that is freed which may consume more space than the journal
6248
* itself. The recovery program will see the free block journals at the
6249
* base of the truncated area and traverse them to reclaim space. The
6250
* pointers in the inode may be cleared immediately after the journal
6251
* records are written because each direct and indirect pointer in the
6252
* inode is recorded in a journal. This permits full truncation to proceed
6253
* asynchronously. The write order is journal -> inode -> cgs -> indirects.
6255
* The algorithm is as follows:
6256
* 1) Traverse the in-memory state and create journal entries to release
6257
* the relevant blocks and full indirect trees.
6258
* 2) Traverse the indirect block chain adding partial truncation freework
6259
* records to indirects in the path to lastlbn. The freework will
6260
* prevent new allocation dependencies from being satisfied in this
6261
* indirect until the truncation completes.
6262
* 3) Read and lock the inode block, performing an update with the new size
6263
* and pointers. This prevents truncated data from becoming valid on
6264
* disk through step 4.
6265
* 4) Reap unsatisfied dependencies that are beyond the truncated area,
6266
* eliminate journal work for those records that do not require it.
6267
* 5) Schedule the journal records to be written followed by the inode block.
6268
* 6) Allocate any necessary frags for the end of file.
6269
* 7) Zero any partially truncated blocks.
6271
* From this point, truncation proceeds asynchronously using the freework and
6272
* indir_trunc machinery. The file will not be extended again into a
6273
* partially truncated indirect block until all work is completed but
6274
* the normal dependency mechanism ensures that it is rolled back/forward
6275
* as appropriate. Further truncation may occur without delay and is
6276
* serialized in indir_trunc().
6279
softdep_journal_freeblocks(ip, cred, length, flags)
6280
struct inode *ip; /* The inode whose length is to be reduced */
6282
off_t length; /* The new length for the file */
6283
int flags; /* IO_EXT and/or IO_NORMAL */
6285
struct freeblks *freeblks, *fbn;
6286
struct worklist *wk, *wkn;
6287
struct inodedep *inodedep;
6288
struct jblkdep *jblkdep;
6289
struct allocdirect *adp, *adpn;
6294
ufs2_daddr_t extblocks, datablocks;
6295
ufs_lbn_t tmpval, lbn, lastlbn;
6296
int frags, lastoff, iboff, allocblock, needj, dflags, error, i;
6299
mp = UFSTOVFS(ip->i_ump);
6307
freeblks = newfreeblks(mp, ip);
6310
* If we're truncating a removed file that will never be written
6311
* we don't need to journal the block frees. The canceled journals
6312
* for the allocations will suffice.
6315
if (IS_SNAPSHOT(ip))
6317
inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
6318
if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED &&
6321
CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d",
6322
ip->i_number, length, needj);
6325
* Calculate the lbn that we are truncating to. This results in -1
6326
* if we're truncating to 0 bytes. So it is the last lbn we want
6327
* to keep, not the first lbn we want to truncate.
6329
lastlbn = lblkno(fs, length + fs->fs_bsize - 1) - 1;
6330
lastoff = blkoff(fs, length);
6332
* Compute frags we are keeping in lastlbn. 0 means all.
6334
if (lastlbn >= 0 && lastlbn < NDADDR) {
6335
frags = fragroundup(fs, lastoff);
6336
/* adp offset of last valid allocdirect. */
6338
} else if (lastlbn > 0)
6340
if (fs->fs_magic == FS_UFS2_MAGIC)
6341
extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
6343
* Handle normal data blocks and indirects. This section saves
6344
* values used after the inode update to complete frag and indirect
6347
if ((flags & IO_NORMAL) != 0) {
6349
* Handle truncation of whole direct and indirect blocks.
6351
for (i = iboff + 1; i < NDADDR; i++)
6352
setup_freedirect(freeblks, ip, i, needj);
6353
for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR;
6354
i++, lbn += tmpval, tmpval *= NINDIR(fs)) {
6355
/* Release a whole indirect tree. */
6356
if (lbn > lastlbn) {
6357
setup_freeindir(freeblks, ip, i, -lbn -i,
6363
* Traverse partially truncated indirect tree.
6365
if (lbn <= lastlbn && lbn + tmpval - 1 > lastlbn)
6366
setup_trunc_indir(freeblks, ip, -lbn - i,
6367
lastlbn, DIP(ip, i_ib[i]));
6370
* Handle partial truncation to a frag boundary.
6376
oldfrags = blksize(fs, ip, lastlbn);
6377
blkno = DIP(ip, i_db[lastlbn]);
6378
if (blkno && oldfrags != frags) {
6380
oldfrags = numfrags(ip->i_fs, oldfrags);
6381
blkno += numfrags(ip->i_fs, frags);
6382
newfreework(ip->i_ump, freeblks, NULL, lastlbn,
6383
blkno, oldfrags, 0, needj);
6384
} else if (blkno == 0)
6388
* Add a journal record for partial truncate if we are
6389
* handling indirect blocks. Non-indirects need no extra
6392
if (length != 0 && lastlbn >= NDADDR) {
6393
ip->i_flag |= IN_TRUNCATED;
6394
newjtrunc(freeblks, length, 0);
6396
ip->i_size = length;
6397
DIP_SET(ip, i_size, ip->i_size);
6398
datablocks = DIP(ip, i_blocks) - extblocks;
6400
datablocks = blkcount(ip->i_fs, datablocks, length);
6401
freeblks->fb_len = length;
6403
if ((flags & IO_EXT) != 0) {
6404
for (i = 0; i < NXADDR; i++)
6405
setup_freeext(freeblks, ip, i, needj);
6406
ip->i_din2->di_extsize = 0;
6407
datablocks += extblocks;
6410
/* Reference the quotas in case the block count is wrong in the end. */
6411
quotaref(vp, freeblks->fb_quota);
6412
(void) chkdq(ip, -datablocks, NOCRED, 0);
6414
freeblks->fb_chkcnt = -datablocks;
6415
UFS_LOCK(ip->i_ump);
6416
fs->fs_pendingblocks += datablocks;
6417
UFS_UNLOCK(ip->i_ump);
6418
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks);
6420
* Handle truncation of incomplete alloc direct dependencies. We
6421
* hold the inode block locked to prevent incomplete dependencies
6422
* from reaching the disk while we are eliminating those that
6423
* have been truncated. This is a partially inlined ffs_update().
6426
ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
6427
error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
6428
(int)fs->fs_bsize, cred, &bp);
6431
softdep_error("softdep_journal_freeblocks", error);
6434
if (bp->b_bufsize == fs->fs_bsize)
6435
bp->b_flags |= B_CLUSTEROK;
6436
softdep_update_inodeblock(ip, bp, 0);
6437
if (ip->i_ump->um_fstype == UFS1)
6438
*((struct ufs1_dinode *)bp->b_data +
6439
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
6441
*((struct ufs2_dinode *)bp->b_data +
6442
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
6444
(void) inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
6445
if ((inodedep->id_state & IOSTARTED) != 0)
6446
panic("softdep_setup_freeblocks: inode busy");
6448
* Add the freeblks structure to the list of operations that
6449
* must await the zero'ed inode being written to disk. If we
6450
* still have a bitmap dependency (needj), then the inode
6451
* has never been written to disk, so we can process the
6452
* freeblks below once we have deleted the dependencies.
6455
WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list);
6457
freeblks->fb_state |= COMPLETE;
6458
if ((flags & IO_NORMAL) != 0) {
6459
TAILQ_FOREACH_SAFE(adp, &inodedep->id_inoupdt, ad_next, adpn) {
6460
if (adp->ad_offset > iboff)
6461
cancel_allocdirect(&inodedep->id_inoupdt, adp,
6464
* Truncate the allocdirect. We could eliminate
6465
* or modify journal records as well.
6467
else if (adp->ad_offset == iboff && frags)
6468
adp->ad_newsize = frags;
6471
if ((flags & IO_EXT) != 0)
6472
while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
6473
cancel_allocdirect(&inodedep->id_extupdt, adp,
6476
* Scan the bufwait list for newblock dependencies that will never
6479
LIST_FOREACH_SAFE(wk, &inodedep->id_bufwait, wk_list, wkn) {
6480
if (wk->wk_type != D_ALLOCDIRECT)
6482
adp = WK_ALLOCDIRECT(wk);
6483
if (((flags & IO_NORMAL) != 0 && (adp->ad_offset > iboff)) ||
6484
((flags & IO_EXT) != 0 && (adp->ad_state & EXTDATA))) {
6485
cancel_jfreeblk(freeblks, adp->ad_newblkno);
6486
cancel_newblk(WK_NEWBLK(wk), NULL, &freeblks->fb_jwork);
6487
WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk);
6493
LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps)
6494
add_to_journal(&jblkdep->jb_list);
6498
* Truncate dependency structures beyond length.
6500
trunc_dependencies(ip, freeblks, lastlbn, frags, flags);
6502
* This is only set when we need to allocate a fragment because
6503
* none existed at the end of a frag-sized file. It handles only
6504
* allocating a new, zero filled block.
6507
ip->i_size = length - lastoff;
6508
DIP_SET(ip, i_size, ip->i_size);
6509
error = UFS_BALLOC(vp, length - 1, 1, cred, BA_CLRBUF, &bp);
6511
softdep_error("softdep_journal_freeblks", error);
6514
ip->i_size = length;
6515
DIP_SET(ip, i_size, length);
6516
ip->i_flag |= IN_CHANGE | IN_UPDATE;
6517
allocbuf(bp, frags);
6520
} else if (lastoff != 0 && vp->v_type != VDIR) {
6524
* Zero the end of a truncated frag or block.
6526
size = sblksize(fs, length, lastlbn);
6527
error = bread(vp, lastlbn, size, cred, &bp);
6529
softdep_error("softdep_journal_freeblks", error);
6532
bzero((char *)bp->b_data + lastoff, size - lastoff);
6537
inodedep_lookup(mp, ip->i_number, dflags, &inodedep);
6538
TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next);
6539
freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST;
6541
* We zero earlier truncations so they don't erroneously
6544
if (freeblks->fb_len == 0 && (flags & IO_NORMAL) != 0)
6545
TAILQ_FOREACH(fbn, &inodedep->id_freeblklst, fb_next)
6547
if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE &&
6548
LIST_EMPTY(&freeblks->fb_jblkdephd))
6549
freeblks->fb_state |= INPROGRESS;
6554
handle_workitem_freeblocks(freeblks, 0);
6555
trunc_pages(ip, length, extblocks, flags);
6560
* Flush a JOP_SYNC to the journal.
6563
softdep_journal_fsync(ip)
6566
struct jfsync *jfsync;
6568
if ((ip->i_flag & IN_TRUNCATED) == 0)
6570
ip->i_flag &= ~IN_TRUNCATED;
6571
jfsync = malloc(sizeof(*jfsync), M_JFSYNC, M_SOFTDEP_FLAGS | M_ZERO);
6572
workitem_alloc(&jfsync->jfs_list, D_JFSYNC, UFSTOVFS(ip->i_ump));
6573
jfsync->jfs_size = ip->i_size;
6574
jfsync->jfs_ino = ip->i_number;
6576
add_to_journal(&jfsync->jfs_list);
6577
jwait(&jfsync->jfs_list, MNT_WAIT);
2721
7629
* to the number of blocks allocated for the file) are also
2722
7630
* performed in this function.
2725
7633
handle_workitem_freeblocks(freeblks, flags)
2726
7634
struct freeblks *freeblks;
7637
struct freework *freework;
7638
struct newblk *newblk;
7639
struct allocindir *aip;
7640
struct ufsmount *ump;
7641
struct worklist *wk;
7643
KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd),
7644
("handle_workitem_freeblocks: Journal entries not written."));
7645
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
7647
while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) {
7648
WORKLIST_REMOVE(wk);
7649
switch (wk->wk_type) {
7651
wk->wk_state |= COMPLETE;
7652
add_to_worklist(wk, 0);
7656
free_newblk(WK_NEWBLK(wk));
7660
aip = WK_ALLOCINDIR(wk);
7662
if (aip->ai_state & DELAYEDFREE) {
7664
freework = newfreework(ump, freeblks, NULL,
7665
aip->ai_lbn, aip->ai_newblkno,
7666
ump->um_fs->fs_frag, 0, 0);
7669
newblk = WK_NEWBLK(wk);
7670
if (newblk->nb_jnewblk) {
7671
freework->fw_jnewblk = newblk->nb_jnewblk;
7672
newblk->nb_jnewblk->jn_dep = &freework->fw_list;
7673
newblk->nb_jnewblk = NULL;
7675
free_newblk(newblk);
7679
freework = WK_FREEWORK(wk);
7680
if (freework->fw_lbn <= -NDADDR)
7681
handle_workitem_indirblk(freework);
7683
freework_freeblock(freework);
7686
panic("handle_workitem_freeblocks: Unknown type %s",
7687
TYPENAME(wk->wk_type));
7690
if (freeblks->fb_ref != 0) {
7691
freeblks->fb_state &= ~INPROGRESS;
7692
wake_worklist(&freeblks->fb_list);
7697
return handle_complete_freeblocks(freeblks, flags);
7702
* Handle completion of block free via truncate. This allows fs_pending
7703
* to track the actual free block count more closely than if we only updated
7704
* it at the end. We must be careful to handle cases where the block count
7705
* on free was incorrect.
7708
freeblks_free(ump, freeblks, blocks)
7709
struct ufsmount *ump;
7710
struct freeblks *freeblks;
7714
ufs2_daddr_t remain;
7717
remain = -freeblks->fb_chkcnt;
7718
freeblks->fb_chkcnt += blocks;
7720
if (remain < blocks)
7723
fs->fs_pendingblocks -= blocks;
7729
* Once all of the freework workitems are complete we can retire the
7730
* freeblocks dependency and any journal work awaiting completion. This
7731
* can not be called until all other dependencies are stable on disk.
7734
handle_complete_freeblocks(freeblks, flags)
7735
struct freeblks *freeblks;
7738
struct inodedep *inodedep;
2729
7739
struct inode *ip;
2730
7740
struct vnode *vp;
2732
7742
struct ufsmount *ump;
2733
int i, nblocks, level, bsize;
2734
ufs2_daddr_t bn, blocksreleased = 0;
2735
int error, allerror = 0;
2736
ufs_lbn_t baselbns[NIADDR], tmpval;
2737
int fs_pendingblocks;
2739
7745
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
2740
7746
fs = ump->um_fs;
2741
fs_pendingblocks = 0;
2743
baselbns[0] = NDADDR;
2744
for (i = 1; i < NIADDR; i++) {
2745
tmpval *= NINDIR(fs);
2746
baselbns[i] = baselbns[i - 1] + tmpval;
2748
nblocks = btodb(fs->fs_bsize);
2751
* Release all extended attribute blocks or frags.
2753
if (freeblks->fb_oldextsize > 0) {
2754
for (i = (NXADDR - 1); i >= 0; i--) {
2755
if ((bn = freeblks->fb_eblks[i]) == 0)
2757
bsize = sblksize(fs, freeblks->fb_oldextsize, i);
2758
ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize,
2759
freeblks->fb_previousinum);
2760
blocksreleased += btodb(bsize);
2764
* Release all data blocks or frags.
2766
if (freeblks->fb_oldsize > 0) {
2768
* Indirect blocks first.
2770
for (level = (NIADDR - 1); level >= 0; level--) {
2771
if ((bn = freeblks->fb_iblks[level]) == 0)
2773
if ((error = indir_trunc(freeblks, fsbtodb(fs, bn),
2774
level, baselbns[level], &blocksreleased)) != 0)
2776
ffs_blkfree(ump, fs, freeblks->fb_devvp, bn,
2777
fs->fs_bsize, freeblks->fb_previousinum);
2778
fs_pendingblocks += nblocks;
2779
blocksreleased += nblocks;
2782
* All direct blocks or frags.
2784
for (i = (NDADDR - 1); i >= 0; i--) {
2785
if ((bn = freeblks->fb_dblks[i]) == 0)
2787
bsize = sblksize(fs, freeblks->fb_oldsize, i);
2788
ffs_blkfree(ump, fs, freeblks->fb_devvp, bn, bsize,
2789
freeblks->fb_previousinum);
2790
fs_pendingblocks += btodb(bsize);
2791
blocksreleased += btodb(bsize);
2795
fs->fs_pendingblocks -= fs_pendingblocks;
2798
* If we still have not finished background cleanup, then check
2799
* to see if the block count needs to be adjusted.
2801
if (freeblks->fb_chkcnt != blocksreleased &&
2802
(fs->fs_flags & FS_UNCLEAN) != 0 &&
2803
ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum,
2804
(flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)
7747
flags = LK_EXCLUSIVE | flags;
7748
spare = freeblks->fb_chkcnt;
7751
* If we did not release the expected number of blocks we may have
7752
* to adjust the inode block count here. Only do so if it wasn't
7753
* a truncation to zero and the modrev still matches.
7755
if (spare && freeblks->fb_len != 0) {
7756
if (ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_inum,
7757
flags, &vp, FFSV_FORCEINSMQ) != 0)
2807
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + \
2808
freeblks->fb_chkcnt - blocksreleased);
2809
ip->i_flag |= IN_CHANGE;
7760
if (DIP(ip, i_modrev) == freeblks->fb_modrev) {
7761
DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - spare);
7762
ip->i_flag |= IN_CHANGE;
7764
* We must wait so this happens before the
7765
* journal is reclaimed.
2814
if (freeblks->fb_chkcnt != blocksreleased &&
2815
((fs->fs_flags & FS_UNCLEAN) == 0 || (flags & LK_NOWAIT) != 0))
2816
printf("handle_workitem_freeblocks: block count\n");
2818
softdep_error("handle_workitem_freeblks", allerror);
2819
#endif /* INVARIANTS */
7773
fs->fs_pendingblocks += spare;
7779
quotaadj(freeblks->fb_quota, ump, -spare);
7780
quotarele(freeblks->fb_quota);
2821
7782
ACQUIRE_LOCK(&lk);
7783
if (freeblks->fb_state & ONDEPLIST) {
7784
inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum,
7786
TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, fb_next);
7787
freeblks->fb_state &= ~ONDEPLIST;
7788
if (TAILQ_EMPTY(&inodedep->id_freeblklst))
7789
free_inodedep(inodedep);
7792
* All of the freeblock deps must be complete prior to this call
7793
* so it's now safe to complete earlier outstanding journal entries.
7795
handle_jwork(&freeblks->fb_jwork);
2822
7796
WORKITEM_FREE(freeblks, D_FREEBLKS);
2824
7797
FREE_LOCK(&lk);
2828
* Release blocks associated with the inode ip and stored in the indirect
7802
* Release blocks associated with the freeblks and stored in the indirect
2829
7803
* block dbn. If level is greater than SINGLE, the block is an indirect block
2830
7804
* and recursive calls to indirtrunc must be used to cleanse other indirect
7807
* This handles partial and complete truncation of blocks. Partial is noted
7808
* with goingaway == 0. In this case the freework is completed after the
7809
* zero'd indirects are written to disk. For full truncation the freework
7810
* is completed after the block is freed.
2834
indir_trunc(freeblks, dbn, level, lbn, countp)
2835
struct freeblks *freeblks;
7813
indir_trunc(freework, dbn, lbn)
7814
struct freework *freework;
2836
7815
ufs2_daddr_t dbn;
2839
ufs2_daddr_t *countp;
7818
struct freework *nfreework;
7819
struct workhead wkhd;
7820
struct freeblks *freeblks;
2841
7821
struct buf *bp;
2843
struct worklist *wk;
2844
7823
struct indirdep *indirdep;
2845
7824
struct ufsmount *ump;
2846
7825
ufs1_daddr_t *bap1 = 0;
2847
ufs2_daddr_t nb, *bap2 = 0;
7826
ufs2_daddr_t nb, nnb, *bap2 = 0;
7827
ufs_lbn_t lbnadd, nlbn;
2849
7828
int i, nblocks, ufs1fmt;
2850
int error, allerror = 0;
2851
int fs_pendingblocks;
7836
freeblks = freework->fw_freeblks;
2853
7837
ump = VFSTOUFS(freeblks->fb_list.wk_mp);
2854
7838
fs = ump->um_fs;
2855
fs_pendingblocks = 0;
2857
for (i = level; i > 0; i--)
2858
lbnadd *= NINDIR(fs);
2860
* Get buffer of block pointers to be freed. This routine is not
2861
* called until the zero'ed inode has been written, so it is safe
2862
* to free blocks as they are encountered. Because the inode has
2863
* been zero'ed, calls to bmap on these blocks will fail. So, we
2864
* have to use the on-disk address and the block device for the
2865
* filesystem to look them up. If the file was deleted before its
2866
* indirect blocks were all written to disk, the routine that set
2867
* us up (deallocate_dependencies) will have arranged to leave
2868
* a complete copy of the indirect block in memory for our use.
2869
* Otherwise we have to read the blocks in from the disk.
7840
* Get buffer of block pointers to be freed. There are three cases:
7842
* 1) Partial truncate caches the indirdep pointer in the freework
7843
* which provides us a back copy to the save bp which holds the
7844
* pointers we want to clear. When this completes the zero
7845
* pointers are written to the real copy.
7846
* 2) The indirect is being completely truncated, cancel_indirdep()
7847
* eliminated the real copy and placed the indirdep on the saved
7848
* copy. The indirdep and buf are discarded when this completes.
7849
* 3) The indirect was not in memory, we read a copy off of the disk
7850
* using the devvp and drop and invalidate the buffer when we're
2872
bp = getblk(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, 0, 0,
2875
bp = incore(&freeblks->fb_devvp->v_bufobj, dbn);
7855
if (freework->fw_indir != NULL) {
7857
indirdep = freework->fw_indir;
7858
bp = indirdep->ir_savebp;
7859
if (bp == NULL || bp->b_blkno != dbn)
7860
panic("indir_trunc: Bad saved buf %p blkno %jd",
7862
} else if ((bp = incore(&freeblks->fb_devvp->v_bufobj, dbn)) != NULL) {
7864
* The lock prevents the buf dep list from changing and
7865
* indirects on devvp should only ever have one dependency.
7867
indirdep = WK_INDIRDEP(LIST_FIRST(&bp->b_dep));
7868
if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0)
7869
panic("indir_trunc: Bad indirdep %p from buf %p",
7871
} else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize,
7872
NOCRED, &bp) != 0) {
2877
7876
ACQUIRE_LOCK(&lk);
2878
if (bp != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
2879
if (wk->wk_type != D_INDIRDEP ||
2880
(indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
2881
(indirdep->ir_state & GOINGAWAY) == 0)
2882
panic("indir_trunc: lost indirdep");
2883
WORKLIST_REMOVE(wk);
2884
WORKITEM_FREE(indirdep, D_INDIRDEP);
2885
if (!LIST_EMPTY(&bp->b_dep))
2886
panic("indir_trunc: dangling dep");
2887
ump->um_numindirdeps -= 1;
2895
error = bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize,
7877
/* Protects against a race with complete_trunc_indir(). */
7878
freework->fw_state &= ~INPROGRESS;
2903
* Recursively free indirect blocks.
7880
* If we have an indirdep we need to enforce the truncation order
7881
* and discard it when it is complete.
7884
if (freework != TAILQ_FIRST(&indirdep->ir_trunc) &&
7885
!TAILQ_EMPTY(&indirdep->ir_trunc)) {
7887
* Add the complete truncate to the list on the
7888
* indirdep to enforce in-order processing.
7890
if (freework->fw_indir == NULL)
7891
TAILQ_INSERT_TAIL(&indirdep->ir_trunc,
7897
* If we're goingaway, free the indirdep. Otherwise it will
7898
* linger until the write completes.
7901
free_indirdep(indirdep);
7902
ump->um_numindirdeps -= 1;
7906
/* Initialize pointers depending on block size. */
2905
7907
if (ump->um_fstype == UFS1) {
7908
bap1 = (ufs1_daddr_t *)bp->b_data;
7909
nb = bap1[freework->fw_off];
2907
bap1 = (ufs1_daddr_t *)bp->b_data;
7912
bap2 = (ufs2_daddr_t *)bp->b_data;
7913
nb = bap2[freework->fw_off];
2910
bap2 = (ufs2_daddr_t *)bp->b_data;
7916
level = lbn_level(lbn);
7917
needj = MOUNTEDSUJ(UFSTOVFS(ump)) != 0;
7918
lbnadd = lbn_offset(fs, level);
2912
7919
nblocks = btodb(fs->fs_bsize);
2913
for (i = NINDIR(fs) - 1; i >= 0; i--) {
7920
nfreework = freework;
7924
* Reclaim blocks. Traverses into nested indirect levels and
7925
* arranges for the current level to be freed when subordinates
7926
* are free when journaling.
7928
for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) {
7929
if (i != NINDIR(fs) - 1) {
2920
7939
if (level != 0) {
2921
if ((error = indir_trunc(freeblks, fsbtodb(fs, nb),
2922
level - 1, lbn + (i * lbnadd), countp)) != 0)
7940
nlbn = (lbn + 1) - (i * lbnadd);
7942
nfreework = newfreework(ump, freeblks, freework,
7943
nlbn, nb, fs->fs_frag, 0, 0);
7946
indir_trunc(nfreework, fsbtodb(fs, nb), nlbn);
7948
struct freedep *freedep;
7951
* Attempt to aggregate freedep dependencies for
7952
* all blocks being released to the same CG.
7956
(nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) {
7957
freedep = newfreedep(freework);
7958
WORKLIST_INSERT_UNLOCKED(&wkhd,
7963
"indir_trunc: ino %d blkno %jd size %ld",
7964
freeblks->fb_inum, nb, fs->fs_bsize);
7965
ffs_blkfree(ump, fs, freeblks->fb_devvp, nb,
7966
fs->fs_bsize, freeblks->fb_inum,
7967
freeblks->fb_vtype, &wkhd);
2925
ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize,
2926
freeblks->fb_previousinum);
2927
fs_pendingblocks += nblocks;
2931
fs->fs_pendingblocks -= fs_pendingblocks;
2933
bp->b_flags |= B_INVAL | B_NOCACHE;
7971
bp->b_flags |= B_INVAL | B_NOCACHE;
7976
freedblocks = (nblocks * cnt);
7978
freedblocks += nblocks;
7979
freeblks_free(ump, freeblks, freedblocks);
7981
* If we are journaling set up the ref counts and offset so this
7982
* indirect can be completed when its children are free.
7986
freework->fw_off = i;
7987
freework->fw_ref += freedeps;
7988
freework->fw_ref -= NINDIR(fs) + 1;
7990
freeblks->fb_cgwait += freedeps;
7991
if (freework->fw_ref == 0)
7992
freework_freeblock(freework);
7997
* If we're not journaling we can free the indirect now.
7999
dbn = dbtofsb(fs, dbn);
8001
"indir_trunc 2: ino %d blkno %jd size %ld",
8002
freeblks->fb_inum, dbn, fs->fs_bsize);
8003
ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
8004
freeblks->fb_inum, freeblks->fb_vtype, NULL);
8005
/* Non SUJ softdep does single-threaded truncations. */
8006
if (freework->fw_blkno == dbn) {
8007
freework->fw_state |= ALLCOMPLETE;
8009
handle_written_freework(freework);
2939
* Free an allocindir.
2940
* This routine must be called with splbio interrupts blocked.
8016
* Cancel an allocindir when it is removed via truncation. When bp is not
8017
* NULL the indirect never appeared on disk and is scheduled to be freed
8018
* independently of the indir so we can more easily track journal work.
2943
free_allocindir(aip, inodedep)
8021
cancel_allocindir(aip, bp, freeblks, trunc)
2944
8022
struct allocindir *aip;
2945
struct inodedep *inodedep;
8024
struct freeblks *freeblks;
8027
struct indirdep *indirdep;
2947
8028
struct freefrag *freefrag;
8029
struct newblk *newblk;
2949
mtx_assert(&lk, MA_OWNED);
2950
if ((aip->ai_state & DEPCOMPLETE) == 0)
2951
LIST_REMOVE(aip, ai_deps);
2952
if (aip->ai_state & ONWORKLIST)
2953
WORKLIST_REMOVE(&aip->ai_list);
8031
newblk = (struct newblk *)aip;
2954
8032
LIST_REMOVE(aip, ai_next);
2955
if ((freefrag = aip->ai_freefrag) != NULL) {
8034
* We must eliminate the pointer in bp if it must be freed on its
8035
* own due to partial truncate or pending journal work.
8037
if (bp && (trunc || newblk->nb_jnewblk)) {
8039
* Clear the pointer and mark the aip to be freed
8040
* directly if it never existed on disk.
8042
aip->ai_state |= DELAYEDFREE;
8043
indirdep = aip->ai_indirdep;
8044
if (indirdep->ir_state & UFS1FMT)
8045
((ufs1_daddr_t *)bp->b_data)[aip->ai_offset] = 0;
8047
((ufs2_daddr_t *)bp->b_data)[aip->ai_offset] = 0;
8050
* When truncating the previous pointer will be freed via
8051
* savedbp. Eliminate the freefrag which would dup free.
8053
if (trunc && (freefrag = newblk->nb_freefrag) != NULL) {
8054
newblk->nb_freefrag = NULL;
8055
if (freefrag->ff_jdep)
8057
WK_JFREEFRAG(freefrag->ff_jdep));
8058
jwork_move(&freeblks->fb_jwork, &freefrag->ff_jwork);
8059
WORKITEM_FREE(freefrag, D_FREEFRAG);
8062
* If the journal hasn't been written the jnewblk must be passed
8063
* to the call to ffs_blkfree that reclaims the space. We accomplish
8064
* this by leaving the journal dependency on the newblk to be freed
8065
* when a freework is created in handle_workitem_freeblocks().
8067
cancel_newblk(newblk, NULL, &freeblks->fb_jwork);
8068
WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list);
8072
* Create the mkdir dependencies for . and .. in a new directory. Link them
8073
* in to a newdirblk so any subsequent additions are tracked properly. The
8074
* caller is responsible for adding the mkdir1 dependency to the journal
8075
* and updating id_mkdiradd. This function returns with lk held.
8077
static struct mkdir *
8078
setup_newdir(dap, newinum, dinum, newdirbp, mkdirp)
8082
struct buf *newdirbp;
8083
struct mkdir **mkdirp;
8085
struct newblk *newblk;
8086
struct pagedep *pagedep;
8087
struct inodedep *inodedep;
8088
struct newdirblk *newdirblk = 0;
8089
struct mkdir *mkdir1, *mkdir2;
8090
struct worklist *wk;
8091
struct jaddref *jaddref;
8094
mp = dap->da_list.wk_mp;
8095
newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK,
8097
workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp);
8098
LIST_INIT(&newdirblk->db_mkdir);
8099
mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS);
8100
workitem_alloc(&mkdir1->md_list, D_MKDIR, mp);
8101
mkdir1->md_state = ATTACHED | MKDIR_BODY;
8102
mkdir1->md_diradd = dap;
8103
mkdir1->md_jaddref = NULL;
8104
mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS);
8105
workitem_alloc(&mkdir2->md_list, D_MKDIR, mp);
8106
mkdir2->md_state = ATTACHED | MKDIR_PARENT;
8107
mkdir2->md_diradd = dap;
8108
mkdir2->md_jaddref = NULL;
8109
if (MOUNTEDSUJ(mp) == 0) {
8110
mkdir1->md_state |= DEPCOMPLETE;
8111
mkdir2->md_state |= DEPCOMPLETE;
8114
* Dependency on "." and ".." being written to disk.
8116
mkdir1->md_buf = newdirbp;
8118
LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
8120
* We must link the pagedep, allocdirect, and newdirblk for
8121
* the initial file page so the pointer to the new directory
8122
* is not written until the directory contents are live and
8123
* any subsequent additions are not marked live until the
8124
* block is reachable via the inode.
8126
if (pagedep_lookup(mp, newdirbp, newinum, 0, 0, &pagedep) == 0)
8127
panic("setup_newdir: lost pagedep");
8128
LIST_FOREACH(wk, &newdirbp->b_dep, wk_list)
8129
if (wk->wk_type == D_ALLOCDIRECT)
8132
panic("setup_newdir: lost allocdirect");
8133
if (pagedep->pd_state & NEWBLOCK)
8134
panic("setup_newdir: NEWBLOCK already set");
8135
newblk = WK_NEWBLK(wk);
8136
pagedep->pd_state |= NEWBLOCK;
8137
pagedep->pd_newdirblk = newdirblk;
8138
newdirblk->db_pagedep = pagedep;
8139
WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list);
8140
WORKLIST_INSERT(&newdirblk->db_mkdir, &mkdir1->md_list);
8142
* Look up the inodedep for the parent directory so that we
8143
* can link mkdir2 into the pending dotdot jaddref or
8144
* the inode write if there is none. If the inode is
8145
* ALLCOMPLETE and no jaddref is present all dependencies have
8146
* been satisfied and mkdir2 can be freed.
8148
inodedep_lookup(mp, dinum, 0, &inodedep);
8149
if (MOUNTEDSUJ(mp)) {
2956
8150
if (inodedep == NULL)
2957
add_to_worklist(&freefrag->ff_list);
2959
WORKLIST_INSERT(&inodedep->id_bufwait,
2960
&freefrag->ff_list);
8151
panic("setup_newdir: Lost parent.");
8152
jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
8154
KASSERT(jaddref != NULL && jaddref->ja_parent == newinum &&
8155
(jaddref->ja_state & MKDIR_PARENT),
8156
("setup_newdir: bad dotdot jaddref %p", jaddref));
8157
LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
8158
mkdir2->md_jaddref = jaddref;
8159
jaddref->ja_mkdir = mkdir2;
8160
} else if (inodedep == NULL ||
8161
(inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
8162
dap->da_state &= ~MKDIR_PARENT;
8163
WORKITEM_FREE(mkdir2, D_MKDIR);
8166
LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
8167
WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list);
2962
WORKITEM_FREE(aip, D_ALLOCINDIR);
3528
* Called when the effective link count and the reference count
3529
* on an inode drops to zero. At this point there are no names
3530
* referencing the file in the filesystem and no active file
3531
* references. The space associated with the file will be freed
3532
* as soon as the necessary soft dependencies are cleared.
9170
* Attach a sbdep dependency to the superblock buf so that we can keep
9171
* track of the head of the linked list of referenced but unlinked inodes.
3535
softdep_releasefile(ip)
3536
struct inode *ip; /* inode with the zero effective link count */
9174
softdep_setup_sbupdate(ump, fs, bp)
9175
struct ufsmount *ump;
3538
struct inodedep *inodedep;
9179
struct sbdep *sbdep;
9180
struct worklist *wk;
3542
if (ip->i_effnlink > 0)
3543
panic("softdep_releasefile: file still referenced");
3545
* We may be called several times as the on-disk link count
3546
* drops to zero. We only want to account for the space once.
3548
if (ip->i_flag & IN_SPACECOUNTED)
3551
* We have to deactivate a snapshot otherwise copyonwrites may
3552
* add blocks and the cleanup may remove blocks after we have
3553
* tried to account for them.
3555
if ((ip->i_flags & SF_SNAPSHOT) != 0)
3556
ffs_snapremove(ITOV(ip));
3558
* If we are tracking an nlinkdelta, we have to also remember
3559
* whether we accounted for the freed space yet.
9182
if (MOUNTEDSUJ(UFSTOVFS(ump)) == 0)
9184
LIST_FOREACH(wk, &bp->b_dep, wk_list)
9185
if (wk->wk_type == D_SBDEP)
9189
sbdep = malloc(sizeof(struct sbdep), M_SBDEP, M_SOFTDEP_FLAGS);
9190
workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump));
9192
sbdep->sb_ump = ump;
3561
9193
ACQUIRE_LOCK(&lk);
3562
if ((inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, &inodedep)))
3563
inodedep->id_state |= SPACECOUNTED;
9194
WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list);
3564
9195
FREE_LOCK(&lk);
3567
if (fs->fs_magic == FS_UFS2_MAGIC)
3568
extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
3569
UFS_LOCK(ip->i_ump);
3570
ip->i_fs->fs_pendingblocks += DIP(ip, i_blocks) - extblocks;
3571
ip->i_fs->fs_pendinginodes += 1;
3572
UFS_UNLOCK(ip->i_ump);
3573
ip->i_flag |= IN_SPACECOUNTED;
9199
* Return the first unlinked inodedep which is ready to be the head of the
9200
* list. The inodedep and all those after it must have valid next pointers.
9202
static struct inodedep *
9203
first_unlinked_inodedep(ump)
9204
struct ufsmount *ump;
9206
struct inodedep *inodedep;
9207
struct inodedep *idp;
9209
mtx_assert(&lk, MA_OWNED);
9210
for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst);
9211
inodedep; inodedep = idp) {
9212
if ((inodedep->id_state & UNLINKNEXT) == 0)
9214
idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked);
9215
if (idp == NULL || (idp->id_state & UNLINKNEXT) == 0)
9217
if ((inodedep->id_state & UNLINKPREV) == 0)
9224
* Set the sujfree unlinked head pointer prior to writing a superblock.
9227
initiate_write_sbdep(sbdep)
9228
struct sbdep *sbdep;
9230
struct inodedep *inodedep;
9234
bpfs = sbdep->sb_fs;
9235
fs = sbdep->sb_ump->um_fs;
9236
inodedep = first_unlinked_inodedep(sbdep->sb_ump);
9238
fs->fs_sujfree = inodedep->id_ino;
9239
inodedep->id_state |= UNLINKPREV;
9242
bpfs->fs_sujfree = fs->fs_sujfree;
9246
* After a superblock is written determine whether it must be written again
9247
* due to a changing unlinked list head.
9250
handle_written_sbdep(sbdep, bp)
9251
struct sbdep *sbdep;
9254
struct inodedep *inodedep;
9258
mtx_assert(&lk, MA_OWNED);
9260
mp = UFSTOVFS(sbdep->sb_ump);
9262
* If the superblock doesn't match the in-memory list start over.
9264
inodedep = first_unlinked_inodedep(sbdep->sb_ump);
9265
if ((inodedep && fs->fs_sujfree != inodedep->id_ino) ||
9266
(inodedep == NULL && fs->fs_sujfree != 0)) {
9270
WORKITEM_FREE(sbdep, D_SBDEP);
9271
if (fs->fs_sujfree == 0)
9274
* Now that we have a record of this inode in stable store allow it
9275
* to be written to free up pending work. Inodes may see a lot of
9276
* write activity after they are unlinked which we must not hold up.
9278
for (; inodedep != NULL; inodedep = TAILQ_NEXT(inodedep, id_unlinked)) {
9279
if ((inodedep->id_state & UNLINKLINKS) != UNLINKLINKS)
9280
panic("handle_written_sbdep: Bad inodedep %p (0x%X)",
9281
inodedep, inodedep->id_state);
9282
if (inodedep->id_state & UNLINKONLIST)
9284
inodedep->id_state |= DEPCOMPLETE | UNLINKONLIST;
9291
* Mark an inodedep as unlinked and insert it into the in-memory unlinked list.
9294
unlinked_inodedep(mp, inodedep)
9296
struct inodedep *inodedep;
9298
struct ufsmount *ump;
9300
mtx_assert(&lk, MA_OWNED);
9301
if (MOUNTEDSUJ(mp) == 0)
9304
ump->um_fs->fs_fmod = 1;
9305
if (inodedep->id_state & UNLINKED)
9306
panic("unlinked_inodedep: %p already unlinked\n", inodedep);
9307
inodedep->id_state |= UNLINKED;
9308
TAILQ_INSERT_HEAD(&ump->softdep_unlinked, inodedep, id_unlinked);
9312
* Remove an inodedep from the unlinked inodedep list. This may require
9313
* disk writes if the inode has made it that far.
9316
clear_unlinked_inodedep(inodedep)
9317
struct inodedep *inodedep;
9319
struct ufsmount *ump;
9320
struct inodedep *idp;
9321
struct inodedep *idn;
9329
ump = VFSTOUFS(inodedep->id_list.wk_mp);
9331
ino = inodedep->id_ino;
9334
mtx_assert(&lk, MA_OWNED);
9335
KASSERT((inodedep->id_state & UNLINKED) != 0,
9336
("clear_unlinked_inodedep: inodedep %p not unlinked",
9339
* If nothing has yet been written simply remove us from
9340
* the in memory list and return. This is the most common
9341
* case where handle_workitem_remove() loses the final
9344
if ((inodedep->id_state & UNLINKLINKS) == 0)
9347
* If we have a NEXT pointer and no PREV pointer we can simply
9348
* clear NEXT's PREV and remove ourselves from the list. Be
9349
* careful not to clear PREV if the superblock points at
9352
idn = TAILQ_NEXT(inodedep, id_unlinked);
9353
if ((inodedep->id_state & UNLINKLINKS) == UNLINKNEXT) {
9354
if (idn && fs->fs_sujfree != idn->id_ino)
9355
idn->id_state &= ~UNLINKPREV;
9359
* Here we have an inodedep which is actually linked into
9360
* the list. We must remove it by forcing a write to the
9361
* link before us, whether it be the superblock or an inode.
9362
* Unfortunately the list may change while we're waiting
9363
* on the buf lock for either resource so we must loop until
9364
* we lock the right one. If both the superblock and an
9365
* inode point to this inode we must clear the inode first
9366
* followed by the superblock.
9368
idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked);
9370
if (idp && (idp->id_state & UNLINKNEXT))
9374
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
9375
(int)fs->fs_sbsize, 0, 0, 0);
9377
error = bread(ump->um_devvp,
9378
fsbtodb(fs, ino_to_fsba(fs, pino)),
9379
(int)fs->fs_bsize, NOCRED, &bp);
9386
/* If the list has changed restart the loop. */
9387
idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked);
9389
if (idp && (idp->id_state & UNLINKNEXT))
9392
(inodedep->id_state & UNLINKPREV) != UNLINKPREV) {
9399
idn = TAILQ_NEXT(inodedep, id_unlinked);
9403
* Remove us from the in memory list. After this we cannot
9404
* access the inodedep.
9406
KASSERT((inodedep->id_state & UNLINKED) != 0,
9407
("clear_unlinked_inodedep: inodedep %p not unlinked",
9409
inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST);
9410
TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked);
9413
* The predecessor's next pointer is manually updated here
9414
* so that the NEXT flag is never cleared for an element
9415
* that is in the list.
9418
bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
9419
ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
9420
softdep_setup_sbupdate(ump, (struct fs *)bp->b_data,
9422
} else if (fs->fs_magic == FS_UFS1_MAGIC)
9423
((struct ufs1_dinode *)bp->b_data +
9424
ino_to_fsbo(fs, pino))->di_freelink = nino;
9426
((struct ufs2_dinode *)bp->b_data +
9427
ino_to_fsbo(fs, pino))->di_freelink = nino;
9429
* If the bwrite fails we have no recourse to recover. The
9430
* filesystem is corrupted already.
9435
* If the superblock pointer still needs to be cleared force
9438
if (fs->fs_sujfree == ino) {
9440
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
9441
(int)fs->fs_sbsize, 0, 0, 0);
9442
bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
9443
ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
9444
softdep_setup_sbupdate(ump, (struct fs *)bp->b_data,
9450
if (fs->fs_sujfree != ino)
9452
panic("clear_unlinked_inodedep: Failed to clear free head");
9454
if (inodedep->id_ino == fs->fs_sujfree)
9455
panic("clear_unlinked_inodedep: Freeing head of free list");
9456
inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST);
9457
TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked);
3577
9462
* This workitem decrements the inode's link count.
3578
9463
* If the link count reaches zero, the file is removed.
3581
handle_workitem_remove(dirrem, xp)
9466
handle_workitem_remove(dirrem, flags)
3582
9467
struct dirrem *dirrem;
3585
struct thread *td = curthread;
3586
9470
struct inodedep *inodedep;
9471
struct workhead dotdotwk;
9472
struct worklist *wk;
9473
struct ufsmount *ump;
3587
9475
struct vnode *vp;
3588
9476
struct inode *ip;
3592
if ((vp = xp) == NULL &&
3593
(error = ffs_vgetf(dirrem->dm_list.wk_mp,
3594
dirrem->dm_oldinum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)) != 0) {
3595
softdep_error("handle_workitem_remove: vget", error);
9479
if (dirrem->dm_state & ONWORKLIST)
9480
panic("handle_workitem_remove: dirrem %p still on worklist",
9482
oldinum = dirrem->dm_oldinum;
9483
mp = dirrem->dm_list.wk_mp;
9485
flags |= LK_EXCLUSIVE;
9486
if (ffs_vgetf(mp, oldinum, flags, &vp, FFSV_FORCEINSMQ) != 0)
3599
9489
ACQUIRE_LOCK(&lk);
3600
if ((inodedep_lookup(dirrem->dm_list.wk_mp,
3601
dirrem->dm_oldinum, 0, &inodedep)) == 0)
9490
if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0)
3602
9491
panic("handle_workitem_remove: lost inodedep");
9492
if (dirrem->dm_state & ONDEPLIST)
9493
LIST_REMOVE(dirrem, dm_inonext);
9494
KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd),
9495
("handle_workitem_remove: Journal entries not written."));
9498
* Move all dependencies waiting on the remove to complete
9499
* from the dirrem to the inode inowait list to be completed
9500
* after the inode has been updated and written to disk. Any
9501
* marked MKDIR_PARENT are saved to be completed when the .. ref
9504
LIST_INIT(&dotdotwk);
9505
while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) {
9506
WORKLIST_REMOVE(wk);
9507
if (wk->wk_state & MKDIR_PARENT) {
9508
wk->wk_state &= ~MKDIR_PARENT;
9509
WORKLIST_INSERT(&dotdotwk, wk);
9512
WORKLIST_INSERT(&inodedep->id_inowait, wk);
9514
LIST_SWAP(&dirrem->dm_jwork, &dotdotwk, worklist, wk_list);
3604
9516
* Normal file deletion.
4236
10236
* postpone fsck, we are stuck with this argument.
4238
10238
for (; adp; adp = TAILQ_NEXT(adp, ad_next))
4239
dp->di_ib[adp->ad_lbn - NDADDR] = 0;
10239
dp->di_ib[adp->ad_offset - NDADDR] = 0;
10243
* Cancel an indirdep as a result of truncation. Release all of the
10244
* children allocindirs and place their journal work on the appropriate
10248
cancel_indirdep(indirdep, bp, freeblks)
10249
struct indirdep *indirdep;
10251
struct freeblks *freeblks;
10253
struct allocindir *aip;
10256
* None of the indirect pointers will ever be visible,
10257
* so they can simply be tossed. GOINGAWAY ensures
10258
* that allocated pointers will be saved in the buffer
10259
* cache until they are freed. Note that they will
10260
* only be able to be found by their physical address
10261
* since the inode mapping the logical address will
10262
* be gone. The save buffer used for the safe copy
10263
* was allocated in setup_allocindir_phase2 using
10264
* the physical address so it could be used for this
10265
* purpose. Hence we swap the safe copy with the real
10266
* copy, allowing the safe copy to be freed and holding
10267
* on to the real copy for later use in indir_trunc.
10269
if (indirdep->ir_state & GOINGAWAY)
10270
panic("cancel_indirdep: already gone");
10271
if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
10272
indirdep->ir_state |= DEPCOMPLETE;
10273
LIST_REMOVE(indirdep, ir_next);
10275
indirdep->ir_state |= GOINGAWAY;
10276
VFSTOUFS(indirdep->ir_list.wk_mp)->um_numindirdeps += 1;
10278
* Pass in bp for blocks still have journal writes
10279
* pending so we can cancel them on their own.
10281
while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0)
10282
cancel_allocindir(aip, bp, freeblks, 0);
10283
while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0)
10284
cancel_allocindir(aip, NULL, freeblks, 0);
10285
while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0)
10286
cancel_allocindir(aip, NULL, freeblks, 0);
10287
while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 0)
10288
cancel_allocindir(aip, NULL, freeblks, 0);
10290
* If there are pending partial truncations we need to keep the
10291
* old block copy around until they complete. This is because
10292
* the current b_data is not a perfect superset of the available
10295
if (TAILQ_EMPTY(&indirdep->ir_trunc))
10296
bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount);
10298
bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
10299
WORKLIST_REMOVE(&indirdep->ir_list);
10300
WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list);
10301
indirdep->ir_bp = NULL;
10302
indirdep->ir_freeblks = freeblks;
10306
* Free an indirdep once it no longer has new pointers to track.
10309
free_indirdep(indirdep)
10310
struct indirdep *indirdep;
10313
KASSERT(TAILQ_EMPTY(&indirdep->ir_trunc),
10314
("free_indirdep: Indir trunc list not empty."));
10315
KASSERT(LIST_EMPTY(&indirdep->ir_completehd),
10316
("free_indirdep: Complete head not empty."));
10317
KASSERT(LIST_EMPTY(&indirdep->ir_writehd),
10318
("free_indirdep: write head not empty."));
10319
KASSERT(LIST_EMPTY(&indirdep->ir_donehd),
10320
("free_indirdep: done head not empty."));
10321
KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd),
10322
("free_indirdep: deplist head not empty."));
10323
KASSERT((indirdep->ir_state & DEPCOMPLETE),
10324
("free_indirdep: %p still on newblk list.", indirdep));
10325
KASSERT(indirdep->ir_saveddata == NULL,
10326
("free_indirdep: %p still has saved data.", indirdep));
10327
if (indirdep->ir_state & ONWORKLIST)
10328
WORKLIST_REMOVE(&indirdep->ir_list);
10329
WORKITEM_FREE(indirdep, D_INDIRDEP);
10333
* Called before a write to an indirdep. This routine is responsible for
10334
* rolling back pointers to a safe state which includes only those
10335
* allocindirs which have been completed.
10338
/*
 * initiate_write_indirdep: prepare the buffer behind an indirdep for a
 * write.
 *
 * Parameters:
 *   indirdep - dependency record for the indirect block.
 *   bp       - buffer about to be written; its b_data and b_bcount are
 *              used for the copies below.
 *
 * Visible behavior: IOSTARTED is set in ir_state and the routine panics if
 * GOINGAWAY is set.  The contents of bp are copied into ir_saveddata
 * (allocated with malloc() of b_bcount bytes from M_INDIRDEP when it is
 * still NULL), ATTACHED is cleared and UNDONE is set, and bp is then
 * overwritten from ir_savebp->b_data.
 *
 * NOTE(review): the statement controlled by the ir_deplisthd / ir_trunc
 * emptiness test is not visible here; the existing comment suggests the
 * rollback is skipped when nothing is pending - confirm.
 * NOTE(review): the panic text names disk_io_initiation rather than this
 * function, which may mislead whoever reads the panic string.
 */
initiate_write_indirdep(indirdep, bp)
10339
struct indirdep *indirdep;
10343
/* Record that I/O has begun before looking at any other state. */
indirdep->ir_state |= IOSTARTED;
10344
if (indirdep->ir_state & GOINGAWAY)
10345
panic("disk_io_initiation: indirdep gone");
10347
* If there are no remaining dependencies, this will be writing
10348
* the real pointers.
10350
if (LIST_EMPTY(&indirdep->ir_deplisthd) &&
10351
TAILQ_EMPTY(&indirdep->ir_trunc))
10354
* Replace up-to-date version with safe version.
10356
/* The save area is allocated only when none exists yet. */
if (indirdep->ir_saveddata == NULL) {
10358
indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP,
10362
indirdep->ir_state &= ~ATTACHED;
10363
indirdep->ir_state |= UNDONE;
10364
/* Stash the current contents, then load the saved copy into bp. */
bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
10365
bcopy(indirdep->ir_savebp->b_data, bp->b_data,
10370
* Called when an inode has been cleared in a cg bitmap. This finally
10371
* eliminates any canceled jaddrefs
10374
/*
 * softdep_setup_inofree: dependency processing after an inode has been
 * cleared in a cylinder group bitmap.
 *
 * Parameters:
 *   mp   - mount; used to reach the fs via VFSTOUFS(mp)->um_fs and for
 *          inodedep_lookup().
 *   bp   - buffer whose b_data is treated as the struct cg.
 *   ino  - inode number; its bit is located with ino % fs->fs_ipg.
 *   wkhd - work list holding items associated with the free.
 *
 * Visible behavior: panics if the inode bit is still set in the
 * cg_inosused() map, and panics if inodedep_lookup() reports an existing
 * inodedep for ino.  Each D_JADDREF item on wkhd is removed from the list,
 * marked COMPLETE | ATTACHED and passed to free_jaddref().  Finally
 * jwork_move() is called with bp->b_dep and wkhd.
 *
 * NOTE(review): the statement controlled by the wk_type test is not
 * visible here; presumably items of other types are skipped - confirm.
 * NOTE(review): ino is printed with %d in both panic strings; verify that
 * this matches the declared type of ino.
 */
softdep_setup_inofree(mp, bp, ino, wkhd)
10378
struct workhead *wkhd;
10380
struct worklist *wk, *wkn;
10381
struct inodedep *inodedep;
10387
fs = VFSTOUFS(mp)->um_fs;
10388
cgp = (struct cg *)bp->b_data;
10389
inosused = cg_inosused(cgp);
10390
/* The bitmap bit for this inode must already be clear. */
if (isset(inosused, ino % fs->fs_ipg))
10391
panic("softdep_setup_inofree: inode %d not freed.", ino);
10392
/* No inodedep may remain for an inode that has been freed. */
if (inodedep_lookup(mp, ino, 0, &inodedep))
10393
panic("softdep_setup_inofree: ino %d has existing inodedep %p",
10396
/* The _SAFE iterator is needed because entries are removed in the loop. */
LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) {
10397
if (wk->wk_type != D_JADDREF)
10399
WORKLIST_REMOVE(wk);
10401
* We can free immediately even if the jaddref
10402
* isn't attached in a background write as now
10403
* the bitmaps are reconciled.
10405
wk->wk_state |= COMPLETE | ATTACHED;
10406
free_jaddref(WK_JADDREF(wk));
10408
jwork_move(&bp->b_dep, wkhd);
10415
* Called via ffs_blkfree() after a set of frags has been cleared from a cg
10416
* map. Any dependencies waiting for the write to clear are added to the
10417
* buf's list and any jnewblks that are being canceled are discarded
10421
/*
 * softdep_setup_blkfree: dependency processing after a run of fragments
 * has been cleared from a cylinder group map.
 *
 * Parameters:
 *   mp    - mount; used to reach the fs and for bmsafemap_lookup().
 *   bp    - buffer whose b_data is treated as the struct cg.
 *   blkno - first fragment of the freed range.
 *   frags - number of fragments freed; the range is [blkno, blkno + frags).
 *   wkhd  - work list of items associated with the free.
 *
 * Visible behavior, first loop: every item is removed from wkhd.  Items
 * that are not D_JNEWBLK are inserted on bmsafemap->sm_freehd.  A
 * D_JNEWBLK item must have GOINGAWAY set (KASSERT); its fragments from
 * jn_oldfrags up to jn_frags are checked against the cg_blksfree() map
 * with isset(), with a "not free" panic guarding that check; the item is
 * then marked COMPLETE | ATTACHED and passed to free_jnewblk().
 *
 * Visible behavior, second loop: each jnewblk on sm_jnewblkhd whose state
 * is not exactly COMPLETE | DEPCOMPLETE (under the ATTACHED | COMPLETE |
 * DEPCOMPLETE mask) is compared with the freed range, and an overlap
 * prints the jnewblk state and panics.
 *
 * NOTE(review): the overlap test only asks whether blkno or end falls
 * inside [jstart, jend).  A freed range that strictly encloses a jnewblk
 * range (blkno < jstart and end > jend) satisfies neither clause and
 * would not be reported - confirm whether that case can occur.
 * NOTE(review): the second fs / bmsafemap lookup repeats the first; the
 * conditional-compilation lines that would explain this are not visible.
 * NOTE(review): the statements controlled by several tests in this
 * function are not visible here, so the exact skip conditions should be
 * checked against the full file.
 */
softdep_setup_blkfree(mp, bp, blkno, frags, wkhd)
10424
ufs2_daddr_t blkno;
10426
struct workhead *wkhd;
10428
struct bmsafemap *bmsafemap;
10429
struct jnewblk *jnewblk;
10430
struct worklist *wk;
10435
ufs2_daddr_t jstart;
10443
"softdep_setup_blkfree: blkno %jd frags %d wk head %p",
10444
blkno, frags, wkhd);
10447
/* Lookup the bmsafemap so we track when it is dirty. */
10448
fs = VFSTOUFS(mp)->um_fs;
10449
bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL);
10451
* Detach any jnewblks which have been canceled. They must linger
10452
* until the bitmap is cleared again by ffs_blkfree() to prevent
10453
* an unjournaled allocation from hitting the disk.
10456
/* Drain wkhd from the front; each pass removes the current head. */
while ((wk = LIST_FIRST(wkhd)) != NULL) {
10458
"softdep_setup_blkfree: blkno %jd wk type %d",
10459
blkno, wk->wk_type);
10460
WORKLIST_REMOVE(wk);
10461
/* Anything other than a jnewblk is queued on the bmsafemap free list. */
if (wk->wk_type != D_JNEWBLK) {
10462
WORKLIST_INSERT(&bmsafemap->sm_freehd, wk);
10465
jnewblk = WK_JNEWBLK(wk);
10466
KASSERT(jnewblk->jn_state & GOINGAWAY,
10467
("softdep_setup_blkfree: jnewblk not canceled."));
10470
* Assert that this block is free in the bitmap
10471
* before we discard the jnewblk.
10473
cgp = (struct cg *)bp->b_data;
10474
blksfree = cg_blksfree(cgp);
10475
bno = dtogd(fs, jnewblk->jn_blkno);
10476
for (i = jnewblk->jn_oldfrags;
10477
i < jnewblk->jn_frags; i++) {
10478
if (isset(blksfree, bno + i))
10480
panic("softdep_setup_blkfree: not free");
10484
* Even if it's not attached we can free immediately
10485
* as the new bitmap is correct.
10487
wk->wk_state |= COMPLETE | ATTACHED;
10488
free_jnewblk(jnewblk);
10494
* Assert that we are not freeing a block which has an outstanding
10495
* allocation dependency.
10497
fs = VFSTOUFS(mp)->um_fs;
10498
bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL);
10499
end = blkno + frags;
10500
LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) {
10502
* Don't match against blocks that will be freed when the
10503
* background write is done.
10505
if ((jnewblk->jn_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) ==
10506
(COMPLETE | DEPCOMPLETE))
10508
jstart = jnewblk->jn_blkno + jnewblk->jn_oldfrags;
10509
jend = jnewblk->jn_blkno + jnewblk->jn_frags;
10510
if ((blkno >= jstart && blkno < jend) ||
10511
(end > jstart && end <= jend)) {
10512
printf("state 0x%X %jd - %d %d dep %p\n",
10513
jnewblk->jn_state, jnewblk->jn_blkno,
10514
jnewblk->jn_oldfrags, jnewblk->jn_frags,
10516
panic("softdep_setup_blkfree: "
10517
"%jd-%jd(%d) overlaps with %jd-%jd",
10518
blkno, end, frags, jstart, jend);
10526
* Revert a block allocation when the journal record that describes it
10527
* is not yet written.
10530
/*
 * jnewblk_rollback: undo, in a cylinder group buffer, the block allocation
 * described by a jnewblk.
 *
 * Parameters:
 *   jnewblk  - record giving jn_blkno and the fragment range
 *              [jn_oldfrags, jn_frags) within that block.
 *   fs       - filesystem, used for the block / fragment arithmetic.
 *   cgp      - cylinder group whose summary counters are adjusted.
 *   blksfree - free-block bitmap that is examined and updated.
 *
 * Visible behavior: the fragments of the range are examined with isclr()
 * on blksfree.  If frags equals fs->fs_frag, a whole block is returned via
 * ffs_setblock(), ffs_clusteracct() and cs_nbfree++.  The partial path
 * sets the individual bits, adds frags to cs_nffree, redoes the
 * ffs_fragacct() accounting around the change, and converts the fragments
 * into a free block when ffs_isblock() reports the block complete.
 * Finally ATTACHED is cleared and UNDONE is set in jn_state.
 *
 * NOTE(review): the statement that accumulates frags in the first loop,
 * and the branch separating the whole-block and partial-block paths, are
 * not visible here; presumably frags counts the clear bits and the two
 * paths are alternatives - confirm.
 * NOTE(review): a caller in this file tests the result of this function
 * in an if condition, so it appears to return a value; the return
 * statement is not visible here.
 */
jnewblk_rollback(jnewblk, fs, cgp, blksfree)
10531
struct jnewblk *jnewblk;
10536
ufs1_daddr_t fragno;
10542
/* Translate the block number with dtogd() for indexing into blksfree. */
cgbno = dtogd(fs, jnewblk->jn_blkno);
10544
* We have to test which frags need to be rolled back. We may
10545
* be operating on a stale copy when doing background writes.
10547
for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++)
10548
if (isclr(blksfree, cgbno + i))
10553
* This is mostly ffs_blkfree() sans some validation and
10554
* superblock updates.
10556
/* Whole-block case: every fragment of the block is being returned. */
if (frags == fs->fs_frag) {
10557
fragno = fragstoblks(fs, cgbno);
10558
ffs_setblock(fs, blksfree, fragno);
10559
ffs_clusteracct(fs, cgp, fragno, 1);
10560
cgp->cg_cs.cs_nbfree++;
10562
/* Partial case: advance to the first fragment of the range. */
cgbno += jnewblk->jn_oldfrags;
10563
bbase = cgbno - fragnum(fs, cgbno);
10564
/* Decrement the old frags. */
10565
blk = blkmap(fs, blksfree, bbase);
10566
ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
10567
/* Deallocate the fragment */
10568
for (i = 0; i < frags; i++)
10569
setbit(blksfree, cgbno + i);
10570
cgp->cg_cs.cs_nffree += frags;
10571
/* Add back in counts associated with the new frags */
10572
blk = blkmap(fs, blksfree, bbase);
10573
ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
10574
/* If a complete block has been reassembled, account for it. */
10575
fragno = fragstoblks(fs, bbase);
10576
if (ffs_isblock(fs, blksfree, fragno)) {
10577
cgp->cg_cs.cs_nffree -= fs->fs_frag;
10578
ffs_clusteracct(fs, cgp, fragno, 1);
10579
cgp->cg_cs.cs_nbfree++;
10583
/* Flag the jnewblk as rolled back: ATTACHED off, UNDONE on. */
jnewblk->jn_state &= ~ATTACHED;
10584
jnewblk->jn_state |= UNDONE;
10590
initiate_write_bmsafemap(bmsafemap, bp)
10591
struct bmsafemap *bmsafemap;
10592
struct buf *bp; /* The cg block. */
10594
struct jaddref *jaddref;
10595
struct jnewblk *jnewblk;
10602
if (bmsafemap->sm_state & IOSTARTED)
10604
bmsafemap->sm_state |= IOSTARTED;
10606
* Clear any inode allocations which are pending journal writes.
10608
if (LIST_FIRST(&bmsafemap->sm_jaddrefhd) != NULL) {
10609
cgp = (struct cg *)bp->b_data;
10610
fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
10611
inosused = cg_inosused(cgp);
10612
LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) {
10613
ino = jaddref->ja_ino % fs->fs_ipg;
10614
if (isset(inosused, ino)) {
10615
if ((jaddref->ja_mode & IFMT) == IFDIR)
10616
cgp->cg_cs.cs_ndir--;
10617
cgp->cg_cs.cs_nifree++;
10618
clrbit(inosused, ino);
10619
jaddref->ja_state &= ~ATTACHED;
10620
jaddref->ja_state |= UNDONE;
10623
panic("initiate_write_bmsafemap: inode %d "
10624
"marked free", jaddref->ja_ino);
10628
* Clear any block allocations which are pending journal writes.
10630
if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) {
10631
cgp = (struct cg *)bp->b_data;
10632
fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
10633
blksfree = cg_blksfree(cgp);
10634
LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) {
10635
if (jnewblk_rollback(jnewblk, fs, cgp, blksfree))
10637
panic("initiate_write_bmsafemap: block %jd "
10638
"marked free", jnewblk->jn_blkno);
10642
* Move allocation lists to the written lists so they can be
10643
* cleared once the block write is complete.
10645
LIST_SWAP(&bmsafemap->sm_inodedephd, &bmsafemap->sm_inodedepwr,
10646
inodedep, id_deps);
10647
LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr,
10649
LIST_SWAP(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist,