Merge tag 'sound-5.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai...
[linux-2.6-microblaze.git] / fs / xfs / libxfs / xfs_da_btree.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * Copyright (c) 2013 Red Hat, Inc.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_fs.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_bit.h"
14 #include "xfs_mount.h"
15 #include "xfs_inode.h"
16 #include "xfs_dir2.h"
17 #include "xfs_dir2_priv.h"
18 #include "xfs_trans.h"
19 #include "xfs_bmap.h"
20 #include "xfs_attr_leaf.h"
21 #include "xfs_error.h"
22 #include "xfs_trace.h"
23 #include "xfs_buf_item.h"
24 #include "xfs_log.h"
25
26 /*
27  * xfs_da_btree.c
28  *
29  * Routines to implement directories as Btrees of hashed names.
30  */
31
32 /*========================================================================
33  * Function prototypes for the kernel.
34  *========================================================================*/
35
36 /*
37  * Routines used for growing the Btree.
38  */
39 STATIC int xfs_da3_root_split(xfs_da_state_t *state,
40                                             xfs_da_state_blk_t *existing_root,
41                                             xfs_da_state_blk_t *new_child);
42 STATIC int xfs_da3_node_split(xfs_da_state_t *state,
43                                             xfs_da_state_blk_t *existing_blk,
44                                             xfs_da_state_blk_t *split_blk,
45                                             xfs_da_state_blk_t *blk_to_add,
46                                             int treelevel,
47                                             int *result);
48 STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
49                                          xfs_da_state_blk_t *node_blk_1,
50                                          xfs_da_state_blk_t *node_blk_2);
51 STATIC void xfs_da3_node_add(xfs_da_state_t *state,
52                                    xfs_da_state_blk_t *old_node_blk,
53                                    xfs_da_state_blk_t *new_node_blk);
54
55 /*
56  * Routines used for shrinking the Btree.
57  */
58 STATIC int xfs_da3_root_join(xfs_da_state_t *state,
59                                            xfs_da_state_blk_t *root_blk);
60 STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
61 STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
62                                               xfs_da_state_blk_t *drop_blk);
63 STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
64                                          xfs_da_state_blk_t *src_node_blk,
65                                          xfs_da_state_blk_t *dst_node_blk);
66
67 /*
68  * Utility routines.
69  */
70 STATIC int      xfs_da3_blk_unlink(xfs_da_state_t *state,
71                                   xfs_da_state_blk_t *drop_blk,
72                                   xfs_da_state_blk_t *save_blk);
73
74
75 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
76
77 /*
78  * Allocate a dir-state structure.
79  * We don't put them on the stack since they're large.
80  */
81 struct xfs_da_state *
82 xfs_da_state_alloc(
83         struct xfs_da_args      *args)
84 {
85         struct xfs_da_state     *state;
86
87         state = kmem_cache_zalloc(xfs_da_state_zone, GFP_NOFS | __GFP_NOFAIL);
88         state->args = args;
89         state->mp = args->dp->i_mount;
90         return state;
91 }
92
93 /*
94  * Kill the altpath contents of a da-state structure.
95  */
96 STATIC void
97 xfs_da_state_kill_altpath(xfs_da_state_t *state)
98 {
99         int     i;
100
101         for (i = 0; i < state->altpath.active; i++)
102                 state->altpath.blk[i].bp = NULL;
103         state->altpath.active = 0;
104 }
105
/*
 * Free a da-state structure.  Altpath buffer pointers are dropped first;
 * DEBUG builds poison the structure with zeros so a use-after-free is
 * easier to spot.
 */
void
xfs_da_state_free(xfs_da_state_t *state)
{
	xfs_da_state_kill_altpath(state);
#ifdef DEBUG
	memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
	kmem_cache_free(xfs_da_state_zone, state);
}
118
119 static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
120 {
121         if (whichfork == XFS_DATA_FORK)
122                 return mp->m_dir_geo->fsbcount;
123         return mp->m_attr_geo->fsbcount;
124 }
125
/*
 * Unpack an on-disk da btree node header into its incore, cpu-endian
 * representation.  CRC-enabled (v5) filesystems use the larger xfs_da3
 * header layout; older filesystems use the original layout.  In both
 * cases to->btree is left pointing at the entry array inside the disk
 * buffer, not a copy.
 */
void
xfs_da3_node_hdr_from_disk(
	struct xfs_mount                *mp,
	struct xfs_da3_icnode_hdr       *to,
	struct xfs_da_intnode           *from)
{
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_da3_intnode	*from3 = (struct xfs_da3_intnode *)from;

		to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
		to->back = be32_to_cpu(from3->hdr.info.hdr.back);
		to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
		to->count = be16_to_cpu(from3->hdr.__count);
		to->level = be16_to_cpu(from3->hdr.__level);
		to->btree = from3->__btree;
		ASSERT(to->magic == XFS_DA3_NODE_MAGIC);
	} else {
		to->forw = be32_to_cpu(from->hdr.info.forw);
		to->back = be32_to_cpu(from->hdr.info.back);
		to->magic = be16_to_cpu(from->hdr.info.magic);
		to->count = be16_to_cpu(from->hdr.__count);
		to->level = be16_to_cpu(from->hdr.__level);
		to->btree = from->__btree;
		ASSERT(to->magic == XFS_DA_NODE_MAGIC);
	}
}
152
/*
 * Pack an incore da btree node header back into its on-disk, big-endian
 * form.  The format written (v2 vs v3) is chosen by the filesystem's CRC
 * feature bit and must agree with the incore magic, which is asserted.
 * Note the btree entry array is not copied here; callers operate on it
 * in the buffer directly.
 */
void
xfs_da3_node_hdr_to_disk(
	struct xfs_mount                *mp,
	struct xfs_da_intnode           *to,
	struct xfs_da3_icnode_hdr       *from)
{
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_da3_intnode	*to3 = (struct xfs_da3_intnode *)to;

		ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
		to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
		to3->hdr.info.hdr.back = cpu_to_be32(from->back);
		to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
		to3->hdr.__count = cpu_to_be16(from->count);
		to3->hdr.__level = cpu_to_be16(from->level);
	} else {
		ASSERT(from->magic == XFS_DA_NODE_MAGIC);
		to->hdr.info.forw = cpu_to_be32(from->forw);
		to->hdr.info.back = cpu_to_be32(from->back);
		to->hdr.info.magic = cpu_to_be16(from->magic);
		to->hdr.__count = cpu_to_be16(from->count);
		to->hdr.__level = cpu_to_be16(from->level);
	}
}
177
/*
 * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only
 * accessible on v5 filesystems. This header format is common across da node,
 * attr leaf and dir leaf blocks.
 *
 * Returns NULL if the header is valid, or the address of the failing
 * check otherwise.
 */
xfs_failaddr_t
xfs_da3_blkinfo_verify(
	struct xfs_buf		*bp,
	struct xfs_da3_blkinfo	*hdr3)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_da_blkinfo	*hdr = &hdr3->hdr;

	/* magic must be one of those registered in bp->b_ops->magic16 */
	if (!xfs_verify_magic16(bp, hdr->magic))
		return __this_address;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		/* v5 blocks are self-describing: uuid, blkno and lsn */
		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
			return __this_address;
	}

	return NULL;
}
205
/*
 * Structure verifier for a da btree node block: common blkinfo checks
 * plus node-specific level and entry-count bounds.  Returns NULL when
 * valid, or the address of the failing check.
 */
static xfs_failaddr_t
xfs_da3_node_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_da_intnode	*hdr = bp->b_addr;
	struct xfs_da3_icnode_hdr ichdr;
	xfs_failaddr_t		fa;

	xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr);

	fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
	if (fa)
		return fa;

	/* level 0 would be a leaf, not a node; depth is format-bounded */
	if (ichdr.level == 0)
		return __this_address;
	if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
		return __this_address;
	if (ichdr.count == 0)
		return __this_address;

	/*
	 * we don't know if the node is for an attribute or directory tree,
	 * so only fail if the count is outside both bounds
	 */
	if (ichdr.count > mp->m_dir_geo->node_ents &&
	    ichdr.count > mp->m_attr_geo->node_ents)
		return __this_address;

	/* XXX: hash order check? */

	return NULL;
}
240
/*
 * Write verifier for da btree node blocks: run the structure checks and,
 * on CRC-enabled filesystems, stamp the LSN from the buffer log item and
 * recompute the block checksum before the buffer hits the disk.
 */
static void
xfs_da3_node_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_da3_node_hdr	*hdr3 = bp->b_addr;
	xfs_failaddr_t		fa;

	fa = xfs_da3_node_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* non-CRC filesystems have no lsn or checksum fields to update */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
}
264
265 /*
266  * leaf/node format detection on trees is sketchy, so a node read can be done on
267  * leaf level blocks when detection identifies the tree as a node format tree
268  * incorrectly. In this case, we need to swap the verifier to match the correct
269  * format of the block being read.
270  */
271 static void
272 xfs_da3_node_read_verify(
273         struct xfs_buf          *bp)
274 {
275         struct xfs_da_blkinfo   *info = bp->b_addr;
276         xfs_failaddr_t          fa;
277
278         switch (be16_to_cpu(info->magic)) {
279                 case XFS_DA3_NODE_MAGIC:
280                         if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
281                                 xfs_verifier_error(bp, -EFSBADCRC,
282                                                 __this_address);
283                                 break;
284                         }
285                         fallthrough;
286                 case XFS_DA_NODE_MAGIC:
287                         fa = xfs_da3_node_verify(bp);
288                         if (fa)
289                                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
290                         return;
291                 case XFS_ATTR_LEAF_MAGIC:
292                 case XFS_ATTR3_LEAF_MAGIC:
293                         bp->b_ops = &xfs_attr3_leaf_buf_ops;
294                         bp->b_ops->verify_read(bp);
295                         return;
296                 case XFS_DIR2_LEAFN_MAGIC:
297                 case XFS_DIR3_LEAFN_MAGIC:
298                         bp->b_ops = &xfs_dir3_leafn_buf_ops;
299                         bp->b_ops->verify_read(bp);
300                         return;
301                 default:
302                         xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
303                         break;
304         }
305 }
306
/* Verify the structure of a da3 block. */
static xfs_failaddr_t
xfs_da3_node_verify_struct(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	/*
	 * As in the read verifier, the block may turn out to be an attr or
	 * dir leaf rather than a node; swap ops to match and delegate.
	 */
	switch (be16_to_cpu(info->magic)) {
	case XFS_DA3_NODE_MAGIC:
	case XFS_DA_NODE_MAGIC:
		return xfs_da3_node_verify(bp);
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		bp->b_ops = &xfs_attr3_leaf_buf_ops;
		return bp->b_ops->verify_struct(bp);
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		bp->b_ops = &xfs_dir3_leafn_buf_ops;
		return bp->b_ops->verify_struct(bp);
	default:
		return __this_address;
	}
}
330
/*
 * Buffer ops for da btree node blocks.  Both the v2 and v3 node magics
 * are accepted; the read/struct verifiers may re-route attr or dir leaf
 * blocks to their own ops when format detection was wrong.
 */
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
	.name = "xfs_da3_node",
	.magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC),
		     cpu_to_be16(XFS_DA3_NODE_MAGIC) },
	.verify_read = xfs_da3_node_read_verify,
	.verify_write = xfs_da3_node_write_verify,
	.verify_struct = xfs_da3_node_verify_struct,
};
339
/*
 * Tag the transaction buffer with the log type matching the block's
 * on-disk magic so log recovery can replay it correctly.  An unknown
 * magic is reported as corruption and the buffer is released.
 */
static int
xfs_da3_node_set_type(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DA_NODE_MAGIC:
	case XFS_DA3_NODE_MAGIC:
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
		return 0;
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF);
		return 0;
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
		return 0;
	default:
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
				info, sizeof(*info));
		xfs_trans_brelse(tp, bp);
		return -EFSCORRUPTED;
	}
}
367
/*
 * Read the da btree block at dablk @bno into *bpp.  When part of a
 * transaction, also set the buffer's log type from its on-disk magic.
 * Returns 0 or a negative errno; *bpp may be NULL on a hole.
 */
int
xfs_da3_node_read(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	int			error;

	error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
			&xfs_da3_node_buf_ops);
	if (error || !*bpp || !tp)
		return error;
	return xfs_da3_node_set_type(tp, *bpp);
}
384
/*
 * As xfs_da3_node_read(), but for a block whose disk address (daddr) is
 * already known, bypassing the dablk-to-daddr mapping.  Sets the buffer
 * LRU reference class for the appropriate btree type, and the buffer log
 * type when in a transaction.
 */
int
xfs_da3_node_read_mapped(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_mount	*mp = dp->i_mount;
	int			error;

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
			XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
			bpp, &xfs_da3_node_buf_ops);
	if (error || !*bpp)
		return error;

	if (whichfork == XFS_ATTR_FORK)
		xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF);
	else
		xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);

	if (!tp)
		return 0;
	return xfs_da3_node_set_type(tp, *bpp);
}
411
412 /*========================================================================
413  * Routines used for growing the Btree.
414  *========================================================================*/
415
/*
 * Create the initial contents of an intermediate node.
 *
 * Gets a buffer for @blkno, initialises a node header at @level (v3 with
 * blkno/owner/uuid on CRC filesystems, v2 otherwise), logs the header and
 * returns the buffer in *bpp.  Returns 0 or a negative errno.
 */
int
xfs_da3_node_create(
	struct xfs_da_args	*args,
	xfs_dablk_t		blkno,
	int			level,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_da_intnode	*node;
	struct xfs_trans	*tp = args->trans;
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_da3_icnode_hdr ichdr = {0};
	struct xfs_buf		*bp;
	int			error;
	struct xfs_inode	*dp = args->dp;

	trace_xfs_da_node_create(args);
	ASSERT(level <= XFS_DA_NODE_MAXDEPTH);

	error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork);
	if (error)
		return error;
	bp->b_ops = &xfs_da3_node_buf_ops;
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	node = bp->b_addr;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;

		/* v5: fill in the self-describing blkinfo fields */
		memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
		ichdr.magic = XFS_DA3_NODE_MAGIC;
		hdr3->info.blkno = cpu_to_be64(bp->b_bn);
		hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
		uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
	} else {
		ichdr.magic = XFS_DA_NODE_MAGIC;
	}
	ichdr.level = level;

	xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr);
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size));

	*bpp = bp;
	return 0;
}
465
/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 *
 * Walks from the leaf back up the path recorded in @state, splitting each
 * block that is full, and finally splits the root if the split propagated
 * all the way up.  Returns 0 or a negative errno.
 */
int                                                     /* error */
xfs_da3_split(
	struct xfs_da_state	*state)
{
	struct xfs_da_state_blk	*oldblk;
	struct xfs_da_state_blk	*newblk;
	struct xfs_da_state_blk	*addblk;
	struct xfs_da_intnode	*node;
	int			max;
	int			action = 0;
	int			error;
	int			i;

	trace_xfs_da_split(state->args);

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr3_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != -ENOSPC)) {
				return error;	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again. The new
			 * extrablk will be consumed by xfs_da3_node_split if
			 * the node is split.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				trace_xfs_attr_leaf_split_before(state->args);
				error = xfs_attr3_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				trace_xfs_attr_leaf_split_after(state->args);
				error = xfs_attr3_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return error;	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			error = xfs_da3_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			addblk->bp = NULL;
			if (error)
				return error;	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da3_fixhashpath(state, &state->path);
	}
	/* no block left to insert upward: no root split needed */
	if (!addblk)
		return 0;

	/*
	 * xfs_da3_node_split() should have consumed any extra blocks we added
	 * during a double leaf split in the attr fork. This is guaranteed as
	 * we can't be here if the attr fork only has a single leaf block.
	 */
	ASSERT(state->extravalid == 0 ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Split the root node.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da3_root_split(state, oldblk, addblk);
	if (error)
		goto out;

	/*
	 * Update pointers to the node which used to be block 0 and just got
	 * bumped because of the addition of a new root node.  Note that the
	 * original block 0 could be at any position in the list of blocks in
	 * the tree.
	 *
	 * Note: the magic numbers and sibling pointers are in the same physical
	 * place for both v2 and v3 headers (by design). Hence it doesn't matter
	 * which version of the xfs_da_intnode structure we use here as the
	 * result will be the same using either structure.
	 */
	node = oldblk->bp->b_addr;
	if (node->hdr.info.forw) {
		if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
			xfs_buf_mark_corrupt(oldblk->bp);
			error = -EFSCORRUPTED;
			goto out;
		}
		node = addblk->bp->b_addr;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
				  XFS_DA_LOGRANGE(node, &node->hdr.info,
				  sizeof(node->hdr.info)));
	}
	node = oldblk->bp->b_addr;
	if (node->hdr.info.back) {
		if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
			xfs_buf_mark_corrupt(oldblk->bp);
			error = -EFSCORRUPTED;
			goto out;
		}
		node = addblk->bp->b_addr;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
				  XFS_DA_LOGRANGE(node, &node->hdr.info,
				  sizeof(node->hdr.info)));
	}
out:
	addblk->bp = NULL;
	return error;
}
626
/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in process.
 *
 * The root must stay at a fixed block number, so the old root's contents
 * are relocated to a newly grown block and a fresh one-level-higher node
 * pointing at blk1 and blk2 is written in its place.
 */
STATIC int                                              /* error */
xfs_da3_root_split(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*blk1,
	struct xfs_da_state_blk	*blk2)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	struct xfs_inode	*dp;
	struct xfs_trans	*tp;
	struct xfs_dir2_leaf	*leaf;
	xfs_dablk_t		blkno;
	int			level;
	int			error;
	int			size;

	trace_xfs_da_root_split(state->args);

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return error;

	dp = args->dp;
	tp = args->trans;
	error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork);
	if (error)
		return error;
	node = bp->b_addr;
	oldroot = blk1->bp->b_addr;
	/* the old root may be a node or (dir only) a leaf block */
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
		struct xfs_da3_icnode_hdr icnodehdr;

		xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot);
		btree = icnodehdr.btree;
		/* copy only the used portion: header plus live entries */
		size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
		level = icnodehdr.level;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	} else {
		struct xfs_dir3_icleaf_hdr leafhdr;

		leaf = (xfs_dir2_leaf_t *)oldroot;
		xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);

		ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
		       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
		size = (int)((char *)&leafhdr.ents[leafhdr.count] -
			(char *)leaf);
		level = 0;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
	}

	/*
	 * we can copy most of the information in the node from one block to
	 * another, but for CRC enabled headers we have to make sure that the
	 * block specific identifiers are kept intact. We update the buffer
	 * directly for this.
	 */
	memcpy(node, oldroot, size);
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;

		node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
	}
	xfs_trans_log_buf(tp, bp, 0, size - 1);

	/* the relocated copy keeps the old root's verifier and log type */
	bp->b_ops = blk1->bp->b_ops;
	xfs_trans_buf_copy_type(bp, blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.
	 */
	error = xfs_da3_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
		level + 1, &bp, args->whichfork);
	if (error)
		return error;

	node = bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
	btree = nodehdr.btree;
	btree[0].hashval = cpu_to_be32(blk1->hashval);
	btree[0].before = cpu_to_be32(blk1->blkno);
	btree[1].hashval = cpu_to_be32(blk2->hashval);
	btree[1].before = cpu_to_be32(blk2->blkno);
	nodehdr.count = 2;
	xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);

#ifdef DEBUG
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		ASSERT(blk1->blkno >= args->geo->leafblk &&
		       blk1->blkno < args->geo->freeblk);
		ASSERT(blk2->blkno >= args->geo->leafblk &&
		       blk2->blkno < args->geo->freeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));

	return 0;
}
758
/*
 * Split the node, rebalance, then add the new entry.
 *
 * oldblk is the node that overflowed, newblk receives the newly allocated
 * sibling when a split is actually required, and addblk describes the child
 * entry (hashval/blkno) being inserted at this level.  On return *result is
 * 1 if a split occurred (the caller must continue splitting towards the
 * root) and 0 if the new entry fit without splitting.  Returns 0 on success
 * or an errno from block allocation, node creation or sibling linking.
 */
STATIC int                                              /* error */
xfs_da3_node_split(
        struct xfs_da_state     *state,
        struct xfs_da_state_blk *oldblk,
        struct xfs_da_state_blk *newblk,
        struct xfs_da_state_blk *addblk,
        int                     treelevel,
        int                     *result)
{
        struct xfs_da_intnode   *node;
        struct xfs_da3_icnode_hdr nodehdr;
        xfs_dablk_t             blkno;
        int                     newcount;
        int                     error;
        int                     useextra;
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_node_split(state->args);

        node = oldblk->bp->b_addr;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);

        /*
         * With V2 dirs the extra block is data or freespace.
         */
        useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
        newcount = 1 + useextra;
        /*
         * Do we have to split the node?
         */
        if (nodehdr.count + newcount > state->args->geo->node_ents) {
                /*
                 * Allocate a new node, add to the doubly linked chain of
                 * nodes, then move some of our excess entries into it.
                 */
                error = xfs_da_grow_inode(state->args, &blkno);
                if (error)
                        return error;   /* GROT: dir is inconsistent */

                error = xfs_da3_node_create(state->args, blkno, treelevel,
                                           &newblk->bp, state->args->whichfork);
                if (error)
                        return error;   /* GROT: dir is inconsistent */
                newblk->blkno = blkno;
                newblk->magic = XFS_DA_NODE_MAGIC;
                xfs_da3_node_rebalance(state, oldblk, newblk);
                error = xfs_da3_blk_link(state, oldblk, newblk);
                if (error)
                        return error;
                *result = 1;
        } else {
                *result = 0;
        }

        /*
         * Insert the new entry(s) into the correct block
         * (updating last hashval in the process).
         *
         * xfs_da3_node_add() inserts BEFORE the given index,
         * and as a result of using node_lookup_int() we always
         * point to a valid entry (not after one), but a split
         * operation always results in a new block whose hashvals
         * FOLLOW the current block.
         *
         * If we had double-split op below us, then add the extra block too.
         */
        /* Re-read the header: the rebalance above may have moved entries. */
        node = oldblk->bp->b_addr;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
        if (oldblk->index <= nodehdr.count) {
                oldblk->index++;
                xfs_da3_node_add(state, oldblk, addblk);
                if (useextra) {
                        if (state->extraafter)
                                oldblk->index++;
                        xfs_da3_node_add(state, oldblk, &state->extrablk);
                        state->extravalid = 0;
                }
        } else {
                /* Insertion point fell into the new right-hand sibling. */
                newblk->index++;
                xfs_da3_node_add(state, newblk, addblk);
                if (useextra) {
                        if (state->extraafter)
                                newblk->index++;
                        xfs_da3_node_add(state, newblk, &state->extrablk);
                        state->extravalid = 0;
                }
        }

        return 0;
}
852
/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 *
 * blk1 and blk2 must be adjacent siblings at the same level.  Both buffers
 * are modified and logged; blk1->hashval/blk2->hashval are refreshed for
 * upward propagation, and blk1/blk2's insertion indices are adjusted so the
 * caller's pending insert lands in the correct node.
 */
STATIC void
xfs_da3_node_rebalance(
        struct xfs_da_state     *state,
        struct xfs_da_state_blk *blk1,
        struct xfs_da_state_blk *blk2)
{
        struct xfs_da_intnode   *node1;
        struct xfs_da_intnode   *node2;
        struct xfs_da_intnode   *tmpnode;
        struct xfs_da_node_entry *btree1;
        struct xfs_da_node_entry *btree2;
        struct xfs_da_node_entry *btree_s;      /* copy source */
        struct xfs_da_node_entry *btree_d;      /* copy destination */
        struct xfs_da3_icnode_hdr nodehdr1;
        struct xfs_da3_icnode_hdr nodehdr2;
        struct xfs_trans        *tp;
        int                     count;
        int                     tmp;
        int                     swap = 0;
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_node_rebalance(state->args);

        node1 = blk1->bp->b_addr;
        node2 = blk2->bp->b_addr;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
        xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
        btree1 = nodehdr1.btree;
        btree2 = nodehdr2.btree;

        /*
         * Figure out how many entries need to move, and in which direction.
         * Swap the nodes around if that makes it simpler.
         *
         * After the swap the local node1 always holds the lower hashvals, so
         * the code below only has to handle moving between "node1" (lower)
         * and "node2" (higher); the swap is undone for the hashval
         * propagation at the bottom.
         */
        if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
            ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
             (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
                        be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
                tmpnode = node1;
                node1 = node2;
                node2 = tmpnode;
                xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
                xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
                btree1 = nodehdr1.btree;
                btree2 = nodehdr2.btree;
                swap = 1;
        }

        /* count > 0: move entries node1 -> node2; count < 0: the reverse. */
        count = (nodehdr1.count - nodehdr2.count) / 2;
        if (count == 0)
                return;
        tp = state->args->trans;
        /*
         * Two cases: high-to-low and low-to-high.
         */
        if (count > 0) {
                /*
                 * Move elements in node2 up to make a hole.
                 */
                tmp = nodehdr2.count;
                if (tmp > 0) {
                        tmp *= (uint)sizeof(xfs_da_node_entry_t);
                        btree_s = &btree2[0];
                        btree_d = &btree2[count];
                        memmove(btree_d, btree_s, tmp);
                }

                /*
                 * Move the req'd B-tree elements from high in node1 to
                 * low in node2.
                 */
                nodehdr2.count += count;
                tmp = count * (uint)sizeof(xfs_da_node_entry_t);
                btree_s = &btree1[nodehdr1.count - count];
                btree_d = &btree2[0];
                memcpy(btree_d, btree_s, tmp);
                nodehdr1.count -= count;
        } else {
                /*
                 * Move the req'd B-tree elements from low in node2 to
                 * high in node1.
                 */
                count = -count;
                tmp = count * (uint)sizeof(xfs_da_node_entry_t);
                btree_s = &btree2[0];
                btree_d = &btree1[nodehdr1.count];
                memcpy(btree_d, btree_s, tmp);
                nodehdr1.count += count;

                /* Log the entries appended to node1's tail. */
                xfs_trans_log_buf(tp, blk1->bp,
                        XFS_DA_LOGRANGE(node1, btree_d, tmp));

                /*
                 * Move elements in node2 down to fill the hole.
                 */
                tmp  = nodehdr2.count - count;
                tmp *= (uint)sizeof(xfs_da_node_entry_t);
                btree_s = &btree2[count];
                btree_d = &btree2[0];
                memmove(btree_d, btree_s, tmp);
                nodehdr2.count -= count;
        }

        /*
         * Log header of node 1 and all current bits of node 2.
         */
        xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
        xfs_trans_log_buf(tp, blk1->bp,
                XFS_DA_LOGRANGE(node1, &node1->hdr,
                                state->args->geo->node_hdr_size));

        xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2);
        xfs_trans_log_buf(tp, blk2->bp,
                XFS_DA_LOGRANGE(node2, &node2->hdr,
                                state->args->geo->node_hdr_size +
                                (sizeof(btree2[0]) * nodehdr2.count)));

        /*
         * Record the last hashval from each block for upward propagation.
         * (note: don't use the swapped node pointers)
         */
        if (swap) {
                node1 = blk1->bp->b_addr;
                node2 = blk2->bp->b_addr;
                xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
                xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
                btree1 = nodehdr1.btree;
                btree2 = nodehdr2.btree;
        }
        blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
        blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);

        /*
         * Adjust the expected index for insertion.
         *
         * If the insertion point migrated into blk2, redirect it there and
         * poison blk1->index (one past count) so it can't be used by mistake.
         */
        if (blk1->index >= nodehdr1.count) {
                blk2->index = blk1->index - nodehdr1.count;
                blk1->index = nodehdr1.count + 1;       /* make it invalid */
        }
}
999
/*
 * Add a new entry to an intermediate node.
 *
 * Inserts an entry for newblk (its hashval and blkno) at oldblk->index in
 * oldblk's node, shifting later entries up.  The caller must have verified
 * there is room; the index must be in [0, count].  oldblk->hashval is
 * refreshed with the node's (possibly new) last hashval for propagation.
 */
STATIC void
xfs_da3_node_add(
        struct xfs_da_state     *state,
        struct xfs_da_state_blk *oldblk,
        struct xfs_da_state_blk *newblk)
{
        struct xfs_da_intnode   *node;
        struct xfs_da3_icnode_hdr nodehdr;
        struct xfs_da_node_entry *btree;
        int                     tmp;    /* bytes shifted to open the hole */
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_node_add(state->args);

        node = oldblk->bp->b_addr;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
        btree = nodehdr.btree;

        ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
        ASSERT(newblk->blkno != 0);
        if (state->args->whichfork == XFS_DATA_FORK)
                ASSERT(newblk->blkno >= state->args->geo->leafblk &&
                       newblk->blkno < state->args->geo->freeblk);

        /*
         * We may need to make some room before we insert the new node.
         */
        tmp = 0;
        if (oldblk->index < nodehdr.count) {
                tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
                memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
        }
        btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
        btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
        /* Log the new entry plus everything that was shifted. */
        xfs_trans_log_buf(state->args->trans, oldblk->bp,
                XFS_DA_LOGRANGE(node, &btree[oldblk->index],
                                tmp + sizeof(*btree)));

        nodehdr.count += 1;
        xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
        xfs_trans_log_buf(state->args->trans, oldblk->bp,
                XFS_DA_LOGRANGE(node, &node->hdr,
                                state->args->geo->node_hdr_size));

        /*
         * Copy the last hash value from the oldblk to propagate upwards.
         */
        oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
}
1052
1053 /*========================================================================
1054  * Routines used for shrinking the Btree.
1055  *========================================================================*/
1056
/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 *
 * Walks state->path from the leaf level back up towards the root,
 * coalescing/unlinking blocks level by level until a level no longer needs
 * joining, then finally gives xfs_da3_root_join() a chance to collapse a
 * one-entry root.  Returns 0 or an errno from any of the helpers.
 */
int
xfs_da3_join(
        struct xfs_da_state     *state)
{
        struct xfs_da_state_blk *drop_blk;      /* block being removed */
        struct xfs_da_state_blk *save_blk;      /* sibling that survives */
        int                     action = 0;
        int                     error;

        trace_xfs_da_join(state->args);

        drop_blk = &state->path.blk[ state->path.active-1 ];
        save_blk = &state->altpath.blk[ state->path.active-1 ];
        ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
        ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
               drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

        /*
         * Walk back up the tree joining/deallocating as necessary.
         * When we stop dropping blocks, break out.
         */
        for (  ; state->path.active >= 2; drop_blk--, save_blk--,
                 state->path.active--) {
                /*
                 * See if we can combine the block with a neighbor.
                 *   (action == 0) => no options, just leave
                 *   (action == 1) => coalesce, then unlink
                 *   (action == 2) => block empty, unlink it
                 */
                switch (drop_blk->magic) {
                case XFS_ATTR_LEAF_MAGIC:
                        error = xfs_attr3_leaf_toosmall(state, &action);
                        if (error)
                                return error;
                        if (action == 0)
                                return 0;
                        xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
                        break;
                case XFS_DIR2_LEAFN_MAGIC:
                        error = xfs_dir2_leafn_toosmall(state, &action);
                        if (error)
                                return error;
                        if (action == 0)
                                return 0;
                        xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
                        break;
                case XFS_DA_NODE_MAGIC:
                        /*
                         * Remove the offending node, fixup hashvals,
                         * check for a toosmall neighbor.
                         */
                        xfs_da3_node_remove(state, drop_blk);
                        xfs_da3_fixhashpath(state, &state->path);
                        error = xfs_da3_node_toosmall(state, &action);
                        if (error)
                                return error;
                        if (action == 0)
                                return 0;
                        xfs_da3_node_unbalance(state, drop_blk, save_blk);
                        break;
                }
                /* Propagate new hashvals along the surviving path, then
                 * unlink the emptied block and give it back to the inode. */
                xfs_da3_fixhashpath(state, &state->altpath);
                error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
                xfs_da_state_kill_altpath(state);
                if (error)
                        return error;
                error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
                                                         drop_blk->bp);
                drop_blk->bp = NULL;
                if (error)
                        return error;
        }
        /*
         * We joined all the way to the top.  If it turns out that
         * we only have one entry in the root, make the child block
         * the new root.
         */
        xfs_da3_node_remove(state, drop_blk);
        xfs_da3_fixhashpath(state, &state->path);
        error = xfs_da3_root_join(state, &state->path.blk[0]);
        return error;
}
1143
#ifdef  DEBUG
/*
 * Debug-only sanity check used by xfs_da3_root_join(): the sole remaining
 * child of a root must carry a magic number appropriate for its level and
 * must not be linked to any siblings.
 */
static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
{
        __be16  magic = blkinfo->magic;

        if (level != 1) {
                /* Interior node: old- or new-format da node magic. */
                ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
                       magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
        } else {
                /* Leaf level: dir or attr leaf, either disk format. */
                ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
                       magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
                       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
                       magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
        }
        /* An only child must have been unlinked from any sibling chain. */
        ASSERT(!blkinfo->forw);
        ASSERT(!blkinfo->back);
}
#else   /* !DEBUG */
#define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
#endif  /* !DEBUG */
1165
/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 *
 * root_blk must reference the root (block 0) of the tree.  If the root
 * still has more than one child this is a no-op.  Returns 0 or an errno
 * from reading the child or shrinking the inode.
 */
STATIC int
xfs_da3_root_join(
        struct xfs_da_state     *state,
        struct xfs_da_state_blk *root_blk)
{
        struct xfs_da_intnode   *oldroot;
        struct xfs_da_args      *args;
        xfs_dablk_t             child;
        struct xfs_buf          *bp;    /* buffer for the only child block */
        struct xfs_da3_icnode_hdr oldroothdr;
        int                     error;
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_root_join(state->args);

        ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);

        args = state->args;
        oldroot = root_blk->bp->b_addr;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot);
        /* The root is never part of a sibling chain. */
        ASSERT(oldroothdr.forw == 0);
        ASSERT(oldroothdr.back == 0);

        /*
         * If the root has more than one child, then don't do anything.
         */
        if (oldroothdr.count > 1)
                return 0;

        /*
         * Read in the (only) child block, then copy those bytes into
         * the root block's buffer and free the original child block.
         */
        child = be32_to_cpu(oldroothdr.btree[0].before);
        ASSERT(child != 0);
        error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
        if (error)
                return error;
        xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);

        /*
         * This could be copying a leaf back into the root block in the case of
         * there only being a single leaf block left in the tree. Hence we have
         * to update the b_ops pointer as well to match the buffer type change
         * that could occur. For dir3 blocks we also need to update the block
         * number in the buffer header.
         */
        memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
        root_blk->bp->b_ops = bp->b_ops;
        xfs_trans_buf_copy_type(root_blk->bp, bp);
        if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
                struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
                /* Self-describing v3 header: stamp the root's block number. */
                da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
        }
        /* The whole root block changed; log all of it. */
        xfs_trans_log_buf(args->trans, root_blk->bp, 0,
                          args->geo->blksize - 1);
        error = xfs_da_shrink_inode(args, child, bp);
        return error;
}
1229
/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 *
 * (The return codes above are delivered through *action; the function's
 * return value is 0 or an errno.)
 */
STATIC int
xfs_da3_node_toosmall(
        struct xfs_da_state     *state,
        int                     *action)
{
        struct xfs_da_intnode   *node;
        struct xfs_da_state_blk *blk;
        struct xfs_da_blkinfo   *info;
        xfs_dablk_t             blkno;
        struct xfs_buf          *bp;
        struct xfs_da3_icnode_hdr nodehdr;
        int                     count;  /* free entries needed for a merge */
        int                     forward;
        int                     error;
        int                     retval;
        int                     i;
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_node_toosmall(state->args);

        /*
         * Check for the degenerate case of the block being over 50% full.
         * If so, it's not worth even looking to see if we might be able
         * to coalesce with a sibling.
         */
        blk = &state->path.blk[ state->path.active-1 ];
        info = blk->bp->b_addr;
        node = (xfs_da_intnode_t *)info;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
        if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
                *action = 0;    /* blk over 50%, don't try to join */
                return 0;
        }

        /*
         * Check for the degenerate case of the block being empty.
         * If the block is empty, we'll simply delete it, no need to
         * coalesce it with a sibling block.  We choose (arbitrarily)
         * to merge with the forward block unless it is NULL.
         */
        if (nodehdr.count == 0) {
                /*
                 * Make altpath point to the block we want to keep and
                 * path point to the block we want to drop (this one).
                 */
                forward = (info->forw != 0);
                memcpy(&state->altpath, &state->path, sizeof(state->path));
                error = xfs_da3_path_shift(state, &state->altpath, forward,
                                                 0, &retval);
                if (error)
                        return error;
                if (retval) {
                        *action = 0;
                } else {
                        *action = 2;
                }
                return 0;
        }

        /*
         * Examine each sibling block to see if we can coalesce with
         * at least 25% free space to spare.  We need to figure out
         * whether to merge with the forward or the backward block.
         * We prefer coalescing with the lower numbered sibling so as
         * to shrink a directory over time.
         */
        count  = state->args->geo->node_ents;
        count -= state->args->geo->node_ents >> 2;
        count -= nodehdr.count;

        /* start with smaller blk num */
        forward = nodehdr.forw < nodehdr.back;
        for (i = 0; i < 2; forward = !forward, i++) {
                struct xfs_da3_icnode_hdr thdr;
                if (forward)
                        blkno = nodehdr.forw;
                else
                        blkno = nodehdr.back;
                if (blkno == 0)
                        continue;
                error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
                                state->args->whichfork);
                if (error)
                        return error;

                node = bp->b_addr;
                xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
                xfs_trans_brelse(state->args->trans, bp);

                /* blkno/forward now describe the chosen merge candidate */
                if (count - thdr.count >= 0)
                        break;  /* fits with at least 25% to spare */
        }
        if (i >= 2) {
                *action = 0;    /* neither sibling has enough room */
                return 0;
        }

        /*
         * Make altpath point to the block we want to keep (the lower
         * numbered block) and path point to the block we want to drop.
         */
        memcpy(&state->altpath, &state->path, sizeof(state->path));
        if (blkno < blk->blkno) {
                error = xfs_da3_path_shift(state, &state->altpath, forward,
                                                 0, &retval);
        } else {
                error = xfs_da3_path_shift(state, &state->path, forward,
                                                 0, &retval);
        }
        if (error)
                return error;
        if (retval) {
                *action = 0;
                return 0;
        }
        *action = 1;
        return 0;
}
1357
1358 /*
1359  * Pick up the last hashvalue from an intermediate node.
1360  */
1361 STATIC uint
1362 xfs_da3_node_lasthash(
1363         struct xfs_inode        *dp,
1364         struct xfs_buf          *bp,
1365         int                     *count)
1366 {
1367         struct xfs_da3_icnode_hdr nodehdr;
1368
1369         xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr);
1370         if (count)
1371                 *count = nodehdr.count;
1372         if (!nodehdr.count)
1373                 return 0;
1374         return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
1375 }
1376
/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 *
 * Starting from the deepest block in @path, propagate the level's last
 * hashval into the parent entry that points at it, repeating towards the
 * root until a parent entry already holds the right value.
 */
void
xfs_da3_fixhashpath(
        struct xfs_da_state     *state,
        struct xfs_da_state_path *path)
{
        struct xfs_da_state_blk *blk;
        struct xfs_da_intnode   *node;
        struct xfs_da_node_entry *btree;
        xfs_dahash_t            lasthash=0;
        int                     level;
        int                     count;
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_fixhashpath(state->args);

        /* Fetch the last hashval of the bottom block; nothing to do if it
         * turned out to be empty. */
        level = path->active-1;
        blk = &path->blk[ level ];
        switch (blk->magic) {
        case XFS_ATTR_LEAF_MAGIC:
                lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
                if (count == 0)
                        return;
                break;
        case XFS_DIR2_LEAFN_MAGIC:
                lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
                if (count == 0)
                        return;
                break;
        case XFS_DA_NODE_MAGIC:
                lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
                if (count == 0)
                        return;
                break;
        }
        for (blk--, level--; level >= 0; blk--, level--) {
                struct xfs_da3_icnode_hdr nodehdr;

                node = blk->bp->b_addr;
                xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
                btree = nodehdr.btree;
                /* Parent already points at the right hashval: done. */
                if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
                        break;
                blk->hashval = lasthash;
                btree[blk->index].hashval = cpu_to_be32(lasthash);
                xfs_trans_log_buf(state->args->trans, blk->bp,
                                  XFS_DA_LOGRANGE(node, &btree[blk->index],
                                                  sizeof(*btree)));

                /* Carry this node's last hashval up to the next level. */
                lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
        }
}
1432
/*
 * Remove an entry from an intermediate node.
 *
 * Deletes the entry at drop_blk->index from drop_blk's node, compacting the
 * remaining entries and logging the modified ranges.  drop_blk->hashval is
 * refreshed with the node's new last hashval for upward propagation.
 */
STATIC void
xfs_da3_node_remove(
        struct xfs_da_state     *state,
        struct xfs_da_state_blk *drop_blk)
{
        struct xfs_da_intnode   *node;
        struct xfs_da3_icnode_hdr nodehdr;
        struct xfs_da_node_entry *btree;
        int                     index;
        int                     tmp;
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_node_remove(state->args);

        node = drop_blk->bp->b_addr;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
        ASSERT(drop_blk->index < nodehdr.count);
        ASSERT(drop_blk->index >= 0);

        /*
         * Copy over the offending entry, or just zero it out.
         */
        index = drop_blk->index;
        btree = nodehdr.btree;
        if (index < nodehdr.count - 1) {
                tmp  = nodehdr.count - index - 1;
                tmp *= (uint)sizeof(xfs_da_node_entry_t);
                memmove(&btree[index], &btree[index + 1], tmp);
                xfs_trans_log_buf(state->args->trans, drop_blk->bp,
                    XFS_DA_LOGRANGE(node, &btree[index], tmp));
                /* After the shift, the stale copy is in the last slot. */
                index = nodehdr.count - 1;
        }
        memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
        xfs_trans_log_buf(state->args->trans, drop_blk->bp,
            XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
        nodehdr.count -= 1;
        xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
        xfs_trans_log_buf(state->args->trans, drop_blk->bp,
            XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));

        /*
         * Copy the last hash value from the block to propagate upwards.
         * (index now equals the new count, so index - 1 is the last live
         * entry in either branch above.)
         */
        drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
}
1481
/*
 * Unbalance the elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 *
 * All entries from drop_blk's node are merged into save_blk's node,
 * preserving hashval ordering; save_blk->hashval is refreshed for upward
 * propagation.  The caller is responsible for unlinking and freeing the
 * emptied drop block afterwards.
 */
STATIC void
xfs_da3_node_unbalance(
        struct xfs_da_state     *state,
        struct xfs_da_state_blk *drop_blk,
        struct xfs_da_state_blk *save_blk)
{
        struct xfs_da_intnode   *drop_node;
        struct xfs_da_intnode   *save_node;
        struct xfs_da_node_entry *drop_btree;
        struct xfs_da_node_entry *save_btree;
        struct xfs_da3_icnode_hdr drop_hdr;
        struct xfs_da3_icnode_hdr save_hdr;
        struct xfs_trans        *tp;
        int                     sindex; /* insertion index in save_btree */
        int                     tmp;
        struct xfs_inode        *dp = state->args->dp;

        trace_xfs_da_node_unbalance(state->args);

        drop_node = drop_blk->bp->b_addr;
        save_node = save_blk->bp->b_addr;
        xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node);
        xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node);
        drop_btree = drop_hdr.btree;
        save_btree = save_hdr.btree;
        tp = state->args->trans;

        /*
         * If the dying block has lower hashvals, then move all the
         * elements in the remaining block up to make a hole.
         */
        if ((be32_to_cpu(drop_btree[0].hashval) <
                        be32_to_cpu(save_btree[0].hashval)) ||
            (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
                        be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
                /*
                 * Shift save_btree's existing entries up by drop_hdr.count
                 * slots to open a hole at index 0; the regions may overlap,
                 * hence memmove.
                 */
                tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
                memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);

                sindex = 0;
                xfs_trans_log_buf(tp, save_blk->bp,
                        XFS_DA_LOGRANGE(save_node, &save_btree[0],
                                (save_hdr.count + drop_hdr.count) *
                                                sizeof(xfs_da_node_entry_t)));
        } else {
                /* Dying block's hashvals are higher: append at the tail. */
                sindex = save_hdr.count;
                xfs_trans_log_buf(tp, save_blk->bp,
                        XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
                                drop_hdr.count * sizeof(xfs_da_node_entry_t)));
        }

        /*
         * Move all the B-tree elements from drop_blk to save_blk.
         */
        tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
        memcpy(&save_btree[sindex], &drop_btree[0], tmp);
        save_hdr.count += drop_hdr.count;

        xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr);
        xfs_trans_log_buf(tp, save_blk->bp,
                XFS_DA_LOGRANGE(save_node, &save_node->hdr,
                                state->args->geo->node_hdr_size));

        /*
         * Save the last hashval in the remaining block for upward propagation.
         */
        save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
}
1554
1555 /*========================================================================
1556  * Routines used for finding things in the Btree.
1557  *========================================================================*/
1558
1559 /*
1560  * Walk down the Btree looking for a particular filename, filling
1561  * in the state structure as we go.
1562  *
1563  * We will set the state structure to point to each of the elements
1564  * in each of the nodes where either the hashval is or should be.
1565  *
1566  * We support duplicate hashval's so for each entry in the current
1567  * node that could contain the desired hashval, descend.  This is a
1568  * pruned depth-first tree search.
1569  */
int							/* error */
xfs_da3_node_lookup_int(
	struct xfs_da_state	*state,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*curr;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	xfs_dablk_t		blkno;
	xfs_dahash_t		hashval;
	xfs_dahash_t		btreehashval;
	int			probe;
	int			span;
	int			max;
	int			error;
	int			retval;
	unsigned int		expected_level = 0;
	uint16_t		magic;
	struct xfs_inode	*dp = state->args->dp;

	/*
	 * Returns 0 or a negative errno; the lookup status (e.g. -ENOENT or
	 * -ENOATTR from the leaf lookup) is passed back through *result.
	 */
	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	blkno = args->geo->leafblk;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da3_node_read(args->trans, args->dp, blkno,
					&blk->bp, args->whichfork);
		if (error) {
			/* Don't leave a dangling slot in the active path. */
			blk->blkno = 0;
			state->path.active--;
			return error;
		}
		curr = blk->bp->b_addr;
		magic = be16_to_cpu(curr->magic);

		/*
		 * A leaf block terminates the descent.  Flatten the crc and
		 * non-crc magic numbers to a single in-memory type so callers
		 * only have to test one value.
		 */
		if (magic == XFS_ATTR_LEAF_MAGIC ||
		    magic == XFS_ATTR3_LEAF_MAGIC) {
			blk->magic = XFS_ATTR_LEAF_MAGIC;
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		}

		if (magic == XFS_DIR2_LEAFN_MAGIC ||
		    magic == XFS_DIR3_LEAFN_MAGIC) {
			blk->magic = XFS_DIR2_LEAFN_MAGIC;
			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
							      blk->bp, NULL);
			break;
		}

		/* Anything other than a node or leaf magic is corruption. */
		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
			xfs_buf_mark_corrupt(blk->bp);
			return -EFSCORRUPTED;
		}

		blk->magic = XFS_DA_NODE_MAGIC;

		/*
		 * Search an intermediate node for a match.
		 */
		node = blk->bp->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
		btree = nodehdr.btree;

		/* Tree taller than we can handle; bail out! */
		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
			xfs_buf_mark_corrupt(blk->bp);
			return -EFSCORRUPTED;
		}

		/*
		 * Check the level from the root.  The root's level seeds
		 * expected_level; every subsequent node must sit exactly one
		 * level below its parent or the tree is corrupt.
		 */
		if (blkno == args->geo->leafblk)
			expected_level = nodehdr.level - 1;
		else if (expected_level != nodehdr.level) {
			xfs_buf_mark_corrupt(blk->bp);
			return -EFSCORRUPTED;
		} else
			expected_level--;

		max = nodehdr.count;
		blk->hashval = be32_to_cpu(btree[max - 1].hashval);

		/*
		 * Binary search.  (note: small blocks will skip loop)
		 */
		probe = span = max / 2;
		hashval = args->hashval;
		while (span > 4) {
			span /= 2;
			btreehashval = be32_to_cpu(btree[probe].hashval);
			if (btreehashval < hashval)
				probe += span;
			else if (btreehashval > hashval)
				probe -= span;
			else
				break;
		}
		ASSERT((probe >= 0) && (probe < max));
		ASSERT((span <= 4) ||
			(be32_to_cpu(btree[probe].hashval) == hashval));

		/*
		 * Since we may have duplicate hashval's, find the first
		 * matching hashval in the node.
		 */
		while (probe > 0 &&
		       be32_to_cpu(btree[probe].hashval) >= hashval) {
			probe--;
		}
		while (probe < max &&
		       be32_to_cpu(btree[probe].hashval) < hashval) {
			probe++;
		}

		/*
		 * Pick the right block to descend on.  If the linear scan ran
		 * off the end, descend through the last entry.
		 */
		if (probe == max) {
			blk->index = max - 1;
			blkno = be32_to_cpu(btree[max - 1].before);
		} else {
			blk->index = probe;
			blkno = be32_to_cpu(btree[probe].before);
		}

		/* We can't point back to the root. */
		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
			return -EFSCORRUPTED;
	}

	/* The descent must have consumed exactly the advertised height. */
	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
		return -EFSCORRUPTED;

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			return -EFSCORRUPTED;
		}
		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da3_path_shift(state, &state->path, 1, 1,
							 &retval);
			if (error)
				return error;
			if (retval == 0) {
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = -ENOATTR;
			}
		}
		break;
	}
	*result = retval;
	return 0;
}
1751
1752 /*========================================================================
1753  * Utility routines.
1754  *========================================================================*/
1755
1756 /*
1757  * Compare two intermediate nodes for "order".
1758  */
1759 STATIC int
1760 xfs_da3_node_order(
1761         struct xfs_inode *dp,
1762         struct xfs_buf  *node1_bp,
1763         struct xfs_buf  *node2_bp)
1764 {
1765         struct xfs_da_intnode   *node1;
1766         struct xfs_da_intnode   *node2;
1767         struct xfs_da_node_entry *btree1;
1768         struct xfs_da_node_entry *btree2;
1769         struct xfs_da3_icnode_hdr node1hdr;
1770         struct xfs_da3_icnode_hdr node2hdr;
1771
1772         node1 = node1_bp->b_addr;
1773         node2 = node2_bp->b_addr;
1774         xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1);
1775         xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2);
1776         btree1 = node1hdr.btree;
1777         btree2 = node2hdr.btree;
1778
1779         if (node1hdr.count > 0 && node2hdr.count > 0 &&
1780             ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1781              (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1782               be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1783                 return 1;
1784         }
1785         return 0;
1786 }
1787
1788 /*
1789  * Link a new block into a doubly linked list of blocks (of whatever type).
1790  */
int							/* error */
xfs_da3_blk_link(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*old_blk,
	struct xfs_da_state_blk	*new_blk)
{
	struct xfs_da_blkinfo	*old_info;
	struct xfs_da_blkinfo	*new_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	int			before = 0;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->b_addr;
	new_info = new_blk->bp->b_addr;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);

	/*
	 * Decide whether the new block sorts before the existing one; the
	 * per-type order helper compares hashval ranges.
	 */
	switch (old_blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
		break;
	}

	/*
	 * Link blocks in appropriate order.
	 */
	if (before) {
		/*
		 * Link new block in before existing block.
		 */
		trace_xfs_da_link_before(args);
		new_info->forw = cpu_to_be32(old_blk->blkno);
		new_info->back = old_info->back;
		if (old_info->back) {
			/* Patch the old predecessor's forward pointer. */
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->back),
						&bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
			tmp_info->forw = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->back = cpu_to_be32(new_blk->blkno);
	} else {
		/*
		 * Link new block in after existing block.
		 */
		trace_xfs_da_link_after(args);
		new_info->forw = old_info->forw;
		new_info->back = cpu_to_be32(old_blk->blkno);
		if (old_info->forw) {
			/* Patch the old successor's back pointer. */
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->forw),
						&bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
			tmp_info->back = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->forw = cpu_to_be32(new_blk->blkno);
	}

	/*
	 * Log the modified headers of both blocks.  sizeof(*tmp_info) is a
	 * compile-time constant, so tmp_info needn't have been assigned on
	 * this path.
	 */
	xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
	xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
	return 0;
}
1880
1881 /*
1882  * Unlink a block from a doubly linked list of blocks.
1883  */
STATIC int						/* error */
xfs_da3_blk_unlink(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_blkinfo	*drop_info;
	struct xfs_da_blkinfo	*save_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	int			error;

	/*
	 * Set up environment.  The two blocks must be of the same type and
	 * must be adjacent siblings in the chain (asserted below).
	 */
	args = state->args;
	ASSERT(args != NULL);
	save_info = save_blk->bp->b_addr;
	drop_info = drop_blk->bp->b_addr;
	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(save_blk->magic == drop_blk->magic);
	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
	       (be32_to_cpu(drop_info->back) == save_blk->blkno));

	/*
	 * Unlink the leaf block from the doubly linked chain of leaves.
	 */
	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
		/* Dropped block sits behind the survivor. */
		trace_xfs_da_unlink_back(args);
		save_info->back = drop_info->back;
		if (drop_info->back) {
			/* Repoint the dropped block's predecessor forward. */
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->back),
						&bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
			tmp_info->forw = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
		}
	} else {
		/* Dropped block sits ahead of the survivor. */
		trace_xfs_da_unlink_forward(args);
		save_info->forw = drop_info->forw;
		if (drop_info->forw) {
			/* Repoint the dropped block's successor backward. */
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->forw),
						&bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
			tmp_info->back = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
		}
	}

	/* Log the surviving block's updated sibling pointers. */
	xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
	return 0;
}
1955
1956 /*
1957  * Move a path "forward" or "!forward" one block at the current level.
1958  *
1959  * This routine will adjust a "path" to point to the next block
1960  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1961  * Btree, including updating pointers to the intermediate nodes between
1962  * the new bottom and the root.
1963  */
int							/* error */
xfs_da3_path_shift(
	struct xfs_da_state	*state,
	struct xfs_da_state_path *path,
	int			forward,
	int			release,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*info;
	struct xfs_da_args	*args;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_buf		*bp;
	xfs_dablk_t		blkno = 0;
	int			level;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_path_shift(state->args);

	/*
	 * Roll up the Btree looking for the first block where our
	 * current index is not at the edge of the block.  Note that
	 * we skip the bottom layer because we want the sibling block.
	 */
	args = state->args;
	ASSERT(args != NULL);
	ASSERT(path != NULL);
	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	level = (path->active-1) - 1;	/* skip bottom layer in path */
	for (; level >= 0; level--) {
		blk = &path->blk[level];
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
					   blk->bp->b_addr);

		/* Step one entry sideways in the requested direction. */
		if (forward && (blk->index < nodehdr.count - 1)) {
			blk->index++;
			blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
			break;
		} else if (!forward && (blk->index > 0)) {
			blk->index--;
			blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
			break;
		}
	}
	if (level < 0) {
		*result = -ENOENT;	/* we're out of our tree */
		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
		return 0;
	}

	/*
	 * Roll down the edge of the subtree until we reach the
	 * same depth we were at originally.
	 */
	for (blk++, level++; level < path->active; blk++, level++) {
		/*
		 * Read the next child block into a local buffer.
		 */
		error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
					  args->whichfork);
		if (error)
			return error;

		/*
		 * Release the old block (if it's dirty, the trans doesn't
		 * actually let go) and swap the local buffer into the path
		 * structure. This ensures failure of the above read doesn't set
		 * a NULL buffer in an active slot in the path.
		 */
		if (release)
			xfs_trans_brelse(args->trans, blk->bp);
		blk->blkno = blkno;
		blk->bp = bp;

		info = blk->bp->b_addr;
		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));


		/*
		 * Note: we flatten the magic number to a single type so we
		 * don't have to compare against crc/non-crc types elsewhere.
		 */
		switch (be16_to_cpu(info->magic)) {
		case XFS_DA_NODE_MAGIC:
		case XFS_DA3_NODE_MAGIC:
			blk->magic = XFS_DA_NODE_MAGIC;
			xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
						   bp->b_addr);
			btree = nodehdr.btree;
			blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
			/* Follow the near edge: first entry going forward,
			 * last entry going backward. */
			if (forward)
				blk->index = 0;
			else
				blk->index = nodehdr.count - 1;
			blkno = be32_to_cpu(btree[blk->index].before);
			break;
		case XFS_ATTR_LEAF_MAGIC:
		case XFS_ATTR3_LEAF_MAGIC:
			blk->magic = XFS_ATTR_LEAF_MAGIC;
			ASSERT(level == path->active-1);
			blk->index = 0;
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
		case XFS_DIR3_LEAFN_MAGIC:
			blk->magic = XFS_DIR2_LEAFN_MAGIC;
			ASSERT(level == path->active-1);
			blk->index = 0;
			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
							      blk->bp, NULL);
			break;
		default:
			ASSERT(0);
			break;
		}
	}
	*result = 0;
	return 0;
}
2090
2091
2092 /*========================================================================
2093  * Utility routines.
2094  *========================================================================*/
2095
2096 /*
2097  * Implement a simple hash on a character string.
2098  * Rotate the hash value by 7 bits, then XOR each character in.
2099  * This is implemented with some source-level loop unrolling.
2100  */
2101 xfs_dahash_t
2102 xfs_da_hashname(const uint8_t *name, int namelen)
2103 {
2104         xfs_dahash_t hash;
2105
2106         /*
2107          * Do four characters at a time as long as we can.
2108          */
2109         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
2110                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
2111                        (name[3] << 0) ^ rol32(hash, 7 * 4);
2112
2113         /*
2114          * Now do the rest of the characters.
2115          */
2116         switch (namelen) {
2117         case 3:
2118                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
2119                        rol32(hash, 7 * 3);
2120         case 2:
2121                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
2122         case 1:
2123                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
2124         default: /* case 0: */
2125                 return hash;
2126         }
2127 }
2128
2129 enum xfs_dacmp
2130 xfs_da_compname(
2131         struct xfs_da_args *args,
2132         const unsigned char *name,
2133         int             len)
2134 {
2135         return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
2136                                         XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
2137 }
2138
/*
 * Allocate and map 'count' new da-btree blocks at *bno in the given fork.
 * On success *bno holds the first new file offset.  Returns 0 or a
 * negative errno; -ENOSPC if a fully contiguous-in-file-offset range
 * could not be mapped.
 */
int
xfs_da_grow_inode_int(
	struct xfs_da_args	*args,
	xfs_fileoff_t		*bno,
	int			count)
{
	struct xfs_trans	*tp = args->trans;
	struct xfs_inode	*dp = args->dp;
	int			w = args->whichfork;
	xfs_rfsblock_t		nblks = dp->i_nblocks;	/* pre-alloc block count */
	struct xfs_bmbt_irec	map, *mapp;
	int			nmap, error, got, i, mapi;

	/*
	 * Find a spot in the file space to put the new block.
	 */
	error = xfs_bmap_first_unused(tp, dp, count, bno, w);
	if (error)
		return error;

	/*
	 * Try mapping it in one filesystem block.
	 */
	nmap = 1;
	error = xfs_bmapi_write(tp, dp, *bno, count,
			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
			args->total, &map, &nmap);
	if (error)
		return error;

	ASSERT(nmap <= 1);
	if (nmap == 1) {
		/* Got it all in one contiguous extent. */
		mapp = &map;
		mapi = 1;
	} else if (nmap == 0 && count > 1) {
		xfs_fileoff_t		b;
		int			c;

		/*
		 * If we didn't get it and the block might work if fragmented,
		 * try without the CONTIG flag.  Loop until we get it all.
		 */
		mapp = kmem_alloc(sizeof(*mapp) * count, 0);
		for (b = *bno, mapi = 0; b < *bno + count; ) {
			/* nmap is the in-parameter: max mappings this call. */
			nmap = min(XFS_BMAP_MAX_NMAP, count);
			c = (int)(*bno + count - b);
			error = xfs_bmapi_write(tp, dp, b, c,
					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
					args->total, &mapp[mapi], &nmap);
			if (error)
				goto out_free_map;
			if (nmap < 1)
				break;
			mapi += nmap;
			b = mapp[mapi - 1].br_startoff +
			    mapp[mapi - 1].br_blockcount;
		}
	} else {
		mapi = 0;
		mapp = NULL;
	}

	/*
	 * Count the blocks we got, make sure it matches the total.
	 * When mapi == 0, got (0) != count short-circuits before the
	 * mapp[0] dereference — assumes count >= 1 (callers pass
	 * geo->fsbcount); TODO confirm no zero-count caller exists.
	 */
	for (i = 0, got = 0; i < mapi; i++)
		got += mapp[i].br_blockcount;
	if (got != count || mapp[0].br_startoff != *bno ||
	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
	    *bno + count) {
		error = -ENOSPC;
		goto out_free_map;
	}

	/* account for newly allocated blocks in reserved blocks total */
	args->total -= dp->i_nblocks - nblks;

out_free_map:
	if (mapp != &map)
		kmem_free(mapp);
	return error;
}
2221
2222 /*
2223  * Add a block to the btree ahead of the file.
2224  * Return the new block number to the caller.
2225  */
2226 int
2227 xfs_da_grow_inode(
2228         struct xfs_da_args      *args,
2229         xfs_dablk_t             *new_blkno)
2230 {
2231         xfs_fileoff_t           bno;
2232         int                     error;
2233
2234         trace_xfs_da_grow_inode(args);
2235
2236         bno = args->geo->leafblk;
2237         error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
2238         if (!error)
2239                 *new_blkno = (xfs_dablk_t)bno;
2240         return error;
2241 }
2242
/*
 * Ick.  We need to always be able to remove a btree block, even
 * if there's no space reservation because the filesystem is full.
 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
 * It swaps the target block with the last block in the file.  The
 * last block in the file can always be removed since it can't cause
 * a bmap btree split to do that.
 *
 * On success, *dead_blknop/*dead_bufp are updated to refer to what was
 * previously the last block, which the caller can then unmap safely.
 * Only the data fork (directories) is supported here.
 */
STATIC int
xfs_da3_swap_lastblock(
	struct xfs_da_args	*args,
	xfs_dablk_t		*dead_blknop,
	struct xfs_buf		**dead_bufp)
{
	struct xfs_da_blkinfo	*dead_info;
	struct xfs_da_blkinfo	*sib_info;
	struct xfs_da_intnode	*par_node;
	struct xfs_da_intnode	*dead_node;
	struct xfs_dir2_leaf	*dead_leaf2;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr par_hdr;
	struct xfs_inode	*dp;
	struct xfs_trans	*tp;
	struct xfs_mount	*mp;
	struct xfs_buf		*dead_buf;
	struct xfs_buf		*last_buf;
	struct xfs_buf		*sib_buf;
	struct xfs_buf		*par_buf;
	xfs_dahash_t		dead_hash;
	xfs_fileoff_t		lastoff;
	xfs_dablk_t		dead_blkno;
	xfs_dablk_t		last_blkno;
	xfs_dablk_t		sib_blkno;
	xfs_dablk_t		par_blkno;
	int			error;
	int			w;
	int			entno;
	int			level;
	int			dead_level;

	trace_xfs_da_swap_lastblock(args);

	dead_buf = *dead_bufp;
	dead_blkno = *dead_blknop;
	tp = args->trans;
	dp = args->dp;
	w = args->whichfork;
	ASSERT(w == XFS_DATA_FORK);
	mp = dp->i_mount;
	/* Find the last mapped offset before the free-space region. */
	lastoff = args->geo->freeblk;
	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, lastoff == 0))
		return -EFSCORRUPTED;
	/*
	 * Read the last block in the btree space.
	 */
	last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
	error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
	if (error)
		return error;
	/*
	 * Copy the last block into the dead buffer and log it.
	 * From here on dead_buf holds the former last block's contents.
	 */
	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
	dead_info = dead_buf->b_addr;
	/*
	 * Get values from the moved block: its tree level and highest
	 * hash value, needed to locate its parent entry below.
	 */
	if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	    dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_dir3_icleaf_hdr leafhdr;
		struct xfs_dir2_leaf_entry *ents;

		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
		xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr,
					    dead_leaf2);
		ents = leafhdr.ents;
		dead_level = 0;
		dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
	} else {
		struct xfs_da3_icnode_hdr deadhdr;

		dead_node = (xfs_da_intnode_t *)dead_info;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node);
		btree = deadhdr.btree;
		dead_level = deadhdr.level;
		dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
	}
	sib_buf = par_buf = NULL;
	/*
	 * If the moved block has a left sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
		error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
		if (error)
			goto done;
		sib_info = sib_buf->b_addr;
		if (XFS_IS_CORRUPT(mp,
				   be32_to_cpu(sib_info->forw) != last_blkno ||
				   sib_info->magic != dead_info->magic)) {
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info->forw = cpu_to_be32(dead_blkno);
		xfs_trans_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
					sizeof(sib_info->forw)));
		/* Logged buffers stay attached to the transaction; clear the
		 * pointer so the error path doesn't brelse it. */
		sib_buf = NULL;
	}
	/*
	 * If the moved block has a right sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
		error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
		if (error)
			goto done;
		sib_info = sib_buf->b_addr;
		if (XFS_IS_CORRUPT(mp,
				   be32_to_cpu(sib_info->back) != last_blkno ||
				   sib_info->magic != dead_info->magic)) {
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info->back = cpu_to_be32(dead_blkno);
		xfs_trans_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
					sizeof(sib_info->back)));
		sib_buf = NULL;
	}
	par_blkno = args->geo->leafblk;
	level = -1;
	/*
	 * Walk down the tree looking for the parent of the moved block.
	 * Descend by hash value; stop one level above the moved block.
	 */
	for (;;) {
		error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
		if (error)
			goto done;
		par_node = par_buf->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
		/* Levels must decrease by exactly one per step down. */
		if (XFS_IS_CORRUPT(mp,
				   level >= 0 && level != par_hdr.level + 1)) {
			error = -EFSCORRUPTED;
			goto done;
		}
		level = par_hdr.level;
		btree = par_hdr.btree;
		for (entno = 0;
		     entno < par_hdr.count &&
		     be32_to_cpu(btree[entno].hashval) < dead_hash;
		     entno++)
			continue;
		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
			error = -EFSCORRUPTED;
			goto done;
		}
		par_blkno = be32_to_cpu(btree[entno].before);
		if (level == dead_level + 1)
			break;
		xfs_trans_brelse(tp, par_buf);
		par_buf = NULL;
	}
	/*
	 * We're in the right parent block.
	 * Look for the right entry.  Hash collisions mean the entry may be
	 * in a right sibling of this node, so follow forw pointers as
	 * needed.
	 */
	for (;;) {
		for (;
		     entno < par_hdr.count &&
		     be32_to_cpu(btree[entno].before) != last_blkno;
		     entno++)
			continue;
		if (entno < par_hdr.count)
			break;
		par_blkno = par_hdr.forw;
		xfs_trans_brelse(tp, par_buf);
		par_buf = NULL;
		/* Running off the end of the sibling chain means the entry
		 * for last_blkno doesn't exist anywhere: corruption. */
		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
			error = -EFSCORRUPTED;
			goto done;
		}
		error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
		if (error)
			goto done;
		par_node = par_buf->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
			error = -EFSCORRUPTED;
			goto done;
		}
		btree = par_hdr.btree;
		entno = 0;
	}
	/*
	 * Update the parent entry pointing to the moved block.
	 */
	btree[entno].before = cpu_to_be32(dead_blkno);
	xfs_trans_log_buf(tp, par_buf,
		XFS_DA_LOGRANGE(par_node, &btree[entno].before,
				sizeof(btree[entno].before)));
	/* Hand the (former) last block back as the new victim to unmap. */
	*dead_blknop = last_blkno;
	*dead_bufp = last_buf;
	return 0;
done:
	if (par_buf)
		xfs_trans_brelse(tp, par_buf);
	if (sib_buf)
		xfs_trans_brelse(tp, sib_buf);
	xfs_trans_brelse(tp, last_buf);
	return error;
}
2457
2458 /*
2459  * Remove a btree block from a directory or attribute.
2460  */
2461 int
2462 xfs_da_shrink_inode(
2463         struct xfs_da_args      *args,
2464         xfs_dablk_t             dead_blkno,
2465         struct xfs_buf          *dead_buf)
2466 {
2467         struct xfs_inode        *dp;
2468         int                     done, error, w, count;
2469         struct xfs_trans        *tp;
2470
2471         trace_xfs_da_shrink_inode(args);
2472
2473         dp = args->dp;
2474         w = args->whichfork;
2475         tp = args->trans;
2476         count = args->geo->fsbcount;
2477         for (;;) {
2478                 /*
2479                  * Remove extents.  If we get ENOSPC for a dir we have to move
2480                  * the last block to the place we want to kill.
2481                  */
2482                 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2483                                     xfs_bmapi_aflag(w), 0, &done);
2484                 if (error == -ENOSPC) {
2485                         if (w != XFS_DATA_FORK)
2486                                 break;
2487                         error = xfs_da3_swap_lastblock(args, &dead_blkno,
2488                                                       &dead_buf);
2489                         if (error)
2490                                 break;
2491                 } else {
2492                         break;
2493                 }
2494         }
2495         xfs_trans_binval(tp, dead_buf);
2496         return error;
2497 }
2498
/*
 * Map a dablk-space block number in the given fork to the list of disk
 * addresses (buffer map entries) backing it.
 *
 * On entry *mapp points at a caller-supplied single-entry map; if the
 * block is backed by more than one extent a larger map is allocated and
 * *mapp is repointed at it, in which case the caller must kmem_free() it.
 * *nmaps returns the number of valid entries, or 0 if the region is a
 * hole and the caller passed XFS_DABUF_MAP_HOLE_OK.  Any other hole,
 * delalloc, or discontiguous mapping is treated as corruption.
 */
static int
xfs_dabuf_map(
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	unsigned int		flags,
	int			whichfork,
	struct xfs_buf_map	**mapp,
	int			*nmaps)
{
	struct xfs_mount	*mp = dp->i_mount;
	int			nfsb = xfs_dabuf_nfsb(mp, whichfork);
	struct xfs_bmbt_irec	irec, *irecs = &irec;
	struct xfs_buf_map	*map = *mapp;
	xfs_fileoff_t		off = bno;
	int			error = 0, nirecs, i;

	/* Multi-fsb blocks may span several extents; get room for them. */
	if (nfsb > 1)
		irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_NOFS);

	nirecs = nfsb;
	error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
			xfs_bmapi_aflag(whichfork));
	if (error)
		goto out_free_irecs;

	/*
	 * Use the caller provided map for the single map case, else allocate a
	 * larger one that needs to be free by the caller.
	 */
	if (nirecs > 1) {
		map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS);
		if (!map) {
			error = -ENOMEM;
			goto out_free_irecs;
		}
		*mapp = map;
	}

	/* Translate each extent to a disk address, rejecting holes and
	 * delalloc extents, and verifying the mapping is contiguous in
	 * file offset space. */
	for (i = 0; i < nirecs; i++) {
		if (irecs[i].br_startblock == HOLESTARTBLOCK ||
		    irecs[i].br_startblock == DELAYSTARTBLOCK)
			goto invalid_mapping;
		if (off != irecs[i].br_startoff)
			goto invalid_mapping;

		map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
		map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
		off += irecs[i].br_blockcount;
	}

	/* The extents must cover the whole da block, no short mappings. */
	if (off != bno + nfsb)
		goto invalid_mapping;

	*nmaps = nirecs;
out_free_irecs:
	if (irecs != &irec)
		kmem_free(irecs);
	return error;

invalid_mapping:
	/* Caller ok with no mapping. */
	if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
		error = -EFSCORRUPTED;
		/* Dump the bad extent list to the log to aid diagnosis. */
		if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
			xfs_alert(mp, "%s: bno %u inode %llu",
					__func__, bno, dp->i_ino);

			for (i = 0; i < nirecs; i++) {
				xfs_alert(mp,
"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
					i, irecs[i].br_startoff,
					irecs[i].br_startblock,
					irecs[i].br_blockcount,
					irecs[i].br_state);
			}
		}
	} else {
		*nmaps = 0;
	}
	goto out_free_irecs;
}
2580
2581 /*
2582  * Get a buffer for the dir/attr block.
2583  */
2584 int
2585 xfs_da_get_buf(
2586         struct xfs_trans        *tp,
2587         struct xfs_inode        *dp,
2588         xfs_dablk_t             bno,
2589         struct xfs_buf          **bpp,
2590         int                     whichfork)
2591 {
2592         struct xfs_mount        *mp = dp->i_mount;
2593         struct xfs_buf          *bp;
2594         struct xfs_buf_map      map, *mapp = &map;
2595         int                     nmap = 1;
2596         int                     error;
2597
2598         *bpp = NULL;
2599         error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap);
2600         if (error || nmap == 0)
2601                 goto out_free;
2602
2603         error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
2604         if (error)
2605                 goto out_free;
2606
2607         *bpp = bp;
2608
2609 out_free:
2610         if (mapp != &map)
2611                 kmem_free(mapp);
2612
2613         return error;
2614 }
2615
2616 /*
2617  * Get a buffer for the dir/attr block, fill in the contents.
2618  */
2619 int
2620 xfs_da_read_buf(
2621         struct xfs_trans        *tp,
2622         struct xfs_inode        *dp,
2623         xfs_dablk_t             bno,
2624         unsigned int            flags,
2625         struct xfs_buf          **bpp,
2626         int                     whichfork,
2627         const struct xfs_buf_ops *ops)
2628 {
2629         struct xfs_mount        *mp = dp->i_mount;
2630         struct xfs_buf          *bp;
2631         struct xfs_buf_map      map, *mapp = &map;
2632         int                     nmap = 1;
2633         int                     error;
2634
2635         *bpp = NULL;
2636         error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2637         if (error || !nmap)
2638                 goto out_free;
2639
2640         error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
2641                         &bp, ops);
2642         if (error)
2643                 goto out_free;
2644
2645         if (whichfork == XFS_ATTR_FORK)
2646                 xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2647         else
2648                 xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2649         *bpp = bp;
2650 out_free:
2651         if (mapp != &map)
2652                 kmem_free(mapp);
2653
2654         return error;
2655 }
2656
2657 /*
2658  * Readahead the dir/attr block.
2659  */
2660 int
2661 xfs_da_reada_buf(
2662         struct xfs_inode        *dp,
2663         xfs_dablk_t             bno,
2664         unsigned int            flags,
2665         int                     whichfork,
2666         const struct xfs_buf_ops *ops)
2667 {
2668         struct xfs_buf_map      map;
2669         struct xfs_buf_map      *mapp;
2670         int                     nmap;
2671         int                     error;
2672
2673         mapp = &map;
2674         nmap = 1;
2675         error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2676         if (error || !nmap)
2677                 goto out_free;
2678
2679         xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
2680
2681 out_free:
2682         if (mapp != &map)
2683                 kmem_free(mapp);
2684
2685         return error;
2686 }