Merge tag 'riscv-for-linus-4.16-merge_window' of git://git.kernel.org/pub/scm/linux...
[linux-2.6-microblaze.git] / fs / xfs / libxfs / xfs_da_btree.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * Copyright (c) 2013 Red Hat, Inc.
4  * All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it would be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write the Free Software Foundation,
17  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 #include "xfs.h"
20 #include "xfs_fs.h"
21 #include "xfs_shared.h"
22 #include "xfs_format.h"
23 #include "xfs_log_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_bit.h"
26 #include "xfs_mount.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_dir2.h"
30 #include "xfs_dir2_priv.h"
31 #include "xfs_inode.h"
32 #include "xfs_trans.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_alloc.h"
35 #include "xfs_bmap.h"
36 #include "xfs_attr.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_error.h"
39 #include "xfs_trace.h"
40 #include "xfs_cksum.h"
41 #include "xfs_buf_item.h"
42 #include "xfs_log.h"
43
44 /*
45  * xfs_da_btree.c
46  *
47  * Routines to implement directories as Btrees of hashed names.
48  */
49
50 /*========================================================================
51  * Function prototypes for the kernel.
52  *========================================================================*/
53
54 /*
55  * Routines used for growing the Btree.
56  */
57 STATIC int xfs_da3_root_split(xfs_da_state_t *state,
58                                             xfs_da_state_blk_t *existing_root,
59                                             xfs_da_state_blk_t *new_child);
60 STATIC int xfs_da3_node_split(xfs_da_state_t *state,
61                                             xfs_da_state_blk_t *existing_blk,
62                                             xfs_da_state_blk_t *split_blk,
63                                             xfs_da_state_blk_t *blk_to_add,
64                                             int treelevel,
65                                             int *result);
66 STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
67                                          xfs_da_state_blk_t *node_blk_1,
68                                          xfs_da_state_blk_t *node_blk_2);
69 STATIC void xfs_da3_node_add(xfs_da_state_t *state,
70                                    xfs_da_state_blk_t *old_node_blk,
71                                    xfs_da_state_blk_t *new_node_blk);
72
73 /*
74  * Routines used for shrinking the Btree.
75  */
76 STATIC int xfs_da3_root_join(xfs_da_state_t *state,
77                                            xfs_da_state_blk_t *root_blk);
78 STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
79 STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
80                                               xfs_da_state_blk_t *drop_blk);
81 STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
82                                          xfs_da_state_blk_t *src_node_blk,
83                                          xfs_da_state_blk_t *dst_node_blk);
84
85 /*
86  * Utility routines.
87  */
88 STATIC int      xfs_da3_blk_unlink(xfs_da_state_t *state,
89                                   xfs_da_state_blk_t *drop_blk,
90                                   xfs_da_state_blk_t *save_blk);
91
92
93 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
94
95 /*
96  * Allocate a dir-state structure.
97  * We don't put them on the stack since they're large.
98  */
99 xfs_da_state_t *
100 xfs_da_state_alloc(void)
101 {
102         return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
103 }
104
105 /*
106  * Kill the altpath contents of a da-state structure.
107  */
108 STATIC void
109 xfs_da_state_kill_altpath(xfs_da_state_t *state)
110 {
111         int     i;
112
113         for (i = 0; i < state->altpath.active; i++)
114                 state->altpath.blk[i].bp = NULL;
115         state->altpath.active = 0;
116 }
117
118 /*
119  * Free a da-state structure.
120  */
121 void
122 xfs_da_state_free(xfs_da_state_t *state)
123 {
124         xfs_da_state_kill_altpath(state);
125 #ifdef DEBUG
126         memset((char *)state, 0, sizeof(*state));
127 #endif /* DEBUG */
128         kmem_zone_free(xfs_da_state_zone, state);
129 }
130
131 static xfs_failaddr_t
132 xfs_da3_node_verify(
133         struct xfs_buf          *bp)
134 {
135         struct xfs_mount        *mp = bp->b_target->bt_mount;
136         struct xfs_da_intnode   *hdr = bp->b_addr;
137         struct xfs_da3_icnode_hdr ichdr;
138         const struct xfs_dir_ops *ops;
139
140         ops = xfs_dir_get_ops(mp, NULL);
141
142         ops->node_hdr_from_disk(&ichdr, hdr);
143
144         if (xfs_sb_version_hascrc(&mp->m_sb)) {
145                 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
146
147                 if (ichdr.magic != XFS_DA3_NODE_MAGIC)
148                         return __this_address;
149
150                 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
151                         return __this_address;
152                 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
153                         return __this_address;
154                 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
155                         return __this_address;
156         } else {
157                 if (ichdr.magic != XFS_DA_NODE_MAGIC)
158                         return __this_address;
159         }
160         if (ichdr.level == 0)
161                 return __this_address;
162         if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
163                 return __this_address;
164         if (ichdr.count == 0)
165                 return __this_address;
166
167         /*
168          * we don't know if the node is for and attribute or directory tree,
169          * so only fail if the count is outside both bounds
170          */
171         if (ichdr.count > mp->m_dir_geo->node_ents &&
172             ichdr.count > mp->m_attr_geo->node_ents)
173                 return __this_address;
174
175         /* XXX: hash order check? */
176
177         return NULL;
178 }
179
180 static void
181 xfs_da3_node_write_verify(
182         struct xfs_buf  *bp)
183 {
184         struct xfs_mount        *mp = bp->b_target->bt_mount;
185         struct xfs_buf_log_item *bip = bp->b_log_item;
186         struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
187         xfs_failaddr_t          fa;
188
189         fa = xfs_da3_node_verify(bp);
190         if (fa) {
191                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
192                 return;
193         }
194
195         if (!xfs_sb_version_hascrc(&mp->m_sb))
196                 return;
197
198         if (bip)
199                 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
200
201         xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
202 }
203
204 /*
205  * leaf/node format detection on trees is sketchy, so a node read can be done on
206  * leaf level blocks when detection identifies the tree as a node format tree
207  * incorrectly. In this case, we need to swap the verifier to match the correct
208  * format of the block being read.
209  */
210 static void
211 xfs_da3_node_read_verify(
212         struct xfs_buf          *bp)
213 {
214         struct xfs_da_blkinfo   *info = bp->b_addr;
215         xfs_failaddr_t          fa;
216
217         switch (be16_to_cpu(info->magic)) {
218                 case XFS_DA3_NODE_MAGIC:
219                         if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
220                                 xfs_verifier_error(bp, -EFSBADCRC,
221                                                 __this_address);
222                                 break;
223                         }
224                         /* fall through */
225                 case XFS_DA_NODE_MAGIC:
226                         fa = xfs_da3_node_verify(bp);
227                         if (fa)
228                                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
229                         return;
230                 case XFS_ATTR_LEAF_MAGIC:
231                 case XFS_ATTR3_LEAF_MAGIC:
232                         bp->b_ops = &xfs_attr3_leaf_buf_ops;
233                         bp->b_ops->verify_read(bp);
234                         return;
235                 case XFS_DIR2_LEAFN_MAGIC:
236                 case XFS_DIR3_LEAFN_MAGIC:
237                         bp->b_ops = &xfs_dir3_leafn_buf_ops;
238                         bp->b_ops->verify_read(bp);
239                         return;
240                 default:
241                         xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
242                         break;
243         }
244 }
245
246 /* Verify the structure of a da3 block. */
247 static xfs_failaddr_t
248 xfs_da3_node_verify_struct(
249         struct xfs_buf          *bp)
250 {
251         struct xfs_da_blkinfo   *info = bp->b_addr;
252
253         switch (be16_to_cpu(info->magic)) {
254         case XFS_DA3_NODE_MAGIC:
255         case XFS_DA_NODE_MAGIC:
256                 return xfs_da3_node_verify(bp);
257         case XFS_ATTR_LEAF_MAGIC:
258         case XFS_ATTR3_LEAF_MAGIC:
259                 bp->b_ops = &xfs_attr3_leaf_buf_ops;
260                 return bp->b_ops->verify_struct(bp);
261         case XFS_DIR2_LEAFN_MAGIC:
262         case XFS_DIR3_LEAFN_MAGIC:
263                 bp->b_ops = &xfs_dir3_leafn_buf_ops;
264                 return bp->b_ops->verify_struct(bp);
265         default:
266                 return __this_address;
267         }
268 }
269
270 const struct xfs_buf_ops xfs_da3_node_buf_ops = {
271         .name = "xfs_da3_node",
272         .verify_read = xfs_da3_node_read_verify,
273         .verify_write = xfs_da3_node_write_verify,
274         .verify_struct = xfs_da3_node_verify_struct,
275 };
276
277 int
278 xfs_da3_node_read(
279         struct xfs_trans        *tp,
280         struct xfs_inode        *dp,
281         xfs_dablk_t             bno,
282         xfs_daddr_t             mappedbno,
283         struct xfs_buf          **bpp,
284         int                     which_fork)
285 {
286         int                     err;
287
288         err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
289                                         which_fork, &xfs_da3_node_buf_ops);
290         if (!err && tp && *bpp) {
291                 struct xfs_da_blkinfo   *info = (*bpp)->b_addr;
292                 int                     type;
293
294                 switch (be16_to_cpu(info->magic)) {
295                 case XFS_DA_NODE_MAGIC:
296                 case XFS_DA3_NODE_MAGIC:
297                         type = XFS_BLFT_DA_NODE_BUF;
298                         break;
299                 case XFS_ATTR_LEAF_MAGIC:
300                 case XFS_ATTR3_LEAF_MAGIC:
301                         type = XFS_BLFT_ATTR_LEAF_BUF;
302                         break;
303                 case XFS_DIR2_LEAFN_MAGIC:
304                 case XFS_DIR3_LEAFN_MAGIC:
305                         type = XFS_BLFT_DIR_LEAFN_BUF;
306                         break;
307                 default:
308                         type = 0;
309                         ASSERT(0);
310                         break;
311                 }
312                 xfs_trans_buf_set_type(tp, *bpp, type);
313         }
314         return err;
315 }
316
317 /*========================================================================
318  * Routines used for growing the Btree.
319  *========================================================================*/
320
321 /*
322  * Create the initial contents of an intermediate node.
323  */
324 int
325 xfs_da3_node_create(
326         struct xfs_da_args      *args,
327         xfs_dablk_t             blkno,
328         int                     level,
329         struct xfs_buf          **bpp,
330         int                     whichfork)
331 {
332         struct xfs_da_intnode   *node;
333         struct xfs_trans        *tp = args->trans;
334         struct xfs_mount        *mp = tp->t_mountp;
335         struct xfs_da3_icnode_hdr ichdr = {0};
336         struct xfs_buf          *bp;
337         int                     error;
338         struct xfs_inode        *dp = args->dp;
339
340         trace_xfs_da_node_create(args);
341         ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
342
343         error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
344         if (error)
345                 return error;
346         bp->b_ops = &xfs_da3_node_buf_ops;
347         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
348         node = bp->b_addr;
349
350         if (xfs_sb_version_hascrc(&mp->m_sb)) {
351                 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
352
353                 memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
354                 ichdr.magic = XFS_DA3_NODE_MAGIC;
355                 hdr3->info.blkno = cpu_to_be64(bp->b_bn);
356                 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
357                 uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
358         } else {
359                 ichdr.magic = XFS_DA_NODE_MAGIC;
360         }
361         ichdr.level = level;
362
363         dp->d_ops->node_hdr_to_disk(node, &ichdr);
364         xfs_trans_log_buf(tp, bp,
365                 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
366
367         *bpp = bp;
368         return 0;
369 }
370
371 /*
372  * Split a leaf node, rebalance, then possibly split
373  * intermediate nodes, rebalance, etc.
374  */
375 int                                                     /* error */
376 xfs_da3_split(
377         struct xfs_da_state     *state)
378 {
379         struct xfs_da_state_blk *oldblk;
380         struct xfs_da_state_blk *newblk;
381         struct xfs_da_state_blk *addblk;
382         struct xfs_da_intnode   *node;
383         int                     max;
384         int                     action = 0;
385         int                     error;
386         int                     i;
387
388         trace_xfs_da_split(state->args);
389
390         /*
391          * Walk back up the tree splitting/inserting/adjusting as necessary.
392          * If we need to insert and there isn't room, split the node, then
393          * decide which fragment to insert the new block from below into.
394          * Note that we may split the root this way, but we need more fixup.
395          */
396         max = state->path.active - 1;
397         ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
398         ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
399                state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
400
401         addblk = &state->path.blk[max];         /* initial dummy value */
402         for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
403                 oldblk = &state->path.blk[i];
404                 newblk = &state->altpath.blk[i];
405
406                 /*
407                  * If a leaf node then
408                  *     Allocate a new leaf node, then rebalance across them.
409                  * else if an intermediate node then
410                  *     We split on the last layer, must we split the node?
411                  */
412                 switch (oldblk->magic) {
413                 case XFS_ATTR_LEAF_MAGIC:
414                         error = xfs_attr3_leaf_split(state, oldblk, newblk);
415                         if ((error != 0) && (error != -ENOSPC)) {
416                                 return error;   /* GROT: attr is inconsistent */
417                         }
418                         if (!error) {
419                                 addblk = newblk;
420                                 break;
421                         }
422                         /*
423                          * Entry wouldn't fit, split the leaf again. The new
424                          * extrablk will be consumed by xfs_da3_node_split if
425                          * the node is split.
426                          */
427                         state->extravalid = 1;
428                         if (state->inleaf) {
429                                 state->extraafter = 0;  /* before newblk */
430                                 trace_xfs_attr_leaf_split_before(state->args);
431                                 error = xfs_attr3_leaf_split(state, oldblk,
432                                                             &state->extrablk);
433                         } else {
434                                 state->extraafter = 1;  /* after newblk */
435                                 trace_xfs_attr_leaf_split_after(state->args);
436                                 error = xfs_attr3_leaf_split(state, newblk,
437                                                             &state->extrablk);
438                         }
439                         if (error)
440                                 return error;   /* GROT: attr inconsistent */
441                         addblk = newblk;
442                         break;
443                 case XFS_DIR2_LEAFN_MAGIC:
444                         error = xfs_dir2_leafn_split(state, oldblk, newblk);
445                         if (error)
446                                 return error;
447                         addblk = newblk;
448                         break;
449                 case XFS_DA_NODE_MAGIC:
450                         error = xfs_da3_node_split(state, oldblk, newblk, addblk,
451                                                          max - i, &action);
452                         addblk->bp = NULL;
453                         if (error)
454                                 return error;   /* GROT: dir is inconsistent */
455                         /*
456                          * Record the newly split block for the next time thru?
457                          */
458                         if (action)
459                                 addblk = newblk;
460                         else
461                                 addblk = NULL;
462                         break;
463                 }
464
465                 /*
466                  * Update the btree to show the new hashval for this child.
467                  */
468                 xfs_da3_fixhashpath(state, &state->path);
469         }
470         if (!addblk)
471                 return 0;
472
473         /*
474          * xfs_da3_node_split() should have consumed any extra blocks we added
475          * during a double leaf split in the attr fork. This is guaranteed as
476          * we can't be here if the attr fork only has a single leaf block.
477          */
478         ASSERT(state->extravalid == 0 ||
479                state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
480
481         /*
482          * Split the root node.
483          */
484         ASSERT(state->path.active == 0);
485         oldblk = &state->path.blk[0];
486         error = xfs_da3_root_split(state, oldblk, addblk);
487         if (error) {
488                 addblk->bp = NULL;
489                 return error;   /* GROT: dir is inconsistent */
490         }
491
492         /*
493          * Update pointers to the node which used to be block 0 and just got
494          * bumped because of the addition of a new root node.  Note that the
495          * original block 0 could be at any position in the list of blocks in
496          * the tree.
497          *
498          * Note: the magic numbers and sibling pointers are in the same physical
499          * place for both v2 and v3 headers (by design). Hence it doesn't matter
500          * which version of the xfs_da_intnode structure we use here as the
501          * result will be the same using either structure.
502          */
503         node = oldblk->bp->b_addr;
504         if (node->hdr.info.forw) {
505                 ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
506                 node = addblk->bp->b_addr;
507                 node->hdr.info.back = cpu_to_be32(oldblk->blkno);
508                 xfs_trans_log_buf(state->args->trans, addblk->bp,
509                                   XFS_DA_LOGRANGE(node, &node->hdr.info,
510                                   sizeof(node->hdr.info)));
511         }
512         node = oldblk->bp->b_addr;
513         if (node->hdr.info.back) {
514                 ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
515                 node = addblk->bp->b_addr;
516                 node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
517                 xfs_trans_log_buf(state->args->trans, addblk->bp,
518                                   XFS_DA_LOGRANGE(node, &node->hdr.info,
519                                   sizeof(node->hdr.info)));
520         }
521         addblk->bp = NULL;
522         return 0;
523 }
524
525 /*
526  * Split the root.  We have to create a new root and point to the two
527  * parts (the split old root) that we just created.  Copy block zero to
528  * the EOF, extending the inode in process.
529  */
530 STATIC int                                              /* error */
531 xfs_da3_root_split(
532         struct xfs_da_state     *state,
533         struct xfs_da_state_blk *blk1,
534         struct xfs_da_state_blk *blk2)
535 {
536         struct xfs_da_intnode   *node;
537         struct xfs_da_intnode   *oldroot;
538         struct xfs_da_node_entry *btree;
539         struct xfs_da3_icnode_hdr nodehdr;
540         struct xfs_da_args      *args;
541         struct xfs_buf          *bp;
542         struct xfs_inode        *dp;
543         struct xfs_trans        *tp;
544         struct xfs_dir2_leaf    *leaf;
545         xfs_dablk_t             blkno;
546         int                     level;
547         int                     error;
548         int                     size;
549
550         trace_xfs_da_root_split(state->args);
551
552         /*
553          * Copy the existing (incorrect) block from the root node position
554          * to a free space somewhere.
555          */
556         args = state->args;
557         error = xfs_da_grow_inode(args, &blkno);
558         if (error)
559                 return error;
560
561         dp = args->dp;
562         tp = args->trans;
563         error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
564         if (error)
565                 return error;
566         node = bp->b_addr;
567         oldroot = blk1->bp->b_addr;
568         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
569             oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
570                 struct xfs_da3_icnode_hdr icnodehdr;
571
572                 dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
573                 btree = dp->d_ops->node_tree_p(oldroot);
574                 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
575                 level = icnodehdr.level;
576
577                 /*
578                  * we are about to copy oldroot to bp, so set up the type
579                  * of bp while we know exactly what it will be.
580                  */
581                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
582         } else {
583                 struct xfs_dir3_icleaf_hdr leafhdr;
584                 struct xfs_dir2_leaf_entry *ents;
585
586                 leaf = (xfs_dir2_leaf_t *)oldroot;
587                 dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
588                 ents = dp->d_ops->leaf_ents_p(leaf);
589
590                 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
591                        leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
592                 size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
593                 level = 0;
594
595                 /*
596                  * we are about to copy oldroot to bp, so set up the type
597                  * of bp while we know exactly what it will be.
598                  */
599                 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
600         }
601
602         /*
603          * we can copy most of the information in the node from one block to
604          * another, but for CRC enabled headers we have to make sure that the
605          * block specific identifiers are kept intact. We update the buffer
606          * directly for this.
607          */
608         memcpy(node, oldroot, size);
609         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
610             oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
611                 struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
612
613                 node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
614         }
615         xfs_trans_log_buf(tp, bp, 0, size - 1);
616
617         bp->b_ops = blk1->bp->b_ops;
618         xfs_trans_buf_copy_type(bp, blk1->bp);
619         blk1->bp = bp;
620         blk1->blkno = blkno;
621
622         /*
623          * Set up the new root node.
624          */
625         error = xfs_da3_node_create(args,
626                 (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
627                 level + 1, &bp, args->whichfork);
628         if (error)
629                 return error;
630
631         node = bp->b_addr;
632         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
633         btree = dp->d_ops->node_tree_p(node);
634         btree[0].hashval = cpu_to_be32(blk1->hashval);
635         btree[0].before = cpu_to_be32(blk1->blkno);
636         btree[1].hashval = cpu_to_be32(blk2->hashval);
637         btree[1].before = cpu_to_be32(blk2->blkno);
638         nodehdr.count = 2;
639         dp->d_ops->node_hdr_to_disk(node, &nodehdr);
640
641 #ifdef DEBUG
642         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
643             oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
644                 ASSERT(blk1->blkno >= args->geo->leafblk &&
645                        blk1->blkno < args->geo->freeblk);
646                 ASSERT(blk2->blkno >= args->geo->leafblk &&
647                        blk2->blkno < args->geo->freeblk);
648         }
649 #endif
650
651         /* Header is already logged by xfs_da_node_create */
652         xfs_trans_log_buf(tp, bp,
653                 XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
654
655         return 0;
656 }
657
658 /*
659  * Split the node, rebalance, then add the new entry.
660  */
661 STATIC int                                              /* error */
662 xfs_da3_node_split(
663         struct xfs_da_state     *state,
664         struct xfs_da_state_blk *oldblk,
665         struct xfs_da_state_blk *newblk,
666         struct xfs_da_state_blk *addblk,
667         int                     treelevel,
668         int                     *result)
669 {
670         struct xfs_da_intnode   *node;
671         struct xfs_da3_icnode_hdr nodehdr;
672         xfs_dablk_t             blkno;
673         int                     newcount;
674         int                     error;
675         int                     useextra;
676         struct xfs_inode        *dp = state->args->dp;
677
678         trace_xfs_da_node_split(state->args);
679
680         node = oldblk->bp->b_addr;
681         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
682
683         /*
684          * With V2 dirs the extra block is data or freespace.
685          */
686         useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
687         newcount = 1 + useextra;
688         /*
689          * Do we have to split the node?
690          */
691         if (nodehdr.count + newcount > state->args->geo->node_ents) {
692                 /*
693                  * Allocate a new node, add to the doubly linked chain of
694                  * nodes, then move some of our excess entries into it.
695                  */
696                 error = xfs_da_grow_inode(state->args, &blkno);
697                 if (error)
698                         return error;   /* GROT: dir is inconsistent */
699
700                 error = xfs_da3_node_create(state->args, blkno, treelevel,
701                                            &newblk->bp, state->args->whichfork);
702                 if (error)
703                         return error;   /* GROT: dir is inconsistent */
704                 newblk->blkno = blkno;
705                 newblk->magic = XFS_DA_NODE_MAGIC;
706                 xfs_da3_node_rebalance(state, oldblk, newblk);
707                 error = xfs_da3_blk_link(state, oldblk, newblk);
708                 if (error)
709                         return error;
710                 *result = 1;
711         } else {
712                 *result = 0;
713         }
714
715         /*
716          * Insert the new entry(s) into the correct block
717          * (updating last hashval in the process).
718          *
719          * xfs_da3_node_add() inserts BEFORE the given index,
720          * and as a result of using node_lookup_int() we always
721          * point to a valid entry (not after one), but a split
722          * operation always results in a new block whose hashvals
723          * FOLLOW the current block.
724          *
725          * If we had double-split op below us, then add the extra block too.
726          */
727         node = oldblk->bp->b_addr;
728         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
729         if (oldblk->index <= nodehdr.count) {
730                 oldblk->index++;
731                 xfs_da3_node_add(state, oldblk, addblk);
732                 if (useextra) {
733                         if (state->extraafter)
734                                 oldblk->index++;
735                         xfs_da3_node_add(state, oldblk, &state->extrablk);
736                         state->extravalid = 0;
737                 }
738         } else {
739                 newblk->index++;
740                 xfs_da3_node_add(state, newblk, addblk);
741                 if (useextra) {
742                         if (state->extraafter)
743                                 newblk->index++;
744                         xfs_da3_node_add(state, newblk, &state->extrablk);
745                         state->extravalid = 0;
746                 }
747         }
748
749         return 0;
750 }
751
752 /*
753  * Balance the btree elements between two intermediate nodes,
754  * usually one full and one empty.
755  *
756  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
757  */
758 STATIC void
759 xfs_da3_node_rebalance(
760         struct xfs_da_state     *state,
761         struct xfs_da_state_blk *blk1,
762         struct xfs_da_state_blk *blk2)
763 {
764         struct xfs_da_intnode   *node1;
765         struct xfs_da_intnode   *node2;
766         struct xfs_da_intnode   *tmpnode;
767         struct xfs_da_node_entry *btree1;
768         struct xfs_da_node_entry *btree2;
769         struct xfs_da_node_entry *btree_s;
770         struct xfs_da_node_entry *btree_d;
771         struct xfs_da3_icnode_hdr nodehdr1;
772         struct xfs_da3_icnode_hdr nodehdr2;
773         struct xfs_trans        *tp;
774         int                     count;
775         int                     tmp;
776         int                     swap = 0;
777         struct xfs_inode        *dp = state->args->dp;
778
779         trace_xfs_da_node_rebalance(state->args);
780
781         node1 = blk1->bp->b_addr;
782         node2 = blk2->bp->b_addr;
783         dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
784         dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
785         btree1 = dp->d_ops->node_tree_p(node1);
786         btree2 = dp->d_ops->node_tree_p(node2);
787
788         /*
789          * Figure out how many entries need to move, and in which direction.
790          * Swap the nodes around if that makes it simpler.
791          */
792         if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
793             ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
794              (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
795                         be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
796                 tmpnode = node1;
797                 node1 = node2;
798                 node2 = tmpnode;
799                 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
800                 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
801                 btree1 = dp->d_ops->node_tree_p(node1);
802                 btree2 = dp->d_ops->node_tree_p(node2);
803                 swap = 1;
804         }
805
806         count = (nodehdr1.count - nodehdr2.count) / 2;
807         if (count == 0)
808                 return;
809         tp = state->args->trans;
810         /*
811          * Two cases: high-to-low and low-to-high.
812          */
813         if (count > 0) {
814                 /*
815                  * Move elements in node2 up to make a hole.
816                  */
817                 tmp = nodehdr2.count;
818                 if (tmp > 0) {
819                         tmp *= (uint)sizeof(xfs_da_node_entry_t);
820                         btree_s = &btree2[0];
821                         btree_d = &btree2[count];
822                         memmove(btree_d, btree_s, tmp);
823                 }
824
825                 /*
826                  * Move the req'd B-tree elements from high in node1 to
827                  * low in node2.
828                  */
829                 nodehdr2.count += count;
830                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
831                 btree_s = &btree1[nodehdr1.count - count];
832                 btree_d = &btree2[0];
833                 memcpy(btree_d, btree_s, tmp);
834                 nodehdr1.count -= count;
835         } else {
836                 /*
837                  * Move the req'd B-tree elements from low in node2 to
838                  * high in node1.
839                  */
840                 count = -count;
841                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
842                 btree_s = &btree2[0];
843                 btree_d = &btree1[nodehdr1.count];
844                 memcpy(btree_d, btree_s, tmp);
845                 nodehdr1.count += count;
846
847                 xfs_trans_log_buf(tp, blk1->bp,
848                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
849
850                 /*
851                  * Move elements in node2 down to fill the hole.
852                  */
853                 tmp  = nodehdr2.count - count;
854                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
855                 btree_s = &btree2[count];
856                 btree_d = &btree2[0];
857                 memmove(btree_d, btree_s, tmp);
858                 nodehdr2.count -= count;
859         }
860
861         /*
862          * Log header of node 1 and all current bits of node 2.
863          */
864         dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
865         xfs_trans_log_buf(tp, blk1->bp,
866                 XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));
867
868         dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
869         xfs_trans_log_buf(tp, blk2->bp,
870                 XFS_DA_LOGRANGE(node2, &node2->hdr,
871                                 dp->d_ops->node_hdr_size +
872                                 (sizeof(btree2[0]) * nodehdr2.count)));
873
874         /*
875          * Record the last hashval from each block for upward propagation.
876          * (note: don't use the swapped node pointers)
877          */
878         if (swap) {
879                 node1 = blk1->bp->b_addr;
880                 node2 = blk2->bp->b_addr;
881                 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
882                 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
883                 btree1 = dp->d_ops->node_tree_p(node1);
884                 btree2 = dp->d_ops->node_tree_p(node2);
885         }
886         blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
887         blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
888
889         /*
890          * Adjust the expected index for insertion.
891          */
892         if (blk1->index >= nodehdr1.count) {
893                 blk2->index = blk1->index - nodehdr1.count;
894                 blk1->index = nodehdr1.count + 1;       /* make it invalid */
895         }
896 }
897
898 /*
899  * Add a new entry to an intermediate node.
900  */
901 STATIC void
902 xfs_da3_node_add(
903         struct xfs_da_state     *state,
904         struct xfs_da_state_blk *oldblk,
905         struct xfs_da_state_blk *newblk)
906 {
907         struct xfs_da_intnode   *node;
908         struct xfs_da3_icnode_hdr nodehdr;
909         struct xfs_da_node_entry *btree;
910         int                     tmp;
911         struct xfs_inode        *dp = state->args->dp;
912
913         trace_xfs_da_node_add(state->args);
914
915         node = oldblk->bp->b_addr;
916         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
917         btree = dp->d_ops->node_tree_p(node);
918
919         ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
920         ASSERT(newblk->blkno != 0);
921         if (state->args->whichfork == XFS_DATA_FORK)
922                 ASSERT(newblk->blkno >= state->args->geo->leafblk &&
923                        newblk->blkno < state->args->geo->freeblk);
924
925         /*
926          * We may need to make some room before we insert the new node.
927          */
928         tmp = 0;
929         if (oldblk->index < nodehdr.count) {
930                 tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
931                 memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
932         }
933         btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
934         btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
935         xfs_trans_log_buf(state->args->trans, oldblk->bp,
936                 XFS_DA_LOGRANGE(node, &btree[oldblk->index],
937                                 tmp + sizeof(*btree)));
938
939         nodehdr.count += 1;
940         dp->d_ops->node_hdr_to_disk(node, &nodehdr);
941         xfs_trans_log_buf(state->args->trans, oldblk->bp,
942                 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
943
944         /*
945          * Copy the last hash value from the oldblk to propagate upwards.
946          */
947         oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
948 }
949
950 /*========================================================================
951  * Routines used for shrinking the Btree.
952  *========================================================================*/
953
954 /*
955  * Deallocate an empty leaf node, remove it from its parent,
956  * possibly deallocating that block, etc...
957  */
958 int
959 xfs_da3_join(
960         struct xfs_da_state     *state)
961 {
962         struct xfs_da_state_blk *drop_blk;
963         struct xfs_da_state_blk *save_blk;
964         int                     action = 0;
965         int                     error;
966
967         trace_xfs_da_join(state->args);
968
969         drop_blk = &state->path.blk[ state->path.active-1 ];
970         save_blk = &state->altpath.blk[ state->path.active-1 ];
971         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
972         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
973                drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
974
975         /*
976          * Walk back up the tree joining/deallocating as necessary.
977          * When we stop dropping blocks, break out.
978          */
979         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
980                  state->path.active--) {
981                 /*
982                  * See if we can combine the block with a neighbor.
983                  *   (action == 0) => no options, just leave
984                  *   (action == 1) => coalesce, then unlink
985                  *   (action == 2) => block empty, unlink it
986                  */
987                 switch (drop_blk->magic) {
988                 case XFS_ATTR_LEAF_MAGIC:
989                         error = xfs_attr3_leaf_toosmall(state, &action);
990                         if (error)
991                                 return error;
992                         if (action == 0)
993                                 return 0;
994                         xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
995                         break;
996                 case XFS_DIR2_LEAFN_MAGIC:
997                         error = xfs_dir2_leafn_toosmall(state, &action);
998                         if (error)
999                                 return error;
1000                         if (action == 0)
1001                                 return 0;
1002                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
1003                         break;
1004                 case XFS_DA_NODE_MAGIC:
1005                         /*
1006                          * Remove the offending node, fixup hashvals,
1007                          * check for a toosmall neighbor.
1008                          */
1009                         xfs_da3_node_remove(state, drop_blk);
1010                         xfs_da3_fixhashpath(state, &state->path);
1011                         error = xfs_da3_node_toosmall(state, &action);
1012                         if (error)
1013                                 return error;
1014                         if (action == 0)
1015                                 return 0;
1016                         xfs_da3_node_unbalance(state, drop_blk, save_blk);
1017                         break;
1018                 }
1019                 xfs_da3_fixhashpath(state, &state->altpath);
1020                 error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
1021                 xfs_da_state_kill_altpath(state);
1022                 if (error)
1023                         return error;
1024                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
1025                                                          drop_blk->bp);
1026                 drop_blk->bp = NULL;
1027                 if (error)
1028                         return error;
1029         }
1030         /*
1031          * We joined all the way to the top.  If it turns out that
1032          * we only have one entry in the root, make the child block
1033          * the new root.
1034          */
1035         xfs_da3_node_remove(state, drop_blk);
1036         xfs_da3_fixhashpath(state, &state->path);
1037         error = xfs_da3_root_join(state, &state->path.blk[0]);
1038         return error;
1039 }
1040
1041 #ifdef  DEBUG
1042 static void
1043 xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
1044 {
1045         __be16  magic = blkinfo->magic;
1046
1047         if (level == 1) {
1048                 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1049                        magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1050                        magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1051                        magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1052         } else {
1053                 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1054                        magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
1055         }
1056         ASSERT(!blkinfo->forw);
1057         ASSERT(!blkinfo->back);
1058 }
1059 #else   /* !DEBUG */
1060 #define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
1061 #endif  /* !DEBUG */
1062
1063 /*
1064  * We have only one entry in the root.  Copy the only remaining child of
1065  * the old root to block 0 as the new root node.
1066  */
1067 STATIC int
1068 xfs_da3_root_join(
1069         struct xfs_da_state     *state,
1070         struct xfs_da_state_blk *root_blk)
1071 {
1072         struct xfs_da_intnode   *oldroot;
1073         struct xfs_da_args      *args;
1074         xfs_dablk_t             child;
1075         struct xfs_buf          *bp;
1076         struct xfs_da3_icnode_hdr oldroothdr;
1077         struct xfs_da_node_entry *btree;
1078         int                     error;
1079         struct xfs_inode        *dp = state->args->dp;
1080
1081         trace_xfs_da_root_join(state->args);
1082
1083         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
1084
1085         args = state->args;
1086         oldroot = root_blk->bp->b_addr;
1087         dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
1088         ASSERT(oldroothdr.forw == 0);
1089         ASSERT(oldroothdr.back == 0);
1090
1091         /*
1092          * If the root has more than one child, then don't do anything.
1093          */
1094         if (oldroothdr.count > 1)
1095                 return 0;
1096
1097         /*
1098          * Read in the (only) child block, then copy those bytes into
1099          * the root block's buffer and free the original child block.
1100          */
1101         btree = dp->d_ops->node_tree_p(oldroot);
1102         child = be32_to_cpu(btree[0].before);
1103         ASSERT(child != 0);
1104         error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
1105                                              args->whichfork);
1106         if (error)
1107                 return error;
1108         xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
1109
1110         /*
1111          * This could be copying a leaf back into the root block in the case of
1112          * there only being a single leaf block left in the tree. Hence we have
1113          * to update the b_ops pointer as well to match the buffer type change
1114          * that could occur. For dir3 blocks we also need to update the block
1115          * number in the buffer header.
1116          */
1117         memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
1118         root_blk->bp->b_ops = bp->b_ops;
1119         xfs_trans_buf_copy_type(root_blk->bp, bp);
1120         if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
1121                 struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
1122                 da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
1123         }
1124         xfs_trans_log_buf(args->trans, root_blk->bp, 0,
1125                           args->geo->blksize - 1);
1126         error = xfs_da_shrink_inode(args, child, bp);
1127         return error;
1128 }
1129
1130 /*
1131  * Check a node block and its neighbors to see if the block should be
1132  * collapsed into one or the other neighbor.  Always keep the block
1133  * with the smaller block number.
1134  * If the current block is over 50% full, don't try to join it, return 0.
1135  * If the block is empty, fill in the state structure and return 2.
1136  * If it can be collapsed, fill in the state structure and return 1.
1137  * If nothing can be done, return 0.
1138  */
1139 STATIC int
1140 xfs_da3_node_toosmall(
1141         struct xfs_da_state     *state,
1142         int                     *action)
1143 {
1144         struct xfs_da_intnode   *node;
1145         struct xfs_da_state_blk *blk;
1146         struct xfs_da_blkinfo   *info;
1147         xfs_dablk_t             blkno;
1148         struct xfs_buf          *bp;
1149         struct xfs_da3_icnode_hdr nodehdr;
1150         int                     count;
1151         int                     forward;
1152         int                     error;
1153         int                     retval;
1154         int                     i;
1155         struct xfs_inode        *dp = state->args->dp;
1156
1157         trace_xfs_da_node_toosmall(state->args);
1158
1159         /*
1160          * Check for the degenerate case of the block being over 50% full.
1161          * If so, it's not worth even looking to see if we might be able
1162          * to coalesce with a sibling.
1163          */
1164         blk = &state->path.blk[ state->path.active-1 ];
1165         info = blk->bp->b_addr;
1166         node = (xfs_da_intnode_t *)info;
1167         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1168         if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
1169                 *action = 0;    /* blk over 50%, don't try to join */
1170                 return 0;       /* blk over 50%, don't try to join */
1171         }
1172
1173         /*
1174          * Check for the degenerate case of the block being empty.
1175          * If the block is empty, we'll simply delete it, no need to
1176          * coalesce it with a sibling block.  We choose (arbitrarily)
1177          * to merge with the forward block unless it is NULL.
1178          */
1179         if (nodehdr.count == 0) {
1180                 /*
1181                  * Make altpath point to the block we want to keep and
1182                  * path point to the block we want to drop (this one).
1183                  */
1184                 forward = (info->forw != 0);
1185                 memcpy(&state->altpath, &state->path, sizeof(state->path));
1186                 error = xfs_da3_path_shift(state, &state->altpath, forward,
1187                                                  0, &retval);
1188                 if (error)
1189                         return error;
1190                 if (retval) {
1191                         *action = 0;
1192                 } else {
1193                         *action = 2;
1194                 }
1195                 return 0;
1196         }
1197
1198         /*
1199          * Examine each sibling block to see if we can coalesce with
1200          * at least 25% free space to spare.  We need to figure out
1201          * whether to merge with the forward or the backward block.
1202          * We prefer coalescing with the lower numbered sibling so as
1203          * to shrink a directory over time.
1204          */
1205         count  = state->args->geo->node_ents;
1206         count -= state->args->geo->node_ents >> 2;
1207         count -= nodehdr.count;
1208
1209         /* start with smaller blk num */
1210         forward = nodehdr.forw < nodehdr.back;
1211         for (i = 0; i < 2; forward = !forward, i++) {
1212                 struct xfs_da3_icnode_hdr thdr;
1213                 if (forward)
1214                         blkno = nodehdr.forw;
1215                 else
1216                         blkno = nodehdr.back;
1217                 if (blkno == 0)
1218                         continue;
1219                 error = xfs_da3_node_read(state->args->trans, dp,
1220                                         blkno, -1, &bp, state->args->whichfork);
1221                 if (error)
1222                         return error;
1223
1224                 node = bp->b_addr;
1225                 dp->d_ops->node_hdr_from_disk(&thdr, node);
1226                 xfs_trans_brelse(state->args->trans, bp);
1227
1228                 if (count - thdr.count >= 0)
1229                         break;  /* fits with at least 25% to spare */
1230         }
1231         if (i >= 2) {
1232                 *action = 0;
1233                 return 0;
1234         }
1235
1236         /*
1237          * Make altpath point to the block we want to keep (the lower
1238          * numbered block) and path point to the block we want to drop.
1239          */
1240         memcpy(&state->altpath, &state->path, sizeof(state->path));
1241         if (blkno < blk->blkno) {
1242                 error = xfs_da3_path_shift(state, &state->altpath, forward,
1243                                                  0, &retval);
1244         } else {
1245                 error = xfs_da3_path_shift(state, &state->path, forward,
1246                                                  0, &retval);
1247         }
1248         if (error)
1249                 return error;
1250         if (retval) {
1251                 *action = 0;
1252                 return 0;
1253         }
1254         *action = 1;
1255         return 0;
1256 }
1257
1258 /*
1259  * Pick up the last hashvalue from an intermediate node.
1260  */
1261 STATIC uint
1262 xfs_da3_node_lasthash(
1263         struct xfs_inode        *dp,
1264         struct xfs_buf          *bp,
1265         int                     *count)
1266 {
1267         struct xfs_da_intnode    *node;
1268         struct xfs_da_node_entry *btree;
1269         struct xfs_da3_icnode_hdr nodehdr;
1270
1271         node = bp->b_addr;
1272         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1273         if (count)
1274                 *count = nodehdr.count;
1275         if (!nodehdr.count)
1276                 return 0;
1277         btree = dp->d_ops->node_tree_p(node);
1278         return be32_to_cpu(btree[nodehdr.count - 1].hashval);
1279 }
1280
1281 /*
1282  * Walk back up the tree adjusting hash values as necessary,
1283  * when we stop making changes, return.
1284  */
1285 void
1286 xfs_da3_fixhashpath(
1287         struct xfs_da_state     *state,
1288         struct xfs_da_state_path *path)
1289 {
1290         struct xfs_da_state_blk *blk;
1291         struct xfs_da_intnode   *node;
1292         struct xfs_da_node_entry *btree;
1293         xfs_dahash_t            lasthash=0;
1294         int                     level;
1295         int                     count;
1296         struct xfs_inode        *dp = state->args->dp;
1297
1298         trace_xfs_da_fixhashpath(state->args);
1299
1300         level = path->active-1;
1301         blk = &path->blk[ level ];
1302         switch (blk->magic) {
1303         case XFS_ATTR_LEAF_MAGIC:
1304                 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
1305                 if (count == 0)
1306                         return;
1307                 break;
1308         case XFS_DIR2_LEAFN_MAGIC:
1309                 lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
1310                 if (count == 0)
1311                         return;
1312                 break;
1313         case XFS_DA_NODE_MAGIC:
1314                 lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
1315                 if (count == 0)
1316                         return;
1317                 break;
1318         }
1319         for (blk--, level--; level >= 0; blk--, level--) {
1320                 struct xfs_da3_icnode_hdr nodehdr;
1321
1322                 node = blk->bp->b_addr;
1323                 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1324                 btree = dp->d_ops->node_tree_p(node);
1325                 if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1326                         break;
1327                 blk->hashval = lasthash;
1328                 btree[blk->index].hashval = cpu_to_be32(lasthash);
1329                 xfs_trans_log_buf(state->args->trans, blk->bp,
1330                                   XFS_DA_LOGRANGE(node, &btree[blk->index],
1331                                                   sizeof(*btree)));
1332
1333                 lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1334         }
1335 }
1336
1337 /*
1338  * Remove an entry from an intermediate node.
1339  */
1340 STATIC void
1341 xfs_da3_node_remove(
1342         struct xfs_da_state     *state,
1343         struct xfs_da_state_blk *drop_blk)
1344 {
1345         struct xfs_da_intnode   *node;
1346         struct xfs_da3_icnode_hdr nodehdr;
1347         struct xfs_da_node_entry *btree;
1348         int                     index;
1349         int                     tmp;
1350         struct xfs_inode        *dp = state->args->dp;
1351
1352         trace_xfs_da_node_remove(state->args);
1353
1354         node = drop_blk->bp->b_addr;
1355         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1356         ASSERT(drop_blk->index < nodehdr.count);
1357         ASSERT(drop_blk->index >= 0);
1358
1359         /*
1360          * Copy over the offending entry, or just zero it out.
1361          */
1362         index = drop_blk->index;
1363         btree = dp->d_ops->node_tree_p(node);
1364         if (index < nodehdr.count - 1) {
1365                 tmp  = nodehdr.count - index - 1;
1366                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1367                 memmove(&btree[index], &btree[index + 1], tmp);
1368                 xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1369                     XFS_DA_LOGRANGE(node, &btree[index], tmp));
1370                 index = nodehdr.count - 1;
1371         }
1372         memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
1373         xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1374             XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
1375         nodehdr.count -= 1;
1376         dp->d_ops->node_hdr_to_disk(node, &nodehdr);
1377         xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1378             XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
1379
1380         /*
1381          * Copy the last hash value from the block to propagate upwards.
1382          */
1383         drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
1384 }
1385
1386 /*
1387  * Unbalance the elements between two intermediate nodes,
1388  * move all Btree elements from one node into another.
1389  */
1390 STATIC void
1391 xfs_da3_node_unbalance(
1392         struct xfs_da_state     *state,
1393         struct xfs_da_state_blk *drop_blk,
1394         struct xfs_da_state_blk *save_blk)
1395 {
1396         struct xfs_da_intnode   *drop_node;
1397         struct xfs_da_intnode   *save_node;
1398         struct xfs_da_node_entry *drop_btree;
1399         struct xfs_da_node_entry *save_btree;
1400         struct xfs_da3_icnode_hdr drop_hdr;
1401         struct xfs_da3_icnode_hdr save_hdr;
1402         struct xfs_trans        *tp;
1403         int                     sindex;
1404         int                     tmp;
1405         struct xfs_inode        *dp = state->args->dp;
1406
1407         trace_xfs_da_node_unbalance(state->args);
1408
1409         drop_node = drop_blk->bp->b_addr;
1410         save_node = save_blk->bp->b_addr;
1411         dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
1412         dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
1413         drop_btree = dp->d_ops->node_tree_p(drop_node);
1414         save_btree = dp->d_ops->node_tree_p(save_node);
1415         tp = state->args->trans;
1416
1417         /*
1418          * If the dying block has lower hashvals, then move all the
1419          * elements in the remaining block up to make a hole.
1420          */
1421         if ((be32_to_cpu(drop_btree[0].hashval) <
1422                         be32_to_cpu(save_btree[0].hashval)) ||
1423             (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
1424                         be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
1425                 /* XXX: check this - is memmove dst correct? */
1426                 tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
1427                 memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
1428
1429                 sindex = 0;
1430                 xfs_trans_log_buf(tp, save_blk->bp,
1431                         XFS_DA_LOGRANGE(save_node, &save_btree[0],
1432                                 (save_hdr.count + drop_hdr.count) *
1433                                                 sizeof(xfs_da_node_entry_t)));
1434         } else {
1435                 sindex = save_hdr.count;
1436                 xfs_trans_log_buf(tp, save_blk->bp,
1437                         XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
1438                                 drop_hdr.count * sizeof(xfs_da_node_entry_t)));
1439         }
1440
1441         /*
1442          * Move all the B-tree elements from drop_blk to save_blk.
1443          */
1444         tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
1445         memcpy(&save_btree[sindex], &drop_btree[0], tmp);
1446         save_hdr.count += drop_hdr.count;
1447
1448         dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
1449         xfs_trans_log_buf(tp, save_blk->bp,
1450                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1451                                 dp->d_ops->node_hdr_size));
1452
1453         /*
1454          * Save the last hashval in the remaining block for upward propagation.
1455          */
1456         save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
1457 }
1458
1459 /*========================================================================
1460  * Routines used for finding things in the Btree.
1461  *========================================================================*/
1462
1463 /*
1464  * Walk down the Btree looking for a particular filename, filling
1465  * in the state structure as we go.
1466  *
1467  * We will set the state structure to point to each of the elements
1468  * in each of the nodes where either the hashval is or should be.
1469  *
1470  * We support duplicate hashval's so for each entry in the current
1471  * node that could contain the desired hashval, descend.  This is a
1472  * pruned depth-first tree search.
1473  */
1474 int                                                     /* error */
1475 xfs_da3_node_lookup_int(
1476         struct xfs_da_state     *state,
1477         int                     *result)
1478 {
1479         struct xfs_da_state_blk *blk;
1480         struct xfs_da_blkinfo   *curr;
1481         struct xfs_da_intnode   *node;
1482         struct xfs_da_node_entry *btree;
1483         struct xfs_da3_icnode_hdr nodehdr;
1484         struct xfs_da_args      *args;
1485         xfs_dablk_t             blkno;
1486         xfs_dahash_t            hashval;
1487         xfs_dahash_t            btreehashval;
1488         int                     probe;
1489         int                     span;
1490         int                     max;
1491         int                     error;
1492         int                     retval;
1493         unsigned int            expected_level = 0;
1494         struct xfs_inode        *dp = state->args->dp;
1495
1496         args = state->args;
1497
1498         /*
1499          * Descend thru the B-tree searching each level for the right
1500          * node to use, until the right hashval is found.
1501          */
1502         blkno = args->geo->leafblk;
1503         for (blk = &state->path.blk[0], state->path.active = 1;
1504                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
1505                          blk++, state->path.active++) {
1506                 /*
1507                  * Read the next node down in the tree.
1508                  */
1509                 blk->blkno = blkno;
1510                 error = xfs_da3_node_read(args->trans, args->dp, blkno,
1511                                         -1, &blk->bp, args->whichfork);
1512                 if (error) {
1513                         blk->blkno = 0;
1514                         state->path.active--;
1515                         return error;
1516                 }
1517                 curr = blk->bp->b_addr;
1518                 blk->magic = be16_to_cpu(curr->magic);
1519
1520                 if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
1521                     blk->magic == XFS_ATTR3_LEAF_MAGIC) {
1522                         blk->magic = XFS_ATTR_LEAF_MAGIC;
1523                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1524                         break;
1525                 }
1526
1527                 if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1528                     blk->magic == XFS_DIR3_LEAFN_MAGIC) {
1529                         blk->magic = XFS_DIR2_LEAFN_MAGIC;
1530                         blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
1531                                                               blk->bp, NULL);
1532                         break;
1533                 }
1534
1535                 blk->magic = XFS_DA_NODE_MAGIC;
1536
1537
1538                 /*
1539                  * Search an intermediate node for a match.
1540                  */
1541                 node = blk->bp->b_addr;
1542                 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1543                 btree = dp->d_ops->node_tree_p(node);
1544
1545                 /* Tree taller than we can handle; bail out! */
1546                 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
1547                         return -EFSCORRUPTED;
1548
1549                 /* Check the level from the root. */
1550                 if (blkno == args->geo->leafblk)
1551                         expected_level = nodehdr.level - 1;
1552                 else if (expected_level != nodehdr.level)
1553                         return -EFSCORRUPTED;
1554                 else
1555                         expected_level--;
1556
1557                 max = nodehdr.count;
1558                 blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1559
1560                 /*
1561                  * Binary search.  (note: small blocks will skip loop)
1562                  */
1563                 probe = span = max / 2;
1564                 hashval = args->hashval;
1565                 while (span > 4) {
1566                         span /= 2;
1567                         btreehashval = be32_to_cpu(btree[probe].hashval);
1568                         if (btreehashval < hashval)
1569                                 probe += span;
1570                         else if (btreehashval > hashval)
1571                                 probe -= span;
1572                         else
1573                                 break;
1574                 }
1575                 ASSERT((probe >= 0) && (probe < max));
1576                 ASSERT((span <= 4) ||
1577                         (be32_to_cpu(btree[probe].hashval) == hashval));
1578
1579                 /*
1580                  * Since we may have duplicate hashval's, find the first
1581                  * matching hashval in the node.
1582                  */
1583                 while (probe > 0 &&
1584                        be32_to_cpu(btree[probe].hashval) >= hashval) {
1585                         probe--;
1586                 }
1587                 while (probe < max &&
1588                        be32_to_cpu(btree[probe].hashval) < hashval) {
1589                         probe++;
1590                 }
1591
1592                 /*
1593                  * Pick the right block to descend on.
1594                  */
1595                 if (probe == max) {
1596                         blk->index = max - 1;
1597                         blkno = be32_to_cpu(btree[max - 1].before);
1598                 } else {
1599                         blk->index = probe;
1600                         blkno = be32_to_cpu(btree[probe].before);
1601                 }
1602
1603                 /* We can't point back to the root. */
1604                 if (blkno == args->geo->leafblk)
1605                         return -EFSCORRUPTED;
1606         }
1607
1608         if (expected_level != 0)
1609                 return -EFSCORRUPTED;
1610
1611         /*
1612          * A leaf block that ends in the hashval that we are interested in
1613          * (final hashval == search hashval) means that the next block may
1614          * contain more entries with the same hashval, shift upward to the
1615          * next leaf and keep searching.
1616          */
1617         for (;;) {
1618                 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1619                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1620                                                         &blk->index, state);
1621                 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1622                         retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
1623                         blk->index = args->index;
1624                         args->blkno = blk->blkno;
1625                 } else {
1626                         ASSERT(0);
1627                         return -EFSCORRUPTED;
1628                 }
1629                 if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
1630                     (blk->hashval == args->hashval)) {
1631                         error = xfs_da3_path_shift(state, &state->path, 1, 1,
1632                                                          &retval);
1633                         if (error)
1634                                 return error;
1635                         if (retval == 0) {
1636                                 continue;
1637                         } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1638                                 /* path_shift() gives ENOENT */
1639                                 retval = -ENOATTR;
1640                         }
1641                 }
1642                 break;
1643         }
1644         *result = retval;
1645         return 0;
1646 }
1647
1648 /*========================================================================
1649  * Utility routines.
1650  *========================================================================*/
1651
1652 /*
1653  * Compare two intermediate nodes for "order".
1654  */
1655 STATIC int
1656 xfs_da3_node_order(
1657         struct xfs_inode *dp,
1658         struct xfs_buf  *node1_bp,
1659         struct xfs_buf  *node2_bp)
1660 {
1661         struct xfs_da_intnode   *node1;
1662         struct xfs_da_intnode   *node2;
1663         struct xfs_da_node_entry *btree1;
1664         struct xfs_da_node_entry *btree2;
1665         struct xfs_da3_icnode_hdr node1hdr;
1666         struct xfs_da3_icnode_hdr node2hdr;
1667
1668         node1 = node1_bp->b_addr;
1669         node2 = node2_bp->b_addr;
1670         dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
1671         dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
1672         btree1 = dp->d_ops->node_tree_p(node1);
1673         btree2 = dp->d_ops->node_tree_p(node2);
1674
1675         if (node1hdr.count > 0 && node2hdr.count > 0 &&
1676             ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1677              (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1678               be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1679                 return 1;
1680         }
1681         return 0;
1682 }
1683
1684 /*
1685  * Link a new block into a doubly linked list of blocks (of whatever type).
1686  */
1687 int                                                     /* error */
1688 xfs_da3_blk_link(
1689         struct xfs_da_state     *state,
1690         struct xfs_da_state_blk *old_blk,
1691         struct xfs_da_state_blk *new_blk)
1692 {
1693         struct xfs_da_blkinfo   *old_info;
1694         struct xfs_da_blkinfo   *new_info;
1695         struct xfs_da_blkinfo   *tmp_info;
1696         struct xfs_da_args      *args;
1697         struct xfs_buf          *bp;
1698         int                     before = 0;
1699         int                     error;
1700         struct xfs_inode        *dp = state->args->dp;
1701
1702         /*
1703          * Set up environment.
1704          */
1705         args = state->args;
1706         ASSERT(args != NULL);
1707         old_info = old_blk->bp->b_addr;
1708         new_info = new_blk->bp->b_addr;
1709         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1710                old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1711                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1712
1713         switch (old_blk->magic) {
1714         case XFS_ATTR_LEAF_MAGIC:
1715                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1716                 break;
1717         case XFS_DIR2_LEAFN_MAGIC:
1718                 before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
1719                 break;
1720         case XFS_DA_NODE_MAGIC:
1721                 before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
1722                 break;
1723         }
1724
1725         /*
1726          * Link blocks in appropriate order.
1727          */
1728         if (before) {
1729                 /*
1730                  * Link new block in before existing block.
1731                  */
1732                 trace_xfs_da_link_before(args);
1733                 new_info->forw = cpu_to_be32(old_blk->blkno);
1734                 new_info->back = old_info->back;
1735                 if (old_info->back) {
1736                         error = xfs_da3_node_read(args->trans, dp,
1737                                                 be32_to_cpu(old_info->back),
1738                                                 -1, &bp, args->whichfork);
1739                         if (error)
1740                                 return error;
1741                         ASSERT(bp != NULL);
1742                         tmp_info = bp->b_addr;
1743                         ASSERT(tmp_info->magic == old_info->magic);
1744                         ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1745                         tmp_info->forw = cpu_to_be32(new_blk->blkno);
1746                         xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1747                 }
1748                 old_info->back = cpu_to_be32(new_blk->blkno);
1749         } else {
1750                 /*
1751                  * Link new block in after existing block.
1752                  */
1753                 trace_xfs_da_link_after(args);
1754                 new_info->forw = old_info->forw;
1755                 new_info->back = cpu_to_be32(old_blk->blkno);
1756                 if (old_info->forw) {
1757                         error = xfs_da3_node_read(args->trans, dp,
1758                                                 be32_to_cpu(old_info->forw),
1759                                                 -1, &bp, args->whichfork);
1760                         if (error)
1761                                 return error;
1762                         ASSERT(bp != NULL);
1763                         tmp_info = bp->b_addr;
1764                         ASSERT(tmp_info->magic == old_info->magic);
1765                         ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1766                         tmp_info->back = cpu_to_be32(new_blk->blkno);
1767                         xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1768                 }
1769                 old_info->forw = cpu_to_be32(new_blk->blkno);
1770         }
1771
1772         xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1773         xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1774         return 0;
1775 }
1776
1777 /*
1778  * Unlink a block from a doubly linked list of blocks.
1779  */
1780 STATIC int                                              /* error */
1781 xfs_da3_blk_unlink(
1782         struct xfs_da_state     *state,
1783         struct xfs_da_state_blk *drop_blk,
1784         struct xfs_da_state_blk *save_blk)
1785 {
1786         struct xfs_da_blkinfo   *drop_info;
1787         struct xfs_da_blkinfo   *save_info;
1788         struct xfs_da_blkinfo   *tmp_info;
1789         struct xfs_da_args      *args;
1790         struct xfs_buf          *bp;
1791         int                     error;
1792
1793         /*
1794          * Set up environment.
1795          */
1796         args = state->args;
1797         ASSERT(args != NULL);
1798         save_info = save_blk->bp->b_addr;
1799         drop_info = drop_blk->bp->b_addr;
1800         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1801                save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1802                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1803         ASSERT(save_blk->magic == drop_blk->magic);
1804         ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1805                (be32_to_cpu(save_info->back) == drop_blk->blkno));
1806         ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1807                (be32_to_cpu(drop_info->back) == save_blk->blkno));
1808
1809         /*
1810          * Unlink the leaf block from the doubly linked chain of leaves.
1811          */
1812         if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1813                 trace_xfs_da_unlink_back(args);
1814                 save_info->back = drop_info->back;
1815                 if (drop_info->back) {
1816                         error = xfs_da3_node_read(args->trans, args->dp,
1817                                                 be32_to_cpu(drop_info->back),
1818                                                 -1, &bp, args->whichfork);
1819                         if (error)
1820                                 return error;
1821                         ASSERT(bp != NULL);
1822                         tmp_info = bp->b_addr;
1823                         ASSERT(tmp_info->magic == save_info->magic);
1824                         ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1825                         tmp_info->forw = cpu_to_be32(save_blk->blkno);
1826                         xfs_trans_log_buf(args->trans, bp, 0,
1827                                                     sizeof(*tmp_info) - 1);
1828                 }
1829         } else {
1830                 trace_xfs_da_unlink_forward(args);
1831                 save_info->forw = drop_info->forw;
1832                 if (drop_info->forw) {
1833                         error = xfs_da3_node_read(args->trans, args->dp,
1834                                                 be32_to_cpu(drop_info->forw),
1835                                                 -1, &bp, args->whichfork);
1836                         if (error)
1837                                 return error;
1838                         ASSERT(bp != NULL);
1839                         tmp_info = bp->b_addr;
1840                         ASSERT(tmp_info->magic == save_info->magic);
1841                         ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1842                         tmp_info->back = cpu_to_be32(save_blk->blkno);
1843                         xfs_trans_log_buf(args->trans, bp, 0,
1844                                                     sizeof(*tmp_info) - 1);
1845                 }
1846         }
1847
1848         xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1849         return 0;
1850 }
1851
1852 /*
1853  * Move a path "forward" or "!forward" one block at the current level.
1854  *
1855  * This routine will adjust a "path" to point to the next block
1856  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1857  * Btree, including updating pointers to the intermediate nodes between
1858  * the new bottom and the root.
1859  */
1860 int                                                     /* error */
1861 xfs_da3_path_shift(
1862         struct xfs_da_state     *state,
1863         struct xfs_da_state_path *path,
1864         int                     forward,
1865         int                     release,
1866         int                     *result)
1867 {
1868         struct xfs_da_state_blk *blk;
1869         struct xfs_da_blkinfo   *info;
1870         struct xfs_da_intnode   *node;
1871         struct xfs_da_args      *args;
1872         struct xfs_da_node_entry *btree;
1873         struct xfs_da3_icnode_hdr nodehdr;
1874         struct xfs_buf          *bp;
1875         xfs_dablk_t             blkno = 0;
1876         int                     level;
1877         int                     error;
1878         struct xfs_inode        *dp = state->args->dp;
1879
1880         trace_xfs_da_path_shift(state->args);
1881
1882         /*
1883          * Roll up the Btree looking for the first block where our
1884          * current index is not at the edge of the block.  Note that
1885          * we skip the bottom layer because we want the sibling block.
1886          */
1887         args = state->args;
1888         ASSERT(args != NULL);
1889         ASSERT(path != NULL);
1890         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1891         level = (path->active-1) - 1;   /* skip bottom layer in path */
1892         for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1893                 node = blk->bp->b_addr;
1894                 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1895                 btree = dp->d_ops->node_tree_p(node);
1896
1897                 if (forward && (blk->index < nodehdr.count - 1)) {
1898                         blk->index++;
1899                         blkno = be32_to_cpu(btree[blk->index].before);
1900                         break;
1901                 } else if (!forward && (blk->index > 0)) {
1902                         blk->index--;
1903                         blkno = be32_to_cpu(btree[blk->index].before);
1904                         break;
1905                 }
1906         }
1907         if (level < 0) {
1908                 *result = -ENOENT;      /* we're out of our tree */
1909                 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1910                 return 0;
1911         }
1912
1913         /*
1914          * Roll down the edge of the subtree until we reach the
1915          * same depth we were at originally.
1916          */
1917         for (blk++, level++; level < path->active; blk++, level++) {
1918                 /*
1919                  * Read the next child block into a local buffer.
1920                  */
1921                 error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp,
1922                                           args->whichfork);
1923                 if (error)
1924                         return error;
1925
1926                 /*
1927                  * Release the old block (if it's dirty, the trans doesn't
1928                  * actually let go) and swap the local buffer into the path
1929                  * structure. This ensures failure of the above read doesn't set
1930                  * a NULL buffer in an active slot in the path.
1931                  */
1932                 if (release)
1933                         xfs_trans_brelse(args->trans, blk->bp);
1934                 blk->blkno = blkno;
1935                 blk->bp = bp;
1936
1937                 info = blk->bp->b_addr;
1938                 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1939                        info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
1940                        info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1941                        info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1942                        info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1943                        info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1944
1945
1946                 /*
1947                  * Note: we flatten the magic number to a single type so we
1948                  * don't have to compare against crc/non-crc types elsewhere.
1949                  */
1950                 switch (be16_to_cpu(info->magic)) {
1951                 case XFS_DA_NODE_MAGIC:
1952                 case XFS_DA3_NODE_MAGIC:
1953                         blk->magic = XFS_DA_NODE_MAGIC;
1954                         node = (xfs_da_intnode_t *)info;
1955                         dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1956                         btree = dp->d_ops->node_tree_p(node);
1957                         blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1958                         if (forward)
1959                                 blk->index = 0;
1960                         else
1961                                 blk->index = nodehdr.count - 1;
1962                         blkno = be32_to_cpu(btree[blk->index].before);
1963                         break;
1964                 case XFS_ATTR_LEAF_MAGIC:
1965                 case XFS_ATTR3_LEAF_MAGIC:
1966                         blk->magic = XFS_ATTR_LEAF_MAGIC;
1967                         ASSERT(level == path->active-1);
1968                         blk->index = 0;
1969                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1970                         break;
1971                 case XFS_DIR2_LEAFN_MAGIC:
1972                 case XFS_DIR3_LEAFN_MAGIC:
1973                         blk->magic = XFS_DIR2_LEAFN_MAGIC;
1974                         ASSERT(level == path->active-1);
1975                         blk->index = 0;
1976                         blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
1977                                                               blk->bp, NULL);
1978                         break;
1979                 default:
1980                         ASSERT(0);
1981                         break;
1982                 }
1983         }
1984         *result = 0;
1985         return 0;
1986 }
1987
1988
1989 /*========================================================================
1990  * Utility routines.
1991  *========================================================================*/
1992
1993 /*
1994  * Implement a simple hash on a character string.
1995  * Rotate the hash value by 7 bits, then XOR each character in.
1996  * This is implemented with some source-level loop unrolling.
1997  */
1998 xfs_dahash_t
1999 xfs_da_hashname(const uint8_t *name, int namelen)
2000 {
2001         xfs_dahash_t hash;
2002
2003         /*
2004          * Do four characters at a time as long as we can.
2005          */
2006         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
2007                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
2008                        (name[3] << 0) ^ rol32(hash, 7 * 4);
2009
2010         /*
2011          * Now do the rest of the characters.
2012          */
2013         switch (namelen) {
2014         case 3:
2015                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
2016                        rol32(hash, 7 * 3);
2017         case 2:
2018                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
2019         case 1:
2020                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
2021         default: /* case 0: */
2022                 return hash;
2023         }
2024 }
2025
2026 enum xfs_dacmp
2027 xfs_da_compname(
2028         struct xfs_da_args *args,
2029         const unsigned char *name,
2030         int             len)
2031 {
2032         return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
2033                                         XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
2034 }
2035
2036 static xfs_dahash_t
2037 xfs_default_hashname(
2038         struct xfs_name *name)
2039 {
2040         return xfs_da_hashname(name->name, name->len);
2041 }
2042
2043 const struct xfs_nameops xfs_default_nameops = {
2044         .hashname       = xfs_default_hashname,
2045         .compname       = xfs_da_compname
2046 };
2047
2048 int
2049 xfs_da_grow_inode_int(
2050         struct xfs_da_args      *args,
2051         xfs_fileoff_t           *bno,
2052         int                     count)
2053 {
2054         struct xfs_trans        *tp = args->trans;
2055         struct xfs_inode        *dp = args->dp;
2056         int                     w = args->whichfork;
2057         xfs_rfsblock_t          nblks = dp->i_d.di_nblocks;
2058         struct xfs_bmbt_irec    map, *mapp;
2059         int                     nmap, error, got, i, mapi;
2060
2061         /*
2062          * Find a spot in the file space to put the new block.
2063          */
2064         error = xfs_bmap_first_unused(tp, dp, count, bno, w);
2065         if (error)
2066                 return error;
2067
2068         /*
2069          * Try mapping it in one filesystem block.
2070          */
2071         nmap = 1;
2072         ASSERT(args->firstblock != NULL);
2073         error = xfs_bmapi_write(tp, dp, *bno, count,
2074                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2075                         args->firstblock, args->total, &map, &nmap,
2076                         args->dfops);
2077         if (error)
2078                 return error;
2079
2080         ASSERT(nmap <= 1);
2081         if (nmap == 1) {
2082                 mapp = &map;
2083                 mapi = 1;
2084         } else if (nmap == 0 && count > 1) {
2085                 xfs_fileoff_t           b;
2086                 int                     c;
2087
2088                 /*
2089                  * If we didn't get it and the block might work if fragmented,
2090                  * try without the CONTIG flag.  Loop until we get it all.
2091                  */
2092                 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
2093                 for (b = *bno, mapi = 0; b < *bno + count; ) {
2094                         nmap = MIN(XFS_BMAP_MAX_NMAP, count);
2095                         c = (int)(*bno + count - b);
2096                         error = xfs_bmapi_write(tp, dp, b, c,
2097                                         xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2098                                         args->firstblock, args->total,
2099                                         &mapp[mapi], &nmap, args->dfops);
2100                         if (error)
2101                                 goto out_free_map;
2102                         if (nmap < 1)
2103                                 break;
2104                         mapi += nmap;
2105                         b = mapp[mapi - 1].br_startoff +
2106                             mapp[mapi - 1].br_blockcount;
2107                 }
2108         } else {
2109                 mapi = 0;
2110                 mapp = NULL;
2111         }
2112
2113         /*
2114          * Count the blocks we got, make sure it matches the total.
2115          */
2116         for (i = 0, got = 0; i < mapi; i++)
2117                 got += mapp[i].br_blockcount;
2118         if (got != count || mapp[0].br_startoff != *bno ||
2119             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
2120             *bno + count) {
2121                 error = -ENOSPC;
2122                 goto out_free_map;
2123         }
2124
2125         /* account for newly allocated blocks in reserved blocks total */
2126         args->total -= dp->i_d.di_nblocks - nblks;
2127
2128 out_free_map:
2129         if (mapp != &map)
2130                 kmem_free(mapp);
2131         return error;
2132 }
2133
2134 /*
2135  * Add a block to the btree ahead of the file.
2136  * Return the new block number to the caller.
2137  */
2138 int
2139 xfs_da_grow_inode(
2140         struct xfs_da_args      *args,
2141         xfs_dablk_t             *new_blkno)
2142 {
2143         xfs_fileoff_t           bno;
2144         int                     error;
2145
2146         trace_xfs_da_grow_inode(args);
2147
2148         bno = args->geo->leafblk;
2149         error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
2150         if (!error)
2151                 *new_blkno = (xfs_dablk_t)bno;
2152         return error;
2153 }
2154
2155 /*
2156  * Ick.  We need to always be able to remove a btree block, even
2157  * if there's no space reservation because the filesystem is full.
2158  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
2159  * It swaps the target block with the last block in the file.  The
2160  * last block in the file can always be removed since it can't cause
2161  * a bmap btree split to do that.
2162  */
2163 STATIC int
2164 xfs_da3_swap_lastblock(
2165         struct xfs_da_args      *args,
2166         xfs_dablk_t             *dead_blknop,
2167         struct xfs_buf          **dead_bufp)
2168 {
2169         struct xfs_da_blkinfo   *dead_info;
2170         struct xfs_da_blkinfo   *sib_info;
2171         struct xfs_da_intnode   *par_node;
2172         struct xfs_da_intnode   *dead_node;
2173         struct xfs_dir2_leaf    *dead_leaf2;
2174         struct xfs_da_node_entry *btree;
2175         struct xfs_da3_icnode_hdr par_hdr;
2176         struct xfs_inode        *dp;
2177         struct xfs_trans        *tp;
2178         struct xfs_mount        *mp;
2179         struct xfs_buf          *dead_buf;
2180         struct xfs_buf          *last_buf;
2181         struct xfs_buf          *sib_buf;
2182         struct xfs_buf          *par_buf;
2183         xfs_dahash_t            dead_hash;
2184         xfs_fileoff_t           lastoff;
2185         xfs_dablk_t             dead_blkno;
2186         xfs_dablk_t             last_blkno;
2187         xfs_dablk_t             sib_blkno;
2188         xfs_dablk_t             par_blkno;
2189         int                     error;
2190         int                     w;
2191         int                     entno;
2192         int                     level;
2193         int                     dead_level;
2194
2195         trace_xfs_da_swap_lastblock(args);
2196
2197         dead_buf = *dead_bufp;
2198         dead_blkno = *dead_blknop;
2199         tp = args->trans;
2200         dp = args->dp;
2201         w = args->whichfork;
2202         ASSERT(w == XFS_DATA_FORK);
2203         mp = dp->i_mount;
2204         lastoff = args->geo->freeblk;
2205         error = xfs_bmap_last_before(tp, dp, &lastoff, w);
2206         if (error)
2207                 return error;
2208         if (unlikely(lastoff == 0)) {
2209                 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
2210                                  mp);
2211                 return -EFSCORRUPTED;
2212         }
2213         /*
2214          * Read the last block in the btree space.
2215          */
2216         last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
2217         error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
2218         if (error)
2219                 return error;
2220         /*
2221          * Copy the last block into the dead buffer and log it.
2222          */
2223         memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
2224         xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
2225         dead_info = dead_buf->b_addr;
2226         /*
2227          * Get values from the moved block.
2228          */
2229         if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2230             dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2231                 struct xfs_dir3_icleaf_hdr leafhdr;
2232                 struct xfs_dir2_leaf_entry *ents;
2233
2234                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
2235                 dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2);
2236                 ents = dp->d_ops->leaf_ents_p(dead_leaf2);
2237                 dead_level = 0;
2238                 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
2239         } else {
2240                 struct xfs_da3_icnode_hdr deadhdr;
2241
2242                 dead_node = (xfs_da_intnode_t *)dead_info;
2243                 dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node);
2244                 btree = dp->d_ops->node_tree_p(dead_node);
2245                 dead_level = deadhdr.level;
2246                 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
2247         }
2248         sib_buf = par_buf = NULL;
2249         /*
2250          * If the moved block has a left sibling, fix up the pointers.
2251          */
2252         if ((sib_blkno = be32_to_cpu(dead_info->back))) {
2253                 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
2254                 if (error)
2255                         goto done;
2256                 sib_info = sib_buf->b_addr;
2257                 if (unlikely(
2258                     be32_to_cpu(sib_info->forw) != last_blkno ||
2259                     sib_info->magic != dead_info->magic)) {
2260                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
2261                                          XFS_ERRLEVEL_LOW, mp);
2262                         error = -EFSCORRUPTED;
2263                         goto done;
2264                 }
2265                 sib_info->forw = cpu_to_be32(dead_blkno);
2266                 xfs_trans_log_buf(tp, sib_buf,
2267                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
2268                                         sizeof(sib_info->forw)));
2269                 sib_buf = NULL;
2270         }
2271         /*
2272          * If the moved block has a right sibling, fix up the pointers.
2273          */
2274         if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
2275                 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
2276                 if (error)
2277                         goto done;
2278                 sib_info = sib_buf->b_addr;
2279                 if (unlikely(
2280                        be32_to_cpu(sib_info->back) != last_blkno ||
2281                        sib_info->magic != dead_info->magic)) {
2282                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
2283                                          XFS_ERRLEVEL_LOW, mp);
2284                         error = -EFSCORRUPTED;
2285                         goto done;
2286                 }
2287                 sib_info->back = cpu_to_be32(dead_blkno);
2288                 xfs_trans_log_buf(tp, sib_buf,
2289                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
2290                                         sizeof(sib_info->back)));
2291                 sib_buf = NULL;
2292         }
2293         par_blkno = args->geo->leafblk;
2294         level = -1;
2295         /*
2296          * Walk down the tree looking for the parent of the moved block.
2297          */
2298         for (;;) {
2299                 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
2300                 if (error)
2301                         goto done;
2302                 par_node = par_buf->b_addr;
2303                 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
2304                 if (level >= 0 && level != par_hdr.level + 1) {
2305                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
2306                                          XFS_ERRLEVEL_LOW, mp);
2307                         error = -EFSCORRUPTED;
2308                         goto done;
2309                 }
2310                 level = par_hdr.level;
2311                 btree = dp->d_ops->node_tree_p(par_node);
2312                 for (entno = 0;
2313                      entno < par_hdr.count &&
2314                      be32_to_cpu(btree[entno].hashval) < dead_hash;
2315                      entno++)
2316                         continue;
2317                 if (entno == par_hdr.count) {
2318                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
2319                                          XFS_ERRLEVEL_LOW, mp);
2320                         error = -EFSCORRUPTED;
2321                         goto done;
2322                 }
2323                 par_blkno = be32_to_cpu(btree[entno].before);
2324                 if (level == dead_level + 1)
2325                         break;
2326                 xfs_trans_brelse(tp, par_buf);
2327                 par_buf = NULL;
2328         }
2329         /*
2330          * We're in the right parent block.
2331          * Look for the right entry.
2332          */
2333         for (;;) {
2334                 for (;
2335                      entno < par_hdr.count &&
2336                      be32_to_cpu(btree[entno].before) != last_blkno;
2337                      entno++)
2338                         continue;
2339                 if (entno < par_hdr.count)
2340                         break;
2341                 par_blkno = par_hdr.forw;
2342                 xfs_trans_brelse(tp, par_buf);
2343                 par_buf = NULL;
2344                 if (unlikely(par_blkno == 0)) {
2345                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
2346                                          XFS_ERRLEVEL_LOW, mp);
2347                         error = -EFSCORRUPTED;
2348                         goto done;
2349                 }
2350                 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
2351                 if (error)
2352                         goto done;
2353                 par_node = par_buf->b_addr;
2354                 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
2355                 if (par_hdr.level != level) {
2356                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
2357                                          XFS_ERRLEVEL_LOW, mp);
2358                         error = -EFSCORRUPTED;
2359                         goto done;
2360                 }
2361                 btree = dp->d_ops->node_tree_p(par_node);
2362                 entno = 0;
2363         }
2364         /*
2365          * Update the parent entry pointing to the moved block.
2366          */
2367         btree[entno].before = cpu_to_be32(dead_blkno);
2368         xfs_trans_log_buf(tp, par_buf,
2369                 XFS_DA_LOGRANGE(par_node, &btree[entno].before,
2370                                 sizeof(btree[entno].before)));
2371         *dead_blknop = last_blkno;
2372         *dead_bufp = last_buf;
2373         return 0;
2374 done:
2375         if (par_buf)
2376                 xfs_trans_brelse(tp, par_buf);
2377         if (sib_buf)
2378                 xfs_trans_brelse(tp, sib_buf);
2379         xfs_trans_brelse(tp, last_buf);
2380         return error;
2381 }
2382
2383 /*
2384  * Remove a btree block from a directory or attribute.
2385  */
2386 int
2387 xfs_da_shrink_inode(
2388         xfs_da_args_t   *args,
2389         xfs_dablk_t     dead_blkno,
2390         struct xfs_buf  *dead_buf)
2391 {
2392         xfs_inode_t *dp;
2393         int done, error, w, count;
2394         xfs_trans_t *tp;
2395
2396         trace_xfs_da_shrink_inode(args);
2397
2398         dp = args->dp;
2399         w = args->whichfork;
2400         tp = args->trans;
2401         count = args->geo->fsbcount;
2402         for (;;) {
2403                 /*
2404                  * Remove extents.  If we get ENOSPC for a dir we have to move
2405                  * the last block to the place we want to kill.
2406                  */
2407                 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2408                                     xfs_bmapi_aflag(w), 0, args->firstblock,
2409                                     args->dfops, &done);
2410                 if (error == -ENOSPC) {
2411                         if (w != XFS_DATA_FORK)
2412                                 break;
2413                         error = xfs_da3_swap_lastblock(args, &dead_blkno,
2414                                                       &dead_buf);
2415                         if (error)
2416                                 break;
2417                 } else {
2418                         break;
2419                 }
2420         }
2421         xfs_trans_binval(tp, dead_buf);
2422         return error;
2423 }
2424
2425 /*
2426  * See if the mapping(s) for this btree block are valid, i.e.
2427  * don't contain holes, are logically contiguous, and cover the whole range.
2428  */
2429 STATIC int
2430 xfs_da_map_covers_blocks(
2431         int             nmap,
2432         xfs_bmbt_irec_t *mapp,
2433         xfs_dablk_t     bno,
2434         int             count)
2435 {
2436         int             i;
2437         xfs_fileoff_t   off;
2438
2439         for (i = 0, off = bno; i < nmap; i++) {
2440                 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
2441                     mapp[i].br_startblock == DELAYSTARTBLOCK) {
2442                         return 0;
2443                 }
2444                 if (off != mapp[i].br_startoff) {
2445                         return 0;
2446                 }
2447                 off += mapp[i].br_blockcount;
2448         }
2449         return off == bno + count;
2450 }
2451
2452 /*
2453  * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
2454  *
2455  * For the single map case, it is assumed that the caller has provided a pointer
2456  * to a valid xfs_buf_map.  For the multiple map case, this function will
2457  * allocate the xfs_buf_map to hold all the maps and replace the caller's single
2458  * map pointer with the allocated map.
2459  */
2460 static int
2461 xfs_buf_map_from_irec(
2462         struct xfs_mount        *mp,
2463         struct xfs_buf_map      **mapp,
2464         int                     *nmaps,
2465         struct xfs_bmbt_irec    *irecs,
2466         int                     nirecs)
2467 {
2468         struct xfs_buf_map      *map;
2469         int                     i;
2470
2471         ASSERT(*nmaps == 1);
2472         ASSERT(nirecs >= 1);
2473
2474         if (nirecs > 1) {
2475                 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
2476                                   KM_SLEEP | KM_NOFS);
2477                 if (!map)
2478                         return -ENOMEM;
2479                 *mapp = map;
2480         }
2481
2482         *nmaps = nirecs;
2483         map = *mapp;
2484         for (i = 0; i < *nmaps; i++) {
2485                 ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
2486                        irecs[i].br_startblock != HOLESTARTBLOCK);
2487                 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
2488                 map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
2489         }
2490         return 0;
2491 }
2492
2493 /*
2494  * Map the block we are given ready for reading. There are three possible return
2495  * values:
2496  *      -1 - will be returned if we land in a hole and mappedbno == -2 so the
2497  *           caller knows not to execute a subsequent read.
2498  *       0 - if we mapped the block successfully
2499  *      >0 - positive error number if there was an error.
2500  */
2501 static int
2502 xfs_dabuf_map(
2503         struct xfs_inode        *dp,
2504         xfs_dablk_t             bno,
2505         xfs_daddr_t             mappedbno,
2506         int                     whichfork,
2507         struct xfs_buf_map      **map,
2508         int                     *nmaps)
2509 {
2510         struct xfs_mount        *mp = dp->i_mount;
2511         int                     nfsb;
2512         int                     error = 0;
2513         struct xfs_bmbt_irec    irec;
2514         struct xfs_bmbt_irec    *irecs = &irec;
2515         int                     nirecs;
2516
2517         ASSERT(map && *map);
2518         ASSERT(*nmaps == 1);
2519
2520         if (whichfork == XFS_DATA_FORK)
2521                 nfsb = mp->m_dir_geo->fsbcount;
2522         else
2523                 nfsb = mp->m_attr_geo->fsbcount;
2524
2525         /*
2526          * Caller doesn't have a mapping.  -2 means don't complain
2527          * if we land in a hole.
2528          */
2529         if (mappedbno == -1 || mappedbno == -2) {
2530                 /*
2531                  * Optimize the one-block case.
2532                  */
2533                 if (nfsb != 1)
2534                         irecs = kmem_zalloc(sizeof(irec) * nfsb,
2535                                             KM_SLEEP | KM_NOFS);
2536
2537                 nirecs = nfsb;
2538                 error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
2539                                        &nirecs, xfs_bmapi_aflag(whichfork));
2540                 if (error)
2541                         goto out;
2542         } else {
2543                 irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
2544                 irecs->br_startoff = (xfs_fileoff_t)bno;
2545                 irecs->br_blockcount = nfsb;
2546                 irecs->br_state = 0;
2547                 nirecs = 1;
2548         }
2549
2550         if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
2551                 error = mappedbno == -2 ? -1 : -EFSCORRUPTED;
2552                 if (unlikely(error == -EFSCORRUPTED)) {
2553                         if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2554                                 int i;
2555                                 xfs_alert(mp, "%s: bno %lld dir: inode %lld",
2556                                         __func__, (long long)bno,
2557                                         (long long)dp->i_ino);
2558                                 for (i = 0; i < *nmaps; i++) {
2559                                         xfs_alert(mp,
2560 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2561                                                 i,
2562                                                 (long long)irecs[i].br_startoff,
2563                                                 (long long)irecs[i].br_startblock,
2564                                                 (long long)irecs[i].br_blockcount,
2565                                                 irecs[i].br_state);
2566                                 }
2567                         }
2568                         XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2569                                          XFS_ERRLEVEL_LOW, mp);
2570                 }
2571                 goto out;
2572         }
2573         error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
2574 out:
2575         if (irecs != &irec)
2576                 kmem_free(irecs);
2577         return error;
2578 }
2579
2580 /*
2581  * Get a buffer for the dir/attr block.
2582  */
2583 int
2584 xfs_da_get_buf(
2585         struct xfs_trans        *trans,
2586         struct xfs_inode        *dp,
2587         xfs_dablk_t             bno,
2588         xfs_daddr_t             mappedbno,
2589         struct xfs_buf          **bpp,
2590         int                     whichfork)
2591 {
2592         struct xfs_buf          *bp;
2593         struct xfs_buf_map      map;
2594         struct xfs_buf_map      *mapp;
2595         int                     nmap;
2596         int                     error;
2597
2598         *bpp = NULL;
2599         mapp = &map;
2600         nmap = 1;
2601         error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
2602                                 &mapp, &nmap);
2603         if (error) {
2604                 /* mapping a hole is not an error, but we don't continue */
2605                 if (error == -1)
2606                         error = 0;
2607                 goto out_free;
2608         }
2609
2610         bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
2611                                     mapp, nmap, 0);
2612         error = bp ? bp->b_error : -EIO;
2613         if (error) {
2614                 if (bp)
2615                         xfs_trans_brelse(trans, bp);
2616                 goto out_free;
2617         }
2618
2619         *bpp = bp;
2620
2621 out_free:
2622         if (mapp != &map)
2623                 kmem_free(mapp);
2624
2625         return error;
2626 }
2627
2628 /*
2629  * Get a buffer for the dir/attr block, fill in the contents.
2630  */
2631 int
2632 xfs_da_read_buf(
2633         struct xfs_trans        *trans,
2634         struct xfs_inode        *dp,
2635         xfs_dablk_t             bno,
2636         xfs_daddr_t             mappedbno,
2637         struct xfs_buf          **bpp,
2638         int                     whichfork,
2639         const struct xfs_buf_ops *ops)
2640 {
2641         struct xfs_buf          *bp;
2642         struct xfs_buf_map      map;
2643         struct xfs_buf_map      *mapp;
2644         int                     nmap;
2645         int                     error;
2646
2647         *bpp = NULL;
2648         mapp = &map;
2649         nmap = 1;
2650         error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
2651                                 &mapp, &nmap);
2652         if (error) {
2653                 /* mapping a hole is not an error, but we don't continue */
2654                 if (error == -1)
2655                         error = 0;
2656                 goto out_free;
2657         }
2658
2659         error = xfs_trans_read_buf_map(dp->i_mount, trans,
2660                                         dp->i_mount->m_ddev_targp,
2661                                         mapp, nmap, 0, &bp, ops);
2662         if (error)
2663                 goto out_free;
2664
2665         if (whichfork == XFS_ATTR_FORK)
2666                 xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2667         else
2668                 xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2669         *bpp = bp;
2670 out_free:
2671         if (mapp != &map)
2672                 kmem_free(mapp);
2673
2674         return error;
2675 }
2676
2677 /*
2678  * Readahead the dir/attr block.
2679  */
2680 int
2681 xfs_da_reada_buf(
2682         struct xfs_inode        *dp,
2683         xfs_dablk_t             bno,
2684         xfs_daddr_t             mappedbno,
2685         int                     whichfork,
2686         const struct xfs_buf_ops *ops)
2687 {
2688         struct xfs_buf_map      map;
2689         struct xfs_buf_map      *mapp;
2690         int                     nmap;
2691         int                     error;
2692
2693         mapp = &map;
2694         nmap = 1;
2695         error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
2696                                 &mapp, &nmap);
2697         if (error) {
2698                 /* mapping a hole is not an error, but we don't continue */
2699                 if (error == -1)
2700                         error = 0;
2701                 goto out_free;
2702         }
2703
2704         mappedbno = mapp[0].bm_bn;
2705         xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
2706
2707 out_free:
2708         if (mapp != &map)
2709                 kmem_free(mapp);
2710
2711         return error;
2712 }