afs: Detect cell aliases 1 - Cells with root volumes
[linux-2.6-microblaze.git] / fs / afs / rotate.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Handle fileserver selection and rotation.
3  *
4  * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/kernel.h>
9 #include <linux/slab.h>
10 #include <linux/fs.h>
11 #include <linux/sched.h>
12 #include <linux/delay.h>
13 #include <linux/sched/signal.h>
14 #include "internal.h"
15 #include "afs_fs.h"
16
17 /*
18  * Begin iteration through a server list, starting with the vnode's last used
19  * server if possible, or the last recorded good server if not.
20  */
21 static bool afs_start_fs_iteration(struct afs_operation *op,
22                                    struct afs_vnode *vnode)
23 {
24         struct afs_cb_interest *cbi;
25         int i;
26
27         read_lock(&op->volume->servers_lock);
28         op->server_list = afs_get_serverlist(
29                 rcu_dereference_protected(op->volume->servers,
30                                           lockdep_is_held(&op->volume->servers_lock)));
31         read_unlock(&op->volume->servers_lock);
32
33         op->untried = (1UL << op->server_list->nr_servers) - 1;
34         op->index = READ_ONCE(op->server_list->preferred);
35
36         cbi = rcu_dereference_protected(vnode->cb_interest,
37                                         lockdep_is_held(&vnode->io_lock));
38         if (cbi) {
39                 /* See if the vnode's preferred record is still available */
40                 for (i = 0; i < op->server_list->nr_servers; i++) {
41                         if (op->server_list->servers[i].cb_interest == cbi) {
42                                 op->index = i;
43                                 goto found_interest;
44                         }
45                 }
46
47                 /* If we have a lock outstanding on a server that's no longer
48                  * serving this vnode, then we can't switch to another server
49                  * and have to return an error.
50                  */
51                 if (op->flags & AFS_OPERATION_CUR_ONLY) {
52                         op->error = -ESTALE;
53                         return false;
54                 }
55
56                 /* Note that the callback promise is effectively broken */
57                 write_seqlock(&vnode->cb_lock);
58                 ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest));
59                 rcu_assign_pointer(vnode->cb_interest, NULL);
60                 if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
61                         vnode->cb_break++;
62                 write_sequnlock(&vnode->cb_lock);
63
64                 afs_put_cb_interest(op->net, cbi);
65                 cbi = NULL;
66         }
67
68 found_interest:
69         return true;
70 }
71
72 /*
73  * Post volume busy note.
74  */
75 static void afs_busy(struct afs_volume *volume, u32 abort_code)
76 {
77         const char *m;
78
79         switch (abort_code) {
80         case VOFFLINE:          m = "offline";          break;
81         case VRESTARTING:       m = "restarting";       break;
82         case VSALVAGING:        m = "being salvaged";   break;
83         default:                m = "busy";             break;
84         }
85
86         pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
87 }
88
89 /*
90  * Sleep and retry the operation to the same fileserver.
91  */
92 static bool afs_sleep_and_retry(struct afs_operation *op)
93 {
94         if (!(op->flags & AFS_OPERATION_UNINTR)) {
95                 msleep_interruptible(1000);
96                 if (signal_pending(current)) {
97                         op->error = -ERESTARTSYS;
98                         return false;
99                 }
100         } else {
101                 msleep(1000);
102         }
103
104         return true;
105 }
106
107 /*
108  * Select the fileserver to use.  May be called multiple times to rotate
109  * through the fileservers.
110  */
111 bool afs_select_fileserver(struct afs_operation *op)
112 {
113         struct afs_addr_list *alist;
114         struct afs_server *server;
115         struct afs_vnode *vnode = op->file[0].vnode;
116         struct afs_error e;
117         u32 rtt;
118         int error = op->ac.error, i;
119
120         _enter("%lx[%d],%lx[%d],%d,%d",
121                op->untried, op->index,
122                op->ac.tried, op->ac.index,
123                error, op->ac.abort_code);
124
125         if (op->flags & AFS_OPERATION_STOP) {
126                 _leave(" = f [stopped]");
127                 return false;
128         }
129
130         op->nr_iterations++;
131
132         /* Evaluate the result of the previous operation, if there was one. */
133         switch (error) {
134         case SHRT_MAX:
135                 goto start;
136
137         case 0:
138         default:
139                 /* Success or local failure.  Stop. */
140                 op->error = error;
141                 op->flags |= AFS_OPERATION_STOP;
142                 _leave(" = f [okay/local %d]", error);
143                 return false;
144
145         case -ECONNABORTED:
146                 /* The far side rejected the operation on some grounds.  This
147                  * might involve the server being busy or the volume having been moved.
148                  */
149                 switch (op->ac.abort_code) {
150                 case VNOVOL:
151                         /* This fileserver doesn't know about the volume.
152                          * - May indicate that the VL is wrong - retry once and compare
153                          *   the results.
154                          * - May indicate that the fileserver couldn't attach to the vol.
155                          */
156                         if (op->flags & AFS_OPERATION_VNOVOL) {
157                                 op->error = -EREMOTEIO;
158                                 goto next_server;
159                         }
160
161                         write_lock(&op->volume->servers_lock);
162                         op->server_list->vnovol_mask |= 1 << op->index;
163                         write_unlock(&op->volume->servers_lock);
164
165                         set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
166                         error = afs_check_volume_status(op->volume, op);
167                         if (error < 0)
168                                 goto failed_set_error;
169
170                         if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
171                                 op->error = -ENOMEDIUM;
172                                 goto failed;
173                         }
174
175                         /* If the server list didn't change, then assume that
176                          * it's the fileserver having trouble.
177                          */
178                         if (rcu_access_pointer(op->volume->servers) == op->server_list) {
179                                 op->error = -EREMOTEIO;
180                                 goto next_server;
181                         }
182
183                         /* Try again */
184                         op->flags |= AFS_OPERATION_VNOVOL;
185                         _leave(" = t [vnovol]");
186                         return true;
187
188                 case VSALVAGE: /* TODO: Should this return an error or iterate? */
189                 case VVOLEXISTS:
190                 case VNOSERVICE:
191                 case VONLINE:
192                 case VDISKFULL:
193                 case VOVERQUOTA:
194                         op->error = afs_abort_to_error(op->ac.abort_code);
195                         goto next_server;
196
197                 case VOFFLINE:
198                         if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
199                                 afs_busy(op->volume, op->ac.abort_code);
200                                 clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
201                         }
202                         if (op->flags & AFS_OPERATION_NO_VSLEEP) {
203                                 op->error = -EADV;
204                                 goto failed;
205                         }
206                         if (op->flags & AFS_OPERATION_CUR_ONLY) {
207                                 op->error = -ESTALE;
208                                 goto failed;
209                         }
210                         goto busy;
211
212                 case VSALVAGING:
213                 case VRESTARTING:
214                 case VBUSY:
215                         /* Retry after going round all the servers unless we
216                          * have a file lock we need to maintain.
217                          */
218                         if (op->flags & AFS_OPERATION_NO_VSLEEP) {
219                                 op->error = -EBUSY;
220                                 goto failed;
221                         }
222                         if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
223                                 afs_busy(op->volume, op->ac.abort_code);
224                                 clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
225                         }
226                 busy:
227                         if (op->flags & AFS_OPERATION_CUR_ONLY) {
228                                 if (!afs_sleep_and_retry(op))
229                                         goto failed;
230
231                                  /* Retry with same server & address */
232                                 _leave(" = t [vbusy]");
233                                 return true;
234                         }
235
236                         op->flags |= AFS_OPERATION_VBUSY;
237                         goto next_server;
238
239                 case VMOVED:
240                         /* The volume migrated to another server.  We consider
241                          * consider all locks and callbacks broken and request
242                          * an update from the VLDB.
243                          *
244                          * We also limit the number of VMOVED hops we will
245                          * honour, just in case someone sets up a loop.
246                          */
247                         if (op->flags & AFS_OPERATION_VMOVED) {
248                                 op->error = -EREMOTEIO;
249                                 goto failed;
250                         }
251                         op->flags |= AFS_OPERATION_VMOVED;
252
253                         set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
254                         set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
255                         error = afs_check_volume_status(op->volume, op);
256                         if (error < 0)
257                                 goto failed_set_error;
258
259                         /* If the server list didn't change, then the VLDB is
260                          * out of sync with the fileservers.  This is hopefully
261                          * a temporary condition, however, so we don't want to
262                          * permanently block access to the file.
263                          *
264                          * TODO: Try other fileservers if we can.
265                          *
266                          * TODO: Retry a few times with sleeps.
267                          */
268                         if (rcu_access_pointer(op->volume->servers) == op->server_list) {
269                                 op->error = -ENOMEDIUM;
270                                 goto failed;
271                         }
272
273                         goto restart_from_beginning;
274
275                 default:
276                         clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
277                         clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
278                         op->error = afs_abort_to_error(op->ac.abort_code);
279                         goto failed;
280                 }
281
282         case -ETIMEDOUT:
283         case -ETIME:
284                 if (op->error != -EDESTADDRREQ)
285                         goto iterate_address;
286                 /* Fall through */
287         case -ERFKILL:
288         case -EADDRNOTAVAIL:
289         case -ENETUNREACH:
290         case -EHOSTUNREACH:
291         case -EHOSTDOWN:
292         case -ECONNREFUSED:
293                 _debug("no conn");
294                 op->error = error;
295                 goto iterate_address;
296
297         case -ECONNRESET:
298                 _debug("call reset");
299                 op->error = error;
300                 goto failed;
301         }
302
303 restart_from_beginning:
304         _debug("restart");
305         afs_end_cursor(&op->ac);
306         afs_put_cb_interest(op->net, op->cbi);
307         op->cbi = NULL;
308         afs_put_serverlist(op->net, op->server_list);
309         op->server_list = NULL;
310 start:
311         _debug("start");
312         /* See if we need to do an update of the volume record.  Note that the
313          * volume may have moved or even have been deleted.
314          */
315         error = afs_check_volume_status(op->volume, op);
316         if (error < 0)
317                 goto failed_set_error;
318
319         if (!afs_start_fs_iteration(op, vnode))
320                 goto failed;
321
322         _debug("__ VOL %llx __", op->volume->vid);
323
324 pick_server:
325         _debug("pick [%lx]", op->untried);
326
327         error = afs_wait_for_fs_probes(op->server_list, op->untried);
328         if (error < 0)
329                 goto failed_set_error;
330
331         /* Pick the untried server with the lowest RTT.  If we have outstanding
332          * callbacks, we stick with the server we're already using if we can.
333          */
334         if (op->cbi) {
335                 _debug("cbi %u", op->index);
336                 if (test_bit(op->index, &op->untried))
337                         goto selected_server;
338                 afs_put_cb_interest(op->net, op->cbi);
339                 op->cbi = NULL;
340                 _debug("nocbi");
341         }
342
343         op->index = -1;
344         rtt = U32_MAX;
345         for (i = 0; i < op->server_list->nr_servers; i++) {
346                 struct afs_server *s = op->server_list->servers[i].server;
347
348                 if (!test_bit(i, &op->untried) || !s->probe.responded)
349                         continue;
350                 if (s->probe.rtt < rtt) {
351                         op->index = i;
352                         rtt = s->probe.rtt;
353                 }
354         }
355
356         if (op->index == -1)
357                 goto no_more_servers;
358
359 selected_server:
360         _debug("use %d", op->index);
361         __clear_bit(op->index, &op->untried);
362
363         /* We're starting on a different fileserver from the list.  We need to
364          * check it, create a callback intercept, find its address list and
365          * probe its capabilities before we use it.
366          */
367         ASSERTCMP(op->ac.alist, ==, NULL);
368         server = op->server_list->servers[op->index].server;
369
370         if (!afs_check_server_record(op, server))
371                 goto failed;
372
373         _debug("USING SERVER: %pU", &server->uuid);
374
375         /* Make sure we've got a callback interest record for this server.  We
376          * have to link it in before we send the request as we can be sent a
377          * break request before we've finished decoding the reply and
378          * installing the vnode.
379          */
380         error = afs_register_server_cb_interest(vnode, op->server_list,
381                                                 op->index);
382         if (error < 0)
383                 goto failed_set_error;
384
385         op->cbi = afs_get_cb_interest(
386                 rcu_dereference_protected(vnode->cb_interest,
387                                           lockdep_is_held(&vnode->io_lock)));
388
389         read_lock(&server->fs_lock);
390         alist = rcu_dereference_protected(server->addresses,
391                                           lockdep_is_held(&server->fs_lock));
392         afs_get_addrlist(alist);
393         read_unlock(&server->fs_lock);
394
395         memset(&op->ac, 0, sizeof(op->ac));
396
397         if (!op->ac.alist)
398                 op->ac.alist = alist;
399         else
400                 afs_put_addrlist(alist);
401
402         op->ac.index = -1;
403
404 iterate_address:
405         ASSERT(op->ac.alist);
406         /* Iterate over the current server's address list to try and find an
407          * address on which it will respond to us.
408          */
409         if (!afs_iterate_addresses(&op->ac))
410                 goto next_server;
411
412         _debug("address [%u] %u/%u", op->index, op->ac.index, op->ac.alist->nr_addrs);
413
414         _leave(" = t");
415         return true;
416
417 next_server:
418         _debug("next");
419         afs_end_cursor(&op->ac);
420         goto pick_server;
421
422 no_more_servers:
423         /* That's all the servers poked to no good effect.  Try again if some
424          * of them were busy.
425          */
426         if (op->flags & AFS_OPERATION_VBUSY)
427                 goto restart_from_beginning;
428
429         e.error = -EDESTADDRREQ;
430         e.responded = false;
431         for (i = 0; i < op->server_list->nr_servers; i++) {
432                 struct afs_server *s = op->server_list->servers[i].server;
433
434                 afs_prioritise_error(&e, READ_ONCE(s->probe.error),
435                                      s->probe.abort_code);
436         }
437
438         error = e.error;
439
440 failed_set_error:
441         op->error = error;
442 failed:
443         op->flags |= AFS_OPERATION_STOP;
444         afs_end_cursor(&op->ac);
445         _leave(" = f [failed %d]", op->error);
446         return false;
447 }
448
/*
 * Stick with the fileserver already in use for a vnode and never rotate to
 * another one.  This is for when a lock is held on the file, as that lock is
 * backed only by the fileserver that granted it.
 *
 * On the first call (op->ac.error == SHRT_MAX) the address list of the server
 * behind op->cbi is pinned and installed in the address cursor; with no
 * callback interest or no address list the operation fails with -ESTALE.  On
 * later calls, network-level failures advance to the server's next address,
 * an abort is translated by afs_abort_to_error(), and anything else
 * (including success) ends the operation with that result.
 *
 * Returns true if the caller should issue the operation using op->ac, false
 * otherwise.
 */
bool afs_select_current_fileserver(struct afs_operation *op)
{
	struct afs_cb_interest *interest;
	struct afs_addr_list *addrs;
	int prev_error = op->ac.error;

	_enter("");

	switch (prev_error) {
	case SHRT_MAX:
		interest = op->cbi;
		if (!interest)
			goto stale;

		read_lock(&interest->server->fs_lock);
		addrs = rcu_dereference_protected(interest->server->addresses,
						  lockdep_is_held(&interest->server->fs_lock));
		afs_get_addrlist(addrs);
		read_unlock(&interest->server->fs_lock);
		if (!addrs)
			goto stale;

		/* Start the address cursor afresh on this server's list. */
		memset(&op->ac, 0, sizeof(op->ac));
		op->ac.alist = addrs;
		op->ac.index = -1;
		break;

	case -ECONNABORTED:
		op->error = afs_abort_to_error(op->ac.abort_code);
		op->flags |= AFS_OPERATION_STOP;
		_leave(" = f [abort]");
		return false;

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		_debug("no conn");
		op->error = prev_error;
		break;

	default:
		/* Success (0) or a local failure: either way we're done. */
		op->error = prev_error;
		op->flags |= AFS_OPERATION_STOP;
		_leave(" = f [okay/local %d]", prev_error);
		return false;
	}

	/* Step to the next address of the current server, if one is left. */
	if (!afs_iterate_addresses(&op->ac)) {
		afs_end_cursor(&op->ac);
		return false;
	}

	_leave(" = t");
	return true;

stale:
	op->error = -ESTALE;
	op->flags |= AFS_OPERATION_STOP;
	return false;
}
526
527 /*
528  * Dump cursor state in the case of the error being EDESTADDRREQ.
529  */
530 void afs_dump_edestaddrreq(const struct afs_operation *op)
531 {
532         static int count;
533         int i;
534
535         if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
536                 return;
537         count++;
538
539         rcu_read_lock();
540
541         pr_notice("EDESTADDR occurred\n");
542         pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
543                   op->file[0].cb_break_before,
544                   op->file[1].cb_break_before, op->flags, op->error);
545         pr_notice("FC: ut=%lx ix=%d ni=%u\n",
546                   op->untried, op->index, op->nr_iterations);
547
548         if (op->server_list) {
549                 const struct afs_server_list *sl = op->server_list;
550                 pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
551                           sl->nr_servers, sl->preferred, sl->vnovol_mask);
552                 for (i = 0; i < sl->nr_servers; i++) {
553                         const struct afs_server *s = sl->servers[i].server;
554                         pr_notice("FC: server fl=%lx av=%u %pU\n",
555                                   s->flags, s->addr_version, &s->uuid);
556                         if (s->addresses) {
557                                 const struct afs_addr_list *a =
558                                         rcu_dereference(s->addresses);
559                                 pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
560                                           a->version,
561                                           a->nr_ipv4, a->nr_addrs, a->max_addrs,
562                                           a->preferred);
563                                 pr_notice("FC:  - R=%lx F=%lx\n",
564                                           a->responded, a->failed);
565                                 if (a == op->ac.alist)
566                                         pr_notice("FC:  - current\n");
567                         }
568                 }
569         }
570
571         pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
572                   op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
573                   op->ac.responded, op->ac.nr_iterations);
574         rcu_read_unlock();
575 }