2 * Copyright 2017 Omnibond Systems, L.L.C.
6 #include "orangefs-kernel.h"
7 #include "orangefs-bufmap.h"
9 struct orangefs_dir_part {
10 struct orangefs_dir_part *next;
16 struct orangefs_dir_part *part;
/*
 * Layout of the directory position (ctx->pos): the bits at and above
 * PART_SHIFT select the part, the bits below it hold the byte offset
 * inside that part.
 *
 * PART_SIZE is derived from PART_SHIFT so the two can never disagree;
 * it is also the upper bound accepted for a single readdir trailer.
 * PART_MASK keeps the part-number bits of a position; ~PART_MASK keeps
 * the offset bits.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
26 * There can be up to 512 directory entries. Each entry is encoded as
28 * 4 bytes: string size (n)
30 * 1 byte: trailing zero
35 * The trailer_buf starts with a struct orangefs_readdir_response_s
36 * which must be skipped to get to the directory data.
38 * The data which is received from the userspace daemon is termed a
39 * part and is stored in a linked list in case more than one part is
40 * needed for a large directory.
42 * The position pointer (ctx->pos) encodes the part and offset at which
43 * to begin reading. Bits above PART_SHIFT encode the part and bits
44 * below PART_SHIFT encode the offset. Parts are stored in a linked
45 * list which grows as data is received from the server. The overhead
46 * associated with managing the list is presumed to be small compared to
47 * the overhead of communicating with the server.
49 * As data is received from the server, it is placed at the end of the
50 * part list. Data is parsed from the current position as it is needed.
51 * When data is determined to be corrupt, it is either because the
52 * userspace component has sent back corrupt data or because the file
53 * pointer has been moved to an invalid location. Since the two cannot
54 * be differentiated, return EIO.
56 * Part zero is synthesized to contain `.' and `..'. Part one is the
57 * first part of the part list.
60 static int do_readdir(struct orangefs_inode_s *oi,
61 struct orangefs_dir *od, struct dentry *dentry,
62 struct orangefs_kernel_op_s *op)
64 struct orangefs_readdir_response_s *resp;
68 * Despite the badly named field, readdir does not use shared
69 * memory. However, there are a limited number of readdir
70 * slots, which must be allocated here. This flag simply tells
71 * the op scheduler to return the op here for retry.
73 op->uses_shared_memory = 1;
74 op->upcall.req.readdir.refn = oi->refn;
75 op->upcall.req.readdir.token = od->token;
76 op->upcall.req.readdir.max_dirent_count =
77 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
80 bufi = orangefs_readdir_index_get();
86 op->upcall.req.readdir.buf_index = bufi;
88 r = service_operation(op, "orangefs_readdir",
89 get_interruptible_flag(dentry->d_inode));
91 orangefs_readdir_index_put(bufi);
93 if (op_state_purged(op)) {
95 vfree(op->downcall.trailer_buf);
97 } else if (r == -EIO) {
98 vfree(op->downcall.trailer_buf);
105 vfree(op->downcall.trailer_buf);
108 } else if (op->downcall.status) {
109 vfree(op->downcall.trailer_buf);
110 od->error = op->downcall.status;
111 return op->downcall.status;
115 * The maximum size is size per entry times the 512 entries plus
116 * the header. This is well under the limit.
118 if (op->downcall.trailer_size > PART_SIZE) {
119 vfree(op->downcall.trailer_buf);
124 resp = (struct orangefs_readdir_response_s *)
125 op->downcall.trailer_buf;
126 od->token = resp->token;
130 static int parse_readdir(struct orangefs_dir *od,
131 struct orangefs_kernel_op_s *op)
133 struct orangefs_dir_part *part, *new;
138 while (part && part->next) {
143 new = (void *)op->downcall.trailer_buf;
145 new->len = op->downcall.trailer_size -
146 sizeof(struct orangefs_readdir_response_s);
152 od->end = count << PART_SHIFT;
157 static int orangefs_dir_more(struct orangefs_inode_s *oi,
158 struct orangefs_dir *od, struct dentry *dentry)
160 struct orangefs_kernel_op_s *op;
163 op = op_alloc(ORANGEFS_VFS_OP_READDIR);
168 r = do_readdir(oi, od, dentry, op);
173 r = parse_readdir(od, op);
185 static int fill_from_part(struct orangefs_dir_part *part,
186 struct dir_context *ctx)
188 const int offset = sizeof(struct orangefs_readdir_response_s);
189 struct orangefs_khandle *khandle;
193 i = ctx->pos & ~PART_MASK;
195 /* The file offset from userspace is too large. */
200 * If the seek pointer is positioned just before an entry it
201 * should find the next entry.
206 while (i < part->len) {
207 if (part->len < i + sizeof *len)
209 len = (void *)part + offset + i;
211 * len is the size of the string itself. padlen is the
212 * total size of the encoded string.
214 padlen = (sizeof *len + *len + 1) +
215 (8 - (sizeof *len + *len + 1)%8)%8;
216 if (part->len < i + padlen + sizeof *khandle)
218 s = (void *)part + offset + i + sizeof *len;
221 khandle = (void *)part + offset + i + padlen;
222 if (!dir_emit(ctx, s, *len,
223 orangefs_khandle_to_ino(khandle),
226 i += padlen + sizeof *khandle;
228 BUG_ON(i > part->len);
229 ctx->pos = (ctx->pos & PART_MASK) | i;
237 static int orangefs_dir_fill(struct orangefs_inode_s *oi,
238 struct orangefs_dir *od, struct dentry *dentry,
239 struct dir_context *ctx)
241 struct orangefs_dir_part *part;
244 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
247 while (part->next && count) {
251 /* This means the userspace file offset is invalid. */
257 while (part && part->len) {
259 r = fill_from_part(part, ctx);
264 /* Userspace buffer is full. */
268 * The part ran out of data. Move to the next
270 ctx->pos = (ctx->pos & PART_MASK) +
278 static int orangefs_dir_iterate(struct file *file,
279 struct dir_context *ctx)
281 struct orangefs_inode_s *oi;
282 struct orangefs_dir *od;
283 struct dentry *dentry;
286 dentry = file->f_path.dentry;
287 oi = ORANGEFS_I(dentry->d_inode);
288 od = file->private_data;
294 if (!dir_emit_dot(file, ctx))
299 if (!dir_emit_dotdot(file, ctx))
301 ctx->pos = 1 << PART_SHIFT;
305 * The seek position is in the first synthesized part but is not
308 if ((ctx->pos & PART_MASK) == 0)
314 * Must read more if the user has sought past what has been read
315 * so far. Stop a user who has sought past the end.
317 while (od->token != ORANGEFS_ITERATE_END &&
318 ctx->pos > od->end) {
319 r = orangefs_dir_more(oi, od, dentry);
323 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
326 /* Then try to fill if there's any left in the buffer. */
327 if (ctx->pos < od->end) {
328 r = orangefs_dir_fill(oi, od, dentry, ctx);
333 /* Finally get some more and try to fill. */
334 if (od->token != ORANGEFS_ITERATE_END) {
335 r = orangefs_dir_more(oi, od, dentry);
338 r = orangefs_dir_fill(oi, od, dentry, ctx);
344 static int orangefs_dir_open(struct inode *inode, struct file *file)
346 struct orangefs_dir *od;
347 file->private_data = kmalloc(sizeof(struct orangefs_dir),
349 if (!file->private_data)
351 od = file->private_data;
352 od->token = ORANGEFS_ITERATE_START;
354 od->end = 1 << PART_SHIFT;
359 static int orangefs_dir_release(struct inode *inode, struct file *file)
361 struct orangefs_dir *od = file->private_data;
362 struct orangefs_dir_part *part = od->part;
363 orangefs_flush_inode(inode);
365 struct orangefs_dir_part *next = part->next;
373 const struct file_operations orangefs_dir_operations = {
374 .llseek = default_llseek,
375 .read = generic_read_dir,
376 .iterate = orangefs_dir_iterate,
377 .open = orangefs_dir_open,
378 .release = orangefs_dir_release