tracing: Add better comments for the filtering temp buffer use case
[linux-2.6-microblaze.git] / kernel/trace/trace.c
index a21ef9c..a0d66a0 100644
@@ -2734,9 +2734,45 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
        if (!tr->no_filter_buffering_ref &&
            (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
            (entry = this_cpu_read(trace_buffered_event))) {
-               /* Try to use the per cpu buffer first */
+               /*
+                * Filtering is on, so try to use the per cpu buffer first.
+                * This buffer will simulate a ring_buffer_event,
+                * where the type_len is zero and the array[0] will
+                * hold the full length.
+                * (see include/linux/ring_buffer.h for details on
+                *  how the ring_buffer_event is structured).
+                *
+                * Using a temp buffer during filtering and copying it
+                * on a matched filter is quicker than writing directly
+                * into the ring buffer and then discarding it when
+                * it doesn't match. That is because the discard
+                * requires several atomic operations to get right.
+                * Copying on match and doing nothing on a failed match
+                * is still quicker than skipping the copy on a match
+                * but having to discard the event out of the ring
+                * buffer on a failed match.
+                */
+               int max_len = PAGE_SIZE - struct_size(entry, array, 1);
+
                val = this_cpu_inc_return(trace_buffered_event_cnt);
-               if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
+
+               /*
+                * Preemption is disabled, but interrupts and NMIs
+                * can still come in now. If that happens after
+                * the above increment, then it will have to go
+                * back to the old method of allocating the event
+                * on the ring buffer, and if the filter fails, it
+                * will have to call ring_buffer_discard_commit()
+                * to remove it.
+                *
+                * Need to also check the unlikely case that the
+                * length is bigger than the temp buffer size.
+                * If that happens, then the reserve is pretty much
+                * guaranteed to fail, as the ring buffer currently
+                * only allows events smaller than a page. But that may
+                * change in the future, so let the ring buffer reserve
+                * handle the failure in that case.
+                */
+               if (val == 1 && likely(len <= max_len)) {
                        trace_event_setup(entry, type, trace_ctx);
                        entry->array[0] = len;
                        return entry;
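
The layout described in the first comment block can be modeled in user space. The sketch below is an illustration only, not kernel code: struct rb_event mirrors struct ring_buffer_event from include/linux/ring_buffer.h, PAGE_SIZE is assumed to be 4096, and max_len repeats the PAGE_SIZE - struct_size(entry, array, 1) arithmetic from the hunk (the four-byte event header plus the array[0] slot that holds the length):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096			/* assumed 4K pages, illustration only */

/* Mirrors struct ring_buffer_event from include/linux/ring_buffer.h. */
struct rb_event {
        uint32_t type_len:5, time_delta:27;
        uint32_t array[];
};

/* Stands in for the per cpu trace_buffered_event page. */
static uint32_t page[PAGE_SIZE / sizeof(uint32_t)];

int main(void)
{
        struct rb_event *entry = (struct rb_event *)page;
        /* Same arithmetic as PAGE_SIZE - struct_size(entry, array, 1). */
        size_t max_len = PAGE_SIZE - (sizeof(*entry) + sizeof(entry->array[0]));
        const char payload[] = "example trace event payload";
        uint32_t len = sizeof(payload);

        if (len > max_len)		/* oversized: take the real reserve path */
                return 1;

        entry->type_len = 0;		/* zero: full length lives in array[0] */
        entry->time_delta = 0;
        entry->array[0] = len;		/* full payload length */
        memcpy(&entry->array[1], payload, len);

        printf("header %zu bytes, max payload %zu bytes\n",
               sizeof(*entry) + sizeof(entry->array[0]), max_len);
        return 0;
}

With type_len zero the event carries its real length in array[0] and the payload starts at &array[1], which is the shape the entry->array[0] = len assignment above relies on.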
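
The increment together with the val == 1 test is a nesting guard: with preemption off, only an interrupt or NMI can run on the same CPU between the increment and the check, and it will see a count greater than one. Below is a minimal single-threaded sketch of the idea, where a plain counter stands in for the kernel's per-CPU this_cpu_inc_return()/this_cpu_dec() pair and try_temp_buffer()/put_temp_buffer() are hypothetical names:

#include <stdio.h>

static int buffered_event_cnt;		/* stands in for trace_buffered_event_cnt */
static char temp_buf[64];		/* stands in for the per cpu temp buffer */

static char *try_temp_buffer(void)
{
        int val = ++buffered_event_cnt;		/* this_cpu_inc_return() */

        if (val == 1)				/* sole user on this CPU */
                return temp_buf;
        return NULL;				/* nested: use the ring buffer */
}

static void put_temp_buffer(void)
{
        --buffered_event_cnt;			/* this_cpu_dec() */
}

int main(void)
{
        char *outer = try_temp_buffer();	/* normal context wins the buffer */
        char *nested = try_temp_buffer();	/* "interrupt" sees a count of 2 */

        printf("outer uses %s, nested uses %s\n",
               outer ? "temp buffer" : "ring buffer",
               nested ? "temp buffer" : "ring buffer");

        put_temp_buffer();			/* nested context releases first */
        put_temp_buffer();
        return 0;
}

A nested context that loses the race simply reserves in the ring buffer directly and, if its filter then fails, pays for the ring_buffer_discard_commit() the comment mentions.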
@@ -6145,7 +6181,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
                                  unsigned long size, int cpu_id)
 {
-       int ret = size;
+       int ret;
 
        mutex_lock(&trace_types_lock);
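
On the second hunk: judging by the fix, ret is presumably assigned on every path of tracing_resize_ring_buffer() before it is returned, so the "= size" initializer was a dead store, and one that quietly truncated an unsigned long into an int. A minimal sketch of that pattern, with a hypothetical function in place of the real one:

#include <stdio.h>

/* Hypothetical stand-in for tracing_resize_ring_buffer(). */
static int resize(unsigned long size, int cpu_valid)
{
        int ret;			/* no "= size" needed */

        if (!cpu_valid) {
                ret = -22;		/* error path, think -EINVAL */
                goto out;
        }
        (void)size;			/* the real resize would use it here */
        ret = 0;			/* success path */
out:
        return ret;			/* written on every path before use */
}

int main(void)
{
        printf("%d %d\n", resize(1UL << 20, 1), resize(1UL << 20, 0));
        return 0;
}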