struct addr_location al;
 
        if (machine__resolve(trace->host, &al, sample) < 0 ||
-           thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
+           thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
                return -1;
 
        return 0;
        trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
                                  evlist->threads->nr > 1 ||
                                  perf_evlist__first(evlist)->attr.inherit;
+
+       /*
+        * Now that we already used evsel->attr to ask the kernel to setup the
+        * events, lets reuse evsel->attr.sample_max_stack as the limit in
+        * trace__resolve_callchain(), allowing per-event max-stack settings
+        * to override an explicitely set --max-stack global setting.
+        */
+       evlist__for_each_entry(evlist, evsel) {
+               if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
+                   evsel->attr.sample_max_stack == 0)
+                       evsel->attr.sample_max_stack = trace->max_stack;
+       }
 again:
        before = trace->nr_events;