Merge tag 'please-pull-mce' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 31 May 2012 17:53:37 +0000 (10:53 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 31 May 2012 17:53:37 +0000 (10:53 -0700)
Pull mce cleanup from Tony Luck:
 "One more mce cleanup before the 3.5 merge window closes"

* tag 'please-pull-mce' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  x86/mce: Cleanup timer mess

1  2 
arch/x86/kernel/cpu/mcheck/mce.c

@@@ -591,7 -591,7 +591,7 @@@ void machine_check_poll(enum mcp_flags 
        struct mce m;
        int i;
  
 -      percpu_inc(mce_poll_count);
 +      this_cpu_inc(mce_poll_count);
  
        mce_gather_info(&m, NULL);
  
@@@ -955,10 -955,9 +955,10 @@@ struct mce_info 
        atomic_t                inuse;
        struct task_struct      *t;
        __u64                   paddr;
 +      int                     restartable;
  } mce_info[MCE_INFO_MAX];
  
 -static void mce_save_info(__u64 addr)
 +static void mce_save_info(__u64 addr, int c)
  {
        struct mce_info *mi;
  
                if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
                        mi->t = current;
                        mi->paddr = addr;
 +                      mi->restartable = c;
                        return;
                }
        }
@@@ -1028,7 -1026,7 +1028,7 @@@ void do_machine_check(struct pt_regs *r
  
        atomic_inc(&mce_entry);
  
 -      percpu_inc(mce_exception_count);
 +      this_cpu_inc(mce_exception_count);
  
        if (!banks)
                goto out;
                        mce_panic("Fatal machine check on current CPU", &m, msg);
                if (worst == MCE_AR_SEVERITY) {
                        /* schedule action before return to userland */
 -                      mce_save_info(m.addr);
 +                      mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
                        set_thread_flag(TIF_MCE_NOTIFY);
                } else if (kill_it) {
                        force_sig(SIGBUS, current);
@@@ -1195,13 -1193,7 +1195,13 @@@ void mce_notify_process(void
  
        pr_err("Uncorrected hardware memory error in user-access at %llx",
                 mi->paddr);
 -      if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
 +      /*
 +       * We must call memory_failure() here even if the current process is
 +       * doomed. We still need to mark the page as poisoned and alert any
 +       * other users of the page.
 +       */
 +      if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
 +                         mi->restartable == 0) {
                pr_err("Memory error not recovered");
                force_sig(SIGBUS, current);
        }
@@@ -1251,15 -1243,15 +1251,15 @@@ void mce_log_therm_throt_event(__u64 st
   * poller finds an MCE, poll 2x faster.  When the poller finds no more
   * errors, poll 2x slower (up to check_interval seconds).
   */
- static int check_interval = 5 * 60; /* 5 minutes */
+ static unsigned long check_interval = 5 * 60; /* 5 minutes */
  
- static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
+ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  static DEFINE_PER_CPU(struct timer_list, mce_timer);
  
- static void mce_start_timer(unsigned long data)
+ static void mce_timer_fn(unsigned long data)
  {
-       struct timer_list *t = &per_cpu(mce_timer, data);
-       int *n;
+       struct timer_list *t = &__get_cpu_var(mce_timer);
+       unsigned long iv;
  
        WARN_ON(smp_processor_id() != data);
  
         * Alert userspace if needed.  If we logged an MCE, reduce the
         * polling interval, otherwise increase the polling interval.
         */
-       n = &__get_cpu_var(mce_next_interval);
+       iv = __this_cpu_read(mce_next_interval);
        if (mce_notify_irq())
-               *n = max(*n/2, HZ/100);
+               iv = max(iv, (unsigned long) HZ/100);
        else
-               *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+               iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+       __this_cpu_write(mce_next_interval, iv);
  
-       t->expires = jiffies + *n;
+       t->expires = jiffies + iv;
        add_timer_on(t, smp_processor_id());
  }
  
@@@ -1445,43 -1438,6 +1446,43 @@@ static int __cpuinit __mcheck_cpu_apply
                 */
                 if (c->x86 == 6 && banks > 0)
                        mce_banks[0].ctl = 0;
 +
 +               /*
 +                * Turn off MC4_MISC thresholding banks on those models since
 +                * they're not supported there.
 +                */
 +               if (c->x86 == 0x15 &&
 +                   (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
 +                       int i;
 +                       u64 val, hwcr;
 +                       bool need_toggle;
 +                       u32 msrs[] = {
 +                              0x00000413, /* MC4_MISC0 */
 +                              0xc0000408, /* MC4_MISC1 */
 +                       };
 +
 +                       rdmsrl(MSR_K7_HWCR, hwcr);
 +
 +                       /* McStatusWrEn has to be set */
 +                       need_toggle = !(hwcr & BIT(18));
 +
 +                       if (need_toggle)
 +                               wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 +
 +                       for (i = 0; i < ARRAY_SIZE(msrs); i++) {
 +                               rdmsrl(msrs[i], val);
 +
 +                               /* CntP bit set? */
 +                               if (val & BIT_64(62)) {
 +                                      val &= ~BIT_64(62);
 +                                      wrmsrl(msrs[i], val);
 +                               }
 +                       }
 +
 +                       /* restore old settings */
 +                       if (need_toggle)
 +                               wrmsrl(MSR_K7_HWCR, hwcr);
 +               }
        }
  
        if (c->x86_vendor == X86_VENDOR_INTEL) {
@@@ -1556,17 -1512,17 +1557,17 @@@ static void __mcheck_cpu_init_vendor(st
  static void __mcheck_cpu_init_timer(void)
  {
        struct timer_list *t = &__get_cpu_var(mce_timer);
-       int *n = &__get_cpu_var(mce_next_interval);
+       unsigned long iv = __this_cpu_read(mce_next_interval);
  
-       setup_timer(t, mce_start_timer, smp_processor_id());
+       setup_timer(t, mce_timer_fn, smp_processor_id());
  
        if (mce_ignore_ce)
                return;
  
-       *n = check_interval * HZ;
-       if (!*n)
+       __this_cpu_write(mce_next_interval, iv);
+       if (!iv)
                return;
-       t->expires = round_jiffies(jiffies + *n);
+       t->expires = round_jiffies(jiffies + iv);
        add_timer_on(t, smp_processor_id());
  }
  
@@@ -2276,7 -2232,7 +2277,7 @@@ mce_cpu_callback(struct notifier_block 
        case CPU_DOWN_FAILED_FROZEN:
                if (!mce_ignore_ce && check_interval) {
                        t->expires = round_jiffies(jiffies +
-                                          __get_cpu_var(mce_next_interval));
+                                       per_cpu(mce_next_interval, cpu));
                        add_timer_on(t, cpu);
                }
                smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);