__release(ptl2);
}
+static inline bool is_pte_pages_stable(pte_t *dst_pte, pte_t *src_pte,
+ pte_t orig_dst_pte, pte_t orig_src_pte,
+ pmd_t *dst_pmd, pmd_t dst_pmdval)
+{
+ return pte_same(ptep_get(src_pte), orig_src_pte) &&
+ pte_same(ptep_get(dst_pte), orig_dst_pte) &&
+ pmd_same(dst_pmdval, pmdp_get_lockless(dst_pmd));
+}
static int move_present_pte(struct mm_struct *mm,
struct vm_area_struct *dst_vma,
unsigned long dst_addr, unsigned long src_addr,
pte_t *dst_pte, pte_t *src_pte,
pte_t orig_dst_pte, pte_t orig_src_pte,
+ pmd_t *dst_pmd, pmd_t dst_pmdval,
spinlock_t *dst_ptl, spinlock_t *src_ptl,
struct folio *src_folio)
{
double_pt_lock(dst_ptl, src_ptl);
- if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
- !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
+ if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
+ dst_pmd, dst_pmdval)) {
err = -EAGAIN;
goto out;
}
unsigned long dst_addr, unsigned long src_addr,
pte_t *dst_pte, pte_t *src_pte,
pte_t orig_dst_pte, pte_t orig_src_pte,
+ pmd_t *dst_pmd, pmd_t dst_pmdval,
spinlock_t *dst_ptl, spinlock_t *src_ptl)
{
if (!pte_swp_exclusive(orig_src_pte))
double_pt_lock(dst_ptl, src_ptl);
- if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
- !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
+ if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
+ dst_pmd, dst_pmdval)) {
double_pt_unlock(dst_ptl, src_ptl);
return -EAGAIN;
}
unsigned long dst_addr, unsigned long src_addr,
pte_t *dst_pte, pte_t *src_pte,
pte_t orig_dst_pte, pte_t orig_src_pte,
+ pmd_t *dst_pmd, pmd_t dst_pmdval,
spinlock_t *dst_ptl, spinlock_t *src_ptl)
{
pte_t zero_pte;
double_pt_lock(dst_ptl, src_ptl);
- if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
- !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
+ if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
+ dst_pmd, dst_pmdval)) {
double_pt_unlock(dst_ptl, src_ptl);
return -EAGAIN;
}
pte_t *src_pte = NULL;
pte_t *dst_pte = NULL;
pmd_t dummy_pmdval;
+ pmd_t dst_pmdval;
struct folio *src_folio = NULL;
struct anon_vma *src_anon_vma = NULL;
struct mmu_notifier_range range;
retry:
/*
* Use the maywrite version to indicate that dst_pte will be modified,
- * but since we will use pte_same() to detect the change of the pte
- * entry, there is no need to get pmdval, so just pass a dummy variable
- * to it.
+ * since dst_pte needs to be none, the subsequent pte_same() check
+ * cannot prevent the dst_pte page from being freed concurrently, so we
+ * also need to abtain dst_pmdval and recheck pmd_same() later.
*/
- dst_pte = pte_offset_map_rw_nolock(mm, dst_pmd, dst_addr, &dummy_pmdval,
+ dst_pte = pte_offset_map_rw_nolock(mm, dst_pmd, dst_addr, &dst_pmdval,
&dst_ptl);
/* Retry if a huge pmd materialized from under us */
goto out;
}
- /* same as dst_pte */
+ /*
+ * Unlike dst_pte, the subsequent pte_same() check can ensure the
+ * stability of the src_pte page, so there is no need to get pmdval,
+ * just pass a dummy variable to it.
+ */
src_pte = pte_offset_map_rw_nolock(mm, src_pmd, src_addr, &dummy_pmdval,
&src_ptl);
}
/* Sanity checks before the operation */
- if (WARN_ON_ONCE(pmd_none(*dst_pmd)) || WARN_ON_ONCE(pmd_none(*src_pmd)) ||
- WARN_ON_ONCE(pmd_trans_huge(*dst_pmd)) || WARN_ON_ONCE(pmd_trans_huge(*src_pmd))) {
+ if (pmd_none(*dst_pmd) || pmd_none(*src_pmd) ||
+ pmd_trans_huge(*dst_pmd) || pmd_trans_huge(*src_pmd)) {
err = -EINVAL;
goto out;
}
err = move_zeropage_pte(mm, dst_vma, src_vma,
dst_addr, src_addr, dst_pte, src_pte,
orig_dst_pte, orig_src_pte,
- dst_ptl, src_ptl);
+ dst_pmd, dst_pmdval, dst_ptl, src_ptl);
goto out;
}
err = move_present_pte(mm, dst_vma, src_vma,
dst_addr, src_addr, dst_pte, src_pte,
- orig_dst_pte, orig_src_pte,
- dst_ptl, src_ptl, src_folio);
+ orig_dst_pte, orig_src_pte, dst_pmd,
+ dst_pmdval, dst_ptl, src_ptl, src_folio);
} else {
entry = pte_to_swp_entry(orig_src_pte);
if (non_swap_entry(entry)) {
goto out;
}
- err = move_swap_pte(mm, dst_addr, src_addr,
- dst_pte, src_pte,
- orig_dst_pte, orig_src_pte,
- dst_ptl, src_ptl);
+ err = move_swap_pte(mm, dst_addr, src_addr, dst_pte, src_pte,
+ orig_dst_pte, orig_src_pte, dst_pmd,
+ dst_pmdval, dst_ptl, src_ptl);
}
out: