Skip to content

Commit 0f34d11

Browse files
mhklinuxliuw
authored and committed
x86/hyperv: Make encrypted/decrypted changes safe for load_unaligned_zeropad()
In a CoCo VM, when transitioning memory from encrypted to decrypted, or vice versa, the caller of set_memory_encrypted() or set_memory_decrypted() is responsible for ensuring the memory isn't in use and isn't referenced while the transition is in progress. The transition has multiple steps, and the memory is in an inconsistent state until all steps are complete. A reference while the state is inconsistent could result in an exception that can't be cleanly fixed up. However, the kernel load_unaligned_zeropad() mechanism could cause a stray reference that can't be prevented by the caller of set_memory_encrypted() or set_memory_decrypted(), so there's specific code to handle this case. But a CoCo VM running on Hyper-V may be configured to run with a paravisor, with the #VC or #VE exception routed to the paravisor. There's no architectural way to forward the exceptions back to the guest kernel, and in such a case, the load_unaligned_zeropad() specific code doesn't work. To avoid this problem, mark pages as "not present" while a transition is in progress. If load_unaligned_zeropad() causes a stray reference, a normal page fault is generated instead of #VC or #VE, and the page-fault-based fixup handlers for load_unaligned_zeropad() resolve the reference. When the encrypted/decrypted transition is complete, mark the pages as "present" again. Signed-off-by: Michael Kelley <mhklinux@outlook.com> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> Link: https://lore.kernel.org/r/20240116022008.1023398-4-mhklinux@outlook.com Signed-off-by: Wei Liu <wei.liu@kernel.org> Message-ID: <20240116022008.1023398-4-mhklinux@outlook.com>
1 parent 030ad7a commit 0f34d11

1 file changed

Lines changed: 49 additions & 4 deletions

File tree

arch/x86/hyperv/ivm.c

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <asm/io.h>
1616
#include <asm/coco.h>
1717
#include <asm/mem_encrypt.h>
18+
#include <asm/set_memory.h>
1819
#include <asm/mshyperv.h>
1920
#include <asm/hypervisor.h>
2021
#include <asm/mtrr.h>
@@ -502,6 +503,31 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
502503
return -EFAULT;
503504
}
504505

506+
/*
 * When transitioning memory between encrypted and decrypted, the caller
 * of set_memory_encrypted() or set_memory_decrypted() must ensure the
 * memory is unused and unreferenced for the duration of the transition.
 * The transition involves several steps that leave the range in an
 * inconsistent state until all of them complete, and a stray reference
 * during that window could raise an exception that can't be cleanly
 * fixed up.
 *
 * The kernel's load_unaligned_zeropad() mechanism can generate such a
 * stray reference that the caller has no way to prevent, so Linux has
 * dedicated fixup code for that case. But when the #VC and #VE
 * exceptions are routed to a paravisor, that dedicated code doesn't
 * work. To cope, mark the pages "not present" while the transition is
 * in progress: a stray load_unaligned_zeropad() reference then takes an
 * ordinary page fault, whose fixup handlers resolve it. Once the
 * transition finishes, hv_vtom_set_host_visibility() restores the
 * pages to "present".
 */
static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
{
	return set_memory_np(kbuffer, pagecount) == 0;
}
530+
505531
/*
506532
* hv_vtom_set_host_visibility - Set specified memory visible to host.
507533
*
@@ -522,8 +548,10 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
522548
int i, pfn;
523549

524550
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
525-
if (!pfn_array)
526-
return false;
551+
if (!pfn_array) {
552+
result = false;
553+
goto err_set_memory_p;
554+
}
527555

528556
for (i = 0, pfn = 0; i < pagecount; i++) {
529557
/*
@@ -548,14 +576,30 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
548576
}
549577
}
550578

551-
err_free_pfn_array:
579+
err_free_pfn_array:
552580
kfree(pfn_array);
581+
582+
err_set_memory_p:
583+
/*
584+
* Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
585+
* did. Do this even if there is an error earlier in this function in
586+
* order to avoid leaving the memory range in a "broken" state. Setting
587+
* the PRESENT bits shouldn't fail, but return an error if it does.
588+
*/
589+
if (set_memory_p(kbuffer, pagecount))
590+
result = false;
591+
553592
return result;
554593
}
555594

556595
static bool hv_vtom_tlb_flush_required(bool private)
557596
{
558-
return true;
597+
/*
598+
* Since hv_vtom_clear_present() marks the PTEs as "not present"
599+
* and flushes the TLB, they can't be in the TLB. That makes the
600+
* flush controlled by this function redundant, so return "false".
601+
*/
602+
return false;
559603
}
560604

561605
static bool hv_vtom_cache_flush_required(void)
@@ -618,6 +662,7 @@ void __init hv_vtom_init(void)
618662
x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
619663
x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
620664
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
665+
x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
621666
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
622667

623668
/* Set WB as the default cache mode. */

0 commit comments

Comments
 (0)