Skip to content

Commit 6cfebad

Browse files
committed
Move guest-written page tables to the scratch region
Instead of updating page tables in place, and using the heap region memory for new page tables, this changes the hyperlight_guest virtual memory subsystem to treat snapshot page tables as copy-on-write and to allocate all new page tables in the scratch region. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com>
1 parent 5594582 commit 6cfebad

3 files changed

Lines changed: 211 additions & 71 deletions

File tree

src/hyperlight_common/src/arch/amd64/vmem.rs

Lines changed: 167 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ limitations under the License.
2323
//! - PT (Page Table) - bits 20:12 - 512 entries, each covering 4KB pages
2424
//!
2525
//! The code uses an iterator-based approach to walk the page table hierarchy,
26-
//! allocating intermediate tables as needed and setting appropriate flags on leaf PTEs.
26+
//! allocating intermediate tables as needed and setting appropriate flags on leaf PTEs
2727
28-
use crate::vmem::{Mapping, MappingKind, TableOps};
28+
use crate::vmem::{BasicMapping, Mapping, MappingKind, TableOps};
2929

3030
// Paging Flags
3131
//
@@ -94,20 +94,114 @@ fn bits<const HIGH_BIT: u8, const LOW_BIT: u8>(x: u64) -> u64 {
9494
(x & ((1 << (HIGH_BIT + 1)) - 1)) >> LOW_BIT
9595
}
9696

97+
/// Helper function to generate a page table entry that points to another table
98+
#[allow(clippy::identity_op)]
99+
#[allow(clippy::precedence)]
100+
fn pte_for_table<Op: TableOps>(table_addr: Op::TableAddr) -> u64 {
101+
Op::to_phys(table_addr) |
102+
PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present)
103+
PAGE_CACHE_ENABLED | // leave caching enabled
104+
PAGE_WRITE_BACK | // use write-back caching
105+
PAGE_USER_ACCESS_DISABLED |// dont allow user access (no code runs in user mode for now)
106+
PAGE_RW | // R/W - we don't use block-level permissions
107+
PAGE_PRESENT // P - this entry is present
108+
}
109+
110+
/// Helper function to write a page table entry, updating the whole
111+
/// chain of tables back to the root if necessary
112+
unsafe fn write_entry_updating<Op: TableOps, P: UpdateParent<Op>>(
113+
op: &Op,
114+
parent: P,
115+
addr: Op::TableAddr,
116+
entry: u64,
117+
) {
118+
if let Some(again) = unsafe { op.write_entry(addr, entry) } {
119+
parent.update_parent(op, again);
120+
}
121+
}
122+
123+
/// A helper trait that allows us to move a page table (e.g. from the
124+
/// snapshot to the scratch region), keeping track of the context that
125+
/// needs to be updated when that is moved (and potentially
126+
/// recursively updating, if necessary)
127+
///
128+
/// This is done via a trait so that the selected impl knows the exact
129+
/// nesting depth of tables, in order to assist
130+
/// inlining/specialisation in generating efficient code.
131+
trait UpdateParent<Op: TableOps>: Copy {
132+
fn update_parent(self, op: &Op, new_ptr: Op::TableAddr);
133+
}
134+
135+
/// A struct implementing [`UpdateParent`] that keeps track of the
136+
/// fact that the parent table is itself another table, whose own
137+
/// ancestors may need to be recursively updated
138+
struct UpdateParentTable<Op: TableOps, P: UpdateParent<Op>> {
139+
parent: P,
140+
entry_ptr: Op::TableAddr,
141+
}
142+
impl<Op: TableOps, P: UpdateParent<Op>> Clone for UpdateParentTable<Op, P> {
143+
fn clone(&self) -> Self {
144+
*self
145+
}
146+
}
147+
impl<Op: TableOps, P: UpdateParent<Op>> Copy for UpdateParentTable<Op, P> {}
148+
impl<Op: TableOps, P: UpdateParent<Op>> UpdateParentTable<Op, P> {
149+
fn new(parent: P, entry_ptr: Op::TableAddr) -> Self {
150+
UpdateParentTable { parent, entry_ptr }
151+
}
152+
}
153+
impl<Op: TableOps, P: UpdateParent<Op>> UpdateParent<Op> for UpdateParentTable<Op, P> {
154+
fn update_parent(self, op: &Op, new_ptr: Op::TableAddr) {
155+
let pte = pte_for_table::<Op>(new_ptr);
156+
unsafe {
157+
write_entry_updating(op, self.parent, self.entry_ptr, pte);
158+
}
159+
}
160+
}
161+
162+
/// A struct implementing [`UpdateParent`] that keeps track of the
163+
/// fact that the parent "table" is actually the CR3 system register.
164+
#[derive(Copy, Clone)]
165+
struct UpdateParentCR3 {}
166+
impl<Op: TableOps> UpdateParent<Op> for UpdateParentCR3 {
167+
fn update_parent(self, _op: &Op, new_ptr: Op::TableAddr) {
168+
unsafe {
169+
core::arch::asm!("mov cr3, {}", in(reg) Op::to_phys(new_ptr));
170+
}
171+
}
172+
}
173+
174+
/// A struct implementing [`UpdateParent`] that panics when used, for
175+
/// the occasional situations in which we should never do an update
176+
#[derive(Copy, Clone)]
177+
struct UpdateParentNone {}
178+
impl<Op: TableOps> UpdateParent<Op> for UpdateParentNone {
179+
// This is only used in contexts which should absolutely never try
180+
// to update a page table, and so in no case should possibly ever
181+
// call an update_parent. Therefore, this is impossible unless an
182+
// extremely significant invariant violation has occurred.
183+
#[allow(clippy::panic)]
184+
fn update_parent(self, _op: &Op, _new_ptr: Op::TableAddr) {
185+
panic!("UpdateParentNone: tried to update parent");
186+
}
187+
}
188+
97189
/// A helper structure indicating a mapping operation that needs to be
98190
/// performed
99-
struct MapRequest<T> {
100-
table_base: T,
191+
struct MapRequest<Op: TableOps, P: UpdateParent<Op>> {
192+
table_base: Op::TableAddr,
101193
vmin: VirtAddr,
102194
len: u64,
195+
update_parent: P,
103196
}
104197

105198
/// A helper structure indicating that a particular PTE needs to be
106199
/// modified
107-
struct MapResponse<T> {
108-
entry_ptr: T,
200+
struct MapResponse<Op: TableOps, P: UpdateParent<Op>> {
201+
entry_ptr: Op::TableAddr,
109202
vmin: VirtAddr,
110203
len: u64,
204+
update_parent: P,
111205
}
112206

113207
/// Iterator that walks through page table entries at a specific level.
@@ -122,14 +216,14 @@ struct MapResponse<T> {
122216
/// - PDPT: HIGH_BIT=38, LOW_BIT=30 (9 bits = 512 entries, each covering 1GB)
123217
/// - PD: HIGH_BIT=29, LOW_BIT=21 (9 bits = 512 entries, each covering 2MB)
124218
/// - PT: HIGH_BIT=20, LOW_BIT=12 (9 bits = 512 entries, each covering 4KB)
125-
struct ModifyPteIterator<const HIGH_BIT: u8, const LOW_BIT: u8, Op: TableOps> {
126-
request: MapRequest<Op::TableAddr>,
219+
struct ModifyPteIterator<const HIGH_BIT: u8, const LOW_BIT: u8, Op: TableOps, P: UpdateParent<Op>> {
220+
request: MapRequest<Op, P>,
127221
n: u64,
128222
}
129-
impl<const HIGH_BIT: u8, const LOW_BIT: u8, Op: TableOps> Iterator
130-
for ModifyPteIterator<HIGH_BIT, LOW_BIT, Op>
223+
impl<const HIGH_BIT: u8, const LOW_BIT: u8, Op: TableOps, P: UpdateParent<Op>> Iterator
224+
for ModifyPteIterator<HIGH_BIT, LOW_BIT, Op, P>
131225
{
132-
type Item = MapResponse<Op::TableAddr>;
226+
type Item = MapResponse<Op, P>;
133227
fn next(&mut self) -> Option<Self::Item> {
134228
// Each page table entry at this level covers a region of size (1 << LOW_BIT) bytes.
135229
// For example, at the PT level (LOW_BIT=12), each entry covers 4KB (0x1000 bytes).
@@ -188,47 +282,45 @@ impl<const HIGH_BIT: u8, const LOW_BIT: u8, Op: TableOps> Iterator
188282
entry_ptr,
189283
vmin: next_vmin,
190284
len: next_len,
285+
update_parent: self.request.update_parent,
191286
})
192287
}
193288
}
194-
fn modify_ptes<const HIGH_BIT: u8, const LOW_BIT: u8, Op: TableOps>(
195-
r: MapRequest<Op::TableAddr>,
196-
) -> ModifyPteIterator<HIGH_BIT, LOW_BIT, Op> {
289+
fn modify_ptes<const HIGH_BIT: u8, const LOW_BIT: u8, Op: TableOps, P: UpdateParent<Op>>(
290+
r: MapRequest<Op, P>,
291+
) -> ModifyPteIterator<HIGH_BIT, LOW_BIT, Op, P> {
197292
ModifyPteIterator { request: r, n: 0 }
198293
}
199294

200295
/// Page-mapping callback to allocate a next-level page table if necessary.
201296
/// # Safety
202297
/// This function modifies page table data structures, and should not be called concurrently
203298
/// with any other operations that modify the page tables.
204-
unsafe fn alloc_pte_if_needed<Op: TableOps>(
299+
unsafe fn alloc_pte_if_needed<Op: TableOps, P: UpdateParent<Op>>(
205300
op: &Op,
206-
x: MapResponse<Op::TableAddr>,
207-
) -> MapRequest<Op::TableAddr> {
301+
x: MapResponse<Op, P>,
302+
) -> MapRequest<Op, UpdateParentTable<Op, P>> {
303+
let new_update_parent = UpdateParentTable::new(x.update_parent, x.entry_ptr);
208304
if let Some(pte) = unsafe { read_pte_if_present(op, x.entry_ptr) } {
209305
return MapRequest {
210306
table_base: Op::from_phys(pte & PTE_ADDR_MASK),
211307
vmin: x.vmin,
212308
len: x.len,
309+
update_parent: new_update_parent,
213310
};
214311
}
215312

216313
let page_addr = unsafe { op.alloc_table() };
217314

218-
#[allow(clippy::identity_op)]
219-
#[allow(clippy::precedence)]
220-
let pte = Op::to_phys(page_addr) |
221-
PAGE_ACCESSED_CLEAR | // accessed bit cleared (will be set by CPU when page is accessed - but we dont use the access bit for anything at present)
222-
PAGE_CACHE_ENABLED | // leave caching enabled
223-
PAGE_WRITE_BACK | // use write-back caching
224-
PAGE_USER_ACCESS_DISABLED |// dont allow user access (no code runs in user mode for now)
225-
PAGE_RW | // R/W - we don't use block-level permissions
226-
PAGE_PRESENT; // P - this entry is present
227-
unsafe { op.write_entry(x.entry_ptr, pte) };
315+
let pte = pte_for_table::<Op>(page_addr);
316+
unsafe {
317+
write_entry_updating(op, x.update_parent, x.entry_ptr, pte);
318+
};
228319
MapRequest {
229320
table_base: page_addr,
230321
vmin: x.vmin,
231322
len: x.len,
323+
update_parent: new_update_parent,
232324
}
233325
}
234326

@@ -238,7 +330,11 @@ unsafe fn alloc_pte_if_needed<Op: TableOps>(
238330
/// with any other operations that modify the page tables.
239331
#[allow(clippy::identity_op)]
240332
#[allow(clippy::precedence)]
241-
unsafe fn map_page<Op: TableOps>(op: &Op, mapping: &Mapping, r: MapResponse<Op::TableAddr>) {
333+
unsafe fn map_page<Op: TableOps, P: UpdateParent<Op>>(
334+
op: &Op,
335+
mapping: &Mapping,
336+
r: MapResponse<Op, P>,
337+
) {
242338
let pte = match &mapping.kind {
243339
MappingKind::BasicMapping(bm) =>
244340
// TODO: Support not readable
@@ -262,7 +358,7 @@ unsafe fn map_page<Op: TableOps>(op: &Op, mapping: &Mapping, r: MapResponse<Op::
262358
}
263359
};
264360
unsafe {
265-
op.write_entry(r.entry_ptr, pte);
361+
write_entry_updating(op, r.update_parent, r.entry_ptr, pte);
266362
}
267363
}
268364

@@ -283,17 +379,18 @@ unsafe fn map_page<Op: TableOps>(op: &Op, mapping: &Mapping, r: MapResponse<Op::
283379
/// 4. PT (20:12) - write final PTE with physical address and flags
284380
#[allow(clippy::missing_safety_doc)]
285381
pub unsafe fn map<Op: TableOps>(op: &Op, mapping: Mapping) {
286-
modify_ptes::<47, 39, Op>(MapRequest {
382+
modify_ptes::<47, 39, Op, _>(MapRequest {
287383
table_base: op.root_table(),
288384
vmin: mapping.virt_base,
289385
len: mapping.len,
386+
update_parent: UpdateParentCR3 {},
290387
})
291388
.map(|r| unsafe { alloc_pte_if_needed(op, r) })
292-
.flat_map(modify_ptes::<38, 30, Op>)
389+
.flat_map(modify_ptes::<38, 30, Op, _>)
293390
.map(|r| unsafe { alloc_pte_if_needed(op, r) })
294-
.flat_map(modify_ptes::<29, 21, Op>)
391+
.flat_map(modify_ptes::<29, 21, Op, _>)
295392
.map(|r| unsafe { alloc_pte_if_needed(op, r) })
296-
.flat_map(modify_ptes::<20, 12, Op>)
393+
.flat_map(modify_ptes::<20, 12, Op, _>)
297394
.map(|r| unsafe { map_page(op, &mapping, r) })
298395
.for_each(drop);
299396
}
@@ -302,14 +399,15 @@ pub unsafe fn map<Op: TableOps>(op: &Op, mapping: Mapping) {
302399
/// This function traverses page table data structures, and should not
303400
/// be called concurrently with any other operations that modify the
304401
/// page table.
305-
unsafe fn require_pte_exist<Op: TableOps>(
402+
unsafe fn require_pte_exist<Op: TableOps, P: UpdateParent<Op>>(
306403
op: &Op,
307-
x: MapResponse<Op::TableAddr>,
308-
) -> Option<MapRequest<Op::TableAddr>> {
404+
x: MapResponse<Op, P>,
405+
) -> Option<MapRequest<Op, UpdateParentTable<Op, P>>> {
309406
unsafe { read_pte_if_present(op, x.entry_ptr) }.map(|pte| MapRequest {
310407
table_base: Op::from_phys(pte & PTE_ADDR_MASK),
311408
vmin: x.vmin,
312409
len: x.len,
410+
update_parent: UpdateParentTable::new(x.update_parent, x.entry_ptr),
313411
})
314412
}
315413

@@ -322,20 +420,34 @@ unsafe fn require_pte_exist<Op: TableOps>(
322420
/// Returns `Some(pte)` containing the leaf page table entry if the address is mapped,
323421
/// or `None` if any level of the page table hierarchy has a non-present entry.
324422
#[allow(clippy::missing_safety_doc)]
325-
pub unsafe fn virt_to_phys<Op: TableOps>(op: &Op, address: u64) -> Option<u64> {
326-
modify_ptes::<47, 39, Op>(MapRequest {
423+
pub unsafe fn virt_to_phys<Op: TableOps>(
424+
op: &Op,
425+
address: u64,
426+
) -> Option<(PhysAddr, BasicMapping)> {
427+
modify_ptes::<47, 39, Op, _>(MapRequest {
327428
table_base: op.root_table(),
328429
vmin: address,
329430
len: 1,
431+
update_parent: UpdateParentNone {},
330432
})
331-
.filter_map(|r| unsafe { require_pte_exist::<Op>(op, r) })
332-
.flat_map(modify_ptes::<38, 30, Op>)
333-
.filter_map(|r| unsafe { require_pte_exist::<Op>(op, r) })
334-
.flat_map(modify_ptes::<29, 21, Op>)
335-
.filter_map(|r| unsafe { require_pte_exist::<Op>(op, r) })
336-
.flat_map(modify_ptes::<20, 12, Op>)
433+
.filter_map(|r| unsafe { require_pte_exist(op, r) })
434+
.flat_map(modify_ptes::<38, 30, Op, _>)
435+
.filter_map(|r| unsafe { require_pte_exist(op, r) })
436+
.flat_map(modify_ptes::<29, 21, Op, _>)
437+
.filter_map(|r| unsafe { require_pte_exist(op, r) })
438+
.flat_map(modify_ptes::<20, 12, Op, _>)
337439
.filter_map(|r| unsafe { read_pte_if_present(op, r.entry_ptr) })
338440
.next()
441+
.map(|r| {
442+
(
443+
r & PTE_ADDR_MASK,
444+
BasicMapping {
445+
readable: true,
446+
writable: (r & PAGE_RW) != 0,
447+
executable: (r & PAGE_NX) == 0,
448+
},
449+
)
450+
})
339451
}
340452

341453
pub const PAGE_SIZE: usize = 4096;
@@ -396,8 +508,9 @@ mod tests {
396508
self.tables.borrow()[addr.0][addr.1]
397509
}
398510

399-
unsafe fn write_entry(&self, addr: Self::TableAddr, entry: u64) {
511+
unsafe fn write_entry(&self, addr: Self::TableAddr, entry: u64) -> Option<Self::TableAddr> {
400512
self.tables.borrow_mut()[addr.0][addr.1] = entry;
513+
None
401514
}
402515

403516
fn to_phys(addr: Self::TableAddr) -> PhysAddr {
@@ -629,7 +742,7 @@ mod tests {
629742
let result = unsafe { virt_to_phys(&ops, 0x1000) };
630743
assert!(result.is_some(), "Should find mapped address");
631744
let pte = result.unwrap();
632-
assert_eq!(pte & PTE_ADDR_MASK, 0x1000);
745+
assert_eq!(pte.0, 0x1000);
633746
}
634747

635748
#[test]
@@ -674,9 +787,10 @@ mod tests {
674787
table_base: ops.root_table(),
675788
vmin: 0x1000,
676789
len: PAGE_SIZE as u64,
790+
update_parent: UpdateParentNone {},
677791
};
678792

679-
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps>(request).collect();
793+
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
680794
assert_eq!(responses.len(), 1, "Single page should yield one response");
681795
assert_eq!(responses[0].vmin, 0x1000);
682796
assert_eq!(responses[0].len, PAGE_SIZE as u64);
@@ -689,9 +803,10 @@ mod tests {
689803
table_base: ops.root_table(),
690804
vmin: 0x1000,
691805
len: 3 * PAGE_SIZE as u64,
806+
update_parent: UpdateParentNone {},
692807
};
693808

694-
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps>(request).collect();
809+
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
695810
assert_eq!(responses.len(), 3, "3 pages should yield 3 responses");
696811
}
697812

@@ -702,9 +817,10 @@ mod tests {
702817
table_base: ops.root_table(),
703818
vmin: 0x1000,
704819
len: 0,
820+
update_parent: UpdateParentNone {},
705821
};
706822

707-
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps>(request).collect();
823+
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
708824
assert_eq!(responses.len(), 0, "Zero length should yield no responses");
709825
}
710826

@@ -717,9 +833,10 @@ mod tests {
717833
table_base: ops.root_table(),
718834
vmin: 0x1800,
719835
len: 0x1000,
836+
update_parent: UpdateParentNone {},
720837
};
721838

722-
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps>(request).collect();
839+
let responses: Vec<_> = modify_ptes::<20, 12, MockTableOps, _>(request).collect();
723840
assert_eq!(
724841
responses.len(),
725842
2,

0 commit comments

Comments
 (0)