diff --git a/Cargo.lock b/Cargo.lock index 4acc328c..c0b47d22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,25 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aarch64" +version = "0.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0adf345d8b4e2861016511db094993ee8a9f74195f55ccf62d1305d35ab91bfa" +dependencies = [ + "aarch64-cpu", + "tock-registers", +] + +[[package]] +name = "aarch64-cpu" +version = "9.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac42a04a61c19fc8196dd728022a784baecc5d63d7e256c01ad1b3fbfab26287" +dependencies = [ + "tock-registers", +] + [[package]] name = "ahash" version = "0.7.8" @@ -1350,6 +1369,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tock-registers" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "696941a0aee7e276a165a978b37918fd5d22c55c3d6bda197813070ca9c0f21c" + [[package]] name = "toml_datetime" version = "0.6.5" @@ -1416,6 +1441,8 @@ dependencies = [ name = "uhyve-interface" version = "0.1.0" dependencies = [ + "aarch64", + "log", "num_enum", "x86_64", ] diff --git a/Cargo.toml b/Cargo.toml index d30aa9cb..97d8a628 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ instrument = ["rftrace", "rftrace-frontend"] [dependencies] byte-unit = { version = "5", features = ["byte"] } clap = { version = "4.5", features = ["derive", "env"] } +nix = { version = "0.28", features = ["mman", "pthread", "signal"] } core_affinity = "0.8" either = "1.10" env_logger = "0.11" @@ -51,20 +52,18 @@ hermit-entry = { version = "0.9", features = ["loader"] } lazy_static = "1.4" libc = "0.2" log = "0.4" +mac_address = "1.1" thiserror = "1.0" time = "0.3" -uhyve-interface = { version = "0.1", path = "uhyve-interface" } - +tun-tap = { version = "0.1.3", 
default-features = false } +uhyve-interface = { version = "0.1", path = "uhyve-interface", features = ["std"] } +virtio-bindings = { version = "0.2", features = ["virtio-v4_14_0"] } rftrace = { version = "0.1", optional = true } rftrace-frontend = { version = "0.1", optional = true } [target.'cfg(target_os = "linux")'.dependencies] kvm-bindings = "0.7" kvm-ioctls = "0.16" -mac_address = "1.1" -nix = { version = "0.28", features = ["mman", "pthread", "signal"] } -tun-tap = { version = "0.1", default-features = false } -virtio-bindings = { version = "0.2", features = ["virtio-v4_14_0"] } vmm-sys-util = "0.12" [target.'cfg(target_os = "macos")'.dependencies] diff --git a/benches/vm/mod.rs b/benches/vm/mod.rs index 4fdad1d5..d7f2944c 100644 --- a/benches/vm/mod.rs +++ b/benches/vm/mod.rs @@ -1,6 +1,9 @@ use byte_unit::Byte; use criterion::{criterion_group, Criterion}; -use uhyvelib::{params::Params, vm::Vm, Uhyve}; +use uhyvelib::{ + params::Params, + vm::{UhyveVm, VcpuDefault}, +}; pub fn load_vm_hello_world(c: &mut Criterion) { let path = [env!("CARGO_MANIFEST_DIR"), "benches_data/hello_world"] @@ -10,12 +13,11 @@ pub fn load_vm_hello_world(c: &mut Criterion) { memory_size: Byte::from_u64(1024 * 4096 * 500).try_into().unwrap(), ..Default::default() }; - let mut vm = Uhyve::new(path, params).expect("Unable to create VM"); + + let mut vm = UhyveVm::::new(path, params).expect("Unable to create VM"); c.bench_function("vm::load_kernel(hello world)", |b| { - b.iter(|| unsafe { - vm.load_kernel().unwrap(); - }) + b.iter(|| vm.load_kernel().unwrap()) }); } diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs index cd087dab..8c91caa6 100644 --- a/src/arch/aarch64/mod.rs +++ b/src/arch/aarch64/mod.rs @@ -1,6 +1,15 @@ +use std::mem::size_of; + use bitflags::bitflags; +use uhyve_interface::{GuestPhysAddr, GuestVirtAddr}; + +use crate::{ + consts::{BOOT_INFO_ADDR, BOOT_PGT}, + mem::MmapMemory, + paging::PagetableError, +}; -pub const RAM_START: u64 = 0x00; +pub const 
RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); pub const PT_DEVICE: u64 = 0x707; pub const PT_PT: u64 = 0x713; @@ -19,6 +28,16 @@ pub const MT_DEVICE_GRE: u64 = 2; pub const MT_NORMAL_NC: u64 = 3; pub const MT_NORMAL: u64 = 4; +/// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). +const PAGE_BITS: usize = 12; +const PAGE_SIZE: usize = 1 << PAGE_BITS; + +/// Number of bits of the index in each table (L0Table, L1Table, L2Table, L3Table). +const PAGE_MAP_BITS: usize = 9; + +/// A mask where PAGE_MAP_BITS are set to calculate a table index. +const PAGE_MAP_MASK: u64 = 0x1FF; + #[inline(always)] pub const fn mair(attr: u64, mt: u64) -> u64 { attr << (mt * 8) @@ -58,3 +77,142 @@ bitflags! { const D_BIT = 0x00000200; } } + +/// An entry in a L0 page table (coarses). Adapted from hermit-os/kernel. +#[derive(Clone, Copy, Debug)] +struct PageTableEntry { + /// Physical memory address this entry refers, combined with flags from PageTableEntryFlags. + physical_address_and_flags: GuestPhysAddr, +} + +impl PageTableEntry { + /// Return the stored physical address. + pub fn address(&self) -> GuestPhysAddr { + // For other granules than 4KiB or hugepages we should check the DESCRIPTOR_TYPE bit and modify the address translation accordingly. + GuestPhysAddr( + self.physical_address_and_flags.as_u64() & !(PAGE_SIZE as u64 - 1) & !(u64::MAX << 48), + ) + } +} +impl From for PageTableEntry { + fn from(i: u64) -> Self { + Self { + physical_address_and_flags: GuestPhysAddr::new(i), + } + } +} + +/// Returns whether the given virtual address is a valid one in the AArch64 memory model. +/// +/// Current AArch64 supports only 48-bit for virtual memory addresses. +/// The upper bits must always be 0 or 1 and indicate whether TBBR0 or TBBR1 contains the +/// base address. So always enforce 0 here. 
+fn is_valid_address(virtual_address: GuestVirtAddr) -> bool { + virtual_address < GuestVirtAddr(0x1_0000_0000_0000) +} + +/// Converts a virtual address in the guest to a physical address in the guest +pub fn virt_to_phys( + addr: GuestVirtAddr, + mem: &MmapMemory, + pagetable_l0: GuestPhysAddr, +) -> Result { + if !is_valid_address(addr) { + return Err(PagetableError::InvalidAddress); + } + + // Assumptions: + // - We use 4KiB granule + // - We use maximum VA length + // => We have 4 level paging + + // Safety: + // - We are only working in the vm's memory + // - the memory location of the pagetable is not altered by hermit. + // - Our indices can't be larger than 512, so we stay in the borders of the page. + // - We are page_aligned, and thus also PageTableEntry aligned. + let mut pagetable: &[PageTableEntry] = + unsafe { std::mem::transmute(mem.slice_at(pagetable_l0, PAGE_SIZE).unwrap()) }; + // TODO: Depending on the virtual address length and granule (defined in TCR register by TG and TxSZ), we could reduce the number of pagetable walks. Hermit doesn't do this at the moment. + for level in 0..3 { + let table_index = + (addr.as_u64() >> PAGE_BITS >> ((3 - level) * PAGE_MAP_BITS) & PAGE_MAP_MASK) as usize; + let pte = PageTableEntry::from(pagetable[table_index]); + // TODO: We could stop here if we have a "Block Entry" (ARM equivalent to huge page). Currently not supported. 
+ + pagetable = unsafe { std::mem::transmute(mem.slice_at(pte.address(), PAGE_SIZE).unwrap()) }; + } + let table_index = (addr.as_u64() >> PAGE_BITS & PAGE_MAP_MASK) as usize; + let pte = PageTableEntry::from(pagetable[table_index]); + + Ok(pte.address()) +} + +pub fn init_guest_mem(mem: &mut [u8]) { + let mem_addr = std::ptr::addr_of_mut!(mem[0]); + + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 512 * size_of::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut(mem_addr.offset(BOOT_PGT.as_u64() as isize) as *mut u64, 512) + }; + pgt_slice.fill(0); + pgt_slice[0] = BOOT_PGT.as_u64() + 0x1000 + PT_PT; + pgt_slice[511] = BOOT_PGT.as_u64() + PT_PT + PT_SELF; + + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x1000 + 512 * size_of::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x1000) as *mut u64, + 512, + ) + }; + pgt_slice.fill(0); + pgt_slice[0] = BOOT_PGT.as_u64() + 0x2000 + PT_PT; + + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x2000 + 512 * size_of::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x2000) as *mut u64, + 512, + ) + }; + pgt_slice.fill(0); + pgt_slice[0] = BOOT_PGT.as_u64() + 0x3000 + PT_PT; + pgt_slice[1] = BOOT_PGT.as_u64() + 0x4000 + PT_PT; + pgt_slice[2] = BOOT_PGT.as_u64() + 0x5000 + PT_PT; + + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x3000 + 512 * size_of::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x3000) as *mut u64, + 512, + ) + }; + pgt_slice.fill(0); + // map uhyve ports into the virtual address space + pgt_slice[0] = PT_MEM_CD; + // map BootInfo into the virtual address space + pgt_slice[BOOT_INFO_ADDR.as_u64() as usize / PAGE_SIZE] = BOOT_INFO_ADDR.as_u64() + PT_MEM; + + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x4000 + 512 * size_of::()); + let pgt_slice = unsafe { + 
std::slice::from_raw_parts_mut( + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x4000) as *mut u64, + 512, + ) + }; + for (idx, i) in pgt_slice.iter_mut().enumerate() { + *i = 0x200000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; + } + + assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x5000 + 512 * size_of::()); + let pgt_slice = unsafe { + std::slice::from_raw_parts_mut( + mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x5000) as *mut u64, + 512, + ) + }; + for (idx, i) in pgt_slice.iter_mut().enumerate() { + *i = 0x400000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; + } +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 7db3388c..4944d577 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -9,8 +9,18 @@ use std::{ use log::{debug, warn}; use raw_cpuid::{CpuId, CpuIdReaderNative}; use thiserror::Error; +use uhyve_interface::{GuestPhysAddr, GuestVirtAddr}; +use x86_64::{ + structures::paging::{ + page_table::{FrameError, PageTableEntry}, + Page, PageTable, PageTableFlags, PageTableIndex, Size2MiB, + }, + PhysAddr, +}; + +use crate::{consts::*, mem::MmapMemory, paging::PagetableError}; -pub const RAM_START: u64 = 0x00; +pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); const MHZ_TO_HZ: u64 = 1000000; const KHZ_TO_HZ: u64 = 1000; @@ -101,7 +111,135 @@ pub fn get_cpu_frequency_from_os() -> std::result::Result u64 { + ((base & 0xff000000u64) << (56 - 24)) + | ((flags & 0x0000f0ffu64) << 40) + | ((limit & 0x000f0000u64) << (48 - 16)) + | ((base & 0x00ffffffu64) << 16) + | (limit & 0x0000ffffu64) +} + +pub const MIN_PHYSMEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000; + +/// Creates the pagetables and the GDT in the guest memory space. +/// +/// The memory slice must be larger than [`MIN_PHYSMEM_SIZE`]. 
+/// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the +/// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the +/// pagetables and thus the integrity of the guest's memory is not ensured +pub fn initialize_pagetables(mem: &mut [u8]) { + assert!(mem.len() >= MIN_PHYSMEM_SIZE); + let mem_addr = std::ptr::addr_of_mut!(mem[0]); + + let (gdt_entry, pml4, pdpte, pde); + // Safety: + // We only operate in `mem`, which is plain bytes and we have ownership of + // these and it is asserted to be large enough. + unsafe { + gdt_entry = mem_addr + .add(BOOT_GDT.as_u64() as usize) + .cast::<[u64; 3]>() + .as_mut() + .unwrap(); + + pml4 = mem_addr + .add(BOOT_PML4.as_u64() as usize) + .cast::() + .as_mut() + .unwrap(); + pdpte = mem_addr + .add(BOOT_PDPTE.as_u64() as usize) + .cast::() + .as_mut() + .unwrap(); + pde = mem_addr + .add(BOOT_PDE.as_u64() as usize) + .cast::() + .as_mut() + .unwrap(); + + /* For simplicity we currently use 2MB pages and only a single + PML4/PDPTE/PDE. 
*/ + + // per default is the memory zeroed, which we allocate by the system + // call mmap, so the following is not necessary: + /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ + } + // initialize GDT + gdt_entry[BOOT_GDT_NULL] = 0; + gdt_entry[BOOT_GDT_CODE] = create_gdt_entry(0xA09B, 0, 0xFFFFF); + gdt_entry[BOOT_GDT_DATA] = create_gdt_entry(0xC093, 0, 0xFFFFF); + + pml4[0].set_addr( + BOOT_PDPTE, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pml4[511].set_addr( + BOOT_PML4, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); + + for i in 0..512 { + let addr = PhysAddr::new(i as u64 * Page::::SIZE); + pde[i].set_addr( + addr, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, + ); + } +} + +/// Converts a virtual address in the guest to a physical address in the guest +pub fn virt_to_phys( + addr: GuestVirtAddr, + mem: &MmapMemory, + pagetable_l0: GuestPhysAddr, +) -> Result { + /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). + pub const PAGE_BITS: u64 = 12; + + /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). 
+ pub const PAGE_MAP_BITS: usize = 9; + + let mut page_table = + unsafe { (mem.host_address(pagetable_l0).unwrap() as *mut PageTable).as_mut() }.unwrap(); + let mut page_bits = 39; + let mut entry = PageTableEntry::new(); + + for _i in 0..4 { + let index = + PageTableIndex::new(((addr.as_u64() >> page_bits) & ((1 << PAGE_MAP_BITS) - 1)) as u16); + entry = page_table[index].clone(); + + match entry.frame() { + Ok(frame) => { + page_table = unsafe { + (mem.host_address(frame.start_address()).unwrap() as *mut PageTable).as_mut() + } + .unwrap(); + page_bits -= PAGE_MAP_BITS; + } + Err(FrameError::FrameNotPresent) => return Err(PagetableError::InvalidAddress), + Err(FrameError::HugeFrame) => { + return Ok(entry.addr() + (addr.as_u64() & !((!0_u64) << page_bits))); + } + } + } + + Ok(entry.addr() + (addr.as_u64() & !((!0u64) << PAGE_BITS))) +} + +pub fn init_guest_mem(mem: &mut [u8]) { + // TODO: we should maybe return an error on failure (e.g., the memory is too small) + initialize_pagetables(mem); +} + +#[cfg(test)] mod tests { + use super::*; // test is derived from // https://github.com/gz/rust-cpuid/blob/master/examples/tsc_frequency.rs #[test] @@ -180,13 +318,87 @@ mod tests { assert!(freq > 0); assert!(freq < 10000); //More than 10Ghz is probably wrong } -} -// Constructor for a conventional segment GDT (or LDT) entry -pub fn create_gdt_entry(flags: u64, base: u64, limit: u64) -> u64 { - ((base & 0xff000000u64) << (56 - 24)) - | ((flags & 0x0000f0ffu64) << 40) - | ((limit & 0x000f0000u64) << (48 - 16)) - | ((base & 0x00ffffffu64) << 16) - | (limit & 0x0000ffffu64) + #[test] + fn test_pagetable_initialization() { + let mut mem: Vec = vec![0; MIN_PHYSMEM_SIZE]; + initialize_pagetables((&mut mem[0..MIN_PHYSMEM_SIZE]).try_into().unwrap()); + + // Test pagetable setup + let addr_pdpte = u64::from_le_bytes( + mem[(BOOT_PML4.as_u64() as usize)..(BOOT_PML4.as_u64() as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pdpte, + BOOT_PDPTE.as_u64() | 
(PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + let addr_pde = u64::from_le_bytes( + mem[(BOOT_PDPTE.as_u64() as usize)..(BOOT_PDPTE.as_u64() as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pde, + BOOT_PDE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + for i in (0..4096).step_by(8) { + let addr = BOOT_PDE.as_u64() as usize + i; + let entry = u64::from_le_bytes(mem[addr..(addr + 8)].try_into().unwrap()); + assert!( + PageTableFlags::from_bits_truncate(entry) + .difference( + PageTableFlags::PRESENT + | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE + ) + .is_empty(), + "Pagetable bits at {addr:#x} are incorrect" + ) + } + + // Test GDT + let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; + for (i, res) in gdt_results.iter().enumerate() { + let gdt_addr = BOOT_GDT.as_u64() as usize + i * 8; + let gdt_entry = u64::from_le_bytes(mem[gdt_addr..gdt_addr + 8].try_into().unwrap()); + assert_eq!(*res, gdt_entry); + } + } + + #[test] + fn test_virt_to_phys() { + let mem = MmapMemory::new(0, MIN_PHYSMEM_SIZE * 2, GuestPhysAddr::new(0), true, true); + initialize_pagetables(unsafe { mem.as_slice_mut() }.try_into().unwrap()); + + // Get the address of the first entry in PML4 (the address of the PML4 itself) + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000); + let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); + assert_eq!(p_addr, BOOT_PML4); + + // The last entry on the PML4 is the address of the PML4 with flags + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000 | (4096 - 8)); + let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); + assert_eq!( + mem.read::(p_addr).unwrap(), + BOOT_PML4.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + // the first entry on the 3rd level entry in the pagetables is the address of the boot pdpte + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFE00000); + let p_addr = virt_to_phys(virt_addr, &mem, 
BOOT_PML4).unwrap(); + assert_eq!(p_addr, BOOT_PDPTE); + + // the first entry on the 2rd level entry in the pagetables is the address of the boot pde + let virt_addr = GuestVirtAddr::new(0xFFFFFFFFC0000000); + let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); + assert_eq!(p_addr, BOOT_PDE); + // That address points to a huge page + assert!( + PageTableFlags::from_bits_truncate(mem.read::(p_addr).unwrap()).contains( + PageTableFlags::HUGE_PAGE | PageTableFlags::PRESENT | PageTableFlags::WRITABLE + ) + ); + } } diff --git a/src/bin/uhyve.rs b/src/bin/uhyve.rs index 5122c4fd..82c5e438 100644 --- a/src/bin/uhyve.rs +++ b/src/bin/uhyve.rs @@ -11,7 +11,7 @@ use either::Either; use thiserror::Error; use uhyvelib::{ params::{CpuCount, GuestMemorySize, Params}, - Uhyve, + vm::UhyveVm, }; #[cfg(feature = "instrument")] @@ -262,6 +262,8 @@ impl From for Params { pit, #[cfg(target_os = "linux")] gdb_port, + #[cfg(target_os = "macos")] + gdb_port: None, kernel_args, } } @@ -279,9 +281,10 @@ fn run_uhyve() -> i32 { let affinity = args.cpu_args.clone().get_affinity(&mut app); let params = Params::from(args); - Uhyve::new(kernel, params) - .expect("Unable to create VM! Is the hypervisor interface (e.g. KVM) activated?") - .run(affinity) + let vm = UhyveVm::new(kernel, params) + .expect("Unable to create VM! Is the hypervisor interface (e.g. 
KVM) activated?"); + + vm.run(affinity) } fn main() { diff --git a/src/consts.rs b/src/consts.rs index c36aed25..b33727f3 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,17 +1,19 @@ +use uhyve_interface::GuestPhysAddr; + pub const PAGE_SIZE: usize = 0x1000; pub const GDT_KERNEL_CODE: u16 = 1; pub const GDT_KERNEL_DATA: u16 = 2; pub const APIC_DEFAULT_BASE: u64 = 0xfee00000; -pub const BOOT_GDT: u64 = 0x1000; -pub const BOOT_GDT_NULL: u64 = 0; -pub const BOOT_GDT_CODE: u64 = 1; -pub const BOOT_GDT_DATA: u64 = 2; -pub const BOOT_GDT_MAX: u64 = 3; -pub const BOOT_PML4: u64 = 0x10000; -pub const BOOT_PGT: u64 = BOOT_PML4; -pub const BOOT_PDPTE: u64 = 0x11000; -pub const BOOT_PDE: u64 = 0x12000; -pub const BOOT_INFO_ADDR: u64 = 0x9000; +pub const BOOT_GDT: GuestPhysAddr = GuestPhysAddr::new(0x1000); +pub const BOOT_GDT_NULL: usize = 0; +pub const BOOT_GDT_CODE: usize = 1; +pub const BOOT_GDT_DATA: usize = 2; +pub const BOOT_GDT_MAX: usize = 3; +pub const BOOT_PML4: GuestPhysAddr = GuestPhysAddr::new(0x10000); +pub const BOOT_PGT: GuestPhysAddr = BOOT_PML4; +pub const BOOT_PDPTE: GuestPhysAddr = GuestPhysAddr::new(0x11000); +pub const BOOT_PDE: GuestPhysAddr = GuestPhysAddr::new(0x12000); +pub const BOOT_INFO_ADDR: GuestPhysAddr = GuestPhysAddr::new(0x9000); pub const EFER_SCE: u64 = 1; /* System Call Extensions */ pub const EFER_LME: u64 = 1 << 8; /* Long mode enable */ pub const EFER_LMA: u64 = 1 << 10; /* Long mode active (read-only) */ diff --git a/src/hypercall.rs b/src/hypercall.rs new file mode 100644 index 00000000..0205419c --- /dev/null +++ b/src/hypercall.rs @@ -0,0 +1,218 @@ +use std::{ + ffi::{OsStr, OsString}, + io::{self, Error, ErrorKind, Write}, + os::unix::ffi::OsStrExt, +}; + +use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; + +use crate::{ + consts::BOOT_PML4, + mem::{MemoryError, MmapMemory}, + virt_to_phys, +}; + +/// `addr` is the address of the hypercall parameter in the guest's memory space. 
`data` is the +/// parameter that was send to that address by the guest. +/// +/// # Safety +/// +/// - The return value is only valid, as long as the guest is halted. +/// - This fn must not be called multiple times on the same data, to avoid creating mutable aliasing. +pub unsafe fn address_to_hypercall( + mem: &MmapMemory, + addr: u16, + data: GuestPhysAddr, +) -> Option> { + if let Ok(hypercall_port) = HypercallAddress::try_from(addr) { + Some(match hypercall_port { + HypercallAddress::FileClose => { + let sysclose = mem.get_ref_mut::(data).unwrap(); + // let sysclose = unsafe { &mut *(self.host_address(data) as *mut CloseParams) }; + Hypercall::FileClose(sysclose) + } + HypercallAddress::FileLseek => { + let syslseek = mem.get_ref_mut::(data).unwrap(); + Hypercall::FileLseek(syslseek) + } + HypercallAddress::FileOpen => { + let sysopen = mem.get_ref_mut::(data).unwrap(); + Hypercall::FileOpen(sysopen) + } + HypercallAddress::FileRead => { + let sysread = mem.get_ref_mut::(data).unwrap(); + Hypercall::FileRead(sysread) + } + HypercallAddress::FileWrite => { + let syswrite = mem.get_ref_mut(data).unwrap(); + Hypercall::FileWrite(syswrite) + } + HypercallAddress::FileUnlink => { + let sysunlink = mem.get_ref_mut(data).unwrap(); + Hypercall::FileUnlink(sysunlink) + } + HypercallAddress::Exit => { + let sysexit = mem.get_ref_mut(data).unwrap(); + Hypercall::Exit(sysexit) + } + HypercallAddress::Cmdsize => { + let syssize = mem.get_ref_mut(data).unwrap(); + Hypercall::Cmdsize(syssize) + } + HypercallAddress::Cmdval => { + let syscmdval = mem.get_ref_mut(data).unwrap(); + Hypercall::Cmdval(syscmdval) + } + HypercallAddress::Uart => Hypercall::SerialWriteByte(data.as_u64() as u8), + _ => unimplemented!(), + }) + } else { + None + } +} + +/// unlink deletes a name from the filesystem. This is used to handle `unlink` syscalls from the guest. +/// TODO: UNSAFE AS *%@#. It has to be checked that the VM is allowed to unlink that file! 
+pub fn unlink(mem: &MmapMemory, sysunlink: &mut UnlinkParams) { + unsafe { + sysunlink.ret = libc::unlink(mem.host_address(sysunlink.name).unwrap() as *const libc::c_char); + } +} + +/// Handles an open syscall by opening a file on the host. +pub fn open(mem: &MmapMemory, sysopen: &mut OpenParams) { + unsafe { + sysopen.ret = libc::open( + mem.host_address(sysopen.name).unwrap() as *const libc::c_char, + sysopen.flags, + sysopen.mode, + ); + } +} + +/// Handles a close syscall by closing the file on the host. +pub fn close(sysclose: &mut CloseParams) { + unsafe { + sysclose.ret = libc::close(sysclose.fd); + } +} + +/// Handles a read syscall on the host; the guest-virtual buffer is translated via the boot page tables and `ret` is set to the number of bytes read, or -1 on failure. +pub fn read(mem: &MmapMemory, sysread: &mut ReadPrams) { + unsafe { + let bytes_read = libc::read( + sysread.fd, + mem.host_address(virt_to_phys(sysread.buf, mem, BOOT_PML4).unwrap()) + .unwrap() as *mut libc::c_void, + sysread.len, + ); + if bytes_read >= 0 { + sysread.ret = bytes_read; + } else { + sysread.ret = -1; + } + } +} + +/// Handles a write syscall on the host, repeating partial writes until all `len` bytes of the guest buffer are written. +pub fn write(mem: &MmapMemory, syswrite: &WriteParams) -> io::Result<()> { + let mut bytes_written: usize = 0; + while bytes_written != syswrite.len { + unsafe { + let step = libc::write( + syswrite.fd, + mem.host_address( + virt_to_phys(syswrite.buf + bytes_written as u64, mem, BOOT_PML4).unwrap(), + ) + .map_err(|e| match e { + MemoryError::BoundsViolation => { + unreachable!("Bounds violation after host_address function") + } + MemoryError::WrongMemoryError => { + Error::new(ErrorKind::AddrNotAvailable, e.to_string()) + } + })? as *const libc::c_void, + syswrite.len - bytes_written, + ); + if step >= 0 { + bytes_written += step as usize; + } else { + return Err(io::Error::last_os_error()); + } + } + } + + Ok(()) +} + +/// Handles an lseek syscall on the host and stores the resulting file offset back into the parameters.
+pub fn lseek(syslseek: &mut LseekParams) { + unsafe { + syslseek.offset = + libc::lseek(syslseek.fd, syslseek.offset as i64, syslseek.whence) as isize; + } +} + +/// Handles a UART syscall by writing to stdout. +pub fn uart(buf: &[u8]) -> io::Result<()> { + io::stdout().write_all(buf) +} + +/// Copies the kernel path (as argument 0) and the arguments of the application into the VM's memory to the destinations specified in `syscmdval`. +pub fn copy_argv(path: &OsStr, argv: &[OsString], syscmdval: &CmdvalParams, mem: &MmapMemory) { + // copy kernel path as first argument + let argvp = mem + .host_address(syscmdval.argv) + .expect("Systemcall parameters for Cmdval are invalid") as *const GuestPhysAddr; + let arg_addrs = unsafe { std::slice::from_raw_parts(argvp, argv.len() + 1) }; + + { + let len = path.len(); + // Safety: we drop path_dest before anything else is done with mem + let path_dest = unsafe { + mem.slice_at_mut(arg_addrs[0], len + 1) + .expect("Systemcall parameters for Cmdval are invalid") + }; + + path_dest[0..len].copy_from_slice(path.as_bytes()); + path_dest[len] = 0; // argv strings are zero terminated + } + + // Copy the application arguments into the vm memory; slot 0 already holds the kernel path, so argument i goes to slot i + 1 + for (counter, argument) in argv.iter().enumerate() { + let len = argument.as_bytes().len(); + let arg_dest = unsafe { + mem.slice_at_mut(arg_addrs[counter + 1], len + 1) + .expect("Systemcall parameters for Cmdval are invalid") + }; + arg_dest[0..len].copy_from_slice(argument.as_bytes()); + arg_dest[len] = 0; + } +} + +/// Copies the environment variables into the VM's memory to the destinations specified in `syscmdval`.
+pub fn copy_env(syscmdval: &CmdvalParams, mem: &MmapMemory) { + let env_len = std::env::vars_os().count(); + let envp = mem + .host_address(syscmdval.envp) + .expect("Systemcall parameters for Cmdval are invalid") as *const GuestPhysAddr; + let env_addrs = unsafe { std::slice::from_raw_parts(envp, env_len) }; + + // Copy the environment variables into the vm memory + for (counter, (key, value)) in std::env::vars_os().enumerate() { + if counter >= MAX_ARGC_ENVC.try_into().unwrap() { + warn!("Environment is larger than the maximum that can be copied to the VM. Remaining environment is ignored"); + break; + } + + let len = key.len() + value.len() + 1; + let env_dest = unsafe { + mem.slice_at_mut(env_addrs[counter], len + 1) + .expect("Systemcall parameters for Cmdval are invalid") + }; + env_dest[0..key.len()].copy_from_slice(key.as_bytes()); + env_dest[key.len()] = b'='; + env_dest[key.len() + 1..len].copy_from_slice(value.as_bytes()); + env_dest[len] = 0; + } +} diff --git a/src/lib.rs b/src/lib.rs index 32efc03d..7c2f2177 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,10 +19,17 @@ pub use linux as os; pub mod macos; #[cfg(target_os = "macos")] pub use macos as os; +mod hypercall; +pub mod mem; +pub mod paging; pub mod params; #[cfg(target_os = "linux")] pub mod shared_queue; +mod vcpu; +pub mod virtio; +pub mod virtqueue; pub mod vm; pub use arch::*; -pub use os::uhyve::Uhyve; +pub use os::HypervisorError; +pub type HypervisorResult = Result; diff --git a/src/linux/gdb/breakpoints.rs b/src/linux/gdb/breakpoints.rs index 3644d3b1..454f9cdf 100644 --- a/src/linux/gdb/breakpoints.rs +++ b/src/linux/gdb/breakpoints.rs @@ -1,10 +1,13 @@ use std::collections::{hash_map::Entry, HashMap}; use gdbstub::target::{self, ext::breakpoints::WatchKind, TargetResult}; +use uhyve_interface::GuestVirtAddr; use super::GdbUhyve; -use crate::arch::x86_64::registers; - +use crate::{ + arch::x86_64::{registers, virt_to_phys}, + consts::BOOT_PML4, +}; #[derive(Clone, Copy, Debug, 
PartialEq, Eq, Hash)] pub struct SwBreakpoint { addr: u64, @@ -49,7 +52,15 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { let sw_breakpoint = SwBreakpoint::new(addr, kind); if let Entry::Vacant(entry) = self.sw_breakpoints.entry(sw_breakpoint) { - let instructions = unsafe { self.vcpu.memory(addr, kind) }; + // Safety: mem is not altered during the lifetime of `instructions` + let instructions = unsafe { + self.vm.mem.slice_at_mut( + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) + .map_err(|_err| ())?, + kind, + ) + } + .unwrap(); entry.insert(instructions.into()); instructions.fill(SwBreakpoint::OPCODE); Ok(true) @@ -62,7 +73,15 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { let sw_breakpoint = SwBreakpoint::new(addr, kind); if let Entry::Occupied(entry) = self.sw_breakpoints.entry(sw_breakpoint) { - let instructions = unsafe { self.vcpu.memory(addr, kind) }; + // Safety: mem is not altered during the lifetime of `instructions` + let instructions = unsafe { + self.vm.mem.slice_at_mut( + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) + .map_err(|_err| ())?, + kind, + ) + } + .unwrap(); instructions.copy_from_slice(&entry.remove()); Ok(true) } else { diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 4560ceb0..5dd525f0 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -2,7 +2,13 @@ mod breakpoints; mod regs; mod section_offsets; -use std::{io::Read, net::TcpStream, sync::Once, thread, time::Duration}; +use std::{ + io::Read, + net::TcpStream, + sync::{Arc, Once}, + thread, + time::Duration, +}; use gdbstub::{ common::Signal, @@ -17,26 +23,28 @@ use kvm_bindings::{ }; use libc::EINVAL; use nix::sys::pthread::pthread_self; +use uhyve_interface::GuestVirtAddr; use x86_64::registers::debug::Dr6Flags; use self::breakpoints::SwBreakpoints; use super::HypervisorError; use crate::{ - arch::x86_64::registers::debug::HwBreakpoints, - linux::{vcpu::UhyveCPU, KickSignal}, - 
vm::{VcpuStopReason, VirtualCPU}, - Uhyve, + arch::x86_64::{registers::debug::HwBreakpoints, virt_to_phys}, + consts::BOOT_PML4, + linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, + vcpu::{VcpuStopReason, VirtualCPU}, + vm::UhyveVm, }; pub struct GdbUhyve { - vm: Uhyve, - vcpu: UhyveCPU, + vm: Arc>, + vcpu: KvmCpu, hw_breakpoints: HwBreakpoints, sw_breakpoints: SwBreakpoints, } impl GdbUhyve { - pub fn new(vm: Uhyve, vcpu: UhyveCPU) -> Self { + pub fn new(vm: Arc>, vcpu: KvmCpu) -> Self { Self { vm, vcpu, @@ -119,13 +127,30 @@ impl SingleThreadBase for GdbUhyve { } fn read_addrs(&mut self, start_addr: u64, data: &mut [u8]) -> TargetResult { - let src = unsafe { self.vcpu.memory(start_addr, data.len()) }; + let guest_addr = GuestVirtAddr::try_new(start_addr).map_err(|_e| TargetError::NonFatal)?; + // Safety: mem is copied to data before mem can be modified. + let src = unsafe { + self.vm.mem.slice_at( + virt_to_phys(guest_addr, &self.vm.mem, BOOT_PML4).map_err(|_err| ())?, + data.len(), + ) + } + .unwrap(); data.copy_from_slice(src); Ok(data.len()) } fn write_addrs(&mut self, start_addr: u64, data: &[u8]) -> TargetResult<(), Self> { - let mem = unsafe { self.vcpu.memory(start_addr, data.len()) }; + // Safety: self.vm.mem is not altered during the lifetime of mem. 
+ let mem = unsafe { + self.vm.mem.slice_at_mut( + virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem, BOOT_PML4) + .map_err(|_err| ())?, + data.len(), + ) + } + .unwrap(); + mem.copy_from_slice(data); Ok(()) } diff --git a/src/linux/gdb/section_offsets.rs b/src/linux/gdb/section_offsets.rs index 20a6ed82..9bf88b9f 100644 --- a/src/linux/gdb/section_offsets.rs +++ b/src/linux/gdb/section_offsets.rs @@ -1,10 +1,6 @@ -use gdbstub::target::{ - ext::section_offsets::Offsets, - {self}, -}; +use gdbstub::target::{self, ext::section_offsets::Offsets}; use super::GdbUhyve; -use crate::vm::Vm; impl target::ext::section_offsets::SectionOffsets for GdbUhyve { fn get_section_offsets(&mut self) -> Result, Self::Error> { diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 6af6e386..211da3da 100755 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -1,14 +1,13 @@ +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + pub mod gdb; -pub mod uhyve; -pub mod vcpu; -pub mod virtio; -pub mod virtqueue; pub type HypervisorError = kvm_ioctls::Error; pub type DebugExitInfo = kvm_bindings::kvm_debug_exit_arch; use std::{ - io, mem, + io, net::{TcpListener, TcpStream}, os::unix::prelude::JoinHandleExt, sync::{Arc, Barrier}, @@ -26,9 +25,12 @@ use nix::sys::{ }; use crate::{ - linux::gdb::{GdbUhyve, UhyveGdbEventLoop}, - vm::{VirtualCPU, Vm}, - Uhyve, + linux::{ + gdb::{GdbUhyve, UhyveGdbEventLoop}, + x86_64::kvm_cpu::KvmCpu, + }, + vcpu::VirtualCPU, + vm::UhyveVm, }; lazy_static! { @@ -48,7 +50,7 @@ impl KickSignal { assert!(kick_signal <= SIGRTMAX()); // TODO: Remove the transmute once realtime signals are properly supported by nix // https://github.com/nix-rust/nix/issues/495 - unsafe { mem::transmute(kick_signal) } + unsafe { std::mem::transmute(kick_signal) } } fn register_handler() -> nix::Result<()> { @@ -68,16 +70,14 @@ impl KickSignal { } } -impl Uhyve { +impl UhyveVm { /// Runs the VM. /// /// Blocks until the VM has finished execution. 
pub fn run(mut self, cpu_affinity: Option>) -> i32 { KickSignal::register_handler().unwrap(); - unsafe { - self.load_kernel().expect("Unabled to load the kernel"); - } + self.load_kernel().expect("Unabled to load the kernel"); if self.gdb_port.is_none() { self.run_no_gdb(cpu_affinity) @@ -93,7 +93,7 @@ impl Uhyve { let this = Arc::new(self); let threads = (0..this.num_cpus()) .map(|cpu_id| { - let vm = this.clone(); + let parent_vm = this.clone(); let barrier = barrier.clone(); let local_cpu_affinity = cpu_affinity .as_ref() @@ -109,9 +109,7 @@ impl Uhyve { None => debug!("No affinity specified, not binding thread"), } - let mut cpu = vm.create_cpu(cpu_id).unwrap(); - cpu.init(vm.get_entry_point(), vm.stack_address(), cpu_id) - .unwrap(); + let mut cpu = KvmCpu::new(cpu_id, parent_vm.clone()).unwrap(); thread::sleep(std::time::Duration::from_millis(cpu_id as u64 * 50)); @@ -126,7 +124,8 @@ impl Uhyve { } Err(err) => { error!("CPU {} crashed with {:?}", cpu_id, err); - None + barrier.wait(); + Some(err.errno()) } } }) @@ -144,12 +143,11 @@ impl Uhyve { .into_iter() .filter_map(|thread| thread.join().unwrap()) .collect::>(); - assert_eq!( - 1, - code.len(), - "more than one thread finished with an exit code" - ); - code[0] + match code.len() { + 0 => panic!("No return code from any CPU? 
Maybe all have been kicked?"), + 1 => code[0], + _ => panic!("more than one thread finished with an exit code (codes: {code:?})"), + } } fn run_gdb(self, cpu_affinity: Option>) -> i32 { @@ -167,13 +165,12 @@ impl Uhyve { None => debug!("No affinity specified, not binding thread"), } - let mut cpu = self.create_cpu(cpu_id).unwrap(); - cpu.init(self.get_entry_point(), self.stack_address(), cpu_id) - .unwrap(); + let this = Arc::new(self); + let cpu = KvmCpu::new(cpu_id, this.clone()).unwrap(); - let connection = wait_for_gdb_connection(self.gdb_port.unwrap()).unwrap(); + let connection = wait_for_gdb_connection(this.gdb_port.unwrap()).unwrap(); let debugger = GdbStub::new(connection); - let mut debuggable_vcpu = GdbUhyve::new(self, cpu); + let mut debuggable_vcpu = GdbUhyve::new(this, cpu); match debugger .run_blocking::(&mut debuggable_vcpu) diff --git a/src/linux/uhyve.rs b/src/linux/uhyve.rs deleted file mode 100755 index 1c2582fd..00000000 --- a/src/linux/uhyve.rs +++ /dev/null @@ -1,362 +0,0 @@ -//! This file contains the entry point to the Hypervisor. The Uhyve utilizes KVM to -//! create a Virtual Machine and load the kernel. 
- -use std::{ - cmp, - ffi::OsString, - fmt, mem, - os::raw::c_void, - path::{Path, PathBuf}, - ptr::{self, NonNull}, - sync::{Arc, Mutex}, -}; - -use hermit_entry::boot_info::RawBootInfo; -use kvm_bindings::*; -use kvm_ioctls::VmFd; -use log::debug; -use nix::sys::mman::*; -use vmm_sys_util::eventfd::EventFd; -use x86_64::{ - structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, - PhysAddr, -}; - -use crate::{ - consts::*, - linux::{vcpu::*, virtio::*, KVM}, - params::Params, - vm::{HypervisorResult, Vm}, - x86_64::create_gdt_entry, -}; - -const KVM_32BIT_MAX_MEM_SIZE: usize = 1 << 32; -const KVM_32BIT_GAP_SIZE: usize = 768 << 20; -const KVM_32BIT_GAP_START: usize = KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE; - -pub struct Uhyve { - vm: VmFd, - offset: u64, - entry_point: u64, - stack_address: u64, - mem: MmapMemory, - num_cpus: u32, - path: PathBuf, - args: Vec, - boot_info: *const RawBootInfo, - verbose: bool, - virtio_device: Arc>, - pub(super) gdb_port: Option, -} - -impl fmt::Debug for Uhyve { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Uhyve") - .field("entry_point", &self.entry_point) - .field("stack_address", &self.stack_address) - .field("mem", &self.mem) - .field("num_cpus", &self.num_cpus) - .field("path", &self.path) - .field("boot_info", &self.boot_info) - .field("verbose", &self.verbose) - .field("virtio_device", &self.virtio_device) - .finish() - } -} - -impl Uhyve { - pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { - let memory_size = params.memory_size.get(); - - let vm = KVM.create_vm()?; - - let mem = MmapMemory::new(0, memory_size, 0, params.thp, params.ksm); - - let sz = cmp::min(memory_size, KVM_32BIT_GAP_START); - - // create virtio interface - // TODO: Remove allow once fixed: - // https://github.com/rust-lang/rust-clippy/issues/11382 - #[allow(clippy::arc_with_non_send_sync)] - let virtio_device = Arc::new(Mutex::new(VirtioNetPciDevice::new())); - - let kvm_mem = 
kvm_userspace_memory_region { - slot: 0, - flags: mem.flags, - memory_size: sz as u64, - guest_phys_addr: mem.guest_address as u64, - userspace_addr: mem.host_address as u64, - }; - - unsafe { vm.set_user_memory_region(kvm_mem) }?; - - if memory_size > KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE { - let kvm_mem = kvm_userspace_memory_region { - slot: 1, - flags: mem.flags, - memory_size: (memory_size - KVM_32BIT_GAP_START - KVM_32BIT_GAP_SIZE) as u64, - guest_phys_addr: (mem.guest_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) - as u64, - userspace_addr: (mem.host_address + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) - as u64, - }; - - unsafe { vm.set_user_memory_region(kvm_mem) }?; - } - - debug!("Initialize interrupt controller"); - - // create basic interrupt controller - vm.create_irq_chip()?; - - if params.pit { - vm.create_pit2(kvm_pit_config::default()).unwrap(); - } - - // enable x2APIC support - let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_X2APIC_API, - flags: 0, - ..Default::default() - }; - cap.args[0] = - (KVM_X2APIC_API_USE_32BIT_IDS | KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK).into(); - vm.enable_cap(&cap) - .expect("Unable to enable x2apic support"); - - // currently, we support only system, which provides the - // cpu feature TSC_DEADLINE - let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_TSC_DEADLINE_TIMER, - ..Default::default() - }; - cap.args[0] = 0; - vm.enable_cap(&cap) - .expect_err("Processor feature `tsc deadline` isn't supported!"); - - let cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_IRQFD, - ..Default::default() - }; - vm.enable_cap(&cap) - .expect_err("The support of KVM_CAP_IRQFD is currently required"); - - let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { - cap: KVM_CAP_X86_DISABLE_EXITS, - flags: 0, - ..Default::default() - }; - cap.args[0] = - (KVM_X86_DISABLE_EXITS_PAUSE | KVM_X86_DISABLE_EXITS_MWAIT | KVM_X86_DISABLE_EXITS_HLT) - .into(); - 
vm.enable_cap(&cap) - .expect("Unable to disable exists due pause instructions"); - - let evtfd = EventFd::new(0).unwrap(); - vm.register_irqfd(&evtfd, UHYVE_IRQ_NET)?; - - let cpu_count = params.cpu_count.get(); - - assert!( - params.gdb_port.is_none() || cpu_count == 1, - "gdbstub is only supported with one CPU" - ); - - let hyve = Uhyve { - vm, - offset: 0, - entry_point: 0, - stack_address: 0, - mem, - num_cpus: cpu_count, - path: kernel_path, - args: params.kernel_args, - boot_info: ptr::null(), - verbose: params.verbose, - virtio_device, - gdb_port: params.gdb_port, - }; - - hyve.init_guest_mem(); - - Ok(hyve) - } -} - -impl Vm for Uhyve { - fn verbose(&self) -> bool { - self.verbose - } - - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - - fn get_offset(&self) -> u64 { - self.offset - } - - fn set_entry_point(&mut self, entry: u64) { - self.entry_point = entry; - } - - fn get_entry_point(&self) -> u64 { - self.entry_point - } - - fn set_stack_address(&mut self, stack_addresss: u64) { - self.stack_address = stack_addresss; - } - - fn stack_address(&self) -> u64 { - self.stack_address - } - - fn num_cpus(&self) -> u32 { - self.num_cpus - } - - fn guest_mem(&self) -> (*mut u8, usize) { - (self.mem.host_address as *mut u8, self.mem.memory_size) - } - - fn kernel_path(&self) -> &Path { - self.path.as_path() - } - - fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(UhyveCPU::new( - id, - self.path.clone(), - self.args.clone(), - self.vm.create_vcpu(id.into())?, - self.mem.host_address, - self.virtio_device.clone(), - )) - } - - fn set_boot_info(&mut self, header: *const RawBootInfo) { - self.boot_info = header; - } - - /// Initialize the page tables for the guest - fn init_guest_mem(&self) { - debug!("Initialize guest memory"); - - let (mem_addr, _) = self.guest_mem(); - - unsafe { - let pml4 = &mut *((mem_addr as u64 + BOOT_PML4) as *mut PageTable); - let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE) as *mut PageTable); - let pde = 
&mut *((mem_addr as u64 + BOOT_PDE) as *mut PageTable); - let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT; - - // initialize GDT - *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); - *((gdt_entry + mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xA09B, 0, 0xFFFFF); /* code */ - *((gdt_entry + 2 * mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xC093, 0, 0xFFFFF); /* data */ - - /* For simplicity we currently use 2MB pages and only a single - PML4/PDPTE/PDE. */ - - // per default is the memory zeroed, which we allocate by the system call mmap - /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ - - pml4[0].set_addr( - PhysAddr::new(BOOT_PDPTE), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pml4[511].set_addr( - PhysAddr::new(BOOT_PML4), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr( - PhysAddr::new(BOOT_PDE), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - - for i in 0..512 { - let addr = PhysAddr::new(i as u64 * Page::::SIZE); - pde[i].set_addr( - addr, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, - ); - } - } - } -} - -// TODO: Investigate soundness -// https://github.com/hermitcore/uhyve/issues/229 -#[allow(clippy::non_send_fields_in_send_ty)] -unsafe impl Send for Uhyve {} -unsafe impl Sync for Uhyve {} - -#[derive(Debug)] -struct MmapMemory { - flags: u32, - memory_size: usize, - guest_address: usize, - host_address: usize, -} - -impl MmapMemory { - pub fn new( - flags: u32, - memory_size: usize, - guest_address: u64, - huge_pages: bool, - mergeable: bool, - ) -> MmapMemory { - let host_address = unsafe { - mmap_anonymous( - None, - memory_size.try_into().unwrap(), - ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, - MapFlags::MAP_PRIVATE | 
MapFlags::MAP_NORESERVE, - ) - .expect("mmap failed") - }; - - if mergeable { - debug!("Enable kernel feature to merge same pages"); - unsafe { - madvise(host_address, memory_size, MmapAdvise::MADV_MERGEABLE).unwrap(); - } - } - - if huge_pages { - debug!("Uhyve uses huge pages"); - unsafe { - madvise(host_address, memory_size, MmapAdvise::MADV_HUGEPAGE).unwrap(); - } - } - - MmapMemory { - flags, - memory_size, - guest_address: guest_address as usize, - host_address: host_address.as_ptr() as usize, - } - } - - #[allow(dead_code)] - fn as_slice_mut(&mut self) -> &mut [u8] { - unsafe { std::slice::from_raw_parts_mut(self.host_address as *mut u8, self.memory_size) } - } -} - -impl Drop for MmapMemory { - fn drop(&mut self) { - if self.memory_size > 0 { - let host_addr = NonNull::new(self.host_address as *mut c_void).unwrap(); - unsafe { - munmap(host_addr, self.memory_size).unwrap(); - } - } - } -} diff --git a/src/linux/vcpu.rs b/src/linux/x86_64/kvm_cpu.rs old mode 100755 new mode 100644 similarity index 56% rename from src/linux/vcpu.rs rename to src/linux/x86_64/kvm_cpu.rs index fdde66c2..9729ae10 --- a/src/linux/vcpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -1,22 +1,20 @@ -use std::{ - ffi::OsString, - path::{Path, PathBuf}, - slice, - sync::{Arc, Mutex}, -}; +use std::sync::{Arc, Mutex}; use kvm_bindings::*; -use kvm_ioctls::{VcpuExit, VcpuFd}; -use uhyve_interface::Hypercall; -use x86_64::{ - registers::control::{Cr0Flags, Cr4Flags}, - structures::paging::PageTableFlags, -}; +use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; +use uhyve_interface::{GuestPhysAddr, Hypercall}; +use vmm_sys_util::eventfd::EventFd; +use x86_64::registers::control::{Cr0Flags, Cr4Flags}; use crate::{ consts::*, - linux::{virtio::*, KVM}, - vm::{HypervisorResult, VcpuStopReason, VirtualCPU}, + hypercall, + linux::KVM, + mem::MmapMemory, + vcpu::{VcpuStopReason, VirtualCPU}, + virtio::*, + vm::UhyveVm, + HypervisorError, HypervisorResult, }; const CPUID_EXT_HYPERVISOR: u32 = 1 << 31; @@ -26,42 
+24,103 @@ const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0; const PCI_CONFIG_DATA_PORT: u16 = 0xCFC; const PCI_CONFIG_ADDRESS_PORT: u16 = 0xCF8; -pub struct UhyveCPU { - id: u32, - vcpu: VcpuFd, - vm_start: usize, - kernel_path: PathBuf, - args: Vec, - virtio_device: Arc>, - pci_addr: Option, -} +const KVM_32BIT_MAX_MEM_SIZE: usize = 1 << 32; +const KVM_32BIT_GAP_SIZE: usize = 768 << 20; +const KVM_32BIT_GAP_START: usize = KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE; + +static KVM_ACCESS: Mutex> = Mutex::new(None); + +pub fn initialize_kvm(mem: &MmapMemory, use_pit: bool) -> HypervisorResult<()> { + let sz = std::cmp::min(mem.memory_size, KVM_32BIT_GAP_START); + + let kvm_mem = kvm_userspace_memory_region { + slot: 0, + flags: mem.flags, + memory_size: sz as u64, + guest_phys_addr: mem.guest_address.as_u64(), + userspace_addr: mem.host_address as u64, + }; + + // TODO: make vm a global struct in linux blah + let vm = KVM.create_vm()?; + unsafe { vm.set_user_memory_region(kvm_mem) }?; + + if mem.memory_size > KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE { + let kvm_mem = kvm_userspace_memory_region { + slot: 1, + flags: mem.flags, + memory_size: (mem.memory_size - KVM_32BIT_GAP_START - KVM_32BIT_GAP_SIZE) as u64, + guest_phys_addr: mem.guest_address.as_u64() + + (KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) as u64, + userspace_addr: (mem.host_address as usize + KVM_32BIT_GAP_START + KVM_32BIT_GAP_SIZE) + as u64, + }; -impl UhyveCPU { - pub unsafe fn memory(&mut self, start_addr: u64, len: usize) -> &mut [u8] { - let phys = self.virt_to_phys(start_addr.try_into().unwrap()); - let host = self.host_address(phys); - slice::from_raw_parts_mut(host as *mut u8, len) + unsafe { vm.set_user_memory_region(kvm_mem) }?; } - pub fn new( - id: u32, - kernel_path: PathBuf, - args: Vec, - vcpu: VcpuFd, - vm_start: usize, - virtio_device: Arc>, - ) -> UhyveCPU { - UhyveCPU { - id, - vcpu, - vm_start, - kernel_path, - args, - virtio_device, - pci_addr: None, - } + debug!("Initialize interrupt 
controller"); + + // create basic interrupt controller + vm.create_irq_chip()?; + + if use_pit { + vm.create_pit2(kvm_pit_config::default()).unwrap(); } + // enable x2APIC support + let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_X2APIC_API, + flags: 0, + ..Default::default() + }; + cap.args[0] = (KVM_X2APIC_API_USE_32BIT_IDS | KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK).into(); + vm.enable_cap(&cap) + .expect("Unable to enable x2apic support"); + + // currently, we support only system, which provides the + // cpu feature TSC_DEADLINE + let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_TSC_DEADLINE_TIMER, + ..Default::default() + }; + cap.args[0] = 0; + vm.enable_cap(&cap) + .expect_err("Processor feature `tsc deadline` isn't supported!"); + + let cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_IRQFD, + ..Default::default() + }; + vm.enable_cap(&cap) + .expect_err("The support of KVM_CAP_IRQFD is currently required"); + + let mut cap: kvm_enable_cap = kvm_bindings::kvm_enable_cap { + cap: KVM_CAP_X86_DISABLE_EXITS, + flags: 0, + ..Default::default() + }; + cap.args[0] = + (KVM_X86_DISABLE_EXITS_PAUSE | KVM_X86_DISABLE_EXITS_MWAIT | KVM_X86_DISABLE_EXITS_HLT) + .into(); + vm.enable_cap(&cap) + .expect("Unable to disable exists due pause instructions"); + + let evtfd = EventFd::new(0).unwrap(); + vm.register_irqfd(&evtfd, UHYVE_IRQ_NET)?; + + *KVM_ACCESS.lock().unwrap() = Some(vm); + Ok(()) +} + +pub struct KvmCpu { + id: u32, + vcpu: VcpuFd, + parent_vm: Arc>, + pci_addr: Option, +} + +impl KvmCpu { fn setup_cpuid(&self) -> Result<(), kvm_ioctls::Error> { //debug!("Setup cpuid"); @@ -182,7 +241,7 @@ impl UhyveCPU { | Cr0Flags::PAGING; sregs.cr0 = cr0.bits(); - sregs.cr3 = BOOT_PML4; + sregs.cr3 = BOOT_PML4.as_u64(); let cr4 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION; sregs.cr4 = cr4.bits(); @@ -213,15 +272,15 @@ impl UhyveCPU { sregs.ss = seg; //sregs.fs = seg; //sregs.gs = seg; - sregs.gdt.base = 
BOOT_GDT; - sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX as usize) - 1) as u16; + sregs.gdt.base = BOOT_GDT.as_u64(); + sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u16; self.vcpu.set_sregs(&sregs)?; let mut regs = self.vcpu.get_regs()?; regs.rflags = 2; regs.rip = entry_point; - regs.rdi = BOOT_INFO_ADDR; + regs.rdi = BOOT_INFO_ADDR.as_u64(); regs.rsi = cpu_id.into(); regs.rsp = stack_address; @@ -245,9 +304,7 @@ impl UhyveCPU { pub fn get_vcpu_mut(&mut self) -> &mut VcpuFd { &mut self.vcpu } -} -impl VirtualCPU for UhyveCPU { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { self.setup_long_mode(entry_point, stack_address, cpu_id)?; self.setup_cpuid()?; @@ -262,45 +319,25 @@ impl VirtualCPU for UhyveCPU { Ok(()) } +} - fn kernel_path(&self) -> &Path { - self.kernel_path.as_path() - } - - fn args(&self) -> &[OsString] { - self.args.as_slice() - } - - fn host_address(&self, addr: usize) -> usize { - addr + self.vm_start - } - - fn virt_to_phys(&self, addr: usize) -> usize { - /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). - pub const PAGE_BITS: usize = 12; - - /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). - pub const PAGE_MAP_BITS: usize = 9; - - let executable_disable_mask = !usize::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); - let mut page_table = self.host_address(BOOT_PML4 as usize) as *const usize; - let mut page_bits = 39; - let mut entry: usize = 0; - - for _i in 0..4 { - let index = (addr >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); - entry = unsafe { *page_table.add(index) & executable_disable_mask }; - - // bit 7 is set if this entry references a 1 GiB (PDPT) or 2 MiB (PDT) page. 
- if entry & usize::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { - return (entry & ((!0usize) << page_bits)) | (addr & !((!0usize) << page_bits)); - } else { - page_table = self.host_address(entry & !((1 << PAGE_BITS) - 1)) as *const usize; - page_bits -= PAGE_MAP_BITS; - } - } +impl VirtualCPU for KvmCpu { + fn new(id: u32, parent_vm: Arc>) -> HypervisorResult { + let vcpu = KVM_ACCESS + .lock() + .unwrap() + .as_mut() + .expect("KVM is not initialized yet") + .create_vcpu(id as u64)?; + let mut kvcpu = KvmCpu { + id, + vcpu, + parent_vm: parent_vm.clone(), + pci_addr: None, + }; + kvcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; - (entry & ((!0usize) << PAGE_BITS)) | (addr & !((!0usize) << PAGE_BITS)) + Ok(kvcpu) } fn r#continue(&mut self) -> HypervisorResult { @@ -318,7 +355,8 @@ impl VirtualCPU for UhyveCPU { PCI_CONFIG_DATA_PORT => { if let Some(pci_addr) = self.pci_addr { if pci_addr & 0x1ff800 == 0 { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.handle_read(pci_addr & 0x3ff, addr); } else { unsafe { *(addr.as_ptr() as *mut u32) = 0xffffffff }; @@ -329,28 +367,28 @@ impl VirtualCPU for UhyveCPU { } PCI_CONFIG_ADDRESS_PORT => {} VIRTIO_PCI_STATUS => { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_status(addr); } VIRTIO_PCI_HOST_FEATURES => { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_host_features(addr); } VIRTIO_PCI_GUEST_FEATURES => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_requested_features(addr); } VIRTIO_PCI_CONFIG_OFF_MSIX_OFF..=VIRTIO_PCI_CONFIG_OFF_MSIX_OFF_MAX => { - let virtio_device = 
self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device .read_mac_byte(addr, port - VIRTIO_PCI_CONFIG_OFF_MSIX_OFF); } VIRTIO_PCI_ISR => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.reset_interrupt() } VIRTIO_PCI_LINK_STATUS_MSIX_OFF => { - let virtio_device = self.virtio_device.lock().unwrap(); + let virtio_device = self.parent_vm.virtio_device.lock().unwrap(); virtio_device.read_link_status(addr); } _ => { @@ -358,23 +396,42 @@ impl VirtualCPU for UhyveCPU { } }, VcpuExit::IoOut(port, addr) => { - let data_addr: usize = unsafe { (*(addr.as_ptr() as *const u32)) as usize }; - if let Some(hypercall) = - unsafe { self.address_to_hypercall(port, data_addr) } - { + let data_addr = + GuestPhysAddr::new(unsafe { (*(addr.as_ptr() as *const u32)) as u64 }); + if let Some(hypercall) = unsafe { + hypercall::address_to_hypercall(&self.parent_vm.mem, port, data_addr) + } { match hypercall { - Hypercall::Cmdsize(syssize) => self.cmdsize(syssize), - Hypercall::Cmdval(syscmdval) => self.cmdval(syscmdval), + Hypercall::Cmdsize(syssize) => syssize + .update(self.parent_vm.kernel_path(), self.parent_vm.args()), + Hypercall::Cmdval(syscmdval) => { + hypercall::copy_argv( + self.parent_vm.kernel_path().as_os_str(), + self.parent_vm.args(), + syscmdval, + &self.parent_vm.mem, + ); + hypercall::copy_env(syscmdval, &self.parent_vm.mem); + } Hypercall::Exit(sysexit) => { - return Ok(VcpuStopReason::Exit(self.exit(sysexit))); + return Ok(VcpuStopReason::Exit(sysexit.arg)); + } + Hypercall::FileClose(sysclose) => hypercall::close(sysclose), + Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek), + Hypercall::FileOpen(sysopen) => { + hypercall::open(&self.parent_vm.mem, sysopen) + } + Hypercall::FileRead(sysread) => { + hypercall::read(&self.parent_vm.mem, sysread) + } + Hypercall::FileWrite(syswrite) => { + 
hypercall::write(&self.parent_vm.mem, syswrite) + .map_err(|_e| HypervisorError::new(libc::EFAULT))? + } + Hypercall::FileUnlink(sysunlink) => { + hypercall::unlink(&self.parent_vm.mem, sysunlink) } - Hypercall::FileClose(sysclose) => self.close(sysclose), - Hypercall::FileLseek(syslseek) => self.lseek(syslseek), - Hypercall::FileOpen(sysopen) => self.open(sysopen), - Hypercall::FileRead(sysread) => self.read(sysread), - Hypercall::FileWrite(syswrite) => self.write(syswrite)?, - Hypercall::FileUnlink(sysunlink) => self.unlink(sysunlink), - Hypercall::SerialWriteByte(buf) => self.uart(&[buf])?, + Hypercall::SerialWriteByte(buf) => hypercall::uart(&[buf])?, _ => panic!("Got unknown hypercall {:?}", hypercall), }; } else { @@ -384,7 +441,7 @@ impl VirtualCPU for UhyveCPU { if let Some(pci_addr) = self.pci_addr { if pci_addr & 0x1ff800 == 0 { let mut virtio_device = - self.virtio_device.lock().unwrap(); + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.handle_write(pci_addr & 0x3ff, addr); } } @@ -393,24 +450,29 @@ impl VirtualCPU for UhyveCPU { self.pci_addr = Some(unsafe { *(addr.as_ptr() as *const u32) }); } VIRTIO_PCI_STATUS => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.write_status(addr); } VIRTIO_PCI_GUEST_FEATURES => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); virtio_device.write_requested_features(addr); } VIRTIO_PCI_QUEUE_NOTIFY => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.handle_notify_output(addr, self); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); + virtio_device.handle_notify_output(addr, &self.parent_vm.mem); } VIRTIO_PCI_QUEUE_SEL => { - let mut virtio_device = self.virtio_device.lock().unwrap(); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); 
virtio_device.write_selected_queue(addr); } VIRTIO_PCI_QUEUE_PFN => { - let mut virtio_device = self.virtio_device.lock().unwrap(); - virtio_device.write_pfn(addr, self); + let mut virtio_device = + self.parent_vm.virtio_device.lock().unwrap(); + virtio_device.write_pfn(addr, &self.parent_vm.mem); } _ => { panic!("Unhandled IO exit: 0x{:x}", port); @@ -461,16 +523,16 @@ impl VirtualCPU for UhyveCPU { println!("Segment registers:"); println!("------------------"); println!("register selector base limit type p dpl db s l g avl"); - UhyveCPU::show_segment("cs ", &sregs.cs); - UhyveCPU::show_segment("ss ", &sregs.ss); - UhyveCPU::show_segment("ds ", &sregs.ds); - UhyveCPU::show_segment("es ", &sregs.es); - UhyveCPU::show_segment("fs ", &sregs.fs); - UhyveCPU::show_segment("gs ", &sregs.gs); - UhyveCPU::show_segment("tr ", &sregs.tr); - UhyveCPU::show_segment("ldt", &sregs.ldt); - UhyveCPU::show_dtable("gdt", &sregs.gdt); - UhyveCPU::show_dtable("idt", &sregs.idt); + KvmCpu::show_segment("cs ", &sregs.cs); + KvmCpu::show_segment("ss ", &sregs.ss); + KvmCpu::show_segment("ds ", &sregs.ds); + KvmCpu::show_segment("es ", &sregs.es); + KvmCpu::show_segment("fs ", &sregs.fs); + KvmCpu::show_segment("gs ", &sregs.gs); + KvmCpu::show_segment("tr ", &sregs.tr); + KvmCpu::show_segment("ldt", &sregs.ldt); + KvmCpu::show_dtable("gdt", &sregs.gdt); + KvmCpu::show_dtable("idt", &sregs.idt); println!(); println!("\nAPIC:"); diff --git a/src/linux/x86_64/mod.rs b/src/linux/x86_64/mod.rs new file mode 100644 index 00000000..0452b284 --- /dev/null +++ b/src/linux/x86_64/mod.rs @@ -0,0 +1 @@ +pub mod kvm_cpu; diff --git a/src/macos/aarch64/mod.rs b/src/macos/aarch64/mod.rs index 58cca2f3..f1096356 100644 --- a/src/macos/aarch64/mod.rs +++ b/src/macos/aarch64/mod.rs @@ -1,4 +1,3 @@ -pub mod uhyve; pub mod vcpu; /// The size of a page. 
diff --git a/src/macos/aarch64/uhyve.rs b/src/macos/aarch64/uhyve.rs deleted file mode 100644 index 317e37f4..00000000 --- a/src/macos/aarch64/uhyve.rs +++ /dev/null @@ -1,251 +0,0 @@ -use std::{ - ffi::OsString, - path::{Path, PathBuf}, - ptr, -}; - -use hermit_entry::boot_info::RawBootInfo; -use libc::{self, c_void}; -use log::debug; -use xhypervisor::{create_vm, map_mem, unmap_mem, MemPerm}; - -use crate::{ - aarch64::{PT_MEM, PT_MEM_CD, PT_PT, PT_SELF}, - consts::{BOOT_INFO_ADDR, BOOT_PGT, PAGE_SIZE}, - macos::aarch64::{vcpu::*, HYPERVISOR_PAGE_SIZE}, - params::Params, - vm::{HypervisorResult, Vm}, -}; - -pub struct Uhyve { - offset: u64, - entry_point: u64, - stack_address: u64, - mem_size: usize, - guest_mem: *mut c_void, - num_cpus: u32, - path: PathBuf, - args: Vec, - boot_info: *const RawBootInfo, - verbose: bool, -} - -impl std::fmt::Debug for Uhyve { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Uhyve") - .field("entry_point", &self.entry_point) - .field("stack_address", &self.stack_address) - .field("mem_size", &self.mem_size) - .field("guest_mem", &self.guest_mem) - .field("num_cpus", &self.num_cpus) - .field("path", &self.path) - .field("boot_info", &self.boot_info) - .field("verbose", &self.verbose) - .finish() - } -} - -impl Uhyve { - pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { - let memory_size = params.memory_size.get(); - - assert!(HYPERVISOR_PAGE_SIZE < memory_size); - - let mem = unsafe { - libc::mmap( - std::ptr::null_mut(), - memory_size, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_NORESERVE, - -1, - 0, - ) - }; - - assert_ne!(libc::MAP_FAILED, mem, "mmap failed"); - - debug!("Allocate memory for the guest at 0x{:x}", mem as usize); - - debug!("Create VM..."); - create_vm()?; - - debug!("Map guest memory..."); - unsafe { - map_mem( - std::slice::from_raw_parts(mem as *mut u8, HYPERVISOR_PAGE_SIZE), - 0, - MemPerm::Read, - )?; - - 
map_mem( - std::slice::from_raw_parts_mut( - (mem as *mut u8).offset(HYPERVISOR_PAGE_SIZE.try_into().unwrap()), - memory_size - HYPERVISOR_PAGE_SIZE, - ), - HYPERVISOR_PAGE_SIZE.try_into().unwrap(), - MemPerm::ExecAndWrite, - )?; - } - - let hyve = Uhyve { - offset: 0, - entry_point: 0, - stack_address: 0, - mem_size: memory_size, - guest_mem: mem, - num_cpus: params.cpu_count.get(), - path: kernel_path, - args: params.kernel_args, - boot_info: ptr::null(), - verbose: params.verbose, - }; - - hyve.init_guest_mem(); - - Ok(hyve) - } -} - -impl Vm for Uhyve { - fn verbose(&self) -> bool { - self.verbose - } - - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - - fn get_offset(&self) -> u64 { - self.offset - } - - fn set_entry_point(&mut self, entry: u64) { - self.entry_point = entry; - } - - fn get_entry_point(&self) -> u64 { - self.entry_point - } - - fn set_stack_address(&mut self, stack_address: u64) { - self.stack_address = stack_address; - } - - fn stack_address(&self) -> u64 { - self.stack_address - } - - fn num_cpus(&self) -> u32 { - self.num_cpus - } - - fn guest_mem(&self) -> (*mut u8, usize) { - (self.guest_mem as *mut u8, self.mem_size) - } - - fn kernel_path(&self) -> &Path { - self.path.as_path() - } - - fn create_cpu(&self, id: u32) -> HypervisorResult { - Ok(UhyveCPU::new( - id, - self.path.clone(), - self.args.clone(), - self.guest_mem as usize, - )) - } - - fn set_boot_info(&mut self, header: *const RawBootInfo) { - self.boot_info = header; - } - - fn init_guest_mem(&self) { - debug!("Initialize guest memory"); - - let (mem_addr, _) = self.guest_mem(); - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - pgt_slice[0] = BOOT_PGT + 0x1000 + PT_PT; - pgt_slice[511] = BOOT_PGT + PT_PT + PT_SELF; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 
0x1000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - pgt_slice[0] = BOOT_PGT + 0x2000 + PT_PT; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x2000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - pgt_slice[0] = BOOT_PGT + 0x3000 + PT_PT; - pgt_slice[1] = BOOT_PGT + 0x4000 + PT_PT; - pgt_slice[2] = BOOT_PGT + 0x5000 + PT_PT; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x3000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for i in pgt_slice.iter_mut() { - *i = 0; - } - // map uhyve ports into the virtual address space - pgt_slice[0] = PT_MEM_CD; - // map BootInfo into the virtual address space - pgt_slice[BOOT_INFO_ADDR as usize / PAGE_SIZE] = BOOT_INFO_ADDR + PT_MEM; - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x4000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for (idx, i) in pgt_slice.iter_mut().enumerate() { - *i = 0x200000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; - } - - let pgt_slice = unsafe { - std::slice::from_raw_parts_mut( - mem_addr.offset((BOOT_PGT + 0x5000).try_into().unwrap()) as *mut u64, - 512, - ) - }; - for (idx, i) in pgt_slice.iter_mut().enumerate() { - *i = 0x400000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; - } - } -} - -impl Drop for Uhyve { - fn drop(&mut self) { - unmap_mem(0, self.mem_size).unwrap(); - - unsafe { - libc::munmap(self.guest_mem, self.mem_size); - } - } -} - -unsafe impl Send for Uhyve {} -unsafe impl Sync for Uhyve {} diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 43a96e53..35d86869 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -1,13 +1,10 @@ #![allow(non_snake_case)] #![allow(clippy::identity_op)] -use std::{ - ffi::OsString, - path::{Path, PathBuf}, -}; +use std::sync::Arc; use log::debug; -use 
uhyve_interface::Hypercall; +use uhyve_interface::{GuestPhysAddr, Hypercall}; use xhypervisor::{self, Register, SystemRegister, VirtualCpuExitReason}; use crate::{ @@ -16,30 +13,19 @@ use crate::{ PSR, TCR_FLAGS, TCR_TG1_4K, VA_BITS, }, consts::*, - vm::{HypervisorResult, VcpuStopReason, VirtualCPU}, + hypercall::{self, copy_argv, copy_env}, + vcpu::{VcpuStopReason, VirtualCPU}, + vm::UhyveVm, + HypervisorResult, }; -pub struct UhyveCPU { +pub struct XhyveCpu { id: u32, - kernel_path: PathBuf, - args: Vec, vcpu: xhypervisor::VirtualCpu, - vm_start: usize, + parent_vm: Arc>, } -impl UhyveCPU { - pub fn new(id: u32, kernel_path: PathBuf, args: Vec, vm_start: usize) -> UhyveCPU { - Self { - id, - kernel_path, - args, - vcpu: xhypervisor::VirtualCpu::new().unwrap(), - vm_start, - } - } -} - -impl VirtualCPU for UhyveCPU { +impl XhyveCpu { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { debug!("Initialize VirtualCPU"); @@ -49,7 +35,8 @@ impl VirtualCPU for UhyveCPU { self.vcpu.write_register(Register::PC, entry_point)?; self.vcpu .write_system_register(SystemRegister::SP_EL1, stack_address)?; - self.vcpu.write_register(Register::X0, BOOT_INFO_ADDR)?; + self.vcpu + .write_register(Register::X0, BOOT_INFO_ADDR.as_u64())?; self.vcpu.write_register(Register::X1, cpu_id.into())?; /* @@ -101,7 +88,7 @@ impl VirtualCPU for UhyveCPU { self.vcpu .write_system_register(SystemRegister::TTBR1_EL1, 0)?; self.vcpu - .write_system_register(SystemRegister::TTBR0_EL1, BOOT_PGT)?; + .write_system_register(SystemRegister::TTBR0_EL1, BOOT_PGT.as_u64())?; /* * Prepare system control register (SCTRL) @@ -146,21 +133,18 @@ impl VirtualCPU for UhyveCPU { Ok(()) } +} - fn kernel_path(&self) -> &Path { - self.kernel_path.as_path() - } - - fn args(&self) -> &[OsString] { - self.args.as_slice() - } - - fn host_address(&self, addr: usize) -> usize { - addr + self.vm_start - } +impl VirtualCPU for XhyveCpu { + fn new(id: u32, parent_vm: Arc>) -> 
HypervisorResult { + let mut vcpu = XhyveCpu { + id, + parent_vm: parent_vm.clone(), + vcpu: xhypervisor::VirtualCpu::new().unwrap(), + }; + vcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; - fn virt_to_phys(&self, _addr: usize) -> usize { - 0 + Ok(vcpu) } fn r#continue(&mut self) -> HypervisorResult { @@ -177,18 +161,43 @@ impl VirtualCPU for UhyveCPU { let addr: u16 = exception.physical_address.try_into().unwrap(); let pc = self.vcpu.read_register(Register::PC)?; - let data_addr = self.vcpu.read_register(Register::X8)?; - if let Some(hypercall) = - unsafe { self.address_to_hypercall(addr, data_addr as usize) } - { + let data_addr = GuestPhysAddr::new(self.vcpu.read_register(Register::X8)?); + if let Some(hypercall) = unsafe { + hypercall::address_to_hypercall(&self.parent_vm.mem, addr, data_addr) + } { match hypercall { Hypercall::SerialWriteByte(_char) => { let x8 = (self.vcpu.read_register(Register::X8)? & 0xFF) as u8; - self.uart(&[x8]).unwrap(); + hypercall::uart(&[x8]).unwrap(); } Hypercall::Exit(sysexit) => { - return Ok(VcpuStopReason::Exit(self.exit(sysexit))); + return Ok(VcpuStopReason::Exit(sysexit.arg)); + } + Hypercall::Cmdsize(syssize) => syssize + .update(self.parent_vm.kernel_path(), self.parent_vm.args()), + Hypercall::Cmdval(syscmdval) => { + copy_argv( + self.parent_vm.kernel_path().as_os_str(), + self.parent_vm.args(), + syscmdval, + &self.parent_vm.mem, + ); + copy_env(syscmdval, &self.parent_vm.mem); + } + Hypercall::FileClose(sysclose) => hypercall::close(sysclose), + Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek), + Hypercall::FileOpen(sysopen) => { + hypercall::open(&self.parent_vm.mem, sysopen) + } + Hypercall::FileRead(sysread) => { + hypercall::read(&self.parent_vm.mem, sysread) + } + Hypercall::FileWrite(syswrite) => { + hypercall::write(&self.parent_vm.mem, syswrite).unwrap() + } + Hypercall::FileUnlink(sysunlink) => { + hypercall::unlink(&self.parent_vm.mem, sysunlink) } _ => { panic! 
{"Hypercall {hypercall:?} not implemented on macos-aarch64"} @@ -300,7 +309,7 @@ impl VirtualCPU for UhyveCPU { } } -impl Drop for UhyveCPU { +impl Drop for XhyveCpu { fn drop(&mut self) { self.vcpu.destroy().unwrap(); } diff --git a/src/macos/mod.rs b/src/macos/mod.rs index 51cc3254..1c58ce18 100644 --- a/src/macos/mod.rs +++ b/src/macos/mod.rs @@ -1,9 +1,9 @@ #[cfg(target_arch = "aarch64")] pub mod aarch64; -#[cfg(target_arch = "aarch64")] -pub use crate::macos::aarch64::{uhyve, vcpu}; #[cfg(target_arch = "x86_64")] pub mod x86_64; + +pub mod xhyve; use std::{ sync::{mpsc, Arc}, thread, @@ -11,21 +11,21 @@ use std::{ use core_affinity::CoreId; +#[cfg(target_arch = "aarch64")] +pub use crate::macos::aarch64::vcpu::XhyveCpu; #[cfg(target_arch = "x86_64")] -pub use crate::macos::x86_64::{uhyve, vcpu}; -use crate::vm::{VirtualCPU, Vm}; +pub use crate::macos::x86_64::vcpu::XhyveCpu; +use crate::{vcpu::VirtualCPU, vm::UhyveVm}; pub type HypervisorError = xhypervisor::Error; pub type DebugExitInfo = (); -impl uhyve::Uhyve { +impl UhyveVm { /// Runs the VM. /// /// Blocks until the VM has finished execution. pub fn run(mut self, cpu_affinity: Option>) -> i32 { - unsafe { - self.load_kernel().expect("Unabled to load the kernel"); - } + self.load_kernel().expect("Unabled to load the kernel"); // For communication of the exit code from one vcpu to this thread as return // value. 
@@ -34,7 +34,7 @@ impl uhyve::Uhyve { let this = Arc::new(self); (0..this.num_cpus()).for_each(|cpu_id| { - let vm = this.clone(); + let parent_vm = this.clone(); let exit_tx = exit_tx.clone(); let local_cpu_affinity = match &cpu_affinity { @@ -53,9 +53,7 @@ impl uhyve::Uhyve { None => debug!("No affinity specified, not binding thread"), } - let mut cpu = vm.create_cpu(cpu_id).unwrap(); - cpu.init(vm.get_entry_point(), vm.stack_address(), cpu_id) - .unwrap(); + let mut cpu = XhyveCpu::new(cpu_id, parent_vm.clone()).unwrap(); // jump into the VM and execute code of the guest let result = cpu.run(); diff --git a/src/macos/x86_64/mod.rs b/src/macos/x86_64/mod.rs index 19fdd515..b5dc20fe 100644 --- a/src/macos/x86_64/mod.rs +++ b/src/macos/x86_64/mod.rs @@ -1,3 +1,2 @@ mod ioapic; -pub mod uhyve; pub mod vcpu; diff --git a/src/macos/x86_64/uhyve.rs b/src/macos/x86_64/uhyve.rs deleted file mode 100644 index 35d3635a..00000000 --- a/src/macos/x86_64/uhyve.rs +++ /dev/null @@ -1,224 +0,0 @@ -use std::{ - ffi::OsString, - mem, - path::{Path, PathBuf}, - ptr, - sync::{Arc, Mutex}, -}; - -use hermit_entry::boot_info::RawBootInfo; -use libc::{self, c_void}; -use log::debug; -use x86_64::{ - structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, - PhysAddr, -}; -use xhypervisor::{create_vm, map_mem, unmap_mem, MemPerm}; - -use crate::{ - consts::*, - macos::x86_64::{ioapic::IoApic, vcpu::*}, - params::Params, - vm::{HypervisorResult, Vm}, - x86_64::create_gdt_entry, -}; - -pub struct Uhyve { - offset: u64, - entry_point: u64, - stack_address: u64, - mem_size: usize, - guest_mem: *mut c_void, - num_cpus: u32, - path: PathBuf, - args: Vec, - boot_info: *const RawBootInfo, - ioapic: Arc>, - verbose: bool, -} - -impl std::fmt::Debug for Uhyve { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Uhyve") - .field("entry_point", &self.entry_point) - .field("stack_address", &self.stack_address) - .field("mem_size", &self.mem_size) - 
.field("guest_mem", &self.guest_mem) - .field("num_cpus", &self.num_cpus) - .field("path", &self.path) - .field("boot_info", &self.boot_info) - .field("ioapic", &self.ioapic) - .field("verbose", &self.verbose) - .finish() - } -} - -impl Uhyve { - pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult { - let memory_size = params.memory_size.get(); - - let mem = unsafe { - libc::mmap( - std::ptr::null_mut(), - memory_size, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_NORESERVE, - -1, - 0, - ) - }; - - assert_ne!(libc::MAP_FAILED, mem, "mmap failed"); - - debug!("Allocate memory for the guest at 0x{:x}", mem as usize); - - debug!("Create VM..."); - create_vm()?; - - debug!("Map guest memory..."); - unsafe { - map_mem( - std::slice::from_raw_parts(mem as *mut u8, memory_size), - 0, - MemPerm::ExecAndWrite, - )?; - } - - let hyve = Uhyve { - offset: 0, - entry_point: 0, - stack_address: 0, - mem_size: memory_size, - guest_mem: mem, - num_cpus: params.cpu_count.get(), - path: kernel_path, - args: params.kernel_args, - boot_info: ptr::null(), - ioapic: Arc::new(Mutex::new(IoApic::new())), - verbose: params.verbose, - }; - - hyve.init_guest_mem(); - - Ok(hyve) - } -} - -impl Vm for Uhyve { - fn verbose(&self) -> bool { - self.verbose - } - - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - - fn get_offset(&self) -> u64 { - self.offset - } - - fn set_entry_point(&mut self, entry: u64) { - self.entry_point = entry; - } - - fn get_entry_point(&self) -> u64 { - self.entry_point - } - - fn set_stack_address(&mut self, stack_address: u64) { - self.stack_address = stack_address; - } - - fn stack_address(&self) -> u64 { - self.stack_address - } - - fn num_cpus(&self) -> u32 { - self.num_cpus - } - - fn guest_mem(&self) -> (*mut u8, usize) { - (self.guest_mem as *mut u8, self.mem_size) - } - - fn kernel_path(&self) -> &Path { - self.path.as_path() - } - - fn create_cpu(&self, id: u32) -> HypervisorResult 
{ - Ok(UhyveCPU::new( - id, - self.path.clone(), - self.args.clone(), - self.guest_mem as usize, - self.ioapic.clone(), - )) - } - - fn set_boot_info(&mut self, header: *const RawBootInfo) { - self.boot_info = header; - } - - /// Initialize the page tables for the guest - fn init_guest_mem(&self) { - debug!("Initialize guest memory"); - - let (mem_addr, _) = self.guest_mem(); - - unsafe { - let pml4 = &mut *((mem_addr as u64 + BOOT_PML4) as *mut PageTable); - let pdpte = &mut *((mem_addr as u64 + BOOT_PDPTE) as *mut PageTable); - let pde = &mut *((mem_addr as u64 + BOOT_PDE) as *mut PageTable); - let gdt_entry: u64 = mem_addr as u64 + BOOT_GDT; - - // initialize GDT - *((gdt_entry) as *mut u64) = create_gdt_entry(0, 0, 0); - *((gdt_entry + mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xA09B, 0, 0xFFFFF); /* code */ - *((gdt_entry + 2 * mem::size_of::<*mut u64>() as u64) as *mut u64) = - create_gdt_entry(0xC093, 0, 0xFFFFF); /* data */ - - /* For simplicity we currently use 2MB pages and only a single - PML4/PDPTE/PDE. 
*/ - - // per default is the memory zeroed, which we allocate by the system call mmap - /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ - - pml4[0].set_addr( - PhysAddr::new(BOOT_PDPTE), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pml4[511].set_addr( - PhysAddr::new(BOOT_PML4), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr( - PhysAddr::new(BOOT_PDE), - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - - for i in 0..512 { - let addr = PhysAddr::new(i as u64 * Page::::SIZE); - pde[i].set_addr( - addr, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, - ); - } - } - } -} - -impl Drop for Uhyve { - fn drop(&mut self) { - unmap_mem(0, self.mem_size).unwrap(); - - unsafe { - libc::munmap(self.guest_mem, self.mem_size); - } - } -} - -unsafe impl Send for Uhyve {} -unsafe impl Sync for Uhyve {} diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index 764b25e1..ca76ad7b 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -2,18 +2,16 @@ use std::{ arch::x86_64::__cpuid_count, - ffi::OsString, - path::{Path, PathBuf}, sync::{Arc, Mutex}, }; use burst::x86::{disassemble_64, InstructionOperation, OperandType}; use lazy_static::lazy_static; use log::{debug, trace}; -use uhyve_interface::Hypercall; +use uhyve_interface::{GuestPhysAddr, Hypercall}; use x86_64::{ registers::control::{Cr0Flags, Cr4Flags}, - structures::{gdt::SegmentSelector, paging::PageTableFlags}, + structures::gdt::SegmentSelector, PrivilegeLevel, }; use xhypervisor::{ @@ -33,10 +31,16 @@ use xhypervisor::{ use crate::{ consts::*, + hypercall, + hypercall::{copy_argv, copy_env}, macos::x86_64::ioapic::IoApic, - vm::{HypervisorResult, VcpuStopReason, VirtualCPU}, + vcpu::{VcpuStopReason, VirtualCPU}, + vm::UhyveVm, + 
HypervisorResult, }; +static IOAPIC: Mutex> = Mutex::new(None); + /// Extracted from `x86::msr`. mod msr { /// See Section 17.13, Time-Stamp Counter. @@ -151,35 +155,14 @@ lazy_static! { }; } -pub struct UhyveCPU { +pub struct XhyveCpu { id: u32, - kernel_path: PathBuf, - args: Vec, vcpu: xhypervisor::VirtualCpu, - vm_start: usize, + parent_vm: Arc>, apic_base: u64, - ioapic: Arc>, } -impl UhyveCPU { - pub fn new( - id: u32, - kernel_path: PathBuf, - args: Vec, - vm_start: usize, - ioapic: Arc>, - ) -> UhyveCPU { - UhyveCPU { - id, - kernel_path, - args, - vcpu: xhypervisor::VirtualCpu::new().unwrap(), - vm_start, - apic_base: APIC_DEFAULT_BASE, - ioapic, - } - } - +impl XhyveCpu { fn setup_system_gdt(&mut self) -> Result<(), xhypervisor::Error> { debug!("Setup GDT"); @@ -202,10 +185,11 @@ impl UhyveCPU { self.vcpu.write_vmcs(VMCS_GUEST_GS_BASE, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_GS_AR, 0x4093)?; - self.vcpu.write_vmcs(VMCS_GUEST_GDTR_BASE, BOOT_GDT)?; + self.vcpu + .write_vmcs(VMCS_GUEST_GDTR_BASE, BOOT_GDT.as_u64())?; self.vcpu.write_vmcs( VMCS_GUEST_GDTR_LIMIT, - ((std::mem::size_of::() * BOOT_GDT_MAX as usize) - 1) as u64, + ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u64, )?; self.vcpu.write_vmcs(VMCS_GUEST_IDTR_BASE, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_IDTR_LIMIT, 0xffff)?; @@ -270,7 +254,8 @@ impl UhyveCPU { self.vcpu.write_register(&Register::CR0, cr0.bits())?; self.vcpu.write_register(&Register::CR4, cr4.bits())?; - self.vcpu.write_register(&Register::CR3, BOOT_PML4)?; + self.vcpu + .write_register(&Register::CR3, BOOT_PML4.as_u64())?; self.vcpu.write_register(&Register::DR7, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_SYSENTER_ESP, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_SYSENTER_EIP, 0)?; @@ -507,8 +492,15 @@ impl UhyveCPU { let qualification = self.vcpu.read_vmcs(VMCS_RO_EXIT_QUALIFIC)?; let read = (qualification & (1 << 0)) != 0; let write = (qualification & (1 << 1)) != 0; - let code = - unsafe { std::slice::from_raw_parts(self.host_address(rip as usize) 
as *const u8, 8) }; + let code = unsafe { + std::slice::from_raw_parts( + self.parent_vm + .mem + .host_address(GuestPhysAddr::new(rip)) + .unwrap(), + 8, + ) + }; if let Ok(instr) = disassemble_64(code, rip as usize, code.len()) { match instr.operation { @@ -542,14 +534,21 @@ impl UhyveCPU { } }; - self.ioapic + IOAPIC .lock() .unwrap() + .as_mut() + .expect("IOAPIC not initialized") .write(address - IOAPIC_BASE, val); } if read { - let value = self.ioapic.lock().unwrap().read(address - IOAPIC_BASE); + let value = IOAPIC + .lock() + .unwrap() + .as_mut() + .expect("IOAPIC not initialized") + .read(address - IOAPIC_BASE); match instr.operands[0].operand { OperandType::REG_EDI => { @@ -593,9 +592,7 @@ impl UhyveCPU { pub fn get_vcpu(&self) -> &xhypervisor::VirtualCpu { &self.vcpu } -} -impl VirtualCPU for UhyveCPU { fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { self.setup_capabilities()?; self.setup_msr()?; @@ -616,7 +613,8 @@ impl VirtualCPU for UhyveCPU { self.vcpu.write_register(&Register::RCX, 0)?; self.vcpu.write_register(&Register::RDX, 0)?; self.vcpu.write_register(&Register::RSI, cpu_id.into())?; - self.vcpu.write_register(&Register::RDI, BOOT_INFO_ADDR)?; + self.vcpu + .write_register(&Register::RDI, BOOT_INFO_ADDR.as_u64())?; self.vcpu.write_register(&Register::R8, 0)?; self.vcpu.write_register(&Register::R9, 0)?; self.vcpu.write_register(&Register::R10, 0)?; @@ -630,45 +628,19 @@ impl VirtualCPU for UhyveCPU { Ok(()) } +} - fn kernel_path(&self) -> &Path { - self.kernel_path.as_path() - } - - fn args(&self) -> &[OsString] { - self.args.as_slice() - } - - fn host_address(&self, addr: usize) -> usize { - addr + self.vm_start - } - - fn virt_to_phys(&self, addr: usize) -> usize { - /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). 
- pub const PAGE_BITS: usize = 12; - - /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). - pub const PAGE_MAP_BITS: usize = 9; - - let executable_disable_mask = !usize::try_from(PageTableFlags::NO_EXECUTE.bits()).unwrap(); - let mut page_table = self.host_address(BOOT_PML4 as usize) as *const usize; - let mut page_bits = 39; - let mut entry: usize = 0; - - for _i in 0..4 { - let index = (addr >> page_bits) & ((1 << PAGE_MAP_BITS) - 1); - entry = unsafe { *page_table.add(index) & executable_disable_mask }; - - // bit 7 is set if this entry references a 1 GiB (PDPT) or 2 MiB (PDT) page. - if entry & usize::try_from(PageTableFlags::HUGE_PAGE.bits()).unwrap() != 0 { - return (entry & ((!0usize) << page_bits)) | (addr & !((!0usize) << page_bits)); - } else { - page_table = self.host_address(entry & !((1 << PAGE_BITS) - 1)) as *const usize; - page_bits -= PAGE_MAP_BITS; - } - } +impl VirtualCPU for XhyveCpu { + fn new(id: u32, parent_vm: Arc>) -> HypervisorResult { + let mut vcpu = XhyveCpu { + id, + parent_vm: parent_vm.clone(), + vcpu: xhypervisor::VirtualCpu::new().unwrap(), + apic_base: APIC_DEFAULT_BASE, + }; + vcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; - (entry & ((!0usize) << PAGE_BITS)) | (addr & !((!0usize) << PAGE_BITS)) + Ok(vcpu) } fn r#continue(&mut self) -> HypervisorResult { @@ -744,30 +716,42 @@ impl VirtualCPU for UhyveCPU { assert!(!input, "Invalid I/O operation"); - let data_addr: u64 = self.vcpu.read_register(&Register::RAX)? & 0xFFFFFFFF; - if let Some(hypercall) = - unsafe { self.address_to_hypercall(port, data_addr as usize) } - { + let data_addr = + GuestPhysAddr::new(self.vcpu.read_register(&Register::RAX)? 
& 0xFFFFFFFF); + if let Some(hypercall) = unsafe { + hypercall::address_to_hypercall(&self.parent_vm.mem, port, data_addr) + } { match hypercall { - Hypercall::Cmdsize(syssize) => self.cmdsize(syssize), - Hypercall::Cmdval(syscmdval) => self.cmdval(syscmdval), + Hypercall::Cmdsize(syssize) => { + syssize.update(self.parent_vm.kernel_path(), self.parent_vm.args()) + } + Hypercall::Cmdval(syscmdval) => { + copy_argv( + self.parent_vm.kernel_path().as_os_str(), + self.parent_vm.args(), + syscmdval, + &self.parent_vm.mem, + ); + copy_env(syscmdval, &self.parent_vm.mem); + } Hypercall::Exit(sysexit) => { - return Ok(VcpuStopReason::Exit(self.exit(sysexit))) + return Ok(VcpuStopReason::Exit(sysexit.arg)); + } + Hypercall::FileClose(sysclose) => hypercall::close(sysclose), + Hypercall::FileLseek(syslseek) => hypercall::lseek(syslseek), + Hypercall::FileOpen(sysopen) => { + hypercall::open(&self.parent_vm.mem, sysopen) + } + Hypercall::FileRead(sysread) => { + hypercall::read(&self.parent_vm.mem, sysread) } - Hypercall::FileClose(sysclose) => self.close(sysclose), - Hypercall::FileLseek(syslseek) => self.lseek(syslseek), - Hypercall::FileOpen(sysopen) => self.open(sysopen), - Hypercall::FileRead(sysread) => self.read(sysread), Hypercall::FileWrite(syswrite) => { - // Return an error for proper handling - self.write(syswrite).unwrap() + hypercall::write(&self.parent_vm.mem, syswrite).unwrap() } - Hypercall::FileUnlink(sysunlink) => self.unlink(sysunlink), - Hypercall::SerialWriteByte(_char) => { - // TODO Not sure why this call works different on macos... - let al = (self.vcpu.read_register(&Register::RAX)? 
& 0xFF) as u8; - self.uart(&[al]).unwrap(); + Hypercall::FileUnlink(sysunlink) => { + hypercall::unlink(&self.parent_vm.mem, sysunlink) } + Hypercall::SerialWriteByte(buf) => hypercall::uart(&[buf]).unwrap(), _ => panic!("Got unknown hypercall {:?}", hypercall), } self.vcpu.write_register(&Register::RIP, rip + len)?; @@ -952,7 +936,7 @@ impl VirtualCPU for UhyveCPU { } } -impl Drop for UhyveCPU { +impl Drop for XhyveCpu { fn drop(&mut self) { self.vcpu.destroy().unwrap(); } diff --git a/src/macos/xhyve.rs b/src/macos/xhyve.rs new file mode 100644 index 00000000..a15781c7 --- /dev/null +++ b/src/macos/xhyve.rs @@ -0,0 +1,12 @@ +use xhypervisor::{create_vm, map_mem, MemPerm}; + +use crate::{mem::MmapMemory, HypervisorResult}; + +pub fn initialize_xhyve(mem: &mut MmapMemory) -> HypervisorResult<()> { + debug!("Create VM..."); + create_vm()?; + + debug!("Map guest memory..."); + map_mem(unsafe { mem.as_slice_mut() }, 0, MemPerm::ExecAndWrite)?; + Ok(()) +} diff --git a/src/mem.rs b/src/mem.rs new file mode 100644 index 00000000..d5e0a5e8 --- /dev/null +++ b/src/mem.rs @@ -0,0 +1,209 @@ +use std::{mem::MaybeUninit, ops::Index, os::raw::c_void, ptr::NonNull}; + +use nix::sys::mman::*; +use thiserror::Error; +use uhyve_interface::GuestPhysAddr; + +#[derive(Error, Debug)] +pub enum MemoryError { + #[error("Memory bounds exceeded")] + BoundsViolation, + #[error("The desired guest location is not part of this memory")] + WrongMemoryError, +} + +/// A general purpose VM memory section that can exploit some Linux Kernel features. 
+#[derive(Debug)] +pub struct MmapMemory { + // TODO: make private + pub flags: u32, + pub memory_size: usize, + pub guest_address: GuestPhysAddr, + pub host_address: *mut u8, +} + +impl MmapMemory { + pub fn new( + flags: u32, + memory_size: usize, + guest_address: GuestPhysAddr, + huge_pages: bool, + mergeable: bool, + ) -> MmapMemory { + let host_address = unsafe { + mmap_anonymous( + None, + memory_size.try_into().unwrap(), + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_PRIVATE | MapFlags::MAP_NORESERVE, + ) + .expect("mmap failed") + }; + + if mergeable { + #[cfg(target_os = "linux")] + { + debug!("Enable kernel feature to merge same pages"); + unsafe { + madvise(host_address, memory_size, MmapAdvise::MADV_MERGEABLE).unwrap(); + } + } + #[cfg(not(target_os = "linux"))] + { + error!("OS does not support same page merging"); + } + } + + if huge_pages { + #[cfg(target_os = "linux")] + { + debug!("Uhyve uses huge pages"); + unsafe { + madvise(host_address, memory_size, MmapAdvise::MADV_HUGEPAGE).unwrap(); + } + } + #[cfg(not(target_os = "linux"))] + { + error!("OS does not support huge pages"); + } + } + + MmapMemory { + flags, + memory_size, + guest_address, + host_address: host_address.as_ptr() as *mut u8, + } + } + + /// This can create multiple aliasing. During the lifetime of the returned slice, the memory must not be altered, dropped or simmilar. + #[allow(clippy::mut_from_ref)] + pub unsafe fn as_slice_mut(&self) -> &mut [u8] { + std::slice::from_raw_parts_mut(self.host_address, self.memory_size) + } + + /// Same as [`as_slice_mut`], but for `MaybeUninit`. Actually the memory is initialized, as Mmap zero initializes it, but some fns like [`hermit_entry::elf::load_kernel`] require [`MaybeUninit`]s. + #[allow(clippy::mut_from_ref)] + pub unsafe fn as_slice_uninit_mut(&self) -> &mut [MaybeUninit] { + std::slice::from_raw_parts_mut(self.host_address as *mut MaybeUninit, self.memory_size) + } + + /// Read a section of the memory. 
+ /// + /// # Safety + /// + /// This is unsafe, as can create multiple aliasing. During the lifetime of + /// the returned slice, the memory must not be altered to prevent undfined + /// behaviour. + pub unsafe fn slice_at(&self, addr: GuestPhysAddr, len: usize) -> Result<&[u8], MemoryError> { + if addr.as_u64() as usize + len >= self.memory_size - self.guest_address.as_u64() as usize { + Err(MemoryError::BoundsViolation) + } else { + Ok(unsafe { std::slice::from_raw_parts(self.host_address(addr)?, len) }) + } + } + + /// Writeable access to a section of the memory. + /// + /// # Safety + /// + /// This is unsafe, as it can create multiple aliasing. During the lifetime of + /// the returned slice, the memory must not be altered to prevent undfined + /// behavior. + pub unsafe fn slice_at_mut( + &self, + addr: GuestPhysAddr, + len: usize, + ) -> Result<&mut [u8], MemoryError> { + if addr.as_u64() as usize + len >= self.memory_size - self.guest_address.as_u64() as usize { + Err(MemoryError::BoundsViolation) + } else { + Ok(unsafe { std::slice::from_raw_parts_mut(self.host_address(addr)? as *mut u8, len) }) + } + } + + /// Returns the host address of the given internal physical address in the + /// memory, if the address is valid. + pub fn host_address(&self, addr: GuestPhysAddr) -> Result<*const u8, MemoryError> { + if addr < self.guest_address + || addr.as_u64() as usize > self.guest_address.as_u64() as usize + self.memory_size + { + return Err(MemoryError::WrongMemoryError); + } + Ok( + // Safety: + // - The new ptr is checked to be within the mmap'd memory region above + // - to overflow an isize, the guest memory needs to be larger than 2^63 (which is rather unlikely anytime soon). 
+ unsafe { self.host_address.add((addr - self.guest_address) as usize) as usize } + as *const u8, + ) + } + + /// Read the value in the memory at the given address + pub fn read(&self, addr: GuestPhysAddr) -> Result { + Ok(unsafe { self.host_address(addr)?.cast::().read_unaligned() }) + } + + /// Get a reference to the type at the given address in the memory. + pub unsafe fn get_ref(&self, addr: GuestPhysAddr) -> Result<&T, MemoryError> { + Ok(unsafe { &*(self.host_address(addr)? as *const T) }) + } + + /// Get a mutable reference to the type at the given address in the memory. + pub unsafe fn get_ref_mut(&self, addr: GuestPhysAddr) -> Result<&mut T, MemoryError> { + Ok(unsafe { &mut *(self.host_address(addr)? as *mut T) }) + } +} + +impl Drop for MmapMemory { + fn drop(&mut self) { + if self.memory_size > 0 { + let host_addr = NonNull::new(self.host_address as *mut c_void).unwrap(); + unsafe { + munmap(host_addr, self.memory_size).unwrap(); + } + } + } +} + +impl Index for MmapMemory { + type Output = u8; + + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + assert!(index < self.memory_size); + + // Safety: + // - The new ptr is checked to be within the mmap'd memory region above + // - to overflow an isize, the guest memory needs to be larger than 2^63 (which is rather unlikely anytime soon). 
+ unsafe { &*self.host_address.add(index) } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consts::PAGE_SIZE; + + #[test] + fn test_mmap_memory_readwrite() { + let mem = MmapMemory::new(0, 40 * PAGE_SIZE, GuestPhysAddr::new(0x1000), true, true); + unsafe { + mem.as_slice_mut()[0xfe] = 0xaa; + mem.as_slice_mut()[0xff] = 0xbb; + mem.as_slice_mut()[0x100] = 0x78; + mem.as_slice_mut()[0x101] = 0x56; + mem.as_slice_mut()[0x102] = 0x34; + mem.as_slice_mut()[0x103] = 0x12; + } + assert_eq!( + mem.read::(GuestPhysAddr::new(0x1100)).unwrap(), + 0x12345678 + ); + // unaligned read + assert_eq!( + mem.read::(GuestPhysAddr::new(0x10fe)).unwrap(), + 0x12345678bbaa + ); + } +} diff --git a/src/paging.rs b/src/paging.rs new file mode 100644 index 00000000..a8d27925 --- /dev/null +++ b/src/paging.rs @@ -0,0 +1,8 @@ +//! General paging related code +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum PagetableError { + #[error("The accessed virtual address is not mapped")] + InvalidAddress, +} diff --git a/src/params.rs b/src/params.rs index 78a2dc8a..11d69f64 100644 --- a/src/params.rs +++ b/src/params.rs @@ -32,7 +32,6 @@ pub struct Params { pub pit: bool, /// GDB server port - #[cfg(target_os = "linux")] pub gdb_port: Option, /// Arguments to forward to the kernel @@ -52,7 +51,6 @@ impl Default for Params { #[cfg(target_os = "linux")] pit: false, cpu_count: Default::default(), - #[cfg(target_os = "linux")] gdb_port: Default::default(), kernel_args: Default::default(), } diff --git a/src/vcpu.rs b/src/vcpu.rs new file mode 100644 index 00000000..f0424ddc --- /dev/null +++ b/src/vcpu.rs @@ -0,0 +1,32 @@ +use std::sync::Arc; + +use crate::vm::UhyveVm; +/// The trait and fns that a virtual cpu requires +use crate::{os::DebugExitInfo, HypervisorResult}; + +/// Reasons for vCPU exits. +pub enum VcpuStopReason { + /// The vCPU stopped for debugging. + Debug(DebugExitInfo), + + /// The vCPU exited with the specified exit code. 
+ Exit(i32), + + /// The vCPU got kicked. + Kick, +} + +/// Functionality a virtual CPU backend must provide to be used by uhyve +pub trait VirtualCPU: Sized { + /// Create a new CPU object + fn new(id: u32, vm: Arc>) -> HypervisorResult; + + /// Continues execution. + fn r#continue(&mut self) -> HypervisorResult; + + /// Start the execution of the CPU. The function will run until it crashes (`Err`) or terminate with an exit code (`Ok`). + fn run(&mut self) -> HypervisorResult>; + + /// Prints the VCPU's registers to stdout. + fn print_registers(&self); +} diff --git a/src/linux/virtio.rs b/src/virtio.rs similarity index 93% rename from src/linux/virtio.rs rename to src/virtio.rs index 54aa76a1..2280fbff 100644 --- a/src/linux/virtio.rs +++ b/src/virtio.rs @@ -3,9 +3,10 @@ use std::{fmt, mem::size_of, ptr::copy_nonoverlapping, sync::Mutex, vec::Vec}; use log::info; use mac_address::*; use tun_tap::*; +use uhyve_interface::GuestPhysAddr; use virtio_bindings::bindings::virtio_net::*; -use crate::{linux::virtqueue::*, vm::VirtualCPU}; +use crate::{mem::MmapMemory, virtqueue::*}; const STATUS_ACKNOWLEDGE: u8 = 0b00000001; const STATUS_DRIVER: u8 = 0b00000010; @@ -123,15 +124,15 @@ impl VirtioNetPciDevice { //TODO: how to read packets without synchronization issues } - pub fn handle_notify_output(&mut self, dest: &[u8], cpu: &impl VirtualCPU) { + pub fn handle_notify_output(&mut self, dest: &[u8], mem: &MmapMemory) { let tx_num = read_u16!(dest, 0); if tx_num == 1 && self.read_status_reg() & STATUS_DRIVER_OK == STATUS_DRIVER_OK { - self.send_available_packets(cpu); + self.send_available_packets(mem); } } // Sends packets using the tun_tap crate, subject to change - fn send_available_packets(&mut self, cpu: &impl VirtualCPU) { + fn send_available_packets(&mut self, mem: &MmapMemory) { let tx_queue = &mut self.virt_queues[TX_QUEUE]; let mut send_indices = Vec::new(); for index in tx_queue.avail_iter() { @@ -139,14 +140,14 @@ impl VirtioNetPciDevice { } for index in 
send_indices { let desc = unsafe { tx_queue.get_descriptor(index) }; - let gpa = unsafe { *(desc.addr as *const usize) }; - let hva = (*cpu).host_address(gpa) as *mut u8; + let gpa = GuestPhysAddr::new(unsafe { *(desc.addr as *const u64) }); + let hva = mem.host_address(gpa).unwrap(); match &self.iface { Some(tap) => unsafe { let vec = vec![0; (desc.len as usize) - size_of::()]; let slice: &[u8] = &vec; copy_nonoverlapping( - hva as *const u8, + hva, slice.as_ptr() as *mut u8, (desc.len as usize) - size_of::(), ); @@ -263,18 +264,18 @@ impl VirtioNetPciDevice { } // Register virtqueue - pub fn write_pfn(&mut self, dest: &[u8], vcpu: &impl VirtualCPU) { + pub fn write_pfn(&mut self, dest: &[u8], mem: &MmapMemory) { let status = self.read_status_reg(); if status & STATUS_FEATURES_OK != 0 && status & STATUS_DRIVER_OK == 0 && self.selected_queue_num as usize == self.virt_queues.len() { - let gpa = unsafe { + let gpa = GuestPhysAddr::new(unsafe { #[allow(clippy::cast_ptr_alignment)] - *(dest.as_ptr() as *const usize) - }; - let hva = (*vcpu).host_address(gpa) as *mut u8; - let queue = unsafe { Virtqueue::new(hva, QUEUE_LIMIT) }; + *(dest.as_ptr() as *const u64) + }); + let hva = mem.host_address(gpa).unwrap(); + let queue = unsafe { Virtqueue::new(hva as *mut u8, QUEUE_LIMIT) }; self.virt_queues.push(queue); } } diff --git a/src/linux/virtqueue.rs b/src/virtqueue.rs similarity index 100% rename from src/linux/virtqueue.rs rename to src/virtqueue.rs diff --git a/src/vm.rs b/src/vm.rs index df6362bb..8f0ba51a 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,6 +1,12 @@ use std::{ - ffi::OsString, fs, io, io::Write, mem, mem::MaybeUninit, num::NonZeroU32, - os::unix::ffi::OsStrExt, path::Path, slice, time::SystemTime, + ffi::OsString, + fmt, fs, io, + marker::PhantomData, + num::NonZeroU32, + path::PathBuf, + ptr, + sync::{Arc, Mutex}, + time::SystemTime, }; use hermit_entry::{ @@ -9,16 +15,16 @@ use hermit_entry::{ }; use log::{error, warn}; use thiserror::Error; -use 
uhyve_interface::{parameters::*, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::{ detect_freq_from_cpuid, detect_freq_from_cpuid_hypervisor_info, get_cpu_frequency_from_os, }; +#[cfg(all(target_arch = "x86_64", target_os = "linux"))] +use crate::linux::x86_64::kvm_cpu::initialize_kvm; use crate::{ - arch, - consts::*, - os::{vcpu::UhyveCPU, DebugExitInfo, HypervisorError}, + arch, consts::*, mem::MmapMemory, os::HypervisorError, params::Params, vcpu::VirtualCPU, + virtio::*, }; pub type HypervisorResult = Result; @@ -35,317 +41,168 @@ pub enum LoadKernelError { pub type LoadKernelResult = Result; -/// Reasons for vCPU exits. -pub enum VcpuStopReason { - /// The vCPU stopped for debugging. - Debug(DebugExitInfo), - - /// The vCPU exited with the specified exit code. - Exit(i32), - - /// The vCPU got kicked. - Kick, -} - -pub trait VirtualCPU { - /// Initialize the cpu to start running the code ad entry_point. - fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()>; - - /// Continues execution. - fn r#continue(&mut self) -> HypervisorResult; - - /// Start the execution of the CPU. The function will run until it crashes (`Err`) or terminate with an exit code (`Ok`). - fn run(&mut self) -> HypervisorResult>; - - /// Prints the VCPU's registers to stdout. - fn print_registers(&self); - - /// Translates an address from the VM's physical space into the hosts virtual space. - fn host_address(&self, addr: usize) -> usize; - - /// Looks up the guests pagetable and translates a guest's virtual address to a guest's physical address. - fn virt_to_phys(&self, addr: usize) -> usize; - - /// Returns the (host) path of the kernel binary. - fn kernel_path(&self) -> &Path; - - fn args(&self) -> &[OsString]; - - /// `addr` is the address of the hypercall parameter in the guest's memory space. `data` is the - /// parameter that was send to that address by the guest. 
- /// - /// # Safety - /// - /// - `data` must be a valid pointer to the data attached to the hypercall. - /// - The return value is only valid, as long as the guest is halted. - /// - This fn must not be called multiple times on the same data, to avoid creating mutable aliasing. - unsafe fn address_to_hypercall(&self, addr: u16, data: usize) -> Option> { - if let Ok(hypercall_port) = HypercallAddress::try_from(addr) { - Some(match hypercall_port { - HypercallAddress::FileClose => { - let sysclose = unsafe { &mut *(self.host_address(data) as *mut CloseParams) }; - Hypercall::FileClose(sysclose) - } - HypercallAddress::FileLseek => { - let syslseek = unsafe { &mut *(self.host_address(data) as *mut LseekParams) }; - Hypercall::FileLseek(syslseek) - } - HypercallAddress::FileOpen => { - let sysopen = unsafe { &mut *(self.host_address(data) as *mut OpenParams) }; - Hypercall::FileOpen(sysopen) - } - HypercallAddress::FileRead => { - let sysread = unsafe { &mut *(self.host_address(data) as *mut ReadPrams) }; - Hypercall::FileRead(sysread) - } - HypercallAddress::FileWrite => { - let syswrite = unsafe { &*(self.host_address(data) as *const WriteParams) }; - Hypercall::FileWrite(syswrite) - } - HypercallAddress::FileUnlink => { - let sysunlink = unsafe { &mut *(self.host_address(data) as *mut UnlinkParams) }; - Hypercall::FileUnlink(sysunlink) - } - HypercallAddress::Exit => { - let sysexit = unsafe { &*(self.host_address(data) as *const ExitParams) }; - Hypercall::Exit(sysexit) - } - HypercallAddress::Cmdsize => { - let syssize = unsafe { &mut *(self.host_address(data) as *mut CmdsizeParams) }; - Hypercall::Cmdsize(syssize) - } - HypercallAddress::Cmdval => { - let syscmdval = unsafe { &*(self.host_address(data) as *const CmdvalParams) }; - Hypercall::Cmdval(syscmdval) - } - HypercallAddress::Uart => Hypercall::SerialWriteByte(data as u8), - _ => unimplemented!(), +// TODO: move to architecture specific section +fn detect_cpu_freq() -> u32 { + #[cfg(target_arch = 
"aarch64")] + let mhz: u32 = 0; + #[cfg(target_arch = "x86_64")] + let mhz = { + let cpuid = raw_cpuid::CpuId::new(); + let mhz: u32 = detect_freq_from_cpuid(&cpuid).unwrap_or_else(|_| { + debug!("Failed to detect from cpuid"); + detect_freq_from_cpuid_hypervisor_info(&cpuid).unwrap_or_else(|_| { + debug!("Failed to detect from hypervisor_info"); + get_cpu_frequency_from_os().unwrap_or(0) }) - } else { - None - } - } - - fn cmdsize(&self, syssize: &mut CmdsizeParams) { - syssize.argc = 0; - syssize.envc = 0; - - let path = self.kernel_path(); - syssize.argsz[0] = path.as_os_str().len() as i32 + 1; - - let mut counter = 0; - for argument in self.args() { - syssize.argsz[(counter + 1) as usize] = argument.len() as i32 + 1; - - counter += 1; - } - - syssize.argc = counter + 1; - - let mut counter = 0; - for (key, value) in std::env::vars_os() { - if counter < MAX_ARGC_ENVC.try_into().unwrap() { - syssize.envsz[counter as usize] = (key.len() + value.len()) as i32 + 2; - counter += 1; - } - } - syssize.envc = counter; + }); + debug!("detected a cpu frequency of {} Mhz", mhz); - if counter >= MAX_ARGC_ENVC.try_into().unwrap() { - warn!("Environment is too large!"); - } + mhz + }; + if mhz == 0 { + warn!("Unable to determine processor frequency"); } + mhz +} - /// Copies the arguments end environment of the application into the VM's memory. 
- fn cmdval(&self, syscmdval: &CmdvalParams) { - let argv = self.host_address(syscmdval.argv.as_u64() as usize); - - // copy kernel path as first argument - { - let path = self.kernel_path().as_os_str(); - - let argvptr = unsafe { self.host_address(*(argv as *mut *mut u8) as usize) }; - let len = path.len(); - let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; +#[cfg(target_os = "linux")] +pub type VcpuDefault = crate::linux::x86_64::kvm_cpu::KvmCpu; +#[cfg(target_os = "macos")] +pub type VcpuDefault = crate::macos::XhyveCpu; + +pub struct UhyveVm { + /// The starting position of the image in physical memory + offset: u64, + entry_point: u64, + stack_address: u64, + pub mem: Arc, + num_cpus: u32, + path: PathBuf, + args: Vec, + boot_info: *const RawBootInfo, + verbose: bool, + pub virtio_device: Arc>, + #[allow(dead_code)] // gdb is not supported on macos + pub(super) gdb_port: Option, + _vcpu_type: PhantomData, +} +impl UhyveVm { + pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult> { + let memory_size = params.memory_size.get(); + + #[cfg(target_os = "linux")] + let mem = MmapMemory::new(0, memory_size, arch::RAM_START, params.thp, params.ksm); + #[cfg(not(target_os = "linux"))] + let mem = MmapMemory::new(0, memory_size, arch::RAM_START, false, false); + + // create virtio interface + // TODO: Remove allow once fixed: + // https://github.com/rust-lang/rust-clippy/issues/11382 + #[allow(clippy::arc_with_non_send_sync)] + let virtio_device = Arc::new(Mutex::new(VirtioNetPciDevice::new())); + + #[cfg(target_os = "linux")] + initialize_kvm(&mem, params.pit)?; + + let cpu_count = params.cpu_count.get(); + + assert!( + params.gdb_port.is_none() || cfg!(target_os = "linux"), + "gdb is only supported on linux (yet)" + ); + assert!( + params.gdb_port.is_none() || cpu_count == 1, + "gdbstub is only supported with one CPU" + ); + + let mut vm = Self { + offset: 0, + entry_point: 0, + stack_address: 0, + mem: mem.into(), + 
num_cpus: cpu_count, + path: kernel_path, + args: params.kernel_args, + boot_info: ptr::null(), + verbose: params.verbose, + virtio_device, + gdb_port: params.gdb_port, + _vcpu_type: PhantomData, + }; - // Create string for environment variable - slice[0..len].copy_from_slice(path.as_bytes()); - slice[len] = 0; - } + vm.init_guest_mem(); - // Copy the application arguments into the vm memory - for (counter, argument) in self.args().iter().enumerate() { - let argvptr = unsafe { - self.host_address( - *((argv + (counter + 1) * mem::size_of::()) as *mut *mut u8) as usize, - ) - }; - let len = argument.len(); - let slice = unsafe { slice::from_raw_parts_mut(argvptr as *mut u8, len + 1) }; - - // Create string for environment variable - slice[0..len].copy_from_slice(argument.as_bytes()); - slice[len] = 0; - } - - // Copy the environment variables into the vm memory - let mut counter = 0; - let envp = self.host_address(syscmdval.envp.as_u64() as usize); - for (key, value) in std::env::vars_os() { - if counter < MAX_ARGC_ENVC.try_into().unwrap() { - let envptr = unsafe { - self.host_address( - *((envp + counter as usize * mem::size_of::()) as *mut *mut u8) - as usize, - ) - }; - let len = key.len() + value.len(); - let slice = unsafe { slice::from_raw_parts_mut(envptr as *mut u8, len + 2) }; - - // Create string for environment variable - slice[0..key.len()].copy_from_slice(key.as_bytes()); - slice[key.len()..(key.len() + 1)].copy_from_slice("=".as_bytes()); - slice[(key.len() + 1)..(len + 1)].copy_from_slice(value.as_bytes()); - slice[len + 1] = 0; - counter += 1; - } - } + Ok(vm) } - /// unlink deletes a name from the filesystem. This is used to handle `unlink` syscalls from the guest. - /// TODO: UNSAFE AS *%@#. It has to be checked that the VM is allowed to unlink that file! 
- fn unlink(&self, sysunlink: &mut UnlinkParams) { - unsafe { - sysunlink.ret = - libc::unlink(self.host_address(sysunlink.name.as_u64() as usize) as *const i8); - } + fn verbose(&self) -> bool { + self.verbose } - /// Reads the exit code from an VM and returns it - fn exit(&self, sysexit: &ExitParams) -> i32 { - sysexit.arg + /// Returns the section offsets relative to their base addresses + pub fn get_offset(&self) -> u64 { + self.offset } - /// Handles an open syscall by opening a file on the host. - fn open(&self, sysopen: &mut OpenParams) { - unsafe { - sysopen.ret = libc::open( - self.host_address(sysopen.name.as_u64() as usize) as *const i8, - sysopen.flags, - sysopen.mode, - ); - } + pub fn get_entry_point(&self) -> u64 { + self.entry_point } - /// Handles an close syscall by closing the file on the host. - fn close(&self, sysclose: &mut CloseParams) { - unsafe { - sysclose.ret = libc::close(sysclose.fd); - } + pub fn stack_address(&self) -> u64 { + self.stack_address } - /// Handles an read syscall on the host. - fn read(&self, sysread: &mut ReadPrams) { - unsafe { - let buffer = self.virt_to_phys(sysread.buf.as_u64() as usize); - - let bytes_read = libc::read( - sysread.fd, - self.host_address(buffer) as *mut libc::c_void, - sysread.len, - ); - if bytes_read >= 0 { - sysread.ret = bytes_read; - } else { - sysread.ret = -1; - } - } + /// Returns the number of cores for the vm. + pub fn num_cpus(&self) -> u32 { + self.num_cpus } - /// Handles an write syscall on the host. 
- fn write(&self, syswrite: &WriteParams) -> io::Result<()> { - let mut bytes_written: usize = 0; - let buffer = self.virt_to_phys(syswrite.buf.as_u64() as usize); - - while bytes_written != syswrite.len { - unsafe { - let step = libc::write( - syswrite.fd, - self.host_address(buffer + bytes_written) as *const libc::c_void, - syswrite.len - bytes_written, - ); - if step >= 0 { - bytes_written += step as usize; - } else { - return Err(io::Error::last_os_error()); - } - } - } - - Ok(()) + pub fn kernel_path(&self) -> &PathBuf { + &self.path } - /// Handles an write syscall on the host. - fn lseek(&self, syslseek: &mut LseekParams) { - unsafe { - syslseek.offset = - libc::lseek(syslseek.fd, syslseek.offset as i64, syslseek.whence) as isize; - } + pub fn args(&self) -> &Vec { + &self.args } - /// Handles an UART syscall by writing to stdout. - fn uart(&self, buf: &[u8]) -> io::Result<()> { - io::stdout().write_all(buf) + /// Initialize the page tables for the guest + fn init_guest_mem(&mut self) { + debug!("Initialize guest memory"); + crate::arch::init_guest_mem( + unsafe { self.mem.as_slice_mut() } // slice only lives during this fn call + .try_into() + .expect("Guest memory is not large enough for pagetables"), + ); } -} -pub trait Vm { - /// Returns the number of cores for the vm. - fn num_cpus(&self) -> u32; - /// Returns a pointer to the address of the guest memory and the size of the memory in bytes. - fn guest_mem(&self) -> (*mut u8, usize); - #[doc(hidden)] - fn set_offset(&mut self, offset: u64); - /// Returns the section offsets relative to their base addresses - fn get_offset(&self) -> u64; - /// Sets the elf entry point. 
- fn set_entry_point(&mut self, entry: u64); - fn get_entry_point(&self) -> u64; - fn set_stack_address(&mut self, stack_addresss: u64); - fn stack_address(&self) -> u64; - fn kernel_path(&self) -> &Path; - fn create_cpu(&self, id: u32) -> HypervisorResult; - fn set_boot_info(&mut self, header: *const RawBootInfo); - fn verbose(&self) -> bool; - fn init_guest_mem(&self); - - unsafe fn load_kernel(&mut self) -> LoadKernelResult<()> { + pub fn load_kernel(&mut self) -> LoadKernelResult<()> { let elf = fs::read(self.kernel_path())?; let object = KernelObject::parse(&elf).map_err(LoadKernelError::ParseKernelError)?; // TODO: should be a random start address, if we have a relocatable executable - let start_address = object.start_addr().unwrap_or(0x400000); - self.set_offset(start_address); + let kernel_start_address = object.start_addr().unwrap_or(0x400000) as usize; + let kernel_end_address = kernel_start_address + object.mem_size(); + self.offset = kernel_start_address as u64; - let (vm_mem, vm_mem_len) = self.guest_mem(); - if start_address as usize + object.mem_size() > vm_mem_len { + if kernel_end_address > self.mem.memory_size - self.mem.guest_address.as_u64() as usize { return Err(LoadKernelError::InsufficientMemory); } - let vm_slice = { - let vm_slice = slice::from_raw_parts_mut(vm_mem as *mut MaybeUninit, vm_mem_len); - &mut vm_slice[start_address as usize..][..object.mem_size()] - }; - let LoadedKernel { load_info, entry_point, - } = object.load_kernel(vm_slice, start_address); - self.set_entry_point(entry_point); + } = object.load_kernel( + // Safety: Slice only lives during this fn call, so no aliasing happens + &mut unsafe { self.mem.as_slice_uninit_mut() } + [kernel_start_address..kernel_end_address], + kernel_start_address as u64, + ); + self.entry_point = entry_point; let boot_info = BootInfo { hardware_info: HardwareInfo { - phys_addr_range: arch::RAM_START..arch::RAM_START + vm_mem_len as u64, + phys_addr_range: arch::RAM_START.as_u64() + 
..arch::RAM_START.as_u64() + self.mem.memory_size as u64, serial_port_base: self.verbose().then(|| { SerialPortBase::new((uhyve_interface::HypercallAddress::Uart as u16).into()) .unwrap() @@ -360,36 +217,40 @@ pub trait Vm { boot_time: SystemTime::now().into(), }, }; - let raw_boot_info_ptr = vm_mem.add(BOOT_INFO_ADDR as usize) as *mut RawBootInfo; - *raw_boot_info_ptr = RawBootInfo::from(boot_info); - self.set_boot_info(raw_boot_info_ptr); - self.set_stack_address(start_address.checked_sub(KERNEL_STACK_SIZE).expect( - "there should be enough space for the boot stack before the kernel start address", - )); + unsafe { + let raw_boot_info_ptr = + self.mem.host_address.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; + *raw_boot_info_ptr = RawBootInfo::from(boot_info); + self.boot_info = raw_boot_info_ptr; + } + + self.stack_address = (kernel_start_address as u64) + .checked_sub(KERNEL_STACK_SIZE) + .expect( + "there should be enough space for the boot stack before the kernel start address", + ); Ok(()) } } -fn detect_cpu_freq() -> u32 { - #[cfg(target_arch = "aarch64")] - let mhz: u32 = 0; - #[cfg(target_arch = "x86_64")] - let mhz = { - let cpuid = raw_cpuid::CpuId::new(); - let mhz: u32 = detect_freq_from_cpuid(&cpuid).unwrap_or_else(|_| { - debug!("Failed to detect from cpuid"); - detect_freq_from_cpuid_hypervisor_info(&cpuid).unwrap_or_else(|_| { - debug!("Failed to detect from hypervisor_info"); - get_cpu_frequency_from_os().unwrap_or(0) - }) - }); - debug!("detected a cpu frequency of {} Mhz", mhz); - - mhz - }; - if mhz == 0 { - warn!("Unable to determine processor frequency"); +impl fmt::Debug for UhyveVm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("UhyveVm") + .field("entry_point", &self.entry_point) + .field("stack_address", &self.stack_address) + .field("mem", &self.mem) + .field("num_cpus", &self.num_cpus) + .field("path", &self.path) + .field("boot_info", &self.boot_info) + .field("verbose", &self.verbose) + 
.field("virtio_device", &self.virtio_device) + .finish() } - mhz } + +// TODO: Investigate soundness +// https://github.com/hermitcore/uhyve/issues/229 +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for UhyveVm {} +unsafe impl Sync for UhyveVm {} diff --git a/tests/common.rs b/tests/common.rs index 173f1225..3a9e598e 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -5,7 +5,7 @@ use std::{ }; use byte_unit::{Byte, Unit}; -use uhyvelib::{params::Params, Uhyve}; +use uhyvelib::{params::Params, vm::UhyveVm}; /// Uses Cargo to build a kernel in the `tests/test-kernels` directory. /// Returns a path to the build binary. @@ -48,6 +48,6 @@ pub fn run_simple_vm(kernel_path: PathBuf) { .unwrap(), ..Default::default() }; - let code = Uhyve::new(kernel_path, params).unwrap().run(None); + let code = UhyveVm::new(kernel_path, params).unwrap().run(None); assert_eq!(0, code); } diff --git a/tests/gdb.rs b/tests/gdb.rs index 93c9fcd2..b3593f6d 100644 --- a/tests/gdb.rs +++ b/tests/gdb.rs @@ -12,7 +12,7 @@ use std::{ use assert_fs::{assert::PathAssert, fixture::PathChild, TempDir}; use common::build_hermit_bin; -use uhyvelib::{params::Params, Uhyve}; +use uhyvelib::{params::Params, vm::UhyveVm}; #[test] fn gdb() -> io::Result<()> { @@ -22,7 +22,7 @@ fn gdb() -> io::Result<()> { let bin_path_clone = bin_path.clone(); let vm = thread::spawn(move || { let bin_path = bin_path_clone; - let vm = Uhyve::new( + let vm = UhyveVm::new( bin_path, Params { verbose: true, diff --git a/uhyve-interface/Cargo.toml b/uhyve-interface/Cargo.toml index 9f82b93d..6e92d2b8 100644 --- a/uhyve-interface/Cargo.toml +++ b/uhyve-interface/Cargo.toml @@ -14,4 +14,13 @@ categories = ["os"] [dependencies] num_enum = { version = "0.7", default-features = false } +log = { version = "0.4", optional = true } + +[features] +std = ["dep:log"] + +[target.'cfg(target_arch = "x86_64")'.dependencies] x86_64 = { version = "0.15", default-features = false } + +[target.'cfg(target_arch = 
"aarch64")'.dependencies] +aarch64 = { version = "0.0.11", default-features = false } diff --git a/uhyve-interface/src/lib.rs b/uhyve-interface/src/lib.rs index 02c34c1a..cfd33277 100644 --- a/uhyve-interface/src/lib.rs +++ b/uhyve-interface/src/lib.rs @@ -7,13 +7,25 @@ //! that port is the physical memory address (of the VM) of the parameters of that hypercall. //! - On `aarch64` you write to the respective [`HypercallAddress`]. The 64-bit value written to that location is the guest's physical memory address of the hypercall's parameter. -#![no_std] +#![cfg_attr(not(feature = "std"), no_std)] // TODO: Throw this out, once https://github.com/rust-lang/rfcs/issues/2783 or https://github.com/rust-lang/rust/issues/86772 is resolved use num_enum::TryFromPrimitive; pub mod elf; pub mod parameters; + +#[cfg(target_arch = "aarch64")] +pub use ::aarch64::paging::PhysAddr as GuestPhysAddr; +#[cfg(target_arch = "aarch64")] +pub use ::aarch64::paging::VirtAddr as GuestVirtAddr; +#[cfg(target_arch = "x86_64")] +pub use ::x86_64::addr::PhysAddr as GuestPhysAddr; +#[cfg(target_arch = "x86_64")] +pub use ::x86_64::addr::VirtAddr as GuestVirtAddr; + +#[cfg(not(target_pointer_width = "64"))] +compile_error!("Using uhyve-interface on a non-64-bit system is not (yet?) supported"); use parameters::*; /// The version of the uhyve interface. Note: This is not the same as the semver of the crate but diff --git a/uhyve-interface/src/parameters.rs b/uhyve-interface/src/parameters.rs index 888c53cb..2f3887fe 100644 --- a/uhyve-interface/src/parameters.rs +++ b/uhyve-interface/src/parameters.rs @@ -1,8 +1,8 @@ //! Parameters for hypercalls. -use x86_64::PhysAddr; +use std::path::Path; -use crate::MAX_ARGC_ENVC; +use crate::{GuestPhysAddr, GuestVirtAddr, MAX_ARGC_ENVC}; /// Parameters for a [`Cmdsize`](crate::Hypercall::Cmdsize) hypercall which provides the lengths of the items in the argument end environment vector. 
#[repr(C, packed)] @@ -17,15 +17,46 @@ pub struct CmdsizeParams { /// Length of the items in the environment. pub envsz: [i32; MAX_ARGC_ENVC], } +impl CmdsizeParams { + #[cfg(feature = "std")] + /// Update the struct with the lengths of the given command. + /// - `path` is usually the path and name of the application. E.g., "/home/hermit/app" + /// - `args` is a list of strings that form the parameters. (E.g., `["-v", "myarg"]`) + /// + /// Note that this hypercall only transfers the sizes. It usually has to be followed up with the [`Cmdval` Hypercall](crate::Hypercall::Cmdval). + pub fn update(&mut self, path: &Path, args: &[std::ffi::OsString]) { + self.argc = 0; + + self.argsz[0] = path.as_os_str().len() as i32 + 1; + + self.argc += 1; + for argument in args { + self.argsz[(self.argc) as usize] = argument.len() as i32 + 1; + + self.argc += 1; + } + + self.envc = 0; + // let mut counter = 0; + for (key, value) in std::env::vars_os() { + if self.envc < MAX_ARGC_ENVC.try_into().unwrap() { + self.envsz[self.envc as usize] = (key.len() + value.len()) as i32 + 2; + self.envc += 1; + } else { + log::warn!("Environment is too large! {key:?}={value:?} will not be passed!"); + } + } + } +} /// Parameters for a [`Cmdval`](crate::Hypercall::Cmdval) hypercall, which copies the arguments end environment of the application into the VM's memory. #[repr(C, packed)] #[derive(Debug, Copy, Clone)] pub struct CmdvalParams { - /// Pointer to a memory section in the VM memory large enough to store the argument string. - pub argv: PhysAddr, - /// Pointer to a memory section in the VM memory large enough to store the environment values. 
- pub envp: PhysAddr, + /// Pointer to a memory section in the VM memory which holds addresses for the destinations of the individual arguments + pub argv: GuestPhysAddr, + /// Pointer to a memory section in the VM memory which holds addresses for the destinations of the individual environment variables + pub envp: GuestPhysAddr, } /// Parameters for a [`Exit`](crate::Hypercall::Exit) hypercall. @@ -41,7 +72,7 @@ pub struct ExitParams { #[derive(Debug, Copy, Clone)] pub struct UnlinkParams { /// Address of the file that should be unlinked. - pub name: PhysAddr, + pub name: GuestPhysAddr, /// On success, `0` is returned. On error, `-1` is returned. pub ret: i32, } @@ -53,7 +84,7 @@ pub struct WriteParams { /// File descriptor of the file. pub fd: i32, /// Buffer to be written into the file. - pub buf: PhysAddr, + pub buf: GuestVirtAddr, /// Number of bytes in the buffer to be written. pub len: usize, } @@ -65,7 +96,7 @@ pub struct ReadPrams { /// File descriptor of the file. pub fd: i32, /// Buffer to read the file into. - pub buf: PhysAddr, + pub buf: GuestVirtAddr, /// Number of bytes to read into the buffer. pub len: usize, /// Number of bytes read on success. `-1` on failure. @@ -87,7 +118,7 @@ pub struct CloseParams { #[derive(Debug, Copy, Clone)] pub struct OpenParams { /// Pathname of the file to be opened. - pub name: PhysAddr, + pub name: GuestPhysAddr, /// Posix file access mode flags. pub flags: i32, /// Access permissions upon opening/creating a file.