summary refs log tree commit diff
path: root/aarch64/src/lib.rs
blob: 20ce8f8595656ff6b6a0c0154e92c4096d685c70 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::BTreeMap;
use std::error::Error as StdError;
use std::ffi::{CStr, CString};
use std::fmt::{self, Display};
use std::fs::File;
use std::io;
use std::os::unix::io::FromRawFd;
use std::sync::Arc;

use arch::{
    get_serial_cmdline, GetSerialCmdlineError, RunnableLinuxVm, SerialHardware, SerialParameters,
    VmComponents, VmImage,
};
use devices::{Bus, BusError, MemoryParams, PciAddress, PciConfigMmio, PciDevice, PciInterruptPin};
use io_jail::Minijail;
use msg_socket::MsgOnSocket;
use remain::sorted;
use resources::SystemAllocator;
use sync::Mutex;
use sys_util::{EventFd, GuestAddress, GuestMemory, GuestMemoryError};
use vm_control::VmIrqRequestSocket;

use kvm::*;
use kvm_sys::kvm_device_attr;

mod fdt;

// The kernel image is loaded at this offset from the start of guest DRAM
// (0x80000 = 512 KiB); see `get_kernel_addr`.
const AARCH64_KERNEL_OFFSET: u64 = 0x80000;
// Passed to `fdt::create_fdt` and reserved near the top of guest memory by
// `fdt_offset` (0x200000 = 2 MiB).
const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
// Alignment of the initrd load address (0x1000000 = 16 MiB): the initrd goes at the
// first address with this alignment at or after the end of the kernel image.
const AARCH64_INITRD_ALIGN: u64 = 0x1000000;

// These constants indicate the size of the address space used by the ARM vGIC
// distributor and CPU interface.
const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;

// This indicates the start of DRAM inside the physical address space.
const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
// The GIC register windows are laid out downwards from this address.
const AARCH64_AXI_BASE: u64 = 0x40000000;

// These constants indicate the placement of the GIC registers in the physical
// address space: the distributor sits directly below AARCH64_AXI_BASE and the
// CPU interface directly below the distributor.
const AARCH64_GIC_DIST_BASE: u64 = AARCH64_AXI_BASE - AARCH64_GIC_DIST_SIZE;
const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
// Per-vCPU size of the redistributor region; `create_irq_chip` places
// `vcpu_count` of these directly below the distributor.
const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;

// This is the minimum number of SPI interrupts aligned to 32, plus the 32
// interrupt IDs taken by the SGIs (16) and PPIs (16).
const AARCH64_GIC_NR_IRQS: u32 = 64;

// PSR (Processor State Register) bits.
// `configure_vcpu` ORs all of these together to start each vCPU with every
// interrupt masked.
// Mode field value; presumably EL1 using its own stack pointer (EL1h) — confirm
// against the Arm architecture reference.
const PSR_MODE_EL1H: u64 = 0x00000005;
// Mask bits, named after the architecture's D/A/I/F flags (F = FIQ, I = IRQ,
// A = SError/asynchronous abort, D = debug exceptions).
const PSR_F_BIT: u64 = 0x00000040;
const PSR_I_BIT: u64 = 0x00000080;
const PSR_A_BIT: u64 = 0x00000100;
const PSR_D_BIT: u64 = 0x00000200;

// Evaluates to the byte offset (as `usize`) of a field inside struct type `$str`.
//
// The field may be a dotted path (`a.b.c`) optionally followed by one or more
// index expressions (`[i]`). The offset is obtained by building a place
// expression rooted at a null `*const $str` and converting the address of that
// place to `usize`; nothing is read through the pointer.
//
// NOTE(review): forming a place through a null pointer is not something the
// language guarantees to be sound, even when only the address is taken. Consider
// switching to `core::mem::offset_of!` (or a `MaybeUninit` + `addr_of!` based
// helper) once the minimum toolchain allows it.
macro_rules! offset__of {
    ($str:ty, $($field:ident).+ $([$idx:expr])*) => {
        unsafe { &(*(0 as *const $str))$(.$field)*  $([$idx])* as *const _ as usize }
    }
}

// Building blocks of the 64-bit register id passed to `Vcpu::set_one_reg`; they are
// OR-ed together by `arm64_core_reg!`.
// NOTE(review): the values are expected to mirror the KVM_REG_* definitions in the
// Linux KVM UAPI headers — confirm against `kvm_sys` if those are exported there.
// Architecture selector for arm64 registers.
const KVM_REG_ARM64: u64 = 0x6000000000000000;
// Size selector for a 64-bit wide register.
const KVM_REG_SIZE_U64: u64 = 0x0030000000000000;
// Bit position of the coprocessor/register-group field.
const KVM_REG_ARM_COPROC_SHIFT: u64 = 16;
// Register group selecting the core registers (the `user_pt_regs` fields).
const KVM_REG_ARM_CORE: u64 = 0x0010 << KVM_REG_ARM_COPROC_SHIFT;

// Builds the KVM register id for the core register stored in field `$reg` of
// `kvm_sys::user_pt_regs`.
//
// The id is the arm64 architecture selector, the 64-bit size selector and the
// core-register group, OR-ed with the field's byte offset divided by 4.
// NOTE(review): the division by 4 presumably reflects that KVM indexes core
// registers in 32-bit units — confirm against the KVM API documentation.
//
// `$reg` is a single token, so only a plain field name can be passed (no
// indexing); `configure_vcpu` relies on `regs` being at offset 0 to address x0.
macro_rules! arm64_core_reg {
    ($reg: tt) => {
        KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM_CORE
            | ((offset__of!(kvm_sys::user_pt_regs, $reg) / 4) as u64)
    };
}

/// Returns the guest physical address at which the kernel image is loaded:
/// the start of DRAM plus the fixed kernel offset.
fn get_kernel_addr() -> GuestAddress {
    let kernel_start = AARCH64_PHYS_MEM_START + AARCH64_KERNEL_OFFSET;
    GuestAddress(kernel_start)
}

// The serial device requires 8 bytes of registers.
const AARCH64_SERIAL_SIZE: u64 = 0x8;
// This was the speed kvmtool used, not sure if it matters.
const AARCH64_SERIAL_SPEED: u32 = 1843200;
// The first serial interrupt event (`com_evt_1_3`, presumably serial ports 1 and 3)
// gets the first interrupt line, which gets mapped to the first SPI interrupt
// (physical 32).
const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
// The second serial interrupt event (`com_evt_2_4`, presumably serial ports 2 and 4)
// gets the third interrupt line.
const AARCH64_SERIAL_2_4_IRQ: u32 = 2;

// Place the RTC device at page 2
const AARCH64_RTC_ADDR: u64 = 0x2000;
// The RTC device gets one 4k page
const AARCH64_RTC_SIZE: u64 = 0x1000;
// The RTC device gets the second interrupt line
const AARCH64_RTC_IRQ: u32 = 1;

// PCI MMIO configuration region base address.
const AARCH64_PCI_CFG_BASE: u64 = 0x10000;
// PCI MMIO configuration region size (16 MiB); the region ends exactly where
// AARCH64_MMIO_BASE begins.
const AARCH64_PCI_CFG_SIZE: u64 = 0x1000000;
// This is the base address of MMIO devices.
const AARCH64_MMIO_BASE: u64 = 0x1010000;
// Size of the whole MMIO region.
const AARCH64_MMIO_SIZE: u64 = 0x100000;
// Virtio devices start at SPI interrupt number 3; lines 0-2 are taken by the
// serial and RTC devices above.
const AARCH64_IRQ_BASE: u32 = 3;

// PMU PPI interrupt, same as qemu. `build_vm` adds 16 to this value before
// handing it to `Vcpu::arm_pmu_init`.
const AARCH64_PMU_IRQ: u32 = 7;

#[sorted]
#[derive(Debug)]
/// Errors that can occur while building or configuring an aarch64 VM.
///
/// Variants are kept alphabetically sorted (enforced by `#[sorted]`).
pub enum Error {
    /// An `EventFd` could not be cloned.
    CloneEventFd(sys_util::Error),
    /// The kernel command line was rejected (e.g. an extra parameter could not be inserted).
    Cmdline(kernel_cmdline::Error),
    /// The caller-supplied `create_devices` callback failed.
    CreateDevices(Box<dyn StdError>),
    /// An `EventFd` could not be created.
    CreateEventFd(sys_util::Error),
    /// The flattened device tree could not be created.
    CreateFdt(arch::fdt::Error),
    /// The in-kernel GIC could not be created or configured.
    CreateGICFailure(sys_util::Error),
    /// `/dev/kvm` could not be opened.
    CreateKvm(sys_util::Error),
    /// The PCI root could not be generated.
    CreatePciRoot(arch::DeviceRegistrationError),
    /// The serial devices could not be created.
    CreateSerialDevices(arch::DeviceRegistrationError),
    /// A socket could not be created.
    CreateSocket(io::Error),
    /// A vCPU could not be created.
    CreateVcpu(sys_util::Error),
    /// The VM could not be created.
    CreateVm(sys_util::Error),
    /// The serial-related part of the kernel command line could not be generated.
    GetSerialCmdline(GetSerialCmdlineError),
    /// The initrd image could not be loaded into guest memory.
    InitrdLoadFailure(arch::LoadImageError),
    /// The servers could not be jailed.
    JailServers(servers::ProxyError),
    /// The kernel image could not be loaded into guest memory.
    KernelLoadFailure(arch::LoadImageError),
    /// The VM image was not a kernel; aarch64 requires one.
    KernelMissing,
    /// The preferred vCPU target could not be read from KVM.
    ReadPreferredTarget(sys_util::Error),
    /// An irqfd could not be registered with the VM.
    RegisterIrqfd(sys_util::Error),
    /// The PCI configuration region could not be inserted into the MMIO bus.
    RegisterPci(BusError),
    /// The virtual socket device could not be registered.
    RegisterVsock(arch::DeviceRegistrationError),
    /// A KVM device attribute could not be set (used for the final GIC init step).
    SetDeviceAttr(sys_util::Error),
    /// A vCPU register could not be set.
    SetReg(sys_util::Error),
    /// Guest memory could not be set up.
    SetupGuestMemory(GuestMemoryError),
    /// The vCPU could not be initialized.
    VcpuInit(sys_util::Error),
}

// Human-readable messages for every `Error` variant. Variants that wrap an
// underlying error append it after a colon.
impl Display for Error {
    // The match arms must stay alphabetically sorted; `remain` checks this at
    // compile time.
    #[remain::check]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;

        #[sorted]
        match self {
            CloneEventFd(e) => write!(f, "unable to clone an EventFd: {}", e),
            Cmdline(e) => write!(f, "the given kernel command line was invalid: {}", e),
            CreateDevices(e) => write!(f, "error creating devices: {}", e),
            CreateEventFd(e) => write!(f, "unable to make an EventFd: {}", e),
            CreateFdt(e) => write!(f, "FDT could not be created: {}", e),
            CreateGICFailure(e) => write!(f, "failed to create GIC: {}", e),
            CreateKvm(e) => write!(f, "failed to open /dev/kvm: {}", e),
            CreatePciRoot(e) => write!(f, "failed to create a PCI root hub: {}", e),
            CreateSerialDevices(e) => write!(f, "unable to create serial devices: {}", e),
            CreateSocket(e) => write!(f, "failed to create socket: {}", e),
            CreateVcpu(e) => write!(f, "failed to create VCPU: {}", e),
            CreateVm(e) => write!(f, "failed to create vm: {}", e),
            GetSerialCmdline(e) => write!(f, "failed to get serial cmdline: {}", e),
            InitrdLoadFailure(e) => write!(f, "initrd could not be loaded: {}", e),
            JailServers(e) => write!(f, "failed to jail servers: {}", e),
            KernelLoadFailure(e) => write!(f, "kernel could not be loaded: {}", e),
            KernelMissing => write!(f, "aarch64 requires a kernel"),
            ReadPreferredTarget(e) => write!(f, "failed to read preferred target: {}", e),
            RegisterIrqfd(e) => write!(f, "failed to register irq fd: {}", e),
            RegisterPci(e) => write!(f, "error registering PCI bus: {}", e),
            RegisterVsock(e) => write!(f, "error registering virtual socket device: {}", e),
            SetDeviceAttr(e) => write!(f, "failed to set device attr: {}", e),
            SetReg(e) => write!(f, "failed to set register: {}", e),
            SetupGuestMemory(e) => write!(f, "failed to set up guest memory: {}", e),
            VcpuInit(e) => write!(f, "failed to initialize VCPU: {}", e),
        }
    }
}

/// Convenience alias for results whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

// `Error` derives `Debug` and implements `Display`, so the default
// `std::error::Error` methods are sufficient.
impl std::error::Error for Error {}

/// Returns the list of valid guest memory regions as `(start address, size)` pairs.
///
/// These should be used to configure the `GuestMemory` structure for the platform.
/// On aarch64 there is a single region: `params.size` bytes beginning at the start
/// of DRAM.
pub fn arch_memory_regions(params: MemoryParams) -> Vec<(GuestAddress, u64)> {
    let dram = (GuestAddress(AARCH64_PHYS_MEM_START), params.size);
    vec![dram]
}

/// Returns the offset, relative to the start of guest memory, at which the FDT is
/// placed: near the top of memory, leaving room for the maximum FDT size plus a
/// 0x10000-byte gap above it.
fn fdt_offset(mem_size: u64) -> u64 {
    // TODO(sonnyrao): will have to handle this differently if there's
    // > 4GB memory
    const GAP_ABOVE_FDT: u64 = 0x10000;
    mem_size - (AARCH64_FDT_MAX_SIZE + GAP_ABOVE_FDT)
}

pub struct AArch64;

impl arch::LinuxArch for AArch64 {
    type Error = Error;
    type MemoryParams = MemoryParams;

    /// Builds a runnable aarch64 Linux VM from `components`.
    ///
    /// In order, this: allocates guest memory, opens KVM and creates the VM, creates
    /// and configures every vCPU, creates the in-kernel GIC, tries to enable the PMU
    /// on each vCPU, jails the servers, creates the PCI devices via `create_devices`,
    /// adds the platform (RTC) and serial devices, builds the kernel command line,
    /// loads the kernel, and finally loads the initrd and writes the FDT.
    ///
    /// # Arguments
    ///
    /// * `components` - Memory size, vCPU count, images and extra kernel parameters.
    /// * `_split_irqchip`, `_ioapic_device_socket` - Unused on aarch64.
    /// * `serial_parameters` - Configuration of the serial devices to create.
    /// * `serial_jail` - Optional jail passed on to `arch::add_serial_devices`.
    /// * `servers` - Server stubs handed to `jail_servers`.
    /// * `create_devices` - Callback that creates the PCI devices for this VM.
    ///
    /// # Errors
    ///
    /// Returns the `Error` variant corresponding to the first step that fails;
    /// `Error::KernelMissing` if `components.vm_image` is not a kernel.
    fn build_vm<F, E>(
        mut components: VmComponents,
        _split_irqchip: bool,
        _ioapic_device_socket: VmIrqRequestSocket,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        servers: Vec<ServerStub>,
        create_devices: F,
    ) -> Result<RunnableLinuxVm>
    where
        F: FnOnce(
            &GuestMemory,
            MemoryParams,
            &mut Vm,
            &mut SystemAllocator,
            &EventFd,
        ) -> std::result::Result<Vec<(Box<dyn PciDevice>, Option<Minijail>)>, E>,
        E: StdError + 'static,
    {
        let mut resources =
            Self::get_resource_allocator(components.memory_size, components.wayland_dmabuf);
        let mem_params = components.memory_params();
        let mem = Self::setup_memory(mem_params)?;
        let kvm = Kvm::new().map_err(Error::CreateKvm)?;
        let mut vm = Vm::new(&kvm, mem.clone()).map_err(Error::CreateVm)?;

        // Create and configure every vCPU before the interrupt controller is created.
        let vcpu_count = components.vcpu_count;
        let mut vcpus = Vec::with_capacity(vcpu_count as usize);
        for cpu_id in 0..vcpu_count {
            let vcpu = Vcpu::new(cpu_id as libc::c_ulong, &kvm, &vm).map_err(Error::CreateVcpu)?;
            Self::configure_vcpu(
                vm.get_memory(),
                &kvm,
                &vm,
                &vcpu,
                cpu_id as u64,
                vcpu_count as u64,
            )?;
            vcpus.push(vcpu);
        }

        let vcpu_affinity = components.vcpu_affinity;

        let (irq_chip, is_gicv3) = Self::create_irq_chip(&vm, vcpu_count as u64)?;

        // The PMU is only advertised to the guest if it could be initialized on
        // every vCPU; a failure on any one of them disables it for all.
        // NOTE(review): the `+ 16` presumably converts the PPI number into a GIC
        // interrupt ID (PPIs follow the 16 SGIs) — confirm against arm_pmu_init.
        let mut use_pmu = true;
        for vcpu in &vcpus {
            use_pmu &= vcpu.arm_pmu_init(AARCH64_PMU_IRQ as u64 + 16).is_ok();
        }

        let mut mmio_bus = devices::Bus::new();

        let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?;

        // Event used by PMDevice to notify crosvm that
        // guest OS is trying to suspend.
        let suspend_evt = EventFd::new().map_err(Error::CreateEventFd)?;

        let servers = jail_servers(servers).map_err(Error::JailServers)?;

        // Let the caller create the PCI devices, then build the PCI root from them.
        let pci_devices = create_devices(&mem, mem_params, &mut vm, &mut resources, &exit_evt)
            .map_err(|e| Error::CreateDevices(Box::new(e)))?;
        let (pci, pci_irqs, pid_debug_label_map) = arch::generate_pci_root(
            pci_devices,
            &mut None,
            &mut mmio_bus,
            &mut resources,
            &mut vm,
        )
        .map_err(Error::CreatePciRoot)?;
        let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci)));

        // ARM doesn't really use the io bus like x86, so just create an empty bus.
        let io_bus = devices::Bus::new();

        Self::add_arch_devs(&mut vm, &mut mmio_bus)?;

        // Serial devices live on the MMIO bus; each event is wired to its own
        // interrupt line below.
        let com_evt_1_3 = EventFd::new().map_err(Error::CreateEventFd)?;
        let com_evt_2_4 = EventFd::new().map_err(Error::CreateEventFd)?;
        arch::add_serial_devices(
            &mut mmio_bus,
            &com_evt_1_3,
            &com_evt_2_4,
            serial_parameters,
            serial_jail,
        )
        .map_err(Error::CreateSerialDevices)?;

        vm.register_irqfd(&com_evt_1_3, AARCH64_SERIAL_1_3_IRQ)
            .map_err(Error::RegisterIrqfd)?;
        vm.register_irqfd(&com_evt_2_4, AARCH64_SERIAL_2_4_IRQ)
            .map_err(Error::RegisterIrqfd)?;

        // Expose the PCI configuration space through MMIO.
        mmio_bus
            .insert(
                pci_bus.clone(),
                AARCH64_PCI_CFG_BASE,
                AARCH64_PCI_CFG_SIZE,
                false,
            )
            .map_err(Error::RegisterPci)?;

        // Command line: architecture defaults, then serial console settings, then
        // whatever extra parameters the caller supplied.
        let mut cmdline = Self::get_base_linux_cmdline();
        get_serial_cmdline(&mut cmdline, serial_parameters, "mmio")
            .map_err(Error::GetSerialCmdline)?;
        for param in components.extra_kernel_params {
            cmdline.insert_str(&param).map_err(Error::Cmdline)?;
        }

        // Only a kernel image is accepted on aarch64.
        let kernel_image = if let VmImage::Kernel(ref mut img) = components.vm_image {
            img
        } else {
            return Err(Error::KernelMissing);
        };

        // separate out kernel loading from other setup to get a specific error for
        // kernel loading
        let kernel_size = arch::load_image(&mem, kernel_image, get_kernel_addr(), u64::max_value())
            .map_err(Error::KernelLoadFailure)?;
        // The initrd (if any) is placed after the end of the loaded kernel.
        let kernel_end = get_kernel_addr().offset() + kernel_size as u64;
        Self::setup_system_memory(
            &mem,
            components.memory_size,
            vcpu_count,
            &CString::new(cmdline).unwrap(),
            components.initrd_image,
            pci_irqs,
            components.android_fstab,
            kernel_end,
            is_gicv3,
            use_pmu,
        )?;

        Ok(RunnableLinuxVm {
            vm,
            kvm,
            resources,
            exit_evt,
            vcpus,
            vcpu_affinity,
            irq_chip,
            split_irqchip: None,
            gsi_relay: None,
            servers,
            io_bus,
            mmio_bus,
            pid_debug_label_map,
            suspend_evt,
        })
    }

    fn setup_memory(params: MemoryParams) -> Result<GuestMemory> {
        let arch_mem_regions = arch_memory_regions(params);
        let mem = GuestMemory::new(&arch_mem_regions).map_err(Error::SetupGuestMemory)?;
        Ok(mem)
    }
}

impl AArch64 {
    /// Loads the optional initrd into guest memory and writes the flattened device tree.
    ///
    /// # Arguments
    ///
    /// * `mem` - Guest memory to load into.
    /// * `mem_size` - Size of guest memory in bytes.
    /// * `vcpu_count` - Number of vCPUs described in the FDT.
    /// * `cmdline` - Kernel command line stored in the FDT.
    /// * `initrd_file` - Optional initrd image.
    /// * `pci_irqs` - PCI interrupt routing passed on to the FDT.
    /// * `android_fstab` - Optional Android fstab passed on to the FDT.
    /// * `kernel_end` - Guest physical address just past the loaded kernel.
    /// * `is_gicv3` - Whether the interrupt controller is a GICv3.
    /// * `use_pmu` - Whether the PMU should be described.
    ///
    /// # Errors
    ///
    /// `Error::InitrdLoadFailure` if the initrd cannot be loaded and
    /// `Error::CreateFdt` if the FDT cannot be created.
    fn setup_system_memory(
        mem: &GuestMemory,
        mem_size: u64,
        vcpu_count: u32,
        cmdline: &CStr,
        initrd_file: Option<File>,
        pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
        android_fstab: Option<File>,
        kernel_end: u64,
        is_gicv3: bool,
        use_pmu: bool,
    ) -> Result<()> {
        let initrd = if let Some(mut image) = initrd_file {
            // Round the end of the kernel up to the next initrd alignment boundary.
            let align_mask = AARCH64_INITRD_ALIGN - 1;
            let load_at = (kernel_end + align_mask) & !align_mask;
            // Whatever is left between the load address and the end of DRAM is the
            // most the initrd may occupy.
            let room = mem_size - (load_at - AARCH64_PHYS_MEM_START);
            let load_at = GuestAddress(load_at);
            let loaded = arch::load_image(mem, &mut image, load_at, room)
                .map_err(Error::InitrdLoadFailure)?;
            Some((load_at, loaded))
        } else {
            None
        };

        let (pci_device_base, pci_device_size) = Self::get_high_mmio_base_size(mem_size);
        fdt::create_fdt(
            AARCH64_FDT_MAX_SIZE as usize,
            mem,
            pci_irqs,
            vcpu_count,
            fdt_offset(mem_size),
            pci_device_base,
            pci_device_size,
            cmdline,
            initrd,
            android_fstab,
            is_gicv3,
            use_pmu,
        )
        .map_err(Error::CreateFdt)?;

        Ok(())
    }

    /// Returns `(base, size)` of the high MMIO window, which starts immediately
    /// after the end of guest DRAM and extends to `u64::max_value()`.
    fn get_high_mmio_base_size(mem_size: u64) -> (u64, u64) {
        let dram_end = AARCH64_PHYS_MEM_START + mem_size;
        (dram_end, u64::max_value() - dram_end)
    }

    /// Builds the part of the kernel command line that is common to every guest on
    /// this architecture. The command line is limited to one page and starts out
    /// with `panic=-1`.
    fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
        let capacity = sys_util::pagesize();
        let mut base = kernel_cmdline::Cmdline::new(capacity);
        base.insert_str("panic=-1").unwrap();
        base
    }

    /// Creates the system resource allocator for a guest with `mem_size` bytes of RAM.
    ///
    /// High MMIO addresses cover everything above guest DRAM, low MMIO addresses
    /// cover the fixed MMIO region, and IRQ allocation starts at `AARCH64_IRQ_BASE`.
    /// `gpu_allocation` is passed through to the allocator unchanged.
    fn get_resource_allocator(mem_size: u64, gpu_allocation: bool) -> SystemAllocator {
        let (mmio_hi_base, mmio_hi_size) = Self::get_high_mmio_base_size(mem_size);

        SystemAllocator::builder()
            .add_high_mmio_addresses(mmio_hi_base, mmio_hi_size)
            .add_low_mmio_addresses(AARCH64_MMIO_BASE, AARCH64_MMIO_SIZE)
            .create_allocator(AARCH64_IRQ_BASE, gpu_allocation)
            .unwrap()
    }

    /// This adds any early platform devices for this architecture.
    ///
    /// # Arguments
    ///
    /// * `vm` - The vm to add irqs to.
    /// * `bus` - The bus to add devices to.
    fn add_arch_devs(vm: &mut Vm, bus: &mut Bus) -> Result<()> {
        let rtc_evt = EventFd::new().map_err(Error::CreateEventFd)?;
        vm.register_irqfd(&rtc_evt, AARCH64_RTC_IRQ)
            .map_err(Error::RegisterIrqfd)?;

        let rtc = Arc::new(Mutex::new(devices::pl030::Pl030::new(rtc_evt)));
        bus.insert(rtc, AARCH64_RTC_ADDR, AARCH64_RTC_SIZE, false)
            .expect("failed to add rtc device");

        Ok(())
    }

    /// The creates the interrupt controller device and optionally returns the fd for it.
    /// Some architectures may not have a separate descriptor for the interrupt
    /// controller, so they would return None even on success.
    ///
    /// # Arguments
    ///
    /// * `vm` - the vm object
    /// * `vcpu_count` - the number of vCPUs
    fn create_irq_chip(vm: &Vm, vcpu_count: u64) -> Result<(Option<File>, bool)> {
        let cpu_if_addr: u64 = AARCH64_GIC_CPUI_BASE;
        let dist_if_addr: u64 = AARCH64_GIC_DIST_BASE;
        let redist_addr: u64 = dist_if_addr - (AARCH64_GIC_REDIST_SIZE * vcpu_count);
        let raw_cpu_if_addr = &cpu_if_addr as *const u64;
        let raw_dist_if_addr = &dist_if_addr as *const u64;
        let raw_redist_addr = &redist_addr as *const u64;

        let cpu_if_attr = kvm_device_attr {
            group: kvm_sys::KVM_DEV_ARM_VGIC_GRP_ADDR,
            attr: kvm_sys::KVM_VGIC_V2_ADDR_TYPE_CPU as u64,
            addr: raw_cpu_if_addr as u64,
            flags: 0,
        };
        let redist_attr = kvm_device_attr {
            group: kvm_sys::KVM_DEV_ARM_VGIC_GRP_ADDR,
            attr: kvm_sys::KVM_VGIC_V3_ADDR_TYPE_REDIST as u64,
            addr: raw_redist_addr as u64,
            flags: 0,
        };
        let mut dist_attr = kvm_device_attr {
            group: kvm_sys::KVM_DEV_ARM_VGIC_GRP_ADDR,
            addr: raw_dist_if_addr as u64,
            attr: 0,
            flags: 0,
        };

        let mut kcd = kvm_sys::kvm_create_device {
            type_: kvm_sys::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3,
            fd: 0,
            flags: 0,
        };

        let mut cpu_redist_attr = redist_attr;
        let mut is_gicv3 = true;
        dist_attr.attr = kvm_sys::KVM_VGIC_V3_ADDR_TYPE_DIST as u64;
        if vm.create_device(&mut kcd).is_err() {
            is_gicv3 = false;
            cpu_redist_attr = cpu_if_attr;
            kcd.type_ = kvm_sys::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2;
            dist_attr.attr = kvm_sys::KVM_VGIC_V2_ADDR_TYPE_DIST as u64;
            vm.create_device(&mut kcd)
                .map_err(|e| Error::CreateGICFailure(e))?;
        }

        let is_gicv3 = is_gicv3;
        let cpu_redist_attr = cpu_redist_attr;
        let dist_attr = dist_attr;

        // Safe because the kernel is passing us an FD back inside
        // the struct after we successfully did the create_device ioctl
        let vgic_fd = unsafe { File::from_raw_fd(kcd.fd as i32) };

        // Safe because we allocated the struct that's being passed in
        let ret = unsafe {
            sys_util::ioctl_with_ref(&vgic_fd, kvm_sys::KVM_SET_DEVICE_ATTR(), &cpu_redist_attr)
        };
        if ret != 0 {
            return Err(Error::CreateGICFailure(sys_util::Error::new(ret)));
        }

        // Safe because we allocated the struct that's being passed in
        let ret = unsafe {
            sys_util::ioctl_with_ref(&vgic_fd, kvm_sys::KVM_SET_DEVICE_ATTR(), &dist_attr)
        };
        if ret != 0 {
            return Err(Error::CreateGICFailure(sys_util::Error::new(ret)));
        }

        // We need to tell the kernel how many irqs to support with this vgic
        let nr_irqs: u32 = AARCH64_GIC_NR_IRQS;
        let nr_irqs_ptr = &nr_irqs as *const u32;
        let nr_irqs_attr = kvm_device_attr {
            group: kvm_sys::KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
            attr: 0,
            addr: nr_irqs_ptr as u64,
            flags: 0,
        };
        // Safe because we allocated the struct that's being passed in
        let ret = unsafe {
            sys_util::ioctl_with_ref(&vgic_fd, kvm_sys::KVM_SET_DEVICE_ATTR(), &nr_irqs_attr)
        };
        if ret != 0 {
            return Err(Error::CreateGICFailure(sys_util::Error::new(ret)));
        }

        // Finalize the GIC
        let init_gic_attr = kvm_device_attr {
            group: kvm_sys::KVM_DEV_ARM_VGIC_GRP_CTRL,
            attr: kvm_sys::KVM_DEV_ARM_VGIC_CTRL_INIT as u64,
            addr: 0,
            flags: 0,
        };

        // Safe because we allocated the struct that's being passed in
        let ret = unsafe {
            sys_util::ioctl_with_ref(&vgic_fd, kvm_sys::KVM_SET_DEVICE_ATTR(), &init_gic_attr)
        };
        if ret != 0 {
            return Err(Error::SetDeviceAttr(sys_util::Error::new(ret)));
        }
        Ok((Some(vgic_fd), is_gicv3))
    }

    fn configure_vcpu(
        guest_mem: &GuestMemory,
        kvm: &Kvm,
        vm: &Vm,
        vcpu: &Vcpu,
        cpu_id: u64,
        _num_cpus: u64,
    ) -> Result<()> {
        let mut kvi = kvm_sys::kvm_vcpu_init {
            target: kvm_sys::KVM_ARM_TARGET_GENERIC_V8,
            features: [0; 7],
        };

        // This reads back the kernel's preferred target type.
        vm.arm_preferred_target(&mut kvi)
            .map_err(Error::ReadPreferredTarget)?;

        kvi.features[0] |= 1 << kvm_sys::KVM_ARM_VCPU_PSCI_0_2;
        if kvm.check_extension(Cap::ArmPmuV3) {
            kvi.features[0] |= 1 << kvm_sys::KVM_ARM_VCPU_PMU_V3;
        }

        // Non-boot cpus are powered off initially
        if cpu_id > 0 {
            kvi.features[0] |= 1 << kvm_sys::KVM_ARM_VCPU_POWER_OFF;
        }
        vcpu.arm_vcpu_init(&kvi).map_err(Error::VcpuInit)?;

        // set up registers
        let mut data: u64;
        let mut reg_id: u64;

        // All interrupts masked
        data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
        reg_id = arm64_core_reg!(pstate);
        vcpu.set_one_reg(reg_id, data).map_err(Error::SetReg)?;

        // Other cpus are powered off initially
        if cpu_id == 0 {
            data = AARCH64_PHYS_MEM_START + AARCH64_KERNEL_OFFSET;
            reg_id = arm64_core_reg!(pc);
            vcpu.set_one_reg(reg_id, data).map_err(Error::SetReg)?;

            /* X0 -- fdt address */
            let mem_size = guest_mem.memory_size();
            data = (AARCH64_PHYS_MEM_START + fdt_offset(mem_size)) as u64;
            // hack -- can't get this to do offsetof(regs[0]) but luckily it's at offset 0
            reg_id = arm64_core_reg!(regs);
            vcpu.set_one_reg(reg_id, data).map_err(Error::SetReg)?;
        }
        Ok(())
    }
}