summary refs log blame commit diff
path: root/devices/src/virtio/wl.rs
blob: 1ec6873e7f2511caac49881a13bccd9d130631bc (plain) (tree)


































                                                                                                    
                                          

                  

                                          

                                        
                                        

                                 





                                                   
                
                        
 
                             
                               
 


                                    
                                                                                    
                                                                                                 
 


                             
                                                                                     

                                                                                       
                                         




                                           

                                            

                                              

                                               

                                         

                                                



                                               

                                               
                                     
                                    

                                       
                                          
 






                                                              















                                                                       
                                                                       


                                 














                                                                                  














                                                                                






















                                                                                     













                                                                                
 
                                                                                  


                                     








                                                                                
                                                                         










                                                                     
                             











                                                                                        
                                                                     

                                                                         

                                                                           
                                                     




























                                                        





                                                 
                                             












                                                             





                                                   





                                                    















                                                      

                                                                

                                                         
                                                     



                                

                                                                                         




                                                                                          












                                                                          








                                                                        





                                     

                                                                         
                                                                                        
                                                              









                                                                 
                   
                        







                                        
                         
                   
                        
                          
                          
























                                                                                      
                                                                                   
                                                                            

         
























































                                                                              






















                                            







                                       








                                   











                                                
                                    

                                                                

                                       
                                    














                                                                                                  





                                 
                            
      




                        

                           


                

                   












                                             

                                                                          
                                                              

                                                            


                                                                 

                                                                  






                               


                                                        
                                                               

                                 










                                                           





                                                     










                                                            


                                       


                                                                
                                                                            










                                                                                             



                                                                                 




                                             
                                 

                                                                       




                                                                                    
                                                                                 





                                                                                            
                                     
                               




                                             





















                                                                                         























                                                                                                    
                                                                          























                                                                                                
             


         
                                                        
                          













                                                                  



             
                                                             



                                       
                                              
                                   
                                                                

     
                                                               
                                        

                                
                                               
                                                            
                                                                 

     










                                                              
                                                                 
                                                                                               







                                                                       
             




                                              


         
                                                                                  
                                                                                           
                                                  


                                              

                                                                                          
                                            
                                                                                         


                                                           
                                           

                               















                                                                              
         





                                                                                                   


                                         


                                                           


                                













                             
        




                          
                               
                               









                                                   
                                                                                                   


                                            
                                                                                
                                 









                                                  





















                                                                                


                                                       













                                                        



                                                                                 






                                                                           






                                                                         
                          










                                                                







                                                                                                 



                                                                




                                                                
                         







                                                        














                                                                            



                                                            






                                                    

                                     



                                                                            

                                   
                             








                                                        
















                                                                               
                                           



                                                                                        
                 
                                                               



             























                                                                                                   
                                                                 





                                                                          
                                         






                                                               
 
                                          













                                                                                                 
         
                      






                                                                           
                                                            

                                                           
         
                                                  






                                                               




















                                                                                  

                                                                                               
                                                                            


                                                        
                                                                    

                                                                                                        

                                                                          



                                                        











                                                                                                 
                                                                                     





































                                                                                                 
                                                                              


























                                                                                                    



                                                 



                                  


















                                                       

                                      






                                               
                                                                               












                                                                             


















                                                                                              

              
                     
                                        










                                                                 























                                                                                                  
                                        
















                                                                                                    
                                                                                       

                                                 
                                                                               


                                                               
                                                                                              


















                                                                                              

                                                                      












                                                                                                   
                                                                                        





                                                                    
                                                                                             























                                                                  
                               









                                                                                        
                                           































                                                                             












                                                                     













                                                                                       
                                                                         






                                                         
                                                                 








                                                      

                                                     






                                                                  


         
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! This module implements the virtio wayland device used by the guest to access the host's wayland
//! server.
//!
//! The virtio wayland protocol is done over two queues: `in` and `out`. The `in` queue is used for
//! sending commands to the guest that are generated by the host, usually messages from the wayland
//! server. The `out` queue is for commands from the guest, usually requests to allocate shared
//! memory, open a wayland server connection, or send data over an existing connection.
//!
//! Each `WlVfd` represents one virtual file descriptor created by either the guest or the host.
//! Virtual file descriptors contain actual file descriptors, either a shared memory file descriptor
//! or a unix domain socket to the wayland server. In the shared memory case, there is also an
//! associated slot that indicates which KVM memory slot the memory is installed into, as well as a
//! page frame number that the guest can access the memory from.
//!
//! The types starting with `Ctrl` are structures representing the virtio wayland protocol "on the
//! wire." They are decoded/encoded as some variant of `WlOp` for requests and `WlResp` for
//! responses.
//!
//! There is one `WlState` instance that contains every known vfd and the current state of the `in`
//! queue. The `in` queue requires extra state to buffer messages to the guest in case the `in`
//! queue is already full. The `WlState` also has a control socket necessary to fulfill certain
//! requests, such as those registering guest memory.
//!
//! The `Worker` is responsible for the poll loop over all possible events, encoding/decoding from
//! the virtio queue, and routing messages in and out of `WlState`. Possible events include the kill
//! event, available descriptors on the `in` or `out` queue, and incoming data on any vfd's socket.

use std::cell::RefCell;
use std::collections::btree_map::Entry;
use std::collections::{BTreeSet as Set, BTreeMap as Map, VecDeque};
use std::convert::From;
use std::ffi::CStr;
use std::error::{self, Error as StdError};
use std::fmt;
use std::fs::File;
use std::io::{self, Seek, SeekFrom, Read};
use std::mem::{size_of, size_of_val};
#[cfg(feature = "wl-dmabuf")]
use std::os::raw::{c_uint, c_ulonglong};
use std::os::unix::io::{AsRawFd, RawFd};
#[cfg(feature = "wl-dmabuf")]
use std::os::unix::io::FromRawFd;
use std::os::unix::net::{UnixDatagram, UnixStream};
use std::path::{PathBuf, Path};
use std::rc::Rc;
use std::result;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;

#[cfg(feature = "wl-dmabuf")]
use libc::{dup, EBADF, EINVAL};

use data_model::*;
use data_model::VolatileMemoryError;

use sys_util::{Error, Result, EventFd, Scm, SharedMemory, GuestAddress, GuestMemory,
               GuestMemoryError, PollContext, PollToken, FileFlags, pipe, round_up_to_page_size};

#[cfg(feature = "wl-dmabuf")]
use sys_util::ioctl_with_ref;

use vm_control::{VmControlError, VmRequest, VmResponse, MaybeOwnedFd, GpuMemoryDesc};
use super::{VirtioDevice, Queue, DescriptorChain, INTERRUPT_STATUS_USED_RING, TYPE_WL};

// NOTE(review): presumably the maximum number of vfds that may accompany one send; the use site
// is outside this part of the file -- confirm.
const VIRTWL_SEND_MAX_ALLOCS: usize = 28;
// Command type codes. `parse_desc` matches the leading 32-bit type field of a descriptor against
// these, and the `encode_vfd_*` helpers write them into `CtrlHeader::type_`.
const VIRTIO_WL_CMD_VFD_NEW: u32 = 256;
const VIRTIO_WL_CMD_VFD_CLOSE: u32 = 257;
const VIRTIO_WL_CMD_VFD_SEND: u32 = 258;
const VIRTIO_WL_CMD_VFD_RECV: u32 = 259;
const VIRTIO_WL_CMD_VFD_NEW_CTX: u32 = 260;
const VIRTIO_WL_CMD_VFD_NEW_PIPE: u32 = 261;
const VIRTIO_WL_CMD_VFD_HUP: u32 = 262;
#[cfg(feature = "wl-dmabuf")]
const VIRTIO_WL_CMD_VFD_NEW_DMABUF: u32 = 263;
#[cfg(feature = "wl-dmabuf")]
const VIRTIO_WL_CMD_VFD_DMABUF_SYNC: u32 = 264;
// Response type codes: the success codes come first, followed by the error codes.
const VIRTIO_WL_RESP_OK: u32 = 4096;
const VIRTIO_WL_RESP_VFD_NEW: u32 = 4097;
#[cfg(feature = "wl-dmabuf")]
const VIRTIO_WL_RESP_VFD_NEW_DMABUF: u32 = 4098;
const VIRTIO_WL_RESP_ERR: u32 = 4352;
const VIRTIO_WL_RESP_OUT_OF_MEMORY: u32 = 4353;
const VIRTIO_WL_RESP_INVALID_ID: u32 = 4354;
const VIRTIO_WL_RESP_INVALID_TYPE: u32 = 4355;
const VIRTIO_WL_RESP_INVALID_FLAGS: u32 = 4356;
const VIRTIO_WL_RESP_INVALID_CMD: u32 = 4357;
// Vfd flag bits. Note that `VIRTIO_WL_VFD_READ` and `VIRTIO_WL_VFD_MAP` share the value 0x2.
const VIRTIO_WL_VFD_WRITE: u32 = 0x1;
const VIRTIO_WL_VFD_READ: u32 = 0x2;
const VIRTIO_WL_VFD_MAP: u32 = 0x2;
const VIRTIO_WL_VFD_CONTROL: u32 = 0x4;
// NOTE(review): looks like a virtio feature bit for this device -- confirm against its use site.
const VIRTIO_WL_F_TRANS_FLAGS: u32 = 0x01;

// Size of each virtio queue.
const QUEUE_SIZE: u16 = 16;
// One entry per queue; the module documentation describes two queues, `in` and `out`.
const QUEUE_SIZES: &'static [u16] = &[QUEUE_SIZE, QUEUE_SIZE];

// NOTE(review): presumably the first id given to host-created vfds, with the same bit doubling as
// the marker for host-created ids -- confirm against the allocation code later in the file.
const NEXT_VFD_ID_BASE: u32 = 0x40000000;
const VFD_ID_HOST_MASK: u32 = NEXT_VFD_ID_BASE;
// NOTE(review): presumably the size in bytes of the buffer used when receiving data destined for
// the `in` queue -- confirm at the use site.
const IN_BUFFER_LEN: usize = 4080;

// NOTE(review): presumably the set of flag bits accepted in a dmabuf sync request -- confirm at
// the use site.
#[cfg(feature = "wl-dmabuf")]
const VIRTIO_WL_VFD_DMABUF_SYNC_VALID_FLAG_MASK: u32 = 0x7;

// Base value passed to `ioctl_iow_nr!` when declaring `DMA_BUF_IOCTL_SYNC`.
#[cfg(feature = "wl-dmabuf")]
const DMA_BUF_IOCTL_BASE: c_uint = 0x62;

/// Argument type named in the `DMA_BUF_IOCTL_SYNC` ioctl declaration.
///
/// Laid out with `#[repr(C)]` so that it has C-compatible layout.
#[cfg(feature = "wl-dmabuf")]
#[repr(C)]
#[derive(Copy, Clone)]
struct dma_buf_sync {
    // Flags word for the sync request.
    flags: c_ulonglong,
}

// Declares `DMA_BUF_IOCTL_SYNC` from base `DMA_BUF_IOCTL_BASE`, number 0 and argument type
// `dma_buf_sync`.
// NOTE(review): the expansion of `ioctl_iow_nr!` is defined outside this file -- confirm what
// item it generates.
#[cfg(feature = "wl-dmabuf")]
ioctl_iow_nr!(DMA_BUF_IOCTL_SYNC, DMA_BUF_IOCTL_BASE, 0, dma_buf_sync);

/// Decodes a `VIRTIO_WL_CMD_VFD_NEW` command located at `addr` into a `WlOp::NewAlloc`.
///
/// Reads the little-endian `id`, `flags` and `size` fields found 8, 12 and 24 bytes past `addr`.
/// Returns `WlError::CheckedOffset` if a field address cannot be computed, or a guest memory
/// error if a field cannot be read.
fn parse_new(addr: GuestAddress, mem: &GuestMemory) -> WlResult<WlOp> {
    const ID_OFFSET: u64 = 8;
    const FLAGS_OFFSET: u64 = 12;
    const SIZE_OFFSET: u64 = 24;

    // Resolves the address of one field and reads a 32-bit little-endian value from it.
    let read_field = |offset: u64| -> WlResult<Le32> {
        let field_addr = mem.checked_offset(addr, offset)
            .ok_or(WlError::CheckedOffset)?;
        let value: Le32 = mem.read_obj_from_addr(field_addr)?;
        Ok(value)
    };

    let vfd_id = read_field(ID_OFFSET)?;
    let vfd_flags = read_field(FLAGS_OFFSET)?;
    let alloc_size = read_field(SIZE_OFFSET)?;

    Ok(WlOp::NewAlloc {
           id: vfd_id.into(),
           flags: vfd_flags.into(),
           size: alloc_size.into(),
       })
}

/// Decodes a `VIRTIO_WL_CMD_VFD_NEW_PIPE` command located at `addr` into a `WlOp::NewPipe`.
///
/// Reads the little-endian `id` and `flags` fields found 8 and 12 bytes past `addr`. Returns
/// `WlError::CheckedOffset` if a field address cannot be computed, or a guest memory error if a
/// field cannot be read.
fn parse_new_pipe(addr: GuestAddress, mem: &GuestMemory) -> WlResult<WlOp> {
    const ID_OFFSET: u64 = 8;
    const FLAGS_OFFSET: u64 = 12;

    let id_addr = mem.checked_offset(addr, ID_OFFSET)
        .ok_or(WlError::CheckedOffset)?;
    let vfd_id: Le32 = mem.read_obj_from_addr(id_addr)?;

    let flags_addr = mem.checked_offset(addr, FLAGS_OFFSET)
        .ok_or(WlError::CheckedOffset)?;
    let pipe_flags: Le32 = mem.read_obj_from_addr(flags_addr)?;

    Ok(WlOp::NewPipe {
           id: vfd_id.into(),
           flags: pipe_flags.into(),
       })
}

/// Decodes a `VIRTIO_WL_CMD_VFD_NEW_DMABUF` command located at `addr` into a `WlOp::NewDmabuf`.
///
/// Reads the little-endian `id`, `width`, `height` and `format` fields found 8, 28, 32 and 36
/// bytes past `addr`. Returns `WlError::CheckedOffset` if a field address cannot be computed, or
/// a guest memory error if a field cannot be read.
#[cfg(feature = "wl-dmabuf")]
fn parse_new_dmabuf(addr: GuestAddress, mem: &GuestMemory) -> WlResult<WlOp> {
    const ID_OFFSET: u64 = 8;
    const WIDTH_OFFSET: u64 = 28;
    const HEIGHT_OFFSET: u64 = 32;
    const FORMAT_OFFSET: u64 = 36;

    // Resolves the address of one field and reads a 32-bit little-endian value from it.
    let read_field = |offset: u64| -> WlResult<Le32> {
        let field_addr = mem.checked_offset(addr, offset)
            .ok_or(WlError::CheckedOffset)?;
        let value: Le32 = mem.read_obj_from_addr(field_addr)?;
        Ok(value)
    };

    let vfd_id = read_field(ID_OFFSET)?;
    let buf_width = read_field(WIDTH_OFFSET)?;
    let buf_height = read_field(HEIGHT_OFFSET)?;
    let buf_format = read_field(FORMAT_OFFSET)?;

    Ok(WlOp::NewDmabuf {
           id: vfd_id.into(),
           width: buf_width.into(),
           height: buf_height.into(),
           format: buf_format.into(),
       })
}

/// Decodes a `VIRTIO_WL_CMD_VFD_DMABUF_SYNC` command located at `addr` into a `WlOp::DmabufSync`.
///
/// Reads the little-endian `id` and `flags` fields found 8 and 12 bytes past `addr`. Returns
/// `WlError::CheckedOffset` if a field address cannot be computed, or a guest memory error if a
/// field cannot be read.
#[cfg(feature = "wl-dmabuf")]
fn parse_dmabuf_sync(addr: GuestAddress, mem: &GuestMemory) -> WlResult<WlOp> {
    const ID_OFFSET: u64 = 8;
    const FLAGS_OFFSET: u64 = 12;

    let id_addr = mem.checked_offset(addr, ID_OFFSET)
        .ok_or(WlError::CheckedOffset)?;
    let vfd_id: Le32 = mem.read_obj_from_addr(id_addr)?;

    let flags_addr = mem.checked_offset(addr, FLAGS_OFFSET)
        .ok_or(WlError::CheckedOffset)?;
    let sync_flags: Le32 = mem.read_obj_from_addr(flags_addr)?;

    Ok(WlOp::DmabufSync {
           id: vfd_id.into(),
           flags: sync_flags.into(),
       })
}

/// Decodes a `VIRTIO_WL_CMD_VFD_SEND` command located at `addr` into a `WlOp::Send`.
///
/// The command holds the target vfd `id` (offset 8), a `vfd_count` (offset 12), an array of
/// `vfd_count` 32-bit vfd ids starting at offset 16, and a payload that occupies whatever is left
/// of the `len` bytes of the descriptor after that array.
///
/// `vfd_count` is read from guest memory and `len` is supplied by the caller from the descriptor,
/// so neither can be trusted. The size of the id array is therefore computed in 64 bits, and the
/// payload length is derived with checked subtraction.
///
/// # Errors
///
/// Returns `WlError::CheckedOffset` if any address cannot be computed or if `len` is too small
/// to hold the fixed header plus `vfd_count` ids. Returns a guest memory error if a field cannot
/// be read.
fn parse_send(addr: GuestAddress, len: u32, mem: &GuestMemory) -> WlResult<WlOp> {
    const ID_OFFSET: u64 = 8;
    const VFD_COUNT_OFFSET: u64 = 12;
    const VFDS_OFFSET: u64 = 16;
    // Each entry of the vfd id array is one 32-bit value.
    const VFD_ID_SIZE: u64 = 4;

    let id: Le32 = mem.read_obj_from_addr(mem.checked_offset(addr, ID_OFFSET)
                                              .ok_or(WlError::CheckedOffset)?)?;
    let vfd_count: Le32 =
        mem.read_obj_from_addr(mem.checked_offset(addr, VFD_COUNT_OFFSET)
                                    .ok_or(WlError::CheckedOffset)?)?;
    let vfd_count: u32 = vfd_count.into();

    // A u32 count times 4 always fits in a u64, so this product cannot overflow.
    let vfds_len = (vfd_count as u64) * VFD_ID_SIZE;

    let vfds_addr = mem.checked_offset(addr, VFDS_OFFSET)
        .ok_or(WlError::CheckedOffset)?;
    let data_addr = mem.checked_offset(vfds_addr, vfds_len)
        .ok_or(WlError::CheckedOffset)?;

    // Reject descriptors that are too short for the header and id array instead of letting the
    // subtraction wrap around (release) or panic (debug).
    let data_len = (len as u64)
        .checked_sub(VFDS_OFFSET)
        .and_then(|rest| rest.checked_sub(vfds_len))
        .ok_or(WlError::CheckedOffset)?;

    Ok(WlOp::Send {
           id: id.into(),
           vfds_addr: vfds_addr,
           vfd_count: vfd_count,
           data_addr: data_addr,
           // Never larger than `len`, so the value fits back into a u32.
           data_len: data_len as u32,
       })
}

/// Reads the 32-bit little-endian vfd id found 8 bytes past `addr`.
///
/// Returns `WlError::CheckedOffset` if the field address cannot be computed, or a guest memory
/// error if the field cannot be read.
fn parse_id(addr: GuestAddress, mem: &GuestMemory) -> WlResult<u32> {
    const ID_OFFSET: u64 = 8;

    let id_addr = match mem.checked_offset(addr, ID_OFFSET) {
        Some(resolved) => resolved,
        None => return Err(WlError::CheckedOffset),
    };
    let raw_id: Le32 = mem.read_obj_from_addr(id_addr)?;
    Ok(raw_id.into())
}

/// Decodes the command stored in the buffer described by `desc` into a `WlOp`.
///
/// The first 32-bit little-endian word of the buffer selects the command. Types that are not
/// recognized are not an error: they are reported as `WlOp::InvalidCommand` carrying the type.
fn parse_desc(desc: &DescriptorChain, mem: &GuestMemory) -> WlResult<WlOp> {
    let raw_type: Le32 = mem.read_obj_from_addr(desc.addr)?;
    let op_type: u32 = raw_type.into();
    match op_type {
        VIRTIO_WL_CMD_VFD_NEW => parse_new(desc.addr, mem),
        VIRTIO_WL_CMD_VFD_CLOSE => {
            parse_id(desc.addr, mem).map(|vfd_id| WlOp::Close { id: vfd_id })
        }
        VIRTIO_WL_CMD_VFD_SEND => parse_send(desc.addr, desc.len, mem),
        VIRTIO_WL_CMD_VFD_NEW_CTX => {
            parse_id(desc.addr, mem).map(|vfd_id| WlOp::NewCtx { id: vfd_id })
        }
        VIRTIO_WL_CMD_VFD_NEW_PIPE => parse_new_pipe(desc.addr, mem),
        #[cfg(feature = "wl-dmabuf")]
        VIRTIO_WL_CMD_VFD_NEW_DMABUF => parse_new_dmabuf(desc.addr, mem),
        #[cfg(feature = "wl-dmabuf")]
        VIRTIO_WL_CMD_VFD_DMABUF_SYNC => parse_dmabuf_sync(desc.addr, mem),
        unknown => Ok(WlOp::InvalidCommand { op_type: unknown }),
    }
}

/// Writes a `CtrlVfdNew` message to the start of `desc_mem` and returns its size in bytes.
///
/// The header type is `VIRTIO_WL_RESP_VFD_NEW` when `resp` is true and `VIRTIO_WL_CMD_VFD_NEW`
/// otherwise; the header flags are always zero. Fails with a volatile memory error if `desc_mem`
/// cannot provide a reference for the message at offset 0.
fn encode_vfd_new(desc_mem: VolatileSlice,
                  resp: bool,
                  vfd_id: u32,
                  flags: u32,
                  pfn: u64,
                  size: u32)
                  -> WlResult<u32> {
    let msg_type = if resp {
        VIRTIO_WL_RESP_VFD_NEW
    } else {
        VIRTIO_WL_CMD_VFD_NEW
    };
    let header = CtrlHeader {
        type_: Le32::from(msg_type),
        flags: Le32::from(0),
    };
    let message = CtrlVfdNew {
        hdr: header,
        id: Le32::from(vfd_id),
        flags: Le32::from(flags),
        pfn: Le64::from(pfn),
        size: Le32::from(size),
    };

    desc_mem.get_ref(0)?.store(message);
    Ok(size_of::<CtrlVfdNew>() as u32)
}

/// Writes a `CtrlVfdNewDmabuf` response to the start of `desc_mem` and returns its size in bytes.
///
/// The header type is `VIRTIO_WL_RESP_VFD_NEW_DMABUF` with zero header flags. The `width`,
/// `height` and `format` fields are always written as zero; the stride and offset of the first
/// three planes are copied from `desc`. Fails with a volatile memory error if `desc_mem` cannot
/// provide a reference for the message at offset 0.
#[cfg(feature = "wl-dmabuf")]
fn encode_vfd_new_dmabuf(desc_mem: VolatileSlice,
                         vfd_id: u32,
                         flags: u32,
                         pfn: u64,
                         size: u32,
                         desc: GpuMemoryDesc)
                         -> WlResult<u32> {
    let header = CtrlHeader {
        type_: Le32::from(VIRTIO_WL_RESP_VFD_NEW_DMABUF),
        flags: Le32::from(0),
    };
    let planes = &desc.planes;
    let message = CtrlVfdNewDmabuf {
        hdr: header,
        id: Le32::from(vfd_id),
        flags: Le32::from(flags),
        pfn: Le64::from(pfn),
        size: Le32::from(size),
        width: Le32::from(0),
        height: Le32::from(0),
        format: Le32::from(0),
        stride0: Le32::from(planes[0].stride),
        stride1: Le32::from(planes[1].stride),
        stride2: Le32::from(planes[2].stride),
        offset0: Le32::from(planes[0].offset),
        offset1: Le32::from(planes[1].offset),
        offset2: Le32::from(planes[2].offset),
    };

    desc_mem.get_ref(0)?.store(message);
    Ok(size_of::<CtrlVfdNewDmabuf>() as u32)
}

/// Writes a `VIRTIO_WL_CMD_VFD_RECV` message into `desc_mem` and returns its total size in bytes.
///
/// The message is laid out as a `CtrlVfdRecv` header, followed by one 32-bit entry per element of
/// `vfd_ids`, followed by the bytes of `data`. Fails with a volatile memory error if any of those
/// regions does not fit inside `desc_mem`; parts written before the failure stay written.
fn encode_vfd_recv(desc_mem: VolatileSlice,
                   vfd_id: u32,
                   data: &[u8],
                   vfd_ids: &[u32])
                   -> WlResult<u32> {
    let id_size = size_of::<Le32>();
    let header_len = size_of::<CtrlVfdRecv>();
    let ids_len = vfd_ids.len() * id_size;

    // Fixed header first.
    let header = CtrlVfdRecv {
        hdr: CtrlHeader {
            type_: Le32::from(VIRTIO_WL_CMD_VFD_RECV),
            flags: Le32::from(0),
        },
        id: Le32::from(vfd_id),
        vfd_count: Le32::from(vfd_ids.len() as u32),
    };
    desc_mem.get_ref(0)?.store(header);

    // Then the array of received vfd ids, directly after the header.
    let ids_region = desc_mem.get_slice(header_len as u64, ids_len as u64)?;
    for (index, &received_id) in vfd_ids.iter().enumerate() {
        let entry_offset = (id_size * index) as u64;
        ids_region.get_ref(entry_offset)?.store(received_id);
    }

    // Finally the payload, directly after the id array.
    let payload_region = desc_mem
        .get_slice((header_len + ids_len) as u64, data.len() as u64)?;
    payload_region.copy_from(data);

    Ok((header_len + ids_len + data.len()) as u32)
}

fn encode_vfd_hup(desc_mem: VolatileSlice, vfd_id: u32) -> WlResult<u32> {
    let ctrl_vfd_new = CtrlVfd {
        hdr: CtrlHeader {
            type_: Le32::from(VIRTIO_WL_CMD_VFD_HUP),
            flags: Le32::from(0),
        },
        id: Le32::from(vfd_id),
    };

    desc_mem.get_ref(0)?.store(ctrl_vfd_new);
    Ok(size_of_val(&ctrl_vfd_new) as u32)
}

/// Encodes `resp` into `desc_mem` and returns the number of bytes the encoded message occupies.
///
/// Responses that carry a payload are handed to their dedicated encoder. Every other response is
/// written as a single 32-bit little-endian word holding `resp.get_code()`.
fn encode_resp(desc_mem: VolatileSlice, resp: WlResp) -> WlResult<u32> {
    match resp {
        WlResp::VfdNew {
            id: vfd_id,
            flags: vfd_flags,
            pfn,
            size,
            resp: is_response,
        } => encode_vfd_new(desc_mem, is_response, vfd_id, vfd_flags, pfn, size),
        #[cfg(feature = "wl-dmabuf")]
        WlResp::VfdNewDmabuf {
            id: vfd_id,
            flags: vfd_flags,
            pfn,
            size,
            desc: gpu_desc,
        } => encode_vfd_new_dmabuf(desc_mem, vfd_id, vfd_flags, pfn, size, gpu_desc),
        WlResp::VfdRecv {
            id: vfd_id,
            data: payload,
            vfds: received_ids,
        } => encode_vfd_recv(desc_mem, vfd_id, payload, received_ids),
        WlResp::VfdHup { id: vfd_id } => encode_vfd_hup(desc_mem, vfd_id),
        code_only => {
            // No payload: the whole message is just the response code.
            let slot = desc_mem.get_ref(0)?;
            slot.store(Le32::from(code_only.get_code()));
            Ok(size_of::<Le32>() as u32)
        }
    }
}

/// Errors produced by the virtio wayland code in this file.
#[derive(Debug)]
enum WlError {
    /// Failed to create a shared memory allocation.
    NewAlloc(Error),
    /// Failed to create a pipe.
    NewPipe(Error),
    /// Failed to set the size of a shared memory allocation.
    AllocSetSize(Error),
    /// Failed to connect a socket.
    SocketConnect(io::Error),
    /// Failed to set a socket as non-blocking.
    SocketNonBlock(io::Error),
    /// Failed to control the parent VM.
    VmControl(VmControlError),
    /// The parent VM sent an invalid response.
    VmBadResponse,
    /// An offset calculation overflowed.
    CheckedOffset,
    /// Access violation in guest memory.
    GuestMemory(GuestMemoryError),
    /// Access violation in guest volatile memory.
    VolatileMemory(VolatileMemoryError),
    /// Failed to send on a socket.
    SendVfd(Error),
    /// Failed to write to a pipe.
    WritePipe(io::Error),
    /// Failed to receive on a socket.
    RecvVfd(Error),
    /// Failed to read from a pipe.
    ReadPipe(io::Error),
    /// Failed to add a file descriptor to the poll context.
    PollContextAdd(Error),
    /// Failed to synchronize DMABuf access.
    DmabufSync(io::Error),
}

impl fmt::Display for WlError {
    // Displays the static message returned by `description()`; the wrapped error, if any, is not
    // included in the output.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = self.description();
        write!(f, "{}", message)
    }
}

impl error::Error for WlError {
    // Returns a fixed, human readable message for each error variant. The payload carried by the
    // variant is not part of the message.
    fn description(&self) -> &str {
        match *self {
            WlError::NewAlloc(_) => "Failed to create shared memory allocation",
            WlError::NewPipe(_) => "Failed to create pipe",
            WlError::AllocSetSize(_) => "Failed to set size of shared memory",
            WlError::SocketConnect(_) => "Failed to connect socket",
            WlError::SocketNonBlock(_) => "Failed to set socket as non-blocking",
            WlError::VmControl(_) => "Failed to control parent VM",
            WlError::VmBadResponse => "Invalid response from parent VM",
            WlError::CheckedOffset => "Overflow in calculation",
            WlError::GuestMemory(_) => "Access violation in guest memory",
            WlError::VolatileMemory(_) => "Access violation in guest volatile memory",
            WlError::SendVfd(_) => "Failed to send on a socket",
            WlError::WritePipe(_) => "Failed to write to a pipe",
            WlError::RecvVfd(_) => "Failed to recv on a socket",
            WlError::ReadPipe(_) => "Failed to read a pipe",
            WlError::PollContextAdd(_) => "Failed to listen to FD on poll context",
            WlError::DmabufSync(_) => "Failed to synchronize DMABuf access",
        }
    }
}

// Result type used throughout this module; the error is always a `WlError`.
type WlResult<T> = result::Result<T, WlError>;

// Lets `?` convert guest memory access failures into `WlError::GuestMemory`.
impl From<GuestMemoryError> for WlError {
    fn from(e: GuestMemoryError) -> WlError {
        WlError::GuestMemory(e)
    }
}

// Lets `?` convert volatile memory access failures into `WlError::VolatileMemory`.
impl From<VolatileMemoryError> for WlError {
    fn from(e: VolatileMemoryError) -> WlError {
        WlError::VolatileMemory(e)
    }
}

// Cheaply clonable handle used to send `VmRequest`s to the parent VM and read back the
// `VmResponse`. All clones share the same `Scm` helper and datagram socket through an
// `Rc<RefCell<..>>`.
#[derive(Clone)]
struct VmRequester {
    inner: Rc<RefCell<(Scm, UnixDatagram)>>,
}

impl VmRequester {
    // Wraps `vm_socket` together with a freshly created `Scm` helper.
    fn new(vm_socket: UnixDatagram) -> VmRequester {
        let shared = Rc::new(RefCell::new((Scm::new(1), vm_socket)));
        VmRequester { inner: shared }
    }

    // Sends `request` over the VM socket and then waits for the matching response. A failure of
    // either step is reported as `WlError::VmControl`.
    fn request(&self, request: VmRequest) -> WlResult<VmResponse> {
        let mut guard = self.inner.borrow_mut();
        let (ref mut scm, ref mut vm_socket) = *guard;
        if let Err(e) = request.send(scm, vm_socket) {
            return Err(WlError::VmControl(e));
        }
        VmResponse::recv(scm, vm_socket).map_err(WlError::VmControl)
    }
}

// Common header that starts every control message structure in this file: a message type
// followed by flags, both stored as little-endian 32-bit values.
#[repr(C)]
#[derive(Copy, Clone)]
struct CtrlHeader {
    type_: Le32,
    flags: Le32,
}

// Control message describing a new VFD: its id, flags, the guest page frame number it is mapped
// at and its size.
#[repr(C)]
#[derive(Copy, Clone)]
struct CtrlVfdNew {
    hdr: CtrlHeader,
    id: Le32,
    flags: Le32,
    pfn: Le64,
    size: Le32,
}

// NOTE(review): presumably sound because the struct consists only of plain little-endian integer
// fields -- confirm against the `DataInit` contract.
unsafe impl DataInit for CtrlVfdNew {}

// Control message describing a new DMABuf backed VFD. In addition to the fields of `CtrlVfdNew`
// it carries the buffer dimensions, format, and three stride/offset pairs.
// Only available with the "wl-dmabuf" feature.
#[repr(C)]
#[derive(Copy, Clone)]
#[cfg(feature = "wl-dmabuf")]
struct CtrlVfdNewDmabuf {
    hdr: CtrlHeader,
    id: Le32,
    flags: Le32,
    pfn: Le64,
    size: Le32,
    width: Le32,
    height: Le32,
    format: Le32,
    stride0: Le32,
    stride1: Le32,
    stride2: Le32,
    offset0: Le32,
    offset1: Le32,
    offset2: Le32,
}

// NOTE(review): presumably sound because the struct consists only of plain little-endian integer
// fields -- confirm against the `DataInit` contract.
#[cfg(feature = "wl-dmabuf")]
unsafe impl DataInit for CtrlVfdNewDmabuf {}

// Fixed-size head of a VFD receive message: the receiving VFD's id and the number of VFD ids
// that follow. The encoder places the VFD id array and then the data bytes directly after it.
#[repr(C)]
#[derive(Copy, Clone)]
struct CtrlVfdRecv {
    hdr: CtrlHeader,
    id: Le32,
    vfd_count: Le32,
}

// NOTE(review): presumably sound because the struct consists only of plain little-endian integer
// fields -- confirm against the `DataInit` contract.
unsafe impl DataInit for CtrlVfdRecv {}

// Control message that refers to a single VFD by id; used by `encode_vfd_hup` for hang-up
// notifications.
#[repr(C)]
#[derive(Copy, Clone)]
struct CtrlVfd {
    hdr: CtrlHeader,
    id: Le32,
}

// NOTE(review): presumably sound because the struct consists only of plain little-endian integer
// fields -- confirm against the `DataInit` contract.
unsafe impl DataInit for CtrlVfd {}

// A decoded operation, dispatched by `WlState::execute`.
#[derive(Debug)]
enum WlOp {
    // Allocate shared memory of `size` bytes for the VFD `id`.
    NewAlloc { id: u32, flags: u32, size: u32 },
    // Close the VFD `id`.
    Close { id: u32 },
    // Send data, and optionally other VFDs, over the VFD `id`. The VFD id array and the data are
    // read from guest memory at the given addresses.
    Send {
        id: u32,
        vfds_addr: GuestAddress,
        vfd_count: u32,
        data_addr: GuestAddress,
        data_len: u32,
    },
    // Open a new connection to the wayland socket as VFD `id`.
    NewCtx { id: u32 },
    // Create a pipe as VFD `id`; `flags` selects which end stays local.
    NewPipe { id: u32, flags: u32 },
    // Allocate a DMABuf with the given dimensions and format as VFD `id`.
    #[cfg(feature = "wl-dmabuf")]
    NewDmabuf { id: u32, width: u32, height: u32, format: u32 },
    // Issue a DMABuf sync with `flags` on the VFD `id`.
    #[cfg(feature = "wl-dmabuf")]
    DmabufSync { id: u32, flags: u32 },
    // The operation type was not recognized.
    InvalidCommand { op_type: u32 },
}

// A message to be encoded by `encode_resp`. `get_code` gives the numeric code of each variant;
// some variants map to `VIRTIO_WL_CMD_*` codes and the rest to `VIRTIO_WL_RESP_*` codes.
#[derive(Debug)]
#[allow(dead_code)]
enum WlResp<'a> {
    Ok,
    // Describes a newly created VFD.
    VfdNew {
        id: u32,
        flags: u32,
        pfn: u64,
        size: u32,
        // The VfdNew variant can be either a response or a command depending on this `resp`. This
        // is important for the `get_code` method.
        resp: bool,
    },
    // Describes a newly created DMABuf VFD together with its memory layout description.
    #[cfg(feature = "wl-dmabuf")]
    VfdNewDmabuf {
        id: u32,
        flags: u32,
        pfn: u64,
        size: u32,
        desc: GpuMemoryDesc,
    },
    // Data and VFD ids received on the VFD `id`; both slices are borrowed from the state.
    VfdRecv {
        id: u32,
        data: &'a [u8],
        vfds: &'a [u32],
    },
    // The VFD `id` was hung up.
    VfdHup { id: u32 },
    // Generic failure carrying the underlying error.
    Err(Box<error::Error>),
    OutOfMemory,
    InvalidId,
    InvalidType,
    InvalidFlags,
    InvalidCommand,
}

impl<'a> WlResp<'a> {
    // Returns the numeric message code for this value. `VfdNew` yields the response code when
    // its `resp` field is set and the command code otherwise.
    fn get_code(&self) -> u32 {
        match *self {
            WlResp::Ok => VIRTIO_WL_RESP_OK,
            WlResp::VfdNew { resp: true, .. } => VIRTIO_WL_RESP_VFD_NEW,
            WlResp::VfdNew { resp: false, .. } => VIRTIO_WL_CMD_VFD_NEW,
            #[cfg(feature = "wl-dmabuf")]
            WlResp::VfdNewDmabuf { .. } => VIRTIO_WL_RESP_VFD_NEW_DMABUF,
            WlResp::VfdRecv { .. } => VIRTIO_WL_CMD_VFD_RECV,
            WlResp::VfdHup { .. } => VIRTIO_WL_CMD_VFD_HUP,
            WlResp::Err(_) => VIRTIO_WL_RESP_ERR,
            WlResp::OutOfMemory => VIRTIO_WL_RESP_OUT_OF_MEMORY,
            WlResp::InvalidId => VIRTIO_WL_RESP_INVALID_ID,
            WlResp::InvalidType => VIRTIO_WL_RESP_INVALID_TYPE,
            WlResp::InvalidFlags => VIRTIO_WL_RESP_INVALID_FLAGS,
            WlResp::InvalidCommand => VIRTIO_WL_RESP_INVALID_CMD,
        }
    }
}

// A virtual file descriptor. Which fields are populated depends on how the VFD was created:
// `connect` fills `socket`, the allocation paths fill `guest_shared_memory` and `slot`, and the
// pipe constructors fill `remote_pipe` and `local_pipe`.
#[derive(Default)]
struct WlVfd {
    // Stream socket connection, set by `connect`.
    socket: Option<UnixStream>,
    // Backing file of memory registered with the VM, along with its size.
    guest_shared_memory: Option<(u64 /* size */, File)>,
    // The pipe end that is handed out when this VFD is sent over a socket.
    remote_pipe: Option<File>,
    // The pipe end that is read from or written to locally, along with its access flags.
    local_pipe: Option<(u32 /* flags */, File)>,
    // Memory slot registration; unregistered through the stored `VmRequester` on close.
    slot: Option<(u32 /* slot */, u64 /* pfn */, VmRequester)>,
    // Set only for VFDs created by `dmabuf`.
    #[cfg(feature = "wl-dmabuf")]
    is_dmabuf: bool,
}

impl fmt::Debug for WlVfd {
    // Prints only the populated parts of the VFD, identifying files and sockets by their raw
    // file descriptor number.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "WlVfd {{")?;
        if let Some(ref sock) = self.socket {
            write!(f, " socket: {}", sock.as_raw_fd())?;
        }
        if let Some((slot, pfn, _)) = self.slot {
            write!(f, " slot: {} pfn: {}", slot, pfn)?;
        }
        if let Some(ref remote) = self.remote_pipe {
            write!(f, " remote: {}", remote.as_raw_fd())?;
        }
        if let Some((_, ref local)) = self.local_pipe {
            write!(f, " local: {}", local.as_raw_fd())?;
        }
        write!(f, " }}")
    }
}

impl WlVfd {
    // Creates a VFD backed by a non-blocking stream socket connected to `path`.
    fn connect<P: AsRef<Path>>(path: P) -> WlResult<WlVfd> {
        let stream = UnixStream::connect(path).map_err(WlError::SocketConnect)?;
        stream
            .set_nonblocking(true)
            .map_err(WlError::SocketNonBlock)?;

        let mut new_vfd = WlVfd::default();
        new_vfd.socket = Some(stream);
        Ok(new_vfd)
    }

    // Creates a VFD backed by freshly allocated shared memory of at least `size` bytes (rounded
    // up to the page size) and registers that memory with the VM.
    fn allocate(vm: VmRequester, size: u64) -> WlResult<WlVfd> {
        let aligned_size = round_up_to_page_size(size as usize) as u64;
        let shm_name = CStr::from_bytes_with_nul(b"virtwl_alloc\0").unwrap();
        let mut shm = SharedMemory::new(Some(shm_name)).map_err(WlError::NewAlloc)?;
        shm.set_size(aligned_size)
            .map_err(WlError::AllocSetSize)?;

        let request = VmRequest::RegisterMemory(MaybeOwnedFd::Borrowed(shm.as_raw_fd()),
                                                shm.size() as usize);
        let (pfn, slot) = match vm.request(request)? {
            VmResponse::RegisterMemory { pfn, slot } => (pfn, slot),
            _ => return Err(WlError::VmBadResponse),
        };

        let mut new_vfd = WlVfd::default();
        new_vfd.guest_shared_memory = Some((shm.size(), shm.into()));
        new_vfd.slot = Some((slot, pfn, vm));
        Ok(new_vfd)
    }

    // Asks the VM to allocate and register GPU memory with the given dimensions and format, and
    // wraps the result in a VFD. Returns the VFD together with the memory description reported
    // by the VM.
    //
    // Fails with `WlError::VmBadResponse` if the VM answers with an unexpected response and with
    // `WlError::NewAlloc` if the returned file descriptor can not be duplicated or wrapped.
    #[cfg(feature = "wl-dmabuf")]
    fn dmabuf(vm: VmRequester, width: u32, height: u32, format: u32) ->
        WlResult<(WlVfd, GpuMemoryDesc)> {
        let allocate_and_register_gpu_memory_response =
            vm.request(VmRequest::AllocateAndRegisterGpuMemory { width: width,
                                                                 height: height,
                                                                 format: format })?;
        match allocate_and_register_gpu_memory_response {
            VmResponse::AllocateAndRegisterGpuMemory { fd, pfn, slot, desc } => {
                let mut vfd = WlVfd::default();
                // Record the slot before doing anything fallible so that dropping `vfd` on an
                // error path below unregisters the memory instead of leaking the slot.
                vfd.slot = Some((slot, pfn, vm));
                vfd.is_dmabuf = true;
                // Duplicate FD for shared memory instance.
                // Safe because dup does not touch memory and its return value is checked.
                let dup_fd = unsafe { dup(fd.as_raw_fd()) };
                if dup_fd < 0 {
                    return Err(WlError::NewAlloc(Error::last()));
                }
                // Safe because dup_fd was just created, is valid, and is owned by nothing else.
                let raw_fd = unsafe { File::from_raw_fd(dup_fd) };
                let vfd_shm = SharedMemory::from_raw_fd(raw_fd).map_err(WlError::NewAlloc)?;
                vfd.guest_shared_memory = Some((vfd_shm.size(), vfd_shm.into()));
                Ok((vfd, desc))
            }
            _ => Err(WlError::VmBadResponse),
        }
    }

    // Issues a DMABuf sync ioctl with `flags` on this VFD's backing file. Fails with `EINVAL` if
    // the VFD is not a DMABuf and with `EBADF` if it has no backing file.
    #[cfg(feature = "wl-dmabuf")]
    fn dmabuf_sync(&self, flags: u32) -> WlResult<()> {
        if !self.is_dmabuf {
            return Err(WlError::DmabufSync(io::Error::from_raw_os_error(EINVAL)));
        }

        let fd = match self.guest_shared_memory {
            Some((_, ref fd)) => fd,
            None => return Err(WlError::DmabufSync(io::Error::from_raw_os_error(EBADF))),
        };
        let sync = dma_buf_sync { flags: flags as u64 };
        // Safe as fd is a valid dmabuf and incorrect flags will return an error.
        let ret = unsafe { ioctl_with_ref(fd, DMA_BUF_IOCTL_SYNC(), &sync) };
        if ret < 0 {
            return Err(WlError::DmabufSync(io::Error::last_os_error()));
        }
        Ok(())
    }

    // Creates a pipe VFD whose write end is kept locally (flagged `VIRTIO_WL_VFD_WRITE`) and
    // whose read end is the one handed out when the VFD is sent.
    fn pipe_remote_read_local_write() -> WlResult<WlVfd> {
        let (read_end, write_end) = pipe(true).map_err(WlError::NewPipe)?;
        let mut new_vfd = WlVfd::default();
        new_vfd.local_pipe = Some((VIRTIO_WL_VFD_WRITE, write_end));
        new_vfd.remote_pipe = Some(read_end);
        Ok(new_vfd)
    }

    // Creates a pipe VFD whose read end is kept locally (flagged `VIRTIO_WL_VFD_READ`) and whose
    // write end is the one handed out when the VFD is sent.
    fn pipe_remote_write_local_read() -> WlResult<WlVfd> {
        let (read_end, write_end) = pipe(true).map_err(WlError::NewPipe)?;
        let mut new_vfd = WlVfd::default();
        new_vfd.local_pipe = Some((VIRTIO_WL_VFD_READ, read_end));
        new_vfd.remote_pipe = Some(write_end);
        Ok(new_vfd)
    }

    // Wraps a file received from the host in a VFD.
    //
    // A file that can be seeked to its end is treated as shared memory and registered with the
    // VM, using the end offset (rounded up to the page size) as the mapping size. Anything else is
    // treated as a pipe/socket-like stream whose access flags are taken from the file itself.
    fn from_file(vm: VmRequester, mut fd: File) -> WlResult<WlVfd> {
        // Pipes and sockets have no end, so a failing seek is what identifies them.
        let end_offset = match fd.seek(SeekFrom::End(0)) {
            Ok(offset) => offset,
            _ => {
                let flags = match FileFlags::from_file(&fd) {
                    Ok(FileFlags::Read) => VIRTIO_WL_VFD_READ,
                    Ok(FileFlags::Write) => VIRTIO_WL_VFD_WRITE,
                    Ok(FileFlags::ReadWrite) => VIRTIO_WL_VFD_READ | VIRTIO_WL_VFD_WRITE,
                    _ => 0,
                };
                let mut stream_vfd = WlVfd::default();
                stream_vfd.local_pipe = Some((flags, fd));
                return Ok(stream_vfd);
            }
        };

        let size = round_up_to_page_size(end_offset as usize) as u64;
        let request = VmRequest::RegisterMemory(MaybeOwnedFd::Borrowed(fd.as_raw_fd()),
                                                size as usize);
        match vm.request(request)? {
            VmResponse::RegisterMemory { pfn, slot } => {
                let mut memory_vfd = WlVfd::default();
                memory_vfd.guest_shared_memory = Some((size, fd));
                memory_vfd.slot = Some((slot, pfn, vm));
                Ok(memory_vfd)
            }
            _ => Err(WlError::VmBadResponse),
        }
    }

    // Computes the flags reported to the guest for this VFD.
    //
    // With transition flags, a socket contributes read and write and a local pipe contributes its
    // own flags. Otherwise a socket contributes the control flag and a registered memory slot
    // contributes write and map.
    fn flags(&self, use_transition_flags: bool) -> u32 {
        let has_socket = self.socket.is_some();
        if use_transition_flags {
            let socket_bits = if has_socket {
                VIRTIO_WL_VFD_WRITE | VIRTIO_WL_VFD_READ
            } else {
                0
            };
            let pipe_bits = match self.local_pipe {
                Some((pipe_flags, _)) => pipe_flags,
                None => 0,
            };
            socket_bits | pipe_bits
        } else {
            let socket_bits = if has_socket { VIRTIO_WL_VFD_CONTROL } else { 0 };
            let slot_bits = if self.slot.is_some() {
                VIRTIO_WL_VFD_WRITE | VIRTIO_WL_VFD_MAP
            } else {
                0
            };
            socket_bits | slot_bits
        }
    }

    // Guest page frame number this VFD's memory was mapped at, if it has a memory slot.
    fn pfn(&self) -> Option<u64> {
        match self.slot {
            Some((_, pfn, _)) => Some(pfn),
            None => None,
        }
    }

    // Size in bytes recorded for the shared memory backing this VFD, if any.
    fn size(&self) -> Option<u64> {
        match self.guest_shared_memory {
            Some((size, _)) => Some(size),
            None => None,
        }
    }

    // The FD that gets sent if this VFD is sent over a socket.
    fn send_fd(&self) -> Option<RawFd> {
        self.guest_shared_memory
            .as_ref()
            .map(|&(_, ref fd)| fd.as_raw_fd())
            .or(self.socket.as_ref().map(|s| s.as_raw_fd()))
            .or(self.remote_pipe.as_ref().map(|p| p.as_raw_fd()))
    }

    // The FD that is used for polling for events on this VFD: the socket if there is one,
    // otherwise the local end of the pipe.
    fn poll_fd(&self) -> Option<&AsRawFd> {
        if let Some(ref sock) = self.socket {
            return Some(sock as &AsRawFd);
        }
        match self.local_pipe {
            Some((_, ref pipe)) => Some(pipe as &AsRawFd),
            None => None,
        }
    }

    // Sends data/files from the guest to the host over this VFD.
    //
    // A socket VFD sends both `data` and `fds`. A pipe VFD can only carry data, so a non-empty
    // `fds` yields `InvalidType`, as does a VFD that has neither a socket nor a local pipe.
    fn send(&mut self, scm: &mut Scm, fds: &[RawFd], data: VolatileSlice) -> WlResult<WlResp> {
        if let Some(ref socket) = self.socket {
            scm.send(socket, &[data], fds)
                .map_err(WlError::SendVfd)?;
            return Ok(WlResp::Ok);
        }

        match self.local_pipe {
            Some((_, ref mut local_pipe)) => {
                // Impossible to send fds over a simple pipe.
                if !fds.is_empty() {
                    return Ok(WlResp::InvalidType);
                }
                data.write_all_to(local_pipe)
                    .map_err(WlError::WritePipe)?;
                Ok(WlResp::Ok)
            }
            None => Ok(WlResp::InvalidType),
        }
    }

    // Receives data/files from the host for this VFD and queues it for the guest.
    //
    // Returns the bytes that were read; received files are appended to `in_file_queue`. The
    // socket or pipe is taken out of the VFD for the duration of the read and is only put back if
    // something was received, so an error or an empty read drops it and prevents further reads.
    fn recv(&mut self, scm: &mut Scm, in_file_queue: &mut Vec<File>) -> WlResult<Vec<u8>> {
        if let Some(socket) = self.socket.take() {
            let mut buf = vec![0; IN_BUFFER_LEN];
            let queued_before = in_file_queue.len();
            // If any errors happen, the socket will get dropped, preventing more reading.
            let len = scm.recv(&socket, &mut [&mut buf[..]], in_file_queue)
                .map_err(WlError::RecvVfd)?;
            if len == 0 && in_file_queue.len() == queued_before {
                return Ok(Vec::new());
            }
            // Something was read, so put the socket back for future recv operations.
            buf.truncate(len);
            buf.shrink_to_fit();
            self.socket = Some(socket);
            Ok(buf)
        } else if let Some((flags, mut local_pipe)) = self.local_pipe.take() {
            let mut buf = vec![0; IN_BUFFER_LEN];
            let len = local_pipe
                .read(&mut buf[..])
                .map_err(WlError::ReadPipe)?;
            if len == 0 {
                return Ok(Vec::new());
            }
            buf.truncate(len);
            buf.shrink_to_fit();
            self.local_pipe = Some((flags, local_pipe));
            Ok(buf)
        } else {
            Ok(Vec::new())
        }
    }

    // Called after this VFD is sent over a socket. Dropping our copy of the remote pipe end
    // ensures the local end of the VFD receives hang up events.
    fn close_remote(&mut self) {
        drop(self.remote_pipe.take());
    }

    // Releases everything this VFD holds: the memory slot is unregistered from the VM and the
    // socket and both pipe ends are dropped. If unregistering fails, the error is returned before
    // the socket and pipes are dropped.
    fn close(&mut self) -> WlResult<()> {
        match self.slot.take() {
            Some((slot, _, vm)) => {
                vm.request(VmRequest::UnregisterMemory(slot))?;
            }
            None => {}
        }
        self.socket = None;
        self.remote_pipe = None;
        self.local_pipe = None;
        Ok(())
    }
}

// Closes the VFD when it goes out of scope so a registered memory slot is not left behind.
impl Drop for WlVfd {
    fn drop(&mut self) {
        // Nothing useful can be done with a close error during drop, so it is ignored.
        let _ = self.close();
    }
}

// An event queued for delivery to the guest; stored in `WlState::in_queue` next to the id of the
// VFD it was received on.
#[derive(Debug)]
enum WlRecv {
    // A new VFD with the given id was received.
    Vfd { id: u32 },
    // Data bytes were received.
    Data { buf: Vec<u8> },
    // The VFD was hung up.
    Hup,
}

// All state of the wayland device: the set of live VFDs and the queue of events that still have
// to be delivered to the guest.
struct WlState {
    // Path of the socket that `new_context` connects to.
    wayland_path: PathBuf,
    // Handle used to register and unregister memory with the VM.
    vm: VmRequester,
    // Selects which set of VFD flags is reported to the guest; see `WlVfd::flags`.
    use_transition_flags: bool,
    // Polls the VFDs' sockets and local pipe ends; tokens are VFD ids.
    poll_ctx: PollContext<u32>,
    // Live VFDs keyed by id.
    vfds: Map<u32, WlVfd>,
    // Id that will be given to the next VFD received from the host.
    next_vfd_id: u32,
    scm: Scm,
    // Scratch list of files received by the most recent socket read.
    in_file_queue: Vec<File>,
    // Events waiting to be delivered to the guest, in order.
    in_queue: VecDeque<(u32 /* vfd_id */, WlRecv)>,
    // The VFD that the ids accumulated in `recv_vfds` were received on.
    current_recv_vfd: Option<u32>,
    // Ids of received VFDs already announced to the guest but not yet attached to a recv message.
    recv_vfds: Vec<u32>,
}

impl WlState {
    // Creates an empty state that talks to the VM over `vm_socket` and connects new contexts to
    // `wayland_path`. Panics if the poll context can not be created.
    fn new(wayland_path: PathBuf, vm_socket: UnixDatagram, use_transition_flags: bool) -> WlState {
        let vm = VmRequester::new(vm_socket);
        let poll_ctx = PollContext::new().expect("failed to create PollContext");
        let scm = Scm::new(VIRTWL_SEND_MAX_ALLOCS);
        WlState {
            wayland_path,
            vm,
            use_transition_flags,
            poll_ctx,
            vfds: Map::new(),
            next_vfd_id: NEXT_VFD_ID_BASE,
            scm,
            in_file_queue: Vec::new(),
            in_queue: VecDeque::new(),
            current_recv_vfd: None,
            recv_vfds: Vec::new(),
        }
    }

    // Creates a pipe VFD with the guest chosen `id`. `flags` must contain exactly one of the read
    // and write flags, which selects the direction of the local end; the local end is added to
    // the poll context before the VFD is stored.
    fn new_pipe(&mut self, id: u32, flags: u32) -> WlResult<WlResp> {
        if id & VFD_ID_HOST_MASK != 0 {
            return Ok(WlResp::InvalidId);
        }

        let wants_write = flags & VIRTIO_WL_VFD_WRITE != 0;
        let wants_read = flags & VIRTIO_WL_VFD_READ != 0;
        let has_unknown_bits = flags & !(VIRTIO_WL_VFD_WRITE | VIRTIO_WL_VFD_READ) != 0;
        if has_unknown_bits || (wants_write && wants_read) {
            return Ok(WlResp::InvalidFlags);
        }

        let vacant = match self.vfds.entry(id) {
            Entry::Vacant(entry) => entry,
            Entry::Occupied(_) => return Ok(WlResp::InvalidId),
        };
        let vfd = if wants_write {
            WlVfd::pipe_remote_read_local_write()?
        } else if wants_read {
            WlVfd::pipe_remote_write_local_read()?
        } else {
            return Ok(WlResp::InvalidFlags);
        };
        self.poll_ctx
            .add(vfd.poll_fd().unwrap(), id)
            .map_err(WlError::PollContextAdd)?;
        vacant.insert(vfd);
        Ok(WlResp::VfdNew {
               id,
               flags: 0,
               pfn: 0,
               size: 0,
               resp: true,
           })
    }

    // Allocates shared memory of `size` bytes as the VFD `id` and reports where it was mapped.
    //
    // With transition flags any non-zero `flags` is answered with `InvalidFlags`; otherwise bits
    // other than write and map are answered with a generic error response.
    fn new_alloc(&mut self, id: u32, flags: u32, size: u32) -> WlResult<WlResp> {
        if id & VFD_ID_HOST_MASK != 0 {
            return Ok(WlResp::InvalidId);
        }

        let allowed = VIRTIO_WL_VFD_WRITE | VIRTIO_WL_VFD_MAP;
        match (self.use_transition_flags, flags) {
            (true, 0) => {}
            (true, _) => return Ok(WlResp::InvalidFlags),
            (false, f) if f & !allowed != 0 => {
                return Ok(WlResp::Err(Box::from("invalid flags")));
            }
            (false, _) => {}
        }

        let vacant = match self.vfds.entry(id) {
            Entry::Vacant(entry) => entry,
            Entry::Occupied(_) => return Ok(WlResp::InvalidId),
        };
        let vfd = WlVfd::allocate(self.vm.clone(), size as u64)?;
        let pfn = vfd.pfn().unwrap_or_default();
        let mapped_size = vfd.size().unwrap_or_default() as u32;
        vacant.insert(vfd);
        Ok(WlResp::VfdNew {
               id,
               flags,
               pfn,
               size: mapped_size,
               resp: true,
           })
    }

    // Allocates a DMABuf with the given dimensions and format as the VFD `id` and reports where
    // it was mapped along with its memory description.
    #[cfg(feature = "wl-dmabuf")]
    fn new_dmabuf(&mut self, id: u32, width: u32, height: u32, format: u32) -> WlResult<WlResp> {
        if id & VFD_ID_HOST_MASK != 0 {
            return Ok(WlResp::InvalidId);
        }

        let vacant = match self.vfds.entry(id) {
            Entry::Vacant(entry) => entry,
            Entry::Occupied(_) => return Ok(WlResp::InvalidId),
        };
        let (vfd, desc) = WlVfd::dmabuf(self.vm.clone(), width, height, format)?;
        let pfn = vfd.pfn().unwrap_or_default();
        let mapped_size = vfd.size().unwrap_or_default() as u32;
        vacant.insert(vfd);
        Ok(WlResp::VfdNewDmabuf {
               id,
               flags: 0,
               pfn,
               size: mapped_size,
               desc,
           })
    }

    // Validates `flags` against the allowed sync flag mask and forwards the sync request to the
    // VFD `vfd_id`.
    #[cfg(feature = "wl-dmabuf")]
    fn dmabuf_sync(&mut self, vfd_id: u32, flags: u32) -> WlResult<WlResp> {
        let unknown_bits = flags & !VIRTIO_WL_VFD_DMABUF_SYNC_VALID_FLAG_MASK;
        if unknown_bits != 0 {
            return Ok(WlResp::InvalidFlags);
        }

        let vfd = match self.vfds.get_mut(&vfd_id) {
            Some(vfd) => vfd,
            None => return Ok(WlResp::InvalidId),
        };
        vfd.dmabuf_sync(flags)?;
        Ok(WlResp::Ok)
    }

    // Opens a new connection to the wayland socket as the VFD `id`.
    //
    // Returns `InvalidId` if `id` has a host-reserved bit set or is already in use. The VFD is
    // only stored once it has been added to the poll context, so a failed registration drops
    // the connection instead of leaving an unpolled VFD behind under `id`.
    fn new_context(&mut self, id: u32) -> WlResult<WlResp> {
        if id & VFD_ID_HOST_MASK != 0 {
            return Ok(WlResp::InvalidId);
        }

        let flags = if self.use_transition_flags {
            VIRTIO_WL_VFD_WRITE | VIRTIO_WL_VFD_READ
        } else {
            VIRTIO_WL_VFD_CONTROL
        };

        match self.vfds.entry(id) {
            Entry::Vacant(entry) => {
                let vfd = WlVfd::connect(&self.wayland_path)?;
                // The unwrap can not panic because a freshly connected VFD always has a socket.
                self.poll_ctx
                    .add(vfd.poll_fd().unwrap(), id)
                    .map_err(WlError::PollContextAdd)?;
                entry.insert(vfd);
                Ok(WlResp::VfdNew {
                       id: id,
                       flags,
                       pfn: 0,
                       size: 0,
                       resp: true,
                   })
            }
            Entry::Occupied(_) => Ok(WlResp::InvalidId),
        }
    }

    fn process_poll_context(&mut self) {
        let events = match self.poll_ctx.wait_timeout(Duration::from_secs(0)) {
            Ok(v) => v.to_owned(),
            Err(e) => {
                error!("failed polling for vfd evens: {:?}", e);
                return;
            }
        };

        for event in events.as_ref().iter_readable() {
            if let Err(e) = self.recv(event.token()) {
                error!("failed to recv from vfd: {:?}", e)
            }
        }

        for event in events.as_ref().iter_hungup() {
            if !event.readable() {
                let vfd_id = event.token();
                if let Some(fd) = self.vfds.get(&vfd_id).and_then(|vfd| vfd.poll_fd()) {
                    if let Err(e) = self.poll_ctx.delete(fd) {
                        warn!("failed to remove hungup vfd from poll context: {:?}", e);
                    }
                }
                self.in_queue.push_back((vfd_id, WlRecv::Hup));
            }
        }
    }

    // Closes the VFD `vfd_id` and discards everything still queued for it. VFDs that were
    // received on it but not yet delivered to the guest are closed first, recursively.
    fn close(&mut self, vfd_id: u32) -> WlResult<WlResp> {
        let mut undelivered = Set::new();
        undelivered.extend(self.in_queue
                               .iter()
                               .filter_map(|&(dest_vfd_id, ref queued)| match *queued {
                                               WlRecv::Vfd { id } if dest_vfd_id == vfd_id => {
                                                   Some(id)
                                               }
                                               _ => None,
                                           }));
        for child_id in undelivered {
            // Sorry sub-error, we can't have cascading errors leaving us in an inconsistent state.
            let _ = self.close(child_id);
        }

        let mut vfd = match self.vfds.remove(&vfd_id) {
            Some(vfd) => vfd,
            None => return Ok(WlResp::InvalidId),
        };
        self.in_queue.retain(|&(id, _)| id != vfd_id);
        vfd.close()?;
        Ok(WlResp::Ok)
    }

    // Sends `data`, along with the VFDs whose ids are listed in `vfds`, over the VFD `vfd_id`.
    //
    // `vfds` is read as an array of little-endian 32-bit VFD ids. At most
    // `VIRTWL_SEND_MAX_ALLOCS` ids can be sent at once; a longer list is rejected with
    // `InvalidCommand`. Unknown ids yield `InvalidId`, and VFDs that can not be sent, or a
    // destination that can not send, yield `InvalidType`.
    fn send(&mut self, vfd_id: u32, vfds: VolatileSlice, data: VolatileSlice) -> WlResult<WlResp> {
        let vfd_count = vfds.size() as usize / size_of::<Le32>();
        // The count comes from the guest. Without this check an oversized count would make the
        // fixed-size array slicing below panic.
        if vfd_count > VIRTWL_SEND_MAX_ALLOCS {
            return Ok(WlResp::InvalidCommand);
        }
        let mut vfd_ids = [Le32::from(0); VIRTWL_SEND_MAX_ALLOCS];
        vfds.copy_to(&mut vfd_ids[..]);
        let mut fds = [0; VIRTWL_SEND_MAX_ALLOCS];
        for (&id, fd) in vfd_ids[..vfd_count].iter().zip(fds.iter_mut()) {
            match self.vfds.get(&id.into()) {
                Some(vfd) => {
                    match vfd.send_fd() {
                        Some(vfd_fd) => *fd = vfd_fd,
                        None => return Ok(WlResp::InvalidType),
                    }
                }
                None => return Ok(WlResp::InvalidId),
            }
        }

        match self.vfds.get_mut(&vfd_id) {
            Some(vfd) => {
                match vfd.send(&mut self.scm, &fds[..vfd_count], data)? {
                    WlResp::Ok => {}
                    _ => return Ok(WlResp::InvalidType),
                }
            }
            None => return Ok(WlResp::InvalidId),
        }
        // The vfds with remote FDs need to be closed so that the local side can receive
        // hangup events.
        for &id in &vfd_ids[..vfd_count] {
            // The following unwrap can not panic because the IDs were already checked earlier in
            // this method.
            self.vfds.get_mut(&id.into()).unwrap().close_remote();
        }
        Ok(WlResp::Ok)
    }

    // Reads from the VFD `vfd_id` and queues the result for the guest.
    //
    // An empty read queues a hang-up. Otherwise every received file becomes a new host-side VFD
    // that is queued ahead of the data packet. An unknown `vfd_id` is silently ignored.
    fn recv(&mut self, vfd_id: u32) -> WlResult<()> {
        let buf = match self.vfds.get_mut(&vfd_id) {
            None => return Ok(()),
            Some(vfd) => vfd.recv(&mut self.scm, &mut self.in_file_queue)?,
        };

        let nothing_received = self.in_file_queue.is_empty() && buf.is_empty();
        if nothing_received {
            self.in_queue.push_back((vfd_id, WlRecv::Hup));
            return Ok(());
        }

        for file in self.in_file_queue.drain(..) {
            let new_id = self.next_vfd_id;
            let vfd = WlVfd::from_file(self.vm.clone(), file)?;
            if let Some(poll_fd) = vfd.poll_fd() {
                self.poll_ctx
                    .add(poll_fd, new_id)
                    .map_err(WlError::PollContextAdd)?;
            }
            self.vfds.insert(new_id, vfd);
            self.in_queue
                .push_back((vfd_id, WlRecv::Vfd { id: new_id }));
            self.next_vfd_id += 1;
        }
        self.in_queue.push_back((vfd_id, WlRecv::Data { buf }));

        Ok(())
    }

    // Runs a decoded operation and returns the response for the guest. `mem` is only used by
    // `Send`, whose VFD id array and data are read from guest memory.
    fn execute(&mut self, mem: &GuestMemory, op: WlOp) -> WlResult<WlResp> {
        match op {
            WlOp::NewAlloc { id, flags, size } => self.new_alloc(id, flags, size),
            WlOp::Close { id } => self.close(id),
            WlOp::Send { id, vfds_addr, vfd_count, data_addr, data_len } => {
                let vfds_len = (vfd_count as u64) * size_of::<Le32>() as u64;
                let vfds_slice = mem.get_slice(vfds_addr.0, vfds_len)?;
                let data_slice = mem.get_slice(data_addr.0, data_len as u64)?;
                self.send(id, vfds_slice, data_slice)
            }
            WlOp::NewCtx { id } => self.new_context(id),
            WlOp::NewPipe { id, flags } => self.new_pipe(id, flags),
            #[cfg(feature = "wl-dmabuf")]
            WlOp::NewDmabuf { id, width, height, format } => {
                self.new_dmabuf(id, width, height, format)
            }
            #[cfg(feature = "wl-dmabuf")]
            WlOp::DmabufSync { id, flags } => self.dmabuf_sync(id, flags),
            WlOp::InvalidCommand { op_type } => {
                warn!("unexpected command {}", op_type);
                Ok(WlResp::InvalidCommand)
            }
        }
    }

    // Builds the next message to deliver to the guest from the front of the queue, without
    // removing anything from it; `pop_recv` must be called once the message was delivered.
    //
    // Received VFDs are first announced one by one and their ids collected in `recv_vfds`. When
    // the front of the queue belongs to a different VFD than the collected ids, a recv message
    // with empty data is produced to deliver the collected ids first.
    fn next_recv(&self) -> Option<WlResp> {
        if let Some(q) = self.in_queue.front() {
            match q {
                &(vfd_id, WlRecv::Vfd { id }) => {
                    if self.current_recv_vfd.is_none() || self.current_recv_vfd == Some(vfd_id) {
                        match self.vfds.get(&id) {
                            Some(vfd) => {
                                Some(WlResp::VfdNew {
                                         id: id,
                                         flags: vfd.flags(self.use_transition_flags),
                                         pfn: vfd.pfn().unwrap_or_default(),
                                         size: vfd.size().unwrap_or_default() as u32,
                                         resp: false,
                                     })
                            }
                            // The VFD is no longer known; announce it with empty attributes.
                            _ => {
                                Some(WlResp::VfdNew {
                                         id: id,
                                         flags: 0,
                                         pfn: 0,
                                         size: 0,
                                         resp: false,
                                     })
                            }
                        }
                    } else {
                        // Flush the ids collected for the previous VFD before switching.
                        Some(WlResp::VfdRecv {
                                 id: self.current_recv_vfd.unwrap(),
                                 data: &[],
                                 vfds: &self.recv_vfds[..],
                             })
                    }
                }
                &(vfd_id, WlRecv::Data { ref buf }) => {
                    if self.current_recv_vfd.is_none() || self.current_recv_vfd == Some(vfd_id) {
                        Some(WlResp::VfdRecv {
                                 id: vfd_id,
                                 data: &buf[..],
                                 vfds: &self.recv_vfds[..],
                             })
                    } else {
                        // Flush the ids collected for the previous VFD before switching.
                        Some(WlResp::VfdRecv {
                                 id: self.current_recv_vfd.unwrap(),
                                 data: &[],
                                 vfds: &self.recv_vfds[..],
                             })
                    }
                }
                &(vfd_id, WlRecv::Hup) => Some(WlResp::VfdHup { id: vfd_id }),
            }
        } else {
            None

        }
    }

    // Advances the queue after the message produced by `next_recv` was delivered to the guest.
    //
    // This mirrors the decisions made by `next_recv`: when that method produced a flush message
    // for ids collected on a different VFD, only the collected ids are reset and the front of
    // the queue is kept, because it has not been delivered yet.
    fn pop_recv(&mut self) {
        if let Some(q) = self.in_queue.front() {
            match q {
                &(vfd_id, WlRecv::Vfd { id }) => {
                    if self.current_recv_vfd.is_none() || self.current_recv_vfd == Some(vfd_id) {
                        self.recv_vfds.push(id);
                        self.current_recv_vfd = Some(vfd_id);
                    } else {
                        self.recv_vfds.clear();
                        self.current_recv_vfd = None;
                        return;
                    }
                }
                &(vfd_id, WlRecv::Data { .. }) => {
                    // This has to be evaluated before the state is reset below, otherwise the
                    // check always passes and an undelivered data packet would be dropped.
                    let was_delivered = self.current_recv_vfd.is_none() ||
                                        self.current_recv_vfd == Some(vfd_id);
                    self.recv_vfds.clear();
                    self.current_recv_vfd = None;
                    if !was_delivered {
                        return;
                    }
                }
                &(_, WlRecv::Hup) => {
                    self.recv_vfds.clear();
                    self.current_recv_vfd = None;
                }
            }
        }
        self.in_queue.pop_front();
    }
}

// Everything needed to service the device's queues: guest memory, the interrupt used to notify
// the guest, the in and out queues, and the wayland state.
struct Worker {
    mem: GuestMemory,
    // Written to in order to signal the guest after the interrupt status was updated.
    interrupt_evt: EventFd,
    interrupt_status: Arc<AtomicUsize>,
    in_queue: Queue,
    out_queue: Queue,
    state: WlState,
    // NOTE(review): presumably descriptor chains (index, address, length) taken from the in
    // queue and waiting for data to deliver -- confirm against `run`.
    in_desc_chains: VecDeque<(u16, GuestAddress, u32)>,
}

impl Worker {
    /// Builds a worker around the given queues and interrupt resources.
    ///
    /// `wayland_path`, `vm_socket` and `use_transition_flags` are forwarded to `WlState::new`.
    /// The buffer of pending in-queue descriptors is pre-sized to `QUEUE_SIZE` entries.
    fn new(mem: GuestMemory,
           interrupt_evt: EventFd,
           interrupt_status: Arc<AtomicUsize>,
           in_queue: Queue,
           out_queue: Queue,
           wayland_path: PathBuf,
           vm_socket: UnixDatagram,
           use_transition_flags: bool)
           -> Worker {
        Worker {
            mem: mem,
            interrupt_evt: interrupt_evt,
            interrupt_status: interrupt_status,
            in_queue: in_queue,
            out_queue: out_queue,
            state: WlState::new(wayland_path, vm_socket, use_transition_flags),
            in_desc_chains: VecDeque::with_capacity(QUEUE_SIZE as usize),
        }
    }

    /// Sets the used-ring bit in the shared interrupt status and writes to the interrupt event.
    ///
    /// A failed write to the event is deliberately ignored.
    fn signal_used_queue(&self) {
        self.interrupt_status
            .fetch_or(INTERRUPT_STATUS_USED_RING as usize, Ordering::SeqCst);
        let _ = self.interrupt_evt.write(1);
    }

    /// Runs the worker's event loop until `kill_evt` fires or polling fails.
    ///
    /// `queue_evts` must hold at least two events: the first is used for the in queue and the
    /// second for the out queue; fewer entries make `remove(0)` panic. On every wakeup the loop
    /// handles the ready events, then fills as many buffered in-queue descriptors as there are
    /// pending responses from `self.state`, and finally signals the guest once if any
    /// descriptor was returned to a used ring.
    fn run(&mut self, mut queue_evts: Vec<EventFd>, kill_evt: EventFd) {
        let in_queue_evt = queue_evts.remove(0);
        let out_queue_evt = queue_evts.remove(0);
        #[derive(PollToken)]
        enum Token {
            InQueue,
            OutQueue,
            Kill,
            State,
        }

        let poll_ctx: PollContext<Token> =
            match PollContext::new()
                      .and_then(|pc| pc.add(&in_queue_evt, Token::InQueue).and(Ok(pc)))
                      .and_then(|pc| pc.add(&out_queue_evt, Token::OutQueue).and(Ok(pc)))
                      .and_then(|pc| pc.add(&kill_evt, Token::Kill).and(Ok(pc)))
                      .and_then(|pc| pc.add(&self.state.poll_ctx, Token::State).and(Ok(pc))) {
                Ok(pc) => pc,
                Err(e) => {
                    error!("failed creating PollContext: {:?}", e);
                    return;
                }
            };

        'poll: loop {
            // Set whenever a descriptor is added to a used ring during this iteration, so the
            // guest is interrupted at most once per wakeup.
            let mut signal_used = false;
            let events = match poll_ctx.wait() {
                Ok(v) => v,
                Err(e) => {
                    error!("failed polling for events: {:?}", e);
                    break;
                }
            };

            for event in events.iter() {
                match event.token() {
                    Token::InQueue => {
                        let _ = in_queue_evt.read();
                        // Used to buffer descriptor indexes that are invalid for our uses.
                        let mut rejects = [0u16; QUEUE_SIZE as usize];
                        let mut rejects_len = 0;
                        // An in-queue descriptor must have room for a `CtrlVfdRecv` followed by
                        // `VIRTWL_SEND_MAX_ALLOCS` `Le32` values.
                        let min_in_desc_len = (size_of::<CtrlVfdRecv>() +
                                               size_of::<Le32>() * VIRTWL_SEND_MAX_ALLOCS) as
                                              u32;
                        self.in_desc_chains.extend(self.in_queue.iter(&self.mem).filter_map(|d| {
                            if d.len >= min_in_desc_len && d.is_write_only() {
                                Some((d.index, d.addr, d.len))
                            } else {
                                // Can not use queue.add_used directly because it's being borrowed
                                // for the iterator chain, so we buffer the descriptor index in
                                // rejects.
                                rejects[rejects_len] = d.index;
                                rejects_len += 1;
                                None
                            }
                        }));
                        for &reject in &rejects[..rejects_len] {
                            signal_used = true;
                            self.in_queue.add_used(&self.mem, reject, 0);
                        }
                    }
                    Token::OutQueue => {
                        let _ = out_queue_evt.read();
                        // Used to buffer filled in descriptors that will be added to the used queue
                        // after iterating the available queue.
                        let mut used_descs = [(0u16, 0u32); QUEUE_SIZE as usize];
                        let mut used_descs_len = 0;
                        let min_resp_desc_len = size_of::<CtrlHeader>() as u32;
                        for desc in self.out_queue.iter(&self.mem) {
                            // Chains that are unusable get sent straight back to the used queue
                            // with a length of zero. This covers a write-only head, a missing
                            // response descriptor, and a response descriptor that is read-only
                            // or too small. The chain's own index is always recorded below, so
                            // an unusable chain is never returned under a stale index.
                            let mut used_len = 0u32;
                            // Expects that each descriptor chain is made of one "in" followed by
                            // one "out" descriptor.
                            if !desc.is_write_only() {
                                if let Some(resp_desc) = desc.next_descriptor() {
                                    if resp_desc.is_write_only() &&
                                       resp_desc.len >= min_resp_desc_len {
                                        // Parse and execute failures are reported to the guest
                                        // as an error response rather than dropped.
                                        let resp = match parse_desc(&desc, &self.mem) {
                                            Ok(op) => {
                                                match self.state.execute(&self.mem, op) {
                                                    Ok(r) => r,
                                                    Err(e) => WlResp::Err(Box::new(e)),
                                                }
                                            }
                                            Err(e) => WlResp::Err(Box::new(e)),
                                        };

                                        // NOTE(review): relies on the queue having validated the
                                        // descriptor's memory range, as stated for the in-queue
                                        // path below - confirm.
                                        let resp_mem = self.mem
                                            .get_slice(resp_desc.addr.0, resp_desc.len as u64)
                                            .unwrap();
                                        used_len = encode_resp(resp_mem, resp)
                                            .unwrap_or_default();
                                    }
                                }
                            }
                            used_descs[used_descs_len] = (desc.index, used_len);
                            used_descs_len += 1;
                        }
                        for &(index, len) in &used_descs[..used_descs_len] {
                            signal_used = true;
                            self.out_queue.add_used(&self.mem, index, len);
                        }
                    }
                    Token::Kill => break 'poll,
                    Token::State => self.state.process_poll_context(),
                }
            }

            // Because this loop should be retried after the in queue is usable or after one of the
            // VFDs was read, we do it after the poll event responses.
            while !self.in_desc_chains.is_empty() {
                // The pop is deferred until `in_resp`, which borrows from `self.state`, is no
                // longer in use.
                let mut should_pop = false;
                if let Some(in_resp) = self.state.next_recv() {
                    // self.in_desc_chains is not empty (checked by loop condition) so unwrap is
                    // safe.
                    let (index, addr, desc_len) = self.in_desc_chains.pop_front().unwrap();
                    // This memory location is valid because it came from a queue which always
                    // checks the descriptor memory locations.
                    let desc_mem = self.mem.get_slice(addr.0, desc_len as u64).unwrap();
                    let len = match encode_resp(desc_mem, in_resp) {
                        Ok(len) => {
                            should_pop = true;
                            len
                        }
                        Err(e) => {
                            error!("failed to encode response to descriptor chain: {:?}", e);
                            0
                        }
                    };
                    signal_used = true;
                    self.in_queue.add_used(&self.mem, index, len);
                } else {
                    break;
                }
                if should_pop {
                    self.state.pop_recv();
                }
            }

            if signal_used {
                self.signal_used_queue();
            }
        }
    }
}

/// The virtio Wayland device as seen by the VMM; the actual work happens on a worker thread
/// spawned by `activate`.
pub struct Wl {
    // `None` until `activate` creates the kill event; written to when the device is dropped.
    kill_evt: Option<EventFd>,
    // Cloned and handed to the worker on activation.
    wayland_path: PathBuf,
    // Taken by `activate` and moved into the worker thread.
    vm_socket: Option<UnixDatagram>,
    // Set once the driver acknowledges `VIRTIO_WL_F_TRANS_FLAGS`.
    use_transition_flags: bool,
}

impl Wl {
    /// Creates an inactive device that remembers `wayland_path` and owns `vm_socket`.
    ///
    /// No kill event exists yet and transition flags start out disabled. This constructor
    /// always returns `Ok`.
    pub fn new<P: AsRef<Path>>(wayland_path: P, vm_socket: UnixDatagram) -> Result<Wl> {
        let device = Wl {
            kill_evt: None,
            wayland_path: wayland_path.as_ref().to_owned(),
            vm_socket: Some(vm_socket),
            use_transition_flags: false,
        };
        Ok(device)
    }
}

impl Drop for Wl {
    /// Writes to the kill event, if one was ever created, when the device goes away.
    fn drop(&mut self) {
        match self.kill_evt.take() {
            Some(evt) => {
                // A failed write is ignored: nothing useful can be done about it while dropping.
                let _ = evt.write(1);
            }
            None => {}
        }
    }
}

impl VirtioDevice for Wl {
    fn keep_fds(&self) -> Vec<RawFd> {
        let mut keep_fds = Vec::new();

        if let Some(ref vm_socket) = self.vm_socket {
            keep_fds.push(vm_socket.as_raw_fd());
        }

        keep_fds
    }

    fn device_type(&self) -> u32 {
        TYPE_WL
    }

    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    fn features(&self, page: u32) -> u32 {
        match page {
            0 => (1 << VIRTIO_WL_F_TRANS_FLAGS),
            _ => 0x0,
        }
    }

    fn ack_features(&mut self, page: u32, value: u32) {
        if page == 0 && value & (1 << VIRTIO_WL_F_TRANS_FLAGS) != 0 {
            self.use_transition_flags = true;
        }
    }

    fn activate(&mut self,
                mem: GuestMemory,
                interrupt_evt: EventFd,
                status: Arc<AtomicUsize>,
                mut queues: Vec<Queue>,
                queue_evts: Vec<EventFd>) {
        if queues.len() != QUEUE_SIZES.len() || queue_evts.len() != QUEUE_SIZES.len() {
            return;
        }

        let (self_kill_evt, kill_evt) =
            match EventFd::new().and_then(|e| Ok((e.try_clone()?, e))) {
                Ok(v) => v,
                Err(e) => {
                    error!("failed creating kill EventFd pair: {:?}", e);
                    return;
                }
            };
        self.kill_evt = Some(self_kill_evt);

        if let Some(vm_socket) = self.vm_socket.take() {
            let wayland_path = self.wayland_path.clone();
            let use_transition_flags = self.use_transition_flags;
            let worker_result = thread::Builder::new()
                .name("virtio_wl".to_string())
                .spawn(move || {
                    Worker::new(mem,
                                interrupt_evt,
                                status,
                                queues.remove(0),
                                queues.remove(0),
                                wayland_path,
                                vm_socket,
                                use_transition_flags)
                            .run(queue_evts, kill_evt);
                });

            if let Err(e) = worker_result {
                error!("failed to spawn virtio_wl worker: {}", e);
                return;
            }
        }
    }
}