diff options
author | Alyssa Ross <hi@alyssa.is> | 2020-03-09 03:02:23 +0000 |
---|---|---|
committer | Alyssa Ross <hi@alyssa.is> | 2020-03-09 03:02:23 +0000 |
commit | 1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2 (patch) | |
tree | 0f44cc95b797eb50f78bf4246bdeb908f5981d7e | |
parent | da70e9a5c947c0fef40479cc99ae13f52b4e1065 (diff) | |
parent | 9515b05c086c55b9e3fbddbc56fb6eb3e9a510a8 (diff) | |
download | crosvm-1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2.tar crosvm-1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2.tar.gz crosvm-1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2.tar.bz2 crosvm-1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2.tar.lz crosvm-1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2.tar.xz crosvm-1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2.tar.zst crosvm-1fb99239c0e5976cbad2fa8fdc45f15d219f7ed2.zip |
Merge remote-tracking branch 'origin/master' into master
86 files changed, 6154 insertions, 1168 deletions
diff --git a/Cargo.lock b/Cargo.lock index 1aea075..bb75235 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12,10 +12,18 @@ dependencies = [ "kvm 0.1.0", "kvm_sys 0.1.0", "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", - "remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "resources 0.1.0", "sync 0.1.0", "sys_util 0.1.0", + "vm_control 0.1.0", +] + +[[package]] +name = "acpi_tables" +version = "0.1.0" +dependencies = [ + "data_model 0.1.0", ] [[package]] @@ -30,6 +38,7 @@ dependencies = [ "resources 0.1.0", "sync 0.1.0", "sys_util 0.1.0", + "vm_control 0.1.0", ] [[package]] @@ -84,10 +93,23 @@ dependencies = [ ] [[package]] +name = "cros_async" +version = "0.1.0" +dependencies = [ + "futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "paste 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "pin-utils 0.1.0-alpha.4 (registry+https://github.com/rust-lang/crates.io-index)", + "sys_util 0.1.0", + "syscall_defines 0.1.0", +] + +[[package]] name = "crosvm" version = "0.1.0" dependencies = [ "aarch64 0.1.0", + "acpi_tables 0.1.0", "arch 0.1.0", "assertions 0.1.0", "audio_streams 0.1.0", @@ -106,13 +128,14 @@ dependencies = [ "kvm_sys 0.1.0", "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", "libcras 0.1.0", + "minijail-sys 0.0.11", "msg_socket 0.1.0", "net_util 0.1.0", "p9 0.1.0", "protobuf 2.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "protos 0.1.0", "rand_ish 0.1.0", - "remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "resources 0.1.0", "sync 0.1.0", "sys_util 0.1.0", @@ -164,7 +187,7 @@ dependencies = [ "net_util 0.1.0", "p9 0.1.0", "protos 0.1.0", - "remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "resources 0.1.0", "sync 0.1.0", "sys_util 0.1.0", @@ -186,7 +209,7 @@ dependencies = [ "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", "protobuf 2.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "protos 0.1.0", - "remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "sys_util 0.1.0", ] @@ -200,6 +223,88 @@ dependencies = [ ] [[package]] +name = "futures" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "futures-channel 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-executor 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-io 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "futures-channel" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "futures-core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "futures-core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "futures-executor" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "futures-core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "futures-io" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "futures-macro" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "futures-sink" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "futures-task" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "futures-util" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "futures-channel 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-io 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-macro 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "pin-utils 0.1.0-alpha.4 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-nested 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "getopts" version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -240,7 +345,7 @@ name = "io_jail" version = "0.1.0" dependencies = [ "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", - "net_sys 0.1.0", + "minijail-sys 0.0.11", ] [[package]] @@ -311,6 +416,19 @@ dependencies = [ ] [[package]] +name = "memchr" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "minijail-sys" +version = "0.0.11" +dependencies = [ + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] name = "msg_on_socket_derive" version = "0.1.0" dependencies = [ @@ -323,7 +441,10 @@ dependencies = [ name = "msg_socket" version = "0.1.0" dependencies = [ + "cros_async 0.1.0", "data_model 0.1.0", + "futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", "msg_on_socket_derive 0.1.0", "sys_util 0.1.0", ] @@ -362,6 +483,31 @@ dependencies = [ ] [[package]] +name = "paste" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "paste-impl 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "paste-impl" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "pin-utils" +version = "0.1.0-alpha.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "pkg-config" version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -376,14 +522,21 @@ dependencies = [ ] [[package]] -name = "proc-macro2" -version = "0.4.21" +name = "proc-macro-hack" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] +name = "proc-macro-nested" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "proc-macro2" version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -444,14 +597,6 @@ dependencies = [ [[package]] name = "quote" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "proc-macro2 0.4.21 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "quote" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ @@ -464,12 +609,12 @@ version = "0.1.0" [[package]] name = "remain" -version = "0.1.2" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "proc-macro2 0.4.21 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -483,14 +628,9 @@ dependencies = [ ] [[package]] -name = "syn" -version = "0.15.26" +name = "slab" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "proc-macro2 0.4.21 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "syn" @@ -551,11 +691,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "unicode-xid" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "unicode-xid" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -573,7 +708,7 @@ dependencies = [ "assertions 0.1.0", "data_model 0.1.0", "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", - "remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "sys_util 0.1.0", "usb_sys 0.1.0", ] @@ -628,9 +763,9 @@ dependencies = [ name = "x86_64" version = "0.1.0" dependencies = [ + "acpi_tables 0.1.0", "arch 0.1.0", "assertions 0.1.0", - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", "data_model 0.1.0", "devices 0.1.0", "io_jail 0.1.0", @@ -639,32 +774,45 @@ dependencies = [ "kvm 0.1.0", "kvm_sys 0.1.0", "libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)", - "remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "resources 0.1.0", "sync 0.1.0", "sys_util 0.1.0", + "vm_control 0.1.0", ] [metadata] "checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" "checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" "checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3" +"checksum futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b6f16056ecbb57525ff698bb955162d0cd03bee84e6241c27ff75c08d8ca5987" +"checksum futures-channel 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fcae98ca17d102fd8a3603727b9259fcf7fa4239b603d2142926189bc8999b86" +"checksum futures-core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "79564c427afefab1dfb3298535b21eda083ef7935b4f0ecbfcb121f0aec10866" +"checksum futures-executor 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1e274736563f686a837a0568b478bdabfeaec2dca794b5649b04e2fe1627c231" +"checksum futures-io 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e676577d229e70952ab25f3945795ba5b16d63ca794ca9d2c860e5595d20b5ff" +"checksum futures-macro 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "52e7c56c15537adb4f76d0b7a76ad131cb4d2f4f32d3b0bcabcbe1c7c5e87764" +"checksum futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "171be33efae63c2d59e6dbba34186fe0d6394fb378069a76dfd80fdcffd43c16" +"checksum futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bae52d6b29cf440e298856fec3965ee6fa71b06aa7495178615953fd669e5f9" +"checksum futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c0d66274fb76985d3c62c886d1da7ac4c0903a8c9f754e8fe0f35a6a6cc39e76" "checksum getopts 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "0a7292d30132fb5424b354f5dc02512a86e4c516fe544bb7a25e7f266951b797" "checksum libc 0.2.44 (registry+https://github.com/rust-lang/crates.io-index)" = "10923947f84a519a45c8fefb7dd1b3e8c08747993381adee176d7a82b4195311" "checksum log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fcce5fa49cc693c312001daf1d13411c4a5283796bac1084299ea3e567113f" +"checksum memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" "checksum num_cpus 1.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5a69d464bdc213aaaff628444e99578ede64e9c854025aa43b9796530afa9238" +"checksum paste 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "423a519e1c6e828f1e73b720f9d9ed2fa643dce8a7737fb43235ce0b41eeaa49" +"checksum paste-impl 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "4214c9e912ef61bf42b81ba9a47e8aad1b2ffaf739ab162bf96d1e011f54e6c5" +"checksum pin-utils 0.1.0-alpha.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5894c618ce612a3fa23881b152b608bafb8c56cfc22f434a3ba3120b40f7b587" "checksum pkg-config 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)" = "110d5ee3593dbb73f56294327fe5668bcc997897097cbc76b51e7aed3f52452f" -"checksum proc-macro2 0.4.21 (registry+https://github.com/rust-lang/crates.io-index)" = "ab2fc21ba78ac73e4ff6b3818ece00be4e175ffbef4d0a717d978b48b24150c4" +"checksum proc-macro-hack 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)" = "ecd45702f76d6d3c75a80564378ae228a85f0b59d2f3ed43c91b4a69eb2ebfc5" +"checksum proc-macro-nested 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "369a6ed065f249a159e06c45752c780bda2fb53c995718f9e484d08daa9eb42e" "checksum proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3acb317c6ff86a4e579dfa00fc5e6cca91ecbb4e7eb2df0468805b674eb88548" "checksum protobuf 2.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "40361836defdd5871ff7e84096c6f6444af7fc157f8ef1789f54f147687caa20" "checksum protobuf-codegen 2.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "12c6abd78435445fc86898ebbd0521a68438063d4a73e23527b7134e6bf58b4a" "checksum protoc 2.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3998c4bc0af8ccbd3cc68245ee9f72663c5ae2fb78bc48ff7719aef11562edea" "checksum protoc-rust 2.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "234c97039c32bb58a883d0deafa57db37e59428ce536f3bdfe1c46cffec04113" -"checksum quote 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "53fa22a1994bd0f9372d7a816207d8a2677ad0325b073f5c5332760f0fb62b5c" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" -"checksum remain 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3bec2543b50be4539fdc27fde082e218cf4c3895358ca77f5c52fe930589e209" -"checksum syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)" = "f92e629aa1d9c827b2bb8297046c1ccffc57c99b947a680d3ccff1f136a3bee9" +"checksum remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "99c861227fc40c8da6fdaa3d58144ac84c0537080a43eb1d7d45c28f88dcb888" +"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" "checksum syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "af6f3550d8dff9ef7dc34d384ac6f107e5d31c8f57d9f28e0081503f547ac8f5" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" -"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" diff --git a/Cargo.toml b/Cargo.toml index e626ae0..66a616a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ wl-dmabuf = ["devices/wl-dmabuf", "gpu_buffer", "resources/wl-dmabuf"] x = ["devices/x"] virtio-gpu-next = ["gpu_renderer/virtio-gpu-next"] composite-disk = ["protos/composite-disk", "protobuf", "disk/composite-disk"] +gfxstream = ["devices/gfxstream"] [dependencies] arch = { path = "arch" } @@ -58,6 +59,7 @@ kvm = { path = "kvm" } kvm_sys = { path = "kvm_sys" } libc = "0.2.44" libcras = "*" +minijail-sys = "*" # provided by ebuild msg_socket = { path = "msg_socket" } net_util = { path = "net_util" } p9 = { path = "p9" } @@ -70,6 +72,7 @@ sync = { path = "sync" } sys_util = "*" vhost = { path = "vhost" } vm_control = { path = "vm_control" } +acpi_tables = { path = "acpi_tables" } [target.'cfg(target_arch = "x86_64")'.dependencies] x86_64 = { path = "x86_64" } @@ -85,6 +88,7 @@ assertions = { path = "assertions" } audio_streams = { path = "../../third_party/adhd/audio_streams" } # ignored by ebuild data_model = { path = "data_model" } libcras = { path = "../../third_party/adhd/cras/client/libcras" } # ignored by ebuild +minijail-sys = { path = "../../aosp/external/minijail" } # ignored by ebuild poll_token_derive = { path = "sys_util/poll_token_derive" } sync = { path = "sync" } sys_util = { path = "sys_util" } diff --git a/README.md b/README.md index 92fd1b4..09e8c1c 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,11 @@ makes crosvm unique is a focus on safety within the programming language and a sandbox around the virtual devices to protect the kernel from attack in case of an exploit in the devices. +## IRC + +The channel #crosvm on [freenode](https://webchat.freenode.net/#crosvm) is used +for technical discussion related to crosvm development and integration. + ## Building with Docker See the [README](docker/README.md) from the `docker` subdirectory to learn how diff --git a/aarch64/Cargo.toml b/aarch64/Cargo.toml index 8c754d1..7e346ac 100644 --- a/aarch64/Cargo.toml +++ b/aarch64/Cargo.toml @@ -17,3 +17,4 @@ remain = "*" resources = { path = "../resources" } sync = { path = "../sync" } sys_util = { path = "../sys_util" } +vm_control = { path = "../vm_control" } diff --git a/aarch64/src/fdt.rs b/aarch64/src/fdt.rs index 23d0481..9dcafa5 100644 --- a/aarch64/src/fdt.rs +++ b/aarch64/src/fdt.rs @@ -197,6 +197,12 @@ fn create_chosen_node( let kaslr_seed = u64::from_le_bytes(kaslr_seed_bytes); property_u64(fdt, "kaslr-seed", kaslr_seed)?; + let mut rng_seed_bytes = [0u8; 256]; + random_file + .read_exact(&mut rng_seed_bytes) + .map_err(Error::FdtIoError)?; + property(fdt, "rng-seed", &rng_seed_bytes)?; + if let Some((initrd_addr, initrd_size)) = initrd { let initrd_start = initrd_addr.offset() as u32; let initrd_end = initrd_start + initrd_size as u32; diff --git a/aarch64/src/lib.rs b/aarch64/src/lib.rs index 13855a6..f8a36b9 100644 --- a/aarch64/src/lib.rs +++ b/aarch64/src/lib.rs @@ -21,6 +21,7 @@ use remain::sorted; use resources::SystemAllocator; use sync::Mutex; use sys_util::{EventFd, GuestAddress, GuestMemory, GuestMemoryError}; +use vm_control::VmIrqRequestSocket; use kvm::*; use kvm_sys::kvm_device_attr; @@ -195,6 +196,7 @@ impl arch::LinuxArch for AArch64 { fn build_vm<F, E>( mut components: VmComponents, _split_irqchip: bool, + _ioapic_device_socket: VmIrqRequestSocket, serial_parameters: &BTreeMap<u8, SerialParameters>, serial_jail: Option<Minijail>, create_devices: F, @@ -237,11 +239,20 @@ impl arch::LinuxArch for AArch64 { let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?; + // Event used by PMDevice to notify crosvm that + // guest OS is trying to suspend. + let suspend_evt = EventFd::new().map_err(Error::CreateEventFd)?; + let pci_devices = create_devices(&mem, &mut vm, &mut resources, &exit_evt) .map_err(|e| Error::CreateDevices(Box::new(e)))?; - let (pci, pci_irqs, pid_debug_label_map) = - arch::generate_pci_root(pci_devices, &mut mmio_bus, &mut resources, &mut vm) - .map_err(Error::CreatePciRoot)?; + let (pci, pci_irqs, pid_debug_label_map) = arch::generate_pci_root( + pci_devices, + &mut None, + &mut mmio_bus, + &mut resources, + &mut vm, + ) + .map_err(Error::CreatePciRoot)?; let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci))); // ARM doesn't really use the io bus like x86, so just create an empty bus. @@ -310,9 +321,12 @@ impl arch::LinuxArch for AArch64 { vcpus, vcpu_affinity, irq_chip, + split_irqchip: None, + gsi_relay: None, io_bus, mmio_bus, pid_debug_label_map, + suspend_evt, }) } } diff --git a/acpi_tables/Cargo.toml b/acpi_tables/Cargo.toml new file mode 100644 index 0000000..80518bb --- /dev/null +++ b/acpi_tables/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "acpi_tables" +version = "0.1.0" +authors = ["The Chromium OS Authors"] +edition = "2018" + +[dependencies] +data_model = { path = "../data_model" } diff --git a/acpi_tables/src/lib.rs b/acpi_tables/src/lib.rs new file mode 100644 index 0000000..49cf760 --- /dev/null +++ b/acpi_tables/src/lib.rs @@ -0,0 +1,12 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +pub mod rsdp; +pub mod sdt; + +pub use self::sdt::HEADER_LEN; + +fn generate_checksum(data: &[u8]) -> u8 { + (255 - data.iter().fold(0u8, |acc, x| acc.wrapping_add(*x))).wrapping_add(1) +} diff --git a/acpi_tables/src/rsdp.rs b/acpi_tables/src/rsdp.rs new file mode 100644 index 0000000..4bf64c9 --- /dev/null +++ b/acpi_tables/src/rsdp.rs @@ -0,0 +1,67 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +use data_model::DataInit; + +#[repr(packed)] +#[derive(Clone, Copy, Default)] +pub struct RSDP { + pub signature: [u8; 8], + pub checksum: u8, + pub oem_id: [u8; 6], + pub revision: u8, + _rsdt_addr: u32, + pub length: u32, + pub xsdt_addr: u64, + pub extended_checksum: u8, + _reserved: [u8; 3], +} + +// Safe as RSDP structure only contains raw data +unsafe impl DataInit for RSDP {} + +impl RSDP { + pub fn new(oem_id: [u8; 6], xsdt_addr: u64) -> Self { + let mut rsdp = RSDP { + signature: *b"RSD PTR ", + checksum: 0, + oem_id, + revision: 2, + _rsdt_addr: 0, + length: std::mem::size_of::<RSDP>() as u32, + xsdt_addr, + extended_checksum: 0, + _reserved: [0; 3], + }; + + rsdp.checksum = super::generate_checksum(&rsdp.as_slice()[0..19]); + rsdp.extended_checksum = super::generate_checksum(&rsdp.as_slice()); + rsdp + } + + pub fn len() -> usize { + std::mem::size_of::<RSDP>() + } +} + +#[cfg(test)] +mod tests { + use super::RSDP; + use data_model::DataInit; + + #[test] + fn test_rsdp() { + let rsdp = RSDP::new(*b"CHYPER", 0xdead_beef); + let sum = rsdp + .as_slice() + .iter() + .fold(0u8, |acc, x| acc.wrapping_add(*x)); + assert_eq!(sum, 0); + let sum: u8 = rsdp + .as_slice() + .iter() + .fold(0u8, |acc, x| acc.wrapping_add(*x)); + assert_eq!(sum, 0); + } +} diff --git a/acpi_tables/src/sdt.rs b/acpi_tables/src/sdt.rs new file mode 100644 index 0000000..e8a9ea2 --- /dev/null +++ b/acpi_tables/src/sdt.rs @@ -0,0 +1,107 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +use data_model::DataInit; + +/// SDT represents for System Description Table. The structure SDT is a +/// generic format for creating various ACPI tables like DSDT/FADT/MADT. +pub struct SDT { + data: Vec<u8>, +} + +pub const HEADER_LEN: u32 = 36; +const LENGTH_OFFSET: usize = 4; +const CHECKSUM_OFFSET: usize = 9; + +#[allow(clippy::len_without_is_empty)] +impl SDT { + /// Set up the ACPI table header at the front of the SDT. + /// The arguments correspond to the elements in the ACPI + /// table headers. + pub fn new( + signature: [u8; 4], + length: u32, + revision: u8, + oem_id: [u8; 6], + oem_table: [u8; 8], + oem_revision: u32, + ) -> Self { + // The length represents for the length of the entire table + // which includes this header. And the header is 36 bytes, so + // lenght should be >= 36. For the case who gives a number less + // than the header len, use the header len directly. + let len: u32 = if length < HEADER_LEN { + HEADER_LEN + } else { + length + }; + let mut data = Vec::with_capacity(length as usize); + data.extend_from_slice(&signature); + data.extend_from_slice(&len.to_le_bytes()); + data.push(revision); + data.push(0); // checksum + data.extend_from_slice(&oem_id); + data.extend_from_slice(&oem_table); + data.extend_from_slice(&oem_revision.to_le_bytes()); + data.extend_from_slice(b"CROS"); + data.extend_from_slice(&0u32.to_le_bytes()); + + data.resize(length as usize, 0); + let mut sdt = SDT { data }; + + sdt.update_checksum(); + sdt + } + + fn update_checksum(&mut self) { + self.data[CHECKSUM_OFFSET] = 0; + let checksum = super::generate_checksum(self.data.as_slice()); + self.data[CHECKSUM_OFFSET] = checksum; + } + + pub fn as_slice(&self) -> &[u8] { + &self.data.as_slice() + } + + pub fn append<T: DataInit>(&mut self, value: T) { + self.data.extend_from_slice(value.as_slice()); + self.write(LENGTH_OFFSET, self.data.len() as u32); + } + + /// Write a value at the given offset + pub fn write<T: DataInit>(&mut self, offset: usize, value: T) { + let value_len = std::mem::size_of::<T>(); + if (offset + value_len) > self.data.len() { + return; + } + + self.data[offset..offset + value_len].copy_from_slice(&value.as_slice()); + self.update_checksum(); + } + + pub fn len(&self) -> usize { + self.data.len() + } +} + +#[cfg(test)] +mod tests { + use super::SDT; + + #[test] + fn test_sdt() { + let mut sdt = SDT::new(*b"TEST", 40, 1, *b"CROSVM", *b"TESTTEST", 1); + let sum: u8 = sdt + .as_slice() + .iter() + .fold(0u8, |acc, x| acc.wrapping_add(*x)); + assert_eq!(sum, 0); + sdt.write(36, 0x12345678 as u32); + let sum: u8 = sdt + .as_slice() + .iter() + .fold(0u8, |acc, x| acc.wrapping_add(*x)); + assert_eq!(sum, 0); + } +} diff --git a/arch/Cargo.toml b/arch/Cargo.toml index bf28560..6b4e529 100644 --- a/arch/Cargo.toml +++ b/arch/Cargo.toml @@ -13,3 +13,4 @@ libc = "*" resources = { path = "../resources" } sync = { path = "../sync" } sys_util = { path = "../sys_util" } +vm_control = { path = "../vm_control" } diff --git a/arch/src/lib.rs b/arch/src/lib.rs index 33259de..ab08c21 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -15,6 +15,7 @@ use std::os::unix::io::AsRawFd; use std::path::PathBuf; use std::sync::Arc; +use devices::split_irqchip_common::GsiRelay; use devices::virtio::VirtioDevice; use devices::{ Bus, BusDevice, BusError, PciDevice, PciDeviceError, PciInterruptPin, PciRoot, ProxyDevice, @@ -25,6 +26,7 @@ use kvm::{IoeventAddress, Kvm, Vcpu, Vm}; use resources::SystemAllocator; use sync::Mutex; use sys_util::{syslog, EventFd, GuestAddress, GuestMemory, GuestMemoryError}; +use vm_control::VmIrqRequestSocket; pub enum VmImage { Kernel(File), @@ -60,9 +62,12 @@ pub struct RunnableLinuxVm { pub vcpus: Vec<Vcpu>, pub vcpu_affinity: Vec<usize>, pub irq_chip: Option<File>, + pub split_irqchip: Option<(Arc<Mutex<devices::Pic>>, Arc<Mutex<devices::Ioapic>>)>, + pub gsi_relay: Option<Arc<GsiRelay>>, pub io_bus: Bus, pub mmio_bus: Bus, pub pid_debug_label_map: BTreeMap<u32, String>, + pub suspend_evt: EventFd, } /// The device and optional jail. @@ -87,6 +92,7 @@ pub trait LinuxArch { fn build_vm<F, E>( components: VmComponents, split_irqchip: bool, + ioapic_device_socket: VmIrqRequestSocket, serial_parameters: &BTreeMap<u8, SerialParameters>, serial_jail: Option<Minijail>, create_devices: F, @@ -114,6 +120,8 @@ pub enum DeviceRegistrationError { CreatePipe(sys_util::Error), // Unable to create serial device from serial parameters CreateSerialDevice(devices::SerialError), + /// Could not clone an event fd. + EventFdClone(sys_util::Error), /// Could not create an event fd. EventFdCreate(sys_util::Error), /// Could not add a device to the mmio bus. @@ -145,6 +153,7 @@ impl Display for DeviceRegistrationError { CreatePipe(e) => write!(f, "failed to create pipe: {}", e), CreateSerialDevice(e) => write!(f, "failed to create serial device: {}", e), Cmdline(e) => write!(f, "unable to add device to kernel command line: {}", e), + EventFdClone(e) => write!(f, "failed to clone eventfd: {}", e), EventFdCreate(e) => write!(f, "failed to create eventfd: {}", e), MmioInsert(e) => write!(f, "failed to add to mmio bus: {}", e), RegisterIoevent(e) => write!(f, "failed to register ioevent to VM: {}", e), @@ -162,6 +171,7 @@ impl Display for DeviceRegistrationError { /// Creates a root PCI device for use by this Vm. pub fn generate_pci_root( devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>, + gsi_relay: &mut Option<GsiRelay>, mmio_bus: &mut Bus, resources: &mut SystemAllocator, vm: &mut Vm, @@ -189,8 +199,20 @@ pub fn generate_pci_root( 3 => PciInterruptPin::IntD, _ => unreachable!(), // Obviously not possible, but the compiler is not smart enough. }; - vm.register_irqfd_resample(&irqfd, &irq_resample_fd, irq_num) - .map_err(DeviceRegistrationError::RegisterIrqfd)?; + if let Some(relay) = gsi_relay { + relay.register_irqfd_resample( + irqfd + .try_clone() + .map_err(DeviceRegistrationError::EventFdClone)?, + irq_resample_fd + .try_clone() + .map_err(DeviceRegistrationError::EventFdClone)?, + irq_num as usize, + ); + } else { + vm.register_irqfd_resample(&irqfd, &irq_resample_fd, irq_num) + .map_err(DeviceRegistrationError::RegisterIrqfd)?; + } keep_fds.push(irqfd.as_raw_fd()); keep_fds.push(irq_resample_fd.as_raw_fd()); device.assign_irq(irqfd, irq_resample_fd, irq_num, pci_irq_pin); diff --git a/build_test.py b/build_test.py index 93bd3cd..4745138 100755 --- a/build_test.py +++ b/build_test.py @@ -160,10 +160,20 @@ def check_build(sysroot, triple, kind, test_it, clean): is_release = kind == 'release' + # The pkgconfig dir could be in either lib or lib64 depending on the target. + # Rather than checking to see which one is valid, just add both and let + # pkg-config search. + libdir = os.path.join(sysroot, 'usr', 'lib', 'pkgconfig') + lib64dir = os.path.join(sysroot, 'usr', 'lib64', 'pkgconfig') + env = os.environ.copy() env['TARGET_CC'] = '%s-clang'%triple env['SYSROOT'] = sysroot env['CARGO_TARGET_DIR'] = target_path + env['PKG_CONFIG_ALLOW_CROSS'] = '1' + env['PKG_CONFIG_LIBDIR'] = libdir + ':' + lib64dir + env['PKG_CONFIG_SYSROOT_DIR'] = sysroot + env['RUSTFLAGS'] = '-C linker=' + env['TARGET_CC'] if test_it: if not test_target(triple, is_release, env): diff --git a/crosvm_plugin/crosvm.h b/crosvm_plugin/crosvm.h index 63763f1..d1bea2a 100644 --- a/crosvm_plugin/crosvm.h +++ b/crosvm_plugin/crosvm.h @@ -47,7 +47,7 @@ extern "C" { * do not indicate anything about what version of crosvm is running. */ #define CROSVM_API_MAJOR 0 -#define CROSVM_API_MINOR 19 +#define CROSVM_API_MINOR 22 #define CROSVM_API_PATCH 0 enum crosvm_address_space { @@ -115,6 +115,14 @@ int crosvm_check_extension(struct crosvm*, uint32_t __extension, bool *has_extension); /* + * Enable an extended capability for the VM. Currently |__flags| and + * |__args| must be zero. No values for |__capability| are supported, + * so all calls will fail. + */ +int crosvm_enable_capability(struct crosvm*, uint32_t __capability, + uint32_t __flags, uint64_t __args[4]); + +/* * Queries x86 cpuid features which are supported by the hardware and * kvm. */ @@ -130,6 +138,13 @@ int crosvm_get_emulated_cpuid(struct crosvm*, uint32_t __entry_count, uint32_t *__out_count); /* + * Queries x86 hyper-v cpuid features which are emulated by kvm. + */ +int crosvm_get_hyperv_cpuid(struct crosvm_vcpu*, uint32_t __entry_count, + struct kvm_cpuid_entry2 *__cpuid_entries, + uint32_t *__out_count); + +/* * Queries kvm for list of supported MSRs. */ int crosvm_get_msr_index_list(struct crosvm*, uint32_t __entry_count, @@ -480,6 +495,16 @@ enum crosvm_vcpu_event_kind { * a `crosvm_pause_vcpus` call. */ CROSVM_VCPU_EVENT_KIND_PAUSED, + + /* + * Hyper-V hypercall. + */ + CROSVM_VCPU_EVENT_KIND_HYPERV_HCALL, + + /* + * Hyper-V synic change. + */ + CROSVM_VCPU_EVENT_KIND_HYPERV_SYNIC, }; struct crosvm_vcpu_event { @@ -538,6 +563,31 @@ struct crosvm_vcpu_event { /* CROSVM_VCPU_EVENT_KIND_PAUSED */ void *user; + /* CROSVM_VCPU_EVENT_KIND_HYPERV_HCALL */ + struct { + /* + * The |input| and |params| members are populated for the plugin to use. + * The |result| member is populated by the API to point to a uint64_t + * that the plugin should update before resuming. + */ + uint64_t input; + uint64_t *result; + uint64_t params[2]; + } hyperv_call; + + /* CROSVM_VCPU_EVENT_KIND_HYPERV_SYNIC */ + struct { + /* + * The |msr|, |control|, |evt_page|, and |msg_page| fields are populated + * for the plugin to use. + */ + uint32_t msr; + uint32_t _reserved; + uint64_t control; + uint64_t evt_page; + uint64_t msg_page; + } hyperv_synic; + uint8_t _reserved[64]; }; }; @@ -626,6 +676,15 @@ int crosvm_vcpu_set_msrs(struct crosvm_vcpu*, uint32_t __msr_count, int crosvm_vcpu_set_cpuid(struct crosvm_vcpu*, uint32_t __cpuid_count, const struct kvm_cpuid_entry2 *__cpuid_entries); +/* + * Enable an extended capability for a vcpu. Currently |__flags| and + * |__args| must be zero. The only permitted values for |__capability| + * are KVM_CAP_HYPERV_SYNIC or KVM_CAP_HYPERV_SYNIC2, though the latter + * also depends on kernel support. + */ +int crosvm_vcpu_enable_capability(struct crosvm_vcpu*, uint32_t __capability, + uint32_t __flags, uint64_t __args[4]); + /* Gets state of LAPIC of the VCPU. */ int crosvm_vcpu_get_lapic_state(struct crosvm_vcpu *, struct kvm_lapic_state *__lapic_state); diff --git a/crosvm_plugin/src/lib.rs b/crosvm_plugin/src/lib.rs index eb30e4b..05c19bf 100644 --- a/crosvm_plugin/src/lib.rs +++ b/crosvm_plugin/src/lib.rs @@ -59,6 +59,8 @@ const CROSVM_IRQ_ROUTE_MSI: u32 = 1; const CROSVM_VCPU_EVENT_KIND_INIT: u32 = 0; const CROSVM_VCPU_EVENT_KIND_IO_ACCESS: u32 = 1; const CROSVM_VCPU_EVENT_KIND_PAUSED: u32 = 2; +const CROSVM_VCPU_EVENT_KIND_HYPERV_HCALL: u32 = 3; +const CROSVM_VCPU_EVENT_KIND_HYPERV_SYNIC: u32 = 4; #[repr(C)] #[derive(Copy, Clone)] @@ -166,8 +168,11 @@ pub enum Stat { DestroyConnection, GetShutdownEventFd, CheckExtentsion, + EnableVmCapability, + EnableVcpuCapability, GetSupportedCpuid, GetEmulatedCpuid, + GetHypervCpuid, GetMsrIndexList, NetGetConfig, ReserveRange, @@ -922,10 +927,30 @@ struct anon_io_access { __reserved1: [u8; 2], } +#[derive(Copy, Clone)] +#[repr(C)] +struct anon_hyperv_call { + input: u64, + result: *mut u8, + params: [u64; 2], +} + +#[derive(Copy, Clone)] +#[repr(C)] +struct anon_hyperv_synic { + msr: u32, + reserved: u32, + control: u64, + evt_page: u64, + msg_page: u64, +} + #[repr(C)] union anon_vcpu_event { io_access: anon_io_access, user: *mut c_void, + hyperv_call: anon_hyperv_call, + hyperv_synic: anon_hyperv_synic, #[allow(dead_code)] __reserved: [u8; 64], } @@ -1115,6 +1140,33 @@ impl crosvm_vcpu { self.sregs.get = false; self.debugregs.get = false; Ok(()) + } else if wait.has_hyperv_call() { + let hv: &VcpuResponse_Wait_HypervCall = wait.get_hyperv_call(); + event.kind = CROSVM_VCPU_EVENT_KIND_HYPERV_HCALL; + self.resume_data = vec![0; 8]; + event.event.hyperv_call = anon_hyperv_call { + input: hv.input, + result: self.resume_data.as_mut_ptr(), + params: [hv.params0, hv.params1], + }; + self.regs.get = false; + self.sregs.get = false; + self.debugregs.get = false; + Ok(()) + } else if wait.has_hyperv_synic() { + let hv: &VcpuResponse_Wait_HypervSynic = wait.get_hyperv_synic(); + event.kind = CROSVM_VCPU_EVENT_KIND_HYPERV_SYNIC; + event.event.hyperv_synic = anon_hyperv_synic { + msr: hv.msr, + reserved: 0, + control: hv.control, + evt_page: hv.evt_page, + msg_page: hv.msg_page, + }; + self.regs.get = false; + self.sregs.get = false; + self.debugregs.get = false; + Ok(()) } else { Err(EPROTO) } @@ -1202,6 +1254,39 @@ impl crosvm_vcpu { Ok(()) } + fn get_hyperv_cpuid( + &mut self, + cpuid_entries: &mut [kvm_cpuid_entry2], + cpuid_count: &mut usize, + ) -> result::Result<(), c_int> { + *cpuid_count = 0; + + let mut r = VcpuRequest::new(); + r.mut_get_hyperv_cpuid(); + + let response = self.vcpu_transaction(&r)?; + if !response.has_get_hyperv_cpuid() { + return Err(EPROTO); + } + + let hyperv_cpuids: &VcpuResponse_CpuidResponse = response.get_get_hyperv_cpuid(); + + *cpuid_count = hyperv_cpuids.get_entries().len(); + if *cpuid_count > cpuid_entries.len() { + return Err(E2BIG); + } + + for (proto_entry, kvm_entry) in hyperv_cpuids + .get_entries() + .iter() + .zip(cpuid_entries.iter_mut()) + { + *kvm_entry = cpuid_proto_to_kvm(proto_entry); + } + + Ok(()) + } + fn get_msrs( &mut self, msr_entries: &mut [kvm_msr_entry], @@ -1255,6 +1340,13 @@ impl crosvm_vcpu { self.vcpu_transaction(&r)?; Ok(()) } + + fn enable_capability(&mut self, capability: u32) -> result::Result<(), c_int> { + let mut r = VcpuRequest::new(); + r.mut_enable_capability().capability = capability; + self.vcpu_transaction(&r)?; + Ok(()) + } } // crosvm API signals success as 0 and errors as negative values @@ -1337,6 +1429,17 @@ pub unsafe extern "C" fn crosvm_check_extension( } #[no_mangle] +pub unsafe extern "C" fn crosvm_enable_capability( + _self_: *mut crosvm, + _capability: u32, + _flags: u32, + _args: *const u64, +) -> c_int { + let _u = record(Stat::EnableVmCapability); + -EINVAL +} + +#[no_mangle] pub unsafe extern "C" fn crosvm_get_supported_cpuid( this: *mut crosvm, entry_count: u32, @@ -1804,6 +1907,22 @@ pub unsafe extern "C" fn crosvm_vcpu_set_xcrs( } #[no_mangle] +pub unsafe extern "C" fn crosvm_get_hyperv_cpuid( + this: *mut crosvm_vcpu, + entry_count: u32, + cpuid_entries: *mut kvm_cpuid_entry2, + out_count: *mut u32, +) -> c_int { + let _u = record(Stat::GetHypervCpuid); + let this = &mut *this; + let cpuid_entries = from_raw_parts_mut(cpuid_entries, entry_count as usize); + let mut cpuid_count: usize = 0; + let ret = this.get_hyperv_cpuid(cpuid_entries, &mut cpuid_count); + *out_count = cpuid_count as u32; + to_crosvm_rc(ret) +} + +#[no_mangle] pub unsafe extern "C" fn crosvm_vcpu_get_msrs( this: *mut crosvm_vcpu, msr_count: u32, @@ -1846,6 +1965,25 @@ pub unsafe extern "C" fn crosvm_vcpu_set_cpuid( } #[no_mangle] +pub unsafe extern "C" fn crosvm_vcpu_enable_capability( + this: *mut crosvm_vcpu, + capability: u32, + flags: u32, + args: *const u64, +) -> c_int { + let _u = record(Stat::EnableVcpuCapability); + let this = &mut *this; + let args = slice::from_raw_parts(args, 4); + + if flags != 0 || args.iter().any(|v| *v != 0) { + return -EINVAL; + } + + let ret = this.enable_capability(capability); + to_crosvm_rc(ret) +} + +#[no_mangle] pub unsafe extern "C" fn crosvm_vcpu_get_lapic_state( this: *mut crosvm_vcpu, state: *mut kvm_lapic_state, diff --git a/devices/Cargo.toml b/devices/Cargo.toml index 236cf4e..83aa406 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -9,6 +9,7 @@ gpu = ["gpu_buffer", "gpu_display", "gpu_renderer"] tpm = ["protos/trunks", "tpm2"] wl-dmabuf = [] x = ["gpu_display/x"] +gfxstream = ["gpu"] [dependencies] audio_streams = "*" diff --git a/devices/src/acpi.rs b/devices/src/acpi.rs new file mode 100644 index 0000000..990a782 --- /dev/null +++ b/devices/src/acpi.rs @@ -0,0 +1,129 @@ +// Copyright 2019 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +use crate::{BusDevice, BusResumeDevice}; +use sys_util::{error, warn, EventFd}; + +/// ACPI PM resource for handling OS suspend/resume request +pub struct ACPIPMResource { + suspend_evt: EventFd, + pm1_status: u16, + pm1_enable: u16, + pm1_control: u16, + sleep_control: u8, + sleep_status: u8, +} + +impl ACPIPMResource { + /// Constructs ACPI Power Management Resouce. + pub fn new(suspend_evt: EventFd) -> ACPIPMResource { + ACPIPMResource { + suspend_evt, + pm1_status: 0, + pm1_enable: 0, + pm1_control: 0, + sleep_control: 0, + sleep_status: 0, + } + } +} + +/// the ACPI PM register's base and length. +pub const ACPIPM_RESOURCE_BASE: u64 = 0x600; +pub const ACPIPM_RESOURCE_LEN: u8 = 8; +pub const ACPIPM_RESOURCE_EVENTBLK_LEN: u8 = 4; +pub const ACPIPM_RESOURCE_CONTROLBLK_LEN: u8 = 2; + +/// ACPI PM register value definations +const PM1_STATUS: u16 = 0; +const PM1_ENABLE: u16 = 2; +const PM1_CONTROL: u16 = 4; +const SLEEP_CONTROL: u16 = 6; +const SLEEP_STATUS: u16 = 7; +const BITMASK_PM1CNT_SLEEP_ENABLE: u16 = 0x2000; +const BITMASK_SLEEPCNT_SLEEP_ENABLE: u8 = 0x20; +const BITMASK_PM1CNT_WAKE_STATUS: u16 = 0x8000; +const BITMASK_SLEEPCNT_WAKE_STATUS: u8 = 0x80; + +impl BusDevice for ACPIPMResource { + fn debug_label(&self) -> String { + "ACPIPMResource".to_owned() + } + + fn read(&mut self, offset: u64, data: &mut [u8]) { + let val = match offset as u16 { + PM1_STATUS => self.pm1_status, + PM1_ENABLE => self.pm1_enable, + PM1_CONTROL => self.pm1_control, + SLEEP_CONTROL => self.sleep_control as u16, + SLEEP_STATUS => self.sleep_status as u16, + _ => { + warn!("ACPIPM: Bad read from offset {}", offset); + return; + } + }; + + let val_arr = val.to_ne_bytes(); + for i in 0..std::mem::size_of::<u16>() { + if i < data.len() { + data[i] = val_arr[i]; + } + } + } + + fn write(&mut self, offset: u64, data: &[u8]) { + let max_bytes = std::mem::size_of::<u16>(); + + // only allow maximum max_bytes to write + if data.len() > max_bytes { + warn!("ACPIPM: bad write size: {}", data.len()); + return; + } + + let mut val_arr = u16::to_ne_bytes(0 as u16); + for i in 0..std::mem::size_of::<u16>() { + if i < data.len() { + val_arr[i] = data[i]; + } + } + let val = u16::from_ne_bytes(val_arr); + + match offset as u16 { + PM1_STATUS => self.pm1_status &= !val, + PM1_ENABLE => self.pm1_enable = val, + PM1_CONTROL => { + if (val & BITMASK_PM1CNT_SLEEP_ENABLE) == BITMASK_PM1CNT_SLEEP_ENABLE { + if let Err(e) = self.suspend_evt.write(1) { + error!("ACPIPM: failed to trigger suspend event: {}", e); + } + } + self.pm1_control = val & !BITMASK_PM1CNT_SLEEP_ENABLE; + } + SLEEP_CONTROL => { + let sleep_control = val as u8; + if (sleep_control & BITMASK_SLEEPCNT_SLEEP_ENABLE) == BITMASK_SLEEPCNT_SLEEP_ENABLE + { + if let Err(e) = self.suspend_evt.write(1) { + error!("ACPIPM: failed to trigger suspend event: {}", e); + } + } + self.sleep_control = sleep_control as u8 & !BITMASK_SLEEPCNT_SLEEP_ENABLE; + } + SLEEP_STATUS => self.sleep_status &= !val as u8, + _ => { + warn!("ACPIPM: Bad write to offset {}", offset); + } + }; + } +} + +impl BusResumeDevice for ACPIPMResource { + fn resume_imminent(&mut self) { + let val = self.pm1_status; + self.pm1_status = val | BITMASK_PM1CNT_WAKE_STATUS; + + let val = self.sleep_status; + self.sleep_status = val | BITMASK_SLEEPCNT_WAKE_STATUS; + } +} diff --git a/devices/src/bus.rs b/devices/src/bus.rs index d4b46eb..3f93974 100644 --- a/devices/src/bus.rs +++ b/devices/src/bus.rs @@ -37,6 +37,12 @@ pub trait BusDevice: Send { fn on_sandboxed(&mut self) {} } +pub trait BusResumeDevice: Send { + /// notify the devices which are invoked + /// before the VM resumes form suspend. + fn resume_imminent(&mut self) {} +} + #[derive(Debug)] pub enum Error { /// The insertion failed because the new device overlapped with an old device. @@ -104,9 +110,13 @@ impl PartialOrd for BusRange { /// /// This doesn't have any restrictions on what kind of device or address space this applies to. The /// only restriction is that no two devices can overlap in this address space. +/// +/// the 'resume_notify_devices' contains the devices which requires to be notified before the system +/// resume back from S3 suspended state. #[derive(Clone)] pub struct Bus { devices: BTreeMap<BusRange, Arc<Mutex<dyn BusDevice>>>, + resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>, } impl Bus { @@ -114,6 +124,7 @@ impl Bus { pub fn new() -> Bus { Bus { devices: BTreeMap::new(), + resume_notify_devices: Vec::new(), } } @@ -208,6 +219,19 @@ impl Bus { false } } + + /// Register `device` for notifications of VM resume from suspend. + pub fn notify_on_resume(&mut self, device: Arc<Mutex<dyn BusResumeDevice>>) { + self.resume_notify_devices.push(device); + } + + /// Call `notify_resume` to notify the device that suspend resume is imminent. + pub fn notify_resume(&mut self) { + let devices = self.resume_notify_devices.clone(); + for dev in devices { + dev.lock().resume_imminent(); + } + } } #[cfg(test)] diff --git a/devices/src/ioapic.rs b/devices/src/ioapic.rs index 6f8e358..09ccb89 100644 --- a/devices/src/ioapic.rs +++ b/devices/src/ioapic.rs @@ -8,7 +8,11 @@ use crate::split_irqchip_common::*; use crate::BusDevice; use bit_field::*; -use sys_util::warn; +use kvm::Vm; +use msg_socket::{MsgReceiver, MsgSender}; +use std::sync::Arc; +use sys_util::{error, warn, EventFd, Result}; +use vm_control::{VmIrqRequest, VmIrqRequestSocket, VmIrqResponse}; #[bitfield] #[derive(Clone, Copy, PartialEq)] @@ -37,11 +41,9 @@ pub enum DeliveryStatus { } const IOAPIC_VERSION_ID: u32 = 0x00170011; -#[allow(dead_code)] -const IOAPIC_BASE_ADDRESS: u32 = 0xfec00000; +pub const IOAPIC_BASE_ADDRESS: u64 = 0xfec00000; // The Intel manual does not specify this size, but KVM uses it. -#[allow(dead_code)] -const IOAPIC_MEM_LENGTH_BYTES: usize = 0x100; +pub const IOAPIC_MEM_LENGTH_BYTES: u64 = 0x100; // Constants for IOAPIC direct register offset. const IOAPIC_REG_ID: u8 = 0x00; @@ -49,10 +51,10 @@ const IOAPIC_REG_VERSION: u8 = 0x01; const IOAPIC_REG_ARBITRATION_ID: u8 = 0x02; // Register offsets -pub const IOREGSEL_OFF: u8 = 0x0; -pub const IOREGSEL_DUMMY_UPPER_32_BITS_OFF: u8 = 0x4; -pub const IOWIN_OFF: u8 = 0x10; -pub const IOWIN_SCALE: u8 = 0x2; +const IOREGSEL_OFF: u8 = 0x0; +const IOREGSEL_DUMMY_UPPER_32_BITS_OFF: u8 = 0x4; +const IOWIN_OFF: u8 = 0x10; +const IOWIN_SCALE: u8 = 0x2; /// Given an IRQ and whether or not the selector should refer to the high bits, return a selector /// suitable to use as an offset to read to/write from. @@ -88,6 +90,9 @@ pub struct Ioapic { redirect_table: [RedirectionTableEntry; kvm::NUM_IOAPIC_PINS], // IOREGSEL is technically 32 bits, but only bottom 8 are writable: all others are fixed to 0. ioregsel: u8, + relay: Arc<GsiRelay>, + irqfd: Vec<EventFd>, + socket: VmIrqRequestSocket, } impl BusDevice for Ioapic { @@ -148,17 +153,29 @@ impl BusDevice for Ioapic { } impl Ioapic { - pub fn new() -> Ioapic { + pub fn new(vm: &mut Vm, socket: VmIrqRequestSocket) -> Result<Ioapic> { let mut entry = RedirectionTableEntry::new(); entry.set_interrupt_mask(true); let entries = [entry; kvm::NUM_IOAPIC_PINS]; - Ioapic { + let mut irqfd = vec![]; + for i in 0..kvm::NUM_IOAPIC_PINS { + irqfd.push(EventFd::new()?); + vm.register_irqfd(&irqfd[i], i as u32)?; + } + Ok(Ioapic { id: 0, rtc_remote_irr: false, current_interrupt_level_bitmap: 0, redirect_table: entries, ioregsel: 0, - } + relay: Default::default(), + irqfd, + socket, + }) + } + + pub fn register_relay(&mut self, relay: Arc<GsiRelay>) { + self.relay = relay; } // The ioapic must be informed about EOIs in order to avoid sending multiple interrupts of the @@ -173,6 +190,12 @@ impl Ioapic { if self.redirect_table[i].get_vector() == vector && self.redirect_table[i].get_trigger_mode() == TriggerMode::Level { + if self.relay.irqfd_resample[i].is_some() { + self.service_irq(i, false); + } + if let Some(resample_evt) = &self.relay.irqfd_resample[i] { + resample_evt.write(1).unwrap(); + } self.redirect_table[i].set_remote_irr(false); } // There is an inherent race condition in hardware if the OS is finished processing an @@ -218,8 +241,7 @@ impl Ioapic { return false; } - // TODO(mutexlox): Pulse (assert and deassert) interrupt - let injected = true; + let injected = self.irqfd[irq].write(1).is_ok(); if entry.get_trigger_mode() == TriggerMode::Level && level && injected { entry.set_remote_irr(true); @@ -267,13 +289,42 @@ impl Ioapic { // is the fix for this. } - // TODO(mutexlox): route MSI. if self.redirect_table[index].get_trigger_mode() == TriggerMode::Level && self.current_interrupt_level_bitmap & (1 << index) != 0 && !self.redirect_table[index].get_interrupt_mask() { self.service_irq(index, true); } + + let mut address = MsiAddressMessage::new(); + let mut data = MsiDataMessage::new(); + let entry = &self.redirect_table[index]; + address.set_destination_mode(entry.get_dest_mode()); + address.set_destination_id(entry.get_dest_id()); + address.set_always_0xfee(0xfee); + data.set_vector(entry.get_vector()); + data.set_delivery_mode(entry.get_delivery_mode()); + data.set_trigger(entry.get_trigger_mode()); + + let request = VmIrqRequest::AddMsiRoute { + gsi: index as u32, + msi_address: address.get(0, 32), + msi_data: data.get(0, 32) as u32, + }; + if let Err(e) = self.socket.send(&request) { + error!("IOAPIC: failed to send AddMsiRoute request: {}", e); + return; + } + match self.socket.recv() { + Ok(response) => { + if let VmIrqResponse::Err(e) = response { + error!("IOAPIC: failed to add msi route: {}", e); + } + } + Err(e) => { + error!("IOAPIC: failed to receive AddMsiRoute response: {}", e); + } + } } } } @@ -307,6 +358,15 @@ mod tests { const DEFAULT_VECTOR: u8 = 0x3a; const DEFAULT_DESTINATION_ID: u8 = 0x5f; + fn new() -> Ioapic { + let kvm = kvm::Kvm::new().unwrap(); + let gm = sys_util::GuestMemory::new(&vec![(sys_util::GuestAddress(0), 0x1000)]).unwrap(); + let mut vm = Vm::new(&kvm, gm).unwrap(); + vm.enable_split_irqchip().unwrap(); + let (_, device_socket) = msg_socket::pair::<VmIrqResponse, VmIrqRequest>().unwrap(); + Ioapic::new(&mut vm, device_socket).unwrap() + } + fn set_up(trigger: TriggerMode) -> (Ioapic, usize) { let irq = kvm::NUM_IOAPIC_PINS - 1; let ioapic = set_up_with_irq(irq, trigger); @@ -314,7 +374,7 @@ mod tests { } fn set_up_with_irq(irq: usize, trigger: TriggerMode) -> Ioapic { - let mut ioapic = Ioapic::new(); + let mut ioapic = self::new(); set_up_redirection_table_entry(&mut ioapic, irq, trigger); ioapic } @@ -377,7 +437,7 @@ mod tests { #[test] fn write_read_ioregsel() { - let mut ioapic = Ioapic::new(); + let mut ioapic = self::new(); let data_write = [0x0f, 0xf0, 0x01, 0xff]; let mut data_read = [0; 4]; @@ -391,7 +451,7 @@ mod tests { // Verify that version register is actually read-only. #[test] fn write_read_ioaic_reg_version() { - let mut ioapic = Ioapic::new(); + let mut ioapic = self::new(); let before = read_reg(&mut ioapic, IOAPIC_REG_VERSION); let data_write = !before; @@ -402,7 +462,7 @@ mod tests { // Verify that only bits 27:24 of the IOAPICID are readable/writable. #[test] fn write_read_ioapic_reg_id() { - let mut ioapic = Ioapic::new(); + let mut ioapic = self::new(); write_reg(&mut ioapic, IOAPIC_REG_ID, 0x1f3e5d7c); assert_eq!(read_reg(&mut ioapic, IOAPIC_REG_ID), 0x0f000000); @@ -411,7 +471,7 @@ mod tests { // Write to read-only register IOAPICARB. #[test] fn write_read_ioapic_arbitration_id() { - let mut ioapic = Ioapic::new(); + let mut ioapic = self::new(); let data_write_id = 0x1f3e5d7c; let expected_result = 0x0f000000; @@ -436,7 +496,7 @@ mod tests { #[test] #[should_panic(expected = "index out of bounds: the len is 24 but the index is 24")] fn service_invalid_irq() { - let mut ioapic = Ioapic::new(); + let mut ioapic = self::new(); ioapic.service_irq(kvm::NUM_IOAPIC_PINS, false); } diff --git a/devices/src/lib.rs b/devices/src/lib.rs index 512d08b..1ecfc9c 100644 --- a/devices/src/lib.rs +++ b/devices/src/lib.rs @@ -15,6 +15,7 @@ pub mod pl030; mod proxy; #[macro_use] mod register_space; +pub mod acpi; mod serial; pub mod split_irqchip_common; pub mod usb; @@ -22,11 +23,12 @@ mod utils; pub mod vfio; pub mod virtio; +pub use self::acpi::ACPIPMResource; pub use self::bus::Error as BusError; -pub use self::bus::{Bus, BusDevice, BusRange}; +pub use self::bus::{Bus, BusDevice, BusRange, BusResumeDevice}; pub use self::cmos::Cmos; pub use self::i8042::I8042Device; -pub use self::ioapic::Ioapic; +pub use self::ioapic::{Ioapic, IOAPIC_BASE_ADDRESS, IOAPIC_MEM_LENGTH_BYTES}; pub use self::pci::{ Ac97Dev, PciConfigIo, PciConfigMmio, PciDevice, PciDeviceError, PciInterruptPin, PciRoot, VfioPciDevice, @@ -42,5 +44,5 @@ pub use self::serial::{ }; pub use self::usb::host_backend::host_backend_device_provider::HostBackendDeviceProvider; pub use self::usb::xhci::xhci_controller::XhciController; -pub use self::vfio::VfioDevice; +pub use self::vfio::{VfioContainer, VfioDevice}; pub use self::virtio::VirtioPciDevice; diff --git a/devices/src/pci/ac97.rs b/devices/src/pci/ac97.rs index eb19b5f..792df24 100644 --- a/devices/src/pci/ac97.rs +++ b/devices/src/pci/ac97.rs @@ -4,7 +4,7 @@ use std::os::unix::io::RawFd; -use audio_streams::StreamSource; +use audio_streams::shm_streams::ShmStreamSource; use resources::{Alloc, MmioType, SystemAllocator}; use sys_util::{error, EventFd, GuestMemory}; @@ -39,7 +39,7 @@ pub struct Ac97Dev { impl Ac97Dev { /// Creates an 'Ac97Dev' that uses the given `GuestMemory` and starts with all registers at /// default values. - pub fn new(mem: GuestMemory, audio_server: Box<dyn StreamSource>) -> Self { + pub fn new(mem: GuestMemory, audio_server: Box<dyn ShmStreamSource>) -> Self { let config_regs = PciConfiguration::new( 0x8086, PCI_DEVICE_ID_INTEL_82801AA_5, @@ -236,13 +236,13 @@ impl PciDevice for Ac97Dev { #[cfg(test)] mod tests { use super::*; - use audio_streams::DummyStreamSource; + use audio_streams::shm_streams::MockShmStreamSource; use sys_util::GuestAddress; #[test] fn create() { let mem = GuestMemory::new(&[(GuestAddress(0u64), 4 * 1024 * 1024)]).unwrap(); - let mut ac97_dev = Ac97Dev::new(mem, Box::new(DummyStreamSource::new())); + let mut ac97_dev = Ac97Dev::new(mem, Box::new(MockShmStreamSource::new())); let mut allocator = SystemAllocator::builder() .add_io_addresses(0x1000_0000, 0x1000_0000) .add_low_mmio_addresses(0x2000_0000, 0x1000_0000) diff --git a/devices/src/pci/ac97_bus_master.rs b/devices/src/pci/ac97_bus_master.rs index d3d2f85..cb28c5a 100644 --- a/devices/src/pci/ac97_bus_master.rs +++ b/devices/src/pci/ac97_bus_master.rs @@ -3,21 +3,21 @@ // found in the LICENSE file. use std; +use std::collections::VecDeque; +use std::convert::AsRef; use std::error::Error; use std::fmt::{self, Display}; -use std::io::Write; -use std::os::unix::io::RawFd; +use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::thread; -use std::time::Instant; +use std::time::{Duration, Instant}; use audio_streams::{ - capture::{CaptureBuffer, CaptureBufferStream}, - PlaybackBuffer, PlaybackBufferStream, SampleFormat, StreamControl, StreamSource, + shm_streams::{ShmStream, ShmStreamSource}, + DummyStreamControl, SampleFormat, StreamControl, StreamDirection, StreamEffect, }; -use data_model::{VolatileMemory, VolatileSlice}; -use sync::Mutex; +use sync::{Condvar, Mutex}; use sys_util::{ self, error, set_rt_prio_limit, set_rt_round_robin, warn, EventFd, GuestAddress, GuestMemory, }; @@ -26,6 +26,7 @@ use crate::pci::ac97_mixer::Ac97Mixer; use crate::pci::ac97_regs::*; const DEVICE_SAMPLE_RATE: usize = 48000; +const DEVICE_CHANNEL_COUNT: usize = 2; // Bus Master registers. Keeps the state of the bus master register values. Used to share the state // between the main and audio threads. @@ -76,8 +77,6 @@ impl Ac97BusMasterRegs { enum GuestMemoryError { // Failure getting the address of the audio buffer. ReadingGuestBufferAddress(sys_util::GuestMemoryError), - // Failure reading samples from guest memory. - ReadingGuestSamples(data_model::VolatileMemoryError), } impl std::error::Error for GuestMemoryError {} @@ -90,7 +89,6 @@ impl Display for GuestMemoryError { ReadingGuestBufferAddress(e) => { write!(f, "Failed to get the address of the audio buffer: {}.", e) } - ReadingGuestSamples(e) => write!(f, "Failed to read samples from guest memory: {}.", e), } } } @@ -106,12 +104,16 @@ type GuestMemoryResult<T> = std::result::Result<T, GuestMemoryError>; // Internal error type used for reporting errors from the audio thread. #[derive(Debug)] enum AudioError { + // Failed to create a new stream. + CreateStream(Box<dyn Error>), + // Guest did not provide a buffer when needed. + NoBufferAvailable, // Failure to read guest memory. ReadingGuestError(GuestMemoryError), - // Failure to get an buffer from the stream. - StreamError(Box<dyn Error>), - // Failure writing to the audio output. - WritingOutput(std::io::Error), + // Failure to respond to the ServerRequest. + RespondRequest(Box<dyn Error>), + // Failure to wait for a request from the stream. + WaitForAction(Box<dyn Error>), } impl std::error::Error for AudioError {} @@ -121,9 +123,11 @@ impl Display for AudioError { use self::AudioError::*; match self { + CreateStream(e) => write!(f, "Failed to create audio stream: {}.", e), + NoBufferAvailable => write!(f, "No buffer was available from the Guest"), ReadingGuestError(e) => write!(f, "Failed to read guest memory: {}.", e), - StreamError(e) => write!(f, "Failed to get a buffer from the stream: {}", e), - WritingOutput(e) => write!(f, "Failed to write audio output: {}.", e), + RespondRequest(e) => write!(f, "Failed to respond to the ServerRequest: {}", e), + WaitForAction(e) => write!(f, "Failed to wait for a message from the stream: {}", e), } } } @@ -134,6 +138,7 @@ type AudioResult<T> = std::result::Result<T, AudioError>; struct AudioThreadInfo { thread: Option<thread::JoinHandle<()>>, thread_run: Arc<AtomicBool>, + thread_semaphore: Arc<Condvar>, stream_control: Option<Box<dyn StreamControl>>, } @@ -142,6 +147,7 @@ impl AudioThreadInfo { Self { thread: None, thread_run: Arc::new(AtomicBool::new(false)), + thread_semaphore: Arc::new(Condvar::new()), stream_control: None, } } @@ -160,7 +166,7 @@ pub struct Ac97BusMaster { pi_info: AudioThreadInfo, // Audio server used to create playback or capture streams. - audio_server: Box<dyn StreamSource>, + audio_server: Box<dyn ShmStreamSource>, // Thread for hadlind IRQ resample events from the guest. irq_resample_thread: Option<thread::JoinHandle<()>>, @@ -169,7 +175,7 @@ pub struct Ac97BusMaster { impl Ac97BusMaster { /// Creates an Ac97BusMaster` object that plays audio from `mem` to streams provided by /// `audio_server`. - pub fn new(mem: GuestMemory, audio_server: Box<dyn StreamSource>) -> Self { + pub fn new(mem: GuestMemory, audio_server: Box<dyn ShmStreamSource>) -> Self { Ac97BusMaster { mem, regs: Arc::new(Mutex::new(Ac97BusMasterRegs::new())), @@ -186,7 +192,9 @@ impl Ac97BusMaster { /// Returns any file descriptors that need to be kept open when entering a jail. pub fn keep_fds(&self) -> Option<Vec<RawFd>> { - self.audio_server.keep_fds() + let mut fds = self.audio_server.keep_fds(); + fds.push(self.mem.as_raw_fd()); + Some(fds) } /// Provides the events needed to raise interrupts in the guest. @@ -386,7 +394,18 @@ impl Ac97BusMaster { && func_regs.sr & SR_DCH == SR_DCH && func_regs.civ != func_regs.lvi { + if func_regs.sr & SR_CELV != 0 { + // CELV means we'd already processed the buffer at CIV. + // Move CIV to the next buffer now that LVI has moved. + func_regs.move_to_next_buffer(); + } func_regs.sr &= !(SR_DCH | SR_CELV); + + match func { + Ac97Function::Input => self.pi_info.thread_semaphore.notify_one(), + Ac97Function::Output => self.po_info.thread_semaphore.notify_one(), + Ac97Function::Microphone => (), + } } } @@ -459,74 +478,79 @@ impl Ac97BusMaster { self.regs.lock().glob_cnt = new_glob_cnt; } - fn start_audio(&mut self, func: Ac97Function, mixer: &Ac97Mixer) -> Result<(), Box<dyn Error>> { + fn start_audio(&mut self, func: Ac97Function, mixer: &Ac97Mixer) -> AudioResult<()> { const AUDIO_THREAD_RTPRIO: u16 = 10; // Matches other cros audio clients. - let thread_info = match func { + let (direction, thread_info) = match func { Ac97Function::Microphone => return Ok(()), - Ac97Function::Input => &mut self.pi_info, - Ac97Function::Output => &mut self.po_info, + Ac97Function::Input => (StreamDirection::Capture, &mut self.pi_info), + Ac97Function::Output => (StreamDirection::Playback, &mut self.po_info), }; - let num_channels = 2; let buffer_samples = current_buffer_size(self.regs.lock().func_regs(func), &self.mem)?; - let buffer_frames = buffer_samples / num_channels; + let buffer_frames = buffer_samples / DEVICE_CHANNEL_COUNT; thread_info.thread_run.store(true, Ordering::Relaxed); let thread_run = thread_info.thread_run.clone(); + let thread_semaphore = thread_info.thread_semaphore.clone(); let thread_mem = self.mem.clone(); let thread_regs = self.regs.clone(); - match func { - Ac97Function::Input => { - let (stream_control, input_stream) = self.audio_server.new_capture_stream( - num_channels, - SampleFormat::S16LE, - DEVICE_SAMPLE_RATE, - buffer_frames, - )?; - self.pi_info.stream_control = Some(stream_control); - self.update_mixer_settings(mixer); - - self.pi_info.thread = Some(thread::spawn(move || { - if set_rt_prio_limit(u64::from(AUDIO_THREAD_RTPRIO)).is_err() - || set_rt_round_robin(i32::from(AUDIO_THREAD_RTPRIO)).is_err() - { - warn!("Failed to set audio thread to real time."); - } - if let Err(e) = - audio_in_thread(thread_regs, thread_mem, &thread_run, input_stream) - { - error!("Capture error: {}", e); - } - thread_run.store(false, Ordering::Relaxed); - })); - } - Ac97Function::Output => { - let (stream_control, output_stream) = self.audio_server.new_playback_stream( - num_channels, - SampleFormat::S16LE, - DEVICE_SAMPLE_RATE, - buffer_frames, - )?; - self.po_info.stream_control = Some(stream_control); - self.update_mixer_settings(mixer); - - self.po_info.thread = Some(thread::spawn(move || { - if set_rt_prio_limit(u64::from(AUDIO_THREAD_RTPRIO)).is_err() - || set_rt_round_robin(i32::from(AUDIO_THREAD_RTPRIO)).is_err() - { - warn!("Failed to set audio thread to real time."); - } - if let Err(e) = - audio_out_thread(thread_regs, thread_mem, &thread_run, output_stream) - { - error!("Playback error: {}", e); - } - thread_run.store(false, Ordering::Relaxed); - })); + let mut pending_buffers = VecDeque::with_capacity(2); + let starting_offsets = match direction { + StreamDirection::Capture => { + let mut offsets = [0, 0]; + let mut locked_regs = self.regs.lock(); + for i in 0..2 { + let buffer = next_guest_buffer(&mut locked_regs, &self.mem, func, 0)? + .ok_or(AudioError::NoBufferAvailable)?; + offsets[i] = buffer.offset as u64; + pending_buffers.push_back(Some(buffer)); + } + offsets } - Ac97Function::Microphone => (), + StreamDirection::Playback => [0, 0], }; + let stream = self + .audio_server + .new_stream( + direction, + DEVICE_CHANNEL_COUNT, + SampleFormat::S16LE, + DEVICE_SAMPLE_RATE, + buffer_frames, + StreamEffect::NoEffect, + self.mem.as_ref(), + starting_offsets, + ) + .map_err(AudioError::CreateStream)?; + + thread_info.stream_control = Some(Box::new(DummyStreamControl::new())); + thread_info.thread = Some(thread::spawn(move || { + if let Err(e) = set_rt_prio_limit(u64::from(AUDIO_THREAD_RTPRIO)) + .and_then(|_| set_rt_round_robin(i32::from(AUDIO_THREAD_RTPRIO))) + { + warn!("Failed to set audio thread to real time: {}", e); + } + + let message_interval = + Duration::from_secs_f64(buffer_frames as f64 / DEVICE_SAMPLE_RATE as f64); + + if let Err(e) = audio_thread( + func, + thread_regs, + thread_mem, + &thread_run, + thread_semaphore, + message_interval, + stream, + pending_buffers, + ) { + error!("{:?} error: {}", func, e); + } + thread_run.store(false, Ordering::Relaxed); + })); + self.update_mixer_settings(mixer); + Ok(()) } @@ -537,6 +561,7 @@ impl Ac97BusMaster { Ac97Function::Output => &mut self.po_info, }; thread_info.thread_run.store(false, Ordering::Relaxed); + thread_info.thread_semaphore.notify_one(); if let Some(thread) = thread_info.thread.take() { if let Err(e) = thread.join() { error!("Failed to join {:?} thread: {:?}.", func, e); @@ -565,62 +590,88 @@ impl Ac97BusMaster { } } -// Gets the next buffer from the guest. This will return `None` if the DMA controlled stopped bit is -// set, such as after an underrun where CIV hits LVI. -fn next_guest_buffer<'a>( - func_regs: &mut Ac97FunctionRegs, - mem: &'a GuestMemory, -) -> GuestMemoryResult<Option<VolatileSlice<'a>>> { - let sample_size = 2; +#[derive(Debug)] +struct GuestBuffer { + index: u8, + offset: usize, + frames: usize, +} - if func_regs.sr & SR_DCH != 0 { - return Ok(None); - } - let next_buffer = func_regs.civ; - let descriptor_addr = func_regs.bdbar + u32::from(next_buffer) * DESCRIPTOR_LENGTH as u32; +fn get_buffer_offset( + func_regs: &Ac97FunctionRegs, + mem: &GuestMemory, + index: u8, +) -> GuestMemoryResult<usize> { + let descriptor_addr = func_regs.bdbar + u32::from(index) * DESCRIPTOR_LENGTH as u32; let buffer_addr_reg: u32 = mem .read_obj_from_addr(GuestAddress(u64::from(descriptor_addr))) .map_err(GuestMemoryError::ReadingGuestBufferAddress)?; - let buffer_addr = buffer_addr_reg & !0x03u32; // The address must be aligned to four bytes. + let buffer_addr = GuestAddress((buffer_addr_reg & !0x03u32) as u64); // The address must be aligned to four bytes. + + mem.offset_from_base(buffer_addr) + .map_err(GuestMemoryError::ReadingGuestBufferAddress) +} + +fn get_buffer_samples( + func_regs: &Ac97FunctionRegs, + mem: &GuestMemory, + index: u8, +) -> GuestMemoryResult<usize> { + let descriptor_addr = func_regs.bdbar + u32::from(index) * DESCRIPTOR_LENGTH as u32; let control_reg: u32 = mem .read_obj_from_addr(GuestAddress(u64::from(descriptor_addr) + 4)) .map_err(GuestMemoryError::ReadingGuestBufferAddress)?; - let buffer_samples: usize = control_reg as usize & 0x0000_ffff; + let buffer_samples = control_reg as usize & 0x0000_ffff; + Ok(buffer_samples) +} - func_regs.picb = buffer_samples as u16; +// Gets the start address and length of the buffer at `civ + offset` from the +// guest. +// This will return `None` if `civ + offset` is past LVI; if the DMA controlled +// stopped bit is set, such as after an underrun where CIV hits LVI; or if +// `civ + offset == LVI and the CELV flag is set. +fn next_guest_buffer<'a>( + regs: &Ac97BusMasterRegs, + mem: &GuestMemory, + func: Ac97Function, + offset: usize, +) -> AudioResult<Option<GuestBuffer>> { + let func_regs = regs.func_regs(func); + let offset = (offset % 32) as u8; + let index = (func_regs.civ + offset) % 32; + + // Check that value is between `low` and `high` modulo some `n`. + fn check_between(low: u8, high: u8, value: u8) -> bool { + // If low <= high, value must be in the interval between them: + // 0 l h n + // ......+++++++...... + (low <= high && (low <= value && value <= high)) || + // If low > high, value must not be in the interval between them: + // 0 h l n + // +++++++++......++++ + (low > high && (low <= value || value <= high)) + }; - let samples_remaining = func_regs.picb as usize; - if samples_remaining == 0 { + // Check if + // * we're halted + // * `index` is not between CIV and LVI (mod 32) + // * `index is LVI and we've already processed LVI (SR_CELV is set) + // if any of these are true `index` isn't valid. + if func_regs.sr & SR_DCH != 0 + || !check_between(func_regs.civ, func_regs.lvi, index) + || func_regs.sr & SR_CELV != 0 + { return Ok(None); } - let read_pos = u64::from(buffer_addr); - Ok(Some( - mem.get_slice(read_pos, samples_remaining as u64 * sample_size) - .map_err(GuestMemoryError::ReadingGuestSamples)?, - )) -} -// Reads the next buffer from guest memory and writes it to `out_buffer`. -fn play_buffer( - regs: &mut Ac97BusMasterRegs, - mem: &GuestMemory, - out_buffer: &mut PlaybackBuffer, -) -> AudioResult<()> { - // If the current buffer had any samples in it, mark it as done. - if regs.func_regs_mut(Ac97Function::Output).picb > 0 { - buffer_completed(regs, mem, Ac97Function::Output)? - } - let func_regs = regs.func_regs_mut(Ac97Function::Output); - let buffer_len = func_regs.picb * 2; - if let Some(buffer) = next_guest_buffer(func_regs, mem)? { - out_buffer.copy_cb(buffer.size() as usize, |out| buffer.copy_to(out)); - } else { - let zeros = vec![0u8; buffer_len as usize]; - out_buffer - .write(&zeros) - .map_err(AudioError::WritingOutput)?; - } - Ok(()) + let offset = get_buffer_offset(func_regs, mem, index)?; + let frames = get_buffer_samples(func_regs, mem, index)? / DEVICE_CHANNEL_COUNT; + + Ok(Some(GuestBuffer { + index, + offset, + frames, + })) } // Marks the current buffer completed and moves to the next buffer for the given @@ -629,7 +680,7 @@ fn buffer_completed( regs: &mut Ac97BusMasterRegs, mem: &GuestMemory, func: Ac97Function, -) -> GuestMemoryResult<()> { +) -> AudioResult<()> { // check if the completed descriptor wanted an interrupt on completion. let civ = regs.func_regs(func).civ; let descriptor_addr = regs.func_regs(func).bdbar + u32::from(civ) * DESCRIPTOR_LENGTH as u32; @@ -661,51 +712,115 @@ fn buffer_completed( Ok(()) } -// Runs, playing back audio from the guest to `output_stream` until stopped or an error occurs. -fn audio_out_thread( +// Runs and updates the offset within the stream shm where samples can be +// found/placed for shm playback/capture streams, respectively +fn audio_thread( + func: Ac97Function, regs: Arc<Mutex<Ac97BusMasterRegs>>, mem: GuestMemory, thread_run: &AtomicBool, - mut output_stream: Box<dyn PlaybackBufferStream>, + lvi_semaphore: Arc<Condvar>, + message_interval: Duration, + mut stream: Box<dyn ShmStream>, + // A queue of the pending buffers at the server. + mut pending_buffers: VecDeque<Option<GuestBuffer>>, ) -> AudioResult<()> { - while thread_run.load(Ordering::Relaxed) { - output_stream - .next_playback_buffer() - .map_err(AudioError::StreamError) - .and_then(|mut pb_buf| play_buffer(&mut regs.lock(), &mem, &mut pb_buf))?; + if func == Ac97Function::Microphone { + return Ok(()); } - Ok(()) -} -// Reads samples from `in_buffer` and writes it to the next buffer from guest memory. -fn capture_buffer( - regs: &mut Ac97BusMasterRegs, - mem: &GuestMemory, - in_buffer: &mut CaptureBuffer, -) -> AudioResult<()> { - // If the current buffer had any samples in it, mark it as done. - if regs.func_regs_mut(Ac97Function::Input).picb > 0 { - buffer_completed(regs, mem, Ac97Function::Input)? - } - let func_regs = regs.func_regs_mut(Ac97Function::Input); - if let Some(buffer) = next_guest_buffer(func_regs, mem)? { - in_buffer.copy_cb(buffer.size() as usize, |inb| buffer.copy_from(inb)) + // Set up picb. + { + let mut locked_regs = regs.lock(); + locked_regs.func_regs_mut(func).picb = + current_buffer_size(locked_regs.func_regs(func), &mem)? as u16; } - Ok(()) -} -// Runs, capturing audio from `input_stream` to the guest until stopped or an error occurs. -fn audio_in_thread( - regs: Arc<Mutex<Ac97BusMasterRegs>>, - mem: GuestMemory, - thread_run: &AtomicBool, - mut input_stream: Box<dyn CaptureBufferStream>, -) -> AudioResult<()> { - while thread_run.load(Ordering::Relaxed) { - input_stream - .next_capture_buffer() - .map_err(AudioError::StreamError) - .and_then(|mut cp_buf| capture_buffer(&mut regs.lock(), &mem, &mut cp_buf))?; + 'audio_loop: while thread_run.load(Ordering::Relaxed) { + { + let mut locked_regs = regs.lock(); + while locked_regs.func_regs(func).sr & SR_DCH != 0 { + locked_regs = lvi_semaphore.wait(locked_regs); + if !thread_run.load(Ordering::Relaxed) { + break 'audio_loop; + } + } + } + + let timeout = Duration::from_secs(1); + let action = stream + .wait_for_next_action_with_timeout(timeout) + .map_err(AudioError::WaitForAction)?; + + let request = match action { + None => { + warn!("No audio message received within timeout of {:?}", timeout); + continue; + } + Some(request) => request, + }; + let start = Instant::now(); + + let next_buffer = { + let mut locked_regs = regs.lock(); + if pending_buffers.len() == 2 { + // When we have two pending buffers and receive a request for + // another, we know that oldest buffer has been completed. + // However, if that old buffer was an empty buffer we sent + // because the guest driver had no available buffers, we don't + // want to mark a buffer complete. + if let Some(Some(_)) = pending_buffers.pop_front() { + buffer_completed(&mut locked_regs, &mem, func)?; + } + } + + // We count the number of pending, real buffers at the server, and + // then use that as our offset from CIV. + let offset = pending_buffers.iter().filter(|e| e.is_some()).count(); + + // Get a buffer to respond to our request. If there's no buffer + // available, we'll wait one buffer interval and check again. + loop { + if let Some(buffer) = next_guest_buffer(&mut locked_regs, &mem, func, offset)? { + break Some(buffer); + } + let elapsed = start.elapsed(); + if elapsed > message_interval { + break None; + } + locked_regs = lvi_semaphore + .wait_timeout(locked_regs, message_interval - elapsed) + .0; + } + }; + + match next_buffer { + Some(ref buffer) => { + let requested_frames = request.requested_frames(); + if requested_frames != buffer.frames { + // We should be able to handle when the number of frames in + // the buffer doesn't match the number of frames requested, + // but we don't yet. + warn!( + "Stream requested {} frames but buffer had {} frames: {:?}", + requested_frames, buffer.frames, buffer + ); + } + + request + .set_buffer_offset_and_frames( + buffer.offset, + std::cmp::min(requested_frames, buffer.frames), + ) + .map_err(AudioError::RespondRequest)?; + } + None => { + request + .ignore_request() + .map_err(AudioError::RespondRequest)?; + } + } + pending_buffers.push_back(next_buffer); } Ok(()) } @@ -753,27 +868,20 @@ fn current_buffer_size( mem: &GuestMemory, ) -> GuestMemoryResult<usize> { let civ = func_regs.civ; - let descriptor_addr = func_regs.bdbar + u32::from(civ) * DESCRIPTOR_LENGTH as u32; - let control_reg: u32 = mem - .read_obj_from_addr(GuestAddress(u64::from(descriptor_addr) + 4)) - .map_err(GuestMemoryError::ReadingGuestBufferAddress)?; - let buffer_len: usize = control_reg as usize & 0x0000_ffff; - Ok(buffer_len) + get_buffer_samples(func_regs, mem, civ) } #[cfg(test)] mod test { use super::*; - use std::time; - - use audio_streams::DummyStreamSource; + use audio_streams::shm_streams::MockShmStreamSource; #[test] fn bm_bdbar() { let mut bm = Ac97BusMaster::new( GuestMemory::new(&[]).expect("Creating guest memory failed."), - Box::new(DummyStreamSource::new()), + Box::new(MockShmStreamSource::new()), ); let bdbars = [0x00u64, 0x10, 0x20]; @@ -797,7 +905,7 @@ mod test { fn bm_status_reg() { let mut bm = Ac97BusMaster::new( GuestMemory::new(&[]).expect("Creating guest memory failed."), - Box::new(DummyStreamSource::new()), + Box::new(MockShmStreamSource::new()), ); let sr_addrs = [0x06u64, 0x16, 0x26]; @@ -813,7 +921,7 @@ mod test { fn bm_global_control() { let mut bm = Ac97BusMaster::new( GuestMemory::new(&[]).expect("Creating guest memory failed."), - Box::new(DummyStreamSource::new()), + Box::new(MockShmStreamSource::new()), ); assert_eq!(bm.readl(GLOB_CNT_2C), 0x0000_0000); @@ -838,7 +946,9 @@ mod test { } #[test] + #[ignore] // flaky - see crbug.com/1058881 fn start_playback() { + const TIMEOUT: Duration = Duration::from_millis(500); const LVI_MASK: u8 = 0x1f; // Five bits for 32 total entries. const IOC_MASK: u32 = 0x8000_0000; // Interrupt on completion. let num_buffers = LVI_MASK as usize + 1; @@ -848,7 +958,8 @@ mod test { const GUEST_ADDR_BASE: u32 = 0x100_0000; let mem = GuestMemory::new(&[(GuestAddress(GUEST_ADDR_BASE as u64), 1024 * 1024 * 1024)]) .expect("Creating guest memory failed."); - let mut bm = Ac97BusMaster::new(mem.clone(), Box::new(DummyStreamSource::new())); + let stream_source = MockShmStreamSource::new(); + let mut bm = Ac97BusMaster::new(mem.clone(), Box::new(stream_source.clone())); let mixer = Ac97Mixer::new(); // Release cold reset. @@ -871,33 +982,26 @@ mod test { } bm.writeb(PO_LVI_15, LVI_MASK, &mixer); + assert_eq!(bm.readb(PO_CIV_14), 0); // Start. bm.writeb(PO_CR_1B, CR_IOCE | CR_RPBM, &mixer); + assert_eq!(bm.readw(PO_PICB_18), 0); + + let mut stream = stream_source.get_last_stream(); + // Trigger callback and see that CIV has not changed, since only 1 + // buffer has been sent. + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); - std::thread::sleep(time::Duration::from_millis(50)); - let picb = bm.readw(PO_PICB_18); let mut civ = bm.readb(PO_CIV_14); assert_eq!(civ, 0); - let pos = (FRAGMENT_SIZE - (picb as usize * 2)) / 4; - // Check that frames are consumed at least at a reasonable rate. - // This wont be exact as during unit tests the thread scheduling is highly variable, so the - // test only checks that some samples are consumed. - assert!(pos > 1000); - - assert!(bm.readw(PO_SR_16) & SR_DCH == 0); // DMA is running. - - // civ should move eventually. - for _i in 0..30 { - if civ != 0 { - break; - } - std::thread::sleep(time::Duration::from_millis(20)); - civ = bm.readb(PO_CIV_14); - } - - assert_ne!(0, civ); + // After two more callbacks, CIV should now be 1 since we know that the + // first buffer must have been played. + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + civ = bm.readb(PO_CIV_14); + assert_eq!(civ, 1); // Buffer complete should be set as the IOC bit was set in the descriptor. assert!(bm.readw(PO_SR_16) & SR_BCIS != 0); @@ -905,17 +1009,30 @@ mod test { bm.writew(PO_SR_16, SR_BCIS); assert!(bm.readw(PO_SR_16) & SR_BCIS == 0); - // Set last valid to the next and wait until it is hit. - bm.writeb(PO_LVI_15, civ + 1, &mixer); - std::thread::sleep(time::Duration::from_millis(500)); + std::thread::sleep(Duration::from_millis(50)); + let picb = bm.readw(PO_PICB_18); + let pos = (FRAGMENT_SIZE - (picb as usize * 2)) / 4; + + // Check that frames are consumed at least at a reasonable rate. + // This can't be exact as during unit tests the thread scheduling is highly variable, so the + // test only checks that some samples are consumed. + assert!(pos > 0); + assert!(bm.readw(PO_SR_16) & SR_DCH == 0); // DMA is running. + + // Set last valid to next buffer to be sent and trigger callback so we hit it. + bm.writeb(PO_LVI_15, civ + 2, &mixer); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); assert!(bm.readw(PO_SR_16) & SR_LVBCI != 0); // Hit last buffer assert!(bm.readw(PO_SR_16) & SR_DCH == SR_DCH); // DMA stopped because of lack of buffers. - assert_eq!(bm.readw(PO_SR_16) & SR_CELV, SR_CELV); + assert!(bm.readw(PO_SR_16) & SR_CELV == SR_CELV); // Processed the last buffer assert_eq!(bm.readb(PO_LVI_15), bm.readb(PO_CIV_14)); assert!( bm.readl(GLOB_STA_30) & GS_POINT != 0, "POINT bit should be set." ); + // Clear the LVB bit bm.writeb(PO_SR_16, SR_LVBCI as u8, &mixer); assert!(bm.readw(PO_SR_16) & SR_LVBCI == 0); @@ -924,9 +1041,11 @@ mod test { assert!(bm.readw(PO_SR_16) & SR_DCH == 0); // DMA restarts. assert_eq!(bm.readw(PO_SR_16) & SR_CELV, 0); - let (restart_civ, restart_picb) = (bm.readb(PO_CIV_14), bm.readw(PO_PICB_18)); - std::thread::sleep(time::Duration::from_millis(20)); - assert!(bm.readw(PO_PICB_18) != restart_picb || bm.readb(PO_CIV_14) != restart_civ); + let restart_civ = bm.readb(PO_CIV_14); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(bm.readb(PO_CIV_14) != restart_civ); // Stop. bm.writeb(PO_CR_1B, 0, &mixer); @@ -940,6 +1059,7 @@ mod test { #[test] fn start_capture() { + const TIMEOUT: Duration = Duration::from_millis(500); const LVI_MASK: u8 = 0x1f; // Five bits for 32 total entries. const IOC_MASK: u32 = 0x8000_0000; // Interrupt on completion. let num_buffers = LVI_MASK as usize + 1; @@ -949,7 +1069,8 @@ mod test { const GUEST_ADDR_BASE: u32 = 0x100_0000; let mem = GuestMemory::new(&[(GuestAddress(GUEST_ADDR_BASE as u64), 1024 * 1024 * 1024)]) .expect("Creating guest memory failed."); - let mut bm = Ac97BusMaster::new(mem.clone(), Box::new(DummyStreamSource::new())); + let stream_source = MockShmStreamSource::new(); + let mut bm = Ac97BusMaster::new(mem.clone(), Box::new(stream_source.clone())); let mixer = Ac97Mixer::new(); // Release cold reset. @@ -972,25 +1093,32 @@ mod test { bm.writeb(PI_CR_0B, CR_IOCE | CR_RPBM, &mixer); assert_eq!(bm.readw(PI_PICB_08), 0); - std::thread::sleep(time::Duration::from_millis(50)); + let mut stream = stream_source.get_last_stream(); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + + // CIV is 1 here since we preemptively sent two buffer indices to the + // server before creating the stream. When we triggered the callback + // above, that means the first of those buffers was filled, so CIV + // increments to 1. + let civ = bm.readb(PI_CIV_04); + assert_eq!(civ, 1); + std::thread::sleep(Duration::from_millis(20)); let picb = bm.readw(PI_PICB_08); - assert!(picb > 1000); + assert!(picb > 0); assert!(bm.readw(PI_SR_06) & SR_DCH == 0); // DMA is running. - // civ should move eventually. - for _i in 0..10 { - let civ = bm.readb(PI_CIV_04); - if civ != 0 { - break; - } - std::thread::sleep(time::Duration::from_millis(20)); - } - assert_ne!(bm.readb(PI_CIV_04), 0); + // Trigger 2 callbacks so that we'll move to buffer 3 since at that + // point we can be certain that buffers 1 and 2 have been captured to. + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert_eq!(bm.readb(PI_CIV_04), 3); let civ = bm.readb(PI_CIV_04); - // Sets LVI to CIV + 1 to trigger last buffer hit - bm.writeb(PI_LVI_05, civ + 1, &mixer); - std::thread::sleep(time::Duration::from_millis(5000)); + // Sets LVI to CIV + 2 to trigger last buffer hit + bm.writeb(PI_LVI_05, civ + 2, &mixer); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); assert_ne!(bm.readw(PI_SR_06) & SR_LVBCI, 0); // Hit last buffer assert_eq!(bm.readw(PI_SR_06) & SR_DCH, SR_DCH); // DMA stopped because of lack of buffers. assert_eq!(bm.readw(PI_SR_06) & SR_CELV, SR_CELV); @@ -1009,7 +1137,9 @@ mod test { assert_eq!(bm.readw(PI_SR_06) & SR_CELV, 0); let restart_civ = bm.readb(PI_CIV_04); - std::thread::sleep(time::Duration::from_millis(200)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); + assert!(stream.trigger_callback_with_timeout(TIMEOUT)); assert_ne!(bm.readb(PI_CIV_04), restart_civ); // Stop. diff --git a/devices/src/pci/msix.rs b/devices/src/pci/msix.rs index 282771f..6c79b4a 100644 --- a/devices/src/pci/msix.rs +++ b/devices/src/pci/msix.rs @@ -3,18 +3,20 @@ // found in the LICENSE file. use crate::pci::{PciCapability, PciCapabilityID}; -use msg_socket::{MsgReceiver, MsgSender}; +use msg_socket::{MsgError, MsgReceiver, MsgSender}; use std::convert::TryInto; +use std::fmt::{self, Display}; use std::os::unix::io::{AsRawFd, RawFd}; -use sys_util::{error, EventFd}; +use std::sync::Arc; +use sys_util::{error, Error as SysError, EventFd}; use vm_control::{MaybeOwnedFd, VmIrqRequest, VmIrqRequestSocket, VmIrqResponse}; use data_model::DataInit; const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; -const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; -const MSIX_PBA_ENTRIES_MODULO: u64 = 8; -const BITS_PER_PBA_ENTRY: usize = 64; +pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; +pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8; +pub const BITS_PER_PBA_ENTRY: usize = 64; const FUNCTION_MASK_BIT: u16 = 0x4000; const MSIX_ENABLE_BIT: u16 = 0x8000; @@ -55,12 +57,40 @@ pub struct MsixConfig { irq_vec: Vec<IrqfdGsi>, masked: bool, enabled: bool, - msi_device_socket: VmIrqRequestSocket, + msi_device_socket: Arc<VmIrqRequestSocket>, msix_num: u16, } +enum MsixError { + AddMsiRoute(SysError), + AddMsiRouteRecv(MsgError), + AddMsiRouteSend(MsgError), + AllocateOneMsi(SysError), + AllocateOneMsiRecv(MsgError), + AllocateOneMsiSend(MsgError), +} + +impl Display for MsixError { + #[remain::check] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use self::MsixError::*; + + #[sorted] + match self { + AddMsiRoute(e) => write!(f, "AddMsiRoute failed: {}", e), + AddMsiRouteRecv(e) => write!(f, "failed to receive AddMsiRoute response: {}", e), + AddMsiRouteSend(e) => write!(f, "failed to send AddMsiRoute request: {}", e), + AllocateOneMsi(e) => write!(f, "AllocateOneMsi failed: {}", e), + AllocateOneMsiRecv(e) => write!(f, "failed to receive AllocateOneMsi response: {}", e), + AllocateOneMsiSend(e) => write!(f, "failed to send AllocateOneMsi request: {}", e), + } + } +} + +type MsixResult<T> = std::result::Result<T, MsixError>; + impl MsixConfig { - pub fn new(msix_vectors: u16, vm_socket: VmIrqRequestSocket) -> Self { + pub fn new(msix_vectors: u16, vm_socket: Arc<VmIrqRequestSocket>) -> Self { assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); let mut table_entries: Vec<MsixTableEntry> = Vec::new(); @@ -127,7 +157,10 @@ impl MsixConfig { self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT; if !old_enabled && self.enabled { - self.msix_enable(); + if let Err(e) = self.msix_enable() { + error!("failed to enable MSI-X: {}", e); + self.enabled = false; + } } // If the Function Mask bit was set, and has just been cleared, it's @@ -149,7 +182,7 @@ impl MsixConfig { } } - fn add_msi_route(&self, index: u16, gsi: u32) { + fn add_msi_route(&self, index: u16, gsi: u32) -> MsixResult<()> { let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; self.read_msix_table((index * 16).into(), data.as_mut()); let msi_address: u64 = u64::from_le_bytes(data); @@ -158,44 +191,53 @@ impl MsixConfig { let msi_data: u32 = u32::from_le_bytes(data); if msi_address == 0 { - return; + return Ok(()); } - if let Err(e) = self.msi_device_socket.send(&VmIrqRequest::AddMsiRoute { - gsi, - msi_address, - msi_data, - }) { - error!("failed to send AddMsiRoute request: {:?}", e); - return; - } - if self.msi_device_socket.recv().is_err() { - error!("Faied to receive AddMsiRoute Response"); + self.msi_device_socket + .send(&VmIrqRequest::AddMsiRoute { + gsi, + msi_address, + msi_data, + }) + .map_err(MsixError::AddMsiRouteSend)?; + if let VmIrqResponse::Err(e) = self + .msi_device_socket + .recv() + .map_err(MsixError::AddMsiRouteRecv)? + { + return Err(MsixError::AddMsiRoute(e)); } + Ok(()) } - fn msix_enable(&mut self) { + fn msix_enable(&mut self) -> MsixResult<()> { self.irq_vec.clear(); for i in 0..self.msix_num { let irqfd = EventFd::new().unwrap(); - if let Err(e) = self.msi_device_socket.send(&VmIrqRequest::AllocateOneMsi { - irqfd: MaybeOwnedFd::Borrowed(irqfd.as_raw_fd()), - }) { - error!("failed to send AllocateOneMsi request: {:?}", e); - continue; - } + self.msi_device_socket + .send(&VmIrqRequest::AllocateOneMsi { + irqfd: MaybeOwnedFd::Borrowed(irqfd.as_raw_fd()), + }) + .map_err(MsixError::AllocateOneMsiSend)?; let irq_num: u32; - match self.msi_device_socket.recv() { - Ok(VmIrqResponse::AllocateOneMsi { gsi }) => irq_num = gsi, - _ => continue, + match self + .msi_device_socket + .recv() + .map_err(MsixError::AllocateOneMsiRecv)? + { + VmIrqResponse::AllocateOneMsi { gsi } => irq_num = gsi, + VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)), + _ => unreachable!(), } self.irq_vec.push(IrqfdGsi { irqfd, gsi: irq_num, }); - self.add_msi_route(i, irq_num); + self.add_msi_route(i, irq_num)?; } + Ok(()) } /// Read MSI-X table @@ -304,7 +346,9 @@ impl MsixConfig { || old_entry.msg_data != new_entry.msg_data) { let irq_num = self.irq_vec[index].gsi; - self.add_msi_route(index as u16, irq_num); + if let Err(e) = self.add_msi_route(index as u16, irq_num) { + error!("add_msi_route failed: {}", e); + } } // After the MSI-X table entry has been updated, it is necessary to @@ -432,6 +476,17 @@ impl MsixConfig { pub fn get_msi_socket(&self) -> RawFd { self.msi_device_socket.as_ref().as_raw_fd() } + + /// Return irqfd of MSI-X Table entry + /// + /// # Arguments + /// * 'vector' - the index to the MSI-X table entry + pub fn get_irqfd(&self, vector: usize) -> Option<&EventFd> { + match self.irq_vec.get(vector) { + Some(irq) => Some(&irq.irqfd), + None => None, + } + } } // It is safe to implement DataInit; all members are simple numbers and any value is valid. diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs index 216f5eb..4d5ad9e 100644 --- a/devices/src/pci/vfio_pci.rs +++ b/devices/src/pci/vfio_pci.rs @@ -17,6 +17,10 @@ use vm_control::{ VmMemoryRequest, VmMemoryResponse, }; +use crate::pci::msix::{ + MsixConfig, BITS_PER_PBA_ENTRY, MSIX_PBA_ENTRIES_MODULO, MSIX_TABLE_ENTRIES_MODULO, +}; + use crate::pci::pci_device::{Error as PciDeviceError, PciDevice}; use crate::pci::{PciClassCode, PciInterruptPin}; @@ -94,6 +98,7 @@ impl VfioPciConfig { const PCI_CAPABILITY_LIST: u32 = 0x34; const PCI_CAP_ID_MSI: u8 = 0x05; +const PCI_CAP_ID_MSIX: u8 = 0x11; // MSI registers const PCI_MSI_NEXT_POINTER: u32 = 0x1; // Next cap pointer @@ -122,43 +127,37 @@ struct VfioMsiCap { ctl: u16, address: u64, data: u16, - vm_socket_irq: VmIrqRequestSocket, + vm_socket_irq: Arc<VmIrqRequestSocket>, irqfd: Option<EventFd>, gsi: Option<u32>, } impl VfioMsiCap { - fn new(config: &VfioPciConfig, vm_socket_irq: VmIrqRequestSocket) -> Option<Self> { + fn new( + config: &VfioPciConfig, + msi_cap_start: u32, + vm_socket_irq: Arc<VmIrqRequestSocket>, + ) -> Self { // msi minimum size is 0xa let mut msi_len: u32 = MSI_LENGTH_32BIT; - let mut cap_next: u32 = config.read_config_byte(PCI_CAPABILITY_LIST).into(); - while cap_next != 0 { - let cap_id = config.read_config_byte(cap_next); - // find msi cap - if cap_id == PCI_CAP_ID_MSI { - let msi_ctl = config.read_config_word(cap_next + PCI_MSI_FLAGS); - if msi_ctl & PCI_MSI_FLAGS_64BIT != 0 { - msi_len = MSI_LENGTH_64BIT_WITHOUT_MASK; - } - if msi_ctl & PCI_MSI_FLAGS_MASKBIT != 0 { - msi_len = MSI_LENGTH_64BIT_WITH_MASK; - } - return Some(VfioMsiCap { - offset: cap_next, - size: msi_len, - ctl: 0, - address: 0, - data: 0, - vm_socket_irq, - irqfd: None, - gsi: None, - }); - } - let offset = cap_next + PCI_MSI_NEXT_POINTER; - cap_next = config.read_config_byte(offset).into(); + let msi_ctl = config.read_config_word(msi_cap_start + PCI_MSI_FLAGS); + if msi_ctl & PCI_MSI_FLAGS_64BIT != 0 { + msi_len = MSI_LENGTH_64BIT_WITHOUT_MASK; + } + if msi_ctl & PCI_MSI_FLAGS_MASKBIT != 0 { + msi_len = MSI_LENGTH_64BIT_WITH_MASK; } - None + VfioMsiCap { + offset: msi_cap_start, + size: msi_len, + ctl: 0, + address: 0, + data: 0, + vm_socket_irq, + irqfd: None, + gsi: None, + } } fn is_msi_reg(&self, index: u64, len: usize) -> bool { @@ -290,9 +289,153 @@ impl VfioMsiCap { fn get_msi_irqfd(&self) -> Option<&EventFd> { self.irqfd.as_ref() } +} + +// MSI-X registers in MSI-X capability +const PCI_MSIX_FLAGS: u32 = 0x02; // Message Control +const PCI_MSIX_FLAGS_QSIZE: u16 = 0x07FF; // Table size +const PCI_MSIX_TABLE: u32 = 0x04; // Table offset +const PCI_MSIX_TABLE_BIR: u32 = 0x07; // BAR index +const PCI_MSIX_TABLE_OFFSET: u32 = 0xFFFFFFF8; // Offset into specified BAR +const PCI_MSIX_PBA: u32 = 0x08; // Pending bit Array offset +const PCI_MSIX_PBA_BIR: u32 = 0x07; // BAR index +const PCI_MSIX_PBA_OFFSET: u32 = 0xFFFFFFF8; // Offset into specified BAR + +struct VfioMsixCap { + config: MsixConfig, + offset: u32, + table_size: u16, + table_pci_bar: u32, + table_offset: u64, + pba_pci_bar: u32, + pba_offset: u64, +} + +impl VfioMsixCap { + fn new( + config: &VfioPciConfig, + msix_cap_start: u32, + vm_socket_irq: Arc<VmIrqRequestSocket>, + ) -> Self { + let msix_ctl = config.read_config_word(msix_cap_start + PCI_MSIX_FLAGS); + let table_size = (msix_ctl & PCI_MSIX_FLAGS_QSIZE) + 1; + let table = config.read_config_dword(msix_cap_start + PCI_MSIX_TABLE); + let table_pci_bar = table & PCI_MSIX_TABLE_BIR; + let table_offset = (table & PCI_MSIX_TABLE_OFFSET) as u64; + let pba = config.read_config_dword(msix_cap_start + PCI_MSIX_PBA); + let pba_pci_bar = pba & PCI_MSIX_PBA_BIR; + let pba_offset = (pba & PCI_MSIX_PBA_OFFSET) as u64; + + VfioMsixCap { + config: MsixConfig::new(table_size, vm_socket_irq), + offset: msix_cap_start, + table_size, + table_pci_bar, + table_offset, + pba_pci_bar, + pba_offset, + } + } + + // only msix control register is writable and need special handle in pci r/w + fn is_msix_control_reg(&self, offset: u32, size: u32) -> bool { + let control_start = self.offset + PCI_MSIX_FLAGS; + let control_end = control_start + 2; + + if offset < control_end && offset + size > control_start { + true + } else { + false + } + } + + fn read_msix_control(&self, data: &mut u32) { + *data = self.config.read_msix_capability(*data); + } - fn get_vm_socket(&self) -> RawFd { - self.vm_socket_irq.as_ref().as_raw_fd() + fn write_msix_control(&mut self, data: &[u8]) -> Option<VfioMsiChange> { + let old_enabled = self.config.enabled(); + + self.config + .write_msix_capability(PCI_MSIX_FLAGS.into(), data); + + let new_enabled = self.config.enabled(); + if !old_enabled && new_enabled { + Some(VfioMsiChange::Enable) + } else if old_enabled && !new_enabled { + Some(VfioMsiChange::Disable) + } else { + None + } + } + + fn is_msix_table(&self, bar_index: u32, offset: u64) -> bool { + let table_size: u64 = (self.table_size * (MSIX_TABLE_ENTRIES_MODULO as u16)).into(); + if bar_index != self.table_pci_bar + || offset < self.table_offset + || offset >= self.table_offset + table_size + { + false + } else { + true + } + } + + fn read_table(&self, offset: u64, data: &mut [u8]) { + let offset = offset - self.table_offset; + self.config.read_msix_table(offset, data); + } + + fn write_table(&mut self, offset: u64, data: &[u8]) { + let offset = offset - self.table_offset; + self.config.write_msix_table(offset, data); + } + + fn is_msix_pba(&self, bar_index: u32, offset: u64) -> bool { + let pba_size: u64 = (((self.table_size + BITS_PER_PBA_ENTRY as u16 - 1) + / BITS_PER_PBA_ENTRY as u16) + * MSIX_PBA_ENTRIES_MODULO as u16) as u64; + if bar_index != self.pba_pci_bar + || offset < self.pba_offset + || offset >= self.pba_offset + pba_size + { + false + } else { + true + } + } + + fn read_pba(&self, offset: u64, data: &mut [u8]) { + let offset = offset - self.pba_offset; + self.config.read_pba_entries(offset, data); + } + + fn write_pba(&mut self, offset: u64, data: &[u8]) { + let offset = offset - self.pba_offset; + self.config.write_pba_entries(offset, data); + } + + fn is_msix_bar(&self, bar_index: u32) -> bool { + if bar_index == self.table_pci_bar || bar_index == self.pba_pci_bar { + true + } else { + false + } + } + + fn get_msix_irqfds(&self) -> Option<Vec<&EventFd>> { + let mut irqfds = Vec::new(); + + for i in 0..self.table_size { + let irqfd = self.config.get_irqfd(i as usize); + if let Some(fd) = irqfd { + irqfds.push(fd); + } else { + return None; + } + } + + Some(irqfds) } } @@ -320,8 +463,10 @@ pub struct VfioPciDevice { mmio_regions: Vec<MmioInfo>, io_regions: Vec<IoInfo>, msi_cap: Option<VfioMsiCap>, + msix_cap: Option<VfioMsixCap>, irq_type: Option<VfioIrqType>, vm_socket_mem: VmMemoryControlRequestSocket, + vm_socket_irq: Arc<VmIrqRequestSocket>, device_data: Option<DeviceData>, // scratch MemoryMapping to avoid unmap beform vm exit @@ -337,7 +482,29 @@ impl VfioPciDevice { ) -> Self { let dev = Arc::new(device); let config = VfioPciConfig::new(Arc::clone(&dev)); - let msi_cap = VfioMsiCap::new(&config, vfio_device_socket_irq); + let vm_socket_irq = Arc::new(vfio_device_socket_irq); + let mut msi_cap: Option<VfioMsiCap> = None; + let mut msix_cap: Option<VfioMsixCap> = None; + + let mut cap_next: u32 = config.read_config_byte(PCI_CAPABILITY_LIST).into(); + while cap_next != 0 { + let cap_id = config.read_config_byte(cap_next); + if cap_id == PCI_CAP_ID_MSI { + msi_cap = Some(VfioMsiCap::new( + &config, + cap_next, + Arc::clone(&vm_socket_irq), + )); + } else if cap_id == PCI_CAP_ID_MSIX { + msix_cap = Some(VfioMsixCap::new( + &config, + cap_next, + Arc::clone(&vm_socket_irq), + )); + } + let offset = cap_next + PCI_MSI_NEXT_POINTER; + cap_next = config.read_config_byte(offset).into(); + } let vendor_id = config.read_config_word(PCI_VENDOR_ID); let class_code = config.read_config_byte(PCI_BASE_CLASS_CODE); @@ -361,8 +528,10 @@ impl VfioPciDevice { mmio_regions: Vec::new(), io_regions: Vec::new(), msi_cap, + msix_cap, irq_type: None, vm_socket_mem: vfio_device_socket_mem, + vm_socket_irq, device_data, mem: Vec::new(), } @@ -400,7 +569,9 @@ impl VfioPciDevice { } if let Some(ref interrupt_evt) = self.interrupt_evt { - if let Err(e) = self.device.irq_enable(interrupt_evt, VfioIrqType::Intx) { + let mut fds = Vec::new(); + fds.push(interrupt_evt); + if let Err(e) = self.device.irq_enable(fds, VfioIrqType::Intx) { error!("Intx enable failed: {}", e); return; } @@ -433,13 +604,23 @@ impl VfioPciDevice { self.irq_type = None; } - fn enable_msi(&mut self) { - if let Some(irq_type) = &self.irq_type { - match irq_type { - VfioIrqType::Intx => self.disable_intx(), - _ => return, - } + fn disable_irqs(&mut self) { + match self.irq_type { + Some(VfioIrqType::Msi) => self.disable_msi(), + Some(VfioIrqType::Msix) => self.disable_msix(), + _ => (), + } + + // Above disable_msi() or disable_msix() will enable intx again. + // so disable_intx here again. + match self.irq_type { + Some(VfioIrqType::Intx) => self.disable_intx(), + _ => (), } + } + + fn enable_msi(&mut self) { + self.disable_irqs(); let irqfd = match &self.msi_cap { Some(cap) => { @@ -456,7 +637,9 @@ impl VfioPciDevice { } }; - if let Err(e) = self.device.irq_enable(irqfd, VfioIrqType::Msi) { + let mut fds = Vec::new(); + fds.push(irqfd); + if let Err(e) = self.device.irq_enable(fds, VfioIrqType::Msi) { error!("failed to enable msi: {}", e); self.enable_intx(); return; @@ -474,9 +657,48 @@ impl VfioPciDevice { self.enable_intx(); } + fn enable_msix(&mut self) { + self.disable_irqs(); + + let irqfds = match &self.msix_cap { + Some(cap) => cap.get_msix_irqfds(), + None => return, + }; + + if let Some(fds) = irqfds { + if let Err(e) = self.device.irq_enable(fds, VfioIrqType::Msix) { + error!("failed to enable msix: {}", e); + self.enable_intx(); + return; + } + } else { + self.enable_intx(); + return; + } + + self.irq_type = Some(VfioIrqType::Msix); + } + + fn disable_msix(&mut self) { + if let Err(e) = self.device.irq_disable(VfioIrqType::Msix) { + error!("failed to disable msix: {}", e); + return; + } + + self.enable_intx(); + } + fn add_bar_mmap(&self, index: u32, bar_addr: u64) -> Vec<MemoryMapping> { let mut mem_map: Vec<MemoryMapping> = Vec::new(); if self.device.get_region_flags(index) & VFIO_REGION_INFO_FLAG_MMAP != 0 { + // the bar storing msix table and pba couldn't mmap. + // these bars should be trapped, so that msix could be emulated. + if let Some(msix_cap) = &self.msix_cap { + if msix_cap.is_msix_bar(index) { + return mem_map; + } + } + let mmaps = self.device.get_region_mmap(index); if mmaps.is_empty() { return mem_map; @@ -566,10 +788,8 @@ impl PciDevice for VfioPciDevice { if let Some(ref interrupt_resample_evt) = self.interrupt_resample_evt { fds.push(interrupt_resample_evt.as_raw_fd()); } - if let Some(msi_cap) = &self.msi_cap { - fds.push(msi_cap.get_vm_socket()); - } fds.push(self.vm_socket_mem.as_raw_fd()); + fds.push(self.vm_socket_irq.as_ref().as_raw_fd()); fds } @@ -670,13 +890,6 @@ impl PciDevice for VfioPciDevice { } } - if let Err(e) = self.device.setup_dma_map() { - error!( - "failed to add all guest memory regions into iommu table: {}", - e - ); - } - // Quirk, enable igd memory for guest vga arbitrate, otherwise kernel vga arbitrate // driver doesn't claim this vga device, then xorg couldn't boot up. if self.is_intel_gfx() { @@ -759,6 +972,10 @@ impl PciDevice for VfioPciDevice { // Clear multifunction flags as pci_root doesn't // support multifunction. config &= !PCI_MULTI_FLAG; + } else if let Some(msix_cap) = &self.msix_cap { + if msix_cap.is_msix_control_reg(reg, 4) { + msix_cap.read_msix_control(&mut config); + } } // Quirk for intel graphic, set stolen memory size to 0 in pci_cfg[0x51] @@ -785,6 +1002,18 @@ impl PciDevice for VfioPciDevice { None => (), } + msi_change = None; + if let Some(msix_cap) = self.msix_cap.as_mut() { + if msix_cap.is_msix_control_reg(start as u32, data.len() as u32) { + msi_change = msix_cap.write_msix_control(data); + } + } + match msi_change { + Some(VfioMsiChange::Enable) => self.enable_msix(), + Some(VfioMsiChange::Disable) => self.disable_msix(), + None => (), + } + // if guest enable memory access, then enable bar mappable once if start == PCI_COMMAND as u64 && data.len() == 2 @@ -801,7 +1030,17 @@ impl PciDevice for VfioPciDevice { fn read_bar(&mut self, addr: u64, data: &mut [u8]) { if let Some(mmio_info) = self.find_region(addr) { let offset = addr - mmio_info.start; - self.device.region_read(mmio_info.bar_index, data, offset); + let bar_index = mmio_info.bar_index; + if let Some(msix_cap) = &self.msix_cap { + if msix_cap.is_msix_table(bar_index, offset) { + msix_cap.read_table(offset, data); + return; + } else if msix_cap.is_msix_pba(bar_index, offset) { + msix_cap.read_pba(offset, data); + return; + } + } + self.device.region_read(bar_index, data, offset); } } @@ -819,7 +1058,19 @@ impl PciDevice for VfioPciDevice { } let offset = addr - mmio_info.start; - self.device.region_write(mmio_info.bar_index, data, offset); + let bar_index = mmio_info.bar_index; + + if let Some(msix_cap) = self.msix_cap.as_mut() { + if msix_cap.is_msix_table(bar_index, offset) { + msix_cap.write_table(offset, data); + return; + } else if msix_cap.is_msix_pba(bar_index, offset) { + msix_cap.write_pba(offset, data); + return; + } + } + + self.device.region_write(bar_index, data, offset); } } } diff --git a/devices/src/pic.rs b/devices/src/pic.rs index 9b8235f..f562be6 100644 --- a/devices/src/pic.rs +++ b/devices/src/pic.rs @@ -7,12 +7,14 @@ // modern OSs that use a legacy BIOS. // The PIC is connected to the Local APIC on CPU0. -// Terminology note: The 8259A spec refers to "master" and "slave" PITs; the "slave"s are daisy +// Terminology note: The 8259A spec refers to "master" and "slave" PICs; the "slave"s are daisy // chained to the "master"s. // For the purposes of both using more descriptive terms and avoiding terms with lots of charged -// emotional context, this file refers to them instead as "primary" and "secondary" PITs. +// emotional context, this file refers to them instead as "primary" and "secondary" PICs. +use crate::split_irqchip_common::GsiRelay; use crate::BusDevice; +use std::sync::Arc; use sys_util::{debug, warn}; #[repr(usize)] @@ -30,7 +32,7 @@ enum PicInitState { Icw4 = 3, } -#[derive(Debug, Default, Clone, Copy, PartialEq)] +#[derive(Default)] struct PicState { last_irr: u8, // Edge detection. irr: u8, // Interrupt Request Register. @@ -53,12 +55,14 @@ struct PicState { elcr: u8, elcr_mask: u8, init_state: Option<PicInitState>, + is_primary: bool, + relay: Arc<GsiRelay>, } pub struct Pic { - // TODO(mutexlox): Implement an APIC and add necessary state to the Pic. - - // index 0 (aka PicSelect::Primary) is the primary pic, the rest are secondary. + // Indicates a pending INTR signal to LINT0 of vCPU, checked by vCPU thread. + interrupt_request: bool, + // Index 0 (aka PicSelect::Primary) is the primary pic, the rest are secondary. pics: [PicState; 2], } @@ -175,13 +179,19 @@ impl Pic { // The secondary PIC has IRQs 8-15, so we subtract 8 from the IRQ number to get the bit // that should be masked here. In this case, bits 8 - 8 = 0 and 13 - 8 = 5. secondary_pic.elcr_mask = !((1 << 0) | (1 << 5)); - // TODO(mutexlox): Add logic to initialize APIC interrupt-related fields. + primary_pic.is_primary = true; Pic { + interrupt_request: false, pics: [primary_pic, secondary_pic], } } + pub fn register_relay(&mut self, relay: Arc<GsiRelay>) { + self.pics[0].relay = relay.clone(); + self.pics[1].relay = relay; + } + pub fn service_irq(&mut self, irq: u8, level: bool) -> bool { assert!(irq <= 15, "Unexpectedly high value irq: {} vs 15", irq); @@ -205,8 +215,14 @@ impl Pic { self.get_irq(PicSelect::Primary).is_some() } + /// Determines whether the PIC has fired an interrupt to LAPIC. + pub fn interrupt_requested(&self) -> bool { + self.interrupt_request + } + /// Determines the external interrupt number that the PIC is prepared to inject, if any. pub fn get_external_interrupt(&mut self) -> Option<u8> { + self.interrupt_request = false; let irq_primary = if let Some(irq) = self.get_irq(PicSelect::Primary) { irq } else { @@ -385,6 +401,11 @@ impl Pic { fn clear_isr(pic: &mut PicState, irq: u8) { assert!(irq <= 7, "Unexpectedly high value for irq: {} vs 7", irq); pic.isr &= !(1 << irq); + Pic::set_irq_internal(pic, irq, false); + let irq = if pic.is_primary { irq } else { irq + 8 }; + if let Some(resample_evt) = &pic.relay.irqfd_resample[irq as usize] { + resample_evt.write(1).unwrap(); + } } fn update_irq(&mut self) -> bool { @@ -403,7 +424,7 @@ impl Pic { } if self.get_irq(PicSelect::Primary).is_some() { - // TODO(mutexlox): Signal local interrupt on APIC bus. + self.interrupt_request = true; // Note: this does not check if the interrupt is succesfully injected into // the CPU, just whether or not one is fired. true @@ -1082,26 +1103,6 @@ mod tests { assert_eq!(data.pic.pics[PicSelect::Primary as usize].priority_add, 6); } - /// Verify that no-op doesn't change state. - #[test] - fn no_op_ocw2() { - let mut data = set_up(); - icw_init_both_with_icw4(&mut data.pic, FULLY_NESTED_NO_AUTO_EOI); - - // TODO(mutexlox): Verify APIC interaction when it is implemented. - data.pic.service_irq(/*irq=*/ 5, /*level=*/ true); - assert_eq!(data.pic.get_external_interrupt(), Some(0x08 + 5)); - data.pic.service_irq(/*irq=*/ 5, /*level=*/ false); - - let orig = data.pic.pics[PicSelect::Primary as usize].clone(); - - // Run a no-op. - data.pic.write(PIC_PRIMARY_COMMAND, &[0x40]); - - // Nothing should have changed. - assert_eq!(orig, data.pic.pics[PicSelect::Primary as usize]); - } - /// Tests cascade IRQ that happens on secondary PIC. #[test] fn cascade_irq() { diff --git a/devices/src/split_irqchip_common.rs b/devices/src/split_irqchip_common.rs index 65ba809..1e513f2 100644 --- a/devices/src/split_irqchip_common.rs +++ b/devices/src/split_irqchip_common.rs @@ -5,6 +5,7 @@ // Common constants and types used for Split IRQ chip devices (e.g. PIC, PIT, IOAPIC). use bit_field::*; +use sys_util::EventFd; #[bitfield] #[derive(Clone, Copy, Debug, PartialEq)] @@ -48,7 +49,7 @@ pub struct MsiAddressMessage { #[bitfield] #[derive(Clone, Copy, PartialEq)] -struct MsiDataMessage { +pub struct MsiDataMessage { vector: BitField8, #[bits = 3] delivery_mode: DeliveryMode, @@ -58,3 +59,36 @@ struct MsiDataMessage { trigger: TriggerMode, reserved2: BitField16, } + +/// Acts as a relay of interrupt signals between devices and IRQ chips. +#[derive(Default)] +pub struct GsiRelay { + pub irqfd: [Option<EventFd>; kvm::NUM_IOAPIC_PINS], + pub irqfd_resample: [Option<EventFd>; kvm::NUM_IOAPIC_PINS], +} + +impl GsiRelay { + pub fn new() -> GsiRelay { + GsiRelay { + irqfd: Default::default(), + irqfd_resample: Default::default(), + } + } + + pub fn register_irqfd(&mut self, evt: EventFd, gsi: usize) { + if gsi >= kvm::NUM_IOAPIC_PINS { + // Invalid gsi; ignore. + return; + } + self.irqfd[gsi] = Some(evt); + } + + pub fn register_irqfd_resample(&mut self, evt: EventFd, resample_evt: EventFd, gsi: usize) { + if gsi >= kvm::NUM_IOAPIC_PINS { + // Invalid gsi; ignore. + return; + } + self.irqfd[gsi] = Some(evt); + self.irqfd_resample[gsi] = Some(resample_evt); + } +} diff --git a/devices/src/vfio.rs b/devices/src/vfio.rs index 791dc78..a574430 100644 --- a/devices/src/vfio.rs +++ b/devices/src/vfio.rs @@ -3,6 +3,7 @@ // found in the LICENSE file. use data_model::vec_with_array_field; +use std::collections::HashMap; use std::ffi::CString; use std::fmt; use std::fs::{File, OpenOptions}; @@ -11,7 +12,9 @@ use std::mem; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::os::unix::prelude::FileExt; use std::path::{Path, PathBuf}; +use std::sync::Arc; use std::u32; +use sync::Mutex; use kvm::Vm; use sys_util::{ @@ -76,25 +79,34 @@ fn get_error() -> Error { Error::last() } -struct VfioContainer { +/// VfioContainer contain multi VfioGroup, and delegate an IOMMU domain table +pub struct VfioContainer { container: File, + kvm_vfio_dev: Option<File>, + groups: HashMap<u32, Arc<VfioGroup>>, } const VFIO_API_VERSION: u8 = 0; impl VfioContainer { - fn new() -> Result<Self, VfioError> { + /// Open VfioContainer + pub fn new() -> Result<Self, VfioError> { let container = OpenOptions::new() .read(true) .write(true) .open("/dev/vfio/vfio") .map_err(VfioError::OpenContainer)?; - Ok(VfioContainer { container }) - } - - fn get_api_version(&self) -> i32 { // Safe as file is vfio container fd and ioctl is defined by kernel. - unsafe { ioctl(self, VFIO_GET_API_VERSION()) } + let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION()) }; + if version as u8 != VFIO_API_VERSION { + return Err(VfioError::VfioApiVersion); + } + + Ok(VfioContainer { + container, + kvm_vfio_dev: None, + groups: HashMap::new(), + }) } fn check_extension(&self, val: u32) -> bool { @@ -150,6 +162,66 @@ impl VfioContainer { Ok(()) } + + fn init(&mut self, vm: &Vm, guest_mem: &GuestMemory) -> Result<(), VfioError> { + if !self.check_extension(VFIO_TYPE1v2_IOMMU) { + return Err(VfioError::VfioType1V2); + } + + if self.set_iommu(VFIO_TYPE1v2_IOMMU) < 0 { + return Err(VfioError::ContainerSetIOMMU(get_error())); + } + + // Add all guest memory regions into vfio container's iommu table, + // then vfio kernel driver could access guest memory from gfn + guest_mem.with_regions(|_index, guest_addr, size, host_addr, _fd_offset| { + // Safe because the guest regions are guaranteed not to overlap + unsafe { self.vfio_dma_map(guest_addr.0, size as u64, host_addr as u64) } + })?; + + let mut vfio_dev = kvm_sys::kvm_create_device { + type_: kvm_sys::kvm_device_type_KVM_DEV_TYPE_VFIO, + fd: 0, + flags: 0, + }; + vm.create_device(&mut vfio_dev) + .map_err(VfioError::CreateVfioKvmDevice)?; + // Safe as we are the owner of vfio_dev.fd which is valid value. + let kvm_vfio_file = unsafe { File::from_raw_fd(vfio_dev.fd as i32) }; + self.kvm_vfio_dev = Some(kvm_vfio_file); + + Ok(()) + } + + fn get_group( + &mut self, + id: u32, + vm: &Vm, + guest_mem: &GuestMemory, + ) -> Result<Arc<VfioGroup>, VfioError> { + match self.groups.get(&id) { + Some(group) => Ok(group.clone()), + None => { + let group = Arc::new(VfioGroup::new(self, id)?); + + if self.groups.is_empty() { + // Before the first group is added into container, do once cotainer + // initialize for a vm + self.init(vm, guest_mem)?; + } + + let kvm_vfio_file = self + .kvm_vfio_dev + .as_ref() + .expect("kvm vfio device should exist"); + group.kvm_device_add_group(kvm_vfio_file)?; + + self.groups.insert(id, group.clone()); + + Ok(group) + } + } + } } impl AsRawFd for VfioContainer { @@ -160,11 +232,10 @@ impl AsRawFd for VfioContainer { struct VfioGroup { group: File, - container: VfioContainer, } impl VfioGroup { - fn new(id: u32, vm: &Vm) -> Result<Self, VfioError> { + fn new(container: &VfioContainer, id: u32) -> Result<Self, VfioError> { let mut group_path = String::from("/dev/vfio/"); let s_id = &id; group_path.push_str(s_id.to_string().as_str()); @@ -190,14 +261,6 @@ impl VfioGroup { return Err(VfioError::GroupViable); } - let container = VfioContainer::new()?; - if container.get_api_version() as u8 != VFIO_API_VERSION { - return Err(VfioError::VfioApiVersion); - } - if !container.check_extension(VFIO_TYPE1v2_IOMMU) { - return Err(VfioError::VfioType1V2); - } - // Safe as we are the owner of group_file and container_raw_fd which are valid value, // and we verify the ret value let container_raw_fd = container.as_raw_fd(); @@ -206,32 +269,11 @@ impl VfioGroup { return Err(VfioError::GroupSetContainer(get_error())); } - ret = container.set_iommu(VFIO_TYPE1v2_IOMMU); - if ret < 0 { - return Err(VfioError::ContainerSetIOMMU(get_error())); - } - - Self::kvm_device_add_group(vm, &group_file)?; - - Ok(VfioGroup { - group: group_file, - container, - }) + Ok(VfioGroup { group: group_file }) } - fn kvm_device_add_group(vm: &Vm, group: &File) -> Result<File, VfioError> { - let mut vfio_dev = kvm_sys::kvm_create_device { - type_: kvm_sys::kvm_device_type_KVM_DEV_TYPE_VFIO, - fd: 0, - flags: 0, - }; - vm.create_device(&mut vfio_dev) - .map_err(VfioError::CreateVfioKvmDevice)?; - - // Safe as we are the owner of vfio_dev.fd which is valid value. - let vfio_dev_fd = unsafe { File::from_raw_fd(vfio_dev.fd as i32) }; - - let group_fd = group.as_raw_fd(); + fn kvm_device_add_group(&self, kvm_vfio_file: &File) -> Result<(), VfioError> { + let group_fd = self.as_raw_fd(); let group_fd_ptr = &group_fd as *const i32; let vfio_dev_attr = kvm_sys::kvm_device_attr { flags: 0, @@ -243,12 +285,16 @@ impl VfioGroup { // Safe as we are the owner of vfio_dev_fd and vfio_dev_attr which are valid value, // and we verify the return value. if 0 != unsafe { - ioctl_with_ref(&vfio_dev_fd, kvm_sys::KVM_SET_DEVICE_ATTR(), &vfio_dev_attr) + ioctl_with_ref( + kvm_vfio_file, + kvm_sys::KVM_SET_DEVICE_ATTR(), + &vfio_dev_attr, + ) } { return Err(VfioError::KvmSetDeviceAttr(get_error())); } - Ok(vfio_dev_fd) + Ok(()) } fn get_device(&self, name: &Path) -> Result<File, VfioError> { @@ -296,17 +342,22 @@ struct VfioRegion { /// Vfio device for exposing regions which could be read/write to kernel vfio device. pub struct VfioDevice { dev: File, - group: VfioGroup, + container: Arc<Mutex<VfioContainer>>, + group_fd: RawFd, // vec for vfio device's regions regions: Vec<VfioRegion>, - guest_mem: GuestMemory, } impl VfioDevice { /// Create a new vfio device, then guest read/write on this device could be /// transfered into kernel vfio. /// sysfspath specify the vfio device path in sys file system. - pub fn new(sysfspath: &Path, vm: &Vm, guest_mem: GuestMemory) -> Result<Self, VfioError> { + pub fn new( + sysfspath: &Path, + vm: &Vm, + guest_mem: &GuestMemory, + container: Arc<Mutex<VfioContainer>>, + ) -> Result<Self, VfioError> { let mut uuid_path = PathBuf::new(); uuid_path.push(sysfspath); uuid_path.push("iommu_group"); @@ -317,22 +368,26 @@ impl VfioDevice { .parse::<u32>() .map_err(|_| VfioError::InvalidPath)?; - let group = VfioGroup::new(group_id, vm)?; + let group = container.lock().get_group(group_id, vm, guest_mem)?; let new_dev = group.get_device(sysfspath)?; let dev_regions = Self::get_regions(&new_dev)?; Ok(VfioDevice { dev: new_dev, - group, + container, + group_fd: group.as_raw_fd(), regions: dev_regions, - guest_mem, }) } - /// enable vfio device's irq and associate Irqfd EventFd with device - pub fn irq_enable(&self, fd: &EventFd, irq_type: VfioIrqType) -> Result<(), VfioError> { - let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1); - irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32; + /// Enable vfio device's irq and associate Irqfd EventFd with device. + /// When MSIx is enabled, multi vectors will be supported, so fds is vector and the vector + /// length is the num of MSIx vectors + pub fn irq_enable(&self, fds: Vec<&EventFd>, irq_type: VfioIrqType) -> Result<(), VfioError> { + let count = fds.len(); + let u32_size = mem::size_of::<u32>(); + let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count); + irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32; irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; match irq_type { VfioIrqType::Intx => irq_set[0].index = VFIO_PCI_INTX_IRQ_INDEX, @@ -340,15 +395,17 @@ impl VfioDevice { VfioIrqType::Msix => irq_set[0].index = VFIO_PCI_MSIX_IRQ_INDEX, } irq_set[0].start = 0; - irq_set[0].count = 1; + irq_set[0].count = count as u32; - { - // irq_set.data could be none, bool or fd according to flags, so irq_set.data - // is u8 default, here irq_set.data is fd as u32, so 4 default u8 are combined - // together as u32. It is safe as enough space is reserved through - // vec_with_array_field(u32)<1>. - let fds = unsafe { irq_set[0].data.as_mut_slice(4) }; - fds.copy_from_slice(&fd.as_raw_fd().to_le_bytes()[..]); + // irq_set.data could be none, bool or fd according to flags, so irq_set.data + // is u8 default, here irq_set.data is fd as u32, so 4 default u8 are combined + // together as u32. It is safe as enough space is reserved through + // vec_with_array_field(u32)<count>. + let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) }; + for fd in fds.iter().take(count) { + let (left, right) = data.split_at_mut(u32_size); + left.copy_from_slice(&fd.as_raw_fd().to_ne_bytes()[..]); + data = right; } // Safe as we are the owner of self and irq_set which are valid value @@ -734,8 +791,8 @@ impl VfioDevice { pub fn keep_fds(&self) -> Vec<RawFd> { let mut fds = Vec::new(); fds.push(self.as_raw_fd()); - fds.push(self.group.as_raw_fd()); - fds.push(self.group.container.as_raw_fd()); + fds.push(self.group_fd); + fds.push(self.container.lock().as_raw_fd()); fds } @@ -746,23 +803,12 @@ impl VfioDevice { size: u64, user_addr: u64, ) -> Result<(), VfioError> { - self.group.container.vfio_dma_map(iova, size, user_addr) + self.container.lock().vfio_dma_map(iova, size, user_addr) } /// Remove (iova, user_addr) map from vfio container iommu table pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<(), VfioError> { - self.group.container.vfio_dma_unmap(iova, size) - } - - /// Add all guest memory regions into vfio container's iommu table, - /// then vfio kernel driver could access guest memory from gfn - pub fn setup_dma_map(&self) -> Result<(), VfioError> { - self.guest_mem - .with_regions(|_index, guest_addr, size, host_addr, _fd_offset| { - // Safe because the guest regions are guaranteed not to overlap - unsafe { self.vfio_dma_map(guest_addr.0, size as u64, host_addr as u64) } - })?; - Ok(()) + self.container.lock().vfio_dma_unmap(iova, size) } } diff --git a/devices/src/virtio/block.rs b/devices/src/virtio/block.rs index 30cf4f7..c9dda55 100644 --- a/devices/src/virtio/block.rs +++ b/devices/src/virtio/block.rs @@ -20,6 +20,7 @@ use sync::Mutex; use sys_util::Error as SysError; use sys_util::Result as SysResult; use sys_util::{error, info, iov_max, warn, EventFd, GuestMemory, PollContext, PollToken, TimerFd}; +use virtio_sys::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use vm_control::{DiskControlCommand, DiskControlResponseSocket, DiskControlResult}; use super::{ @@ -298,13 +299,13 @@ impl Worker { queue_index: usize, flush_timer: &mut TimerFd, flush_timer_armed: &mut bool, - ) -> bool { + ) { let queue = &mut self.queues[queue_index]; let disk_size = self.disk_size.lock(); - let mut needs_interrupt = false; while let Some(avail_desc) = queue.pop(&self.mem) { + queue.set_notify(&self.mem, false); let desc_index = avail_desc.index; let len = match Worker::process_one_request( @@ -325,10 +326,9 @@ impl Worker { }; queue.add_used(&self.mem, desc_index, len as u32); - needs_interrupt = true; + queue.trigger_interrupt(&self.mem, &self.interrupt); + queue.set_notify(&self.mem, true); } - - needs_interrupt } fn resize(&mut self, new_size: u64) -> DiskControlResult { @@ -344,14 +344,14 @@ impl Worker { return DiskControlResult::Err(SysError::new(libc::EIO)); } - if !self.sparse { - // Allocate new space if the disk image is not sparse. - if let Err(e) = self.disk_image.allocate(0, new_size) { - error!("Allocating disk space after resize failed! {}", e); - return DiskControlResult::Err(SysError::new(libc::EIO)); - } + // Allocate new space if the disk image is not sparse. + if let Err(e) = self.disk_image.allocate(0, new_size) { + error!("Allocating disk space after resize failed! {}", e); + return DiskControlResult::Err(SysError::new(libc::EIO)); } + self.sparse = false; + if let Ok(new_disk_size) = self.disk_image.get_len() { let mut disk_size = self.disk_size.lock(); *disk_size = new_disk_size; @@ -419,9 +419,7 @@ impl Worker { error!("failed reading queue EventFd: {}", e); break 'poll; } - if self.process_queue(0, &mut flush_timer, &mut flush_timer_armed) { - self.interrupt.signal_used_queue(self.queues[0].vector); - } + self.process_queue(0, &mut flush_timer, &mut flush_timer_armed); } Token::ControlRequest => { let req = match self.control_socket.recv() { @@ -513,6 +511,7 @@ impl Block { } let mut avail_features: u64 = 1 << VIRTIO_BLK_F_FLUSH; + avail_features |= 1 << VIRTIO_RING_F_EVENT_IDX; if read_only { avail_features |= 1 << VIRTIO_BLK_F_RO; } else { @@ -627,7 +626,8 @@ impl Block { } VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_WRITE_ZEROES => { if req_type == VIRTIO_BLK_T_DISCARD && !sparse { - return Err(ExecuteError::Unsupported(req_type)); + // Discard is a hint; if this is a non-sparse disk, just ignore it. + return Ok(()); } while reader.available_bytes() >= size_of::<virtio_blk_discard_write_zeroes>() { @@ -873,8 +873,8 @@ mod tests { let b = Block::new(Box::new(f), false, true, 512, None).unwrap(); // writable device should set VIRTIO_BLK_F_FLUSH + VIRTIO_BLK_F_DISCARD // + VIRTIO_BLK_F_WRITE_ZEROES + VIRTIO_F_VERSION_1 + VIRTIO_BLK_F_BLK_SIZE - // + VIRTIO_BLK_F_SEG_MAX - assert_eq!(0x100006244, b.features()); + // + VIRTIO_BLK_F_SEG_MAX + VIRTIO_RING_F_EVENT_IDX + assert_eq!(0x120006244, b.features()); } // read-write block device, non-sparse @@ -883,8 +883,8 @@ mod tests { let b = Block::new(Box::new(f), false, false, 512, None).unwrap(); // writable device should set VIRTIO_BLK_F_FLUSH // + VIRTIO_BLK_F_WRITE_ZEROES + VIRTIO_F_VERSION_1 + VIRTIO_BLK_F_BLK_SIZE - // + VIRTIO_BLK_F_SEG_MAX - assert_eq!(0x100004244, b.features()); + // + VIRTIO_BLK_F_SEG_MAX + VIRTIO_RING_F_EVENT_IDX + assert_eq!(0x120004244, b.features()); } // read-only block device @@ -893,7 +893,8 @@ mod tests { let b = Block::new(Box::new(f), true, true, 512, None).unwrap(); // read-only device should set VIRTIO_BLK_F_FLUSH and VIRTIO_BLK_F_RO // + VIRTIO_F_VERSION_1 + VIRTIO_BLK_F_BLK_SIZE + VIRTIO_BLK_F_SEG_MAX - assert_eq!(0x100000264, b.features()); + // + VIRTIO_RING_F_EVENT_IDX + assert_eq!(0x120000264, b.features()); } } diff --git a/devices/src/virtio/descriptor_utils.rs b/devices/src/virtio/descriptor_utils.rs index fcd18ec..2e5dfd3 100644 --- a/devices/src/virtio/descriptor_utils.rs +++ b/devices/src/virtio/descriptor_utils.rs @@ -6,6 +6,7 @@ use std::cmp; use std::collections::VecDeque; use std::fmt::{self, Display}; use std::io::{self, Read, Write}; +use std::iter::FromIterator; use std::marker::PhantomData; use std::mem::{size_of, MaybeUninit}; use std::ptr::copy_nonoverlapping; @@ -215,6 +216,24 @@ pub struct Reader<'a> { buffer: DescriptorChainConsumer<'a>, } +// An iterator over `DataInit` objects on readable descriptors in the descriptor chain. +struct ReaderIterator<'a, T: DataInit> { + reader: &'a mut Reader<'a>, + phantom: PhantomData<T>, +} + +impl<'a, T: DataInit> Iterator for ReaderIterator<'a, T> { + type Item = io::Result<T>; + + fn next(&mut self) -> Option<io::Result<T>> { + if self.reader.available_bytes() == 0 { + None + } else { + Some(self.reader.read_obj()) + } + } +} + impl<'a> Reader<'a> { /// Construct a new Reader wrapper over `desc_chain`. pub fn new(mem: &'a GuestMemory, desc_chain: DescriptorChain<'a>) -> Result<Reader<'a>> { @@ -260,6 +279,16 @@ impl<'a> Reader<'a> { Ok(unsafe { obj.assume_init() }) } + /// Reads objects by consuming all the remaining data in the descriptor chain buffer and returns + /// them as a collection. Returns an error if the size of the remaining data is indivisible by + /// the size of an object of type `T`. + pub fn collect<C: FromIterator<io::Result<T>>, T: DataInit>(&'a mut self) -> C { + C::from_iter(ReaderIterator { + reader: self, + phantom: PhantomData, + }) + } + /// Reads data from the descriptor chain buffer into a file descriptor. /// Returns the number of bytes read from the descriptor chain buffer. /// The number of bytes read can be less than `count` if there isn't @@ -431,6 +460,11 @@ impl<'a> Writer<'a> { self.write_all(val.as_slice()) } + /// Writes a collection of objects into the descriptor chain buffer. + pub fn consume<T: DataInit, C: IntoIterator<Item = T>>(&mut self, vals: C) -> io::Result<()> { + vals.into_iter().map(|v| self.write_obj(v)).collect() + } + /// Returns number of bytes available for writing. May return an error if the combined /// lengths of all the buffers in the DescriptorChain would cause an overflow. pub fn available_bytes(&self) -> usize { @@ -1150,4 +1184,44 @@ mod tests { 48 ); } + + #[test] + fn consume_collect() { + use DescriptorType::*; + + let memory_start_addr = GuestAddress(0x0); + let memory = GuestMemory::new(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let vs: Vec<Le64> = vec![ + 0x0101010101010101.into(), + 0x0202020202020202.into(), + 0x0303030303030303.into(), + ]; + + let write_chain = create_descriptor_chain( + &memory, + GuestAddress(0x0), + GuestAddress(0x100), + vec![(Writable, 24)], + 0, + ) + .expect("create_descriptor_chain failed"); + let mut writer = Writer::new(&memory, write_chain).expect("failed to create Writer"); + writer + .consume(vs.clone()) + .expect("failed to consume() a vector"); + + let read_chain = create_descriptor_chain( + &memory, + GuestAddress(0x0), + GuestAddress(0x100), + vec![(Readable, 24)], + 0, + ) + .expect("create_descriptor_chain failed"); + let mut reader = Reader::new(&memory, read_chain).expect("failed to create Reader"); + let vs_read = reader + .collect::<io::Result<Vec<Le64>>, _>() + .expect("failed to collect() values"); + assert_eq!(vs, vs_read); + } } diff --git a/devices/src/virtio/gpu/mod.rs b/devices/src/virtio/gpu/mod.rs index aa674a8..8cc211f 100644 --- a/devices/src/virtio/gpu/mod.rs +++ b/devices/src/virtio/gpu/mod.rs @@ -6,10 +6,10 @@ mod protocol; mod virtio_2d_backend; mod virtio_3d_backend; mod virtio_backend; +mod virtio_gfxstream_backend; use std::cell::RefCell; use std::collections::VecDeque; -use std::fs::File; use std::i64; use std::io::Read; use std::mem::{self, size_of}; @@ -40,6 +40,8 @@ use super::{PciCapabilityType, VirtioPciShmCap, VirtioPciShmCapID}; use self::protocol::*; use self::virtio_2d_backend::Virtio2DBackend; use self::virtio_3d_backend::Virtio3DBackend; +#[cfg(feature = "gfxstream")] +use self::virtio_gfxstream_backend::VirtioGfxStreamBackend; use crate::pci::{PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability}; use vm_control::VmMemoryControlRequestSocket; @@ -51,6 +53,8 @@ pub const DEFAULT_DISPLAY_HEIGHT: u32 = 1024; pub enum GpuMode { Mode2D, Mode3D, + #[cfg(feature = "gfxstream")] + ModeGfxStream, } #[derive(Debug)] @@ -64,16 +68,6 @@ pub struct GpuParameters { pub mode: GpuMode, } -pub const DEFAULT_GPU_PARAMS: GpuParameters = GpuParameters { - display_width: DEFAULT_DISPLAY_WIDTH, - display_height: DEFAULT_DISPLAY_HEIGHT, - renderer_use_egl: true, - renderer_use_gles: true, - renderer_use_glx: false, - renderer_use_surfaceless: true, - mode: GpuMode::Mode3D, -}; - // First queue is for virtio gpu commands. Second queue is for cursor commands, which we expect // there to be fewer of. const QUEUE_SIZES: &[u16] = &[256, 16]; @@ -83,6 +77,20 @@ const GPU_BAR_NUM: u8 = 4; const GPU_BAR_OFFSET: u64 = 0; const GPU_BAR_SIZE: u64 = 1 << 33; +impl Default for GpuParameters { + fn default() -> Self { + GpuParameters { + display_width: DEFAULT_DISPLAY_WIDTH, + display_height: DEFAULT_DISPLAY_HEIGHT, + renderer_use_egl: true, + renderer_use_gles: true, + renderer_use_glx: false, + renderer_use_surfaceless: true, + mode: GpuMode::Mode3D, + } + } +} + /// A virtio-gpu backend state tracker which supports display and potentially accelerated rendering. /// /// Commands from the virtio-gpu protocol can be submitted here using the methods, and they will be @@ -132,7 +140,7 @@ trait Backend { fn import_event_device(&mut self, event_device: EventDevice, scanout: u32); /// If supported, export the resource with the given id to a file. - fn export_resource(&mut self, id: u32) -> Option<File>; + fn export_resource(&mut self, id: u32) -> ResourceResponse; /// Gets the list of supported display resolutions as a slice of `(width, height)` tuples. fn display_info(&self) -> [(u32, u32); 1]; @@ -301,6 +309,8 @@ trait Backend { enum BackendKind { Virtio2D, Virtio3D, + #[cfg(feature = "gfxstream")] + VirtioGfxStream, } impl BackendKind { @@ -309,6 +319,8 @@ impl BackendKind { match self { BackendKind::Virtio2D => Virtio2DBackend::capsets(), BackendKind::Virtio3D => Virtio3DBackend::capsets(), + #[cfg(feature = "gfxstream")] + BackendKind::VirtioGfxStream => VirtioGfxStreamBackend::capsets(), } } @@ -317,6 +329,8 @@ impl BackendKind { match self { BackendKind::Virtio2D => Virtio2DBackend::features(), BackendKind::Virtio3D => Virtio3DBackend::features(), + #[cfg(feature = "gfxstream")] + BackendKind::VirtioGfxStream => VirtioGfxStreamBackend::features(), } } @@ -350,6 +364,16 @@ impl BackendKind { gpu_device_socket, pci_bar, ), + #[cfg(feature = "gfxstream")] + BackendKind::VirtioGfxStream => VirtioGfxStreamBackend::build( + possible_displays, + display_width, + display_height, + renderer_flags, + event_devices, + gpu_device_socket, + pci_bar, + ), } } } @@ -391,7 +415,7 @@ impl Frontend { } fn process_resource_bridge(&mut self, resource_bridge: &ResourceResponseSocket) { - let request = match resource_bridge.recv() { + let ResourceRequest::GetResource { id } = match resource_bridge.recv() { Ok(msg) => msg, Err(e) => { error!("error receiving resource bridge request: {}", e); @@ -399,13 +423,7 @@ impl Frontend { } }; - let response = match request { - ResourceRequest::GetResource { id } => self - .backend - .export_resource(id) - .map(ResourceResponse::Resource) - .unwrap_or(ResourceResponse::Invalid), - }; + let response = self.backend.export_resource(id); if let Err(e) = resource_bridge.send(&response) { error!("error sending resource bridge request: {}", e); @@ -1029,6 +1047,8 @@ impl Gpu { let backend_kind = match gpu_parameters.mode { GpuMode::Mode2D => BackendKind::Virtio2D, GpuMode::Mode3D => BackendKind::Virtio3D, + #[cfg(feature = "gfxstream")] + GpuMode::ModeGfxStream => BackendKind::VirtioGfxStream, }; Gpu { diff --git a/devices/src/virtio/gpu/virtio_2d_backend.rs b/devices/src/virtio/gpu/virtio_2d_backend.rs index d92b015..a51a664 100644 --- a/devices/src/virtio/gpu/virtio_2d_backend.rs +++ b/devices/src/virtio/gpu/virtio_2d_backend.rs @@ -9,7 +9,6 @@ use std::cmp::{max, min}; use std::collections::btree_map::Entry; use std::collections::BTreeMap as Map; use std::fmt::{self, Display}; -use std::fs::File; use std::marker::PhantomData; use std::rc::Rc; use std::usize; @@ -24,6 +23,7 @@ use vm_control::VmMemoryControlRequestSocket; use super::protocol::GpuResponse; pub use super::virtio_backend::{VirtioBackend, VirtioResource}; use crate::virtio::gpu::{Backend, DisplayBackend, VIRTIO_F_VERSION_1}; +use crate::virtio::resource_bridge::ResourceResponse; #[derive(Debug)] pub enum Error { @@ -481,8 +481,8 @@ impl Backend for Virtio2DBackend { } /// If supported, export the resource with the given id to a file. - fn export_resource(&mut self, _id: u32) -> Option<File> { - None + fn export_resource(&mut self, _id: u32) -> ResourceResponse { + ResourceResponse::Invalid } /// Creates a fence with the given id that can be used to determine when the previous command diff --git a/devices/src/virtio/gpu/virtio_3d_backend.rs b/devices/src/virtio/gpu/virtio_3d_backend.rs index 21a5ac1..7ae044c 100644 --- a/devices/src/virtio/gpu/virtio_3d_backend.rs +++ b/devices/src/virtio/gpu/virtio_3d_backend.rs @@ -8,7 +8,6 @@ use std::cell::RefCell; use std::collections::btree_map::Entry; use std::collections::BTreeMap as Map; -use std::fs::File; use std::os::unix::io::AsRawFd; use std::rc::Rc; use std::usize; @@ -35,6 +34,7 @@ use crate::virtio::gpu::{ Backend, DisplayBackend, VIRTIO_F_VERSION_1, VIRTIO_GPU_F_HOST_COHERENT, VIRTIO_GPU_F_MEMORY, VIRTIO_GPU_F_VIRGL, }; +use crate::virtio::resource_bridge::{PlaneInfo, ResourceInfo, ResourceResponse}; use vm_control::{MaybeOwnedFd, VmMemoryControlRequestSocket, VmMemoryRequest, VmMemoryResponse}; @@ -319,13 +319,19 @@ impl Backend for Virtio3DBackend { } /// If supported, export the resource with the given id to a file. - fn export_resource(&mut self, id: u32) -> Option<File> { - let test: Option<File> = self - .resources + fn export_resource(&mut self, id: u32) -> ResourceResponse { + self + .resources .get(&id) // Option<resource> .and_then(|resource| resource.gpu_resource.export().ok()) // Option<(Query, File)> - .and_then(|t| Some(t.1)); // Option<File> - return test; + .map(|(q, file)| { + ResourceResponse::Resource(ResourceInfo{file, planes: [ + PlaneInfo{offset: q.out_offsets[0], stride: q.out_strides[0]}, + PlaneInfo{offset: q.out_offsets[1], stride: q.out_strides[1]}, + PlaneInfo{offset: q.out_offsets[2], stride: q.out_strides[2]}, + PlaneInfo{offset: q.out_offsets[3], stride: q.out_strides[3]}, + ]}) + }).unwrap_or(ResourceResponse::Invalid) } /// Creates a fence with the given id that can be used to determine when the previous command diff --git a/devices/src/virtio/gpu/virtio_backend.rs b/devices/src/virtio/gpu/virtio_backend.rs index 605cf5a..bb1db4a 100644 --- a/devices/src/virtio/gpu/virtio_backend.rs +++ b/devices/src/virtio/gpu/virtio_backend.rs @@ -59,6 +59,7 @@ impl VirtioBackend { pub fn import_event_device(&mut self, event_device: EventDevice, scanout: u32) { // TODO(zachr): support more than one scanout. if scanout != 0 { + error!("got nonzero scanout: {:}, but only support zero.", scanout); return; } @@ -105,6 +106,9 @@ impl VirtioBackend { match display.create_surface(None, self.display_width, self.display_height) { Ok(surface_id) => { self.scanout_surface_id = Some(surface_id); + for (event_device_id, _) in &self.event_devices { + display.attach_event_device(surface_id, *event_device_id); + } } Err(e) => error!("failed to create display surface: {}", e), } diff --git a/devices/src/virtio/gpu/virtio_gfxstream_backend.rs b/devices/src/virtio/gpu/virtio_gfxstream_backend.rs new file mode 100644 index 0000000..aa02e15 --- /dev/null +++ b/devices/src/virtio/gpu/virtio_gfxstream_backend.rs @@ -0,0 +1,722 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +//! Implementation of a virtio-gpu protocol command processor for +//! API passthrough. + +#![cfg(feature = "gfxstream")] + +use std::cell::RefCell; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap as Map; +use std::fs::File; +use std::mem::transmute; +use std::os::raw::{c_char, c_int, c_uchar, c_uint, c_void}; +use std::panic; +use std::rc::Rc; +use std::usize; + +use data_model::*; +use gpu_display::*; +use gpu_renderer::RendererFlags; +use resources::Alloc; +use sys_util::{error, GuestAddress, GuestMemory}; +use vm_control::VmMemoryControlRequestSocket; + +use super::protocol::GpuResponse; +pub use super::virtio_backend::{VirtioBackend, VirtioResource}; +use crate::virtio::gpu::{Backend, DisplayBackend, VIRTIO_F_VERSION_1, VIRTIO_GPU_F_VIRGL}; + +// C definitions related to gfxstream +// In gfxstream, only write_fence is used +// (for synchronization of commands delivered) +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GfxStreamRendererCallbacks { + pub version: c_int, + pub write_fence: unsafe extern "C" fn(cookie: *mut c_void, fence: u32), +} + +// virtio-gpu-3d transfer-related structs (begin) +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct virgl_renderer_resource_create_args { + pub handle: u32, + pub target: u32, + pub format: u32, + pub bind: u32, + pub width: u32, + pub height: u32, + pub depth: u32, + pub array_size: u32, + pub last_level: u32, + pub nr_samples: u32, + pub flags: u32, +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct virgl_renderer_resource_info { + pub handle: u32, + pub virgl_format: u32, + pub width: u32, + pub height: u32, + pub depth: u32, + pub flags: u32, + pub tex_id: u32, + pub stride: u32, + pub drm_fourcc: c_int, +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct virgl_box { + pub x: u32, + pub y: u32, + pub z: u32, + pub w: u32, + pub h: u32, + pub d: u32, +} + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct iovec { + pub iov_base: *mut c_void, + pub iov_len: usize, +} + +// virtio-gpu-3d transfer-related structs (end) + +#[link(name = "gfxstream_backend")] +extern "C" { + + // Function to globally init gfxstream backend's internal state, taking display/renderer + // parameters. + fn gfxstream_backend_init( + display_width: u32, + display_height: u32, + display_type: u32, + renderer_cookie: *mut c_void, + renderer_flags: i32, + renderer_callbacks: *mut GfxStreamRendererCallbacks, + ); + + // virtio-gpu-3d ioctl functions (begin) + + // In gfxstream, the resource create/transfer ioctls correspond to creating buffers for API + // forwarding and the notification of new API calls forwarded by the guest, unless they + // correspond to minigbm resource targets (PIPE_TEXTURE_2D), in which case they create globally + // visible shared GL textures to support gralloc. + fn pipe_virgl_renderer_poll(); + fn pipe_virgl_renderer_resource_create( + args: *mut virgl_renderer_resource_create_args, + iov: *mut iovec, + num_iovs: u32, + ) -> c_int; + + fn pipe_virgl_renderer_resource_unref(res_handle: u32); + fn pipe_virgl_renderer_context_create(handle: u32, nlen: u32, name: *const c_char) -> c_int; + fn pipe_virgl_renderer_context_destroy(handle: u32); + fn pipe_virgl_renderer_transfer_read_iov( + handle: u32, + ctx_id: u32, + level: u32, + stride: u32, + layer_stride: u32, + box_: *mut virgl_box, + offset: u64, + iov: *mut iovec, + iovec_cnt: c_int, + ) -> c_int; + fn pipe_virgl_renderer_transfer_write_iov( + handle: u32, + ctx_id: u32, + level: c_int, + stride: u32, + layer_stride: u32, + box_: *mut virgl_box, + offset: u64, + iovec: *mut iovec, + iovec_cnt: c_uint, + ) -> c_int; + fn pipe_virgl_renderer_resource_attach_iov( + res_handle: c_int, + iov: *mut iovec, + num_iovs: c_int, + ) -> c_int; + fn pipe_virgl_renderer_resource_detach_iov( + res_handle: c_int, + iov: *mut *mut iovec, + num_iovs: *mut c_int, + ); + fn pipe_virgl_renderer_create_fence(client_fence_id: c_int, ctx_id: u32) -> c_int; + fn pipe_virgl_renderer_ctx_attach_resource(ctx_id: c_int, res_handle: c_int); + fn pipe_virgl_renderer_ctx_detach_resource(ctx_id: c_int, res_handle: c_int); + + fn stream_renderer_flush_resource_and_readback( + res_handle: u32, + x: u32, + y: u32, + width: u32, + height: u32, + pixels: *mut c_uchar, + max_bytes: u32, + ); +} + +// Fence state stuff (begin) + +struct FenceState { + latest_fence: u32, +} +impl FenceState { + pub fn write(&mut self, latest_fence: u32) { + if latest_fence > self.latest_fence { + self.latest_fence = latest_fence; + } + } +} + +struct VirglCookie { + fence_state: Rc<RefCell<FenceState>>, +} + +extern "C" fn write_fence(cookie: *mut c_void, fence: u32) { + assert!(!cookie.is_null()); + let cookie = unsafe { &*(cookie as *mut VirglCookie) }; + + // Track the most recent fence. + let mut fence_state = cookie.fence_state.borrow_mut(); + fence_state.write(fence); +} + +const GFXSTREAM_RENDERER_CALLBACKS: &GfxStreamRendererCallbacks = &GfxStreamRendererCallbacks { + version: 1, + write_fence, +}; + +// Fence state stuff (end) + +pub struct VirtioGfxStreamBackend { + base: VirtioBackend, + + /// Mapping from resource ids to in-use GuestMemory. + resources: Map<u32, Option<GuestMemory>>, + + /// All commands processed by this backend are synchronous + /// and are either completed immediately or handled in a different layer, + /// so we just need to keep track of the latest created fence + /// and return that in fence_poll(). + fence_state: Rc<RefCell<FenceState>>, +} + +impl VirtioGfxStreamBackend { + pub fn new( + display: GpuDisplay, + display_width: u32, + display_height: u32, + _gpu_device_socket: VmMemoryControlRequestSocket, + _pci_bar: Alloc, + ) -> VirtioGfxStreamBackend { + let fence_state = Rc::new(RefCell::new(FenceState { latest_fence: 0 })); + let cookie: *mut VirglCookie = Box::into_raw(Box::new(VirglCookie { + fence_state: Rc::clone(&fence_state), + })); + + let renderer_flags: RendererFlags = RendererFlags::new().use_surfaceless(true); + + let display_rc_refcell = Rc::new(RefCell::new(display)); + + let scanout_surface = match (display_rc_refcell.borrow_mut()).create_surface( + None, + display_width, + display_height, + ) { + Ok(surface) => surface, + Err(e) => { + error!("Failed to create display surface: {}", e); + 0 + } + }; + + unsafe { + gfxstream_backend_init( + display_width, + display_height, + 1, /* default to shmem display */ + cookie as *mut c_void, + renderer_flags.into(), + transmute(GFXSTREAM_RENDERER_CALLBACKS), + ); + } + + VirtioGfxStreamBackend { + base: VirtioBackend { + display: Rc::clone(&display_rc_refcell), + display_width, + display_height, + event_devices: Default::default(), + scanout_resource_id: None, + scanout_surface_id: Some(scanout_surface), + cursor_resource_id: None, + cursor_surface_id: None, + }, + resources: Default::default(), + fence_state, + } + } +} + +impl Backend for VirtioGfxStreamBackend { + /// Returns the number of capsets provided by the Backend. + fn capsets() -> u32 { + 1 + } + + /// Returns the bitset of virtio features provided by the Backend. + fn features() -> u64 { + 1 << VIRTIO_GPU_F_VIRGL | 1 << VIRTIO_F_VERSION_1 + } + + /// Returns the underlying Backend. + fn build( + possible_displays: &[DisplayBackend], + display_width: u32, + display_height: u32, + _renderer_flags: RendererFlags, + _event_devices: Vec<EventDevice>, + gpu_device_socket: VmMemoryControlRequestSocket, + pci_bar: Alloc, + ) -> Option<Box<dyn Backend>> { + let mut display_opt = None; + for display in possible_displays { + match display.build() { + Ok(c) => { + display_opt = Some(c); + break; + } + Err(e) => error!("failed to open display: {}", e), + }; + } + + let display = match display_opt { + Some(d) => d, + None => { + error!("failed to open any displays"); + return None; + } + }; + + Some(Box::new(VirtioGfxStreamBackend::new( + display, + display_width, + display_height, + gpu_device_socket, + pci_bar, + ))) + } + + /// Gets a reference to the display passed into `new`. + fn display(&self) -> &Rc<RefCell<GpuDisplay>> { + &self.base.display + } + + /// Processes the internal `display` events and returns `true` if the main display was closed. + fn process_display(&mut self) -> bool { + self.base.process_display() + } + + /// Gets the list of supported display resolutions as a slice of `(width, height)` tuples. + fn display_info(&self) -> [(u32, u32); 1] { + self.base.display_info() + } + + /// Attaches the given input device to the given surface of the display (to allow for input + /// from a X11 window for example). + fn import_event_device(&mut self, event_device: EventDevice, scanout: u32) { + self.base.import_event_device(event_device, scanout); + } + + /// If supported, export the resource with the given id to a file. + fn export_resource(&mut self, _id: u32) -> Option<File> { + None + } + + /// Creates a fence with the given id that can be used to determine when the previous command + /// completed. + fn create_fence(&mut self, ctx_id: u32, fence_id: u32) -> GpuResponse { + unsafe { + pipe_virgl_renderer_create_fence(fence_id as i32, ctx_id); + } + GpuResponse::OkNoData + } + + /// Returns the id of the latest fence to complete. + fn fence_poll(&mut self) -> u32 { + unsafe { + pipe_virgl_renderer_poll(); + } + self.fence_state.borrow().latest_fence + } + + fn create_resource_2d( + &mut self, + _id: u32, + _width: u32, + _height: u32, + _format: u32, + ) -> GpuResponse { + // Not considered for gfxstream + GpuResponse::ErrUnspec + } + + /// Removes the guest's reference count for the given resource id. + fn unref_resource(&mut self, id: u32) -> GpuResponse { + match self.resources.remove(&id) { + Some(_) => (), + None => { + return GpuResponse::ErrInvalidResourceId; + } + } + + unsafe { + pipe_virgl_renderer_resource_unref(id); + } + + GpuResponse::OkNoData + } + + /// Sets the given resource id as the source of scanout to the display. + fn set_scanout(&mut self, _scanout_id: u32, _resource_id: u32) -> GpuResponse { + GpuResponse::OkNoData + } + + /// Flushes the given rectangle of pixels of the given resource to the display. + fn flush_resource( + &mut self, + id: u32, + _x: u32, + _y: u32, + _width: u32, + _height: u32, + ) -> GpuResponse { + // For now, always update the whole display. + let mut display_ref = self.base.display.borrow_mut(); + + let scanout_surface_id = match self.base.scanout_surface_id { + Some(id) => id, + _ => { + error!("No scanout surface created for backend!"); + return GpuResponse::ErrInvalidResourceId; + } + }; + + let fb = match display_ref.framebuffer_region( + scanout_surface_id, + 0, + 0, + self.base.display_width, + self.base.display_height, + ) { + Some(fb) => fb, + None => { + panic!( + "failed to access framebuffer for surface {}", + scanout_surface_id + ); + } + }; + + let fb_volatile_slice = fb.as_volatile_slice(); + let fb_begin = fb_volatile_slice.as_ptr() as *mut c_uchar; + let fb_bytes = fb_volatile_slice.size() as usize; + + unsafe { + stream_renderer_flush_resource_and_readback( + id, + 0, + 0, + self.base.display_width, + self.base.display_height, + fb_begin, + fb_bytes as u32, + ); + } + + display_ref.flip(scanout_surface_id); + + GpuResponse::OkNoData + } + + /// Copes the given rectangle of pixels of the given resource's backing memory to the host side + /// resource. + fn transfer_to_resource_2d( + &mut self, + _id: u32, + _x: u32, + _y: u32, + _width: u32, + _height: u32, + _src_offset: u64, + _mem: &GuestMemory, + ) -> GpuResponse { + // Not considered for gfxstream + GpuResponse::ErrInvalidResourceId + } + + /// Attaches backing memory to the given resource, represented by a `Vec` of `(address, size)` + /// tuples in the guest's physical address space. + fn attach_backing( + &mut self, + id: u32, + mem: &GuestMemory, + vecs: Vec<(GuestAddress, usize)>, + ) -> GpuResponse { + match self.resources.get_mut(&id) { + Some(entry) => { + *entry = Some(mem.clone()); + } + None => { + return GpuResponse::ErrInvalidResourceId; + } + } + + let mut backing_iovecs: Vec<iovec> = Vec::new(); + + for (addr, len) in vecs { + let slice = mem.get_slice(addr.offset(), len as u64).unwrap(); + backing_iovecs.push(iovec { + iov_base: slice.as_ptr() as *mut c_void, + iov_len: len as usize, + }); + } + + unsafe { + pipe_virgl_renderer_resource_attach_iov( + id as i32, + backing_iovecs.as_mut_ptr() as *mut iovec, + backing_iovecs.len() as i32, + ); + } + GpuResponse::OkNoData + } + + /// Detaches any backing memory from the given resource, if there is any. + fn detach_backing(&mut self, id: u32) -> GpuResponse { + match self.resources.get_mut(&id) { + Some(entry) => { + *entry = None; + } + None => { + return GpuResponse::ErrInvalidResourceId; + } + } + + unsafe { + pipe_virgl_renderer_resource_detach_iov( + id as i32, + std::ptr::null_mut(), + std::ptr::null_mut(), + ); + } + GpuResponse::OkNoData + } + + fn update_cursor(&mut self, _id: u32, _x: u32, _y: u32) -> GpuResponse { + // Not considered for gfxstream + GpuResponse::OkNoData + } + + fn move_cursor(&mut self, _x: u32, _y: u32) -> GpuResponse { + // Not considered for gfxstream + GpuResponse::OkNoData + } + + fn get_capset_info(&self, index: u32) -> GpuResponse { + if 0 != index { + return GpuResponse::ErrUnspec; + } + GpuResponse::OkCapsetInfo { + id: index, + version: 1, + size: 0, + } + } + + fn get_capset(&self, id: u32, _version: u32) -> GpuResponse { + if 0 != id { + return GpuResponse::ErrUnspec; + } + GpuResponse::OkCapset(Vec::new()) + } + + fn create_renderer_context(&mut self, id: u32) -> GpuResponse { + unsafe { + pipe_virgl_renderer_context_create(id, 1, std::ptr::null_mut()); + } + GpuResponse::OkNoData + } + + fn destroy_renderer_context(&mut self, id: u32) -> GpuResponse { + unsafe { + pipe_virgl_renderer_context_destroy(id); + } + GpuResponse::OkNoData + } + + fn context_attach_resource(&mut self, ctx_id: u32, res_id: u32) -> GpuResponse { + unsafe { + pipe_virgl_renderer_ctx_attach_resource(ctx_id as i32, res_id as i32); + } + GpuResponse::OkNoData + } + + fn context_detach_resource(&mut self, ctx_id: u32, res_id: u32) -> GpuResponse { + unsafe { + pipe_virgl_renderer_ctx_detach_resource(ctx_id as i32, res_id as i32); + } + GpuResponse::OkNoData + } + + fn resource_create_3d( + &mut self, + id: u32, + target: u32, + format: u32, + bind: u32, + width: u32, + height: u32, + depth: u32, + array_size: u32, + last_level: u32, + nr_samples: u32, + flags: u32, + ) -> GpuResponse { + if id == 0 { + return GpuResponse::ErrInvalidResourceId; + } + + match self.resources.entry(id) { + Entry::Vacant(slot) => { + slot.insert(None /* no guest memory attached yet */); + } + Entry::Occupied(_) => { + return GpuResponse::ErrInvalidResourceId; + } + } + + let mut create_args = virgl_renderer_resource_create_args { + handle: id, + target, + format, + bind, + width, + height, + depth, + array_size, + last_level, + nr_samples, + flags, + }; + + unsafe { + pipe_virgl_renderer_resource_create( + &mut create_args as *mut virgl_renderer_resource_create_args, + std::ptr::null_mut(), + 0, + ); + } + + GpuResponse::OkNoData + } + + fn transfer_to_resource_3d( + &mut self, + ctx_id: u32, + res_id: u32, + x: u32, + y: u32, + z: u32, + width: u32, + height: u32, + depth: u32, + level: u32, + stride: u32, + layer_stride: u32, + offset: u64, + ) -> GpuResponse { + let mut transfer_box = virgl_box { + x, + y, + z, + w: width, + h: height, + d: depth, + }; + + unsafe { + pipe_virgl_renderer_transfer_write_iov( + res_id, + ctx_id, + level as i32, + stride, + layer_stride, + &mut transfer_box as *mut virgl_box, + offset, + std::ptr::null_mut(), + 0, + ); + } + GpuResponse::OkNoData + } + + fn transfer_from_resource_3d( + &mut self, + ctx_id: u32, + res_id: u32, + x: u32, + y: u32, + z: u32, + width: u32, + height: u32, + depth: u32, + level: u32, + stride: u32, + layer_stride: u32, + offset: u64, + ) -> GpuResponse { + let mut transfer_box = virgl_box { + x, + y, + z, + w: width, + h: height, + d: depth, + }; + + unsafe { + pipe_virgl_renderer_transfer_read_iov( + res_id, + ctx_id, + level, + stride, + layer_stride, + &mut transfer_box as *mut virgl_box, + offset, + std::ptr::null_mut(), + 0, + ); + } + GpuResponse::OkNoData + } + + // Not considered for gfxstream + fn submit_command(&mut self, _ctx_id: u32, _commands: &mut [u8]) -> GpuResponse { + GpuResponse::ErrUnspec + } + + // Not considered for gfxstream + fn force_ctx_0(&mut self) {} +} diff --git a/devices/src/virtio/input/event_source.rs b/devices/src/virtio/input/event_source.rs index d190e18..392c121 100644 --- a/devices/src/virtio/input/event_source.rs +++ b/devices/src/virtio/input/event_source.rs @@ -4,32 +4,15 @@ use super::constants::*; use super::evdev::{grab_evdev, ungrab_evdev}; -use super::virtio_input_event; use super::InputError; use super::Result; use data_model::DataInit; -use linux_input_sys::input_event; +use linux_input_sys::{input_event, virtio_input_event, InputEventDecoder}; use std::collections::VecDeque; use std::io::Read; use std::io::Write; -use std::mem::size_of; use std::os::unix::io::{AsRawFd, RawFd}; -use sys_util::{error, warn}; - -trait ConvertFromVirtioInputEvent { - fn from_virtio_input_event(other: &virtio_input_event) -> input_event; -} - -impl ConvertFromVirtioInputEvent for input_event { - fn from_virtio_input_event(other: &virtio_input_event) -> input_event { - input_event { - timestamp_fields: [0, 0], - type_: other.type_.into(), - code: other.code.into(), - value: other.value.into(), - } - } -} +use sys_util::warn; /// Encapsulates a socket or device node into an abstract event source, providing a common /// interface. @@ -58,21 +41,11 @@ pub trait EventSource: AsRawFd { fn send_event(&mut self, vio_evt: &virtio_input_event) -> Result<()>; } -// Try to read 16 events at a time to match what the linux guest driver does. -const READ_BUFFER_SIZE: usize = 16 * size_of::<input_event>(); - -// The read buffer needs to be aligned to the alignment of input_event, which is aligned as u64 -#[repr(align(8))] -pub struct ReadBuffer { - buffer: [u8; READ_BUFFER_SIZE], -} - /// Encapsulates implementation details common to all kinds of event sources. pub struct EventSourceImpl<T> { source: T, queue: VecDeque<virtio_input_event>, - read_buffer: ReadBuffer, - // The read index accounts for incomplete events read previously. + read_buffer: Vec<u8>, read_idx: usize, } @@ -86,38 +59,19 @@ impl<T> EventSourceImpl<T> where T: Read + Write, { - // Receive events from the source and store them in a queue, unless they should be filtered out. - fn receive_events<F: Fn(&input_event) -> bool>(&mut self, event_filter: F) -> Result<usize> { + // Receive events from the source and store them in a queue. + fn receive_events<E: InputEventDecoder>(&mut self) -> Result<usize> { let read = self .source - .read(&mut self.read_buffer.buffer[self.read_idx..]) + .read(&mut self.read_buffer[self.read_idx..]) .map_err(InputError::EventsReadError)?; let buff_size = read + self.read_idx; - for evt_slice in self.read_buffer.buffer[..buff_size].chunks_exact(input_event::EVENT_SIZE) - { - let input_evt = match input_event::from_slice(evt_slice) { - Some(x) => x, - None => { - // This shouldn't happen because all slices (even the last one) are guaranteed - // to have the correct size and be properly aligned. - error!( - "Failed converting a slice of sice {} to input_event", - evt_slice.len() - ); - // Skipping the event here effectively means no events will be received, because - // if from_slice fails once it will fail always. - continue; - } - }; - if !event_filter(&input_evt) { - continue; - } - let vio_evt = virtio_input_event::from_input_event(input_evt); - self.queue.push_back(vio_evt); + for evt_slice in self.read_buffer[..buff_size].chunks_exact(E::SIZE) { + self.queue.push_back(E::decode(evt_slice)); } - let remainder = buff_size % input_event::EVENT_SIZE; + let remainder = buff_size % E::SIZE; // If there is an incomplete event at the end of the buffer, it needs to be moved to the // beginning and the next read operation must write right after it. if remainder != 0 { @@ -125,13 +79,13 @@ where // The copy should only happen if there is at least one complete event in the buffer, // otherwise source and destination would be the same. if buff_size != remainder { - let (des, src) = self.read_buffer.buffer.split_at_mut(buff_size - remainder); + let (des, src) = self.read_buffer.split_at_mut(buff_size - remainder); des[..remainder].copy_from_slice(&src[..remainder]); } } self.read_idx = remainder; - let received_events = buff_size / input_event::EVENT_SIZE; + let received_events = buff_size / E::SIZE; Ok(received_events) } @@ -144,32 +98,42 @@ where self.queue.pop_front() } - fn send_event(&mut self, vio_evt: &virtio_input_event) -> Result<()> { - let evt = input_event::from_virtio_input_event(vio_evt); + fn send_event(&mut self, vio_evt: &virtio_input_event, encoding: EventType) -> Result<()> { // Miscellaneous events produced by the device are sent back to it by the kernel input // subsystem, but because these events are handled by the host kernel as well as the // guest the device would get them twice. Which would prompt the device to send the // event to the guest again entering an infinite loop. - if evt.type_ != EV_MSC { + if vio_evt.type_ != EV_MSC { + let evt; + let event_bytes = match encoding { + EventType::InputEvent => { + evt = input_event::from_virtio_input_event(vio_evt); + evt.as_slice() + } + EventType::VirtioInputEvent => vio_evt.as_slice(), + }; self.source - .write_all(evt.as_slice()) + .write_all(event_bytes) .map_err(InputError::EventsWriteError)?; } Ok(()) } - fn new(source: T) -> EventSourceImpl<T> { + fn new(source: T, capacity: usize) -> EventSourceImpl<T> { EventSourceImpl { source, queue: VecDeque::new(), - read_buffer: ReadBuffer { - buffer: [0u8; READ_BUFFER_SIZE], - }, + read_buffer: vec![0; capacity], read_idx: 0, } } } +enum EventType { + VirtioInputEvent, + InputEvent, +} + /// Encapsulates a (unix) socket as an event source. pub struct SocketEventSource<T> { evt_source_impl: EventSourceImpl<T>, @@ -181,7 +145,7 @@ where { pub fn new(source: T) -> SocketEventSource<T> { SocketEventSource { - evt_source_impl: EventSourceImpl::new(source), + evt_source_impl: EventSourceImpl::new(source, 16 * virtio_input_event::SIZE), } } } @@ -205,7 +169,7 @@ where } fn receive_events(&mut self) -> Result<usize> { - self.evt_source_impl.receive_events(|_evt| true) + self.evt_source_impl.receive_events::<virtio_input_event>() } fn available_events_count(&self) -> usize { @@ -217,7 +181,8 @@ where } fn send_event(&mut self, vio_evt: &virtio_input_event) -> Result<()> { - self.evt_source_impl.send_event(vio_evt) + self.evt_source_impl + .send_event(vio_evt, EventType::VirtioInputEvent) } } @@ -232,7 +197,7 @@ where { pub fn new(source: T) -> EvdevEventSource<T> { EvdevEventSource { - evt_source_impl: EventSourceImpl::new(source), + evt_source_impl: EventSourceImpl::new(source, 16 * input_event::SIZE), } } } @@ -256,7 +221,7 @@ where } fn receive_events(&mut self) -> Result<usize> { - self.evt_source_impl.receive_events(|_evt| true) + self.evt_source_impl.receive_events::<input_event>() } fn available_events_count(&self) -> usize { @@ -268,19 +233,20 @@ where } fn send_event(&mut self, vio_evt: &virtio_input_event) -> Result<()> { - self.evt_source_impl.send_event(vio_evt) + self.evt_source_impl + .send_event(vio_evt, EventType::InputEvent) } } #[cfg(test)] mod tests { - use crate::virtio::input::event_source::input_event; - use crate::virtio::input::event_source::EventSourceImpl; - use crate::virtio::input::virtio_input_event; - use data_model::{DataInit, Le16, Le32}; use std::cmp::min; - use std::io::Read; - use std::io::Write; + use std::io::{Read, Write}; + + use data_model::{DataInit, Le16, Le32}; + use linux_input_sys::InputEventDecoder; + + use crate::virtio::input::event_source::{input_event, virtio_input_event, EventSourceImpl}; struct SourceMock { events: Vec<u8>, @@ -317,7 +283,7 @@ mod tests { #[test] fn empty_new() { - let mut source = EventSourceImpl::new(SourceMock::new(&vec![])); + let mut source = EventSourceImpl::new(SourceMock::new(&vec![]), 128); assert_eq!( source.available_events(), 0, @@ -332,9 +298,9 @@ mod tests { #[test] fn empty_receive() { - let mut source = EventSourceImpl::new(SourceMock::new(&vec![])); + let mut source = EventSourceImpl::new(SourceMock::new(&vec![]), 128); assert_eq!( - source.receive_events(|_| true).unwrap(), + source.receive_events::<input_event>().unwrap(), 0, "zero events should be received" ); @@ -367,9 +333,9 @@ mod tests { #[test] fn partial_pop() { let evts = instantiate_input_events(4usize); - let mut source = EventSourceImpl::new(SourceMock::new(&evts)); + let mut source = EventSourceImpl::new(SourceMock::new(&evts), input_event::SIZE * 4); assert_eq!( - source.receive_events(|_| true).unwrap(), + source.receive_events::<input_event>().unwrap(), evts.len(), "should receive all events" ); @@ -383,9 +349,9 @@ mod tests { fn total_pop() { const EVENT_COUNT: usize = 4; let evts = instantiate_input_events(EVENT_COUNT); - let mut source = EventSourceImpl::new(SourceMock::new(&evts)); + let mut source = EventSourceImpl::new(SourceMock::new(&evts), input_event::SIZE * 4); assert_eq!( - source.receive_events(|_| true).unwrap(), + source.receive_events::<input_event>().unwrap(), evts.len(), "should receive all events" ); diff --git a/devices/src/virtio/input/mod.rs b/devices/src/virtio/input/mod.rs index 88a2637..c789dd8 100644 --- a/devices/src/virtio/input/mod.rs +++ b/devices/src/virtio/input/mod.rs @@ -20,12 +20,11 @@ use super::{ copy_config, DescriptorChain, DescriptorError, Interrupt, Queue, Reader, VirtioDevice, Writer, TYPE_INPUT, }; -use linux_input_sys::input_event; +use linux_input_sys::{virtio_input_event, InputEventDecoder}; use std::collections::BTreeMap; use std::fmt::{self, Display}; use std::io::Read; use std::io::Write; -use std::mem::size_of; use std::thread; const EVENT_QUEUE_SIZE: u16 = 64; @@ -341,29 +340,6 @@ impl VirtioInputConfig { } } -#[derive(Copy, Clone, Debug, Default)] -#[repr(C)] -pub struct virtio_input_event { - type_: Le16, - code: Le16, - value: Le32, -} - -// Safe because it only has data and has no implicit padding. -unsafe impl DataInit for virtio_input_event {} - -impl virtio_input_event { - const EVENT_SIZE: usize = size_of::<virtio_input_event>(); - - fn from_input_event(other: &input_event) -> virtio_input_event { - virtio_input_event { - type_: Le16::from(other.type_), - code: Le16::from(other.code), - value: Le32::from(other.value), - } - } -} - struct Worker<T: EventSource> { interrupt: Interrupt, event_source: T, @@ -381,7 +357,7 @@ impl<T: EventSource> Worker<T> { ) -> Result<usize> { let mut writer = Writer::new(mem, avail_desc).map_err(InputError::Descriptor)?; - while writer.available_bytes() >= virtio_input_event::EVENT_SIZE { + while writer.available_bytes() >= virtio_input_event::SIZE { if let Some(evt) = event_source.pop_available_event() { writer.write_obj(evt).map_err(InputError::WriteQueue)?; } else { @@ -437,7 +413,7 @@ impl<T: EventSource> Worker<T> { mem: &GuestMemory, ) -> Result<usize> { let mut reader = Reader::new(mem, avail_desc).map_err(InputError::Descriptor)?; - while reader.available_bytes() >= virtio_input_event::EVENT_SIZE { + while reader.available_bytes() >= virtio_input_event::SIZE { let evt: virtio_input_event = reader.read_obj().map_err(InputError::ReadQueue)?; event_source.send_event(&evt)?; } diff --git a/devices/src/virtio/mod.rs b/devices/src/virtio/mod.rs index a1701f5..7716fe0 100644 --- a/devices/src/virtio/mod.rs +++ b/devices/src/virtio/mod.rs @@ -75,8 +75,9 @@ const TYPE_IOMMU: u32 = 23; const TYPE_FS: u32 = 26; const TYPE_PMEM: u32 = 27; // Additional types invented by crosvm -const TYPE_WL: u32 = 30; -const TYPE_TPM: u32 = 31; +const MAX_VIRTIO_DEVICE_ID: u32 = 63; +const TYPE_WL: u32 = MAX_VIRTIO_DEVICE_ID; +const TYPE_TPM: u32 = MAX_VIRTIO_DEVICE_ID - 1; const VIRTIO_F_VERSION_1: u32 = 32; diff --git a/devices/src/virtio/net.rs b/devices/src/virtio/net.rs index 38ba5a7..44a39ab 100644 --- a/devices/src/virtio/net.rs +++ b/devices/src/virtio/net.rs @@ -192,7 +192,7 @@ where }; if bytes_written > 0 { - self.rx_queue.pop_peeked(); + self.rx_queue.pop_peeked(&self.mem); self.rx_queue.add_used(&self.mem, index, bytes_written); needs_interrupt = true; } diff --git a/devices/src/virtio/queue.rs b/devices/src/virtio/queue.rs index 21cf63e..2613d2f 100644 --- a/devices/src/virtio/queue.rs +++ b/devices/src/virtio/queue.rs @@ -7,14 +7,18 @@ use std::num::Wrapping; use std::sync::atomic::{fence, Ordering}; use sys_util::{error, GuestAddress, GuestMemory}; +use virtio_sys::virtio_ring::VIRTIO_RING_F_EVENT_IDX; -use super::VIRTIO_MSI_NO_VECTOR; +use super::{Interrupt, VIRTIO_MSI_NO_VECTOR}; const VIRTQ_DESC_F_NEXT: u16 = 0x1; const VIRTQ_DESC_F_WRITE: u16 = 0x2; #[allow(dead_code)] const VIRTQ_DESC_F_INDIRECT: u16 = 0x4; +const VIRTQ_USED_F_NO_NOTIFY: u16 = 0x1; +const VIRTQ_AVAIL_F_NO_INTERRUPT: u16 = 0x1; + /// An iterator over a single descriptor chain. Not to be confused with AvailIter, /// which iterates over the descriptor chain heads in a queue. pub struct DescIter<'a> { @@ -221,6 +225,10 @@ pub struct Queue { next_avail: Wrapping<u16>, next_used: Wrapping<u16>, + + // Device feature bits accepted by the driver + features: u64, + last_used: Wrapping<u16>, } impl Queue { @@ -236,6 +244,8 @@ impl Queue { used_ring: GuestAddress(0), next_avail: Wrapping(0), next_used: Wrapping(0), + features: 0, + last_used: Wrapping(0), } } @@ -255,6 +265,8 @@ impl Queue { self.used_ring = GuestAddress(0); self.next_avail = Wrapping(0); self.next_used = Wrapping(0); + self.features = 0; + self.last_used = Wrapping(0); } pub fn is_valid(&self, mem: &GuestMemory) -> bool { @@ -336,15 +348,22 @@ impl Queue { /// Remove the first available descriptor chain from the queue. /// This function should only be called immediately following `peek`. - pub fn pop_peeked(&mut self) { + pub fn pop_peeked(&mut self, mem: &GuestMemory) { self.next_avail += Wrapping(1); + if self.features & ((1u64) << VIRTIO_RING_F_EVENT_IDX) != 0 { + let avail_event_off = self + .used_ring + .unchecked_add((4 + 8 * self.actual_size()).into()); + mem.write_obj_at_addr(self.next_avail.0 as u16, avail_event_off) + .unwrap(); + } } /// If a new DescriptorHead is available, returns one and removes it from the queue. pub fn pop<'a>(&mut self, mem: &'a GuestMemory) -> Option<DescriptorChain<'a>> { let descriptor_chain = self.peek(mem); if descriptor_chain.is_some() { - self.pop_peeked(); + self.pop_peeked(mem); } descriptor_chain } @@ -381,4 +400,354 @@ impl Queue { mem.write_obj_at_addr(self.next_used.0 as u16, used_ring.unchecked_add(2)) .unwrap(); } + + /// Enable / Disable guest notify device that requests are available on + /// the descriptor chain. + pub fn set_notify(&mut self, mem: &GuestMemory, enable: bool) { + if self.features & ((1u64) << VIRTIO_RING_F_EVENT_IDX) != 0 { + let avail_index_addr = mem.checked_offset(self.avail_ring, 2).unwrap(); + let avail_index: u16 = mem.read_obj_from_addr(avail_index_addr).unwrap(); + let avail_event_off = self + .used_ring + .unchecked_add((4 + 8 * self.actual_size()).into()); + mem.write_obj_at_addr(avail_index, avail_event_off).unwrap(); + } else { + let mut used_flags: u16 = mem.read_obj_from_addr(self.used_ring).unwrap(); + if enable { + used_flags &= !VIRTQ_USED_F_NO_NOTIFY; + } else { + used_flags |= VIRTQ_USED_F_NO_NOTIFY; + } + mem.write_obj_at_addr(used_flags, self.used_ring).unwrap(); + } + } + + // Check Whether guest enable interrupt injection or not. + fn available_interrupt_enabled(&self, mem: &GuestMemory) -> bool { + if self.features & ((1u64) << VIRTIO_RING_F_EVENT_IDX) != 0 { + let used_event_off = self + .avail_ring + .unchecked_add((4 + 2 * self.actual_size()).into()); + let used_event: u16 = mem.read_obj_from_addr(used_event_off).unwrap(); + // if used_event >= self.last_used, driver handle interrupt quickly enough, new + // interrupt could be injected. + // if used_event < self.last_used, driver hasn't finished the last interrupt, + // so no need to inject new interrupt. + if self.next_used - Wrapping(used_event) - Wrapping(1) < self.next_used - self.last_used + { + true + } else { + false + } + } else { + let avail_flags: u16 = mem.read_obj_from_addr(self.avail_ring).unwrap(); + if avail_flags & VIRTQ_AVAIL_F_NO_INTERRUPT == VIRTQ_AVAIL_F_NO_INTERRUPT { + false + } else { + true + } + } + } + + /// inject interrupt into guest on this queue + /// return true: interrupt is injected into guest for this queue + /// false: interrupt isn't injected + pub fn trigger_interrupt(&mut self, mem: &GuestMemory, interrupt: &Interrupt) -> bool { + if self.available_interrupt_enabled(mem) { + self.last_used = self.next_used; + interrupt.signal_used_queue(self.vector); + true + } else { + false + } + } + + /// Acknowledges that this set of features should be enabled on this queue. + pub fn ack_features(&mut self, features: u64) { + self.features |= features; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use data_model::{DataInit, Le16, Le32, Le64}; + use std::convert::TryInto; + use std::sync::atomic::AtomicUsize; + use std::sync::Arc; + use sys_util::EventFd; + + const GUEST_MEMORY_SIZE: u64 = 0x10000; + const DESC_OFFSET: u64 = 0; + const AVAIL_OFFSET: u64 = 0x200; + const USED_OFFSET: u64 = 0x400; + const QUEUE_SIZE: usize = 0x10; + const BUFFER_OFFSET: u64 = 0x8000; + const BUFFER_LEN: u32 = 0x400; + + #[derive(Copy, Clone, Debug)] + #[repr(C)] + struct Desc { + addr: Le64, + len: Le32, + flags: Le16, + next: Le16, + } + // Safe as this only runs in test + unsafe impl DataInit for Desc {} + + #[derive(Copy, Clone, Debug)] + #[repr(C)] + struct Avail { + flags: Le16, + idx: Le16, + ring: [Le16; QUEUE_SIZE], + used_event: Le16, + } + // Safe as this only runs in test + unsafe impl DataInit for Avail {} + impl Default for Avail { + fn default() -> Self { + Avail { + flags: Le16::from(0u16), + idx: Le16::from(0u16), + ring: [Le16::from(0u16); QUEUE_SIZE], + used_event: Le16::from(0u16), + } + } + } + + #[derive(Copy, Clone, Debug)] + #[repr(C)] + struct UsedElem { + id: Le32, + len: Le32, + } + // Safe as this only runs in test + unsafe impl DataInit for UsedElem {} + impl Default for UsedElem { + fn default() -> Self { + UsedElem { + id: Le32::from(0u32), + len: Le32::from(0u32), + } + } + } + + #[derive(Copy, Clone, Debug)] + #[repr(C)] + struct Used { + flags: Le16, + idx: Le16, + used_elem_ring: [UsedElem; QUEUE_SIZE], + avail_event: Le16, + } + // Safe as this only runs in test + unsafe impl DataInit for Used {} + impl Default for Used { + fn default() -> Self { + Used { + flags: Le16::from(0u16), + idx: Le16::from(0u16), + used_elem_ring: [UsedElem::default(); QUEUE_SIZE], + avail_event: Le16::from(0u16), + } + } + } + + fn setup_vq(queue: &mut Queue, mem: &GuestMemory) { + let desc = Desc { + addr: Le64::from(BUFFER_OFFSET), + len: Le32::from(BUFFER_LEN), + flags: Le16::from(0u16), + next: Le16::from(1u16), + }; + let _ = mem.write_obj_at_addr(desc, GuestAddress(DESC_OFFSET)); + + let avail = Avail::default(); + let _ = mem.write_obj_at_addr(avail, GuestAddress(AVAIL_OFFSET)); + + let used = Used::default(); + let _ = mem.write_obj_at_addr(used, GuestAddress(USED_OFFSET)); + + queue.desc_table = GuestAddress(DESC_OFFSET); + queue.avail_ring = GuestAddress(AVAIL_OFFSET); + queue.used_ring = GuestAddress(USED_OFFSET); + queue.ack_features((1u64) << VIRTIO_RING_F_EVENT_IDX); + } + + #[test] + fn queue_event_id_guest_fast() { + let mut queue = Queue::new(QUEUE_SIZE.try_into().unwrap()); + let memory_start_addr = GuestAddress(0x0); + let mem = GuestMemory::new(&vec![(memory_start_addr, GUEST_MEMORY_SIZE)]).unwrap(); + setup_vq(&mut queue, &mem); + + let interrupt = Interrupt::new( + Arc::new(AtomicUsize::new(0)), + EventFd::new().unwrap(), + EventFd::new().unwrap(), + None, + 10, + ); + + // Calculating the offset of used_event within Avail structure + let used_event_offset: u64 = + unsafe { &(*(::std::ptr::null::<Avail>())).used_event as *const _ as u64 }; + let used_event_address = GuestAddress(AVAIL_OFFSET + used_event_offset); + + // Assume driver submit 0x100 req to device, + // device has handled them, so increase self.next_used to 0x100 + let mut device_generate: Wrapping<u16> = Wrapping(0x100); + for _ in 0..device_generate.0 { + queue.add_used(&mem, 0x0, BUFFER_LEN); + } + + // At this moment driver hasn't handled any interrupts yet, so it + // should inject interrupt. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true); + + // Driver handle all the interrupts and update avail.used_event to 0x100 + let mut driver_handled = device_generate; + let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address); + + // At this moment driver have handled all the interrupts, and + // device doesn't generate more data, so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + + // Assume driver submit another u16::MAX - 0x100 req to device, + // Device has handled all of them, so increase self.next_used to u16::MAX + for _ in device_generate.0..u16::max_value() { + queue.add_used(&mem, 0x0, BUFFER_LEN); + } + device_generate = Wrapping(u16::max_value()); + + // At this moment driver just handled 0x100 interrupts, so it + // should inject interrupt. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true); + + // driver handle all the interrupts and update avail.used_event to u16::MAX + driver_handled = device_generate; + let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address); + + // At this moment driver have handled all the interrupts, and + // device doesn't generate more data, so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + + // Assume driver submit another 1 request, + // device has handled it, so wrap self.next_used to 0 + queue.add_used(&mem, 0x0, BUFFER_LEN); + device_generate += Wrapping(1); + + // At this moment driver has handled all the previous interrupts, so it + // should inject interrupt again. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true); + + // driver handle that interrupts and update avail.used_event to 0 + driver_handled = device_generate; + let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address); + + // At this moment driver have handled all the interrupts, and + // device doesn't generate more data, so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + } + + #[test] + fn queue_event_id_guest_slow() { + let mut queue = Queue::new(QUEUE_SIZE.try_into().unwrap()); + let memory_start_addr = GuestAddress(0x0); + let mem = GuestMemory::new(&vec![(memory_start_addr, GUEST_MEMORY_SIZE)]).unwrap(); + setup_vq(&mut queue, &mem); + + let interrupt = Interrupt::new( + Arc::new(AtomicUsize::new(0)), + EventFd::new().unwrap(), + EventFd::new().unwrap(), + None, + 10, + ); + + // Calculating the offset of used_event within Avail structure + let used_event_offset: u64 = + unsafe { &(*(::std::ptr::null::<Avail>())).used_event as *const _ as u64 }; + let used_event_address = GuestAddress(AVAIL_OFFSET + used_event_offset); + + // Assume driver submit 0x100 req to device, + // device have handled 0x100 req, so increase self.next_used to 0x100 + let mut device_generate: Wrapping<u16> = Wrapping(0x100); + for _ in 0..device_generate.0 { + queue.add_used(&mem, 0x0, BUFFER_LEN); + } + + // At this moment driver hasn't handled any interrupts yet, so it + // should inject interrupt. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true); + + // Driver handle part of the interrupts and update avail.used_event to 0x80 + let mut driver_handled = Wrapping(0x80); + let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address); + + // At this moment driver hasn't finished last interrupt yet, + // so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + + // Assume driver submit another 1 request, + // device has handled it, so increment self.next_used. + queue.add_used(&mem, 0x0, BUFFER_LEN); + device_generate += Wrapping(1); + + // At this moment driver hasn't finished last interrupt yet, + // so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + + // Assume driver submit another u16::MAX - 0x101 req to device, + // Device has handled all of them, so increase self.next_used to u16::MAX + for _ in device_generate.0..u16::max_value() { + queue.add_used(&mem, 0x0, BUFFER_LEN); + } + device_generate = Wrapping(u16::max_value()); + + // At this moment driver hasn't finished last interrupt yet, + // so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + + // driver handle most of the interrupts and update avail.used_event to u16::MAX - 1, + driver_handled = device_generate - Wrapping(1); + let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address); + + // Assume driver submit another 1 request, + // device has handled it, so wrap self.next_used to 0 + queue.add_used(&mem, 0x0, BUFFER_LEN); + device_generate += Wrapping(1); + + // At this moment driver has already finished the last interrupt(0x100), + // and device service other request, so new interrupt is needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true); + + // Assume driver submit another 1 request, + // device has handled it, so increment self.next_used to 1 + queue.add_used(&mem, 0x0, BUFFER_LEN); + device_generate += Wrapping(1); + + // At this moment driver hasn't finished last interrupt((Wrapping(0)) yet, + // so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + + // driver handle all the remain interrupts and wrap avail.used_event to 0x1. + driver_handled = device_generate; + let _ = mem.write_obj_at_addr(Le16::from(driver_handled.0), used_event_address); + + // At this moment driver has handled all the interrupts, and + // device doesn't generate more data, so interrupt isn't needed. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), false); + + // Assume driver submit another 1 request, + // device has handled it, so increase self.next_used. + queue.add_used(&mem, 0x0, BUFFER_LEN); + device_generate += Wrapping(1); + + // At this moment driver has finished all the previous interrupts, so it + // should inject interrupt again. + assert_eq!(queue.trigger_interrupt(&mem, &interrupt), true); + } } diff --git a/devices/src/virtio/resource_bridge.rs b/devices/src/virtio/resource_bridge.rs index aaf776c..2a2343f 100644 --- a/devices/src/virtio/resource_bridge.rs +++ b/devices/src/virtio/resource_bridge.rs @@ -16,9 +16,22 @@ pub enum ResourceRequest { GetResource { id: u32 }, } +#[derive(MsgOnSocket, Clone)] +pub struct PlaneInfo { + pub offset: u32, + pub stride: u32, +} + +const RESOURE_PLANE_NUM: usize = 4; +#[derive(MsgOnSocket)] +pub struct ResourceInfo { + pub file: File, + pub planes: [PlaneInfo; RESOURE_PLANE_NUM], +} + #[derive(MsgOnSocket)] pub enum ResourceResponse { - Resource(File), + Resource(ResourceInfo), Invalid, } @@ -58,16 +71,16 @@ impl fmt::Display for ResourceBridgeError { impl std::error::Error for ResourceBridgeError {} -pub fn get_resource_fd( +pub fn get_resource_info( sock: &ResourceRequestSocket, id: u32, -) -> std::result::Result<File, ResourceBridgeError> { +) -> std::result::Result<ResourceInfo, ResourceBridgeError> { if let Err(e) = sock.send(&ResourceRequest::GetResource { id }) { return Err(ResourceBridgeError::SendFailure(id, e)); } match sock.recv() { - Ok(ResourceResponse::Resource(bridged_file)) => Ok(bridged_file), + Ok(ResourceResponse::Resource(info)) => Ok(info), Ok(ResourceResponse::Invalid) => Err(ResourceBridgeError::InvalidResource(id)), Err(e) => Err(ResourceBridgeError::RecieveFailure(id, e)), } diff --git a/devices/src/virtio/virtio_pci_common_config.rs b/devices/src/virtio/virtio_pci_common_config.rs index 78af4ff..97d7001 100644 --- a/devices/src/virtio/virtio_pci_common_config.rs +++ b/devices/src/virtio/virtio_pci_common_config.rs @@ -185,7 +185,11 @@ impl VirtioPciCommonConfig { 0x08 => self.driver_feature_select = value, 0x0c => { if self.driver_feature_select < 2 { - device.ack_features((value as u64) << (self.driver_feature_select * 32)); + let features: u64 = (value as u64) << (self.driver_feature_select * 32); + device.ack_features(features); + for queue in queues.iter_mut() { + queue.ack_features(features); + } } else { warn!( "invalid ack_features (page {}, value 0x{:x})", diff --git a/devices/src/virtio/virtio_pci_device.rs b/devices/src/virtio/virtio_pci_device.rs index 4b161d4..c6d6786 100644 --- a/devices/src/virtio/virtio_pci_device.rs +++ b/devices/src/virtio/virtio_pci_device.rs @@ -252,7 +252,10 @@ impl VirtioPciDevice { // One MSI-X vector per queue plus one for configuration changes. let msix_num = u16::try_from(num_queues + 1).map_err(|_| sys_util::Error::new(ERANGE))?; - let msix_config = Arc::new(Mutex::new(MsixConfig::new(msix_num, msi_device_socket))); + let msix_config = Arc::new(Mutex::new(MsixConfig::new( + msix_num, + Arc::new(msi_device_socket), + ))); let config_regs = PciConfiguration::new( VIRTIO_PCI_VENDOR_ID, diff --git a/devices/src/virtio/wl.rs b/devices/src/virtio/wl.rs index 5ace29c..65ad1cf 100644 --- a/devices/src/virtio/wl.rs +++ b/devices/src/virtio/wl.rs @@ -1093,10 +1093,10 @@ impl WlState { #[cfg(feature = "gpu")] VIRTIO_WL_CTRL_VFD_SEND_KIND_VIRTGPU if self.resource_bridge.is_some() => { let sock = self.resource_bridge.as_ref().unwrap(); - match get_resource_fd(sock, id) { - Ok(bridged_file) => { - *fd = bridged_file.as_raw_fd(); - bridged_files.push(bridged_file); + match get_resource_info(sock, id) { + Ok(info) => { + *fd = info.file.as_raw_fd(); + bridged_files.push(info.file); } Err(ResourceBridgeError::InvalidResource(id)) => { warn!("attempt to send non-existent gpu resource {}", id); diff --git a/disk/src/android_sparse.rs b/disk/src/android_sparse.rs new file mode 100644 index 0000000..07e5714 --- /dev/null +++ b/disk/src/android_sparse.rs @@ -0,0 +1,515 @@ +// Copyright 2019 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// https://android.googlesource.com/platform/system/core/+/7b444f0/libsparse/sparse_format.h + +use std::collections::BTreeMap; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::{self, ErrorKind, Read, Seek, SeekFrom}; +use std::mem; +use std::os::unix::io::{AsRawFd, RawFd}; + +use crate::DiskGetLen; +use data_model::{DataInit, Le16, Le32, VolatileSlice}; +use remain::sorted; +use sys_util::{ + FileAllocate, FileReadWriteAtVolatile, FileSetLen, FileSync, PunchHole, WriteZeroesAt, +}; + +#[sorted] +#[derive(Debug)] +pub enum Error { + InvalidMagicHeader, + InvalidSpecification(String), + ReadSpecificationError(io::Error), +} + +impl Display for Error { + #[remain::check] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use self::Error::*; + + #[sorted] + match self { + InvalidMagicHeader => write!(f, "invalid magic header for android sparse format"), + InvalidSpecification(s) => write!(f, "invalid specification: \"{}\"", s), + ReadSpecificationError(e) => write!(f, "failed to read specification: \"{}\"", e), + } + } +} + +pub type Result<T> = std::result::Result<T, Error>; + +pub const SPARSE_HEADER_MAGIC: u32 = 0xed26ff3a; +const MAJOR_VERSION: u16 = 1; + +#[repr(C)] +#[derive(Clone, Copy, Debug)] +struct SparseHeader { + magic: Le32, /* SPARSE_HEADER_MAGIC */ + major_version: Le16, /* (0x1) - reject images with higher major versions */ + minor_version: Le16, /* (0x0) - allow images with higer minor versions */ + file_hdr_sz: Le16, /* 28 bytes for first revision of the file format */ + chunk_hdr_size: Le16, /* 12 bytes for first revision of the file format */ + blk_sz: Le32, /* block size in bytes, must be a multiple of 4 (4096) */ + total_blks: Le32, /* total blocks in the non-sparse output image */ + total_chunks: Le32, /* total chunks in the sparse input image */ + image_checksum: Le32, /* CRC32 checksum of the original data, counting "don't care" */ + /* as 0. Standard 802.3 polynomial, use a Public Domain */ + /* table implementation */ +} + +unsafe impl DataInit for SparseHeader {} + +const CHUNK_TYPE_RAW: u16 = 0xCAC1; +const CHUNK_TYPE_FILL: u16 = 0xCAC2; +const CHUNK_TYPE_DONT_CARE: u16 = 0xCAC3; +const CHUNK_TYPE_CRC32: u16 = 0xCAC4; + +#[repr(C)] +#[derive(Clone, Copy, Debug)] +struct ChunkHeader { + chunk_type: Le16, /* 0xCAC1 -> raw; 0xCAC2 -> fill; 0xCAC3 -> don't care */ + reserved1: u16, + chunk_sz: Le32, /* in blocks in output image */ + total_sz: Le32, /* in bytes of chunk input file including chunk header and data */ +} + +unsafe impl DataInit for ChunkHeader {} + +#[derive(Clone, Debug, PartialEq, Eq)] +enum Chunk { + Raw(u64), // Offset into the file + Fill(Vec<u8>), + DontCare, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct ChunkWithSize { + chunk: Chunk, + expanded_size: u64, +} + +/* Following a Raw or Fill or CRC32 chunk is data. + * For a Raw chunk, it's the data in chunk_sz * blk_sz. + * For a Fill chunk, it's 4 bytes of the fill data. + * For a CRC32 chunk, it's 4 bytes of CRC32 + */ +#[derive(Debug)] +pub struct AndroidSparse { + file: File, + total_size: u64, + chunks: BTreeMap<u64, ChunkWithSize>, +} + +fn parse_chunk<T: Read + Seek>( + mut input: &mut T, + chunk_hdr_size: u64, + blk_sz: u64, +) -> Result<Option<ChunkWithSize>> { + let current_offset = input + .seek(SeekFrom::Current(0)) + .map_err(Error::ReadSpecificationError)?; + let chunk_header = + ChunkHeader::from_reader(&mut input).map_err(Error::ReadSpecificationError)?; + let chunk = match chunk_header.chunk_type.to_native() { + CHUNK_TYPE_RAW => { + input + .seek(SeekFrom::Current( + chunk_header.total_sz.to_native() as i64 - chunk_hdr_size as i64, + )) + .map_err(Error::ReadSpecificationError)?; + Chunk::Raw(current_offset + chunk_hdr_size as u64) + } + CHUNK_TYPE_FILL => { + if chunk_header.total_sz == chunk_hdr_size as u32 { + return Err(Error::InvalidSpecification(format!( + "Fill chunk did not have any data to fill" + ))); + } + let fill_size = chunk_header.total_sz.to_native() as u64 - chunk_hdr_size as u64; + let mut fill_bytes = vec![0u8; fill_size as usize]; + input + .read_exact(&mut fill_bytes) + .map_err(Error::ReadSpecificationError)?; + Chunk::Fill(fill_bytes) + } + CHUNK_TYPE_DONT_CARE => Chunk::DontCare, + CHUNK_TYPE_CRC32 => return Ok(None), // TODO(schuffelen): Validate crc32s in input + unknown_type => { + return Err(Error::InvalidSpecification(format!( + "Chunk had invalid type, was {:x}", + unknown_type + ))) + } + }; + let expanded_size = chunk_header.chunk_sz.to_native() as u64 * blk_sz; + Ok(Some(ChunkWithSize { + chunk, + expanded_size, + })) +} + +impl AndroidSparse { + pub fn from_file(mut file: File) -> Result<AndroidSparse> { + file.seek(SeekFrom::Start(0)) + .map_err(Error::ReadSpecificationError)?; + let sparse_header = + SparseHeader::from_reader(&mut file).map_err(Error::ReadSpecificationError)?; + if sparse_header.magic != SPARSE_HEADER_MAGIC { + return Err(Error::InvalidSpecification(format!( + "Header did not match magic constant. Expected {:x}, was {:x}", + SPARSE_HEADER_MAGIC, + sparse_header.magic.to_native() + ))); + } else if sparse_header.major_version != MAJOR_VERSION { + return Err(Error::InvalidSpecification(format!( + "Header major version did not match. Expected {}, was {}", + MAJOR_VERSION, + sparse_header.major_version.to_native(), + ))); + } else if (sparse_header.chunk_hdr_size.to_native() as usize) + < mem::size_of::<ChunkHeader>() + { + return Err(Error::InvalidSpecification(format!( + "Chunk header size does not fit chunk header struct, expected >={}, was {}", + sparse_header.chunk_hdr_size.to_native(), + mem::size_of::<ChunkHeader>() + ))); + } + let header_size = sparse_header.chunk_hdr_size.to_native() as u64; + let block_size = sparse_header.blk_sz.to_native() as u64; + let chunks = (0..sparse_header.total_chunks.to_native()) + .filter_map(|_| parse_chunk(&mut file, header_size, block_size).transpose()) + .collect::<Result<Vec<ChunkWithSize>>>()?; + let total_size = + sparse_header.total_blks.to_native() as u64 * sparse_header.blk_sz.to_native() as u64; + AndroidSparse::from_parts(file, total_size, chunks) + } + + fn from_parts(file: File, size: u64, chunks: Vec<ChunkWithSize>) -> Result<AndroidSparse> { + let mut chunks_map: BTreeMap<u64, ChunkWithSize> = BTreeMap::new(); + let mut expanded_location: u64 = 0; + for chunk_with_size in chunks { + let size = chunk_with_size.expanded_size; + if chunks_map + .insert(expanded_location, chunk_with_size) + .is_some() + { + return Err(Error::InvalidSpecification(format!( + "Two chunks were at {}", + expanded_location + ))); + } + expanded_location += size; + } + let image = AndroidSparse { + file, + total_size: size, + chunks: chunks_map, + }; + let calculated_len = image.get_len().map_err(Error::ReadSpecificationError)?; + if calculated_len != size { + return Err(Error::InvalidSpecification(format!( + "Header promised size {}, chunks added up to {}", + size, calculated_len + ))); + } + Ok(image) + } +} + +impl DiskGetLen for AndroidSparse { + fn get_len(&self) -> io::Result<u64> { + Ok(self.total_size) + } +} + +impl FileSetLen for AndroidSparse { + fn set_len(&self, _len: u64) -> io::Result<()> { + Err(io::Error::new( + ErrorKind::PermissionDenied, + "unsupported operation", + )) + } +} + +impl FileSync for AndroidSparse { + fn fsync(&mut self) -> io::Result<()> { + Ok(()) + } +} + +impl PunchHole for AndroidSparse { + fn punch_hole(&mut self, _offset: u64, _length: u64) -> io::Result<()> { + Err(io::Error::new( + ErrorKind::PermissionDenied, + "unsupported operation", + )) + } +} + +impl WriteZeroesAt for AndroidSparse { + fn write_zeroes_at(&mut self, _offset: u64, _length: usize) -> io::Result<usize> { + Err(io::Error::new( + ErrorKind::PermissionDenied, + "unsupported operation", + )) + } +} + +impl AsRawFd for AndroidSparse { + fn as_raw_fd(&self) -> RawFd { + self.file.as_raw_fd() + } +} + +impl FileAllocate for AndroidSparse { + fn allocate(&mut self, _offset: u64, _length: u64) -> io::Result<()> { + Err(io::Error::new( + ErrorKind::PermissionDenied, + "unsupported operation", + )) + } +} + +// Performs reads up to the chunk boundary. +impl FileReadWriteAtVolatile for AndroidSparse { + fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> { + let found_chunk = self.chunks.range(..=offset).next_back(); + let ( + chunk_start, + ChunkWithSize { + chunk, + expanded_size, + }, + ) = found_chunk.ok_or(io::Error::new( + ErrorKind::UnexpectedEof, + format!("no chunk for offset {}", offset), + ))?; + let chunk_offset = offset - chunk_start; + let chunk_size = *expanded_size; + let subslice = if chunk_offset + slice.size() > chunk_size { + slice + .sub_slice(0, chunk_size - chunk_offset) + .map_err(|e| io::Error::new(ErrorKind::InvalidData, format!("{:?}", e)))? + } else { + slice + }; + match chunk { + Chunk::DontCare => { + subslice.write_bytes(0); + Ok(subslice.size() as usize) + } + Chunk::Raw(file_offset) => self + .file + .read_at_volatile(subslice, *file_offset + chunk_offset), + Chunk::Fill(fill_bytes) => { + let filled_memory: Vec<u8> = fill_bytes + .iter() + .cloned() + .cycle() + .skip(chunk_offset as usize) + .take(subslice.size() as usize) + .collect(); + subslice.copy_from(&filled_memory); + Ok(subslice.size() as usize) + } + } + } + fn write_at_volatile(&mut self, _slice: VolatileSlice, _offset: u64) -> io::Result<usize> { + Err(io::Error::new( + ErrorKind::PermissionDenied, + "unsupported operation", + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use data_model::VolatileMemory; + use std::io::{Cursor, Write}; + use sys_util::SharedMemory; + + const CHUNK_SIZE: usize = mem::size_of::<ChunkHeader>(); + + #[test] + fn parse_raw() { + let chunk_raw = ChunkHeader { + chunk_type: CHUNK_TYPE_RAW.into(), + reserved1: 0, + chunk_sz: 1.into(), + total_sz: (CHUNK_SIZE as u32 + 123).into(), + }; + let header_bytes = chunk_raw.as_slice(); + let mut chunk_bytes: Vec<u8> = Vec::new(); + chunk_bytes.extend_from_slice(header_bytes); + chunk_bytes.extend_from_slice(&[0u8; 123]); + let mut chunk_cursor = Cursor::new(chunk_bytes); + let chunk = parse_chunk(&mut chunk_cursor, CHUNK_SIZE as u64, 123) + .expect("Failed to parse") + .expect("Failed to determine chunk type"); + let expected_chunk = ChunkWithSize { + chunk: Chunk::Raw(CHUNK_SIZE as u64), + expanded_size: 123, + }; + assert_eq!(expected_chunk, chunk); + } + + #[test] + fn parse_dont_care() { + let chunk_raw = ChunkHeader { + chunk_type: CHUNK_TYPE_DONT_CARE.into(), + reserved1: 0, + chunk_sz: 100.into(), + total_sz: (CHUNK_SIZE as u32).into(), + }; + let header_bytes = chunk_raw.as_slice(); + let mut chunk_cursor = Cursor::new(header_bytes); + let chunk = parse_chunk(&mut chunk_cursor, CHUNK_SIZE as u64, 123) + .expect("Failed to parse") + .expect("Failed to determine chunk type"); + let expected_chunk = ChunkWithSize { + chunk: Chunk::DontCare, + expanded_size: 12300, + }; + assert_eq!(expected_chunk, chunk); + } + + #[test] + fn parse_fill() { + let chunk_raw = ChunkHeader { + chunk_type: CHUNK_TYPE_FILL.into(), + reserved1: 0, + chunk_sz: 100.into(), + total_sz: (CHUNK_SIZE as u32 + 4).into(), + }; + let header_bytes = chunk_raw.as_slice(); + let mut chunk_bytes: Vec<u8> = Vec::new(); + chunk_bytes.extend_from_slice(header_bytes); + chunk_bytes.extend_from_slice(&[123u8; 4]); + let mut chunk_cursor = Cursor::new(chunk_bytes); + let chunk = parse_chunk(&mut chunk_cursor, CHUNK_SIZE as u64, 123) + .expect("Failed to parse") + .expect("Failed to determine chunk type"); + let expected_chunk = ChunkWithSize { + chunk: Chunk::Fill(vec![123, 123, 123, 123]), + expanded_size: 12300, + }; + assert_eq!(expected_chunk, chunk); + } + + #[test] + fn parse_crc32() { + let chunk_raw = ChunkHeader { + chunk_type: CHUNK_TYPE_CRC32.into(), + reserved1: 0, + chunk_sz: 0.into(), + total_sz: (CHUNK_SIZE as u32 + 4).into(), + }; + let header_bytes = chunk_raw.as_slice(); + let mut chunk_bytes: Vec<u8> = Vec::new(); + chunk_bytes.extend_from_slice(header_bytes); + chunk_bytes.extend_from_slice(&[123u8; 4]); + let mut chunk_cursor = Cursor::new(chunk_bytes); + let chunk = + parse_chunk(&mut chunk_cursor, CHUNK_SIZE as u64, 123).expect("Failed to parse"); + assert_eq!(None, chunk); + } + + fn test_image(chunks: Vec<ChunkWithSize>) -> AndroidSparse { + let file: File = SharedMemory::anon().unwrap().into(); + let size = chunks.iter().map(|x| x.expanded_size).sum(); + AndroidSparse::from_parts(file, size, chunks).expect("Could not create image") + } + + #[test] + fn read_dontcare() { + let chunks = vec![ChunkWithSize { + chunk: Chunk::DontCare, + expanded_size: 100, + }]; + let mut image = test_image(chunks); + let mut input_memory = [55u8; 100]; + let input_volatile_memory = &mut input_memory[..]; + image + .read_exact_at_volatile(input_volatile_memory.get_slice(0, 100).unwrap(), 0) + .expect("Could not read"); + let input_vec: Vec<u8> = input_memory.into_iter().cloned().collect(); + assert_eq!(input_vec, vec![0u8; 100]); + } + + #[test] + fn read_fill_simple() { + let chunks = vec![ChunkWithSize { + chunk: Chunk::Fill(vec![10, 20]), + expanded_size: 8, + }]; + let mut image = test_image(chunks); + let mut input_memory = [55u8; 8]; + let input_volatile_memory = &mut input_memory[..]; + image + .read_exact_at_volatile(input_volatile_memory.get_slice(0, 8).unwrap(), 0) + .expect("Could not read"); + let input_vec: Vec<u8> = input_memory.into_iter().cloned().collect(); + assert_eq!(input_vec, vec![10, 20, 10, 20, 10, 20, 10, 20]); + } + + #[test] + fn read_fill_edges() { + let chunks = vec![ChunkWithSize { + chunk: Chunk::Fill(vec![10, 20, 30]), + expanded_size: 8, + }]; + let mut image = test_image(chunks); + let mut input_memory = [55u8; 6]; + let input_volatile_memory = &mut input_memory[..]; + image + .read_exact_at_volatile(input_volatile_memory.get_slice(0, 6).unwrap(), 1) + .expect("Could not read"); + let input_vec: Vec<u8> = input_memory.into_iter().cloned().collect(); + assert_eq!(input_vec, vec![20, 30, 10, 20, 30, 10]); + } + + #[test] + fn read_raw() { + let chunks = vec![ChunkWithSize { + chunk: Chunk::Raw(0), + expanded_size: 100, + }]; + let mut image = test_image(chunks); + write!(image.file, "hello").expect("Failed to write into internal file"); + let mut input_memory = [55u8; 5]; + let input_volatile_memory = &mut input_memory[..]; + image + .read_exact_at_volatile(input_volatile_memory.get_slice(0, 5).unwrap(), 0) + .expect("Could not read"); + let input_vec: Vec<u8> = input_memory.into_iter().cloned().collect(); + assert_eq!(input_vec, vec![104, 101, 108, 108, 111]); + } + + #[test] + fn read_two_fills() { + let chunks = vec![ + ChunkWithSize { + chunk: Chunk::Fill(vec![10, 20]), + expanded_size: 4, + }, + ChunkWithSize { + chunk: Chunk::Fill(vec![30, 40]), + expanded_size: 4, + }, + ]; + let mut image = test_image(chunks); + let mut input_memory = [55u8; 8]; + let input_volatile_memory = &mut input_memory[..]; + image + .read_exact_at_volatile(input_volatile_memory.get_slice(0, 8).unwrap(), 0) + .expect("Could not read"); + let input_vec: Vec<u8> = input_memory.into_iter().cloned().collect(); + assert_eq!(input_vec, vec![10, 20, 10, 20, 30, 40, 30, 40]); + } +} diff --git a/disk/src/composite.rs b/disk/src/composite.rs index cd048c1..e95c8e9 100644 --- a/disk/src/composite.rs +++ b/disk/src/composite.rs @@ -24,7 +24,7 @@ pub enum Error { InvalidMagicHeader, InvalidProto(protobuf::ProtobufError), InvalidSpecification(String), - OpenFile(io::Error), + OpenFile(io::Error, String), ReadSpecificationError(io::Error), UnknownVersion(u64), UnsupportedComponent(ImageType), @@ -41,7 +41,7 @@ impl Display for Error { InvalidMagicHeader => write!(f, "invalid magic header for composite disk format"), InvalidProto(e) => write!(f, "failed to parse specification proto: \"{}\"", e), InvalidSpecification(s) => write!(f, "invalid specification: \"{}\"", s), - OpenFile(e) => write!(f, "failed to open component file: \"{}\"", e), + OpenFile(e, p) => write!(f, "failed to open component file \"{}\": \"{}\"", p, e), ReadSpecificationError(e) => write!(f, "failed to read specification: \"{}\"", e), UnknownVersion(v) => write!(f, "unknown version {} in specification", v), UnsupportedComponent(c) => write!(f, "unsupported component disk type \"{:?}\"", c), @@ -142,7 +142,7 @@ impl CompositeDiskFile { ); let file = open_options .open(disk.get_file_path()) - .map_err(Error::OpenFile)?; + .map_err(|e| Error::OpenFile(e, disk.get_file_path().to_string()))?; Ok(ComponentDiskPart { file: create_disk_file(file).map_err(|e| Error::DiskError(Box::new(e)))?, offset: disk.get_offset(), diff --git a/disk/src/disk.rs b/disk/src/disk.rs index 2f9ad72..e00e843 100644 --- a/disk/src/disk.rs +++ b/disk/src/disk.rs @@ -22,11 +22,15 @@ mod composite; #[cfg(feature = "composite-disk")] use composite::{CompositeDiskFile, CDISK_MAGIC, CDISK_MAGIC_LEN}; +mod android_sparse; +use android_sparse::{AndroidSparse, SPARSE_HEADER_MAGIC}; + #[sorted] #[derive(Debug)] pub enum Error { BlockDeviceNew(sys_util::Error), ConversionNotSupported, + CreateAndroidSparseDisk(android_sparse::Error), #[cfg(feature = "composite-disk")] CreateCompositeDisk(composite::Error), QcowError(qcow::Error), @@ -95,6 +99,7 @@ impl Display for Error { match self { BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e), ConversionNotSupported => write!(f, "requested file conversion not supported"), + CreateAndroidSparseDisk(e) => write!(f, "failure in android sparse disk: {}", e), #[cfg(feature = "composite-disk")] CreateCompositeDisk(e) => write!(f, "failure in composite disk: {}", e), QcowError(e) => write!(f, "failure in qcow: {}", e), @@ -114,6 +119,7 @@ pub enum ImageType { Raw, Qcow2, CompositeDisk, + AndroidSparse, } fn convert_copy<R, W>(reader: &mut R, writer: &mut W, offset: u64, size: u64) -> Result<()> @@ -248,6 +254,8 @@ pub fn detect_image_type(file: &File) -> Result<ImageType> { } let image_type = if magic == QCOW_MAGIC { ImageType::Qcow2 + } else if magic == SPARSE_HEADER_MAGIC.to_be() { + ImageType::AndroidSparse } else { ImageType::Raw }; @@ -272,5 +280,9 @@ pub fn create_disk_file(raw_image: File) -> Result<Box<dyn DiskFile>> { } #[cfg(not(feature = "composite-disk"))] ImageType::CompositeDisk => return Err(Error::UnknownType), + ImageType::AndroidSparse => { + Box::new(AndroidSparse::from_file(raw_image).map_err(Error::CreateAndroidSparseDisk)?) + as Box<dyn DiskFile> + } }) } diff --git a/disk/src/qcow/mod.rs b/disk/src/qcow/mod.rs index add4f48..c5e119d 100644 --- a/disk/src/qcow/mod.rs +++ b/disk/src/qcow/mod.rs @@ -10,31 +10,35 @@ use data_model::{VolatileMemory, VolatileSlice}; use libc::{EINVAL, ENOSPC, ENOTSUP}; use remain::sorted; use sys_util::{ - error, FileAllocate, FileReadWriteAtVolatile, FileReadWriteVolatile, FileSetLen, FileSync, - PunchHole, SeekHole, WriteZeroesAt, + error, AsRawFds, FileAllocate, FileReadWriteAtVolatile, FileReadWriteVolatile, FileSetLen, + FileSync, PunchHole, SeekHole, WriteZeroesAt, }; use std::cmp::{max, min}; use std::fmt::{self, Display}; -use std::fs::File; +use std::fs::{File, OpenOptions}; use std::io::{self, Read, Seek, SeekFrom, Write}; use std::mem::size_of; use std::os::unix::io::{AsRawFd, RawFd}; +use std::str; use crate::qcow::qcow_raw_file::QcowRawFile; use crate::qcow::refcount::RefCount; use crate::qcow::vec_cache::{CacheMap, Cacheable, VecCache}; -use crate::{DiskFile, DiskGetLen}; +use crate::{create_disk_file, DiskFile, DiskGetLen}; #[sorted] #[derive(Debug)] pub enum Error { - BackingFilesNotSupported, + BackingFileIo(io::Error), + BackingFileOpen(Box<crate::Error>), + BackingFileTooLong(usize), CompressedBlocksNotSupported, EvictingCache(io::Error), FileTooBig(u64), GettingFileSize(io::Error), GettingRefcount(refcount::Error), + InvalidBackingFileName(str::Utf8Error), InvalidClusterIndex, InvalidClusterSize, InvalidIndex, @@ -74,7 +78,11 @@ impl Display for Error { #[sorted] match self { - BackingFilesNotSupported => write!(f, "backing files not supported"), + BackingFileIo(e) => write!(f, "backing file io error: {}", e), + BackingFileOpen(e) => write!(f, "backing file open error: {}", *e), + BackingFileTooLong(len) => { + write!(f, "backing file name is too long: {} bytes over", len) + } CompressedBlocksNotSupported => write!(f, "compressed blocks not supported"), EvictingCache(e) => write!(f, "failed to evict cache: {}", e), FileTooBig(size) => write!( @@ -84,6 +92,7 @@ impl Display for Error { ), GettingFileSize(e) => write!(f, "failed to get file size: {}", e), GettingRefcount(e) => write!(f, "failed to get refcount: {}", e), + InvalidBackingFileName(e) => write!(f, "failed to parse filename: {}", e), InvalidClusterIndex => write!(f, "invalid cluster index"), InvalidClusterSize => write!(f, "invalid cluster size"), InvalidIndex => write!(f, "invalid index"), @@ -144,8 +153,14 @@ const COMPRESSED_FLAG: u64 = 1 << 62; const CLUSTER_USED_FLAG: u64 = 1 << 63; const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0; +// The format supports a "header extension area", that crosvm does not use. +const QCOW_EMPTY_HEADER_EXTENSION_SIZE: u32 = 8; + +// Defined by the specification +const MAX_BACKING_FILE_SIZE: u32 = 1023; + /// Contains the information from the header of a qcow file. -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] pub struct QcowHeader { pub magic: u32, pub version: u32, @@ -172,6 +187,9 @@ pub struct QcowHeader { pub autoclear_features: u64, pub refcount_order: u32, pub header_size: u32, + + // Post-header entries + pub backing_file_path: Option<String>, } // Reads the next u16 from the file. @@ -211,7 +229,7 @@ impl QcowHeader { return Err(Error::InvalidMagic); } - Ok(QcowHeader { + let mut header = QcowHeader { magic, version: read_u32_from_file(f)?, backing_file_offset: read_u64_from_file(f)?, @@ -230,24 +248,50 @@ impl QcowHeader { autoclear_features: read_u64_from_file(f)?, refcount_order: read_u32_from_file(f)?, header_size: read_u32_from_file(f)?, - }) + backing_file_path: None, + }; + if header.backing_file_size > MAX_BACKING_FILE_SIZE { + return Err(Error::BackingFileTooLong(header.backing_file_size as usize)); + } + if header.backing_file_offset != 0 { + f.seek(SeekFrom::Start(header.backing_file_offset)) + .map_err(Error::ReadingHeader)?; + let mut backing_file_name_bytes = vec![0u8; header.backing_file_size as usize]; + f.read_exact(&mut backing_file_name_bytes) + .map_err(Error::ReadingHeader)?; + header.backing_file_path = Some( + String::from_utf8(backing_file_name_bytes) + .map_err(|err| Error::InvalidBackingFileName(err.utf8_error()))?, + ); + } + Ok(header) } - /// Create a header for the given `size`. - pub fn create_for_size(size: u64) -> QcowHeader { + pub fn create_for_size_and_path(size: u64, backing_file: Option<&str>) -> Result<QcowHeader> { let cluster_bits: u32 = DEFAULT_CLUSTER_BITS; let cluster_size: u32 = 0x01 << cluster_bits; + let max_length: usize = + (cluster_size - V3_BARE_HEADER_SIZE - QCOW_EMPTY_HEADER_EXTENSION_SIZE) as usize; + if let Some(path) = backing_file { + if path.len() > max_length { + return Err(Error::BackingFileTooLong(path.len() - max_length)); + } + } // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses. let l2_size: u32 = cluster_size / size_of::<u64>() as u32; let num_clusters: u32 = div_round_up_u64(size, u64::from(cluster_size)) as u32; let num_l2_clusters: u32 = div_round_up_u32(num_clusters, l2_size); let l1_clusters: u32 = div_round_up_u32(num_l2_clusters, cluster_size); let header_clusters = div_round_up_u32(size_of::<QcowHeader>() as u32, cluster_size); - QcowHeader { + Ok(QcowHeader { magic: QCOW_MAGIC, version: 3, - backing_file_offset: 0, - backing_file_size: 0, + backing_file_offset: (if backing_file.is_none() { + 0 + } else { + V3_BARE_HEADER_SIZE + QCOW_EMPTY_HEADER_EXTENSION_SIZE + }) as u64, + backing_file_size: backing_file.map_or(0, |x| x.len()) as u32, cluster_bits: DEFAULT_CLUSTER_BITS, size, crypt_method: 0, @@ -277,7 +321,8 @@ impl QcowHeader { autoclear_features: 0, refcount_order: DEFAULT_REFCOUNT_ORDER, header_size: V3_BARE_HEADER_SIZE, - } + backing_file_path: backing_file.map(|x| String::from(x)), + }) } /// Write the header to `file`. @@ -312,6 +357,11 @@ impl QcowHeader { write_u64_to_file(file, self.autoclear_features)?; write_u32_to_file(file, self.refcount_order)?; write_u32_to_file(file, self.header_size)?; + write_u32_to_file(file, 0)?; // header extension type: end of header extension area + write_u32_to_file(file, 0)?; // length of header extension data: 0 + if let Some(backing_file_path) = self.backing_file_path.as_ref() { + write!(file, "{}", backing_file_path).map_err(Error::WritingHeader)?; + } // Set the file length by seeking and writing a zero to the last byte. This avoids needing // a `File` instead of anything that implements seek as the `file` argument. @@ -365,7 +415,7 @@ pub struct QcowFile { // List of unreferenced clusters available to be used. unref clusters become available once the // removal of references to them have been synced to disk. avail_clusters: Vec<u64>, - //TODO(dgreid) Add support for backing files. - backing_file: Option<Box<QcowFile<T>>>, + backing_file: Option<Box<dyn DiskFile>>, } impl QcowFile { @@ -394,10 +444,18 @@ impl QcowFile { return Err(Error::FileTooBig(header.size)); } - // No current support for backing files. - if header.backing_file_offset != 0 { - return Err(Error::BackingFilesNotSupported); - } + let backing_file = if let Some(backing_file_path) = header.backing_file_path.as_ref() { + let path = backing_file_path.clone(); + let backing_raw_file = OpenOptions::new() + .read(true) + .open(path) + .map_err(Error::BackingFileIo)?; + let backing_file = create_disk_file(backing_raw_file) + .map_err(|e| Error::BackingFileOpen(Box::new(e)))?; + Some(backing_file) + } else { + None + }; // Only support two byte refcounts. let refcount_bits: u64 = 0x01u64 @@ -412,7 +470,6 @@ impl QcowFile { if header.refcount_table_clusters == 0 { return Err(Error::NoRefcountClusters); } - offset_is_cluster_boundary(header.backing_file_offset, header.cluster_bits)?; offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?; offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?; // refcount table must be a cluster boundary, and within the file's virtual or actual size. @@ -444,7 +501,7 @@ impl QcowFile { let mut raw_file = QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?; if refcount_rebuild_required { - QcowFile::rebuild_refcounts(&mut raw_file, header)?; + QcowFile::rebuild_refcounts(&mut raw_file, header.clone())?; } let l2_size = cluster_size / size_of::<u64>() as u64; @@ -500,6 +557,7 @@ impl QcowFile { current_offset: 0, unref_clusters: Vec::new(), avail_clusters: Vec::new(), + backing_file, }; // Check that the L1 and refcount tables fit in a 64bit address space. @@ -518,8 +576,27 @@ impl QcowFile { } /// Creates a new QcowFile at the given path. - pub fn new(mut file: File, virtual_size: u64) -> Result<QcowFile> { - let header = QcowHeader::create_for_size(virtual_size); + pub fn new(file: File, virtual_size: u64) -> Result<QcowFile> { + let header = QcowHeader::create_for_size_and_path(virtual_size, None)?; + QcowFile::new_from_header(file, header) + } + + /// Creates a new QcowFile at the given path. + pub fn new_from_backing(file: File, backing_file_name: &str) -> Result<QcowFile> { + let backing_raw_file = OpenOptions::new() + .read(true) + .open(backing_file_name) + .map_err(Error::BackingFileIo)?; + let backing_file = + create_disk_file(backing_raw_file).map_err(|e| Error::BackingFileOpen(Box::new(e)))?; + let size = backing_file.get_len().map_err(Error::BackingFileIo)?; + let header = QcowHeader::create_for_size_and_path(size, Some(backing_file_name))?; + let mut result = QcowFile::new_from_header(file, header)?; + result.backing_file = Some(backing_file); + Ok(result) + } + + fn new_from_header(mut file: File, header: QcowHeader) -> Result<QcowFile> { file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?; header.write_to(&mut file)?; @@ -543,6 +620,10 @@ impl QcowFile { Ok(qcow) } + pub fn set_backing_file(&mut self, backing: Option<Box<dyn DiskFile>>) { + self.backing_file = backing; + } + /// Returns the `QcowHeader` for this file. pub fn header(&self) -> &QcowHeader { &self.header @@ -862,9 +943,9 @@ impl QcowFile { // Find all references clusters and rebuild refcounts. set_header_refcount(&mut refcounts, cluster_size)?; - set_l1_refcounts(&mut refcounts, header, cluster_size)?; - set_data_refcounts(&mut refcounts, header, cluster_size, raw_file)?; - set_refcount_table_refcounts(&mut refcounts, header, cluster_size)?; + set_l1_refcounts(&mut refcounts, header.clone(), cluster_size)?; + set_data_refcounts(&mut refcounts, header.clone(), cluster_size, raw_file)?; + set_refcount_table_refcounts(&mut refcounts, header.clone(), cluster_size)?; // Allocate clusters to store the new reference count blocks. let ref_table = alloc_refblocks( @@ -984,7 +1065,7 @@ impl QcowFile { let l2_table = if l2_addr_disk == 0 { // Allocate a new cluster to store the L2 table and update the L1 table to point // to the new table. - let new_addr: u64 = self.get_new_cluster()?; + let new_addr: u64 = self.get_new_cluster(None)?; // The cluster refcount starts at one meaning it is used but doesn't need COW. set_refcounts.push((new_addr, 1)); self.l1_table[l1_index] = new_addr; @@ -1005,8 +1086,19 @@ impl QcowFile { let cluster_addr = match self.l2_cache.get(&l1_index).unwrap()[l2_index] { 0 => { + let initial_data = if let Some(backing) = self.backing_file.as_mut() { + let cluster_size = self.raw_file.cluster_size(); + let cluster_begin = address - (address % cluster_size); + let mut cluster_data = vec![0u8; cluster_size as usize]; + let raw_slice = cluster_data.as_mut_slice(); + let volatile_slice = raw_slice.get_slice(0, cluster_size).unwrap(); + backing.read_exact_at_volatile(volatile_slice, cluster_begin)?; + Some(cluster_data) + } else { + None + }; // Need to allocate a data cluster - let cluster_addr = self.append_data_cluster()?; + let cluster_addr = self.append_data_cluster(initial_data)?; self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?; cluster_addr } @@ -1043,7 +1135,7 @@ impl QcowFile { // Allocate a new cluster to store the L2 table and update the L1 table to point // to the new table. The cluster will be written when the cache is flushed, no // need to copy the data now. - let new_addr: u64 = self.get_new_cluster()?; + let new_addr: u64 = self.get_new_cluster(None)?; // The cluster refcount starts at one indicating it is used but doesn't need // COW. set_refcounts.push((new_addr, 1)); @@ -1055,15 +1147,22 @@ impl QcowFile { } // Allocate a new cluster and return its offset within the raw file. - fn get_new_cluster(&mut self) -> std::io::Result<u64> { + fn get_new_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> { // First use a pre allocated cluster if one is available. if let Some(free_cluster) = self.avail_clusters.pop() { - self.raw_file.zero_cluster(free_cluster)?; + if let Some(initial_data) = initial_data { + self.raw_file.write_cluster(free_cluster, initial_data)?; + } else { + self.raw_file.zero_cluster(free_cluster)?; + } return Ok(free_cluster); } let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset(); if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? { + if let Some(initial_data) = initial_data { + self.raw_file.write_cluster(new_cluster, initial_data)?; + } Ok(new_cluster) } else { error!("No free clusters in get_new_cluster()"); @@ -1073,8 +1172,8 @@ impl QcowFile { // Allocate and initialize a new data cluster. Returns the offset of the // cluster in to the file on success. - fn append_data_cluster(&mut self) -> std::io::Result<u64> { - let new_addr: u64 = self.get_new_cluster()?; + fn append_data_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> { + let new_addr: u64 = self.get_new_cluster(initial_data)?; // The cluster refcount starts at one indicating it is used but doesn't need COW. let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?; self.unref_clusters.append(&mut newly_unref); @@ -1305,7 +1404,7 @@ impl QcowFile { } Err(refcount::Error::NeedNewCluster) => { // Allocate the cluster and call set_cluster_refcount again. - let addr = self.get_new_cluster()?; + let addr = self.get_new_cluster(None)?; added_clusters.push(addr); new_cluster = Some(( addr, @@ -1382,6 +1481,8 @@ impl QcowFile { if let Some(offset) = file_offset { cb(Some(self.raw_file.file_mut()), nread, offset, count)?; + } else if let Some(backing) = self.backing_file.as_mut() { + cb(Some(backing.as_mut()), nread, curr_addr, count)?; } else { cb(None, nread, 0, count)?; } @@ -1424,9 +1525,13 @@ impl Drop for QcowFile { } } -impl AsRawFd for QcowFile { - fn as_raw_fd(&self) -> RawFd { - self.raw_file.file().as_raw_fd() +impl AsRawFds for QcowFile { + fn as_raw_fds(&self) -> Vec<RawFd> { + let mut fds = vec![self.raw_file.file().as_raw_fd()]; + if let Some(backing) = &self.backing_file { + fds.append(&mut backing.as_raw_fds()); + } + fds } } @@ -1714,17 +1819,20 @@ mod tests { ] } - fn with_basic_file<F>(header: &[u8], mut testfn: F) - where - F: FnMut(File), - { + fn basic_file(header: &[u8]) -> File { let shm = SharedMemory::anon().unwrap(); let mut disk_file: File = shm.into(); disk_file.write_all(&header).unwrap(); disk_file.set_len(0x1_0000_0000).unwrap(); disk_file.seek(SeekFrom::Start(0)).unwrap(); + disk_file + } - testfn(disk_file); // File closed when the function exits. + fn with_basic_file<F>(header: &[u8], mut testfn: F) + where + F: FnMut(File), + { + testfn(basic_file(header)); // File closed when the function exits. } fn with_default_file<F>(file_size: u64, mut testfn: F) @@ -1739,10 +1847,11 @@ mod tests { #[test] fn default_header() { - let header = QcowHeader::create_for_size(0x10_0000); + let header = QcowHeader::create_for_size_and_path(0x10_0000, None); let shm = SharedMemory::anon().unwrap(); let mut disk_file: File = shm.into(); header + .expect("Failed to create header.") .write_to(&mut disk_file) .expect("Failed to write header to shm."); disk_file.seek(SeekFrom::Start(0)).unwrap(); @@ -1757,6 +1866,24 @@ mod tests { } #[test] + fn header_with_backing() { + let header = QcowHeader::create_for_size_and_path(0x10_0000, Some("/my/path/to/a/file")) + .expect("Failed to create header."); + let shm = SharedMemory::anon().unwrap(); + let mut disk_file: File = shm.into(); + header + .write_to(&mut disk_file) + .expect("Failed to write header to shm."); + disk_file.seek(SeekFrom::Start(0)).unwrap(); + let read_header = QcowHeader::new(&mut disk_file).expect("Failed to create header."); + assert_eq!( + header.backing_file_path, + Some(String::from("/my/path/to/a/file")) + ); + assert_eq!(read_header.backing_file_path, header.backing_file_path); + } + + #[test] fn invalid_magic() { let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb]; with_basic_file(&invalid_header, |mut disk_file: File| { @@ -1871,6 +1998,42 @@ mod tests { } #[test] + fn write_read_start_backing() { + let disk_file = basic_file(&valid_header()); + let mut backing = QcowFile::from(disk_file).unwrap(); + backing + .write(b"test first bytes") + .expect("Failed to write test string."); + let mut buf = [0u8; 4]; + let wrapping_disk_file = basic_file(&valid_header()); + let mut wrapping = QcowFile::from(wrapping_disk_file).unwrap(); + wrapping.set_backing_file(Some(Box::new(backing))); + wrapping.seek(SeekFrom::Start(0)).expect("Failed to seek."); + wrapping.read(&mut buf).expect("Failed to read."); + assert_eq!(&buf, b"test"); + } + + #[test] + fn write_read_start_backing_overlap() { + let disk_file = basic_file(&valid_header()); + let mut backing = QcowFile::from(disk_file).unwrap(); + backing + .write(b"test first bytes") + .expect("Failed to write test string."); + let wrapping_disk_file = basic_file(&valid_header()); + let mut wrapping = QcowFile::from(wrapping_disk_file).unwrap(); + wrapping.set_backing_file(Some(Box::new(backing))); + wrapping.seek(SeekFrom::Start(0)).expect("Failed to seek."); + wrapping + .write(b"TEST") + .expect("Failed to write second test string."); + let mut buf = [0u8; 10]; + wrapping.seek(SeekFrom::Start(0)).expect("Failed to seek."); + wrapping.read(&mut buf).expect("Failed to read."); + assert_eq!(&buf, b"TEST first"); + } + + #[test] fn offset_write_read() { with_basic_file(&valid_header(), |disk_file: File| { let mut q = QcowFile::from(disk_file).unwrap(); diff --git a/disk/src/qcow/qcow_raw_file.rs b/disk/src/qcow/qcow_raw_file.rs index ede28d8..09d2176 100644 --- a/disk/src/qcow/qcow_raw_file.rs +++ b/disk/src/qcow/qcow_raw_file.rs @@ -6,7 +6,8 @@ use std::fs::File; use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write}; use std::mem::size_of; -use sys_util::WriteZeroes; +use data_model::VolatileMemory; +use sys_util::{FileReadWriteAtVolatile, WriteZeroes}; /// A qcow file. Allows reading/writing clusters and appending clusters. #[derive(Debug)] @@ -145,4 +146,13 @@ impl QcowRawFile { self.file.write_zeroes_all(cluster_size)?; Ok(()) } + + /// Writes + pub fn write_cluster(&mut self, address: u64, mut initial_data: Vec<u8>) -> io::Result<()> { + let raw_slice = initial_data.as_mut_slice(); + let volatile_slice = raw_slice + .get_slice(0, self.cluster_size) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("{:?}", e)))?; + self.file.write_all_at_volatile(volatile_slice, address) + } } diff --git a/docker/Dockerfile b/docker/Dockerfile index 9a4f7ce..fecc31d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -36,7 +36,7 @@ RUN apt-get update && apt-get install -y \ ENV RUSTUP_HOME=/usr/local/rustup \ CARGO_HOME=/usr/local/cargo \ PATH=/usr/local/cargo/bin:$PATH \ - RUST_VERSION=1.38.0 \ + RUST_VERSION=1.41.0 \ RUSTFLAGS='--cfg hermetic' # Debian usually has an old rust version in the repository. Instead of using that, we use rustup to @@ -59,12 +59,6 @@ RUN cargo install thisiznotarealpackage -q || true # Used /scratch for building dependencies which are too new or don't exist on Debian stretch. WORKDIR /scratch -# minijail does not exist in upstream linux distros. -RUN git clone https://android.googlesource.com/platform/external/minijail \ - && cd minijail \ - && make -j$(nproc) \ - && cp libminijail.so /usr/lib/x86_64-linux-gnu/ - # New libepoxy and libdrm-dev requires newer meson than is in Debian stretch. ARG MESON_COMMIT=master RUN git clone https://github.com/mesonbuild/meson \ @@ -147,6 +141,14 @@ ENV THIRD_PARTY_ROOT=$CROS_ROOT/third_party RUN mkdir -p $THIRD_PARTY_ROOT ENV PLATFORM_ROOT=$CROS_ROOT/platform RUN mkdir -p $PLATFORM_ROOT +ENV AOSP_EXTERNAL_ROOT=$CROS_ROOT/aosp/external +RUN mkdir -p $AOSP_EXTERNAL_ROOT + +# minijail does not exist in upstream linux distros. +RUN git clone https://android.googlesource.com/platform/external/minijail $AOSP_EXTERNAL_ROOT/minijail \ + && cd $AOSP_EXTERNAL_ROOT/minijail \ + && make -j$(nproc) \ + && cp libminijail.so /usr/lib/x86_64-linux-gnu/ # Pull the cras library for audio access. ARG ADHD_COMMIT=master diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 1616921..93c2e03 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -42,3 +42,6 @@ path = "virtqueue_fuzzer.rs" [[bin]] name = "crosvm_zimage_fuzzer" path = "zimage_fuzzer.rs" + +[patch.crates-io] +sys_util = { path = "../sys_util" } diff --git a/gpu_display/src/event_device.rs b/gpu_display/src/event_device.rs index 5f1bbc7..5aae55c 100644 --- a/gpu_display/src/event_device.rs +++ b/gpu_display/src/event_device.rs @@ -3,14 +3,14 @@ // found in the LICENSE file. use data_model::DataInit; -use linux_input_sys::input_event; +use linux_input_sys::{virtio_input_event, InputEventDecoder}; use std::collections::VecDeque; use std::io::{self, Error, ErrorKind, Read, Write}; use std::iter::ExactSizeIterator; use std::os::unix::io::{AsRawFd, RawFd}; use std::os::unix::net::UnixStream; -const EVENT_SIZE: usize = input_event::EVENT_SIZE; +const EVENT_SIZE: usize = virtio_input_event::SIZE; const EVENT_BUFFER_LEN_MAX: usize = 16 * EVENT_SIZE; // /// Half-way build `EventDevice` with only the `event_socket` defined. Finish building the @@ -93,7 +93,7 @@ impl EventDevice { self.event_buffer.is_empty() } - pub fn send_report<E: IntoIterator<Item = input_event>>( + pub fn send_report<E: IntoIterator<Item = virtio_input_event>>( &mut self, events: E, ) -> io::Result<bool> @@ -111,14 +111,14 @@ impl EventDevice { } self.event_buffer - .extend(input_event::syn().as_slice().iter()); + .extend(virtio_input_event::syn().as_slice().iter()); self.flush_buffered_events() } /// Sends the given `event`, returning `Ok(true)` if, after this function returns, there are no /// buffered events remaining. - pub fn send_event_encoded(&mut self, event: input_event) -> io::Result<bool> { + pub fn send_event_encoded(&mut self, event: virtio_input_event) -> io::Result<bool> { if !self.flush_buffered_events()? { return Ok(false); } @@ -137,14 +137,14 @@ impl EventDevice { Ok(false) } - pub fn recv_event_encoded(&self) -> io::Result<input_event> { + pub fn recv_event_encoded(&self) -> io::Result<virtio_input_event> { let mut event_bytes = [0u8; 24]; (&self.event_socket).read_exact(&mut event_bytes)?; - match input_event::from_slice(&event_bytes) { + match virtio_input_event::from_slice(&event_bytes) { Some(event) => Ok(*event), None => Err(Error::new( ErrorKind::InvalidInput, - "failed to read input_event", + "failed to read virtio_input_event", )), } } diff --git a/gpu_display/src/gpu_display_x.rs b/gpu_display/src/gpu_display_x.rs index c0074ca..2940f40 100644 --- a/gpu_display/src/gpu_display_x.rs +++ b/gpu_display/src/gpu_display_x.rs @@ -11,7 +11,7 @@ )] mod xlib; -use linux_input_sys::input_event; +use linux_input_sys::virtio_input_event; use std::cmp::max; use std::collections::BTreeMap; use std::ffi::{c_void, CStr, CString}; @@ -333,7 +333,11 @@ impl Surface { } } - fn dispatch_to_event_devices(&mut self, events: &[input_event], device_type: EventDeviceKind) { + fn dispatch_to_event_devices( + &mut self, + events: &[virtio_input_event], + device_type: EventDeviceKind, + ) { for event_device in self.event_devices.values_mut() { if event_device.kind() != device_type { continue; @@ -348,7 +352,7 @@ impl Surface { match ev.as_enum(self.buffer_completion_type) { XEventEnum::KeyEvent(key) => { if let Some(linux_keycode) = self.keycode_translator.translate(key.keycode) { - let events = &[input_event::key( + let events = &[virtio_input_event::key( linux_keycode, key.type_ == xlib::KeyPress as i32, )]; @@ -363,9 +367,9 @@ impl Surface { if button_event.button & xlib::Button1 != 0 { // The touch event *must* be first per the Linux input subsystem's guidance. let events = &[ - input_event::touch(pressed), - input_event::absolute_x(max(0, button_event.x) as u32), - input_event::absolute_y(max(0, button_event.y) as u32), + virtio_input_event::touch(pressed), + virtio_input_event::absolute_x(max(0, button_event.x) as u32), + virtio_input_event::absolute_y(max(0, button_event.y) as u32), ]; self.dispatch_to_event_devices(events, EventDeviceKind::Touchscreen); } @@ -373,9 +377,9 @@ impl Surface { XEventEnum::Motion(motion) => { if motion.state & xlib::Button1Mask != 0 { let events = &[ - input_event::touch(true), - input_event::absolute_x(max(0, motion.x) as u32), - input_event::absolute_y(max(0, motion.y) as u32), + virtio_input_event::touch(true), + virtio_input_event::absolute_x(max(0, motion.x) as u32), + virtio_input_event::absolute_y(max(0, motion.y) as u32), ]; self.dispatch_to_event_devices(events, EventDeviceKind::Touchscreen); } diff --git a/io_jail/Cargo.toml b/io_jail/Cargo.toml index 31f8e6f..dcd5d4e 100644 --- a/io_jail/Cargo.toml +++ b/io_jail/Cargo.toml @@ -6,4 +6,4 @@ edition = "2018" [dependencies] libc = "*" -net_sys = { path = "../net_sys" } +minijail-sys = "*" diff --git a/io_jail/src/lib.rs b/io_jail/src/lib.rs index 16212c6..a927cdb 100644 --- a/io_jail/src/lib.rs +++ b/io_jail/src/lib.rs @@ -2,19 +2,13 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#[allow(dead_code)] -#[allow(non_camel_case_types)] -#[allow(non_snake_case)] -#[allow(non_upper_case_globals)] -mod libminijail; - use libc::pid_t; -use net_sys::{sock_filter, sock_fprog}; +use minijail_sys::*; use std::ffi::CString; use std::fmt::{self, Display}; use std::fs; use std::io; -use std::os::raw::{c_ulong, c_ushort}; +use std::os::raw::{c_char, c_ulong, c_ushort}; use std::os::unix::io::{AsRawFd, RawFd}; use std::path::{Path, PathBuf}; use std::ptr::{null, null_mut}; @@ -206,7 +200,7 @@ pub type Result<T> = std::result::Result<T, Error>; /// partial jail is not recoverable and will instead result in killing the /// process. pub struct Minijail { - jail: *mut libminijail::minijail, + jail: *mut minijail, } impl Minijail { @@ -215,7 +209,7 @@ impl Minijail { let j = unsafe { // libminijail actually owns the minijail structure. It will live until we call // minijail_destroy. - libminijail::minijail_new() + minijail_new() }; if j.is_null() { return Err(Error::CreatingMinijail); @@ -229,22 +223,22 @@ impl Minijail { pub fn change_uid(&mut self, uid: libc::uid_t) { unsafe { - libminijail::minijail_change_uid(self.jail, uid); + minijail_change_uid(self.jail, uid); } } pub fn change_gid(&mut self, gid: libc::gid_t) { unsafe { - libminijail::minijail_change_gid(self.jail, gid); + minijail_change_gid(self.jail, gid); } } pub fn set_supplementary_gids(&mut self, ids: &[libc::gid_t]) { unsafe { - libminijail::minijail_set_supplementary_gids(self.jail, ids.len(), ids.as_ptr()); + minijail_set_supplementary_gids(self.jail, ids.len(), ids.as_ptr()); } } pub fn keep_supplementary_gids(&mut self) { unsafe { - libminijail::minijail_keep_supplementary_gids(self.jail); + minijail_keep_supplementary_gids(self.jail); } } pub fn set_rlimit( @@ -253,7 +247,7 @@ impl Minijail { cur: libc::rlim64_t, max: libc::rlim64_t, ) -> Result<()> { - let errno = unsafe { libminijail::minijail_rlimit(self.jail, kind, cur, max) }; + let errno = unsafe { minijail_rlimit(self.jail, kind, cur, max) }; if errno == 0 { Ok(()) } else { @@ -262,22 +256,22 @@ impl Minijail { } pub fn use_seccomp(&mut self) { unsafe { - libminijail::minijail_use_seccomp(self.jail); + minijail_use_seccomp(self.jail); } } pub fn no_new_privs(&mut self) { unsafe { - libminijail::minijail_no_new_privs(self.jail); + minijail_no_new_privs(self.jail); } } pub fn use_seccomp_filter(&mut self) { unsafe { - libminijail::minijail_use_seccomp_filter(self.jail); + minijail_use_seccomp_filter(self.jail); } } pub fn set_seccomp_filter_tsync(&mut self) { unsafe { - libminijail::minijail_set_seccomp_filter_tsync(self.jail); + minijail_set_seccomp_filter_tsync(self.jail); } } pub fn parse_seccomp_program(&mut self, path: &Path) -> Result<()> { @@ -298,7 +292,7 @@ impl Minijail { filter: buffer.as_ptr() as *mut sock_filter, }; unsafe { - libminijail::minijail_set_seccomp_filters(self.jail, &header); + minijail_set_seccomp_filters(self.jail, &header); } Ok(()) } @@ -314,98 +308,98 @@ impl Minijail { let filename = CString::new(pathstring).map_err(|_| Error::PathToCString(path.to_owned()))?; unsafe { - libminijail::minijail_parse_seccomp_filters(self.jail, filename.as_ptr()); + minijail_parse_seccomp_filters(self.jail, filename.as_ptr()); } Ok(()) } pub fn log_seccomp_filter_failures(&mut self) { unsafe { - libminijail::minijail_log_seccomp_filter_failures(self.jail); + minijail_log_seccomp_filter_failures(self.jail); } } pub fn use_caps(&mut self, capmask: u64) { unsafe { - libminijail::minijail_use_caps(self.jail, capmask); + minijail_use_caps(self.jail, capmask); } } pub fn capbset_drop(&mut self, capmask: u64) { unsafe { - libminijail::minijail_capbset_drop(self.jail, capmask); + minijail_capbset_drop(self.jail, capmask); } } pub fn set_ambient_caps(&mut self) { unsafe { - libminijail::minijail_set_ambient_caps(self.jail); + minijail_set_ambient_caps(self.jail); } } pub fn reset_signal_mask(&mut self) { unsafe { - libminijail::minijail_reset_signal_mask(self.jail); + minijail_reset_signal_mask(self.jail); } } pub fn run_as_init(&mut self) { unsafe { - libminijail::minijail_run_as_init(self.jail); + minijail_run_as_init(self.jail); } } pub fn namespace_pids(&mut self) { unsafe { - libminijail::minijail_namespace_pids(self.jail); + minijail_namespace_pids(self.jail); } } pub fn namespace_user(&mut self) { unsafe { - libminijail::minijail_namespace_user(self.jail); + minijail_namespace_user(self.jail); } } pub fn namespace_user_disable_setgroups(&mut self) { unsafe { - libminijail::minijail_namespace_user_disable_setgroups(self.jail); + minijail_namespace_user_disable_setgroups(self.jail); } } pub fn namespace_vfs(&mut self) { unsafe { - libminijail::minijail_namespace_vfs(self.jail); + minijail_namespace_vfs(self.jail); } } pub fn new_session_keyring(&mut self) { unsafe { - libminijail::minijail_new_session_keyring(self.jail); + minijail_new_session_keyring(self.jail); } } pub fn skip_remount_private(&mut self) { unsafe { - libminijail::minijail_skip_remount_private(self.jail); + minijail_skip_remount_private(self.jail); } } pub fn namespace_ipc(&mut self) { unsafe { - libminijail::minijail_namespace_ipc(self.jail); + minijail_namespace_ipc(self.jail); } } pub fn namespace_net(&mut self) { unsafe { - libminijail::minijail_namespace_net(self.jail); + minijail_namespace_net(self.jail); } } pub fn namespace_cgroups(&mut self) { unsafe { - libminijail::minijail_namespace_cgroups(self.jail); + minijail_namespace_cgroups(self.jail); } } pub fn remount_proc_readonly(&mut self) { unsafe { - libminijail::minijail_remount_proc_readonly(self.jail); + minijail_remount_proc_readonly(self.jail); } } pub fn set_remount_mode(&mut self, mode: c_ulong) { - unsafe { libminijail::minijail_remount_mode(self.jail, mode) } + unsafe { minijail_remount_mode(self.jail, mode) } } pub fn uidmap(&mut self, uid_map: &str) -> Result<()> { let map_cstring = CString::new(uid_map).map_err(|_| Error::StrToCString(uid_map.to_owned()))?; unsafe { - libminijail::minijail_uidmap(self.jail, map_cstring.as_ptr()); + minijail_uidmap(self.jail, map_cstring.as_ptr()); } Ok(()) } @@ -413,20 +407,19 @@ impl Minijail { let map_cstring = CString::new(gid_map).map_err(|_| Error::StrToCString(gid_map.to_owned()))?; unsafe { - libminijail::minijail_gidmap(self.jail, map_cstring.as_ptr()); + minijail_gidmap(self.jail, map_cstring.as_ptr()); } Ok(()) } pub fn inherit_usergroups(&mut self) { unsafe { - libminijail::minijail_inherit_usergroups(self.jail); + minijail_inherit_usergroups(self.jail); } } pub fn use_alt_syscall(&mut self, table_name: &str) -> Result<()> { let table_name_string = CString::new(table_name).map_err(|_| Error::StrToCString(table_name.to_owned()))?; - let ret = - unsafe { libminijail::minijail_use_alt_syscall(self.jail, table_name_string.as_ptr()) }; + let ret = unsafe { minijail_use_alt_syscall(self.jail, table_name_string.as_ptr()) }; if ret < 0 { return Err(Error::SetAltSyscallTable { errno: ret, @@ -441,7 +434,7 @@ impl Minijail { .to_str() .ok_or(Error::PathToCString(dir.to_owned()))?; let dirname = CString::new(pathstring).map_err(|_| Error::PathToCString(dir.to_owned()))?; - let ret = unsafe { libminijail::minijail_enter_chroot(self.jail, dirname.as_ptr()) }; + let ret = unsafe { minijail_enter_chroot(self.jail, dirname.as_ptr()) }; if ret < 0 { return Err(Error::SettingChrootDirectory(ret, dir.to_owned())); } @@ -453,7 +446,7 @@ impl Minijail { .to_str() .ok_or(Error::PathToCString(dir.to_owned()))?; let dirname = CString::new(pathstring).map_err(|_| Error::PathToCString(dir.to_owned()))?; - let ret = unsafe { libminijail::minijail_enter_pivot_root(self.jail, dirname.as_ptr()) }; + let ret = unsafe { minijail_enter_pivot_root(self.jail, dirname.as_ptr()) }; if ret < 0 { return Err(Error::SettingPivotRootDirectory(ret, dir.to_owned())); } @@ -485,7 +478,7 @@ impl Minijail { CString::new(fstype).map_err(|_| Error::StrToCString(fstype.to_owned()))?; let data_string = CString::new(data).map_err(|_| Error::StrToCString(data.to_owned()))?; let ret = unsafe { - libminijail::minijail_mount_with_data( + minijail_mount_with_data( self.jail, src_path.as_ptr(), dest_path.as_ptr(), @@ -508,17 +501,17 @@ impl Minijail { } pub fn mount_dev(&mut self) { unsafe { - libminijail::minijail_mount_dev(self.jail); + minijail_mount_dev(self.jail); } } pub fn mount_tmp(&mut self) { unsafe { - libminijail::minijail_mount_tmp(self.jail); + minijail_mount_tmp(self.jail); } } pub fn mount_tmp_size(&mut self, size: usize) { unsafe { - libminijail::minijail_mount_tmp_size(self.jail, size); + minijail_mount_tmp_size(self.jail, size); } } pub fn mount_bind(&mut self, src: &Path, dest: &Path, writable: bool) -> Result<()> { @@ -534,7 +527,7 @@ impl Minijail { let dest_path = CString::new(dest_os).map_err(|_| Error::StrToCString(dest_os.to_owned()))?; let ret = unsafe { - libminijail::minijail_bind( + minijail_bind( self.jail, src_path.as_ptr(), dest_path.as_ptr(), @@ -571,7 +564,7 @@ impl Minijail { args_array.push(null()); for fd in inheritable_fds { - let ret = unsafe { libminijail::minijail_preserve_fd(self.jail, *fd, *fd) }; + let ret = unsafe { minijail_preserve_fd(self.jail, *fd, *fd) }; if ret < 0 { return Err(Error::PreservingFd(ret)); } @@ -586,9 +579,7 @@ impl Minijail { // These will only be closed when this process exits. for io_fd in &[libc::STDIN_FILENO, libc::STDOUT_FILENO, libc::STDERR_FILENO] { if !inheritable_fds.contains(io_fd) { - let ret = unsafe { - libminijail::minijail_preserve_fd(self.jail, dev_null.as_raw_fd(), *io_fd) - }; + let ret = unsafe { minijail_preserve_fd(self.jail, dev_null.as_raw_fd(), *io_fd) }; if ret < 0 { return Err(Error::PreservingFd(ret)); } @@ -596,15 +587,15 @@ impl Minijail { } unsafe { - libminijail::minijail_close_open_fds(self.jail); + minijail_close_open_fds(self.jail); } let mut pid = 0; let ret = unsafe { - libminijail::minijail_run_pid_pipes( + minijail_run_pid_pipes( self.jail, cmd_cstr.as_ptr(), - args_array.as_ptr(), + args_array.as_ptr() as *const *mut c_char, &mut pid, null_mut(), null_mut(), @@ -635,7 +626,7 @@ impl Minijail { if let Some(keep_fds) = inheritable_fds { for fd in keep_fds { - let ret = libminijail::minijail_preserve_fd(self.jail, *fd, *fd); + let ret = minijail_preserve_fd(self.jail, *fd, *fd); if ret < 0 { return Err(Error::PreservingFd(ret)); } @@ -651,17 +642,16 @@ impl Minijail { // These will only be closed when this process exits. for io_fd in &[libc::STDIN_FILENO, libc::STDOUT_FILENO, libc::STDERR_FILENO] { if inheritable_fds.is_none() || !inheritable_fds.unwrap().contains(io_fd) { - let ret = - libminijail::minijail_preserve_fd(self.jail, dev_null.as_raw_fd(), *io_fd); + let ret = minijail_preserve_fd(self.jail, dev_null.as_raw_fd(), *io_fd); if ret < 0 { return Err(Error::PreservingFd(ret)); } } } - libminijail::minijail_close_open_fds(self.jail); + minijail_close_open_fds(self.jail); - let ret = libminijail::minijail_fork(self.jail); + let ret = minijail_fork(self.jail); if ret < 0 { return Err(Error::ForkingMinijail(ret)); } @@ -675,7 +665,7 @@ impl Drop for Minijail { unsafe { // Destroys the minijail's memory. It is safe to do here because all references to // this object have been dropped. - libminijail::minijail_destroy(self.jail); + minijail_destroy(self.jail); } } } @@ -701,9 +691,9 @@ mod tests { #[test] fn create_and_free() { unsafe { - let j = libminijail::minijail_new(); + let j = minijail_new(); assert_ne!(std::ptr::null_mut(), j); - libminijail::minijail_destroy(j); + minijail_destroy(j); } let j = Minijail::new().unwrap(); diff --git a/kvm/src/lib.rs b/kvm/src/lib.rs index c5da845..b8cd12b 100644 --- a/kvm/src/lib.rs +++ b/kvm/src/lib.rs @@ -1121,6 +1121,19 @@ pub enum VcpuExit { size: usize, data: [u8; 8], }, + IoapicEoi { + vector: u8, + }, + HypervSynic { + msr: u32, + control: u64, + evt_page: u64, + msg_page: u64, + }, + HypervHcall { + input: u64, + params: [u64; 2], + }, Unknown, Exception, Hypercall, @@ -1240,10 +1253,10 @@ impl Vcpu { &self.guest_mem } - /// Sets the data received by an mmio or ioport read/in instruction. + /// Sets the data received by a mmio read, ioport in, or hypercall instruction. /// - /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn` or - /// `Vcpu::MmioRead`. + /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`, + /// `VcpuExit::MmioRead`, or 'VcpuExit::HypervHcall`. #[allow(clippy::cast_ptr_alignment)] pub fn set_data(&self, data: &[u8]) -> Result<()> { // Safe because we know we mapped enough memory to hold the kvm_run struct because the @@ -1285,6 +1298,20 @@ impl Vcpu { mmio.data[..len].copy_from_slice(data); Ok(()) } + KVM_EXIT_HYPERV => { + // Safe because the exit_reason (which comes from the kernel) told us which + // union field to use. + let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv }; + if hyperv.type_ != KVM_EXIT_HYPERV_HCALL { + return Err(Error::new(EINVAL)); + } + let hcall = unsafe { &mut hyperv.u.hcall }; + if data.len() != std::mem::size_of::<u64>() { + return Err(Error::new(EINVAL)); + } + hcall.result.to_ne_bytes().copy_from_slice(data); + Ok(()) + } _ => Err(Error::new(EINVAL)), } } @@ -1310,6 +1337,26 @@ impl Vcpu { }); } + /// Request the VCPU to exit when it becomes possible to inject interrupts into the guest. + #[allow(clippy::cast_ptr_alignment)] + pub fn request_interrupt_window(&self) { + // Safe because we know we mapped enough memory to hold the kvm_run struct because the + // kernel told us how large it was. The pointer is page aligned so casting to a different + // type is well defined, hence the clippy allow attribute. + let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) }; + run.request_interrupt_window = 1; + } + + /// Checks if we can inject an interrupt into the VCPU. + #[allow(clippy::cast_ptr_alignment)] + pub fn ready_for_interrupt(&self) -> bool { + // Safe because we know we mapped enough memory to hold the kvm_run struct because the + // kernel told us how large it was. The pointer is page aligned so casting to a different + // type is well defined, hence the clippy allow attribute. + let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) }; + run.ready_for_interrupt_injection != 0 && run.if_flag != 0 + } + /// Gets the VCPU registers. #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))] pub fn get_regs(&self) -> Result<kvm_regs> { @@ -1502,6 +1549,24 @@ impl Vcpu { Ok(()) } + /// X86 specific call to get the system emulated hyper-v CPUID values + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn get_hyperv_cpuid(&self) -> Result<CpuId> { + const MAX_KVM_CPUID_ENTRIES: usize = 256; + let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES); + + let ret = unsafe { + // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory + // allocated for the struct. The limit is read from nent, which is set to the allocated + // size(MAX_KVM_CPUID_ENTRIES) above. + ioctl_with_mut_ptr(self, KVM_GET_SUPPORTED_HV_CPUID(), cpuid.as_mut_ptr()) + }; + if ret < 0 { + return errno_result(); + } + Ok(cpuid) + } + /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt Controller". /// /// See the documentation for KVM_GET_LAPIC. @@ -1607,6 +1672,18 @@ impl Vcpu { Ok(()) } + /// Enable the specified capability. + /// See documentation for KVM_ENABLE_CAP. + pub fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { + // safe becuase we allocated the struct and we know the kernel will read + // exactly the size of the struct + let ret = unsafe { ioctl_with_ref(self, KVM_ENABLE_CAP(), cap) }; + if ret < 0 { + return errno_result(); + } + Ok(()) + } + /// Signals to the host kernel that this VCPU is about to be paused. /// /// See the documentation for KVM_KVMCLOCK_CTRL. @@ -1781,6 +1858,36 @@ impl RunnableVcpu { Ok(VcpuExit::MmioRead { address, size }) } } + KVM_EXIT_IOAPIC_EOI => { + // Safe because the exit_reason (which comes from the kernel) told us which + // union field to use. + let vector = unsafe { run.__bindgen_anon_1.eoi.vector }; + Ok(VcpuExit::IoapicEoi { vector }) + } + KVM_EXIT_HYPERV => { + // Safe because the exit_reason (which comes from the kernel) told us which + // union field to use. + let hyperv = unsafe { &run.__bindgen_anon_1.hyperv }; + match hyperv.type_ as u32 { + KVM_EXIT_HYPERV_SYNIC => { + let synic = unsafe { &hyperv.u.synic }; + Ok(VcpuExit::HypervSynic { + msr: synic.msr, + control: synic.control, + evt_page: synic.evt_page, + msg_page: synic.msg_page, + }) + } + KVM_EXIT_HYPERV_HCALL => { + let hcall = unsafe { &hyperv.u.hcall }; + Ok(VcpuExit::HypervHcall { + input: hcall.input, + params: hcall.params, + }) + } + _ => Err(Error::new(EINVAL)), + } + } KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown), KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception), KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall), @@ -2333,6 +2440,36 @@ mod tests { #[test] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn get_hyperv_cpuid() { + let kvm = Kvm::new().unwrap(); + let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap(); + let vm = Vm::new(&kvm, gm).unwrap(); + let vcpu = Vcpu::new(0, &kvm, &vm).unwrap(); + let cpuid = vcpu.get_hyperv_cpuid(); + // Older kernels don't support so tolerate this kind of failure. + match cpuid { + Ok(_) => {} + Err(e) => { + assert_eq!(e.errno(), EINVAL); + } + } + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn enable_feature() { + let kvm = Kvm::new().unwrap(); + let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap(); + let vm = Vm::new(&kvm, gm).unwrap(); + vm.create_irq_chip().unwrap(); + let vcpu = Vcpu::new(0, &kvm, &vm).unwrap(); + let mut cap: kvm_enable_cap = Default::default(); + cap.cap = kvm_sys::KVM_CAP_HYPERV_SYNIC; + vcpu.kvm_enable_cap(&cap).unwrap(); + } + + #[test] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn mp_state() { let kvm = Kvm::new().unwrap(); let gm = GuestMemory::new(&vec![(GuestAddress(0), 0x10000)]).unwrap(); diff --git a/kvm_sys/src/aarch64/bindings.rs b/kvm_sys/src/aarch64/bindings.rs index 7be9e0d..084fc13 100644 --- a/kvm_sys/src/aarch64/bindings.rs +++ b/kvm_sys/src/aarch64/bindings.rs @@ -274,11 +274,14 @@ pub const KVM_EXIT_EPR: ::std::os::raw::c_uint = 23; pub const KVM_EXIT_SYSTEM_EVENT: ::std::os::raw::c_uint = 24; pub const KVM_EXIT_S390_STSI: ::std::os::raw::c_uint = 25; pub const KVM_EXIT_IOAPIC_EOI: ::std::os::raw::c_uint = 26; +pub const KVM_EXIT_HYPERV: ::std::os::raw::c_uint = 27; pub const KVM_INTERNAL_ERROR_EMULATION: ::std::os::raw::c_uint = 1; pub const KVM_INTERNAL_ERROR_SIMUL_EX: ::std::os::raw::c_uint = 2; pub const KVM_INTERNAL_ERROR_DELIVERY_EV: ::std::os::raw::c_uint = 3; pub const KVM_EXIT_IO_IN: ::std::os::raw::c_uint = 0; pub const KVM_EXIT_IO_OUT: ::std::os::raw::c_uint = 1; +pub const KVM_EXIT_HYPERV_SYNIC: ::std::os::raw::c_uint = 1; +pub const KVM_EXIT_HYPERV_HCALL: ::std::os::raw::c_uint = 2; pub const KVM_S390_RESET_POR: ::std::os::raw::c_uint = 1; pub const KVM_S390_RESET_CLEAR: ::std::os::raw::c_uint = 2; pub const KVM_S390_RESET_SUBSYSTEM: ::std::os::raw::c_uint = 4; @@ -1694,6 +1697,7 @@ pub union kvm_run__bindgen_ty_1 { pub system_event: kvm_run__bindgen_ty_1__bindgen_ty_17, pub s390_stsi: kvm_run__bindgen_ty_1__bindgen_ty_18, pub eoi: kvm_run__bindgen_ty_1__bindgen_ty_19, + pub hyperv: kvm_hyperv_exit, pub padding: [::std::os::raw::c_char; 256usize], _bindgen_union_align: [u64; 32usize], } @@ -2833,6 +2837,174 @@ fn bindgen_test_layout_kvm_run__bindgen_ty_1__bindgen_ty_19() { ) ); } +#[repr(C)] +#[derive(Copy, Clone)] +pub struct kvm_hyperv_exit { + pub type_: __u32, + pub pad: __u32, + pub u: kvm_hyperv_exit__bindgen_ty_1, +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit>(), + 40usize, + concat!("Size of: ", stringify!(kvm_hyperv_exit)) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit>(), + 8usize, + concat!("Alignment of ", stringify!(kvm_hyperv_exit)) + ); + assert_eq!( + unsafe { &(*(::std::ptr::null::<kvm_hyperv_exit>())).u as *const _ as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit), + "::", + stringify!(u) + ) + ); +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union kvm_hyperv_exit__bindgen_ty_1 { + pub synic: kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1, + pub hcall: kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2, +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit__bindgen_ty_1>(), + 32usize, + concat!("Size of: ", stringify!(kvm_hyperv_exit__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit__bindgen_ty_1>(), + 8usize, + concat!("Alignment of ", stringify!(kvm_hyperv_exit__bindgen_ty_1)) + ); +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1 { + pub msr: __u32, + pub pad: __u32, + pub control: __u64, + pub evt_page: __u64, + pub msg_page: __u64, +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>(), + 32usize, + concat!( + "Size of: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1) + ) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>(), + 8usize, + concat!( + "Alignment of ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>())).control + as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(control) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>())).evt_page + as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(evt_page) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>())).msg_page + as *const _ as usize + }, + 24usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(msg_page) + ) + ); +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2 { + pub input: __u64, + pub result: __u64, + pub params: [__u64; 2], +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>(), + 32usize, + concat!( + "Size of: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2) + ) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>(), + 8usize, + concat!( + "Alignment of ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>())).result + as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(result) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>())).params + as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(params) + ) + ); +} #[test] fn bindgen_test_layout_kvm_run__bindgen_ty_1() { assert_eq!( diff --git a/kvm_sys/src/lib.rs b/kvm_sys/src/lib.rs index b9748a3..8f27690 100644 --- a/kvm_sys/src/lib.rs +++ b/kvm_sys/src/lib.rs @@ -8,9 +8,6 @@ use sys_util::{ioctl_io_nr, ioctl_ior_nr, ioctl_iow_nr, ioctl_iowr_nr}; -// Somehow this one gets missed by bindgen -pub const KVM_EXIT_IO_OUT: ::std::os::raw::c_uint = 1; - // Each of the below modules defines ioctls specific to their platform. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -47,6 +44,7 @@ pub mod x86 { ioctl_iow_nr!(KVM_SET_XSAVE, KVMIO, 0xa5, kvm_xsave); ioctl_ior_nr!(KVM_GET_XCRS, KVMIO, 0xa6, kvm_xcrs); ioctl_iow_nr!(KVM_SET_XCRS, KVMIO, 0xa7, kvm_xcrs); + ioctl_iowr_nr!(KVM_GET_SUPPORTED_HV_CPUID, KVMIO, 0xc1, kvm_cpuid2); } #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] diff --git a/kvm_sys/src/x86/bindings.rs b/kvm_sys/src/x86/bindings.rs index a9d792f..7236611 100644 --- a/kvm_sys/src/x86/bindings.rs +++ b/kvm_sys/src/x86/bindings.rs @@ -251,11 +251,14 @@ pub const KVM_EXIT_EPR: ::std::os::raw::c_uint = 23; pub const KVM_EXIT_SYSTEM_EVENT: ::std::os::raw::c_uint = 24; pub const KVM_EXIT_S390_STSI: ::std::os::raw::c_uint = 25; pub const KVM_EXIT_IOAPIC_EOI: ::std::os::raw::c_uint = 26; +pub const KVM_EXIT_HYPERV: ::std::os::raw::c_uint = 27; pub const KVM_INTERNAL_ERROR_EMULATION: ::std::os::raw::c_uint = 1; pub const KVM_INTERNAL_ERROR_SIMUL_EX: ::std::os::raw::c_uint = 2; pub const KVM_INTERNAL_ERROR_DELIVERY_EV: ::std::os::raw::c_uint = 3; pub const KVM_EXIT_IO_IN: ::std::os::raw::c_uint = 0; pub const KVM_EXIT_IO_OUT: ::std::os::raw::c_uint = 1; +pub const KVM_EXIT_HYPERV_SYNIC: ::std::os::raw::c_uint = 1; +pub const KVM_EXIT_HYPERV_HCALL: ::std::os::raw::c_uint = 2; pub const KVM_S390_RESET_POR: ::std::os::raw::c_uint = 1; pub const KVM_S390_RESET_CLEAR: ::std::os::raw::c_uint = 2; pub const KVM_S390_RESET_SUBSYSTEM: ::std::os::raw::c_uint = 4; @@ -422,7 +425,9 @@ pub const KVM_CAP_GUEST_DEBUG_HW_BPS: ::std::os::raw::c_uint = 119; pub const KVM_CAP_GUEST_DEBUG_HW_WPS: ::std::os::raw::c_uint = 120; pub const KVM_CAP_SPLIT_IRQCHIP: ::std::os::raw::c_uint = 121; pub const KVM_CAP_IOEVENTFD_ANY_LENGTH: ::std::os::raw::c_uint = 122; +pub const KVM_CAP_HYPERV_SYNIC: ::std::os::raw::c_uint = 123; pub const KVM_CAP_IMMEDIATE_EXIT: ::std::os::raw::c_uint = 136; +pub const KVM_CAP_HYPERV_SYNIC2: ::std::os::raw::c_uint = 148; pub const KVM_IRQ_ROUTING_IRQCHIP: ::std::os::raw::c_uint = 1; pub const KVM_IRQ_ROUTING_MSI: ::std::os::raw::c_uint = 2; pub const KVM_IRQ_ROUTING_S390_ADAPTER: ::std::os::raw::c_uint = 3; @@ -4082,6 +4087,7 @@ pub union kvm_run__bindgen_ty_1 { pub system_event: kvm_run__bindgen_ty_1__bindgen_ty_17, pub s390_stsi: kvm_run__bindgen_ty_1__bindgen_ty_18, pub eoi: kvm_run__bindgen_ty_1__bindgen_ty_19, + pub hyperv: kvm_hyperv_exit, pub padding: [::std::os::raw::c_char; 256usize], _bindgen_union_align: [u64; 32usize], } @@ -5221,7 +5227,174 @@ fn bindgen_test_layout_kvm_run__bindgen_ty_1__bindgen_ty_19() { ) ); } - +#[repr(C)] +#[derive(Copy, Clone)] +pub struct kvm_hyperv_exit { + pub type_: __u32, + pub pad: __u32, + pub u: kvm_hyperv_exit__bindgen_ty_1, +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit>(), + 40usize, + concat!("Size of: ", stringify!(kvm_hyperv_exit)) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit>(), + 8usize, + concat!("Alignment of ", stringify!(kvm_hyperv_exit)) + ); + assert_eq!( + unsafe { &(*(::std::ptr::null::<kvm_hyperv_exit>())).u as *const _ as usize }, + 8usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit), + "::", + stringify!(u) + ) + ); +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union kvm_hyperv_exit__bindgen_ty_1 { + pub synic: kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1, + pub hcall: kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2, +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit__bindgen_ty_1>(), + 32usize, + concat!("Size of: ", stringify!(kvm_hyperv_exit__bindgen_ty_1)) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit__bindgen_ty_1>(), + 8usize, + concat!("Alignment of ", stringify!(kvm_hyperv_exit__bindgen_ty_1)) + ); +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1 { + pub msr: __u32, + pub pad: __u32, + pub control: __u64, + pub evt_page: __u64, + pub msg_page: __u64, +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>(), + 32usize, + concat!( + "Size of: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1) + ) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>(), + 8usize, + concat!( + "Alignment of ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>())).control + as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(control) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>())).evt_page + as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(evt_page) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1>())).msg_page + as *const _ as usize + }, + 24usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_1), + "::", + stringify!(msg_page) + ) + ); +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2 { + pub input: __u64, + pub result: __u64, + pub params: [__u64; 2], +} +#[test] +fn bindgen_test_layout_kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2() { + assert_eq!( + ::std::mem::size_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>(), + 32usize, + concat!( + "Size of: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2) + ) + ); + assert_eq!( + ::std::mem::align_of::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>(), + 8usize, + concat!( + "Alignment of ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>())).result + as *const _ as usize + }, + 8usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(result) + ) + ); + assert_eq!( + unsafe { + &(*(::std::ptr::null::<kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2>())).params + as *const _ as usize + }, + 16usize, + concat!( + "Offset of field: ", + stringify!(kvm_hyperv_exit__bindgen_ty_1__bindgen_ty_2), + "::", + stringify!(params) + ) + ); +} #[test] fn bindgen_test_layout_kvm_run__bindgen_ty_1() { assert_eq!( diff --git a/linux_input_sys/src/lib.rs b/linux_input_sys/src/lib.rs index 3880be2..f70dcc9 100644 --- a/linux_input_sys/src/lib.rs +++ b/linux_input_sys/src/lib.rs @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -use data_model::DataInit; +use data_model::{DataInit, Le16, Le32}; use std::mem::size_of; const EV_SYN: u16 = 0x00; @@ -20,6 +20,13 @@ const ABS_Y: u16 = 0x01; const BTN_TOUCH: u16 = 0x14a; const BTN_TOOL_FINGER: u16 = 0x145; +/// Allows a raw input event of the implementor's type to be decoded into +/// a virtio_input_event. +pub trait InputEventDecoder { + const SIZE: usize; + fn decode(data: &[u8]) -> virtio_input_event; +} + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] #[repr(C)] pub struct input_event { @@ -32,55 +39,99 @@ pub struct input_event { unsafe impl DataInit for input_event {} impl input_event { - pub const EVENT_SIZE: usize = size_of::<input_event>(); - - #[inline] - pub fn syn() -> input_event { + pub fn from_virtio_input_event(other: &virtio_input_event) -> input_event { input_event { timestamp_fields: [0, 0], - type_: EV_SYN, - code: SYN_REPORT, - value: 0, + type_: other.type_.into(), + code: other.code.into(), + value: other.value.into(), + } + } +} + +impl InputEventDecoder for input_event { + const SIZE: usize = size_of::<Self>(); + + fn decode(data: &[u8]) -> virtio_input_event { + #[repr(align(8))] + struct Aligner([u8; input_event::SIZE]); + let data_aligned = Aligner(*<[u8; input_event::SIZE]>::from_slice(data).unwrap()); + let e = Self::from_slice(&data_aligned.0).unwrap(); + virtio_input_event { + type_: Le16::from(e.type_), + code: Le16::from(e.code), + value: Le32::from(e.value), + } + } +} + +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +#[repr(C)] +pub struct virtio_input_event { + pub type_: Le16, + pub code: Le16, + pub value: Le32, +} + +// Safe because it only has data and has no implicit padding. +unsafe impl DataInit for virtio_input_event {} + +impl InputEventDecoder for virtio_input_event { + const SIZE: usize = size_of::<Self>(); + + fn decode(data: &[u8]) -> virtio_input_event { + #[repr(align(4))] + struct Aligner([u8; virtio_input_event::SIZE]); + let data_aligned = Aligner(*<[u8; virtio_input_event::SIZE]>::from_slice(data).unwrap()); + *Self::from_slice(&data_aligned.0).unwrap() + } +} + +impl virtio_input_event { + #[inline] + pub fn syn() -> virtio_input_event { + virtio_input_event { + type_: Le16::from(EV_SYN), + code: Le16::from(SYN_REPORT), + value: Le32::from(0), } } #[inline] - pub fn absolute(code: u16, value: u32) -> input_event { - input_event { - timestamp_fields: [0, 0], - type_: EV_ABS, - code, - value, + pub fn absolute(code: u16, value: u32) -> virtio_input_event { + virtio_input_event { + type_: Le16::from(EV_ABS), + code: Le16::from(code), + value: Le32::from(value), } } #[inline] - pub fn absolute_x(x: u32) -> input_event { + pub fn absolute_x(x: u32) -> virtio_input_event { Self::absolute(ABS_X, x) } #[inline] - pub fn absolute_y(y: u32) -> input_event { + pub fn absolute_y(y: u32) -> virtio_input_event { Self::absolute(ABS_Y, y) } #[inline] - pub fn touch(has_contact: bool) -> input_event { + pub fn touch(has_contact: bool) -> virtio_input_event { Self::key(BTN_TOUCH, has_contact) } #[inline] - pub fn finger_tool(active: bool) -> input_event { + pub fn finger_tool(active: bool) -> virtio_input_event { Self::key(BTN_TOOL_FINGER, active) } #[inline] - pub fn key(code: u16, pressed: bool) -> input_event { - input_event { - timestamp_fields: [0, 0], - type_: EV_KEY, - code, - value: if pressed { 1 } else { 0 }, + pub fn key(code: u16, pressed: bool) -> virtio_input_event { + virtio_input_event { + type_: Le16::from(EV_KEY), + code: Le16::from(code), + value: Le32::from(if pressed { 1 } else { 0 }), } } } diff --git a/msg_socket/Cargo.toml b/msg_socket/Cargo.toml index dcfccfc..c803bed 100644 --- a/msg_socket/Cargo.toml +++ b/msg_socket/Cargo.toml @@ -5,6 +5,9 @@ authors = ["The Chromium OS Authors"] edition = "2018" [dependencies] +cros_async = { path = "../cros_async" } data_model = { path = "../data_model" } +futures = "*" +libc = "*" msg_on_socket_derive = { path = "msg_on_socket_derive" } sys_util = { path = "../sys_util" } diff --git a/msg_socket/src/lib.rs b/msg_socket/src/lib.rs index 5b9f9ce..ea817f0 100644 --- a/msg_socket/src/lib.rs +++ b/msg_socket/src/lib.rs @@ -7,8 +7,17 @@ mod msg_on_socket; use std::io::Result; use std::marker::PhantomData; use std::os::unix::io::{AsRawFd, RawFd}; +use std::pin::Pin; +use std::task::{Context, Poll}; -use sys_util::{handle_eintr, net::UnixSeqpacket, Error as SysError, ScmSocket}; +use futures::Stream; +use libc::{EWOULDBLOCK, O_NONBLOCK}; + +use cros_async::fd_executor::add_read_waker; +use sys_util::{ + add_fd_flags, clear_fd_flags, error, handle_eintr, net::UnixSeqpacket, Error as SysError, + ScmSocket, +}; pub use crate::msg_on_socket::*; pub use msg_on_socket_derive::*; @@ -18,16 +27,8 @@ pub use msg_on_socket_derive::*; pub fn pair<Request: MsgOnSocket, Response: MsgOnSocket>( ) -> Result<(MsgSocket<Request, Response>, MsgSocket<Response, Request>)> { let (sock1, sock2) = UnixSeqpacket::pair()?; - let requester = MsgSocket { - sock: sock1, - _i: PhantomData, - _o: PhantomData, - }; - let responder = MsgSocket { - sock: sock2, - _i: PhantomData, - _o: PhantomData, - }; + let requester = MsgSocket::new(sock1); + let responder = MsgSocket::new(sock2); Ok((requester, responder)) } @@ -47,6 +48,11 @@ impl<I: MsgOnSocket, O: MsgOnSocket> MsgSocket<I, O> { _o: PhantomData, } } + + // Creates an async receiver that implements `futures::Stream`. + pub fn async_receiver(&mut self) -> MsgResult<AsyncReceiver<I, O>> { + AsyncReceiver::new(self) + } } /// One direction socket that only supports sending. @@ -191,3 +197,63 @@ impl<I: MsgOnSocket> MsgSender for Sender<I> { impl<O: MsgOnSocket> MsgReceiver for Receiver<O> { type M = O; } + +/// Asynchronous adaptor for `MsgSocket`. +pub struct AsyncReceiver<'a, I: MsgOnSocket, O: MsgOnSocket> { + inner: &'a mut MsgSocket<I, O>, + done: bool, // Have hit an error and the Stream should return null when polled. +} + +impl<'a, I: MsgOnSocket, O: MsgOnSocket> AsyncReceiver<'a, I, O> { + fn new(msg_socket: &mut MsgSocket<I, O>) -> MsgResult<AsyncReceiver<I, O>> { + add_fd_flags(msg_socket.as_raw_fd(), O_NONBLOCK).map_err(MsgError::SettingFdFlags)?; + Ok(AsyncReceiver { + inner: msg_socket, + done: false, + }) + } +} + +impl<'a, I: MsgOnSocket, O: MsgOnSocket> Drop for AsyncReceiver<'a, I, O> { + fn drop(&mut self) { + if let Err(e) = clear_fd_flags(self.inner.as_raw_fd(), O_NONBLOCK) { + error!( + "Failed to restore non-blocking behavior to message socket: {}", + e + ); + } + } +} + +impl<'a, I: MsgOnSocket, O: MsgOnSocket> Stream for AsyncReceiver<'a, I, O> { + type Item = MsgResult<O>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> { + if self.done { + return Poll::Ready(None); + } + + let ret = match self.inner.recv() { + Ok(msg) => Ok(Poll::Ready(Some(Ok(msg)))), + Err(MsgError::Recv(e)) => { + if e.errno() == EWOULDBLOCK { + add_read_waker(self.inner.as_raw_fd(), cx.waker().clone()) + .map(|_| Poll::Pending) + .map_err(MsgError::AddingWaker) + } else { + Err(MsgError::Recv(e)) + } + } + Err(e) => Err(e), + }; + + match ret { + Ok(p) => p, + Err(e) => { + // Indicate something went wrong and no more events will be provided. + self.done = true; + Poll::Ready(Some(Err(e))) + } + } + } +} diff --git a/msg_socket/src/msg_on_socket.rs b/msg_socket/src/msg_on_socket.rs index d4eb18c..36e00c8 100644 --- a/msg_socket/src/msg_on_socket.rs +++ b/msg_socket/src/msg_on_socket.rs @@ -15,6 +15,8 @@ use sys_util::{Error as SysError, EventFd}; #[derive(Debug, PartialEq)] /// An error during transaction or serialization/deserialization. pub enum MsgError { + /// Error adding a waker for async read. + AddingWaker(cros_async::fd_executor::Error), /// Error while sending a request or response. Send(SysError), /// Error while receiving a request or response. @@ -28,6 +30,8 @@ pub enum MsgError { ExpectFd, /// There was some associated file descriptor received but not used when deserialize. NotExpectFd, + /// Failed to set flags on the file descriptor. + SettingFdFlags(SysError), /// Trying to serialize/deserialize, but fd buffer size is too small. This typically happens /// when max_fd_count() returns a value that is too small. WrongFdBufferSize, @@ -43,6 +47,7 @@ impl Display for MsgError { use self::MsgError::*; match self { + AddingWaker(e) => write!(f, "failed to add a waker: {}", e), Send(e) => write!(f, "failed to send request or response: {}", e), Recv(e) => write!(f, "failed to receive request or response: {}", e), InvalidType => write!(f, "invalid type"), @@ -53,6 +58,7 @@ impl Display for MsgError { ), ExpectFd => write!(f, "missing associated file descriptor for request"), NotExpectFd => write!(f, "unexpected file descriptor is unused"), + SettingFdFlags(e) => write!(f, "failed setting flags on the message FD: {}", e), WrongFdBufferSize => write!(f, "fd buffer size too small"), WrongMsgBufferSize => write!(f, "msg buffer size too small"), } diff --git a/protos/src/plugin.proto b/protos/src/plugin.proto index e2838b0..8e5f3a9 100644 --- a/protos/src/plugin.proto +++ b/protos/src/plugin.proto @@ -334,6 +334,9 @@ message VcpuRequest { bytes state = 2; } + message CpuidRequest { + } + message GetMsrs { // The entry data will be returned in the same order as this in the // VcpuResponse::GetMsrs::entry_data array. @@ -356,6 +359,10 @@ message VcpuRequest { message Shutdown { } + message EnableCapability { + uint32 capability = 1; + } + // The type of the message is determined by which of these oneof fields is present in the // protobuf. oneof message { @@ -367,6 +374,8 @@ message VcpuRequest { SetMsrs set_msrs = 6; SetCpuid set_cpuid = 7; Shutdown shutdown = 8; + CpuidRequest get_hyperv_cpuid = 9; + EnableCapability enable_capability = 10; } } @@ -394,15 +403,30 @@ message VcpuResponse { bytes debugregs = 8; } - // This type of wait reason is only generated after a PuaseVcpus request on this VCPU. + // This type of wait reason is only generated after a PauseVcpus request on this VCPU. message User { uint64 user = 1; } + message HypervCall { + uint64 input = 1; + uint64 params0 = 2; + uint64 params1 = 3; + } + + message HypervSynic { + uint32 msr = 1; + uint64 control = 2; + uint64 evt_page = 3; + uint64 msg_page = 4; + } + oneof exit { Init init = 1; Io io = 2; User user = 3; + HypervCall hyperv_call = 4; + HypervSynic hyperv_synic = 5; } } @@ -417,6 +441,10 @@ message VcpuResponse { message SetState { } + message CpuidResponse { + repeated CpuidEntry entries = 1; + } + message GetMsrs { // The order of the entry_data values is the same order as the array of indices given in the // corresponding request. @@ -427,6 +455,8 @@ message VcpuResponse { message SetCpuid {} + message EnableCapability {} + // This is zero on success, and a negative integer on failure. sint32 errno = 1; // The field present here is always the same as the one present in the corresponding @@ -439,5 +469,7 @@ message VcpuResponse { GetMsrs get_msrs = 6; SetMsrs set_msrs = 7; SetCpuid set_cpuid = 8; + CpuidResponse get_hyperv_cpuid = 9; + EnableCapability enable_capability = 10; } } diff --git a/qcow_utils/src/qcow_utils.rs b/qcow_utils/src/qcow_utils.rs index 97d9a94..dc9adad 100644 --- a/qcow_utils/src/qcow_utils.rs +++ b/qcow_utils/src/qcow_utils.rs @@ -7,14 +7,10 @@ use libc::{EINVAL, EIO, ENOSYS}; use std::ffi::CStr; use std::fs::OpenOptions; -use std::io::{Seek, SeekFrom}; use std::os::raw::{c_char, c_int}; -use disk::{ImageType, QcowFile}; -use sys_util::{flock, FileSetLen, FlockOperation}; - -trait DiskFile: FileSetLen + Seek {} -impl<D: FileSetLen + Seek> DiskFile for D {} +use disk::{DiskFile, ImageType, QcowFile}; +use sys_util::{flock, FlockOperation}; #[no_mangle] pub unsafe extern "C" fn create_qcow_with_size(path: *const c_char, virtual_size: u64) -> c_int { @@ -73,7 +69,7 @@ pub unsafe extern "C" fn expand_disk_image(path: *const c_char, virtual_size: u6 Err(_) => return -EINVAL, }; - let mut disk_image: Box<dyn DiskFile> = match image_type { + let disk_image: Box<dyn DiskFile> = match image_type { ImageType::Raw => Box::new(raw_image), ImageType::Qcow2 => match QcowFile::from(raw_image) { Ok(f) => Box::new(f), @@ -89,7 +85,7 @@ pub unsafe extern "C" fn expand_disk_image(path: *const c_char, virtual_size: u6 // acquired by other instances of this function as well as crosvm // itself when running a VM, so this should be safe in all cases that // can access a disk image in normal operation. - let current_size = match disk_image.seek(SeekFrom::End(0)) { + let current_size = match disk_image.get_len() { Ok(len) => len, Err(_) => return -EIO, }; diff --git a/seccomp/aarch64/fs_device.policy b/seccomp/aarch64/fs_device.policy index 5199092..9fd4c8b 100644 --- a/seccomp/aarch64/fs_device.policy +++ b/seccomp/aarch64/fs_device.policy @@ -4,10 +4,13 @@ @include /usr/share/policy/crosvm/common_device.policy +fallocate: 1 fchmodat: 1 fchownat: 1 +fdatasync: 1 fgetxattr: 1 fsetxattr: 1 +fsync: 1 newfstatat: 1 fstatfs: 1 ftruncate: 1 diff --git a/seccomp/arm/fs_device.policy b/seccomp/arm/fs_device.policy index 5822261..eb9df16 100644 --- a/seccomp/arm/fs_device.policy +++ b/seccomp/arm/fs_device.policy @@ -4,12 +4,15 @@ @include /usr/share/policy/crosvm/common_device.policy +fallocate: 1 fchmodat: 1 fchownat: 1 +fdatasync: 1 fgetxattr: 1 fsetxattr: 1 fstatat64: 1 fstatfs64: 1 +fsync: 1 ftruncate64: 1 getdents64: 1 getegid32: 1 diff --git a/seccomp/x86_64/fs_device.policy b/seccomp/x86_64/fs_device.policy index 32e7477..ddb2a51 100644 --- a/seccomp/x86_64/fs_device.policy +++ b/seccomp/x86_64/fs_device.policy @@ -4,11 +4,14 @@ @include /usr/share/policy/crosvm/common_device.policy +fallocate: 1 fchmodat: 1 fchownat: 1 +fdatasync: 1 fgetxattr: 1 fsetxattr: 1 fstatfs: 1 +fsync: 1 ftruncate: 1 getdents64: 1 getegid: 1 diff --git a/src/crosvm.rs b/src/crosvm.rs index 082e43c..2eead30 100644 --- a/src/crosvm.rs +++ b/src/crosvm.rs @@ -13,7 +13,7 @@ pub mod plugin; use std::collections::BTreeMap; use std::net; use std::os::unix::io::RawFd; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::str::FromStr; use arch::Pstore; @@ -57,23 +57,58 @@ pub struct GidMap { pub count: u32, } -const DEFAULT_TOUCH_DEVICE_WIDTH: u32 = 800; -const DEFAULT_TOUCH_DEVICE_HEIGHT: u32 = 1280; +pub const DEFAULT_TOUCH_DEVICE_HEIGHT: u32 = 1024; +pub const DEFAULT_TOUCH_DEVICE_WIDTH: u32 = 1280; pub struct TouchDeviceOption { - pub path: PathBuf, - pub width: u32, - pub height: u32, + path: PathBuf, + width: Option<u32>, + height: Option<u32>, + default_width: u32, + default_height: u32, } impl TouchDeviceOption { pub fn new(path: PathBuf) -> TouchDeviceOption { TouchDeviceOption { path, - width: DEFAULT_TOUCH_DEVICE_WIDTH, - height: DEFAULT_TOUCH_DEVICE_HEIGHT, + width: None, + height: None, + default_width: DEFAULT_TOUCH_DEVICE_WIDTH, + default_height: DEFAULT_TOUCH_DEVICE_HEIGHT, } } + + /// Getter for the path to the input event streams. + pub fn get_path(&self) -> &Path { + self.path.as_path() + } + + /// When a user specifies the parameters for a touch device, width and height are optional. + /// If the width and height are missing, default values are used. Default values can be set + /// dynamically, for example from the display sizes specified by the gpu argument. + pub fn set_default_size(&mut self, width: u32, height: u32) { + self.default_width = width; + self.default_height = height; + } + + /// Setter for the width specified by the user. + pub fn set_width(&mut self, width: u32) { + self.width.replace(width); + } + + /// Setter for the height specified by the user. + pub fn set_height(&mut self, height: u32) { + self.height.replace(height); + } + + /// If the user specifies the size, use it. Otherwise, use the default values. + pub fn get_size(&self) -> (u32, u32) { + ( + self.width.unwrap_or(self.default_width), + self.height.unwrap_or(self.default_height), + ) + } } pub enum SharedDirKind { diff --git a/src/linux.rs b/src/linux.rs index 84edf5c..4a87f7d 100644 --- a/src/linux.rs +++ b/src/linux.rs @@ -27,13 +27,13 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use libc::{self, c_int, gid_t, uid_t}; -use audio_streams::DummyStreamSource; +use audio_streams::shm_streams::NullShmStreamSource; #[cfg(feature = "gpu")] use devices::virtio::EventDevice; use devices::virtio::{self, VirtioDevice}; use devices::{ - self, HostBackendDeviceProvider, PciDevice, VfioDevice, VfioPciDevice, VirtioPciDevice, - XhciController, + self, HostBackendDeviceProvider, PciDevice, VfioContainer, VfioDevice, VfioPciDevice, + VirtioPciDevice, XhciController, }; use io_jail::{self, Minijail}; use kvm::*; @@ -63,7 +63,6 @@ use vm_control::{ }; use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption}; - use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents, VmImage}; #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] @@ -97,7 +96,7 @@ pub enum Error { CreateVfioDevice(devices::vfio::VfioError), DeviceJail(io_jail::Error), DevicePivotRoot(io_jail::Error), - Disk(io::Error), + Disk(PathBuf, io::Error), DiskImageLock(sys_util::Error), DropCapabilities(sys_util::Error), FsDeviceNew(virtio::fs::Error), @@ -184,7 +183,7 @@ impl Display for Error { CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e), DeviceJail(e) => write!(f, "failed to jail device: {}", e), DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e), - Disk(e) => write!(f, "failed to load disk image: {}", e), + Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e), DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e), DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e), FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e), @@ -301,55 +300,85 @@ fn get_max_open_files() -> Result<libc::rlim64_t> { } } +struct SandboxConfig<'a> { + limit_caps: bool, + log_failures: bool, + seccomp_policy: &'a Path, + uid_map: Option<&'a str>, + gid_map: Option<&'a str>, +} + fn create_base_minijail( root: &Path, - log_failures: bool, - seccomp_policy: &Path, + r_limit: Option<u64>, + config: Option<&SandboxConfig>, ) -> Result<Minijail> { // All child jails run in a new user namespace without any users mapped, // they run as nobody unless otherwise configured. let mut j = Minijail::new().map_err(Error::DeviceJail)?; - j.namespace_pids(); - j.namespace_user(); - j.namespace_user_disable_setgroups(); - // Don't need any capabilities. - j.use_caps(0); - // Create a new mount namespace with an empty root FS. - j.namespace_vfs(); - j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?; - // Run in an empty network namespace. - j.namespace_net(); - // Most devices don't need to open many fds. - j.set_rlimit(libc::RLIMIT_NOFILE as i32, 1024, 1024) - .map_err(Error::SettingMaxOpenFiles)?; - // Apply the block device seccomp policy. - j.no_new_privs(); - - // By default we'll prioritize using the pre-compiled .bpf over the .policy - // file (the .bpf is expected to be compiled using "trap" as the failure - // behavior instead of the default "kill" behavior). - // Refer to the code comment for the "seccomp-log-failures" - // command-line parameter for an explanation about why the |log_failures| - // flag forces the use of .policy files (and the build-time alternative to - // this run-time flag). - let bpf_policy_file = seccomp_policy.with_extension("bpf"); - if bpf_policy_file.exists() && !log_failures { - j.parse_seccomp_program(&bpf_policy_file) - .map_err(Error::DeviceJail)?; - } else { - // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, - // which will correctly kill the entire device process if a worker - // thread commits a seccomp violation. - j.set_seccomp_filter_tsync(); - if log_failures { - j.log_seccomp_filter_failures(); + + if let Some(config) = config { + j.namespace_pids(); + j.namespace_user(); + j.namespace_user_disable_setgroups(); + if config.limit_caps { + // Don't need any capabilities. + j.use_caps(0); } - j.parse_seccomp_filters(&seccomp_policy.with_extension("policy")) - .map_err(Error::DeviceJail)?; + if let Some(uid_map) = config.uid_map { + j.uidmap(uid_map).map_err(Error::SettingUidMap)?; + } + if let Some(gid_map) = config.gid_map { + j.gidmap(gid_map).map_err(Error::SettingGidMap)?; + } + // Run in a new mount namespace. + j.namespace_vfs(); + + // Run in an empty network namespace. + j.namespace_net(); + + // Don't allow the device to gain new privileges. + j.no_new_privs(); + + // By default we'll prioritize using the pre-compiled .bpf over the .policy + // file (the .bpf is expected to be compiled using "trap" as the failure + // behavior instead of the default "kill" behavior). + // Refer to the code comment for the "seccomp-log-failures" + // command-line parameter for an explanation about why the |log_failures| + // flag forces the use of .policy files (and the build-time alternative to + // this run-time flag). + let bpf_policy_file = config.seccomp_policy.with_extension("bpf"); + if bpf_policy_file.exists() && !config.log_failures { + j.parse_seccomp_program(&bpf_policy_file) + .map_err(Error::DeviceJail)?; + } else { + // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, + // which will correctly kill the entire device process if a worker + // thread commits a seccomp violation. + j.set_seccomp_filter_tsync(); + if config.log_failures { + j.log_seccomp_filter_failures(); + } + j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy")) + .map_err(Error::DeviceJail)?; + } + j.use_seccomp_filter(); + // Don't do init setup. + j.run_as_init(); + } + + // Only pivot_root if we are not re-using the current root directory. + if root != Path::new("/") { + // It's safe to call `namespace_vfs` multiple times. + j.namespace_vfs(); + j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?; } - j.use_seccomp_filter(); - // Don't do init setup. - j.run_as_init(); + + // Most devices don't need to open many fds. + let limit = if let Some(r) = r_limit { r } else { 1024u64 }; + j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit) + .map_err(Error::SettingMaxOpenFiles)?; + Ok(j) } @@ -362,11 +391,14 @@ fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> { return Err(Error::PivotRootDoesntExist(pivot_root)); } let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy); - Ok(Some(create_base_minijail( - root_path, - cfg.seccomp_log_failures, - &policy_path, - )?)) + let config = SandboxConfig { + limit_caps: true, + log_failures: cfg.seccomp_log_failures, + seccomp_policy: &policy_path, + uid_map: None, + gid_map: None, + }; + Ok(Some(create_base_minijail(root_path, None, Some(&config))?)) } else { Ok(None) } @@ -388,7 +420,7 @@ fn create_block_device( .read(true) .write(!disk.read_only) .open(&disk.path) - .map_err(Error::Disk)? + .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))? }; // Lock the disk image to prevent other crosvm instances from using it. let lock_op = if disk.read_only { @@ -473,13 +505,16 @@ fn create_tpm_device(cfg: &Config) -> DeviceResult { } fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult { - let socket = single_touch_spec.path.into_unix_stream().map_err(|e| { - error!("failed configuring virtio single touch: {:?}", e); - e - })?; - - let dev = virtio::new_single_touch(socket, single_touch_spec.width, single_touch_spec.height) - .map_err(Error::InputDeviceNew)?; + let socket = single_touch_spec + .get_path() + .into_unix_stream() + .map_err(|e| { + error!("failed configuring virtio single touch: {:?}", e); + e + })?; + + let (width, height) = single_touch_spec.get_size(); + let dev = virtio::new_single_touch(socket, width, height).map_err(Error::InputDeviceNew)?; Ok(VirtioDeviceStub { dev: Box::new(dev), jail: simple_jail(&cfg, "input_device")?, @@ -487,13 +522,13 @@ fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOptio } fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult { - let socket = trackpad_spec.path.into_unix_stream().map_err(|e| { + let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| { error!("failed configuring virtio trackpad: {}", e); e })?; - let dev = virtio::new_trackpad(socket, trackpad_spec.width, trackpad_spec.height) - .map_err(Error::InputDeviceNew)?; + let (width, height) = trackpad_spec.get_size(); + let dev = virtio::new_trackpad(socket, width, height).map_err(Error::InputDeviceNew)?; Ok(VirtioDeviceStub { dev: Box::new(dev), @@ -768,45 +803,20 @@ fn create_fs_device( tag: &str, fs_cfg: virtio::fs::passthrough::Config, ) -> DeviceResult { - let mut j = Minijail::new().map_err(Error::DeviceJail)?; - - if cfg.sandbox { - j.namespace_pids(); - j.namespace_user(); - j.namespace_user_disable_setgroups(); - j.uidmap(uid_map).map_err(Error::SettingUidMap)?; - j.gidmap(gid_map).map_err(Error::SettingGidMap)?; - - // Run in an empty network namespace. - j.namespace_net(); - - j.no_new_privs(); - - // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill - // the entire device process if a worker thread commits a seccomp violation. - let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device"); - j.set_seccomp_filter_tsync(); - if cfg.seccomp_log_failures { - j.log_seccomp_filter_failures(); - } - j.parse_seccomp_filters(&seccomp_policy) - .map_err(Error::DeviceJail)?; - j.use_seccomp_filter(); - - // Don't do init setup. - j.run_as_init(); - } - - // Create a new mount namespace with the source directory as the root. We need this even when - // sandboxing is disabled as the server relies on the host kernel to prevent path traversals - // from leaking out of the shared directory. - j.namespace_vfs(); - j.enter_pivot_root(src).map_err(Error::DevicePivotRoot)?; - - // The file server opens a lot of fds and needs a really high open file limit. let max_open_files = get_max_open_files()?; - j.set_rlimit(libc::RLIMIT_NOFILE as i32, max_open_files, max_open_files) - .map_err(Error::SettingMaxOpenFiles)?; + let j = if cfg.sandbox { + let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device"); + let config = SandboxConfig { + limit_caps: false, + uid_map: Some(uid_map), + gid_map: Some(gid_map), + log_failures: cfg.seccomp_log_failures, + seccomp_policy: &seccomp_policy, + }; + create_base_minijail(src, Some(max_open_files), Some(&config))? + } else { + create_base_minijail(src, Some(max_open_files), None)? + }; // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic // when num_queues > 1. @@ -818,25 +828,36 @@ fn create_fs_device( }) } -fn create_9p_device(cfg: &Config, src: &Path, tag: &str) -> DeviceResult { - let (jail, root) = match simple_jail(&cfg, "9p_device")? { - Some(mut jail) => { - // The shared directory becomes the root of the device's file system. - let root = Path::new("/"); - jail.mount_bind(src, root, true)?; +fn create_9p_device( + cfg: &Config, + uid_map: &str, + gid_map: &str, + src: &Path, + tag: &str, +) -> DeviceResult { + let max_open_files = get_max_open_files()?; + let (jail, root) = if cfg.sandbox { + let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device"); + let config = SandboxConfig { + limit_caps: false, + uid_map: Some(uid_map), + gid_map: Some(gid_map), + log_failures: cfg.seccomp_log_failures, + seccomp_policy: &seccomp_policy, + }; - // We want bind mounts from the parent namespaces to propagate into the 9p server's - // namespace. - jail.set_remount_mode(libc::MS_SLAVE); + let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?; + // We want bind mounts from the parent namespaces to propagate into the 9p server's + // namespace. + jail.set_remount_mode(libc::MS_SLAVE); - add_crosvm_user_to_jail(&mut jail, "p9")?; - (Some(jail), root) - } - None => { - // There's no bind mount so we tell the server to treat the source directory as the - // root. - (None, src) - } + // The shared directory becomes the root of the device's file system. + let root = Path::new("/"); + (Some(jail), root) + } else { + // There's no mount namespace so we tell the server to treat the source directory as the + // root. + (None, src) }; let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?; @@ -858,10 +879,11 @@ fn create_pmem_device( .read(true) .write(!disk.read_only) .open(&disk.path) - .map_err(Error::Disk)?; + .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?; let (disk_size, arena_size) = { - let metadata = std::fs::metadata(&disk.path).map_err(Error::Disk)?; + let metadata = + std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?; let disk_len = metadata.len(); // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page // at the end of an mmap'd file and won't write back beyond the actual file length, but if @@ -1022,16 +1044,22 @@ fn create_virtio_devices( #[cfg(feature = "gpu")] { - if cfg.gpu_parameters.is_some() { + if let Some(gpu_parameters) = &cfg.gpu_parameters { let mut event_devices = Vec::new(); if cfg.display_window_mouse { let (event_device_socket, virtio_dev_socket) = UnixStream::pair().map_err(Error::CreateSocket)?; - // TODO(nkgold): the width/height here should match the display's height/width. When - // those settings are available as CLI options, we should use the CLI options here - // as well. - let dev = virtio::new_single_touch(virtio_dev_socket, 1280, 1024) - .map_err(Error::InputDeviceNew)?; + let (single_touch_width, single_touch_height) = cfg + .virtio_single_touch + .as_ref() + .map(|single_touch_spec| single_touch_spec.get_size()) + .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height)); + let dev = virtio::new_single_touch( + virtio_dev_socket, + single_touch_width, + single_touch_height, + ) + .map_err(Error::InputDeviceNew)?; devs.push(VirtioDeviceStub { dev: Box::new(dev), jail: simple_jail(&cfg, "input_device")?, @@ -1077,7 +1105,7 @@ fn create_virtio_devices( let dev = match kind { SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?, - SharedDirKind::P9 => create_9p_device(cfg, src, tag)?, + SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag)?, }; devs.push(dev); } @@ -1136,7 +1164,7 @@ fn create_devices( } if cfg.null_audio { - let server = Box::new(DummyStreamSource::new()); + let server = Box::new(NullShmStreamSource::new()); let null_audio = devices::Ac97Dev::new(mem.clone(), server); pci_devices.push(( @@ -1148,7 +1176,11 @@ fn create_devices( let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider)); pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?)); - if cfg.vfio.is_some() { + if let Some(vfio_path) = &cfg.vfio { + let vfio_container = Arc::new(Mutex::new( + VfioContainer::new().map_err(Error::CreateVfioDevice)?, + )); + let (vfio_host_socket_irq, vfio_device_socket_irq) = msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?; control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_irq)); @@ -1157,9 +1189,9 @@ fn create_devices( msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?; control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem)); - let vfio_path = cfg.vfio.as_ref().unwrap().as_path(); - let vfiodevice = - VfioDevice::new(vfio_path, vm, mem.clone()).map_err(Error::CreateVfioDevice)?; + let vfio_path = vfio_path.as_path(); + let vfiodevice = VfioDevice::new(vfio_path, vm, mem, vfio_container.clone()) + .map_err(Error::CreateVfioDevice)?; let vfiopcidevice = Box::new(VfioPciDevice::new( vfiodevice, vfio_device_socket_irq, @@ -1314,6 +1346,26 @@ fn runnable_vcpu(vcpu: Vcpu, use_kvm_signals: bool, cpu_id: u32) -> Option<Runna } } +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn inject_interrupt(pic: &Arc<Mutex<devices::Pic>>, vcpu: &RunnableVcpu) { + let mut pic = pic.lock(); + if pic.interrupt_requested() && vcpu.ready_for_interrupt() { + if let Some(vector) = pic.get_external_interrupt() { + if let Err(e) = vcpu.interrupt(vector as u32) { + error!("PIC: failed to inject interrupt to vCPU0: {}", e); + } + } + // The second interrupt request should be handled immediately, so ask + // vCPU to exit as soon as possible. + if pic.interrupt_requested() { + vcpu.request_interrupt_window(); + } + } +} + +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +fn inject_interrupt(pic: &Arc<Mutex<devices::Pic>>, vcpu: &RunnableVcpu) {} + fn run_vcpu( vcpu: Vcpu, cpu_id: u32, @@ -1321,6 +1373,7 @@ fn run_vcpu( start_barrier: Arc<Barrier>, io_bus: devices::Bus, mmio_bus: devices::Bus, + split_irqchip: Option<(Arc<Mutex<devices::Pic>>, Arc<Mutex<devices::Ioapic>>)>, exit_evt: EventFd, requires_kvmclock_ctrl: bool, run_mode_arc: Arc<VcpuRunMode>, @@ -1382,6 +1435,13 @@ fn run_vcpu( }) => { mmio_bus.write(address, &data[..size]); } + Ok(VcpuExit::IoapicEoi{vector}) => { + if let Some((_, ioapic)) = &split_irqchip { + ioapic.lock().end_of_interrupt(vector); + } else { + panic!("userspace ioapic not found in split irqchip mode, should be impossible."); + } + }, Ok(VcpuExit::Hlt) => break, Ok(VcpuExit::Shutdown) => break, Ok(VcpuExit::FailEntry { @@ -1437,6 +1497,11 @@ fn run_vcpu( run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock); } } + + if cpu_id != 0 { continue; } + if let Some((pic, _)) = &split_irqchip { + inject_interrupt(pic, &vcpu); + } } } }) @@ -1554,10 +1619,15 @@ pub fn run_config(cfg: Config) -> Result<()> { msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?; control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket)); + let (ioapic_host_socket, ioapic_device_socket) = + msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?; + control_sockets.push(TaggedControlSocket::VmIrq(ioapic_host_socket)); + let sandbox = cfg.sandbox; let linux = Arch::build_vm( components, cfg.split_irqchip, + ioapic_device_socket, &cfg.serial_parameters, simple_jail(&cfg, "serial")?, |mem, vm, sys_allocator, exit_evt| { @@ -1623,8 +1693,10 @@ fn run_control( #[derive(PollToken)] enum Token { Exit, + Suspend, ChildSignal, CheckAvailableMemory, + IrqFd { gsi: usize }, LowMemory, LowmemTimer, VmControlServer, @@ -1637,6 +1709,7 @@ fn run_control( let poll_ctx = PollContext::build_with(&[ (&linux.exit_evt, Token::Exit), + (&linux.suspend_evt, Token::Suspend), (&sigchld_fd, Token::ChildSignal), ]) .map_err(Error::PollContextAdd)?; @@ -1674,6 +1747,16 @@ fn run_control( .add(&freemem_timer, Token::CheckAvailableMemory) .map_err(Error::PollContextAdd)?; + if let Some(gsi_relay) = &linux.gsi_relay { + for (gsi, evt) in gsi_relay.irqfd.into_iter().enumerate() { + if let Some(evt) = evt { + poll_ctx + .add(evt, Token::IrqFd { gsi }) + .map_err(Error::PollContextAdd)?; + } + } + } + // Used to add jitter to timer values so that we don't have a thundering herd problem when // multiple VMs are running. let mut simple_rng = SimpleRng::new( @@ -1702,6 +1785,7 @@ fn run_control( vcpu_thread_barrier.clone(), linux.io_bus.clone(), linux.mmio_bus.clone(), + linux.split_irqchip.clone(), linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?, linux.vm.check_extension(Cap::KvmclockCtrl), run_mode_arc.clone(), @@ -1711,6 +1795,7 @@ fn run_control( } vcpu_thread_barrier.wait(); + let mut ioapic_delayed = Vec::<usize>::default(); 'poll: loop { let events = { match poll_ctx.wait() { @@ -1722,6 +1807,26 @@ fn run_control( } }; + ioapic_delayed.retain(|&gsi| { + if let Some((_, ioapic)) = &linux.split_irqchip { + if let Ok(mut ioapic) = ioapic.try_lock() { + // The unwrap will never fail because gsi_relay is Some iff split_irqchip is + // Some. + if linux.gsi_relay.as_ref().unwrap().irqfd_resample[gsi].is_some() { + ioapic.service_irq(gsi, true); + } else { + ioapic.service_irq(gsi, true); + ioapic.service_irq(gsi, false); + } + false + } else { + true + } + } else { + true + } + }); + let mut vm_control_indices_to_remove = Vec::new(); for event in events.iter_readable() { match event.token() { @@ -1729,6 +1834,14 @@ fn run_control( info!("vcpu requested shutdown"); break 'poll; } + Token::Suspend => { + info!("VM requested suspend"); + linux.suspend_evt.read().unwrap(); + run_mode_arc.set_and_notify(VmRunMode::Suspending); + for handle in &vcpu_handles { + let _ = handle.kill(SIGRTMIN() + 0); + } + } Token::ChildSignal => { // Print all available siginfo structs, then exit the loop. while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? { @@ -1777,6 +1890,47 @@ fn run_control( } } } + Token::IrqFd { gsi } => { + if let Some((pic, ioapic)) = &linux.split_irqchip { + // This will never fail because gsi_relay is Some iff split_irqchip is + // Some. + let gsi_relay = linux.gsi_relay.as_ref().unwrap(); + if let Some(eventfd) = &gsi_relay.irqfd[gsi] { + eventfd.read().unwrap(); + } else { + warn!( + "irqfd {} not found in GSI relay, should be impossible.", + gsi + ); + } + + let mut pic = pic.lock(); + if gsi_relay.irqfd_resample[gsi].is_some() { + pic.service_irq(gsi as u8, true); + } else { + pic.service_irq(gsi as u8, true); + pic.service_irq(gsi as u8, false); + } + if let Err(e) = vcpu_handles[0].kill(SIGRTMIN() + 0) { + warn!("PIC: failed to kick vCPU0: {}", e); + } + + // When IOAPIC is configuring its redirection table, we should first + // process its AddMsiRoute request, otherwise we would deadlock. + if let Ok(mut ioapic) = ioapic.try_lock() { + if gsi_relay.irqfd_resample[gsi].is_some() { + ioapic.service_irq(gsi, true); + } else { + ioapic.service_irq(gsi, true); + ioapic.service_irq(gsi, false); + } + } else { + ioapic_delayed.push(gsi); + } + } else { + panic!("split irqchip not found, should be impossible."); + } + } Token::LowMemory => { if let Some(low_mem) = &low_mem { let old_balloon_memory = current_balloon_memory; @@ -1864,6 +2018,17 @@ fn run_control( VmRunMode::Exiting => { break 'poll; } + VmRunMode::Running => { + if let VmRunMode::Suspending = + *run_mode_arc.mtx.lock() + { + linux.io_bus.notify_resume(); + } + run_mode_arc.set_and_notify(VmRunMode::Running); + for handle in &vcpu_handles { + let _ = handle.kill(SIGRTMIN() + 0); + } + } other => { run_mode_arc.set_and_notify(other); for handle in &vcpu_handles { @@ -1922,8 +2087,10 @@ fn run_control( for event in events.iter_hungup() { match event.token() { Token::Exit => {} + Token::Suspend => {} Token::ChildSignal => {} Token::CheckAvailableMemory => {} + Token::IrqFd { gsi: _ } => {} Token::LowMemory => {} Token::LowmemTimer => {} Token::VmControlServer => {} diff --git a/src/main.rs b/src/main.rs index d0b12c1..7688134 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,7 +22,7 @@ use crosvm::{ linux, BindMount, Config, DiskOption, Executable, GidMap, SharedDir, TouchDeviceOption, }; #[cfg(feature = "gpu")] -use devices::virtio::gpu::{GpuMode, GpuParameters, DEFAULT_GPU_PARAMS}; +use devices::virtio::gpu::{GpuMode, GpuParameters}; use devices::{SerialParameters, SerialType}; use disk::QcowFile; use msg_socket::{MsgReceiver, MsgSender, MsgSocket}; @@ -114,7 +114,7 @@ fn parse_cpu_set(s: &str) -> argument::Result<Vec<usize>> { #[cfg(feature = "gpu")] fn parse_gpu_options(s: Option<&str>) -> argument::Result<GpuParameters> { - let mut gpu_params = DEFAULT_GPU_PARAMS; + let mut gpu_params: GpuParameters = Default::default(); if let Some(s) = s { let opts = s @@ -124,12 +124,37 @@ fn parse_gpu_options(s: Option<&str>) -> argument::Result<GpuParameters> { for (k, v) in opts { match k { + // Deprecated: Specifying --gpu=<mode> Not great as the mode can be set multiple + // times if the user specifies several modes (--gpu=2d,3d,gfxstream) "2d" | "2D" => { gpu_params.mode = GpuMode::Mode2D; } "3d" | "3D" => { gpu_params.mode = GpuMode::Mode3D; } + #[cfg(feature = "gfxstream")] + "gfxstream" => { + gpu_params.mode = GpuMode::ModeGfxStream; + } + // Preferred: Specifying --gpu,backend=<mode> + "backend" => match v { + "2d" | "2D" => { + gpu_params.mode = GpuMode::Mode2D; + } + "3d" | "3D" => { + gpu_params.mode = GpuMode::Mode3D; + } + #[cfg(feature = "gfxstream")] + "gfxstream" => { + gpu_params.mode = GpuMode::ModeGfxStream; + } + _ => { + return Err(argument::Error::InvalidValue { + value: v.to_string(), + expected: "gpu parameter 'backend' should be one of (2d|3d|gfxstream)", + }); + } + }, "egl" => match v { "true" | "" => { gpu_params.renderer_use_egl = true; @@ -982,12 +1007,11 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: let mut single_touch_spec = TouchDeviceOption::new(PathBuf::from(it.next().unwrap().to_owned())); if let Some(width) = it.next() { - single_touch_spec.width = width.trim().parse().unwrap(); + single_touch_spec.set_width(width.trim().parse().unwrap()); } if let Some(height) = it.next() { - single_touch_spec.height = height.trim().parse().unwrap(); + single_touch_spec.set_height(height.trim().parse().unwrap()); } - cfg.virtio_single_touch = Some(single_touch_spec); } "trackpad" => { @@ -1001,12 +1025,11 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: let mut trackpad_spec = TouchDeviceOption::new(PathBuf::from(it.next().unwrap().to_owned())); if let Some(width) = it.next() { - trackpad_spec.width = width.trim().parse().unwrap(); + trackpad_spec.set_width(width.trim().parse().unwrap()); } if let Some(height) = it.next() { - trackpad_spec.height = height.trim().parse().unwrap(); + trackpad_spec.set_height(height.trim().parse().unwrap()); } - cfg.virtio_trackpad = Some(trackpad_spec); } "mouse" => { @@ -1074,6 +1097,44 @@ fn set_argument(cfg: &mut Config, name: &str, value: Option<&str>) -> argument:: Ok(()) } +fn validate_arguments(cfg: &mut Config) -> std::result::Result<(), argument::Error> { + if cfg.executable_path.is_none() { + return Err(argument::Error::ExpectedArgument("`KERNEL`".to_owned())); + } + if cfg.host_ip.is_some() || cfg.netmask.is_some() || cfg.mac_address.is_some() { + if cfg.host_ip.is_none() { + return Err(argument::Error::ExpectedArgument( + "`host_ip` missing from network config".to_owned(), + )); + } + if cfg.netmask.is_none() { + return Err(argument::Error::ExpectedArgument( + "`netmask` missing from network config".to_owned(), + )); + } + if cfg.mac_address.is_none() { + return Err(argument::Error::ExpectedArgument( + "`mac` missing from network config".to_owned(), + )); + } + } + if cfg.plugin_root.is_some() && !executable_is_plugin(&cfg.executable_path) { + return Err(argument::Error::ExpectedArgument( + "`plugin-root` requires `plugin`".to_owned(), + )); + } + #[cfg(feature = "gpu")] + { + if let Some(gpu_parameters) = cfg.gpu_parameters.as_ref() { + let (width, height) = (gpu_parameters.display_width, gpu_parameters.display_height); + if let Some(virtio_single_touch) = cfg.virtio_single_touch.as_mut() { + virtio_single_touch.set_default_size(width, height); + } + } + } + Ok(()) +} + fn run_vm(args: std::env::Args) -> std::result::Result<(), ()> { let arguments = &[Argument::positional("KERNEL", "bzImage of kernel to run."), @@ -1264,6 +1325,8 @@ will be added each time this argument is given. Comma separated key=value pairs for setting up a virtio-gpu device. Possible key values: + backend=(2d|3d|gfxstream) + Which backend to use for virtio-gpu (determining rendering protocol). width=INT The width of the virtual display connected to the virtio-gpu. @@ -1321,34 +1384,7 @@ Enable split-irqchip support. let match_res = set_arguments(args, &arguments[..], |name, value| { set_argument(&mut cfg, name, value) }) - .and_then(|_| { - if cfg.executable_path.is_none() { - return Err(argument::Error::ExpectedArgument("`KERNEL`".to_owned())); - } - if cfg.host_ip.is_some() || cfg.netmask.is_some() || cfg.mac_address.is_some() { - if cfg.host_ip.is_none() { - return Err(argument::Error::ExpectedArgument( - "`host_ip` missing from network config".to_owned(), - )); - } - if cfg.netmask.is_none() { - return Err(argument::Error::ExpectedArgument( - "`netmask` missing from network config".to_owned(), - )); - } - if cfg.mac_address.is_none() { - return Err(argument::Error::ExpectedArgument( - "`mac` missing from network config".to_owned(), - )); - } - } - if cfg.plugin_root.is_some() && !executable_is_plugin(&cfg.executable_path) { - return Err(argument::Error::ExpectedArgument( - "`plugin-root` requires `plugin`".to_owned(), - )); - } - Ok(()) - }); + .and_then(|_| validate_arguments(&mut cfg)); match match_res { #[cfg(feature = "plugin")] @@ -1475,34 +1511,82 @@ fn balloon_vms(mut args: std::env::Args) -> std::result::Result<(), ()> { vms_request(&VmRequest::BalloonCommand(command), args) } -fn create_qcow2(mut args: std::env::Args) -> std::result::Result<(), ()> { - if args.len() != 2 { - print_help("crosvm create_qcow2", "PATH SIZE", &[]); - println!("Create a new QCOW2 image at `PATH` of the specified `SIZE` in bytes."); +fn create_qcow2(args: std::env::Args) -> std::result::Result<(), ()> { + let arguments = [ + Argument::positional("PATH", "where to create the qcow2 image"), + Argument::positional("[SIZE]", "the expanded size of the image"), + Argument::value( + "backing_file", + "path/to/file", + " the file to back the image", + ), + ]; + let mut positional_index = 0; + let mut file_path = String::from(""); + let mut size: Option<u64> = None; + let mut backing_file: Option<String> = None; + set_arguments(args, &arguments[..], |name, value| { + match (name, positional_index) { + ("", 0) => { + // NAME + positional_index += 1; + file_path = value.unwrap().to_owned(); + } + ("", 1) => { + // [SIZE] + positional_index += 1; + size = Some(value.unwrap().parse::<u64>().map_err(|_| { + argument::Error::InvalidValue { + value: value.unwrap().to_owned(), + expected: "SIZE should be a nonnegative integer", + } + })?); + } + ("", _) => { + return Err(argument::Error::TooManyArguments( + "Expected at most 2 positional arguments".to_owned(), + )); + } + ("backing_file", _) => { + backing_file = value.map(|x| x.to_owned()); + } + _ => unreachable!(), + }; + Ok(()) + }) + .map_err(|e| { + error!("Unable to parse command line arguments: {}", e); + })?; + if file_path.len() == 0 || !(size.is_some() ^ backing_file.is_some()) { + print_help("crosvm create_qcow2", "PATH [SIZE]", &arguments); + println!( + "Create a new QCOW2 image at `PATH` of either the specified `SIZE` in bytes or +with a '--backing_file'." + ); return Err(()); } - let file_path = args.nth(0).unwrap(); - let size: u64 = match args.nth(0).unwrap().parse::<u64>() { - Ok(n) => n, - Err(_) => { - error!("Failed to parse size of the disk."); - return Err(()); - } - }; let file = OpenOptions::new() .create(true) .read(true) .write(true) + .truncate(true) .open(&file_path) .map_err(|e| { error!("Failed opening qcow file at '{}': {}", file_path, e); })?; - QcowFile::new(file, size).map_err(|e| { - error!("Failed to create qcow file at '{}': {}", file_path, e); - })?; - + match (size, backing_file) { + (Some(size), None) => QcowFile::new(file, size).map_err(|e| { + error!("Failed to create qcow file at '{}': {}", file_path, e); + })?, + (None, Some(backing_file)) => { + QcowFile::new_from_backing(file, &backing_file).map_err(|e| { + error!("Failed to create qcow file at '{}': {}", file_path, e); + })? + } + _ => unreachable!(), + }; Ok(()) } @@ -1811,6 +1895,7 @@ fn main() { #[cfg(test)] mod tests { use super::*; + use crosvm::{DEFAULT_TOUCH_DEVICE_HEIGHT, DEFAULT_TOUCH_DEVICE_WIDTH}; #[test] fn parse_cpu_set_single() { @@ -2001,4 +2086,110 @@ mod tests { set_argument(&mut config, "plugin-gid-map", Some("1:2:blah")) .expect_err("parse should fail because count is not a number"); } + + #[test] + fn single_touch_spec_and_track_pad_spec_default_size() { + let mut config = Config::default(); + config + .executable_path + .replace(Executable::Kernel(PathBuf::from("kernel"))); + set_argument(&mut config, "single-touch", Some("/dev/single-touch-test")).unwrap(); + set_argument(&mut config, "trackpad", Some("/dev/single-touch-test")).unwrap(); + validate_arguments(&mut config).unwrap(); + assert_eq!( + config.virtio_single_touch.unwrap().get_size(), + (DEFAULT_TOUCH_DEVICE_WIDTH, DEFAULT_TOUCH_DEVICE_HEIGHT) + ); + assert_eq!( + config.virtio_trackpad.unwrap().get_size(), + (DEFAULT_TOUCH_DEVICE_WIDTH, DEFAULT_TOUCH_DEVICE_HEIGHT) + ); + } + + #[cfg(feature = "gpu")] + #[test] + fn single_touch_spec_default_size_from_gpu() { + let width = 12345u32; + let height = 54321u32; + let mut config = Config::default(); + config + .executable_path + .replace(Executable::Kernel(PathBuf::from("kernel"))); + set_argument(&mut config, "single-touch", Some("/dev/single-touch-test")).unwrap(); + set_argument( + &mut config, + "gpu", + Some(&format!("width={},height={}", width, height)), + ) + .unwrap(); + validate_arguments(&mut config).unwrap(); + assert_eq!( + config.virtio_single_touch.unwrap().get_size(), + (width, height) + ); + } + + #[test] + fn single_touch_spec_and_track_pad_spec_with_size() { + let width = 12345u32; + let height = 54321u32; + let mut config = Config::default(); + config + .executable_path + .replace(Executable::Kernel(PathBuf::from("kernel"))); + set_argument( + &mut config, + "single-touch", + Some(&format!("/dev/single-touch-test:{}:{}", width, height)), + ) + .unwrap(); + set_argument( + &mut config, + "trackpad", + Some(&format!("/dev/single-touch-test:{}:{}", width, height)), + ) + .unwrap(); + validate_arguments(&mut config).unwrap(); + assert_eq!( + config.virtio_single_touch.unwrap().get_size(), + (width, height) + ); + assert_eq!(config.virtio_trackpad.unwrap().get_size(), (width, height)); + } + + #[cfg(feature = "gpu")] + #[test] + fn single_touch_spec_with_size_independent_from_gpu() { + let touch_width = 12345u32; + let touch_height = 54321u32; + let display_width = 1234u32; + let display_height = 5432u32; + let mut config = Config::default(); + config + .executable_path + .replace(Executable::Kernel(PathBuf::from("kernel"))); + set_argument( + &mut config, + "single-touch", + Some(&format!( + "/dev/single-touch-test:{}:{}", + touch_width, touch_height + )), + ) + .unwrap(); + set_argument( + &mut config, + "gpu", + Some(&format!( + "width={},height={}", + display_width, display_height + )), + ) + .unwrap(); + validate_arguments(&mut config).unwrap(); + assert_eq!( + config.virtio_single_touch.unwrap().get_size(), + (touch_width, touch_height) + ); + } } diff --git a/src/plugin/mod.rs b/src/plugin/mod.rs index adda9a3..1c7027f 100644 --- a/src/plugin/mod.rs +++ b/src/plugin/mod.rs @@ -517,6 +517,21 @@ pub fn run_vcpus( &vcpu, ); } + VcpuExit::HypervHcall { input, params } => { + let mut data = [0; 8]; + vcpu_plugin.hyperv_call(input, params, &mut data, &vcpu); + // Setting data for hyperv call can not fail. + let _ = vcpu.set_data(&data); + } + VcpuExit::HypervSynic { + msr, + control, + evt_page, + msg_page, + } => { + vcpu_plugin + .hyperv_synic(msr, control, evt_page, msg_page, &vcpu); + } VcpuExit::Hlt => break, VcpuExit::Shutdown => break, VcpuExit::InternalError => { diff --git a/src/plugin/vcpu.rs b/src/plugin/vcpu.rs index c623bda..3bb6bed 100644 --- a/src/plugin/vcpu.rs +++ b/src/plugin/vcpu.rs @@ -20,8 +20,8 @@ use assertions::const_assert; use data_model::DataInit; use kvm::{CpuId, Vcpu}; use kvm_sys::{ - kvm_debugregs, kvm_fpu, kvm_lapic_state, kvm_mp_state, kvm_msr_entry, kvm_msrs, kvm_regs, - kvm_sregs, kvm_vcpu_events, kvm_xcrs, KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + kvm_debugregs, kvm_enable_cap, kvm_fpu, kvm_lapic_state, kvm_mp_state, kvm_msr_entry, kvm_msrs, + kvm_regs, kvm_sregs, kvm_vcpu_events, kvm_xcrs, KVM_CPUID_FLAG_SIGNIFCANT_INDEX, }; use protobuf::stream::CodedOutputStream; use protos::plugin::*; @@ -513,6 +513,54 @@ impl PluginVcpu { self.process(IoSpace::Mmio, addr, VcpuRunData::Write(data), vcpu) } + /// Has the plugin process handle a hyper-v call. + pub fn hyperv_call(&self, input: u64, params: [u64; 2], data: &mut [u8], vcpu: &Vcpu) -> bool { + let mut wait_reason = VcpuResponse_Wait::new(); + let hv = wait_reason.mut_hyperv_call(); + hv.input = input; + hv.params0 = params[0]; + hv.params1 = params[1]; + + self.wait_reason.set(Some(wait_reason)); + match self.handle_until_resume(vcpu) { + Ok(resume_data) => { + data.copy_from_slice(&resume_data); + true + } + Err(e) if e.errno() == EPIPE => false, + Err(e) => { + error!("failed to process hyperv call request: {}", e); + false + } + } + } + + /// Has the plugin process handle a synic config change. + pub fn hyperv_synic( + &self, + msr: u32, + control: u64, + evt_page: u64, + msg_page: u64, + vcpu: &Vcpu, + ) -> bool { + let mut wait_reason = VcpuResponse_Wait::new(); + let hv = wait_reason.mut_hyperv_synic(); + hv.msr = msr; + hv.control = control; + hv.evt_page = evt_page; + hv.msg_page = msg_page; + self.wait_reason.set(Some(wait_reason)); + match self.handle_until_resume(vcpu) { + Ok(_resume_data) => true, + Err(e) if e.errno() == EPIPE => false, + Err(e) => { + error!("failed to process hyperv synic request: {}", e); + false + } + } + } + fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> { let mut wait_reason = self.wait_reason.take(); let mut do_recv = true; @@ -595,6 +643,17 @@ impl PluginVcpu { response.mut_set_state(); let set_state = request.get_set_state(); set_vcpu_state(vcpu, set_state.set, set_state.get_state()) + } else if request.has_get_hyperv_cpuid() { + let cpuid_response = &mut response.mut_get_hyperv_cpuid().entries; + match vcpu.get_hyperv_cpuid() { + Ok(mut cpuid) => { + for entry in cpuid.mut_entries_slice() { + cpuid_response.push(cpuid_kvm_to_proto(entry)); + } + Ok(()) + } + Err(e) => Err(e), + } } else if request.has_get_msrs() { let entry_data = &mut response.mut_get_msrs().entry_data; let entry_indices = &request.get_get_msrs().entry_indices; @@ -664,6 +723,18 @@ impl PluginVcpu { cpuid_entry.edx = request_entry.edx; } vcpu.set_cpuid2(&cpuid) + } else if request.has_enable_capability() { + response.mut_enable_capability(); + let capability = request.get_enable_capability().capability; + if capability != kvm_sys::KVM_CAP_HYPERV_SYNIC + && capability != kvm_sys::KVM_CAP_HYPERV_SYNIC2 + { + Err(SysError::new(EINVAL)) + } else { + let mut cap: kvm_enable_cap = Default::default(); + cap.cap = capability; + vcpu.kvm_enable_cap(&cap) + } } else if request.has_shutdown() { return Err(SysError::new(EPIPE)); } else { diff --git a/sys_util/src/guest_memory.rs b/sys_util/src/guest_memory.rs index 6399f8c..2390b92 100644 --- a/sys_util/src/guest_memory.rs +++ b/sys_util/src/guest_memory.rs @@ -4,6 +4,7 @@ //! Track memory regions that are mapped to the guest VM. +use std::convert::AsRef; use std::convert::TryFrom; use std::fmt::{self, Display}; use std::os::unix::io::{AsRawFd, RawFd}; @@ -107,6 +108,12 @@ impl AsRawFd for GuestMemory { } } +impl AsRef<SharedMemory> for GuestMemory { + fn as_ref(&self) -> &SharedMemory { + &self.memfd + } +} + impl GuestMemory { /// Creates backing memfd for GuestMemory regions fn create_memfd(ranges: &[(GuestAddress, u64)]) -> Result<SharedMemory> { diff --git a/sys_util/src/syslog.rs b/sys_util/src/syslog.rs index 82b7137..232f046 100644 --- a/sys_util/src/syslog.rs +++ b/sys_util/src/syslog.rs @@ -610,6 +610,7 @@ mod tests { shm_unlink(shm_name.as_ptr()); let fd = shm_open(shm_name.as_ptr(), O_RDWR | O_CREAT | O_EXCL, 0666); assert!(fd >= 0, "error creating shared memory;"); + shm_unlink(shm_name.as_ptr()); File::from_raw_fd(fd) }; diff --git a/tests/plugin_enable_cap.c b/tests/plugin_enable_cap.c new file mode 100644 index 0000000..7ae416e --- /dev/null +++ b/tests/plugin_enable_cap.c @@ -0,0 +1,323 @@ +/* + * Copyright 2020 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/kvm.h> +#include <linux/memfd.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "crosvm.h" + +#define KILL_ADDRESS 0x3f9 + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif + +#ifndef F_SEAL_SHRINK +#define F_SEAL_SHRINK 0x0002 +#endif + +const uint8_t code[] = { + // Set a non-zero value for HV_X64_MSR_GUEST_OS_ID + // to enable hypercalls. + + // mov edx, 0xffffffff + 0x66, 0xba, 0xff, 0xff, 0xff, 0xff, + + // mov eax, 0xffffffff + 0x66, 0xb8, 0xff, 0xff, 0xff, 0xff, + + // mov ecx, 0x40000000 # HV_X64_MSR_GUEST_OS_ID + 0x66, 0xb9, 0x00, 0x00, 0x00, 0x40, + + // wrmsr + 0x0f, 0x30, + + // Establish page at 0x2000 as the hypercall page. + + // mov edx, 0x00000000 + 0x66, 0xba, 0x00, 0x00, 0x00, 0x00, + + // mov eax, 0x00002001 # lowest bit is enable bit + 0x66, 0xb8, 0x01, 0x20, 0x00, 0x00, + + // mov ecx, 0x40000001 # HV_X64_MSR_HYPERCALL + 0x66, 0xb9, 0x01, 0x00, 0x00, 0x40, + + // wrmsr + 0x0f, 0x30, + + // We can't test generic hypercalls since they're + // defined to UD for processors running in real mode. + + // for HV_X64_MSR_CONTROL: + // edx:eax gets transferred as 'control' + + // mov edx, 0x05060708 + 0x66, 0xba, 0x08, 0x07, 0x06, 0x05, + + // mov eax, 0x01020304 + 0x66, 0xb8, 0x04, 0x03, 0x02, 0x01, + + // mov ecx, 0x40000080 # HV_X64_MSR_SCONTROL + 0x66, 0xb9, 0x80, 0x00, 0x00, 0x40, + + // wrmsr + 0x0f, 0x30, + + // Establish page at 0x3000 as the evt_page. + + // mov edx, 0x00000000 + 0x66, 0xba, 0x00, 0x00, 0x00, 0x00, + + // mov eax, 0x00003000 + 0x66, 0xb8, 0x00, 0x30, 0x00, 0x00, + + // mov ecx, 0x40000082 # HV_X64_MSR_SIEFP + 0x66, 0xb9, 0x82, 0x00, 0x00, 0x40, + + // wrmsr + 0x0f, 0x30, + + // Establish page at 0x4000 as the 'msg_page'. + + // mov edx, 0x00000000 + 0x66, 0xba, 0x00, 0x00, 0x00, 0x00, + + // mov eax, 0x00004000 + 0x66, 0xb8, 0x00, 0x40, 0x00, 0x00, + + // mov ecx, 0x40000083 # HV_X64_MSR_SIMP + 0x66, 0xb9, 0x83, 0x00, 0x00, 0x40, + + // wrmsr + 0x0f, 0x30, + + // Request a kill. + + // mov dx, 0x3f9 + 0xba, 0xf9, 0x03, + + // mov al, 0x1 + 0xb0, 0x01, + + // out dx, al + 0xee, + + // hlt + 0xf4 +}; + +int check_synic_access(struct crosvm_vcpu* vcpu, struct crosvm_vcpu_event *evt, + uint32_t msr, uint64_t control, uint64_t evt_page, + uint64_t msg_page, const char *phase) { + if (evt->kind != CROSVM_VCPU_EVENT_KIND_HYPERV_SYNIC) { + fprintf(stderr, "Got incorrect exit type before %s: %d\n", phase, + evt->kind); + return 1; + } + if (evt->hyperv_synic.msr != msr || + evt->hyperv_synic._reserved != 0 || + evt->hyperv_synic.control != control || + evt->hyperv_synic.evt_page != evt_page || + evt->hyperv_synic.msg_page != msg_page) { + fprintf(stderr, "Got unexpected synic message after %s: " + "0x%x vs 0x%x, 0x%lx vs 0x%lx, 0x%lx vs 0x%lx, " + "0x%lx vs 0x%lx\n", + phase, msr, evt->hyperv_synic.msr, + control, evt->hyperv_synic.control, + evt_page, evt->hyperv_synic.evt_page, + msg_page, evt->hyperv_synic.msg_page); + return 1; + } + + if (crosvm_vcpu_resume(vcpu) != 0) { + fprintf(stderr, "Failed to resume after %s\n", phase); + return 1; + } + + if (crosvm_vcpu_wait(vcpu, evt) != 0) { + fprintf(stderr, "Failed to wait after %s\n", phase); + return 1; + } + return 0; +} + +int main(int argc, char** argv) { + struct crosvm* crosvm = NULL; + uint64_t cap_args[4] = {0}; + + int ret = crosvm_connect(&crosvm); + if (ret) { + fprintf(stderr, "failed to connect to crosvm: %d\n", ret); + return 1; + } + + ret = crosvm_reserve_range(crosvm, CROSVM_ADDRESS_SPACE_IOPORT, + KILL_ADDRESS, 1); + if (ret) { + fprintf(stderr, "failed to reserve kill port: %d\n", ret); + return 1; + } + + // VM mem layout: + // null page, code page, hypercall page, synic evt_page, synic msg_page + int mem_size = 0x4000; + int mem_fd = syscall(SYS_memfd_create, "guest_mem", + MFD_CLOEXEC | MFD_ALLOW_SEALING); + if (mem_fd < 0) { + fprintf(stderr, "failed to create guest memfd: %d\n", errno); + return 1; + } + ret = ftruncate(mem_fd, mem_size); + if (ret) { + fprintf(stderr, "failed to set size of guest memory: %d\n", errno); + return 1; + } + uint8_t *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, + mem_fd, 0x0); + if (mem == MAP_FAILED) { + fprintf(stderr, "failed to mmap guest memory: %d\n", errno); + return 1; + } + fcntl(mem_fd, F_ADD_SEALS, F_SEAL_SHRINK); + memcpy(mem, code, sizeof(code)); + + // Before MSR verify hypercall page is zero + int i; + for (i = 0; i < 5; ++i) { + if (mem[0x1000 + i]) { + fprintf(stderr, "Hypercall page isn't zero\n"); + return 1; + } + } + + struct crosvm_memory *mem_obj; + ret = crosvm_create_memory(crosvm, mem_fd, 0x0, mem_size, 0x1000, + false, false, &mem_obj); + if (ret) { + fprintf(stderr, "failed to create memory in crosvm: %d\n", ret); + return 1; + } + + struct crosvm_vcpu* vcpu = NULL; + ret = crosvm_get_vcpu(crosvm, 0, &vcpu); + if (ret) { + fprintf(stderr, "failed to get vcpu #0: %d\n", ret); + return 1; + } + + ret = crosvm_start(crosvm); + if (ret) { + fprintf(stderr, "failed to start vm: %d\n", ret); + return 1; + } + + struct crosvm_vcpu_event evt = {0}; + ret = crosvm_vcpu_wait(vcpu, &evt); + if (ret) { + fprintf(stderr, "failed to wait for vm start: %d\n", ret); + return 1; + } + if (evt.kind != CROSVM_VCPU_EVENT_KIND_INIT) { + fprintf(stderr, "Got unexpected exit type: %d\n", evt.kind); + return 1; + } + + ret = crosvm_enable_capability(crosvm, 0, 0, cap_args); + if (ret != -EINVAL) { + fprintf(stderr, "Unexpected crosvm_enable_capability result: %d\n", + ret); + return 1; + } + + ret = crosvm_vcpu_enable_capability(vcpu, KVM_CAP_HYPERV_SYNIC, 0, + cap_args); + if (ret) { + fprintf(stderr, "crosvm_vcpu_enable_capability() failed: %d\n", ret); + return 1; + } + + { + struct kvm_sregs sregs = {0}; + crosvm_vcpu_get_sregs(vcpu, &sregs); + sregs.cs.base = 0; + sregs.cs.selector = 0; + sregs.es.base = 0; + sregs.es.selector = 0; + crosvm_vcpu_set_sregs(vcpu, &sregs); + + struct kvm_regs regs = {0}; + crosvm_vcpu_get_regs(vcpu, ®s); + regs.rip = 0x1000; + regs.rflags = 2; + crosvm_vcpu_set_regs(vcpu, ®s); + } + + if (crosvm_vcpu_resume(vcpu) != 0) { + fprintf(stderr, "Failed to resume after init\n"); + return 1; + } + + if (crosvm_vcpu_wait(vcpu, &evt) != 0) { + fprintf(stderr, "Failed to wait after init\n"); + return 1; + } + if (check_synic_access(vcpu, &evt, 0x40000080, 0x506070801020304, 0, 0, + "synic msg #1")) { + return 1; + } + + // After first MSR verify hypercall page is non-zero + uint8_t value = 0; + for (i = 0; i < 5; ++i) { + value |= mem[0x1000+i]; + } + if (value == 0) { + fprintf(stderr, "Hypercall page is still zero\n"); + return 1; + } + + if (check_synic_access(vcpu, &evt, 0x40000082, 0x506070801020304, 0x3000, + 0, "synic msg #2")) { + return 1; + } + + if (check_synic_access(vcpu, &evt, 0x40000083, 0x506070801020304, 0x3000, + 0x4000, "synic msg #3")) { + return 1; + } + + if (evt.kind != CROSVM_VCPU_EVENT_KIND_IO_ACCESS) { + fprintf(stderr, "Got incorrect exit type after synic #3: %d\n", + evt.kind); + return 1; + } + if (evt.io_access.address_space != CROSVM_ADDRESS_SPACE_IOPORT || + evt.io_access.address != KILL_ADDRESS || + !evt.io_access.is_write || + evt.io_access.length != 1 || + evt.io_access.data[0] != 1) { + fprintf(stderr, "Didn't see kill request from VM\n"); + return 1; + } + + fprintf(stderr, "Saw kill request from VM, exiting\n"); + + return 0; +} diff --git a/tests/plugin_supported_cpuid.c b/tests/plugin_supported_cpuid.c index 0acb134..7109ff3 100644 --- a/tests/plugin_supported_cpuid.c +++ b/tests/plugin_supported_cpuid.c @@ -12,56 +12,96 @@ #include "crosvm.h" -int main(int argc, char** argv) { - struct crosvm *crosvm; - int ret = crosvm_connect(&crosvm); - if (ret) { - fprintf(stderr, "failed to connect to crosvm: %d\n", ret); - return 1; - } +typedef int (*crosvm_function)(struct crosvm*, uint32_t, + struct kvm_cpuid_entry2*, uint32_t*); +typedef int (*vcpu_function)(struct crosvm_vcpu*, uint32_t, + struct kvm_cpuid_entry2*, uint32_t*); + +// Members of union should only differ by the pointer type of 1st arg. +union cpuid_function { + crosvm_function crosvm; + vcpu_function vcpu; +}; +int test_cpuid(void* crosvm, union cpuid_function funct, const char* name) { struct kvm_cpuid_entry2 cpuids[100]; - int n_entries; - ret = crosvm_get_supported_cpuid(crosvm, 1, cpuids, &n_entries); + int n_entries = 0; + int ret = funct.crosvm(crosvm, 1, cpuids, &n_entries); if (ret >= 0) { fprintf(stderr, - "expected crosvm_get_supported_cpuids to fail with E2BIG\n"); - return 1; + "expected %s to fail with E2BIG\n", name); + return ret; } - ret = crosvm_get_supported_cpuid(crosvm, 100, cpuids, &n_entries); + ret = funct.crosvm(crosvm, 100, cpuids, &n_entries); if (ret < 0) { - fprintf(stderr, - "unexpected failure of crosvm_get_supported_cpuids: %d\n", ret); - return 1; + if (ret != -EINVAL) { + fprintf(stderr, "unexpected failure of %s: %d\n", name, ret); + } else { + fprintf(stderr, + "Query of %s failed with EINVAL (may be expected)\n", + name, ret); + } + return ret; } if (n_entries <= 1) { fprintf(stderr, - "unexpected number of supported cpuid entries: %d\n", - n_entries); + "unexpected number of cpuid entries from %s: %d\n", + name, n_entries); return 1; } + return 0; +} - ret = crosvm_get_emulated_cpuid(crosvm, 1, cpuids, &n_entries); - if (ret >= 0) { - fprintf(stderr, - "expected crosvm_get_emulated_cpuids to fail with E2BIG\n"); +int main(int argc, char** argv) { + struct crosvm* crosvm = NULL; + int ret = crosvm_connect(&crosvm); + if (ret) { + fprintf(stderr, "failed to connect to crosvm: %d\n", ret); return 1; } - ret = crosvm_get_emulated_cpuid(crosvm, 100, cpuids, &n_entries); - if (ret < 0) { - fprintf(stderr, - "unexpected failure of crosvm_get_emulated_cpuid: %d\n", ret); + struct crosvm_vcpu* vcpu = NULL; + ret = crosvm_get_vcpu(crosvm, 0, &vcpu); + if (ret) { + fprintf(stderr, "failed to get vcpu #0: %d\n", ret); return 1; } - if (n_entries < 1) { - fprintf(stderr, - "unexpected number of emulated cpuid entries: %d\n", n_entries); + union cpuid_function funct; + funct.crosvm = crosvm_get_supported_cpuid; + if (test_cpuid(crosvm, funct, "crosvm_get_supported_cpuid")) { + return 1; + } + funct.crosvm = crosvm_get_emulated_cpuid; + if (test_cpuid(crosvm, funct, "crosvm_get_emulated_cpuid")) { + return 1; + } + + ret = crosvm_start(crosvm); + if (ret) { + fprintf(stderr, "failed to start vm: %d\n", ret); return 1; } + struct crosvm_vcpu_event evt = {0}; + ret = crosvm_vcpu_wait(vcpu, &evt); + if (ret) { + fprintf(stderr, "failed to wait for vm start: %d\n", ret); + return 1; + } + if (evt.kind != CROSVM_VCPU_EVENT_KIND_INIT) { + fprintf(stderr, "Got unexpected exit type: %d\n", evt.kind); + return 1; + } + + funct.vcpu = crosvm_get_hyperv_cpuid; + ret = test_cpuid(vcpu, funct, "crosvm_get_hyperv_cpuid"); + // Older kernels don't support and return EINVAL, so allow this for now. + if (ret && ret != -EINVAL) { + fprintf(stderr, "Ignoring failure of crosvm_get_hyperv_cpuid\n"); + return 1; + } return 0; } diff --git a/tests/plugin_vcpu_pause.c b/tests/plugin_vcpu_pause.c index ff69b04..010d0fa 100644 --- a/tests/plugin_vcpu_pause.c +++ b/tests/plugin_vcpu_pause.c @@ -13,8 +13,10 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/eventfd.h> #include <sys/mman.h> #include <sys/syscall.h> +#include <sys/types.h> #include <time.h> #include <unistd.h> @@ -36,6 +38,7 @@ #define KILL_ADDRESS 0x3f9 static char g_serial_out[16]; +static int g_next_evt; static int g_kill_evt; static bool g_paused; @@ -70,7 +73,7 @@ static void *vcpu_thread_fn(void *arg) { /* Signal the main thread that init is done */ uint64_t dummy = 1; - write(g_kill_evt, &dummy, sizeof(dummy)); + write(g_next_evt, &dummy, sizeof(dummy)); } else if (evt.kind == CROSVM_VCPU_EVENT_KIND_IO_ACCESS && evt.io_access.address_space == CROSVM_ADDRESS_SPACE_IOPORT && @@ -85,7 +88,7 @@ static void *vcpu_thread_fn(void *arg) { else if (evt.kind == CROSVM_VCPU_EVENT_KIND_PAUSED) { /* Signal that we paused */ uint64_t dummy = 1; - write(g_kill_evt, &dummy, sizeof(dummy)); + write(g_next_evt, &dummy, sizeof(dummy)); /* Wait till we can continue again */ pthread_mutex_lock(&g_pause_mutex); @@ -101,7 +104,7 @@ static void *vcpu_thread_fn(void *arg) { } /* Signal that we are no longer paused */ - write(g_kill_evt, &dummy, sizeof(dummy)); + write(g_next_evt, &dummy, sizeof(dummy)); pthread_mutex_unlock(&g_pause_mutex); } @@ -147,6 +150,12 @@ int main(int argc, char** argv) { 0xf4 }; + g_next_evt = eventfd(0, 0); + if (g_next_evt == -1) { + fprintf(stderr, "failed to create eventfd: %d\n", errno); + return 1; + } + struct crosvm *crosvm; int ret = crosvm_connect(&crosvm); if (ret) { @@ -220,7 +229,7 @@ int main(int argc, char** argv) { /* Wait till VCPU thread tells us that its initialization is done */ uint64_t dummy; - read(g_kill_evt, &dummy, sizeof(dummy)); + read(g_next_evt, &dummy, sizeof(dummy)); ret = signal_pause(crosvm); if (ret) { @@ -229,7 +238,7 @@ int main(int argc, char** argv) { } /* Wait till VCPU thread tells us it is paused */ - read(g_kill_evt, &dummy, sizeof(dummy)); + read(g_next_evt, &dummy, sizeof(dummy)); /* Try pausing VCPUs 2nd time to make sure we do not deadlock */ ret = signal_pause(crosvm); @@ -241,7 +250,7 @@ int main(int argc, char** argv) { signal_unpause(crosvm, false); /* Wait until VCPU thread tells us that it is no longer paused */ - read(g_kill_evt, &dummy, sizeof(dummy)); + read(g_next_evt, &dummy, sizeof(dummy)); /* * Try pausing VCPUs 3rd time to see if we will miss pause @@ -255,9 +264,6 @@ int main(int argc, char** argv) { signal_unpause(crosvm, true); - /* Wait until VCPU thread tells us that it is no longer paused */ - read(g_kill_evt, &dummy, sizeof(dummy)); - /* Wait for crosvm to request that we exit otherwise we will be killed. */ read(g_kill_evt, &dummy, sizeof(dummy)); diff --git a/tests/plugins.rs b/tests/plugins.rs index d56f4ce..c45096f 100644 --- a/tests/plugins.rs +++ b/tests/plugins.rs @@ -261,6 +261,11 @@ fn test_supported_cpuid() { } #[test] +fn test_enable_cap() { + test_plugin(include_str!("plugin_enable_cap.c")); +} + +#[test] fn test_msr_index_list() { test_plugin(include_str!("plugin_msr_index_list.c")); } diff --git a/x86_64/Cargo.toml b/x86_64/Cargo.toml index e5d5e14..49ef53f 100644 --- a/x86_64/Cargo.toml +++ b/x86_64/Cargo.toml @@ -3,7 +3,6 @@ name = "x86_64" version = "0.1.0" authors = ["The Chromium OS Authors"] edition = "2018" -build = "build.rs" [dependencies] arch = { path = "../arch" } @@ -20,6 +19,5 @@ remain = "*" resources = { path = "../resources" } sync = { path = "../sync" } sys_util = { path = "../sys_util" } - -[build-dependencies] -cc = "=1.0.25" +acpi_tables = {path = "../acpi_tables" } +vm_control = { path = "../vm_control" } diff --git a/x86_64/build.rs b/x86_64/build.rs deleted file mode 100644 index 5f2c1eb..0000000 --- a/x86_64/build.rs +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright 2018 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -fn main() { - cc::Build::new().file("host_cpuid.c").compile("host_cpuid"); -} diff --git a/x86_64/host_cpuid.c b/x86_64/host_cpuid.c deleted file mode 100644 index 3230c90..0000000 --- a/x86_64/host_cpuid.c +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright 2018 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <stdint.h> - -void host_cpuid(uint32_t func, uint32_t func2, uint32_t *pEax, - uint32_t *pEbx, uint32_t *pEcx, uint32_t *pEdx) { - asm volatile("cpuid" : "=a"(*pEax), "=b"(*pEbx), "=c"(*pEcx), "=d"(*pEdx) : - "0"(func), "2"(func2) : "cc"); -} diff --git a/x86_64/src/acpi.rs b/x86_64/src/acpi.rs new file mode 100644 index 0000000..3600d16 --- /dev/null +++ b/x86_64/src/acpi.rs @@ -0,0 +1,233 @@ +// Copyright 2020 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +use acpi_tables::{rsdp::RSDP, sdt::SDT}; +use data_model::DataInit; +use sys_util::{GuestAddress, GuestMemory}; + +#[repr(C)] +#[derive(Clone, Copy, Default)] +struct LocalAPIC { + _type: u8, + _length: u8, + _processor_id: u8, + _apic_id: u8, + _flags: u32, +} + +// Safe as LocalAPIC structure only contains raw data +unsafe impl DataInit for LocalAPIC {} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +struct IOAPIC { + _type: u8, + _length: u8, + _ioapic_id: u8, + _reserved: u8, + _apic_address: u32, + _gsi_base: u32, +} + +// Safe as IOAPIC structure only contains raw data +unsafe impl DataInit for IOAPIC {} + +const OEM_REVISION: u32 = 1; +//DSDT +const DSDT_REVISION: u8 = 6; +// FADT +const FADT_LEN: u32 = 276; +const FADT_REVISION: u8 = 6; +const FADT_MINOR_REVISION: u8 = 3; +// FADT flags +const FADT_POWER_BUTTON: u32 = (1 << 4); +const FADT_SLEEP_BUTTON: u32 = (1 << 5); +// FADT fields offset +const FADT_FIELD_SCI_INTERRUPT: usize = 46; +const FADT_FIELD_PM1A_EVENT_BLK_ADDR: usize = 56; +const FADT_FIELD_PM1A_CONTROL_BLK_ADDR: usize = 64; +const FADT_FIELD_PM1A_EVENT_BLK_LEN: usize = 88; +const FADT_FIELD_PM1A_CONTROL_BLK_LEN: usize = 89; +const FADT_FIELD_FLAGS: usize = 112; +const FADT_FIELD_MINOR_REVISION: usize = 131; +const FADT_FIELD_DSDT_ADDR: usize = 140; +const FADT_FIELD_HYPERVISOR_ID: usize = 268; +// MADT +const MADT_LEN: u32 = 44; +const MADT_REVISION: u8 = 5; +// MADT fields offset +const MADT_FIELD_LAPIC_ADDR: usize = 36; +// MADT types +const MADT_TYPE_LOCAL_APIC: u8 = 0; +const MADT_TYPE_IO_APIC: u8 = 1; +// MADT flags +const MADT_ENABLED: u32 = 1; +// XSDT +const XSDT_REVISION: u8 = 1; + +fn create_dsdt_table() -> SDT { + // The hex tables in this file are generated from the ASL below with: + // "iasl -tc <dsdt.asl>" + // Below is the tables represents by the pm_dsdt_data + // Name (_S1, Package (0x04) // _S1_: S1 System State + // { + // One, + // One, + // Zero, + // Zero + // }) + let pm_dsdt_data = [ + 0x08u8, 0x5F, 0x53, 0x31, 0x5f, 0x12, 0x06, 0x04, 0x01, 0x01, 0x00, 0x00, + ]; + + let mut dsdt = SDT::new( + *b"DSDT", + acpi_tables::HEADER_LEN, + DSDT_REVISION, + *b"CROSVM", + *b"CROSVMDT", + OEM_REVISION, + ); + dsdt.append(pm_dsdt_data); + + dsdt +} + +/// Create ACPI tables and return the RSDP. +/// The basic tables DSDT/FACP/MADT/XSDT are constructed in this function. +/// # Arguments +/// +/// * `guest_mem` - The guest memory where the tables will be stored. +/// * `num_cpus` - Used to construct the MADT. +/// * `sci_irq` - Used to fill the FACP SCI_INTERRUPT field, which +/// is going to be used by the ACPI drivers to register +/// sci handler. +pub fn create_acpi_tables(guest_mem: &GuestMemory, num_cpus: u8, sci_irq: u32) -> GuestAddress { + // RSDP is at the HI RSDP WINDOW + let rsdp_offset = GuestAddress(super::ACPI_HI_RSDP_WINDOW_BASE); + let mut tables: Vec<u64> = Vec::new(); + + // DSDT + let dsdt = create_dsdt_table(); + let dsdt_offset = rsdp_offset.checked_add(RSDP::len() as u64).unwrap(); + guest_mem + .write_at_addr(dsdt.as_slice(), dsdt_offset) + .expect("Error writing DSDT table"); + + // FACP aka FADT + // Revision 6 of the ACPI FADT table is 276 bytes long + let mut facp = SDT::new( + *b"FACP", + FADT_LEN, + FADT_REVISION, + *b"CROSVM", + *b"CROSVMDT", + OEM_REVISION, + ); + + let fadt_flags: u32 = FADT_POWER_BUTTON | FADT_SLEEP_BUTTON; // mask POWER and SLEEP BUTTON + facp.write(FADT_FIELD_FLAGS, fadt_flags); + + // SCI Interrupt + facp.write(FADT_FIELD_SCI_INTERRUPT, sci_irq as u16); + + // PM1A Event Block Address + facp.write( + FADT_FIELD_PM1A_EVENT_BLK_ADDR, + devices::acpi::ACPIPM_RESOURCE_BASE as u32, + ); + + // PM1A Control Block Address + facp.write( + FADT_FIELD_PM1A_CONTROL_BLK_ADDR, + devices::acpi::ACPIPM_RESOURCE_BASE as u32 + + devices::acpi::ACPIPM_RESOURCE_EVENTBLK_LEN as u32, + ); + + // PM1 Event Block Length + facp.write( + FADT_FIELD_PM1A_EVENT_BLK_LEN, + devices::acpi::ACPIPM_RESOURCE_EVENTBLK_LEN as u8, + ); + + // PM1 Control Block Length + facp.write( + FADT_FIELD_PM1A_CONTROL_BLK_LEN, + devices::acpi::ACPIPM_RESOURCE_CONTROLBLK_LEN as u8, + ); + + facp.write(FADT_FIELD_MINOR_REVISION, FADT_MINOR_REVISION); // FADT minor version + facp.write(FADT_FIELD_DSDT_ADDR, dsdt_offset.0 as u64); // X_DSDT + + facp.write(FADT_FIELD_HYPERVISOR_ID, *b"CROSVM"); // Hypervisor Vendor Identity + + let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap(); + guest_mem + .write_at_addr(facp.as_slice(), facp_offset) + .expect("Error writing FACP table"); + tables.push(facp_offset.0); + + // MADT + let mut madt = SDT::new( + *b"APIC", + MADT_LEN, + MADT_REVISION, + *b"CROSVM", + *b"CROSVMDT", + OEM_REVISION, + ); + madt.write( + MADT_FIELD_LAPIC_ADDR, + super::mptable::APIC_DEFAULT_PHYS_BASE as u32, + ); + + for cpu in 0..num_cpus { + let lapic = LocalAPIC { + _type: MADT_TYPE_LOCAL_APIC, + _length: std::mem::size_of::<LocalAPIC>() as u8, + _processor_id: cpu, + _apic_id: cpu, + _flags: MADT_ENABLED, + }; + madt.append(lapic); + } + + madt.append(IOAPIC { + _type: MADT_TYPE_IO_APIC, + _length: std::mem::size_of::<IOAPIC>() as u8, + _apic_address: super::mptable::IO_APIC_DEFAULT_PHYS_BASE, + ..Default::default() + }); + + let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap(); + guest_mem + .write_at_addr(madt.as_slice(), madt_offset) + .expect("Error writing MADT table"); + tables.push(madt_offset.0); + + // XSDT + let mut xsdt = SDT::new( + *b"XSDT", + acpi_tables::HEADER_LEN, + XSDT_REVISION, + *b"CROSVM", + *b"CROSVMDT", + OEM_REVISION, + ); + for table in tables { + xsdt.append(table); + } + + let xsdt_offset = madt_offset.checked_add(madt.len() as u64).unwrap(); + guest_mem + .write_at_addr(xsdt.as_slice(), xsdt_offset) + .expect("Error writing XSDT table"); + + // RSDP + let rsdp = RSDP::new(*b"CROSVM", xsdt_offset.0); + guest_mem + .write_at_addr(rsdp.as_slice(), rsdp_offset) + .expect("Error writing RSDP"); + + rsdp_offset +} diff --git a/x86_64/src/bootparam.rs b/x86_64/src/bootparam.rs index 33bd90a..fd8e1d5 100644 --- a/x86_64/src/bootparam.rs +++ b/x86_64/src/bootparam.rs @@ -401,7 +401,8 @@ pub struct boot_params { pub _pad2: [__u8; 4usize], pub tboot_addr: __u64, pub ist_info: ist_info, - pub _pad3: [__u8; 16usize], + pub acpi_rsdp_addr: __u64, + pub _pad3: [__u8; 8usize], pub hd0_info: [__u8; 16usize], pub hd1_info: [__u8; 16usize], pub sys_desc_table: sys_desc_table, diff --git a/x86_64/src/cpuid.rs b/x86_64/src/cpuid.rs index a42b662..46294b2 100644 --- a/x86_64/src/cpuid.rs +++ b/x86_64/src/cpuid.rs @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +use std::arch::x86_64::{__cpuid, __cpuid_count}; use std::fmt::{self, Display}; use std::result; @@ -28,19 +29,6 @@ impl Display for Error { } } -// This function is implemented in C because stable rustc does not -// support inline assembly. -extern "C" { - fn host_cpuid( - func: u32, - func2: u32, - rEax: *mut u32, - rEbx: *mut u32, - rEcx: *mut u32, - rEdx: *mut u32, - ) -> (); -} - // CPUID bits in ebx, ecx, and edx. const EBX_CLFLUSH_CACHELINE: u32 = 8; // Flush a cache line size. const EBX_CLFLUSH_SIZE_SHIFT: u32 = 8; // Bytes flushed when executing CLFLUSH. @@ -77,25 +65,19 @@ fn filter_cpuid( } } 2 | 0x80000005 | 0x80000006 => unsafe { - host_cpuid( - entry.function, - 0, - &mut entry.eax as *mut u32, - &mut entry.ebx as *mut u32, - &mut entry.ecx as *mut u32, - &mut entry.edx as *mut u32, - ); + let result = __cpuid(entry.function); + entry.eax = result.eax; + entry.ebx = result.ebx; + entry.ecx = result.ecx; + entry.edx = result.edx; }, 4 => { unsafe { - host_cpuid( - entry.function, - entry.index, - &mut entry.eax as *mut u32, - &mut entry.ebx as *mut u32, - &mut entry.ecx as *mut u32, - &mut entry.edx as *mut u32, - ); + let result = __cpuid_count(entry.function, entry.index); + entry.eax = result.eax; + entry.ebx = result.ebx; + entry.ecx = result.ecx; + entry.edx = result.edx; } entry.eax &= !0xFC000000; } @@ -132,34 +114,12 @@ pub fn setup_cpuid(kvm: &kvm::Kvm, vcpu: &kvm::Vcpu, cpu_id: u64, nrcpus: u64) - /// get host cpu max physical address bits pub fn phy_max_address_bits() -> u32 { - let mut eax: u32 = 0; - let mut ebx: u32 = 0; - let mut ecx: u32 = 0; - let mut edx: u32 = 0; let mut phys_bits: u32 = 36; - unsafe { - host_cpuid( - 0x80000000, - 0, - &mut eax as *mut u32, - &mut ebx as *mut u32, - &mut ecx as *mut u32, - &mut edx as *mut u32, - ); - } - if eax >= 0x80000008 { - unsafe { - host_cpuid( - 0x80000008, - 0, - &mut eax as *mut u32, - &mut ebx as *mut u32, - &mut ecx as *mut u32, - &mut edx as *mut u32, - ); - } - phys_bits = eax & 0xff; + let highest_ext_function = unsafe { __cpuid(0x80000000) }; + if highest_ext_function.eax >= 0x80000008 { + let addr_size = unsafe { __cpuid(0x80000008) }; + phys_bits = addr_size.eax & 0xff; } phys_bits diff --git a/x86_64/src/lib.rs b/x86_64/src/lib.rs index 84f7dfd..a912edd 100644 --- a/x86_64/src/lib.rs +++ b/x86_64/src/lib.rs @@ -35,6 +35,7 @@ unsafe impl data_model::DataInit for mpspec::mpc_table {} unsafe impl data_model::DataInit for mpspec::mpc_lintsrc {} unsafe impl data_model::DataInit for mpspec::mpf_intel {} +mod acpi; mod bzimage; mod cpuid; mod gdt; @@ -54,23 +55,30 @@ use std::sync::Arc; use crate::bootparam::boot_params; use arch::{RunnableLinuxVm, VmComponents, VmImage}; -use devices::{get_serial_tty_string, PciConfigIo, PciDevice, PciInterruptPin, SerialParameters}; +use devices::split_irqchip_common::GsiRelay; +use devices::{ + get_serial_tty_string, Ioapic, PciConfigIo, PciDevice, PciInterruptPin, Pic, SerialParameters, + IOAPIC_BASE_ADDRESS, IOAPIC_MEM_LENGTH_BYTES, +}; use io_jail::Minijail; use kvm::*; use remain::sorted; use resources::SystemAllocator; use sync::Mutex; use sys_util::{Clock, EventFd, GuestAddress, GuestMemory, GuestMemoryError}; +use vm_control::VmIrqRequestSocket; #[sorted] #[derive(Debug)] pub enum Error { + AllocateIrq, CloneEventFd(sys_util::Error), Cmdline(kernel_cmdline::Error), ConfigureSystem, CreateDevices(Box<dyn StdError>), CreateEventFd(sys_util::Error), CreateFdt(arch::fdt::Error), + CreateIoapicDevice(sys_util::Error), CreateIrqChip(sys_util::Error), CreateKvm(sys_util::Error), CreatePciRoot(arch::DeviceRegistrationError), @@ -81,6 +89,7 @@ pub enum Error { CreateVcpu(sys_util::Error), CreateVm(sys_util::Error), E820Configuration, + EnableSplitIrqchip(sys_util::Error), KernelOffsetPastEnd, LoadBios(io::Error), LoadBzImage(bzimage::Error), @@ -111,12 +120,14 @@ impl Display for Error { #[sorted] match self { + AllocateIrq => write!(f, "error allocating a single irq"), CloneEventFd(e) => write!(f, "unable to clone an EventFd: {}", e), Cmdline(e) => write!(f, "the given kernel command line was invalid: {}", e), ConfigureSystem => write!(f, "error configuring the system"), CreateDevices(e) => write!(f, "error creating devices: {}", e), CreateEventFd(e) => write!(f, "unable to make an EventFd: {}", e), CreateFdt(e) => write!(f, "failed to create fdt: {}", e), + CreateIoapicDevice(e) => write!(f, "failed to create IOAPIC device: {}", e), CreateIrqChip(e) => write!(f, "failed to create irq chip: {}", e), CreateKvm(e) => write!(f, "failed to open /dev/kvm: {}", e), CreatePciRoot(e) => write!(f, "failed to create a PCI root hub: {}", e), @@ -127,6 +138,7 @@ impl Display for Error { CreateVcpu(e) => write!(f, "failed to create VCPU: {}", e), CreateVm(e) => write!(f, "failed to create VM: {}", e), E820Configuration => write!(f, "invalid e820 setup params"), + EnableSplitIrqchip(e) => write!(f, "failed to enable split irqchip: {}", e), KernelOffsetPastEnd => write!(f, "the kernel extends past the end of RAM"), LoadBios(e) => write!(f, "error loading bios: {}", e), LoadBzImage(e) => write!(f, "error loading kernel bzImage: {}", e), @@ -178,6 +190,7 @@ const CMDLINE_MAX_SIZE: u64 = KERNEL_START_OFFSET - CMDLINE_OFFSET; const X86_64_SERIAL_1_3_IRQ: u32 = 4; const X86_64_SERIAL_2_4_IRQ: u32 = 3; const X86_64_IRQ_BASE: u32 = 5; +const ACPI_HI_RSDP_WINDOW_BASE: u64 = 0x000E0000; fn configure_system( guest_mem: &GuestMemory, @@ -190,6 +203,7 @@ fn configure_system( setup_data: Option<GuestAddress>, initrd: Option<(GuestAddress, usize)>, mut params: boot_params, + sci_irq: u32, ) -> Result<()> { const EBDA_START: u64 = 0x0009fc00; const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; @@ -252,6 +266,10 @@ fn configure_system( guest_mem .write_obj_at_addr(params, zero_page_addr) .map_err(|_| Error::ZeroPageSetup)?; + + let rsdp_addr = acpi::create_acpi_tables(guest_mem, num_cpus, sci_irq); + params.acpi_rsdp_addr = rsdp_addr.0; + Ok(()) } @@ -305,6 +323,7 @@ impl arch::LinuxArch for X8664arch { fn build_vm<F, E>( mut components: VmComponents, split_irqchip: bool, + ioapic_device_socket: VmIrqRequestSocket, serial_parameters: &BTreeMap<u8, SerialParameters>, serial_jail: Option<Minijail>, create_devices: F, @@ -353,23 +372,57 @@ impl arch::LinuxArch for X8664arch { let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?; + let (split_irqchip, mut gsi_relay) = if split_irqchip { + let gsi_relay = GsiRelay::new(); + let pic = Arc::new(Mutex::new(Pic::new())); + let ioapic = Arc::new(Mutex::new( + Ioapic::new(&mut vm, ioapic_device_socket).map_err(Error::CreateIoapicDevice)?, + )); + mmio_bus + .insert( + ioapic.clone(), + IOAPIC_BASE_ADDRESS, + IOAPIC_MEM_LENGTH_BYTES, + false, + ) + .unwrap(); + (Some((pic, ioapic)), Some(gsi_relay)) + } else { + (None, None) + }; let pci_devices = create_devices(&mem, &mut vm, &mut resources, &exit_evt) .map_err(|e| Error::CreateDevices(Box::new(e)))?; - let (pci, pci_irqs, pid_debug_label_map) = - arch::generate_pci_root(pci_devices, &mut mmio_bus, &mut resources, &mut vm) - .map_err(Error::CreatePciRoot)?; + let (pci, pci_irqs, pid_debug_label_map) = arch::generate_pci_root( + pci_devices, + &mut gsi_relay, + &mut mmio_bus, + &mut resources, + &mut vm, + ) + .map_err(Error::CreatePciRoot)?; let pci_bus = Arc::new(Mutex::new(PciConfigIo::new(pci))); + // Event used to notify crosvm that guest OS is trying to suspend. + let suspend_evt = EventFd::new().map_err(Error::CreateEventFd)?; + // allocate sci_irq to fill the ACPI FACP table + let sci_irq = resources.allocate_irq().ok_or(Error::AllocateIrq)?; + let mut io_bus = Self::setup_io_bus( &mut vm, - split_irqchip, + &mut gsi_relay, exit_evt.try_clone().map_err(Error::CloneEventFd)?, Some(pci_bus.clone()), components.memory_size, + suspend_evt.try_clone().map_err(Error::CloneEventFd)?, )?; - let stdio_serial_num = - Self::setup_serial_devices(&mut vm, &mut io_bus, serial_parameters, serial_jail)?; + let stdio_serial_num = Self::setup_serial_devices( + &mut vm, + &mut io_bus, + &mut gsi_relay, + serial_parameters, + serial_jail, + )?; let ramoops_region = match components.pstore { Some(pstore) => Some( @@ -379,6 +432,25 @@ impl arch::LinuxArch for X8664arch { None => None, }; + let gsi_relay = if let Some((pic, ioapic)) = &split_irqchip { + io_bus.insert(pic.clone(), 0x20, 0x2, true).unwrap(); + io_bus.insert(pic.clone(), 0xa0, 0x2, true).unwrap(); + io_bus.insert(pic.clone(), 0x4d0, 0x2, true).unwrap(); + + let mut irq_num = resources.allocate_irq().unwrap(); + while irq_num < kvm::NUM_IOAPIC_PINS as u32 { + irq_num = resources.allocate_irq().unwrap(); + } + + // This will never fail because gsi_relay is Some iff split_irqchip is Some. + let gsi_relay = Arc::new(gsi_relay.unwrap()); + pic.lock().register_relay(gsi_relay.clone()); + ioapic.lock().register_relay(gsi_relay.clone()); + Some(gsi_relay) + } else { + None + }; + match components.vm_image { VmImage::Bios(ref mut bios) => Self::load_bios(&mem, bios)?, VmImage::Kernel(ref mut kernel_image) => { @@ -420,6 +492,7 @@ impl arch::LinuxArch for X8664arch { components.android_fstab, kernel_end, params, + sci_irq, )?; } } @@ -431,9 +504,12 @@ impl arch::LinuxArch for X8664arch { vcpus, vcpu_affinity, irq_chip, + split_irqchip, + gsi_relay, io_bus, mmio_bus, pid_debug_label_map, + suspend_evt, }) } } @@ -503,6 +579,7 @@ impl X8664arch { android_fstab: Option<File>, kernel_end: u64, params: boot_params, + sci_irq: u32, ) -> Result<()> { kernel_loader::load_cmdline(mem, GuestAddress(CMDLINE_OFFSET), cmdline) .map_err(Error::LoadCmdline)?; @@ -564,6 +641,7 @@ impl X8664arch { setup_data, initrd, params, + sci_irq, )?; Ok(()) } @@ -576,12 +654,28 @@ impl X8664arch { /// * `split_irqchip` - Whether to use a split IRQ chip. /// * `mem` - The memory to be used by the guest. fn create_vm(kvm: &Kvm, split_irqchip: bool, mem: GuestMemory) -> Result<Vm> { - let vm = Vm::new(&kvm, mem).map_err(Error::CreateVm)?; + let mut vm = Vm::new(&kvm, mem).map_err(Error::CreateVm)?; let tss_addr = GuestAddress(0xfffbd000); vm.set_tss_addr(tss_addr).map_err(Error::SetTssAddr)?; if !split_irqchip { vm.create_pit().map_err(Error::CreatePit)?; vm.create_irq_chip().map_err(Error::CreateIrqChip)?; + } else { + vm.enable_split_irqchip() + .map_err(Error::EnableSplitIrqchip)?; + for i in 0..kvm::NUM_IOAPIC_PINS { + // Add dummy MSI routes to replace the default IRQChip routes. + let route = IrqRoute { + gsi: i as u32, + source: IrqSource::Msi { + address: 0, + data: 0, + }, + }; + // Safe to ignore errors because errors are caused by the default routes and dummy + // MSI routes will always be registered. + let _ = vm.add_irq_route_entry(route); + } } Ok(vm) } @@ -650,15 +744,17 @@ impl X8664arch { /// # Arguments /// /// * - `vm` the vm object - /// * - `split_irqchip`: whether to use a split IRQ chip (i.e. userspace PIT/PIC/IOAPIC) + /// * - `gsi_relay`: only valid for split IRQ chip (i.e. userspace PIT/PIC/IOAPIC) /// * - `exit_evt` - the event fd object which should receive exit events /// * - `mem_size` - the size in bytes of physical ram for the guest + /// * - `suspend_evt` - the event fd object which used to suspend the vm fn setup_io_bus( - vm: &mut Vm, - split_irqchip: bool, + _vm: &mut Vm, + gsi_relay: &mut Option<GsiRelay>, exit_evt: EventFd, pci: Option<Arc<Mutex<devices::PciConfigIo>>>, mem_size: u64, + suspend_evt: EventFd, ) -> Result<devices::Bus> { struct NoDevice; impl devices::BusDevice for NoDevice { @@ -687,7 +783,7 @@ impl X8664arch { exit_evt.try_clone().map_err(Error::CloneEventFd)?, ))); - if split_irqchip { + if let Some(gsi_relay) = gsi_relay { let pit_evt = EventFd::new().map_err(Error::CreateEventFd)?; let pit = Arc::new(Mutex::new( devices::Pit::new( @@ -699,8 +795,7 @@ impl X8664arch { io_bus.insert(pit.clone(), 0x040, 0x8, true).unwrap(); io_bus.insert(pit.clone(), 0x061, 0x1, true).unwrap(); io_bus.insert(i8042, 0x062, 0x3, true).unwrap(); - vm.register_irqfd(&pit_evt, 0) - .map_err(Error::RegisterIrqfd)?; + gsi_relay.register_irqfd(pit_evt, 0); } else { io_bus .insert(nul_device.clone(), 0x040, 0x8, false) @@ -724,6 +819,17 @@ impl X8664arch { .unwrap(); } + let pm = Arc::new(Mutex::new(devices::ACPIPMResource::new(suspend_evt))); + io_bus + .insert( + pm.clone(), + devices::acpi::ACPIPM_RESOURCE_BASE, + devices::acpi::ACPIPM_RESOURCE_LEN as u64, + false, + ) + .unwrap(); + io_bus.notify_on_resume(pm); + Ok(io_bus) } @@ -734,10 +840,12 @@ impl X8664arch { /// /// * - `vm` the vm object /// * - `io_bus` the I/O bus to add the devices to + /// * - `gsi_relay`: only valid for split IRQ chip (i.e. userspace PIT/PIC/IOAPIC) /// * - `serial_parmaters` - definitions for how the serial devices should be configured fn setup_serial_devices( vm: &mut Vm, io_bus: &mut devices::Bus, + gsi_relay: &mut Option<GsiRelay>, serial_parameters: &BTreeMap<u8, SerialParameters>, serial_jail: Option<Minijail>, ) -> Result<Option<u8>> { @@ -753,10 +861,15 @@ impl X8664arch { ) .map_err(Error::CreateSerialDevices)?; - vm.register_irqfd(&com_evt_1_3, X86_64_SERIAL_1_3_IRQ) - .map_err(Error::RegisterIrqfd)?; - vm.register_irqfd(&com_evt_2_4, X86_64_SERIAL_2_4_IRQ) - .map_err(Error::RegisterIrqfd)?; + if let Some(gsi_relay) = gsi_relay { + gsi_relay.register_irqfd(com_evt_1_3, X86_64_SERIAL_1_3_IRQ as usize); + gsi_relay.register_irqfd(com_evt_2_4, X86_64_SERIAL_2_4_IRQ as usize); + } else { + vm.register_irqfd(&com_evt_1_3, X86_64_SERIAL_1_3_IRQ) + .map_err(Error::RegisterIrqfd)?; + vm.register_irqfd(&com_evt_2_4, X86_64_SERIAL_2_4_IRQ) + .map_err(Error::RegisterIrqfd)?; + } Ok(stdio_serial_num) } diff --git a/x86_64/src/mptable.rs b/x86_64/src/mptable.rs index 8b754bd..9aded3f 100644 --- a/x86_64/src/mptable.rs +++ b/x86_64/src/mptable.rs @@ -77,8 +77,10 @@ const MPC_OEM: [c_char; 8] = char_array!(c_char; 'C', 'R', 'O', 'S', 'V', 'M', ' const MPC_PRODUCT_ID: [c_char; 12] = ['0' as c_char; 12]; const BUS_TYPE_ISA: [u8; 6] = char_array!(u8; 'I', 'S', 'A', ' ', ' ', ' '); const BUS_TYPE_PCI: [u8; 6] = char_array!(u8; 'P', 'C', 'I', ' ', ' ', ' '); -const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec00000; // source: linux/arch/x86/include/asm/apicdef.h -const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee00000; // source: linux/arch/x86/include/asm/apicdef.h +// source: linux/arch/x86/include/asm/apicdef.h +pub const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec00000; +// source: linux/arch/x86/include/asm/apicdef.h +pub const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee00000; const APIC_VERSION: u8 = 0x14; const CPU_STEPPING: u32 = 0x600; const CPU_FEATURE_APIC: u32 = 0x200; |