summary refs log tree commit diff
path: root/disk/src
diff options
context:
space:
mode:
authorDaniel Verkamp <dverkamp@chromium.org>2019-12-17 17:04:58 -0800
committerCommit Bot <commit-bot@chromium.org>2020-01-06 21:26:38 +0000
commitf2eecc4152eca8d395566cffa2c102ec090a152d (patch)
tree9480039e894cdfc3eb47e7953d225d47323981d5 /disk/src
parent521cf5d6d3faad12671ba0f7eb5d6ed85fc20693 (diff)
downloadcrosvm-f2eecc4152eca8d395566cffa2c102ec090a152d.tar
crosvm-f2eecc4152eca8d395566cffa2c102ec090a152d.tar.gz
crosvm-f2eecc4152eca8d395566cffa2c102ec090a152d.tar.bz2
crosvm-f2eecc4152eca8d395566cffa2c102ec090a152d.tar.lz
crosvm-f2eecc4152eca8d395566cffa2c102ec090a152d.tar.xz
crosvm-f2eecc4152eca8d395566cffa2c102ec090a152d.tar.zst
crosvm-f2eecc4152eca8d395566cffa2c102ec090a152d.zip
disk: move qcow into disk crate
Move qcow from being its own crate into a module of the disk crate,
similar to the composite disk module.

This will allow use of qcow from disk and vice versa without introducing
a circular crate dependency.

BUG=None
TEST=./build_test.py
TEST=USE='asan fuzzer' emerge-nami crosvm

Change-Id: I77512bbe4b94faff1b5460f9796ee56505135580
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1972477
Reviewed-by: Cody Schuffelen <schuffelen@google.com>
Reviewed-by: Zach Reizner <zachr@chromium.org>
Tested-by: kokoro <noreply+kokoro@google.com>
Commit-Queue: Daniel Verkamp <dverkamp@chromium.org>
Diffstat (limited to 'disk/src')
-rw-r--r--disk/src/disk.rs4
-rw-r--r--disk/src/qcow/mod.rs2486
-rw-r--r--disk/src/qcow/qcow_raw_file.rs148
-rw-r--r--disk/src/qcow/refcount.rs253
-rw-r--r--disk/src/qcow/vec_cache.rs185
5 files changed, 3075 insertions, 1 deletions
diff --git a/disk/src/disk.rs b/disk/src/disk.rs
index c520d13..e3e7e63 100644
--- a/disk/src/disk.rs
+++ b/disk/src/disk.rs
@@ -8,13 +8,15 @@ use std::fs::File;
 use std::io::{self, Read, Seek, SeekFrom, Write};
 
 use libc::EINVAL;
-use qcow::{QcowFile, QCOW_MAGIC};
 use remain::sorted;
 use sys_util::{
     AsRawFds, FileGetLen, FileReadWriteAtVolatile, FileSetLen, FileSync, PunchHole, SeekHole,
     WriteZeroesAt,
 };
 
+mod qcow;
+pub use qcow::{QcowFile, QCOW_MAGIC};
+
 #[cfg(feature = "composite-disk")]
 mod composite;
 #[cfg(feature = "composite-disk")]
diff --git a/disk/src/qcow/mod.rs b/disk/src/qcow/mod.rs
new file mode 100644
index 0000000..f449d1d
--- /dev/null
+++ b/disk/src/qcow/mod.rs
@@ -0,0 +1,2486 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+mod qcow_raw_file;
+mod refcount;
+mod vec_cache;
+
+use data_model::{VolatileMemory, VolatileSlice};
+use libc::{EINVAL, ENOSPC, ENOTSUP};
+use remain::sorted;
+use sys_util::{
+    error, FileGetLen, FileReadWriteAtVolatile, FileReadWriteVolatile, FileSetLen, FileSync,
+    PunchHole, SeekHole, WriteZeroesAt,
+};
+
+use std::cmp::{max, min};
+use std::fmt::{self, Display};
+use std::fs::File;
+use std::io::{self, Read, Seek, SeekFrom, Write};
+use std::mem::size_of;
+use std::os::unix::io::{AsRawFd, RawFd};
+
+use crate::qcow::qcow_raw_file::QcowRawFile;
+use crate::qcow::refcount::RefCount;
+use crate::qcow::vec_cache::{CacheMap, Cacheable, VecCache};
+
+#[sorted]
+#[derive(Debug)]
+pub enum Error {
+    BackingFilesNotSupported,
+    CompressedBlocksNotSupported,
+    EvictingCache(io::Error),
+    FileTooBig(u64),
+    GettingFileSize(io::Error),
+    GettingRefcount(refcount::Error),
+    InvalidClusterIndex,
+    InvalidClusterSize,
+    InvalidIndex,
+    InvalidL1TableOffset,
+    InvalidL1TableSize(u32),
+    InvalidMagic,
+    InvalidOffset(u64),
+    InvalidRefcountTableOffset,
+    InvalidRefcountTableSize(u64),
+    NoFreeClusters,
+    NoRefcountClusters,
+    NotEnoughSpaceForRefcounts,
+    OpeningFile(io::Error),
+    ReadingHeader(io::Error),
+    ReadingPointers(io::Error),
+    ReadingRefCountBlock(refcount::Error),
+    ReadingRefCounts(io::Error),
+    RebuildingRefCounts(io::Error),
+    RefcountTableOffEnd,
+    RefcountTableTooLarge,
+    SeekingFile(io::Error),
+    SettingRefcountRefcount(io::Error),
+    SizeTooSmallForNumberOfClusters,
+    TooManyL1Entries(u64),
+    TooManyRefcounts(u64),
+    UnsupportedRefcountOrder,
+    UnsupportedVersion(u32),
+    WritingHeader(io::Error),
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl Display for Error {
+    #[remain::check]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Error::*;
+
+        #[sorted]
+        match self {
+            BackingFilesNotSupported => write!(f, "backing files not supported"),
+            CompressedBlocksNotSupported => write!(f, "compressed blocks not supported"),
+            EvictingCache(e) => write!(f, "failed to evict cache: {}", e),
+            FileTooBig(size) => write!(
+                f,
+                "file larger than max of {}: {}",
+                MAX_QCOW_FILE_SIZE, size
+            ),
+            GettingFileSize(e) => write!(f, "failed to get file size: {}", e),
+            GettingRefcount(e) => write!(f, "failed to get refcount: {}", e),
+            InvalidClusterIndex => write!(f, "invalid cluster index"),
+            InvalidClusterSize => write!(f, "invalid cluster size"),
+            InvalidIndex => write!(f, "invalid index"),
+            InvalidL1TableOffset => write!(f, "invalid L1 table offset"),
+            InvalidL1TableSize(size) => write!(f, "invalid L1 table size {}", size),
+            InvalidMagic => write!(f, "invalid magic"),
+            InvalidOffset(_) => write!(f, "invalid offset"),
+            InvalidRefcountTableOffset => write!(f, "invalid refcount table offset"),
+            InvalidRefcountTableSize(size) => write!(f, "invalid refcount table size: {}", size),
+            NoFreeClusters => write!(f, "no free clusters"),
+            NoRefcountClusters => write!(f, "no refcount clusters"),
+            NotEnoughSpaceForRefcounts => write!(f, "not enough space for refcounts"),
+            OpeningFile(e) => write!(f, "failed to open file: {}", e),
+            ReadingHeader(e) => write!(f, "failed to read header: {}", e),
+            ReadingPointers(e) => write!(f, "failed to read pointers: {}", e),
+            ReadingRefCountBlock(e) => write!(f, "failed to read ref count block: {}", e),
+            ReadingRefCounts(e) => write!(f, "failed to read ref counts: {}", e),
+            RebuildingRefCounts(e) => write!(f, "failed to rebuild ref counts: {}", e),
+            RefcountTableOffEnd => write!(f, "refcount table offset past file end"),
+            RefcountTableTooLarge => write!(f, "too many clusters specified for refcount table"),
+            SeekingFile(e) => write!(f, "failed to seek file: {}", e),
+            SettingRefcountRefcount(e) => write!(f, "failed to set refcount refcount: {}", e),
+            SizeTooSmallForNumberOfClusters => write!(f, "size too small for number of clusters"),
+            TooManyL1Entries(count) => write!(f, "l1 entry table too large: {}", count),
+            TooManyRefcounts(count) => write!(f, "ref count table too large: {}", count),
+            UnsupportedRefcountOrder => write!(f, "unsupported refcount order"),
+            UnsupportedVersion(v) => write!(f, "unsupported version: {}", v),
+            WritingHeader(e) => write!(f, "failed to write header: {}", e),
+        }
+    }
+}
+
+// Maximum data size supported.
+const MAX_QCOW_FILE_SIZE: u64 = 0x01 << 44; // 16 TB.
+
+// QCOW magic constant that starts the header.
+pub const QCOW_MAGIC: u32 = 0x5146_49fb;
+// Default to a cluster size of 2^DEFAULT_CLUSTER_BITS
+const DEFAULT_CLUSTER_BITS: u32 = 16;
+// Limit clusters to reasonable sizes. Choose the same limits as qemu. Making the clusters smaller
+// increases the amount of overhead for book keeping.
+const MIN_CLUSTER_BITS: u32 = 9;
+const MAX_CLUSTER_BITS: u32 = 21;
+// The L1 and RefCount table are kept in RAM, only handle files that require less than 35M entries.
+// This easily covers 1 TB files. When support for bigger files is needed the assumptions made to
+// keep these tables in RAM needs to be thrown out.
+const MAX_RAM_POINTER_TABLE_SIZE: u64 = 35_000_000;
+// Only support 2 byte refcounts, 2^refcount_order bits.
+const DEFAULT_REFCOUNT_ORDER: u32 = 4;
+
+const V3_BARE_HEADER_SIZE: u32 = 104;
+
+// bits 0-8 and 56-63 are reserved.
+const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
+const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
+// Flags
+const COMPRESSED_FLAG: u64 = 1 << 62;
+const CLUSTER_USED_FLAG: u64 = 1 << 63;
+const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0;
+
+/// Contains the information from the header of a qcow file.
+#[derive(Copy, Clone, Debug)]
+pub struct QcowHeader {
+    pub magic: u32,
+    pub version: u32,
+
+    pub backing_file_offset: u64,
+    pub backing_file_size: u32,
+
+    pub cluster_bits: u32,
+    pub size: u64,
+    pub crypt_method: u32,
+
+    pub l1_size: u32,
+    pub l1_table_offset: u64,
+
+    pub refcount_table_offset: u64,
+    pub refcount_table_clusters: u32,
+
+    pub nb_snapshots: u32,
+    pub snapshots_offset: u64,
+
+    // v3 entries
+    pub incompatible_features: u64,
+    pub compatible_features: u64,
+    pub autoclear_features: u64,
+    pub refcount_order: u32,
+    pub header_size: u32,
+}
+
+// Reads the next u16 from the file.
+fn read_u16_from_file(mut f: &File) -> Result<u16> {
+    let mut value = [0u8; 2];
+    (&mut f)
+        .read_exact(&mut value)
+        .map_err(Error::ReadingHeader)?;
+    Ok(u16::from_be_bytes(value))
+}
+
+// Reads the next u32 from the file.
+fn read_u32_from_file(mut f: &File) -> Result<u32> {
+    let mut value = [0u8; 4];
+    (&mut f)
+        .read_exact(&mut value)
+        .map_err(Error::ReadingHeader)?;
+    Ok(u32::from_be_bytes(value))
+}
+
+// Reads the next u64 from the file.
+fn read_u64_from_file(mut f: &File) -> Result<u64> {
+    let mut value = [0u8; 8];
+    (&mut f)
+        .read_exact(&mut value)
+        .map_err(Error::ReadingHeader)?;
+    Ok(u64::from_be_bytes(value))
+}
+
+impl QcowHeader {
+    /// Creates a QcowHeader from a reference to a file.
+    pub fn new(f: &mut File) -> Result<QcowHeader> {
+        f.seek(SeekFrom::Start(0)).map_err(Error::ReadingHeader)?;
+
+        let magic = read_u32_from_file(f)?;
+        if magic != QCOW_MAGIC {
+            return Err(Error::InvalidMagic);
+        }
+
+        Ok(QcowHeader {
+            magic,
+            version: read_u32_from_file(f)?,
+            backing_file_offset: read_u64_from_file(f)?,
+            backing_file_size: read_u32_from_file(f)?,
+            cluster_bits: read_u32_from_file(f)?,
+            size: read_u64_from_file(f)?,
+            crypt_method: read_u32_from_file(f)?,
+            l1_size: read_u32_from_file(f)?,
+            l1_table_offset: read_u64_from_file(f)?,
+            refcount_table_offset: read_u64_from_file(f)?,
+            refcount_table_clusters: read_u32_from_file(f)?,
+            nb_snapshots: read_u32_from_file(f)?,
+            snapshots_offset: read_u64_from_file(f)?,
+            incompatible_features: read_u64_from_file(f)?,
+            compatible_features: read_u64_from_file(f)?,
+            autoclear_features: read_u64_from_file(f)?,
+            refcount_order: read_u32_from_file(f)?,
+            header_size: read_u32_from_file(f)?,
+        })
+    }
+
+    /// Create a header for the given `size`.
+    pub fn create_for_size(size: u64) -> QcowHeader {
+        let cluster_bits: u32 = DEFAULT_CLUSTER_BITS;
+        let cluster_size: u32 = 0x01 << cluster_bits;
+        // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses.
+        let l2_size: u32 = cluster_size / size_of::<u64>() as u32;
+        let num_clusters: u32 = div_round_up_u64(size, u64::from(cluster_size)) as u32;
+        let num_l2_clusters: u32 = div_round_up_u32(num_clusters, l2_size);
+        let l1_clusters: u32 = div_round_up_u32(num_l2_clusters, cluster_size);
+        let header_clusters = div_round_up_u32(size_of::<QcowHeader>() as u32, cluster_size);
+        QcowHeader {
+            magic: QCOW_MAGIC,
+            version: 3,
+            backing_file_offset: 0,
+            backing_file_size: 0,
+            cluster_bits: DEFAULT_CLUSTER_BITS,
+            size,
+            crypt_method: 0,
+            l1_size: num_l2_clusters,
+            l1_table_offset: u64::from(cluster_size),
+            // The refcount table is after l1 + header.
+            refcount_table_offset: u64::from(cluster_size * (l1_clusters + 1)),
+            refcount_table_clusters: {
+                // Pre-allocate enough clusters for the entire refcount table as it must be
+                // continuous in the file. Allocate enough space to refcount all clusters, including
+                // the refcount clusters.
+                let max_refcount_clusters = max_refcount_clusters(
+                    DEFAULT_REFCOUNT_ORDER,
+                    cluster_size,
+                    num_clusters + l1_clusters + num_l2_clusters + header_clusters,
+                ) as u32;
+                // The refcount table needs to store the offset of each refcount cluster.
+                div_round_up_u32(
+                    max_refcount_clusters * size_of::<u64>() as u32,
+                    cluster_size,
+                )
+            },
+            nb_snapshots: 0,
+            snapshots_offset: 0,
+            incompatible_features: 0,
+            compatible_features: 0,
+            autoclear_features: 0,
+            refcount_order: DEFAULT_REFCOUNT_ORDER,
+            header_size: V3_BARE_HEADER_SIZE,
+        }
+    }
+
+    /// Write the header to `file`.
+    pub fn write_to<F: Write + Seek>(&self, file: &mut F) -> Result<()> {
+        // Writes the next u32 to the file.
+        fn write_u32_to_file<F: Write>(f: &mut F, value: u32) -> Result<()> {
+            f.write_all(&value.to_be_bytes())
+                .map_err(Error::WritingHeader)
+        }
+
+        // Writes the next u64 to the file.
+        fn write_u64_to_file<F: Write>(f: &mut F, value: u64) -> Result<()> {
+            f.write_all(&value.to_be_bytes())
+                .map_err(Error::WritingHeader)
+        }
+
+        write_u32_to_file(file, self.magic)?;
+        write_u32_to_file(file, self.version)?;
+        write_u64_to_file(file, self.backing_file_offset)?;
+        write_u32_to_file(file, self.backing_file_size)?;
+        write_u32_to_file(file, self.cluster_bits)?;
+        write_u64_to_file(file, self.size)?;
+        write_u32_to_file(file, self.crypt_method)?;
+        write_u32_to_file(file, self.l1_size)?;
+        write_u64_to_file(file, self.l1_table_offset)?;
+        write_u64_to_file(file, self.refcount_table_offset)?;
+        write_u32_to_file(file, self.refcount_table_clusters)?;
+        write_u32_to_file(file, self.nb_snapshots)?;
+        write_u64_to_file(file, self.snapshots_offset)?;
+        write_u64_to_file(file, self.incompatible_features)?;
+        write_u64_to_file(file, self.compatible_features)?;
+        write_u64_to_file(file, self.autoclear_features)?;
+        write_u32_to_file(file, self.refcount_order)?;
+        write_u32_to_file(file, self.header_size)?;
+
+        // Set the file length by seeking and writing a zero to the last byte. This avoids needing
+        // a `File` instead of anything that implements seek as the `file` argument.
+        // Zeros out the l1 and refcount table clusters.
+        let cluster_size = 0x01u64 << self.cluster_bits;
+        let refcount_blocks_size = u64::from(self.refcount_table_clusters) * cluster_size;
+        file.seek(SeekFrom::Start(
+            self.refcount_table_offset + refcount_blocks_size - 2,
+        ))
+        .map_err(Error::WritingHeader)?;
+        file.write(&[0u8]).map_err(Error::WritingHeader)?;
+
+        Ok(())
+    }
+}
+
+fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> u64 {
+    // Use u64 as the product of the u32 inputs can overflow.
+    let refcount_bytes = (0x01 << refcount_order as u64) / 8;
+    let for_data = div_round_up_u64(num_clusters as u64 * refcount_bytes, cluster_size as u64);
+    let for_refcounts = div_round_up_u64(for_data * refcount_bytes, cluster_size as u64);
+    for_data + for_refcounts
+}
+
+/// Represents a qcow2 file. This is a sparse file format maintained by the qemu project.
+/// Full documentation of the format can be found in the qemu repository.
+///
+/// # Example
+///
+/// ```
+/// # use std::io::{Read, Seek, SeekFrom};
+/// # use disk::QcowFile;
+/// # fn test(file: std::fs::File) -> std::io::Result<()> {
+///     let mut q = QcowFile::from(file).expect("Can't open qcow file");
+///     let mut buf = [0u8; 12];
+///     q.seek(SeekFrom::Start(10 as u64))?;
+///     q.read(&mut buf[..])?;
+/// #   Ok(())
+/// # }
+/// ```
+#[derive(Debug)]
+pub struct QcowFile {
+    raw_file: QcowRawFile,
+    header: QcowHeader,
+    l1_table: VecCache<u64>,
+    l2_entries: u64,
+    l2_cache: CacheMap<VecCache<u64>>,
+    refcounts: RefCount,
+    current_offset: u64,
+    unref_clusters: Vec<u64>, // List of freshly unreferenced clusters.
+    // List of unreferenced clusters available to be used. unref clusters become available once the
+    // removal of references to them have been synced to disk.
+    avail_clusters: Vec<u64>,
+    //TODO(dgreid) Add support for backing files. - backing_file: Option<Box<QcowFile<T>>>,
+}
+
+impl QcowFile {
+    /// Creates a QcowFile from `file`. File must be a valid qcow2 image.
+    pub fn from(mut file: File) -> Result<QcowFile> {
+        let header = QcowHeader::new(&mut file)?;
+
+        // Only v3 files are supported.
+        if header.version != 3 {
+            return Err(Error::UnsupportedVersion(header.version));
+        }
+
+        // Make sure that the L1 table fits in RAM.
+        if u64::from(header.l1_size) > MAX_RAM_POINTER_TABLE_SIZE {
+            return Err(Error::InvalidL1TableSize(header.l1_size));
+        }
+
+        let cluster_bits: u32 = header.cluster_bits;
+        if cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS {
+            return Err(Error::InvalidClusterSize);
+        }
+        let cluster_size = 0x01u64 << cluster_bits;
+
+        // Limit the total size of the disk.
+        if header.size > MAX_QCOW_FILE_SIZE {
+            return Err(Error::FileTooBig(header.size));
+        }
+
+        // No current support for backing files.
+        if header.backing_file_offset != 0 {
+            return Err(Error::BackingFilesNotSupported);
+        }
+
+        // Only support two byte refcounts.
+        let refcount_bits: u64 = 0x01u64
+            .checked_shl(header.refcount_order)
+            .ok_or(Error::UnsupportedRefcountOrder)?;
+        if refcount_bits != 16 {
+            return Err(Error::UnsupportedRefcountOrder);
+        }
+        let refcount_bytes = (refcount_bits + 7) / 8;
+
+        // Need at least one refcount cluster
+        if header.refcount_table_clusters == 0 {
+            return Err(Error::NoRefcountClusters);
+        }
+        offset_is_cluster_boundary(header.backing_file_offset, header.cluster_bits)?;
+        offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?;
+        offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?;
+        // refcount table must be a cluster boundary, and within the file's virtual or actual size.
+        offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?;
+        let file_size = file.metadata().map_err(Error::GettingFileSize)?.len();
+        if header.refcount_table_offset > max(file_size, header.size) {
+            return Err(Error::RefcountTableOffEnd);
+        }
+
+        // The first cluster should always have a non-zero refcount, so if it is 0,
+        // this is an old file with broken refcounts, which requires a rebuild.
+        let mut refcount_rebuild_required = true;
+        file.seek(SeekFrom::Start(header.refcount_table_offset))
+            .map_err(Error::SeekingFile)?;
+        let first_refblock_addr = read_u64_from_file(&file)?;
+        if first_refblock_addr != 0 {
+            file.seek(SeekFrom::Start(first_refblock_addr))
+                .map_err(Error::SeekingFile)?;
+            let first_cluster_refcount = read_u16_from_file(&file)?;
+            if first_cluster_refcount != 0 {
+                refcount_rebuild_required = false;
+            }
+        }
+
+        if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 {
+            refcount_rebuild_required = true;
+        }
+
+        let mut raw_file =
+            QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?;
+        if refcount_rebuild_required {
+            QcowFile::rebuild_refcounts(&mut raw_file, header)?;
+        }
+
+        let l2_size = cluster_size / size_of::<u64>() as u64;
+        let num_clusters = div_round_up_u64(header.size, cluster_size);
+        let num_l2_clusters = div_round_up_u64(num_clusters, l2_size);
+        let l1_clusters = div_round_up_u64(num_l2_clusters, cluster_size);
+        let header_clusters = div_round_up_u64(size_of::<QcowHeader>() as u64, cluster_size);
+        if num_l2_clusters > MAX_RAM_POINTER_TABLE_SIZE {
+            return Err(Error::TooManyL1Entries(num_l2_clusters));
+        }
+        let l1_table = VecCache::from_vec(
+            raw_file
+                .read_pointer_table(
+                    header.l1_table_offset,
+                    num_l2_clusters,
+                    Some(L1_TABLE_OFFSET_MASK),
+                )
+                .map_err(Error::ReadingHeader)?,
+        );
+
+        let num_clusters = div_round_up_u64(header.size, cluster_size);
+        let refcount_clusters = max_refcount_clusters(
+            header.refcount_order,
+            cluster_size as u32,
+            (num_clusters + l1_clusters + num_l2_clusters + header_clusters) as u32,
+        );
+        // Check that the given header doesn't have a suspiciously sized refcount table.
+        if u64::from(header.refcount_table_clusters) > 2 * refcount_clusters {
+            return Err(Error::RefcountTableTooLarge);
+        }
+        if l1_clusters + refcount_clusters > MAX_RAM_POINTER_TABLE_SIZE {
+            return Err(Error::TooManyRefcounts(refcount_clusters));
+        }
+        let refcount_block_entries = cluster_size / refcount_bytes;
+        let refcounts = RefCount::new(
+            &mut raw_file,
+            header.refcount_table_offset,
+            refcount_clusters,
+            refcount_block_entries,
+            cluster_size,
+        )
+        .map_err(Error::ReadingRefCounts)?;
+
+        let l2_entries = cluster_size / size_of::<u64>() as u64;
+
+        let mut qcow = QcowFile {
+            raw_file,
+            header,
+            l1_table,
+            l2_entries,
+            l2_cache: CacheMap::new(100),
+            refcounts,
+            current_offset: 0,
+            unref_clusters: Vec::new(),
+            avail_clusters: Vec::new(),
+        };
+
+        // Check that the L1 and refcount tables fit in a 64bit address space.
+        qcow.header
+            .l1_table_offset
+            .checked_add(qcow.l1_address_offset(qcow.virtual_size()))
+            .ok_or(Error::InvalidL1TableOffset)?;
+        qcow.header
+            .refcount_table_offset
+            .checked_add(u64::from(qcow.header.refcount_table_clusters) * cluster_size)
+            .ok_or(Error::InvalidRefcountTableOffset)?;
+
+        qcow.find_avail_clusters()?;
+
+        Ok(qcow)
+    }
+
+    /// Creates a new QcowFile at the given path.
+    pub fn new(mut file: File, virtual_size: u64) -> Result<QcowFile> {
+        let header = QcowHeader::create_for_size(virtual_size);
+        file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?;
+        header.write_to(&mut file)?;
+
+        let mut qcow = Self::from(file)?;
+
+        // Set the refcount for each refcount table cluster.
+        let cluster_size = 0x01u64 << qcow.header.cluster_bits;
+        let refcount_table_base = qcow.header.refcount_table_offset as u64;
+        let end_cluster_addr =
+            refcount_table_base + u64::from(qcow.header.refcount_table_clusters) * cluster_size;
+
+        let mut cluster_addr = 0;
+        while cluster_addr < end_cluster_addr {
+            let mut unref_clusters = qcow
+                .set_cluster_refcount(cluster_addr, 1)
+                .map_err(Error::SettingRefcountRefcount)?;
+            qcow.unref_clusters.append(&mut unref_clusters);
+            cluster_addr += cluster_size;
+        }
+
+        Ok(qcow)
+    }
+
+    /// Returns the `QcowHeader` for this file.
+    pub fn header(&self) -> &QcowHeader {
+        &self.header
+    }
+
+    /// Returns the L1 lookup table for this file. This is only useful for debugging.
+    pub fn l1_table(&self) -> &[u64] {
+        &self.l1_table.get_values()
+    }
+
+    /// Returns an L2_table of cluster addresses, only used for debugging.
+    pub fn l2_table(&mut self, l1_index: usize) -> Result<Option<&[u64]>> {
+        let l2_addr_disk = *self.l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
+
+        if l2_addr_disk == 0 {
+            // Reading from an unallocated cluster will return zeros.
+            return Ok(None);
+        }
+
+        if !self.l2_cache.contains_key(&l1_index) {
+            // Not in the cache.
+            let table = VecCache::from_vec(
+                Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)
+                    .map_err(Error::ReadingPointers)?,
+            );
+            let l1_table = &self.l1_table;
+            let raw_file = &mut self.raw_file;
+            self.l2_cache
+                .insert(l1_index, table, |index, evicted| {
+                    raw_file.write_pointer_table(
+                        l1_table[index],
+                        evicted.get_values(),
+                        CLUSTER_USED_FLAG,
+                    )
+                })
+                .map_err(Error::EvictingCache)?;
+        }
+
+        // The index must exist as it was just inserted if it didn't already.
+        Ok(Some(self.l2_cache.get(&l1_index).unwrap().get_values()))
+    }
+
+    /// Returns the refcount table for this file. This is only useful for debugging.
+    pub fn ref_table(&self) -> &[u64] {
+        &self.refcounts.ref_table()
+    }
+
+    /// Returns the `index`th refcount block from the file.
+    pub fn refcount_block(&mut self, index: usize) -> Result<Option<&[u16]>> {
+        self.refcounts
+            .refcount_block(&mut self.raw_file, index)
+            .map_err(Error::ReadingRefCountBlock)
+    }
+
+    /// Returns the first cluster in the file with a 0 refcount. Used for testing.
+    pub fn first_zero_refcount(&mut self) -> Result<Option<u64>> {
+        let file_size = self
+            .raw_file
+            .file_mut()
+            .metadata()
+            .map_err(Error::GettingFileSize)?
+            .len();
+        let cluster_size = 0x01u64 << self.header.cluster_bits;
+
+        let mut cluster_addr = 0;
+        while cluster_addr < file_size {
+            let cluster_refcount = self
+                .refcounts
+                .get_cluster_refcount(&mut self.raw_file, cluster_addr)
+                .map_err(Error::GettingRefcount)?;
+            if cluster_refcount == 0 {
+                return Ok(Some(cluster_addr));
+            }
+            cluster_addr += cluster_size;
+        }
+        Ok(None)
+    }
+
+    fn find_avail_clusters(&mut self) -> Result<()> {
+        let cluster_size = self.raw_file.cluster_size();
+
+        let file_size = self
+            .raw_file
+            .file_mut()
+            .metadata()
+            .map_err(Error::GettingFileSize)?
+            .len();
+
+        for i in (0..file_size).step_by(cluster_size as usize) {
+            let refcount = self
+                .refcounts
+                .get_cluster_refcount(&mut self.raw_file, i)
+                .map_err(Error::GettingRefcount)?;
+            if refcount == 0 {
+                self.avail_clusters.push(i);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Rebuild the reference count tables.
+    fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> {
+        fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> {
+            let idx = (cluster_address / cluster_size) as usize;
+            if idx >= refcounts.len() {
+                return Err(Error::InvalidClusterIndex);
+            }
+            refcounts[idx] += 1;
+            Ok(())
+        }
+
+        // Add a reference to the first cluster (header plus extensions).
+        fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> {
+            add_ref(refcounts, cluster_size, 0)
+        }
+
+        // Add references to the L1 table clusters.
+        fn set_l1_refcounts(
+            refcounts: &mut [u16],
+            header: QcowHeader,
+            cluster_size: u64,
+        ) -> Result<()> {
+            let l1_clusters = div_round_up_u64(header.l1_size as u64, cluster_size);
+            let l1_table_offset = header.l1_table_offset;
+            for i in 0..l1_clusters {
+                add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?;
+            }
+            Ok(())
+        }
+
+        // Traverse the L1 and L2 tables to find all reachable data clusters.
+        fn set_data_refcounts(
+            refcounts: &mut [u16],
+            header: QcowHeader,
+            cluster_size: u64,
+            raw_file: &mut QcowRawFile,
+        ) -> Result<()> {
+            let l1_table = raw_file
+                .read_pointer_table(
+                    header.l1_table_offset,
+                    header.l1_size as u64,
+                    Some(L1_TABLE_OFFSET_MASK),
+                )
+                .map_err(Error::ReadingPointers)?;
+            for l1_index in 0..header.l1_size as usize {
+                let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
+                if l2_addr_disk != 0 {
+                    // Add a reference to the L2 table cluster itself.
+                    add_ref(refcounts, cluster_size, l2_addr_disk)?;
+
+                    // Read the L2 table and find all referenced data clusters.
+                    let l2_table = raw_file
+                        .read_pointer_table(
+                            l2_addr_disk,
+                            cluster_size / size_of::<u64>() as u64,
+                            Some(L2_TABLE_OFFSET_MASK),
+                        )
+                        .map_err(Error::ReadingPointers)?;
+                    for data_cluster_addr in l2_table {
+                        if data_cluster_addr != 0 {
+                            add_ref(refcounts, cluster_size, data_cluster_addr)?;
+                        }
+                    }
+                }
+            }
+
+            Ok(())
+        }
+
+        // Add references to the top-level refcount table clusters.
+        fn set_refcount_table_refcounts(
+            refcounts: &mut [u16],
+            header: QcowHeader,
+            cluster_size: u64,
+        ) -> Result<()> {
+            let refcount_table_offset = header.refcount_table_offset;
+            for i in 0..header.refcount_table_clusters as u64 {
+                add_ref(
+                    refcounts,
+                    cluster_size,
+                    refcount_table_offset + i * cluster_size,
+                )?;
+            }
+            Ok(())
+        }
+
+        // Allocate clusters for refblocks.
+        // This needs to be done last so that we have the correct refcounts for all other
+        // clusters.
+        fn alloc_refblocks(
+            refcounts: &mut [u16],
+            cluster_size: u64,
+            refblock_clusters: u64,
+            pointers_per_cluster: u64,
+        ) -> Result<Vec<u64>> {
+            let refcount_table_entries = div_round_up_u64(refblock_clusters, pointers_per_cluster);
+            let mut ref_table = vec![0; refcount_table_entries as usize];
+            let mut first_free_cluster: u64 = 0;
+            for refblock_addr in &mut ref_table {
+                loop {
+                    if first_free_cluster >= refcounts.len() as u64 {
+                        return Err(Error::NotEnoughSpaceForRefcounts);
+                    }
+                    if refcounts[first_free_cluster as usize] == 0 {
+                        break;
+                    }
+                    first_free_cluster += 1;
+                }
+
+                *refblock_addr = first_free_cluster * cluster_size;
+                add_ref(refcounts, cluster_size, *refblock_addr)?;
+
+                first_free_cluster += 1;
+            }
+
+            Ok(ref_table)
+        }
+
+        // Write the updated reference count blocks and reftable.
+        fn write_refblocks(
+            refcounts: &[u16],
+            mut header: QcowHeader,
+            ref_table: &[u64],
+            raw_file: &mut QcowRawFile,
+            refcount_block_entries: u64,
+        ) -> Result<()> {
+            // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables.
+            header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
+            raw_file
+                .file_mut()
+                .seek(SeekFrom::Start(0))
+                .map_err(Error::SeekingFile)?;
+            header.write_to(raw_file.file_mut())?;
+
+            for (i, refblock_addr) in ref_table.iter().enumerate() {
+                // Write a block of refcounts to the location indicated by refblock_addr.
+                let refblock_start = i * (refcount_block_entries as usize);
+                let refblock_end = min(
+                    refcounts.len(),
+                    refblock_start + refcount_block_entries as usize,
+                );
+                let refblock = &refcounts[refblock_start..refblock_end];
+                raw_file
+                    .write_refcount_block(*refblock_addr, refblock)
+                    .map_err(Error::WritingHeader)?;
+
+                // If this is the last (partial) cluster, pad it out to a full refblock cluster.
+                if refblock.len() < refcount_block_entries as usize {
+                    let refblock_padding =
+                        vec![0u16; refcount_block_entries as usize - refblock.len()];
+                    raw_file
+                        .write_refcount_block(
+                            *refblock_addr + refblock.len() as u64 * 2,
+                            &refblock_padding,
+                        )
+                        .map_err(Error::WritingHeader)?;
+                }
+            }
+
+            // Rewrite the top-level refcount table.
+            raw_file
+                .write_pointer_table(header.refcount_table_offset, &ref_table, 0)
+                .map_err(Error::WritingHeader)?;
+
+            // Rewrite the header again, now with lazy refcounts disabled.
+            header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
+            raw_file
+                .file_mut()
+                .seek(SeekFrom::Start(0))
+                .map_err(Error::SeekingFile)?;
+            header.write_to(raw_file.file_mut())?;
+
+            Ok(())
+        }
+
+        let cluster_size = raw_file.cluster_size();
+
+        let file_size = raw_file
+            .file_mut()
+            .metadata()
+            .map_err(Error::GettingFileSize)?
+            .len();
+
+        let refcount_bits = 1u64 << header.refcount_order;
+        let refcount_bytes = div_round_up_u64(refcount_bits, 8);
+        let refcount_block_entries = cluster_size / refcount_bytes;
+        let pointers_per_cluster = cluster_size / size_of::<u64>() as u64;
+        let data_clusters = div_round_up_u64(header.size, cluster_size);
+        let l2_clusters = div_round_up_u64(data_clusters, pointers_per_cluster);
+        let l1_clusters = div_round_up_u64(l2_clusters, cluster_size);
+        let header_clusters = div_round_up_u64(size_of::<QcowHeader>() as u64, cluster_size);
+        let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters;
+        let mut max_valid_cluster_index = max_clusters;
+        let refblock_clusters = div_round_up_u64(max_valid_cluster_index, refcount_block_entries);
+        let reftable_clusters = div_round_up_u64(refblock_clusters, pointers_per_cluster);
+        // Account for refblocks and the ref table size needed to address them.
+        let refblocks_for_refs = div_round_up_u64(
+            refblock_clusters + reftable_clusters,
+            refcount_block_entries,
+        );
+        let reftable_clusters_for_refs =
+            div_round_up_u64(refblocks_for_refs, refcount_block_entries);
+        max_valid_cluster_index += refblock_clusters + reftable_clusters;
+        max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs;
+
+        if max_valid_cluster_index > MAX_RAM_POINTER_TABLE_SIZE {
+            return Err(Error::InvalidRefcountTableSize(max_valid_cluster_index));
+        }
+
+        let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
+        if max_valid_cluster_offset < file_size - cluster_size {
+            return Err(Error::InvalidRefcountTableSize(max_valid_cluster_offset));
+        }
+
+        let mut refcounts = vec![0; max_valid_cluster_index as usize];
+
+        // Find all references clusters and rebuild refcounts.
+        set_header_refcount(&mut refcounts, cluster_size)?;
+        set_l1_refcounts(&mut refcounts, header, cluster_size)?;
+        set_data_refcounts(&mut refcounts, header, cluster_size, raw_file)?;
+        set_refcount_table_refcounts(&mut refcounts, header, cluster_size)?;
+
+        // Allocate clusters to store the new reference count blocks.
+        let ref_table = alloc_refblocks(
+            &mut refcounts,
+            cluster_size,
+            refblock_clusters,
+            pointers_per_cluster,
+        )?;
+
+        // Write updated reference counts and point the reftable at them.
+        write_refblocks(
+            &refcounts,
+            header,
+            &ref_table,
+            raw_file,
+            refcount_block_entries,
+        )
+    }
+
+    // Limits the range so that it doesn't exceed the virtual size of the file.
+    fn limit_range_file(&self, address: u64, count: usize) -> usize {
+        if address.checked_add(count as u64).is_none() || address > self.virtual_size() {
+            return 0;
+        }
+        min(count as u64, self.virtual_size() - address) as usize
+    }
+
+    // Limits the range so that it doesn't overflow the end of a cluster.
+    fn limit_range_cluster(&self, address: u64, count: usize) -> usize {
+        let offset: u64 = self.raw_file.cluster_offset(address);
+        let limit = self.raw_file.cluster_size() - offset;
+        min(count as u64, limit) as usize
+    }
+
+    // Gets the maximum virtual size of this image.
+    fn virtual_size(&self) -> u64 {
+        self.header.size
+    }
+
+    // Gets the offset of `address` in the L1 table.
+    fn l1_address_offset(&self, address: u64) -> u64 {
+        let l1_index = self.l1_table_index(address);
+        l1_index * size_of::<u64>() as u64
+    }
+
+    // Gets the offset of `address` in the L1 table.
+    fn l1_table_index(&self, address: u64) -> u64 {
+        (address / self.raw_file.cluster_size()) / self.l2_entries
+    }
+
+    // Gets the offset of `address` in the L2 table.
+    fn l2_table_index(&self, address: u64) -> u64 {
+        (address / self.raw_file.cluster_size()) % self.l2_entries
+    }
+
+    // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters have
+    // yet to be allocated, return None.
+    fn file_offset_read(&mut self, address: u64) -> std::io::Result<Option<u64>> {
+        if address >= self.virtual_size() as u64 {
+            return Err(std::io::Error::from_raw_os_error(EINVAL));
+        }
+
+        let l1_index = self.l1_table_index(address) as usize;
+        let l2_addr_disk = *self
+            .l1_table
+            .get(l1_index)
+            .ok_or(std::io::Error::from_raw_os_error(EINVAL))?;
+
+        if l2_addr_disk == 0 {
+            // Reading from an unallocated cluster will return zeros.
+            return Ok(None);
+        }
+
+        let l2_index = self.l2_table_index(address) as usize;
+
+        if !self.l2_cache.contains_key(&l1_index) {
+            // Not in the cache.
+            let table =
+                VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
+
+            let l1_table = &self.l1_table;
+            let raw_file = &mut self.raw_file;
+            self.l2_cache.insert(l1_index, table, |index, evicted| {
+                raw_file.write_pointer_table(
+                    l1_table[index],
+                    evicted.get_values(),
+                    CLUSTER_USED_FLAG,
+                )
+            })?;
+        };
+
+        let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
+        if cluster_addr == 0 {
+            return Ok(None);
+        }
+        Ok(Some(cluster_addr + self.raw_file.cluster_offset(address)))
+    }
+
+    // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need
+    // to be allocated, they will be.
+    fn file_offset_write(&mut self, address: u64) -> std::io::Result<u64> {
+        if address >= self.virtual_size() as u64 {
+            return Err(std::io::Error::from_raw_os_error(EINVAL));
+        }
+
+        let l1_index = self.l1_table_index(address) as usize;
+        let l2_addr_disk = *self
+            .l1_table
+            .get(l1_index)
+            .ok_or(std::io::Error::from_raw_os_error(EINVAL))?;
+        let l2_index = self.l2_table_index(address) as usize;
+
+        let mut set_refcounts = Vec::new();
+
+        if !self.l2_cache.contains_key(&l1_index) {
+            // Not in the cache.
+            let l2_table = if l2_addr_disk == 0 {
+                // Allocate a new cluster to store the L2 table and update the L1 table to point
+                // to the new table.
+                let new_addr: u64 = self.get_new_cluster()?;
+                // The cluster refcount starts at one meaning it is used but doesn't need COW.
+                set_refcounts.push((new_addr, 1));
+                self.l1_table[l1_index] = new_addr;
+                VecCache::new(self.l2_entries as usize)
+            } else {
+                VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?)
+            };
+            let l1_table = &self.l1_table;
+            let raw_file = &mut self.raw_file;
+            self.l2_cache.insert(l1_index, l2_table, |index, evicted| {
+                raw_file.write_pointer_table(
+                    l1_table[index],
+                    evicted.get_values(),
+                    CLUSTER_USED_FLAG,
+                )
+            })?;
+        }
+
+        let cluster_addr = match self.l2_cache.get(&l1_index).unwrap()[l2_index] {
+            0 => {
+                // Need to allocate a data cluster
+                let cluster_addr = self.append_data_cluster()?;
+                self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?;
+                cluster_addr
+            }
+            a => a,
+        };
+
+        for (addr, count) in set_refcounts {
+            let mut newly_unref = self.set_cluster_refcount(addr, count)?;
+            self.unref_clusters.append(&mut newly_unref);
+        }
+
+        Ok(cluster_addr + self.raw_file.cluster_offset(address))
+    }
+
+    // Updates the l1 and l2 tables to point to the new `cluster_addr`.
+    fn update_cluster_addr(
+        &mut self,
+        l1_index: usize,
+        l2_index: usize,
+        cluster_addr: u64,
+        set_refcounts: &mut Vec<(u64, u16)>,
+    ) -> io::Result<()> {
+        if !self.l2_cache.get(&l1_index).unwrap().dirty() {
+            // Free the previously used cluster if one exists. Modified tables are always
+            // witten to new clusters so the L1 table can be committed to disk after they
+            // are and L1 never points at an invalid table.
+            // The index must be valid from when it was insterted.
+            let addr = self.l1_table[l1_index];
+            if addr != 0 {
+                self.unref_clusters.push(addr);
+                set_refcounts.push((addr, 0));
+            }
+
+            // Allocate a new cluster to store the L2 table and update the L1 table to point
+            // to the new table. The cluster will be written when the cache is flushed, no
+            // need to copy the data now.
+            let new_addr: u64 = self.get_new_cluster()?;
+            // The cluster refcount starts at one indicating it is used but doesn't need
+            // COW.
+            set_refcounts.push((new_addr, 1));
+            self.l1_table[l1_index] = new_addr;
+        }
+        // 'unwrap' is OK because it was just added.
+        self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = cluster_addr;
+        Ok(())
+    }
+
+    // Allocate a new cluster and return its offset within the raw file.
+    fn get_new_cluster(&mut self) -> std::io::Result<u64> {
+        // First use a pre allocated cluster if one is available.
+        if let Some(free_cluster) = self.avail_clusters.pop() {
+            self.raw_file.zero_cluster(free_cluster)?;
+            return Ok(free_cluster);
+        }
+
+        let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset();
+        if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? {
+            Ok(new_cluster)
+        } else {
+            error!("No free clusters in get_new_cluster()");
+            Err(std::io::Error::from_raw_os_error(ENOSPC))
+        }
+    }
+
+    // Allocate and initialize a new data cluster. Returns the offset of the
+    // cluster in to the file on success.
+    fn append_data_cluster(&mut self) -> std::io::Result<u64> {
+        let new_addr: u64 = self.get_new_cluster()?;
+        // The cluster refcount starts at one indicating it is used but doesn't need COW.
+        let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?;
+        self.unref_clusters.append(&mut newly_unref);
+        Ok(new_addr)
+    }
+
+    // Returns true if the cluster containing `address` is already allocated.
+    fn cluster_allocated(&mut self, address: u64) -> std::io::Result<bool> {
+        if address >= self.virtual_size() as u64 {
+            return Err(std::io::Error::from_raw_os_error(EINVAL));
+        }
+
+        let l1_index = self.l1_table_index(address) as usize;
+        let l2_addr_disk = *self
+            .l1_table
+            .get(l1_index)
+            .ok_or(std::io::Error::from_raw_os_error(EINVAL))?;
+        let l2_index = self.l2_table_index(address) as usize;
+
+        if l2_addr_disk == 0 {
+            // The whole L2 table for this address is not allocated yet,
+            // so the cluster must also be unallocated.
+            return Ok(false);
+        }
+
+        if !self.l2_cache.contains_key(&l1_index) {
+            // Not in the cache.
+            let table =
+                VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
+            let l1_table = &self.l1_table;
+            let raw_file = &mut self.raw_file;
+            self.l2_cache.insert(l1_index, table, |index, evicted| {
+                raw_file.write_pointer_table(
+                    l1_table[index],
+                    evicted.get_values(),
+                    CLUSTER_USED_FLAG,
+                )
+            })?;
+        }
+
+        let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
+        // If cluster_addr != 0, the cluster is allocated.
+        Ok(cluster_addr != 0)
+    }
+
+    // Find the first guest address greater than or equal to `address` whose allocation state
+    // matches `allocated`.
+    fn find_allocated_cluster(
+        &mut self,
+        address: u64,
+        allocated: bool,
+    ) -> std::io::Result<Option<u64>> {
+        let size = self.virtual_size();
+        if address >= size {
+            return Ok(None);
+        }
+
+        // If offset is already within a hole, return it.
+        if self.cluster_allocated(address)? == allocated {
+            return Ok(Some(address));
+        }
+
+        // Skip to the next cluster boundary.
+        let cluster_size = self.raw_file.cluster_size();
+        let mut cluster_addr = (address / cluster_size + 1) * cluster_size;
+
+        // Search for clusters with the desired allocation state.
+        while cluster_addr < size {
+            if self.cluster_allocated(cluster_addr)? == allocated {
+                return Ok(Some(cluster_addr));
+            }
+            cluster_addr += cluster_size;
+        }
+
+        Ok(None)
+    }
+
+    // Deallocate the storage for the cluster starting at `address`.
+    // Any future reads of this cluster will return all zeroes.
+    fn deallocate_cluster(&mut self, address: u64) -> std::io::Result<()> {
+        if address >= self.virtual_size() as u64 {
+            return Err(std::io::Error::from_raw_os_error(EINVAL));
+        }
+
+        let l1_index = self.l1_table_index(address) as usize;
+        let l2_addr_disk = *self
+            .l1_table
+            .get(l1_index)
+            .ok_or(std::io::Error::from_raw_os_error(EINVAL))?;
+        let l2_index = self.l2_table_index(address) as usize;
+
+        if l2_addr_disk == 0 {
+            // The whole L2 table for this address is not allocated yet,
+            // so the cluster must also be unallocated.
+            return Ok(());
+        }
+
+        if !self.l2_cache.contains_key(&l1_index) {
+            // Not in the cache.
+            let table =
+                VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
+            let l1_table = &self.l1_table;
+            let raw_file = &mut self.raw_file;
+            self.l2_cache.insert(l1_index, table, |index, evicted| {
+                raw_file.write_pointer_table(
+                    l1_table[index],
+                    evicted.get_values(),
+                    CLUSTER_USED_FLAG,
+                )
+            })?;
+        }
+
+        let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
+        if cluster_addr == 0 {
+            // This cluster is already unallocated; nothing to do.
+            return Ok(());
+        }
+
+        // Decrement the refcount.
+        let refcount = self
+            .refcounts
+            .get_cluster_refcount(&mut self.raw_file, cluster_addr)
+            .map_err(|_| std::io::Error::from_raw_os_error(EINVAL))?;
+        if refcount == 0 {
+            return Err(std::io::Error::from_raw_os_error(EINVAL));
+        }
+
+        let new_refcount = refcount - 1;
+        let mut newly_unref = self.set_cluster_refcount(cluster_addr, new_refcount)?;
+        self.unref_clusters.append(&mut newly_unref);
+
+        // Rewrite the L2 entry to remove the cluster mapping.
+        // unwrap is safe as we just checked/inserted this entry.
+        self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = 0;
+
+        if new_refcount == 0 {
+            let cluster_size = self.raw_file.cluster_size();
+            // This cluster is no longer in use; deallocate the storage.
+            // The underlying FS may not support FALLOC_FL_PUNCH_HOLE,
+            // so don't treat an error as fatal.  Future reads will return zeros anyways.
+            let _ = self
+                .raw_file
+                .file_mut()
+                .punch_hole(cluster_addr, cluster_size);
+            self.unref_clusters.push(cluster_addr);
+        }
+        Ok(())
+    }
+
+    // Deallocate the storage for `length` bytes starting at `address`.
+    // Any future reads of this range will return all zeroes.
+    fn deallocate_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()> {
+        let write_count: usize = self.limit_range_file(address, length);
+
+        let mut nwritten: usize = 0;
+        while nwritten < write_count {
+            let curr_addr = address + nwritten as u64;
+            let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
+
+            if count == self.raw_file.cluster_size() as usize {
+                // Full cluster - deallocate the storage.
+                self.deallocate_cluster(curr_addr)?;
+            } else {
+                // Partial cluster - zero out the relevant bytes if it was allocated.
+                // Any space in unallocated clusters can be left alone, since
+                // unallocated clusters already read back as zeroes.
+                if let Some(offset) = self.file_offset_read(curr_addr)? {
+                    // Partial cluster - zero it out.
+                    self.raw_file
+                        .file_mut()
+                        .write_zeroes_all_at(offset, count)?;
+                }
+            }
+
+            nwritten += count;
+        }
+        Ok(())
+    }
+
+    // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any
+    // cluster is compressed.
+    fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result<Vec<u64>> {
+        let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?;
+        if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) {
+            return Err(std::io::Error::from_raw_os_error(ENOTSUP));
+        }
+        Ok(file_values
+            .iter()
+            .map(|entry| *entry & L2_TABLE_OFFSET_MASK)
+            .collect())
+    }
+
+    // Set the refcount for a cluster with the given address.
+    // Returns a list of any refblocks that can be reused, this happens when a refblock is moved,
+    // the old location can be reused.
+    fn set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result<Vec<u64>> {
+        let mut added_clusters = Vec::new();
+        let mut unref_clusters = Vec::new();
+        let mut refcount_set = false;
+        let mut new_cluster = None;
+
+        while !refcount_set {
+            match self.refcounts.set_cluster_refcount(
+                &mut self.raw_file,
+                address,
+                refcount,
+                new_cluster.take(),
+            ) {
+                Ok(None) => {
+                    refcount_set = true;
+                }
+                Ok(Some(freed_cluster)) => {
+                    unref_clusters.push(freed_cluster);
+                    refcount_set = true;
+                }
+                Err(refcount::Error::EvictingRefCounts(e)) => {
+                    return Err(e);
+                }
+                Err(refcount::Error::InvalidIndex) => {
+                    return Err(std::io::Error::from_raw_os_error(EINVAL));
+                }
+                Err(refcount::Error::NeedCluster(addr)) => {
+                    // Read the address and call set_cluster_refcount again.
+                    new_cluster = Some((
+                        addr,
+                        VecCache::from_vec(self.raw_file.read_refcount_block(addr)?),
+                    ));
+                }
+                Err(refcount::Error::NeedNewCluster) => {
+                    // Allocate the cluster and call set_cluster_refcount again.
+                    let addr = self.get_new_cluster()?;
+                    added_clusters.push(addr);
+                    new_cluster = Some((
+                        addr,
+                        VecCache::new(self.refcounts.refcounts_per_block() as usize),
+                    ));
+                }
+                Err(refcount::Error::ReadingRefCounts(e)) => {
+                    return Err(e);
+                }
+            }
+        }
+
+        for addr in added_clusters {
+            self.set_cluster_refcount(addr, 1)?;
+        }
+        Ok(unref_clusters)
+    }
+
+    fn sync_caches(&mut self) -> std::io::Result<()> {
+        // Write out all dirty L2 tables.
+        for (l1_index, l2_table) in self.l2_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
+            // The index must be valid from when we insterted it.
+            let addr = self.l1_table[*l1_index];
+            if addr != 0 {
+                self.raw_file.write_pointer_table(
+                    addr,
+                    l2_table.get_values(),
+                    CLUSTER_USED_FLAG,
+                )?;
+            } else {
+                return Err(std::io::Error::from_raw_os_error(EINVAL));
+            }
+            l2_table.mark_clean();
+        }
+        // Write the modified refcount blocks.
+        self.refcounts.flush_blocks(&mut self.raw_file)?;
+        // Make sure metadata(file len) and all data clusters are written.
+        self.raw_file.file_mut().sync_all()?;
+
+        // Push L1 table and refcount table last as all the clusters they point to are now
+        // guaranteed to be valid.
+        let mut sync_required = false;
+        if self.l1_table.dirty() {
+            self.raw_file.write_pointer_table(
+                self.header.l1_table_offset,
+                &self.l1_table.get_values(),
+                0,
+            )?;
+            self.l1_table.mark_clean();
+            sync_required = true;
+        }
+        sync_required |= self.refcounts.flush_table(&mut self.raw_file)?;
+        if sync_required {
+            self.raw_file.file_mut().sync_data()?;
+        }
+        Ok(())
+    }
+
+    // Reads `count` bytes starting at `address`, calling `cb` repeatedly with the backing file,
+    // number of bytes read so far, and number of bytes to read from the file in that invocation. If
+    // None is given to `cb` in place of the backing file, the `cb` should infer zeros would have
+    // been read.
+    fn read_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
+    where
+        F: FnMut(Option<&mut File>, usize, usize) -> std::io::Result<()>,
+    {
+        let read_count: usize = self.limit_range_file(address, count);
+
+        let mut nread: usize = 0;
+        while nread < read_count {
+            let curr_addr = address + nread as u64;
+            let file_offset = self.file_offset_read(curr_addr)?;
+            let count = self.limit_range_cluster(curr_addr, read_count - nread);
+
+            if let Some(offset) = file_offset {
+                self.raw_file.file_mut().seek(SeekFrom::Start(offset))?;
+                cb(Some(self.raw_file.file_mut()), nread, count)?;
+            } else {
+                cb(None, nread, count)?;
+            }
+
+            nread += count;
+        }
+        Ok(read_count)
+    }
+
+    // Writes `count` bytes starting at `address`, calling `cb` repeatedly with the backing file,
+    // number of bytes written so far, and number of bytes to write to the file in that invocation.
+    fn write_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
+    where
+        F: FnMut(&mut File, usize, usize) -> std::io::Result<()>,
+    {
+        let write_count: usize = self.limit_range_file(address, count);
+
+        let mut nwritten: usize = 0;
+        while nwritten < write_count {
+            let curr_addr = address + nwritten as u64;
+            let offset = self.file_offset_write(curr_addr)?;
+            let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
+
+            if let Err(e) = self.raw_file.file_mut().seek(SeekFrom::Start(offset)) {
+                return Err(e);
+            }
+            if let Err(e) = cb(self.raw_file.file_mut(), nwritten, count) {
+                return Err(e);
+            }
+
+            nwritten += count;
+        }
+        Ok(write_count)
+    }
+}
+
+impl Drop for QcowFile {
+    fn drop(&mut self) {
+        let _ = self.sync_caches();
+    }
+}
+
+impl AsRawFd for QcowFile {
+    fn as_raw_fd(&self) -> RawFd {
+        self.raw_file.file().as_raw_fd()
+    }
+}
+
+impl Read for QcowFile {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let read_count =
+            self.read_cb(
+                self.current_offset,
+                buf.len(),
+                |file, offset, count| match file {
+                    Some(f) => f.read_exact(&mut buf[offset..(offset + count)]),
+                    None => {
+                        for b in &mut buf[offset..(offset + count)] {
+                            *b = 0;
+                        }
+                        Ok(())
+                    }
+                },
+            )?;
+        self.current_offset += read_count as u64;
+        Ok(read_count)
+    }
+}
+
+impl Seek for QcowFile {
+    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+        let new_offset: Option<u64> = match pos {
+            SeekFrom::Start(off) => Some(off),
+            SeekFrom::End(off) => {
+                if off < 0 {
+                    0i64.checked_sub(off)
+                        .and_then(|increment| self.virtual_size().checked_sub(increment as u64))
+                } else {
+                    self.virtual_size().checked_add(off as u64)
+                }
+            }
+            SeekFrom::Current(off) => {
+                if off < 0 {
+                    0i64.checked_sub(off)
+                        .and_then(|increment| self.current_offset.checked_sub(increment as u64))
+                } else {
+                    self.current_offset.checked_add(off as u64)
+                }
+            }
+        };
+
+        if let Some(o) = new_offset {
+            if o <= self.virtual_size() {
+                self.current_offset = o;
+                return Ok(o);
+            }
+        }
+        Err(std::io::Error::from_raw_os_error(EINVAL))
+    }
+}
+
+impl Write for QcowFile {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        let write_count =
+            self.write_cb(self.current_offset, buf.len(), |file, offset, count| {
+                file.write_all(&buf[offset..(offset + count)])
+            })?;
+        self.current_offset += write_count as u64;
+        Ok(write_count)
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.sync_caches()?;
+        self.avail_clusters.append(&mut self.unref_clusters);
+        Ok(())
+    }
+}
+
+impl FileReadWriteVolatile for QcowFile {
+    fn read_volatile(&mut self, slice: VolatileSlice) -> io::Result<usize> {
+        let read_count = self.read_cb(
+            self.current_offset,
+            slice.size() as usize,
+            |file, offset, count| {
+                let sub_slice = slice.get_slice(offset as u64, count as u64).unwrap();
+                match file {
+                    Some(f) => f.read_exact_volatile(sub_slice),
+                    None => {
+                        sub_slice.write_bytes(0);
+                        Ok(())
+                    }
+                }
+            },
+        )?;
+        self.current_offset += read_count as u64;
+        Ok(read_count)
+    }
+
+    fn write_volatile(&mut self, slice: VolatileSlice) -> io::Result<usize> {
+        let write_count = self.write_cb(
+            self.current_offset,
+            slice.size() as usize,
+            |file, offset, count| {
+                let sub_slice = slice.get_slice(offset as u64, count as u64).unwrap();
+                file.write_all_volatile(sub_slice)
+            },
+        )?;
+        self.current_offset += write_count as u64;
+        Ok(write_count)
+    }
+}
+
+impl FileReadWriteAtVolatile for QcowFile {
+    fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
+        self.read_cb(offset, slice.size() as usize, |file, offset, count| {
+            let sub_slice = slice.get_slice(offset as u64, count as u64).unwrap();
+            match file {
+                Some(f) => f.read_exact_volatile(sub_slice),
+                None => {
+                    sub_slice.write_bytes(0);
+                    Ok(())
+                }
+            }
+        })
+    }
+
+    fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
+        self.write_cb(offset, slice.size() as usize, |file, offset, count| {
+            let sub_slice = slice.get_slice(offset as u64, count as u64).unwrap();
+            file.write_all_volatile(sub_slice)
+        })
+    }
+}
+
+impl FileSync for QcowFile {
+    fn fsync(&mut self) -> std::io::Result<()> {
+        self.flush()
+    }
+}
+
+impl FileSetLen for QcowFile {
+    fn set_len(&self, _len: u64) -> std::io::Result<()> {
+        Err(std::io::Error::new(
+            std::io::ErrorKind::Other,
+            "set_len() not supported for QcowFile",
+        ))
+    }
+}
+
+impl FileGetLen for QcowFile {
+    fn get_len(&self) -> io::Result<u64> {
+        Ok(self.virtual_size())
+    }
+}
+
+impl PunchHole for QcowFile {
+    fn punch_hole(&mut self, offset: u64, length: u64) -> std::io::Result<()> {
+        let mut remaining = length;
+        let mut offset = offset;
+        while remaining > 0 {
+            let chunk_length = min(remaining, std::usize::MAX as u64) as usize;
+            self.deallocate_bytes(offset, chunk_length)?;
+            remaining -= chunk_length as u64;
+            offset += chunk_length as u64;
+        }
+        Ok(())
+    }
+}
+
+impl WriteZeroesAt for QcowFile {
+    fn write_zeroes_at(&mut self, offset: u64, length: usize) -> io::Result<usize> {
+        self.punch_hole(offset, length as u64)?;
+        Ok(length)
+    }
+}
+
+impl SeekHole for QcowFile {
+    fn seek_hole(&mut self, offset: u64) -> io::Result<Option<u64>> {
+        match self.find_allocated_cluster(offset, false) {
+            Err(e) => Err(e),
+            Ok(None) => {
+                if offset < self.virtual_size() {
+                    Ok(Some(self.seek(SeekFrom::End(0))?))
+                } else {
+                    Ok(None)
+                }
+            }
+            Ok(Some(o)) => {
+                self.seek(SeekFrom::Start(o))?;
+                Ok(Some(o))
+            }
+        }
+    }
+
+    fn seek_data(&mut self, offset: u64) -> io::Result<Option<u64>> {
+        match self.find_allocated_cluster(offset, true) {
+            Err(e) => Err(e),
+            Ok(None) => Ok(None),
+            Ok(Some(o)) => {
+                self.seek(SeekFrom::Start(o))?;
+                Ok(Some(o))
+            }
+        }
+    }
+}
+
+// Returns an Error if the given offset doesn't align to a cluster boundary.
+fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> {
+    if offset & ((0x01 << cluster_bits) - 1) != 0 {
+        return Err(Error::InvalidOffset(offset));
+    }
+    Ok(())
+}
+
+// Ceiling of the division of `dividend`/`divisor`.
+fn div_round_up_u64(dividend: u64, divisor: u64) -> u64 {
+    dividend / divisor + if dividend % divisor != 0 { 1 } else { 0 }
+}
+
+// Ceiling of the division of `dividend`/`divisor`.
+fn div_round_up_u32(dividend: u32, divisor: u32) -> u32 {
+    dividend / divisor + if dividend % divisor != 0 { 1 } else { 0 }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs::File;
+    use std::io::{Read, Seek, SeekFrom, Write};
+    use sys_util::{SharedMemory, WriteZeroes};
+
+    fn valid_header() -> Vec<u8> {
+        vec![
+            0x51u8, 0x46, 0x49, 0xfb, // magic
+            0x00, 0x00, 0x00, 0x03, // version
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
+            0x00, 0x00, 0x00, 0x00, // backing file size
+            0x00, 0x00, 0x00, 0x10, // cluster_bits
+            0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, // size
+            0x00, 0x00, 0x00, 0x00, // crypt method
+            0x00, 0x00, 0x01, 0x00, // L1 size
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
+            0x00, 0x00, 0x00, 0x03, // refcount table clusters
+            0x00, 0x00, 0x00, 0x00, // nb snapshots
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
+            0x00, 0x00, 0x00, 0x04, // refcount_order
+            0x00, 0x00, 0x00, 0x68, // header_length
+        ]
+    }
+
+    // Test case found by clusterfuzz to allocate excessive memory.
+    fn test_huge_header() -> Vec<u8> {
+        vec![
+            0x51, 0x46, 0x49, 0xfb, // magic
+            0x00, 0x00, 0x00, 0x03, // version
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
+            0x00, 0x00, 0x00, 0x00, // backing file size
+            0x00, 0x00, 0x00, 0x09, // cluster_bits
+            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // size
+            0x00, 0x00, 0x00, 0x00, // crypt method
+            0x00, 0x00, 0x01, 0x00, // L1 size
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
+            0x00, 0x00, 0x00, 0x03, // refcount table clusters
+            0x00, 0x00, 0x00, 0x00, // nb snapshots
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
+            0x00, 0x00, 0x00, 0x04, // refcount_order
+            0x00, 0x00, 0x00, 0x68, // header_length
+        ]
+    }
+
+    fn with_basic_file<F>(header: &[u8], mut testfn: F)
+    where
+        F: FnMut(File),
+    {
+        let shm = SharedMemory::anon().unwrap();
+        let mut disk_file: File = shm.into();
+        disk_file.write_all(&header).unwrap();
+        disk_file.set_len(0x1_0000_0000).unwrap();
+        disk_file.seek(SeekFrom::Start(0)).unwrap();
+
+        testfn(disk_file); // File closed when the function exits.
+    }
+
+    fn with_default_file<F>(file_size: u64, mut testfn: F)
+    where
+        F: FnMut(QcowFile),
+    {
+        let shm = SharedMemory::anon().unwrap();
+        let qcow_file = QcowFile::new(shm.into(), file_size).unwrap();
+
+        testfn(qcow_file); // File closed when the function exits.
+    }
+
+    #[test]
+    fn default_header() {
+        let header = QcowHeader::create_for_size(0x10_0000);
+        let shm = SharedMemory::anon().unwrap();
+        let mut disk_file: File = shm.into();
+        header
+            .write_to(&mut disk_file)
+            .expect("Failed to write header to shm.");
+        disk_file.seek(SeekFrom::Start(0)).unwrap();
+        QcowFile::from(disk_file).expect("Failed to create Qcow from default Header");
+    }
+
+    #[test]
+    fn header_read() {
+        with_basic_file(&valid_header(), |mut disk_file: File| {
+            QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
+        });
+    }
+
+    #[test]
+    fn invalid_magic() {
+        let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb];
+        with_basic_file(&invalid_header, |mut disk_file: File| {
+            QcowHeader::new(&mut disk_file).expect_err("Invalid header worked.");
+        });
+    }
+
+    #[test]
+    fn invalid_refcount_order() {
+        let mut header = valid_header();
+        header[99] = 2;
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Invalid refcount order worked.");
+        });
+    }
+
+    #[test]
+    fn invalid_cluster_bits() {
+        let mut header = valid_header();
+        header[23] = 3;
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Failed to create file.");
+        });
+    }
+
+    #[test]
+    fn test_header_huge_file() {
+        let header = test_huge_header();
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Failed to create file.");
+        });
+    }
+
+    #[test]
+    fn test_header_crazy_file_size_rejected() {
+        let mut header = valid_header();
+        &mut header[24..32].copy_from_slice(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e]);
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Failed to create file.");
+        });
+    }
+
+    #[test]
+    fn test_huge_l1_table() {
+        let mut header = valid_header();
+        header[36] = 0x12;
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Failed to create file.");
+        });
+    }
+
+    #[test]
+    fn test_header_1_tb_file_min_cluster() {
+        let mut header = test_huge_header();
+        header[24] = 0;
+        header[26] = 1;
+        header[31] = 0;
+        // 1 TB with the min cluster size makes the arrays too big, it should fail.
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Failed to create file.");
+        });
+    }
+
+    #[test]
+    fn test_header_1_tb_file() {
+        let mut header = test_huge_header();
+        // reset to 1 TB size.
+        header[24] = 0;
+        header[26] = 1;
+        header[31] = 0;
+        // set cluster_bits
+        header[23] = 16;
+        with_basic_file(&header, |disk_file: File| {
+            let mut qcow = QcowFile::from(disk_file).expect("Failed to create file.");
+            qcow.seek(SeekFrom::Start(0x100_0000_0000 - 8))
+                .expect("Failed to seek.");
+            let value = 0x0000_0040_3f00_ffffu64;
+            qcow.write_all(&value.to_le_bytes())
+                .expect("failed to write data");
+        });
+    }
+
+    #[test]
+    fn test_header_huge_num_refcounts() {
+        let mut header = valid_header();
+        &mut header[56..60].copy_from_slice(&[0x02, 0x00, 0xe8, 0xff]);
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Created disk with crazy refcount clusters");
+        });
+    }
+
+    #[test]
+    fn test_header_huge_refcount_offset() {
+        let mut header = valid_header();
+        &mut header[48..56].copy_from_slice(&[0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x02, 0x00]);
+        with_basic_file(&header, |disk_file: File| {
+            QcowFile::from(disk_file).expect_err("Created disk with crazy refcount offset");
+        });
+    }
+
+    #[test]
+    fn write_read_start() {
+        with_basic_file(&valid_header(), |disk_file: File| {
+            let mut q = QcowFile::from(disk_file).unwrap();
+            q.write(b"test first bytes")
+                .expect("Failed to write test string.");
+            let mut buf = [0u8; 4];
+            q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
+            q.read(&mut buf).expect("Failed to read.");
+            assert_eq!(&buf, b"test");
+        });
+    }
+
+    #[test]
+    fn offset_write_read() {
+        with_basic_file(&valid_header(), |disk_file: File| {
+            let mut q = QcowFile::from(disk_file).unwrap();
+            let b = [0x55u8; 0x1000];
+            q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
+            q.write(&b).expect("Failed to write test string.");
+            let mut buf = [0u8; 4];
+            q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
+            q.read(&mut buf).expect("Failed to read.");
+            assert_eq!(buf[0], 0x55);
+        });
+    }
+
+    #[test]
+    fn write_zeroes_read() {
+        with_basic_file(&valid_header(), |disk_file: File| {
+            let mut q = QcowFile::from(disk_file).unwrap();
+            // Write some test data.
+            let b = [0x55u8; 0x1000];
+            q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
+            q.write(&b).expect("Failed to write test string.");
+            // Overwrite the test data with zeroes.
+            q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
+            q.write_zeroes_all(0x200).expect("Failed to write zeroes.");
+            // Verify that the correct part of the data was zeroed out.
+            let mut buf = [0u8; 0x1000];
+            q.seek(SeekFrom::Start(0xfff2000)).expect("Failed to seek.");
+            q.read(&mut buf).expect("Failed to read.");
+            assert_eq!(buf[0], 0);
+            assert_eq!(buf[0x1FF], 0);
+            assert_eq!(buf[0x200], 0x55);
+            assert_eq!(buf[0xFFF], 0x55);
+        });
+    }
+
+    #[test]
+    fn write_zeroes_full_cluster() {
+        // Choose a size that is larger than a cluster.
+        // valid_header uses cluster_bits = 12, which corresponds to a cluster size of 4096.
+        const CHUNK_SIZE: usize = 4096 * 2 + 512;
+        with_basic_file(&valid_header(), |disk_file: File| {
+            let mut q = QcowFile::from(disk_file).unwrap();
+            // Write some test data.
+            let b = [0x55u8; CHUNK_SIZE];
+            q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
+            q.write(&b).expect("Failed to write test string.");
+            // Overwrite the full cluster with zeroes.
+            q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
+            q.write_zeroes_all(CHUNK_SIZE)
+                .expect("Failed to write zeroes.");
+            // Verify that the data was zeroed out.
+            let mut buf = [0u8; CHUNK_SIZE];
+            q.seek(SeekFrom::Start(0)).expect("Failed to seek.");
+            q.read(&mut buf).expect("Failed to read.");
+            assert_eq!(buf[0], 0);
+            assert_eq!(buf[CHUNK_SIZE - 1], 0);
+        });
+    }
+
+    #[test]
+    fn test_header() {
+        with_basic_file(&valid_header(), |disk_file: File| {
+            let q = QcowFile::from(disk_file).unwrap();
+            assert_eq!(q.virtual_size(), 0x20_0000_0000);
+        });
+    }
+
+    #[test]
+    fn read_small_buffer() {
+        with_basic_file(&valid_header(), |disk_file: File| {
+            let mut q = QcowFile::from(disk_file).unwrap();
+            let mut b = [5u8; 16];
+            q.seek(SeekFrom::Start(1000)).expect("Failed to seek.");
+            q.read(&mut b).expect("Failed to read.");
+            assert_eq!(0, b[0]);
+            assert_eq!(0, b[15]);
+        });
+    }
+
+    #[test]
+    fn replay_ext4() {
+        with_basic_file(&valid_header(), |disk_file: File| {
+            let mut q = QcowFile::from(disk_file).unwrap();
+            const BUF_SIZE: usize = 0x1000;
+            let mut b = [0u8; BUF_SIZE];
+
+            struct Transfer {
+                pub write: bool,
+                pub addr: u64,
+            };
+
+            // Write transactions from mkfs.ext4.
+            let xfers: Vec<Transfer> = vec![
+                Transfer {
+                    write: false,
+                    addr: 0xfff0000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xfffe000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x0,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xffff000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xffdf000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xfff8000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xffe0000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xffce000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xffb6000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xffab000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xffa4000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xff8e000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xff86000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xff84000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xff89000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xfe7e000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x100000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x3000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x7000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xf000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x2000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x4000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x5000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x6000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x8000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x9000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xa000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xb000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xc000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xd000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0xe000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x10000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x11000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x12000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x13000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x14000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x15000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x16000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x17000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x18000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x19000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1a000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1b000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1c000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1d000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1e000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1f000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x21000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x22000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x24000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x40000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x0,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x3000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x7000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x0,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x1000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x2000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x3000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x0,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x449000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x48000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x48000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x448000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x44a000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x48000,
+                },
+                Transfer {
+                    write: false,
+                    addr: 0x48000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0x0,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0x448000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0x449000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0x44a000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff0000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff1000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff2000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff3000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff4000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff5000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff6000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff7000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff8000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfff9000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfffa000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfffb000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfffc000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfffd000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xfffe000,
+                },
+                Transfer {
+                    write: true,
+                    addr: 0xffff000,
+                },
+            ];
+
+            for xfer in &xfers {
+                q.seek(SeekFrom::Start(xfer.addr)).expect("Failed to seek.");
+                if xfer.write {
+                    q.write(&b).expect("Failed to write.");
+                } else {
+                    let read_count: usize = q.read(&mut b).expect("Failed to read.");
+                    assert_eq!(read_count, BUF_SIZE);
+                }
+            }
+        });
+    }
+
+    #[test]
+    fn combo_write_read() {
+        with_default_file(1024 * 1024 * 1024 * 256, |mut qcow_file| {
+            const NUM_BLOCKS: usize = 555;
+            const BLOCK_SIZE: usize = 0x1_0000;
+            const OFFSET: usize = 0x1_0000_0020;
+            let data = [0x55u8; BLOCK_SIZE];
+            let mut readback = [0u8; BLOCK_SIZE];
+            for i in 0..NUM_BLOCKS {
+                let seek_offset = OFFSET + i * BLOCK_SIZE;
+                qcow_file
+                    .seek(SeekFrom::Start(seek_offset as u64))
+                    .expect("Failed to seek.");
+                let nwritten = qcow_file.write(&data).expect("Failed to write test data.");
+                assert_eq!(nwritten, BLOCK_SIZE);
+                // Read back the data to check it was written correctly.
+                qcow_file
+                    .seek(SeekFrom::Start(seek_offset as u64))
+                    .expect("Failed to seek.");
+                let nread = qcow_file.read(&mut readback).expect("Failed to read.");
+                assert_eq!(nread, BLOCK_SIZE);
+                for (orig, read) in data.iter().zip(readback.iter()) {
+                    assert_eq!(orig, read);
+                }
+            }
+            // Check that address 0 is still zeros.
+            qcow_file.seek(SeekFrom::Start(0)).expect("Failed to seek.");
+            let nread = qcow_file.read(&mut readback).expect("Failed to read.");
+            assert_eq!(nread, BLOCK_SIZE);
+            for read in readback.iter() {
+                assert_eq!(*read, 0);
+            }
+            // Check the data again after the writes have happened.
+            for i in 0..NUM_BLOCKS {
+                let seek_offset = OFFSET + i * BLOCK_SIZE;
+                qcow_file
+                    .seek(SeekFrom::Start(seek_offset as u64))
+                    .expect("Failed to seek.");
+                let nread = qcow_file.read(&mut readback).expect("Failed to read.");
+                assert_eq!(nread, BLOCK_SIZE);
+                for (orig, read) in data.iter().zip(readback.iter()) {
+                    assert_eq!(orig, read);
+                }
+            }
+
+            assert_eq!(qcow_file.first_zero_refcount().unwrap(), None);
+        });
+    }
+
+    fn seek_cur(file: &mut QcowFile) -> u64 {
+        file.seek(SeekFrom::Current(0)).unwrap()
+    }
+
+    #[test]
+    fn seek_data() {
+        with_default_file(0x30000, |mut file| {
+            // seek_data at or after the end of the file should return None
+            assert_eq!(file.seek_data(0x10000).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+            assert_eq!(file.seek_data(0x10001).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+
+            // Write some data to [0x10000, 0x20000)
+            let b = [0x55u8; 0x10000];
+            file.seek(SeekFrom::Start(0x10000)).unwrap();
+            file.write_all(&b).unwrap();
+            assert_eq!(file.seek_data(0).unwrap(), Some(0x10000));
+            assert_eq!(seek_cur(&mut file), 0x10000);
+
+            // seek_data within data should return the same offset
+            assert_eq!(file.seek_data(0x10000).unwrap(), Some(0x10000));
+            assert_eq!(seek_cur(&mut file), 0x10000);
+            assert_eq!(file.seek_data(0x10001).unwrap(), Some(0x10001));
+            assert_eq!(seek_cur(&mut file), 0x10001);
+            assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF));
+            assert_eq!(seek_cur(&mut file), 0x1FFFF);
+
+            assert_eq!(file.seek_data(0).unwrap(), Some(0x10000));
+            assert_eq!(seek_cur(&mut file), 0x10000);
+            assert_eq!(file.seek_data(0x1FFFF).unwrap(), Some(0x1FFFF));
+            assert_eq!(seek_cur(&mut file), 0x1FFFF);
+            assert_eq!(file.seek_data(0x20000).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0x1FFFF);
+        });
+    }
+
+    #[test]
+    fn seek_hole() {
+        with_default_file(0x30000, |mut file| {
+            // File consisting entirely of a hole
+            assert_eq!(file.seek_hole(0).unwrap(), Some(0));
+            assert_eq!(seek_cur(&mut file), 0);
+            assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
+            assert_eq!(seek_cur(&mut file), 0xFFFF);
+
+            // seek_hole at or after the end of the file should return None
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x30000).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+            assert_eq!(file.seek_hole(0x30001).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+
+            // Write some data to [0x10000, 0x20000)
+            let b = [0x55u8; 0x10000];
+            file.seek(SeekFrom::Start(0x10000)).unwrap();
+            file.write_all(&b).unwrap();
+
+            // seek_hole within a hole should return the same offset
+            assert_eq!(file.seek_hole(0).unwrap(), Some(0));
+            assert_eq!(seek_cur(&mut file), 0);
+            assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
+            assert_eq!(seek_cur(&mut file), 0xFFFF);
+
+            // seek_hole within data should return the next hole
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x10000).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x10001).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0xFFFF).unwrap(), Some(0xFFFF));
+            assert_eq!(seek_cur(&mut file), 0xFFFF);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x10000).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x1FFFF).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x20000));
+            assert_eq!(seek_cur(&mut file), 0x20000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x20001));
+            assert_eq!(seek_cur(&mut file), 0x20001);
+
+            // seek_hole at EOF should return None
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x30000).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+
+            // Write some data to [0x20000, 0x30000)
+            file.seek(SeekFrom::Start(0x20000)).unwrap();
+            file.write_all(&b).unwrap();
+
+            // seek_hole within [0x20000, 0x30000) should now find the hole at EOF
+            assert_eq!(file.seek_hole(0x20000).unwrap(), Some(0x30000));
+            assert_eq!(seek_cur(&mut file), 0x30000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x20001).unwrap(), Some(0x30000));
+            assert_eq!(seek_cur(&mut file), 0x30000);
+            file.seek(SeekFrom::Start(0)).unwrap();
+            assert_eq!(file.seek_hole(0x30000).unwrap(), None);
+            assert_eq!(seek_cur(&mut file), 0);
+        });
+    }
+
+    #[test]
+    fn rebuild_refcounts() {
+        with_basic_file(&valid_header(), |mut disk_file: File| {
+            let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
+            let cluster_size = 65536;
+            let mut raw_file =
+                QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile.");
+            QcowFile::rebuild_refcounts(&mut raw_file, header)
+                .expect("Failed to rebuild recounts.");
+        });
+    }
+}
diff --git a/disk/src/qcow/qcow_raw_file.rs b/disk/src/qcow/qcow_raw_file.rs
new file mode 100644
index 0000000..ede28d8
--- /dev/null
+++ b/disk/src/qcow/qcow_raw_file.rs
@@ -0,0 +1,148 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::fs::File;
+use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
+use std::mem::size_of;
+
+use sys_util::WriteZeroes;
+
+/// A qcow file. Allows reading/writing clusters and appending clusters.
+#[derive(Debug)]
+pub struct QcowRawFile {
+    file: File,
+    cluster_size: u64,
+    cluster_mask: u64,
+}
+
+impl QcowRawFile {
+    /// Creates a `QcowRawFile` from the given `File`, `None` is returned if `cluster_size` is not
+    /// a power of two.
+    pub fn from(file: File, cluster_size: u64) -> Option<Self> {
+        if cluster_size.count_ones() != 1 {
+            return None;
+        }
+        Some(QcowRawFile {
+            file,
+            cluster_size,
+            cluster_mask: cluster_size - 1,
+        })
+    }
+
+    /// Reads `count` 64 bit offsets and returns them as a vector.
+    /// `mask` optionally ands out some of the bits on the file.
+    pub fn read_pointer_table(
+        &mut self,
+        offset: u64,
+        count: u64,
+        mask: Option<u64>,
+    ) -> io::Result<Vec<u64>> {
+        let mut table = vec![0; count as usize];
+        self.file.seek(SeekFrom::Start(offset))?;
+        let mask = mask.unwrap_or(u64::max_value());
+        for ptr in &mut table {
+            let mut value = [0u8; 8];
+            self.file.read_exact(&mut value)?;
+            *ptr = u64::from_be_bytes(value) & mask;
+        }
+        Ok(table)
+    }
+
+    /// Reads a cluster's worth of 64 bit offsets and returns them as a vector.
+    /// `mask` optionally ands out some of the bits on the file.
+    pub fn read_pointer_cluster(&mut self, offset: u64, mask: Option<u64>) -> io::Result<Vec<u64>> {
+        let count = self.cluster_size / size_of::<u64>() as u64;
+        self.read_pointer_table(offset, count, mask)
+    }
+
+    /// Writes `table` of u64 pointers to `offset` in the file.
+    /// `non_zero_flags` will be ORed with all non-zero values in `table`.
+    /// writing.
+    pub fn write_pointer_table(
+        &mut self,
+        offset: u64,
+        table: &[u64],
+        non_zero_flags: u64,
+    ) -> io::Result<()> {
+        self.file.seek(SeekFrom::Start(offset))?;
+        let mut buffer = BufWriter::with_capacity(table.len() * size_of::<u64>(), &self.file);
+        for addr in table {
+            let val = if *addr == 0 {
+                0
+            } else {
+                *addr | non_zero_flags
+            };
+            buffer.write_all(&val.to_be_bytes())?;
+        }
+        Ok(())
+    }
+
+    /// Read a refcount block from the file and returns a Vec containing the block.
+    /// Always returns a cluster's worth of data.
+    pub fn read_refcount_block(&mut self, offset: u64) -> io::Result<Vec<u16>> {
+        let count = self.cluster_size / size_of::<u16>() as u64;
+        let mut table = vec![0; count as usize];
+        self.file.seek(SeekFrom::Start(offset))?;
+        for refcount in &mut table {
+            let mut value = [0u8; 2];
+            self.file.read_exact(&mut value)?;
+            *refcount = u16::from_be_bytes(value);
+        }
+        Ok(table)
+    }
+
+    /// Writes a refcount block to the file.
+    pub fn write_refcount_block(&mut self, offset: u64, table: &[u16]) -> io::Result<()> {
+        self.file.seek(SeekFrom::Start(offset))?;
+        let mut buffer = BufWriter::with_capacity(table.len() * size_of::<u16>(), &self.file);
+        for count in table {
+            buffer.write_all(&count.to_be_bytes())?;
+        }
+        Ok(())
+    }
+
+    /// Allocates a new cluster at the end of the current file, return the address.
+    pub fn add_cluster_end(&mut self, max_valid_cluster_offset: u64) -> io::Result<Option<u64>> {
+        // Determine where the new end of the file should be and set_len, which
+        // translates to truncate(2).
+        let file_end: u64 = self.file.seek(SeekFrom::End(0))?;
+        let new_cluster_address: u64 = (file_end + self.cluster_size - 1) & !self.cluster_mask;
+
+        if new_cluster_address > max_valid_cluster_offset {
+            return Ok(None);
+        }
+
+        self.file.set_len(new_cluster_address + self.cluster_size)?;
+
+        Ok(Some(new_cluster_address))
+    }
+
+    /// Returns a reference to the underlying file.
+    pub fn file(&self) -> &File {
+        &self.file
+    }
+
+    /// Returns a mutable reference to the underlying file.
+    pub fn file_mut(&mut self) -> &mut File {
+        &mut self.file
+    }
+
+    /// Returns the size of the file's clusters.
+    pub fn cluster_size(&self) -> u64 {
+        self.cluster_size
+    }
+
+    /// Returns the offset of `address` within a cluster.
+    pub fn cluster_offset(&self, address: u64) -> u64 {
+        address & self.cluster_mask
+    }
+
+    /// Zeros out a cluster in the file.
+    pub fn zero_cluster(&mut self, address: u64) -> io::Result<()> {
+        let cluster_size = self.cluster_size as usize;
+        self.file.seek(SeekFrom::Start(address))?;
+        self.file.write_zeroes_all(cluster_size)?;
+        Ok(())
+    }
+}
diff --git a/disk/src/qcow/refcount.rs b/disk/src/qcow/refcount.rs
new file mode 100644
index 0000000..438de5b
--- /dev/null
+++ b/disk/src/qcow/refcount.rs
@@ -0,0 +1,253 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std;
+use std::fmt::{self, Display};
+use std::io;
+
+use libc::EINVAL;
+
+use crate::qcow::qcow_raw_file::QcowRawFile;
+use crate::qcow::vec_cache::{CacheMap, Cacheable, VecCache};
+
+#[derive(Debug)]
+pub enum Error {
+    /// `EvictingCache` - Error writing a refblock from the cache to disk.
+    EvictingRefCounts(io::Error),
+    /// `InvalidIndex` - Address requested isn't within the range of the disk.
+    InvalidIndex,
+    /// `NeedCluster` - Handle this error by reading the cluster and calling the function again.
+    NeedCluster(u64),
+    /// `NeedNewCluster` - Handle this error by allocating a cluster and calling the function again.
+    NeedNewCluster,
+    /// `ReadingRefCounts` - Error reading the file in to the refcount cache.
+    ReadingRefCounts(io::Error),
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Error::*;
+
+        match self {
+            EvictingRefCounts(e) => write!(
+                f,
+                "failed to write a refblock from the cache to disk: {}",
+                e
+            ),
+            InvalidIndex => write!(f, "address requested is not within the range of the disk"),
+            NeedCluster(addr) => write!(f, "cluster with addr={} needs to be read", addr),
+            NeedNewCluster => write!(f, "new cluster needs to be allocated for refcounts"),
+            ReadingRefCounts(e) => {
+                write!(f, "failed to read the file into the refcount cache: {}", e)
+            }
+        }
+    }
+}
+
+/// Represents the refcount entries for an open qcow file.
+#[derive(Debug)]
+pub struct RefCount {
+    ref_table: VecCache<u64>,
+    refcount_table_offset: u64,
+    refblock_cache: CacheMap<VecCache<u16>>,
+    refcount_block_entries: u64, // number of refcounts in a cluster.
+    cluster_size: u64,
+    max_valid_cluster_offset: u64,
+}
+
+impl RefCount {
+    /// Creates a `RefCount` from `file`, reading the refcount table from `refcount_table_offset`.
+    /// `refcount_table_entries` specifies the number of refcount blocks used by this image.
+    /// `refcount_block_entries` indicates the number of refcounts in each refcount block.
+    /// Each refcount table entry points to a refcount block.
+    pub fn new(
+        raw_file: &mut QcowRawFile,
+        refcount_table_offset: u64,
+        refcount_table_entries: u64,
+        refcount_block_entries: u64,
+        cluster_size: u64,
+    ) -> io::Result<RefCount> {
+        let ref_table = VecCache::from_vec(raw_file.read_pointer_table(
+            refcount_table_offset,
+            refcount_table_entries,
+            None,
+        )?);
+        let max_valid_cluster_index = (ref_table.len() as u64) * refcount_block_entries - 1;
+        let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
+        Ok(RefCount {
+            ref_table,
+            refcount_table_offset,
+            refblock_cache: CacheMap::new(50),
+            refcount_block_entries,
+            cluster_size,
+            max_valid_cluster_offset,
+        })
+    }
+
+    /// Returns the number of refcounts per block.
+    pub fn refcounts_per_block(&self) -> u64 {
+        self.refcount_block_entries
+    }
+
+    /// Returns the maximum valid cluster offset in the raw file for this refcount table.
+    pub fn max_valid_cluster_offset(&self) -> u64 {
+        self.max_valid_cluster_offset
+    }
+
+    /// Returns `NeedNewCluster` if a new cluster needs to be allocated for refcounts. If an
+    /// existing cluster needs to be read, `NeedCluster(addr)` is returned. The Caller should
+    /// allocate a cluster or read the required one and call this function again with the cluster.
+    /// On success, an optional address of a dropped cluster is returned. The dropped cluster can
+    /// be reused for other purposes.
+    pub fn set_cluster_refcount(
+        &mut self,
+        raw_file: &mut QcowRawFile,
+        cluster_address: u64,
+        refcount: u16,
+        mut new_cluster: Option<(u64, VecCache<u16>)>,
+    ) -> Result<Option<u64>> {
+        let (table_index, block_index) = self.get_refcount_index(cluster_address);
+
+        let block_addr_disk = *self.ref_table.get(table_index).ok_or(Error::InvalidIndex)?;
+
+        // Fill the cache if this block isn't yet there.
+        if !self.refblock_cache.contains_key(&table_index) {
+            // Need a new cluster
+            if let Some((addr, table)) = new_cluster.take() {
+                self.ref_table[table_index] = addr;
+                let ref_table = &self.ref_table;
+                self.refblock_cache
+                    .insert(table_index, table, |index, evicted| {
+                        raw_file.write_refcount_block(ref_table[index], evicted.get_values())
+                    })
+                    .map_err(Error::EvictingRefCounts)?;
+            } else {
+                if block_addr_disk == 0 {
+                    return Err(Error::NeedNewCluster);
+                }
+                return Err(Error::NeedCluster(block_addr_disk));
+            }
+        }
+
+        // Unwrap is safe here as the entry was filled directly above.
+        let dropped_cluster = if !self.refblock_cache.get(&table_index).unwrap().dirty() {
+            // Free the previously used block and use a new one. Writing modified counts to new
+            // blocks keeps the on-disk state consistent even if it's out of date.
+            if let Some((addr, _)) = new_cluster.take() {
+                self.ref_table[table_index] = addr;
+                Some(block_addr_disk)
+            } else {
+                return Err(Error::NeedNewCluster);
+            }
+        } else {
+            None
+        };
+
+        self.refblock_cache.get_mut(&table_index).unwrap()[block_index] = refcount;
+        Ok(dropped_cluster)
+    }
+
+    /// Flush the dirty refcount blocks. This must be done before flushing the table that points to
+    /// the blocks.
+    pub fn flush_blocks(&mut self, raw_file: &mut QcowRawFile) -> io::Result<()> {
+        // Write out all dirty L2 tables.
+        for (table_index, block) in self.refblock_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
+            let addr = self.ref_table[*table_index];
+            if addr != 0 {
+                raw_file.write_refcount_block(addr, block.get_values())?;
+            } else {
+                return Err(std::io::Error::from_raw_os_error(EINVAL));
+            }
+            block.mark_clean();
+        }
+        Ok(())
+    }
+
+    /// Flush the refcount table that keeps the address of the refcounts blocks.
+    /// Returns true if the table changed since the previous `flush_table()` call.
+    pub fn flush_table(&mut self, raw_file: &mut QcowRawFile) -> io::Result<bool> {
+        if self.ref_table.dirty() {
+            raw_file.write_pointer_table(
+                self.refcount_table_offset,
+                &self.ref_table.get_values(),
+                0,
+            )?;
+            self.ref_table.mark_clean();
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    /// Gets the refcount for a cluster with the given address.
+    pub fn get_cluster_refcount(
+        &mut self,
+        raw_file: &mut QcowRawFile,
+        address: u64,
+    ) -> Result<u16> {
+        let (table_index, block_index) = self.get_refcount_index(address);
+        let block_addr_disk = *self.ref_table.get(table_index).ok_or(Error::InvalidIndex)?;
+        if block_addr_disk == 0 {
+            return Ok(0);
+        }
+        if !self.refblock_cache.contains_key(&table_index) {
+            let table = VecCache::from_vec(
+                raw_file
+                    .read_refcount_block(block_addr_disk)
+                    .map_err(Error::ReadingRefCounts)?,
+            );
+            let ref_table = &self.ref_table;
+            self.refblock_cache
+                .insert(table_index, table, |index, evicted| {
+                    raw_file.write_refcount_block(ref_table[index], evicted.get_values())
+                })
+                .map_err(Error::EvictingRefCounts)?;
+        }
+        Ok(self.refblock_cache.get(&table_index).unwrap()[block_index])
+    }
+
+    /// Returns the refcount table for this file. This is only useful for debugging.
+    pub fn ref_table(&self) -> &[u64] {
+        &self.ref_table.get_values()
+    }
+
+    /// Returns the refcounts stored in the given block.
+    pub fn refcount_block(
+        &mut self,
+        raw_file: &mut QcowRawFile,
+        table_index: usize,
+    ) -> Result<Option<&[u16]>> {
+        let block_addr_disk = *self.ref_table.get(table_index).ok_or(Error::InvalidIndex)?;
+        if block_addr_disk == 0 {
+            return Ok(None);
+        }
+        if !self.refblock_cache.contains_key(&table_index) {
+            let table = VecCache::from_vec(
+                raw_file
+                    .read_refcount_block(block_addr_disk)
+                    .map_err(Error::ReadingRefCounts)?,
+            );
+            // TODO(dgreid) - closure needs to return an error.
+            let ref_table = &self.ref_table;
+            self.refblock_cache
+                .insert(table_index, table, |index, evicted| {
+                    raw_file.write_refcount_block(ref_table[index], evicted.get_values())
+                })
+                .map_err(Error::EvictingRefCounts)?;
+        }
+        // The index must exist as it was just inserted if it didn't already.
+        Ok(Some(
+            self.refblock_cache.get(&table_index).unwrap().get_values(),
+        ))
+    }
+
+    // Gets the address of the refcount block and the index into the block for the given address.
+    fn get_refcount_index(&self, address: u64) -> (usize, usize) {
+        let block_index = (address / self.cluster_size) % self.refcount_block_entries;
+        let refcount_table_index = (address / self.cluster_size) / self.refcount_block_entries;
+        (refcount_table_index as usize, block_index as usize)
+    }
+}
diff --git a/disk/src/qcow/vec_cache.rs b/disk/src/qcow/vec_cache.rs
new file mode 100644
index 0000000..ecd7673
--- /dev/null
+++ b/disk/src/qcow/vec_cache.rs
@@ -0,0 +1,185 @@
+// Copyright 2018 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use std::collections::hash_map::IterMut;
+use std::collections::HashMap;
+use std::io;
+use std::ops::{Index, IndexMut};
+use std::slice::SliceIndex;
+
+/// Trait that allows for checking if an implementor is dirty. Useful for types that are cached so
+/// it can be checked if they need to be committed to disk.
+pub trait Cacheable {
+    /// Used to check if the item needs to be written out or if it can be discarded.
+    fn dirty(&self) -> bool;
+}
+
+#[derive(Debug)]
+/// Represents a vector that implements the `Cacheable` trait so it can be held in a cache.
+pub struct VecCache<T: 'static + Copy + Default> {
+    vec: Box<[T]>,
+    dirty: bool,
+}
+
+impl<T: 'static + Copy + Default> VecCache<T> {
+    /// Creates a `VecCache` that can hold `count` elements.
+    pub fn new(count: usize) -> VecCache<T> {
+        VecCache {
+            vec: vec![Default::default(); count].into_boxed_slice(),
+            dirty: true,
+        }
+    }
+
+    /// Creates a `VecCache` from the passed in `vec`.
+    pub fn from_vec(vec: Vec<T>) -> VecCache<T> {
+        VecCache {
+            vec: vec.into_boxed_slice(),
+            dirty: false,
+        }
+    }
+
+    pub fn get<I>(&self, index: I) -> Option<&<I as SliceIndex<[T]>>::Output>
+    where
+        I: SliceIndex<[T]>,
+    {
+        self.vec.get(index)
+    }
+
+    /// Gets a reference to the underlying vector.
+    pub fn get_values(&self) -> &[T] {
+        &self.vec
+    }
+
+    /// Mark this cache element as clean.
+    pub fn mark_clean(&mut self) {
+        self.dirty = false;
+    }
+
+    /// Returns the number of elements in the vector.
+    pub fn len(&self) -> usize {
+        self.vec.len()
+    }
+}
+
+impl<T: 'static + Copy + Default> Cacheable for VecCache<T> {
+    fn dirty(&self) -> bool {
+        self.dirty
+    }
+}
+
+impl<T: 'static + Copy + Default> Index<usize> for VecCache<T> {
+    type Output = T;
+
+    fn index(&self, index: usize) -> &T {
+        self.vec.index(index)
+    }
+}
+
+impl<T: 'static + Copy + Default> IndexMut<usize> for VecCache<T> {
+    fn index_mut(&mut self, index: usize) -> &mut T {
+        self.dirty = true;
+        self.vec.index_mut(index)
+    }
+}
+
+#[derive(Debug)]
+pub struct CacheMap<T: Cacheable> {
+    capacity: usize,
+    map: HashMap<usize, T>,
+}
+
+impl<T: Cacheable> CacheMap<T> {
+    pub fn new(capacity: usize) -> Self {
+        CacheMap {
+            capacity,
+            map: HashMap::with_capacity(capacity),
+        }
+    }
+
+    pub fn contains_key(&self, key: &usize) -> bool {
+        self.map.contains_key(key)
+    }
+
+    pub fn get(&self, index: &usize) -> Option<&T> {
+        self.map.get(index)
+    }
+
+    pub fn get_mut(&mut self, index: &usize) -> Option<&mut T> {
+        self.map.get_mut(index)
+    }
+
+    pub fn iter_mut(&mut self) -> IterMut<usize, T> {
+        self.map.iter_mut()
+    }
+
+    // Check if the refblock cache is full and we need to evict.
+    pub fn insert<F>(&mut self, index: usize, block: T, write_callback: F) -> io::Result<()>
+    where
+        F: FnOnce(usize, T) -> io::Result<()>,
+    {
+        if self.map.len() == self.capacity {
+            // TODO(dgreid) - smarter eviction strategy.
+            let to_evict = *self.map.iter().nth(0).unwrap().0;
+            if let Some(evicted) = self.map.remove(&to_evict) {
+                if evicted.dirty() {
+                    write_callback(to_evict, evicted)?;
+                }
+            }
+        }
+        self.map.insert(index, block);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    struct NumCache(pub u64);
+    impl Cacheable for NumCache {
+        fn dirty(&self) -> bool {
+            true
+        }
+    }
+
+    #[test]
+    fn evicts_when_full() {
+        let mut cache = CacheMap::<NumCache>::new(3);
+        let mut evicted = None;
+        cache
+            .insert(0, NumCache(5), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert_eq!(evicted, None);
+        cache
+            .insert(1, NumCache(6), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert_eq!(evicted, None);
+        cache
+            .insert(2, NumCache(7), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert_eq!(evicted, None);
+        cache
+            .insert(3, NumCache(8), |index, _| {
+                evicted = Some(index);
+                Ok(())
+            })
+            .unwrap();
+        assert!(evicted.is_some());
+
+        // Check that three of the four items inserted are still there and that the most recently
+        // inserted is one of them.
+        let num_items = (0..=3).filter(|k| cache.contains_key(&k)).count();
+        assert_eq!(num_items, 3);
+        assert!(cache.contains_key(&3));
+    }
+}