summary refs log tree commit diff
path: root/pkgs/applications/science/biology/svaba/default.nix
blob: 441c7ed6dcc3d193f7ed06590517aeac62645f78 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Package expression for SvABA, a structural variant and indel caller.
#
# Arguments are supplied by callPackage:
#   lib, stdenv      - library helpers and the standard build environment
#   zlib, bzip2, xz  - compression libraries passed to the build as buildInputs
#   fetchFromGitHub  - fetcher used to obtain the tagged upstream source
{ lib, stdenv, zlib, bzip2, xz, fetchFromGitHub }:

stdenv.mkDerivation rec {
  pname = "svaba";
  version = "1.1.0";

  src = fetchFromGitHub {
    owner = "walaj";
    repo = "svaba";
    # The revision fetched is the one named after the package version.
    rev = version;
    sha256 = "1vv5mc9z5d22kgdy7mm27ya5aahnqgkcrskdr2405058ikk9g8kp";
    # Git submodules are fetched together with the main repository.
    fetchSubmodules = true;
  };

  buildInputs = [ zlib bzip2 xz ];

  # Work around a build failure on toolchains that default to -fno-common
  # (e.g. upstream gcc-10). Without -fcommon the link step fails with:
  #   ld: ./libfml.a(rle.o):/build/source/SeqLib/fermi-lite/rle.h:33: multiple definition of
  #     `rle_auxtab'; ./libfml.a(misc.o):/build/source/SeqLib/fermi-lite/rle.h:33: first defined here
  env.NIX_CFLAGS_COMPILE = "-fcommon";

  # Custom install step: copy the built binary from the build tree into
  # $out/bin (creating the directory) with mode 555. The pre/post hooks are
  # still run so that overrides can extend this phase.
  installPhase = ''
    runHook preInstall
    install -Dm555 src/svaba/svaba $out/bin/svaba
    runHook postInstall
  '';

  meta = {
    description = "Structural variant and INDEL caller for DNA sequencing data, using genome-wide local assembly";
    longDescription = ''
      SvABA is a method for detecting structural variants in sequencing data
      using genome-wide local assembly. Under the hood, SvABA uses a custom
      implementation of SGA (String Graph Assembler) by Jared Simpson,
      and BWA-MEM by Heng Li. Contigs are assembled for every 25kb window
      (with some small overlap) for every region in the genome.
      The default is to use only clipped, discordant, unmapped and indel reads,
      although this can be customized to any set of reads at the command line using VariantBam rules.
      These contigs are then immediately aligned to the reference with BWA-MEM and parsed to identify variants.
      Sequencing reads are then realigned to the contigs with BWA-MEM, and variants are scored by their read support.
    '';
    homepage = "https://github.com/walaj/svaba";
    # NOTE(review): the unsuffixed gpl3 identifier does not say whether
    # "only" or "or later" applies - confirm against upstream and switch to
    # gpl3Only / gpl3Plus accordingly.
    license = lib.licenses.gpl3;
    maintainers = with lib.maintainers; [ scalavision ];
    # Name of the single executable installed into $out/bin by installPhase.
    mainProgram = "svaba";
    platforms = lib.platforms.linux;
    # Marked broken on aarch64 Linux only.
    broken = stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64;
  };
}