From 1e47dd9cb847f57ec859c071d6c697270a53d1e7 Mon Sep 17 00:00:00 2001 From: Andreas Rammhold Date: Tue, 25 May 2021 20:30:28 +0200 Subject: cpython: Enable LTO on all builds as it doesn't break reproducibility LTO allows us to optimise the binaries we are shipping a bit further than just with the regular -Ox and other compiler flags. It is also deterministic and doesn't harm our reproducibility efforts while providing us with up to a 10% performance gain on some benchmarks (while others are sometimes slightly slower). See the table below for a comparison of this version with the Python 3.9 build that sets -fno-semantic-interposition. +-------------------------+--------------------+------------------------+--------------+------------------------+ | Benchmark | py39-nsip.nix.json | py39-nsip-lto.nix.json | Change | Significance | +=========================+====================+========================+==============+========================+ | 2to3 | 642 ms | 620 ms | 1.03x faster | Significant (t=12.04) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | chameleon | 14.6 ms | 14.4 ms | 1.02x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | chaos | 182 ms | 182 ms | 1.00x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | crypto_pyaes | 175 ms | 172 ms | 1.02x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | deltablue | 11.2 ms | 11.2 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | django_template | 82.0 ms | 81.4 ms | 1.01x faster | Not significant | 
+-------------------------+--------------------+------------------------+--------------+------------------------+ | dulwich_log | 101 ms | 99.8 ms | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | fannkuch | 634 ms | 638 ms | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | float | 176 ms | 189 ms | 1.08x slower | Significant (t=-4.30) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | go | 366 ms | 365 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | hexiom | 14.9 ms | 15.1 ms | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | json_dumps | 18.7 ms | 18.5 ms | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | json_loads | 37.9 us | 37.3 us | 1.02x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | logging_format | 14.2 us | 14.3 us | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | logging_silent | 305 ns | 313 ns | 1.02x slower | Significant (t=-3.91) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | logging_simple | 13.0 us | 13.2 us | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | mako | 24.7 ms | 23.3 ms | 1.06x faster | Significant (t=10.74) | 
+-------------------------+--------------------+------------------------+--------------+------------------------+ | meteor_contest | 130 ms | 128 ms | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | nbody | 201 ms | 201 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | nqueens | 152 ms | 154 ms | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | pathlib | 26.3 ms | 26.2 ms | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | pickle | 13.1 us | 13.7 us | 1.05x slower | Significant (t=-10.36) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | pickle_dict | 26.6 us | 27.1 us | 1.02x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | pickle_list | 4.34 us | 4.31 us | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | pickle_pure_python | 738 us | 759 us | 1.03x slower | Significant (t=-5.26) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | pidigits | 181 ms | 181 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | pyflate | 959 ms | 974 ms | 1.02x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | python_startup | 34.0 ms | 31.4 ms | 1.08x faster | Significant 
(t=29.75) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | python_startup_no_site | 15.3 ms | 14.5 ms | 1.06x faster | Significant (t=17.58) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | raytrace | 849 ms | 846 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | regex_compile | 261 ms | 261 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | regex_dna | 187 ms | 221 ms | 1.18x slower | Significant (t=-46.94) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | regex_effbot | 3.66 ms | 3.98 ms | 1.09x slower | Significant (t=-18.55) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | regex_v8 | 29.7 ms | 29.3 ms | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | richards | 113 ms | 110 ms | 1.03x faster | Significant (t=3.47) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | scimark_fft | 592 ms | 590 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | scimark_lu | 253 ms | 265 ms | 1.05x slower | Significant (t=-8.97) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | scimark_monte_carlo | 170 ms | 177 ms | 1.04x slower | Significant (t=-4.96) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | 
scimark_sor | 310 ms | 315 ms | 1.02x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | scimark_sparse_mat_mult | 8.36 ms | 8.33 ms | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | spectral_norm | 232 ms | 229 ms | 1.02x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | sqlalchemy_declarative | 185 ms | 183 ms | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | sqlalchemy_imperative | 27.4 ms | 27.6 ms | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | sqlite_synth | 4.73 us | 4.73 us | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | sympy_expand | 813 ms | 819 ms | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | sympy_integrate | 31.6 ms | 31.8 ms | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | sympy_str | 477 ms | 479 ms | 1.00x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | sympy_sum | 245 ms | 247 ms | 1.01x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | telco | 11.3 ms | 11.4 ms | 1.00x slower | Not significant | 
+-------------------------+--------------------+------------------------+--------------+------------------------+ | tornado_http | 172 ms | 172 ms | 1.00x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | unpack_sequence | 56.2 ns | 51.2 ns | 1.10x faster | Significant (t=2.50) | +-------------------------+--------------------+------------------------+--------------+------------------------+ | unpickle | 19.8 us | 19.5 us | 1.02x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | unpickle_list | 5.75 us | 5.75 us | 1.00x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | unpickle_pure_python | 524 us | 522 us | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | xml_etree_generate | 148 ms | 148 ms | 1.00x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | xml_etree_iterparse | 129 ms | 131 ms | 1.02x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | xml_etree_parse | 179 ms | 177 ms | 1.01x faster | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ | xml_etree_process | 118 ms | 119 ms | 1.00x slower | Not significant | +-------------------------+--------------------+------------------------+--------------+------------------------+ --- pkgs/development/interpreters/python/cpython/default.nix | 6 ++++++ pkgs/development/interpreters/python/default.nix | 1 + 2 files changed, 7 insertions(+) (limited to 
'pkgs/development/interpreters/python') diff --git a/pkgs/development/interpreters/python/cpython/default.nix b/pkgs/development/interpreters/python/cpython/default.nix index e0b941e009d..4ee67a452fe 100644 --- a/pkgs/development/interpreters/python/cpython/default.nix +++ b/pkgs/development/interpreters/python/cpython/default.nix @@ -41,6 +41,8 @@ , enableOptimizations ? false # enableNoSemanticInterposition is a subset of the enableOptimizations flag that doesn't harm reproducibility. , enableNoSemanticInterposition ? true +# enableLTO is a subset of the enableOptimizations flag that doesn't harm reproducibility. +, enableLTO ? true , reproducibleBuild ? true , pythonAttr ? "python${sourceVersion.major}${sourceVersion.minor}" }: @@ -102,6 +104,8 @@ let ] ++ optionals (stdenv.hostPlatform != stdenv.buildPlatform) [ buildPackages.stdenv.cc pythonForBuild + ] ++ optionals (stdenv.cc.isClang && enableLTO) [ + stdenv.cc.cc.libllvm.out ]; buildInputs = filter (p: p != null) ([ @@ -280,6 +284,8 @@ in with passthru; stdenv.mkDerivation { "--with-system-ffi" ] ++ optionals enableOptimizations [ "--enable-optimizations" + ] ++ optionals enableLTO [ + "--with-lto" ] ++ optionals (pythonOlder "3.7") [ # This is unconditionally true starting in CPython 3.7. "--with-threads" diff --git a/pkgs/development/interpreters/python/default.nix b/pkgs/development/interpreters/python/default.nix index 176f8db4bff..be0d6ce1e43 100644 --- a/pkgs/development/interpreters/python/default.nix +++ b/pkgs/development/interpreters/python/default.nix @@ -225,6 +225,7 @@ in { stripBytecode = true; includeSiteCustomize = false; enableOptimizations = false; + enableLTO = false; mimetypesSupport = false; } // sources.python39)).overrideAttrs(old: { pname = "python3-minimal"; -- cgit 1.4.1