summary refs log tree commit diff
diff options
context:
space:
mode:
authorSergei Trofimovich <slyich@gmail.com>2022-06-04 15:38:56 +0100
committerSergei Trofimovich <slyich@gmail.com>2022-06-11 11:25:58 +0100
commitffb456ae61d1740b5b254f6d431801782d7ab0d7 (patch)
tree844de0975b9723d1b5261687319a3e067a0ca512
parent0da898ca39a9b860fbd50cdde781286ec52f4194 (diff)
downloadnixpkgs-ffb456ae61d1740b5b254f6d431801782d7ab0d7.tar
nixpkgs-ffb456ae61d1740b5b254f6d431801782d7ab0d7.tar.gz
nixpkgs-ffb456ae61d1740b5b254f6d431801782d7ab0d7.tar.bz2
nixpkgs-ffb456ae61d1740b5b254f6d431801782d7ab0d7.tar.lz
nixpkgs-ffb456ae61d1740b5b254f6d431801782d7ab0d7.tar.xz
nixpkgs-ffb456ae61d1740b5b254f6d431801782d7ab0d7.tar.zst
nixpkgs-ffb456ae61d1740b5b254f6d431801782d7ab0d7.zip
fetchzip: force UTF-8 compatible locale to unpack non-ASCII symbols
musl and darwin support UTF-8 locales without any extras. As a result
unzip can unpack UTF-8 filenames there as is. But on glibc, when no
locale archive is present, file names get mangled as:

    deps/αβ -> deps/#U03b1#U03b2

This makes `fetchzip` fixed-output derivations unstable.

Tested that with this change `coq.src`, whose hash was generated on a
system that mangles UTF-8 symbols, fails to build:

    $ nix build -f. coq.src --rebuild -L
    source> trying https://github.com/coq/coq/archive/V8.15.2.zip
    source>   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
    source>                                  Dload  Upload   Total   Spent    Left  Speed
    source>   0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
    source> 100 8945k  100 8945k    0     0  1513k      0  0:00:05  0:00:05 --:--:-- 1989k
    source> unpacking source archive /build/V8.15.2.zip
    error: hash mismatch in fixed-output derivation '/nix/store/hrnyykm7wgw8vxisgq7hc2bg5gr0y6s8-source.drv':
             specified: sha256-h81nFqkuvZkMR7YLHy7laTq5yOhjMW+w6rYzncxvyD4=
                got:    sha256-DTspmwyD3Evl1CUmvUy2MonbLGUezvsHN3prmP9eK2I=

Note: this means that some of the existing caches for fixed-output
derivations become incorrect. It should not break tarballs already cached
on cache.nixos.org, so the impact should not be widespread.
-rw-r--r--pkgs/build-support/fetchzip/default.nix7
-rw-r--r--pkgs/tools/archivers/unzip/setup-hook.sh8
2 files changed, 12 insertions, 3 deletions
diff --git a/pkgs/build-support/fetchzip/default.nix b/pkgs/build-support/fetchzip/default.nix
index 98c41037074..10142134792 100644
--- a/pkgs/build-support/fetchzip/default.nix
+++ b/pkgs/build-support/fetchzip/default.nix
@@ -5,7 +5,7 @@
 # (e.g. due to minor changes in the compression algorithm, or changes
 # in timestamps).
 
-{ lib, fetchurl, unzip }:
+{ lib, fetchurl, unzip, glibcLocalesUtf8 }:
 
 { # Optionally move the contents of the unpacked tree up one level.
   stripRoot ? true
@@ -35,7 +35,10 @@ in {
 
   downloadToTemp = true;
 
-  nativeBuildInputs = [ unzip ] ++ nativeBuildInputs;
+  # Have to pull in glibcLocalesUtf8 for unzip in setup-hook.sh to handle
+  # UTF-8 aware locale:
+  #   https://github.com/NixOS/nixpkgs/issues/176225#issuecomment-1146617263
+  nativeBuildInputs = [ unzip glibcLocalesUtf8 ] ++ nativeBuildInputs;
 
   postFetch =
     ''
diff --git a/pkgs/tools/archivers/unzip/setup-hook.sh b/pkgs/tools/archivers/unzip/setup-hook.sh
index 4055d2fab51..99c63f68e94 100644
--- a/pkgs/tools/archivers/unzip/setup-hook.sh
+++ b/pkgs/tools/archivers/unzip/setup-hook.sh
@@ -1,5 +1,11 @@
 unpackCmdHooks+=(_tryUnzip)
 _tryUnzip() {
     if ! [[ "$curSrc" =~ \.zip$ ]]; then return 1; fi
-    unzip -qq "$curSrc"
+
+    # UTF-8 locale is needed for unzip on glibc to handle UTF-8 symbols:
+    #   https://github.com/NixOS/nixpkgs/issues/176225#issuecomment-1146617263
+    # Otherwise unzip unpacks escaped file names as if '-U' options was in effect.
+    #
+    # Pick en_US.UTF-8 as most possible to be present on glibc, musl and darwin.
+    LANG=en_US.UTF-8 unzip -qq "$curSrc"
 }