diff options
Diffstat (limited to 'pkgs/os-specific/linux/kernel')
52 files changed, 1523 insertions, 3441 deletions
diff --git a/pkgs/os-specific/linux/kernel/common-config.nix b/pkgs/os-specific/linux/kernel/common-config.nix index 2a23c8d3ea5..2954ee8f78b 100644 --- a/pkgs/os-specific/linux/kernel/common-config.nix +++ b/pkgs/os-specific/linux/kernel/common-config.nix @@ -29,20 +29,31 @@ let mkIf (stdenv.hostPlatform.isAarch32 || stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isx86_64 || - (stdenv.hostPlatform.isPowerPC && stdenv.hostPlatform.is64bit) || + (stdenv.hostPlatform.isPower && stdenv.hostPlatform.is64bit) || (stdenv.hostPlatform.isMips && stdenv.hostPlatform.is64bit)); options = { debug = { - DEBUG_INFO = if (features.debug or false) then yes else no; + # Necessary for BTF + DEBUG_INFO = mkMerge [ + (whenOlder "5.2" (if (features.debug or false) then yes else no)) + (whenBetween "5.2" "5.18" yes) + ]; + DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT = whenAtLeast "5.18" yes; + # Reduced debug info conflict with BTF and have been enabled in + # aarch64 defconfig since 5.13 + DEBUG_INFO_REDUCED = whenAtLeast "5.13" (option no); + DEBUG_INFO_BTF = whenAtLeast "5.2" (option yes); + # Allow loading modules with mismatched BTFs + # FIXME: figure out how to actually make BTFs reproducible instead + # See https://github.com/NixOS/nixpkgs/pull/181456 for details. 
+ MODULE_ALLOW_BTF_MISMATCH = whenAtLeast "5.18" (option yes); + BPF_LSM = whenAtLeast "5.7" (option yes); DEBUG_KERNEL = yes; DEBUG_DEVRES = no; DYNAMIC_DEBUG = yes; - TIMER_STATS = whenOlder "4.11" yes; - DEBUG_NX_TEST = whenOlder "4.11" no; DEBUG_STACK_USAGE = no; - DEBUG_STACKOVERFLOW = option no; RCU_TORTURE_TEST = no; SCHEDSTATS = no; DETECT_HUNG_TASK = yes; @@ -54,16 +65,50 @@ let }; power-management = { - PM_ADVANCED_DEBUG = yes; - X86_INTEL_LPSS = yes; - X86_INTEL_PSTATE = yes; - INTEL_IDLE = yes; CPU_FREQ_DEFAULT_GOV_PERFORMANCE = yes; - CPU_FREQ_GOV_SCHEDUTIL = whenAtLeast "4.9" yes; + CPU_FREQ_GOV_SCHEDUTIL = yes; + PM_ADVANCED_DEBUG = yes; PM_WAKELOCKS = yes; - # Power-capping framework and support for INTEL RAPL POWERCAP = yes; + # ACPI Firmware Performance Data Table Support + ACPI_FPDT = whenAtLeast "5.12" (option yes); + # ACPI Heterogeneous Memory Attribute Table Support + ACPI_HMAT = whenAtLeast "5.2" (option yes); + # ACPI Platform Error Interface + ACPI_APEI = (option yes); + # APEI Generic Hardware Error Source + ACPI_APEI_GHES = (option yes); + + # Enable lazy RCUs for power savings: + # https://lore.kernel.org/rcu/20221019225138.GA2499943@paulmck-ThinkPad-P17-Gen-1/ + # RCU_LAZY depends on RCU_NOCB_CPU depends on NO_HZ_FULL + # depends on HAVE_VIRT_CPU_ACCOUNTING_GEN depends on 64BIT, + # so we can't force-enable this + RCU_LAZY = whenAtLeast "6.2" (option yes); + } // optionalAttrs (stdenv.hostPlatform.isx86) { + INTEL_IDLE = yes; INTEL_RAPL = whenAtLeast "5.3" module; + X86_INTEL_LPSS = yes; + X86_INTEL_PSTATE = yes; + X86_AMD_PSTATE = whenAtLeast "5.17" yes; + # Intel DPTF (Dynamic Platform and Thermal Framework) Support + ACPI_DPTF = whenAtLeast "5.10" yes; + + # Required to bring up some Bay Trail devices properly + I2C = yes; + I2C_DESIGNWARE_PLATFORM = yes; + PMIC_OPREGION = whenAtLeast "5.10" yes; + INTEL_SOC_PMIC = whenAtLeast "5.10" yes; + BYTCRC_PMIC_OPREGION = whenAtLeast "5.10" yes; + CHTCRC_PMIC_OPREGION = whenAtLeast "5.10" 
yes; + XPOWER_PMIC_OPREGION = whenAtLeast "5.10" yes; + BXT_WC_PMIC_OPREGION = whenAtLeast "5.10" yes; + INTEL_SOC_PMIC_CHTWC = whenAtLeast "5.10" yes; + CHT_WC_PMIC_OPREGION = whenAtLeast "5.10" yes; + INTEL_SOC_PMIC_CHTDC_TI = whenAtLeast "5.10" yes; + CHT_DC_TI_PMIC_OPREGION = whenAtLeast "5.10" yes; + MFD_TPS68470 = whenBetween "5.10" "5.13" yes; + TPS68470_PMIC_OPREGION = whenAtLeast "5.10" yes; }; external-firmware = { @@ -82,6 +127,16 @@ let CC_OPTIMIZE_FOR_SIZE = no; }; + memory = { + DAMON = whenAtLeast "5.15" yes; + DAMON_VADDR = whenAtLeast "5.15" yes; + DAMON_PADDR = whenAtLeast "5.16" yes; + DAMON_SYSFS = whenAtLeast "5.18" yes; + DAMON_DBGFS = whenAtLeast "5.15" yes; + DAMON_RECLAIM = whenAtLeast "5.16" yes; + DAMON_LRU_SORT = whenAtLeast "6.0" yes; + }; + memtest = { MEMTEST = yes; }; @@ -91,13 +146,20 @@ let scheduler = { IOSCHED_CFQ = whenOlder "5.0" yes; # Removed in 5.0-RC1 BLK_CGROUP = yes; # required by CFQ" - BLK_CGROUP_IOLATENCY = whenAtLeast "4.19" yes; + BLK_CGROUP_IOLATENCY = yes; BLK_CGROUP_IOCOST = whenAtLeast "5.4" yes; IOSCHED_DEADLINE = whenOlder "5.0" yes; # Removed in 5.0-RC1 - MQ_IOSCHED_DEADLINE = whenAtLeast "4.11" yes; - BFQ_GROUP_IOSCHED = whenAtLeast "4.12" yes; - MQ_IOSCHED_KYBER = whenAtLeast "4.12" yes; - IOSCHED_BFQ = whenAtLeast "4.12" module; + MQ_IOSCHED_DEADLINE = yes; + BFQ_GROUP_IOSCHED = yes; + MQ_IOSCHED_KYBER = yes; + IOSCHED_BFQ = module; + }; + + + timer = { + # Enable Full Dynticks System. + # NO_HZ_FULL depends on HAVE_VIRT_CPU_ACCOUNTING_GEN depends on 64BIT + NO_HZ_FULL = mkIf stdenv.is64bit yes; }; # Enable NUMA. 
@@ -109,6 +171,7 @@ let NET = yes; IP_ADVANCED_ROUTER = yes; IP_PNP = no; + IP_ROUTE_MULTIPATH = yes; IP_VS_PROTO_TCP = yes; IP_VS_PROTO_UDP = yes; IP_VS_PROTO_ESP = yes; @@ -120,10 +183,11 @@ let BPF_JIT = whenPlatformHasEBPFJit yes; BPF_JIT_ALWAYS_ON = whenPlatformHasEBPFJit no; # whenPlatformHasEBPFJit yes; # see https://github.com/NixOS/nixpkgs/issues/79304 HAVE_EBPF_JIT = whenPlatformHasEBPFJit yes; - BPF_STREAM_PARSER = whenAtLeast "4.19" yes; - XDP_SOCKETS = whenAtLeast "4.19" yes; + BPF_STREAM_PARSER = yes; + XDP_SOCKETS = yes; XDP_SOCKETS_DIAG = whenAtLeast "5.1" yes; WAN = yes; + TCP_CONG_ADVANCED = yes; TCP_CONG_CUBIC = yes; # This is the default congestion control algorithm since 2.6.19 # Required by systemd per-cgroup firewalling CGROUP_BPF = option yes; @@ -141,12 +205,12 @@ let IPV6_MROUTE = yes; IPV6_MROUTE_MULTIPLE_TABLES = yes; IPV6_PIMSM_V2 = yes; - IPV6_FOU_TUNNEL = whenAtLeast "4.7" module; - IPV6_SEG6_LWTUNNEL = whenAtLeast "4.10" yes; - IPV6_SEG6_HMAC = whenAtLeast "4.10" yes; - IPV6_SEG6_BPF = whenAtLeast "4.18" yes; - NET_CLS_BPF = whenAtLeast "4.4" module; - NET_ACT_BPF = whenAtLeast "4.4" module; + IPV6_FOU_TUNNEL = module; + IPV6_SEG6_LWTUNNEL = yes; + IPV6_SEG6_HMAC = yes; + IPV6_SEG6_BPF = yes; + NET_CLS_BPF = module; + NET_ACT_BPF = module; NET_SCHED = yes; L2TP_V3 = yes; L2TP_IP = module; @@ -161,7 +225,7 @@ let PPP_FILTER = yes; # needed for iwd WPS support (wpa_supplicant replacement) - KEY_DH_OPERATIONS = whenAtLeast "4.7" yes; + KEY_DH_OPERATIONS = yes; # needed for nftables # Networking Options @@ -173,22 +237,17 @@ let NF_CONNTRACK_TIMEOUT = yes; NF_CONNTRACK_TIMESTAMP = yes; NETFILTER_NETLINK_GLUE_CT = yes; - NF_TABLES_INET = mkMerge [ (whenOlder "4.17" module) - (whenAtLeast "4.17" yes) ]; - NF_TABLES_NETDEV = mkMerge [ (whenOlder "4.17" module) - (whenAtLeast "4.17" yes) ]; + NF_TABLES_INET = yes; + NF_TABLES_NETDEV = yes; NFT_REJECT_NETDEV = whenAtLeast "5.11" module; # IP: Netfilter Configuration - NF_TABLES_IPV4 = mkMerge 
[ (whenOlder "4.17" module) - (whenAtLeast "4.17" yes) ]; - NF_TABLES_ARP = mkMerge [ (whenOlder "4.17" module) - (whenAtLeast "4.17" yes) ]; + NF_TABLES_IPV4 = yes; + NF_TABLES_ARP = yes; # IPv6: Netfilter Configuration - NF_TABLES_IPV6 = mkMerge [ (whenOlder "4.17" module) - (whenAtLeast "4.17" yes) ]; + NF_TABLES_IPV6 = yes; # Bridge Netfilter Configuration - NF_TABLES_BRIDGE = mkMerge [ (whenBetween "4.19" "5.3" yes) + NF_TABLES_BRIDGE = mkMerge [ (whenOlder "5.3" yes) (whenAtLeast "5.3" module) ]; # needed for `dropwatch` @@ -200,28 +259,49 @@ let INET_DIAG = mkDefault module; INET_TCP_DIAG = mkDefault module; INET_UDP_DIAG = mkDefault module; - INET_RAW_DIAG = whenAtLeast "4.14" (mkDefault module); - INET_DIAG_DESTROY = whenAtLeast "4.9" (mkDefault yes); + INET_RAW_DIAG = mkDefault module; + INET_DIAG_DESTROY = mkDefault yes; # enable multipath-tcp MPTCP = whenAtLeast "5.6" yes; MPTCP_IPV6 = whenAtLeast "5.6" yes; INET_MPTCP_DIAG = whenAtLeast "5.9" (mkDefault module); + + # Kernel TLS + TLS = module; + TLS_DEVICE = yes; + + # infiniband + INFINIBAND = module; + INFINIBAND_IPOIB = module; + INFINIBAND_IPOIB_CM = yes; }; wireless = { - CFG80211_WEXT = option yes; # Without it, ipw2200 drivers don't build - IPW2100_MONITOR = option yes; # support promiscuous mode - IPW2200_MONITOR = option yes; # support promiscuous mode - HOSTAP_FIRMWARE = option yes; # Support downloading firmware images with Host AP driver - HOSTAP_FIRMWARE_NVRAM = option yes; - ATH9K_PCI = option yes; # Detect Atheros AR9xxx cards on PCI(e) bus - ATH9K_AHB = option yes; # Ditto, AHB bus - B43_PHY_HT = option yes; - BCMA_HOST_PCI = option yes; - RTW88 = whenAtLeast "5.2" module; - RTW88_8822BE = mkMerge [ (whenBetween "5.2" "5.8" yes) (whenAtLeast "5.8" module) ]; - RTW88_8822CE = mkMerge [ (whenBetween "5.2" "5.8" yes) (whenAtLeast "5.8" module) ]; + CFG80211_WEXT = option yes; # Without it, ipw2200 drivers don't build + IPW2100_MONITOR = option yes; # support promiscuous mode + 
IPW2200_MONITOR = option yes; # support promiscuous mode + HOSTAP_FIRMWARE = option yes; # Support downloading firmware images with Host AP driver + HOSTAP_FIRMWARE_NVRAM = option yes; + ATH9K_PCI = option yes; # Detect Atheros AR9xxx cards on PCI(e) bus + ATH9K_AHB = option yes; # Ditto, AHB bus + # The description of this option makes it sound dangerous or even illegal + # But OpenWRT enables it by default: https://github.com/openwrt/openwrt/blob/master/package/kernel/mac80211/Makefile#L55 + # At the time of writing (25-06-2023): this is only used in a "correct" way by ath drivers for initiating DFS radiation + # for "certified devices" + EXPERT = option yes; # this is needed for offering the certification option + CFG80211_CERTIFICATION_ONUS = option yes; + # DFS: "Dynamic Frequency Selection" is a spectrum-sharing mechanism that allows + # you to use certain interesting frequency when your local regulatory domain mandates it. + # ATH drivers hides the feature behind this option and makes hostapd works with DFS frequencies. + # OpenWRT enables it too: https://github.com/openwrt/openwrt/blob/master/package/kernel/mac80211/ath.mk#L42 + ATH9K_DFS_CERTIFIED = option yes; + ATH10K_DFS_CERTIFIED = option yes; + B43_PHY_HT = option yes; + BCMA_HOST_PCI = option yes; + RTW88 = whenAtLeast "5.2" module; + RTW88_8822BE = mkMerge [ (whenBetween "5.2" "5.8" yes) (whenAtLeast "5.8" module) ]; + RTW88_8822CE = mkMerge [ (whenBetween "5.2" "5.8" yes) (whenAtLeast "5.8" module) ]; }; fb = { @@ -238,40 +318,59 @@ let FB_3DFX_ACCEL = yes; FB_VESA = yes; FRAMEBUFFER_CONSOLE = yes; - FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER = whenAtLeast "4.19" yes; + FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER = yes; FRAMEBUFFER_CONSOLE_ROTATION = yes; + FRAMEBUFFER_CONSOLE_DETECT_PRIMARY = yes; FB_GEODE = mkIf (stdenv.hostPlatform.system == "i686-linux") yes; + # On 5.14 this conflicts with FB_SIMPLE. 
+ DRM_SIMPLEDRM = whenAtLeast "5.14" no; + DRM_FBDEV_EMULATION = yes; + }; + + fonts = { + FONTS = yes; + # Default fonts enabled if FONTS is not set + FONT_8x8 = yes; + FONT_8x16 = yes; + # High DPI font + FONT_TER16x32 = whenAtLeast "5.0" yes; }; video = { + DRM_LEGACY = no; + NOUVEAU_LEGACY_CTX_SUPPORT = whenBetween "5.2" "6.3" no; + # Allow specifying custom EDID on the kernel command line DRM_LOAD_EDID_FIRMWARE = yes; VGA_SWITCHEROO = yes; # Hybrid graphics support DRM_GMA500 = whenAtLeast "5.12" module; DRM_GMA600 = whenOlder "5.13" yes; DRM_GMA3600 = whenOlder "5.12" yes; - DRM_VMWGFX_FBCON = yes; - # necessary for amdgpu polaris support - DRM_AMD_POWERPLAY = whenBetween "4.5" "4.9" yes; + DRM_VMWGFX_FBCON = whenOlder "6.2" yes; # (experimental) amdgpu support for verde and newer chipsets - DRM_AMDGPU_SI = whenAtLeast "4.9" yes; + DRM_AMDGPU_SI = yes; # (stable) amdgpu support for bonaire and newer chipsets - DRM_AMDGPU_CIK = whenAtLeast "4.9" yes; + DRM_AMDGPU_CIK = yes; # Allow device firmware updates - DRM_DP_AUX_CHARDEV = whenAtLeast "4.6" yes; + DRM_DP_AUX_CHARDEV = yes; # amdgpu display core (DC) support - DRM_AMD_DC_DCN1_0 = whenBetween "4.15" "5.6" yes; - DRM_AMD_DC_PRE_VEGA = whenBetween "4.15" "4.18" yes; + DRM_AMD_DC_DCN1_0 = whenOlder "5.6" yes; DRM_AMD_DC_DCN2_0 = whenBetween "5.3" "5.6" yes; DRM_AMD_DC_DCN2_1 = whenBetween "5.4" "5.6" yes; DRM_AMD_DC_DCN3_0 = whenBetween "5.9" "5.11" yes; - DRM_AMD_DC_DCN = whenAtLeast "5.11" yes; - DRM_AMD_DC_HDCP = whenAtLeast "5.5" yes; + DRM_AMD_DC_DCN = whenBetween "5.11" "6.4" yes; + DRM_AMD_DC_FP = whenAtLeast "6.4" yes; + DRM_AMD_DC_HDCP = whenBetween "5.5" "6.4" yes; DRM_AMD_DC_SI = whenAtLeast "5.10" yes; } // optionalAttrs (stdenv.hostPlatform.system == "x86_64-linux") { # Intel GVT-g graphics virtualization supports 64-bit only - DRM_I915_GVT = whenAtLeast "4.16" yes; - DRM_I915_GVT_KVMGT = whenAtLeast "4.16" module; + DRM_I915_GVT = yes; + DRM_I915_GVT_KVMGT = module; + # Enable Hyper-V Synthetic 
DRM Driver + DRM_HYPERV = whenAtLeast "5.14" module; + } // optionalAttrs (stdenv.hostPlatform.system == "aarch64-linux") { + # enable HDMI-CEC on RPi boards + DRM_VC4_HDMI_CEC = yes; }; sound = { @@ -284,11 +383,12 @@ let SND_HDA_CODEC_CA0132_DSP = whenOlder "5.7" yes; # Enable DSP firmware loading on Creative Soundblaster Z/Zx/ZxR/Recon SND_OSSEMUL = yes; SND_USB_CAIAQ_INPUT = yes; - # Enable PSS mixer (Beethoven ADSP-16 and other compatible) - PSS_MIXER = whenOlder "4.12" yes; # Enable Sound Open Firmware support } // optionalAttrs (stdenv.hostPlatform.system == "x86_64-linux" && versionAtLeast version "5.5") { + SND_SOC_INTEL_SOUNDWIRE_SOF_MACH = whenAtLeast "5.10" module; + SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES = whenAtLeast "5.10" yes; # dep of SOF_MACH + SND_SOC_SOF_INTEL_SOUNDWIRE_LINK = whenBetween "5.10" "5.11" yes; # dep of SOF_MACH SND_SOC_SOF_TOPLEVEL = yes; SND_SOC_SOF_ACPI = module; SND_SOC_SOF_PCI = module; @@ -321,25 +421,9 @@ let usb-serial = { USB_SERIAL_GENERIC = yes; # USB Generic Serial Driver - } // optionalAttrs (versionOlder version "4.16") { - # Include firmware for various USB serial devices. - # Only applicable for kernels below 4.16, after that no firmware is shipped in the kernel tree. 
- USB_SERIAL_KEYSPAN_MPR = yes; - USB_SERIAL_KEYSPAN_USA28 = yes; - USB_SERIAL_KEYSPAN_USA28X = yes; - USB_SERIAL_KEYSPAN_USA28XA = yes; - USB_SERIAL_KEYSPAN_USA28XB = yes; - USB_SERIAL_KEYSPAN_USA19 = yes; - USB_SERIAL_KEYSPAN_USA18X = yes; - USB_SERIAL_KEYSPAN_USA19W = yes; - USB_SERIAL_KEYSPAN_USA19QW = yes; - USB_SERIAL_KEYSPAN_USA19QI = yes; - USB_SERIAL_KEYSPAN_USA49W = yes; - USB_SERIAL_KEYSPAN_USA49WLC = yes; }; usb = { - USB_DEBUG = { optional = true; tristate = whenOlder "4.18" "n";}; USB_EHCI_ROOT_HUB_TT = yes; # Root Hub Transaction Translators USB_EHCI_TT_NEWSCHED = yes; # Improved transaction translator scheduling USB_HIDDEV = yes; # USB Raw HID Devices (like monitor controls and Uninterruptable Power Supplies) @@ -348,10 +432,12 @@ let # Filesystem options - in particular, enable extended attributes and # ACLs for all filesystems that support them. filesystem = { - FANOTIFY = yes; + FANOTIFY = yes; + FANOTIFY_ACCESS_PERMISSIONS = yes; + TMPFS = yes; TMPFS_POSIX_ACL = yes; - FS_ENCRYPTION = if (versionAtLeast version "5.1") then yes else whenAtLeast "4.9" (option module); + FS_ENCRYPTION = if (versionAtLeast version "5.1") then yes else option module; EXT2_FS_XATTR = yes; EXT2_FS_POSIX_ACL = yes; @@ -362,7 +448,11 @@ let EXT4_FS_POSIX_ACL = yes; EXT4_FS_SECURITY = yes; - EXT4_ENCRYPTION = { optional = true; tristate = if (versionOlder version "4.8") then "m" else "y"; }; + EXT4_ENCRYPTION = whenOlder "5.1" yes; + + NTFS_FS = whenAtLeast "5.15" no; + NTFS3_LZX_XPRESS = whenAtLeast "5.15" yes; + NTFS3_FS_POSIX_ACL = whenAtLeast "5.15" yes; REISERFS_FS_XATTR = option yes; REISERFS_FS_POSIX_ACL = option yes; @@ -374,6 +464,7 @@ let XFS_QUOTA = option yes; XFS_POSIX_ACL = option yes; XFS_RT = option yes; # XFS Realtime subvolume support + XFS_ONLINE_SCRUB = option yes; OCFS2_DEBUG_MASKLOG = option no; @@ -383,13 +474,12 @@ let F2FS_FS = module; F2FS_FS_SECURITY = option yes; - F2FS_FS_ENCRYPTION = option yes; + F2FS_FS_ENCRYPTION = whenOlder "5.1" yes; 
F2FS_FS_COMPRESSION = whenAtLeast "5.6" yes; UDF_FS = module; - NFSD_PNFS = whenBetween "4.0" "4.6" yes; - NFSD_V2_ACL = yes; - NFSD_V3 = yes; + NFSD_V2_ACL = whenOlder "6.2" yes; + NFSD_V3 = whenOlder "5.18" yes; NFSD_V3_ACL = yes; NFSD_V4 = yes; NFSD_V4_SECURITY_LABEL = yes; @@ -404,24 +494,22 @@ let CIFS_XATTR = yes; CIFS_POSIX = option yes; CIFS_FSCACHE = yes; - CIFS_STATS = whenOlder "4.19" yes; - CIFS_WEAK_PW_HASH = yes; + CIFS_WEAK_PW_HASH = whenOlder "5.15" yes; CIFS_UPCALL = yes; CIFS_ACL = whenOlder "5.3" yes; CIFS_DFS_UPCALL = yes; - CIFS_SMB2 = whenOlder "4.13" yes; CEPH_FSCACHE = yes; CEPH_FS_POSIX_ACL = yes; SQUASHFS_FILE_DIRECT = yes; - SQUASHFS_DECOMP_MULTI_PERCPU = yes; + SQUASHFS_DECOMP_MULTI_PERCPU = whenOlder "6.2" yes; SQUASHFS_XATTR = yes; SQUASHFS_ZLIB = yes; SQUASHFS_LZO = yes; SQUASHFS_XZ = yes; SQUASHFS_LZ4 = yes; - SQUASHFS_ZSTD = whenAtLeast "4.14" yes; + SQUASHFS_ZSTD = yes; # Native Language Support modules, needed by some filesystems NLS = yes; @@ -430,19 +518,29 @@ let NLS_CODEPAGE_437 = module; # VFAT default for the codepage= mount option NLS_ISO8859_1 = module; # VFAT default for the iocharset= mount option + # Needed to use the installation iso image. Not included in all defconfigs (e.g. 
arm64) + ISO9660_FS = module; + DEVTMPFS = yes; UNICODE = whenAtLeast "5.2" yes; # Casefolding support for filesystems }; security = { - # Detect writes to read-only module pages - DEBUG_SET_MODULE_RONX = { optional = true; tristate = whenOlder "4.11" "y"; }; + FORTIFY_SOURCE = option yes; + + # https://googleprojectzero.blogspot.com/2019/11/bad-binder-android-in-wild-exploit.html + DEBUG_LIST = yes; + HARDENED_USERCOPY = yes; RANDOMIZE_BASE = option yes; - STRICT_DEVMEM = option yes; # Filter access to /dev/mem + STRICT_DEVMEM = mkDefault yes; # Filter access to /dev/mem + IO_STRICT_DEVMEM = mkDefault yes; SECURITY_SELINUX_BOOTPARAM_VALUE = whenOlder "5.1" (freeform "0"); # Disable SELinux by default # Prevent processes from ptracing non-children processes SECURITY_YAMA = option yes; + # The goal of Landlock is to enable to restrict ambient rights (e.g. global filesystem access) for a set of processes. + # This does not have any effect if a program does not support it + SECURITY_LANDLOCK = whenAtLeast "5.13" yes; DEVKMEM = whenOlder "5.13" no; # Disable /dev/kmem USER_NS = yes; # Support for user namespaces @@ -450,23 +548,49 @@ let SECURITY_APPARMOR = yes; DEFAULT_SECURITY_APPARMOR = yes; - RANDOM_TRUST_CPU = whenAtLeast "4.19" yes; # allow RDRAND to seed the RNG + RANDOM_TRUST_CPU = whenOlder "6.2" yes; # allow RDRAND to seed the RNG + RANDOM_TRUST_BOOTLOADER = whenOlder "6.2" (whenAtLeast "5.4" yes); # allow the bootloader to seed the RNG MODULE_SIG = no; # r13y, generates a random key during build and bakes it in # Depends on MODULE_SIG and only really helps when you sign your modules # and enforce signatures which we don't do by default. 
- SECURITY_LOCKDOWN_LSM = option no; - } // optionalAttrs (!stdenv.hostPlatform.isAarch32) { - - # Detect buffer overflows on the stack - CC_STACKPROTECTOR_REGULAR = {optional = true; tristate = whenOlder "4.18" "y";}; + SECURITY_LOCKDOWN_LSM = whenAtLeast "5.4" no; + + # provides a register of persistent per-UID keyrings, useful for encrypting storage pools in stratis + PERSISTENT_KEYRINGS = yes; + # enable temporary caching of the last request_key() result + KEYS_REQUEST_CACHE = whenAtLeast "5.3" yes; + # randomized slab caches + RANDOM_KMALLOC_CACHES = whenAtLeast "6.6" yes; + + # NIST SP800-90A DRBG modes - enabled by most distributions + # and required by some out-of-tree modules (ShuffleCake) + # This does not include the NSA-backdoored Dual-EC mode from the same NIST publication. + CRYPTO_DRBG_HASH = yes; + CRYPTO_DRBG_CTR = yes; + + } // optionalAttrs stdenv.hostPlatform.isx86_64 { + # Enable Intel SGX + X86_SGX = whenAtLeast "5.11" yes; + # Allow KVM guests to load SGX enclaves + X86_SGX_KVM = whenAtLeast "5.13" yes; + + # AMD Cryptographic Coprocessor (CCP) + CRYPTO_DEV_CCP = yes; + # AMD SME + AMD_MEM_ENCRYPT = yes; + # AMD SEV and AMD SEV-SE + KVM_AMD_SEV = yes; + # AMD SEV-SNP + SEV_GUEST = whenAtLeast "5.19" module; + # Shadow stacks + X86_USER_SHADOW_STACK = whenAtLeast "6.6" yes; }; microcode = { MICROCODE = yes; - MICROCODE_INTEL = yes; - MICROCODE_AMD = yes; - } // optionalAttrs (versionAtLeast version "4.10") { + MICROCODE_INTEL = whenOlder "6.6" yes; + MICROCODE_AMD = whenOlder "6.6" yes; # Write Back Throttling # https://lwn.net/Articles/682582/ # https://bugzilla.kernel.org/show_bug.cgi?id=12309#c655 @@ -481,15 +605,14 @@ let CGROUP_DEVICE = yes; CGROUP_HUGETLB = yes; CGROUP_PERF = yes; - CGROUP_RDMA = whenAtLeast "4.11" yes; + CGROUP_RDMA = yes; MEMCG = yes; - MEMCG_SWAP = yes; + MEMCG_SWAP = whenOlder "6.1" yes; - DEVPTS_MULTIPLE_INSTANCES = whenOlder "4.7" yes; BLK_DEV_THROTTLING = yes; CFQ_GROUP_IOSCHED = whenOlder "5.0" yes; # Removed in 
5.0-RC1 - CGROUP_PIDS = whenAtLeast "4.3" yes; + CGROUP_PIDS = yes; }; staging = { @@ -512,25 +635,27 @@ let FTRACE_SYSCALLS = yes; SCHED_TRACER = yes; STACK_TRACER = yes; - UPROBE_EVENT = { optional = true; tristate = whenOlder "4.11" "y";}; - UPROBE_EVENTS = { optional = true; tristate = whenAtLeast "4.11" "y";}; - BPF_SYSCALL = whenAtLeast "4.4" yes; - BPF_EVENTS = whenAtLeast "4.4" yes; + UPROBE_EVENTS = option yes; + BPF_SYSCALL = yes; + BPF_UNPRIV_DEFAULT_OFF = whenBetween "5.10" "5.16" yes; + BPF_EVENTS = yes; FUNCTION_PROFILER = yes; RING_BUFFER_BENCHMARK = no; }; + perf = { + # enable AMD Zen branch sampling if available + PERF_EVENTS_AMD_BRS = whenAtLeast "5.19" (option yes); + }; + virtualisation = { PARAVIRT = option yes; HYPERVISOR_GUEST = yes; PARAVIRT_SPINLOCKS = option yes; - KVM_APIC_ARCHITECTURE = whenOlder "4.8" yes; KVM_ASYNC_PF = yes; - KVM_COMPAT = { optional = true; tristate = whenBetween "4.0" "4.12" "y"; }; - KVM_DEVICE_ASSIGNMENT = { optional = true; tristate = whenBetween "3.10" "4.12" "y"; }; - KVM_GENERIC_DIRTYLOG_READ_PROTECT = whenAtLeast "4.0" yes; + KVM_GENERIC_DIRTYLOG_READ_PROTECT = yes; KVM_GUEST = yes; KVM_MMIO = yes; KVM_VFIO = yes; @@ -563,22 +688,20 @@ let XEN_PVH = option yes; XEN_PVHVM = option yes; XEN_SAVE_RESTORE = option yes; - XEN_SCRUB_PAGES = option yes; - XEN_SELFBALLOONING = option yes; - XEN_STUB = option yes; - XEN_TMEM = option yes; + XEN_SELFBALLOONING = whenOlder "5.3" yes; + + # Enable device detection on virtio-mmio hypervisors + VIRTIO_MMIO_CMDLINE_DEVICES = yes; }; media = { MEDIA_DIGITAL_TV_SUPPORT = yes; MEDIA_CAMERA_SUPPORT = yes; - MEDIA_RC_SUPPORT = whenOlder "4.14" yes; MEDIA_CONTROLLER = yes; MEDIA_PCI_SUPPORT = yes; MEDIA_USB_SUPPORT = yes; MEDIA_ANALOG_TV_SUPPORT = yes; - VIDEO_STK1160_COMMON = module; - VIDEO_STK1160_AC97 = whenOlder "4.11" yes; + VIDEO_STK1160_COMMON = whenOlder "6.5" module; }; "9p" = { @@ -594,10 +717,11 @@ let }; zram = { - ZRAM = module; - ZSWAP = option yes; - ZBUD = option 
yes; - ZSMALLOC = module; + ZRAM = module; + ZRAM_WRITEBACK = option yes; + ZSWAP = option yes; + ZPOOL = yes; + ZBUD = option yes; }; brcmfmac = { @@ -616,67 +740,27 @@ let tests = { # This menu disables all/most of them on >= 4.16 RUNTIME_TESTING_MENU = option no; - } // optionalAttrs (versionOlder version "4.16") { - # For older kernels, painstakingly disable each symbol. - ARM_KPROBES_TEST = option no; - ASYNC_RAID6_TEST = option no; - ATOMIC64_SELFTEST = option no; - BACKTRACE_SELF_TEST = option no; - INTERVAL_TREE_TEST = option no; - PERCPU_TEST = option no; - RBTREE_TEST = option no; - TEST_BITMAP = option no; - TEST_BPF = option no; - TEST_FIRMWARE = option no; - TEST_HASH = option no; - TEST_HEXDUMP = option no; - TEST_KMOD = option no; - TEST_KSTRTOX = option no; - TEST_LIST_SORT = option no; - TEST_LKM = option no; - TEST_PARMAN = option no; - TEST_PRINTF = option no; - TEST_RHASHTABLE = option no; - TEST_SORT = option no; - TEST_STATIC_KEYS = option no; - TEST_STRING_HELPERS = option no; - TEST_UDELAY = option no; - TEST_USER_COPY = option no; - TEST_UUID = option no; } // { CRC32_SELFTEST = option no; CRYPTO_TEST = option no; EFI_TEST = option no; GLOB_SELFTEST = option no; - DRM_DEBUG_MM_SELFTEST = { optional = true; tristate = whenOlder "4.18" "n";}; - LNET_SELFTEST = { optional = true; tristate = whenOlder "4.18" "n";}; LOCK_TORTURE_TEST = option no; MTD_TESTS = option no; NOTIFIER_ERROR_INJECTION = option no; - RCU_PERF_TEST = option no; + RCU_PERF_TEST = whenOlder "5.9" no; + RCU_SCALE_TEST = whenAtLeast "5.10" no; RCU_TORTURE_TEST = option no; TEST_ASYNC_DRIVER_PROBE = option no; WW_MUTEX_SELFTEST = option no; XZ_DEC_TEST = option no; }; - criu = if (versionAtLeast version "4.19") then { + criu = { # Unconditionally enabled, because it is required for CRIU and # it provides the kcmp() system call that Mesa depends on. 
CHECKPOINT_RESTORE = yes; - } else optionalAttrs (features.criu or false) ({ - # For older kernels, CHECKPOINT_RESTORE is hidden behind EXPERT. - EXPERT = yes; - CHECKPOINT_RESTORE = yes; - } // optionalAttrs (features.criu_revert_expert or true) { - RFKILL_INPUT = option yes; - HID_PICOLCD_FB = option yes; - HID_PICOLCD_BACKLIGHT = option yes; - HID_PICOLCD_LCD = option yes; - HID_PICOLCD_LEDS = option yes; - HID_PICOLCD_CIR = option yes; - DEBUG_MEMORY_INIT = option yes; - }); + }; misc = let # Use zstd for kernel compression if 64-bit and newer than 5.9, otherwise xz. @@ -692,7 +776,12 @@ let HID_ACRUX_FF = yes; DRAGONRISE_FF = yes; + GREENASIA_FF = yes; HOLTEK_FF = yes; + JOYSTICK_PSXPAD_SPI_FF = yes; + LOGIG940_FF = yes; + NINTENDO_FF = whenAtLeast "5.16" yes; + PLAYSTATION_FF = whenAtLeast "5.12" yes; SONY_FF = yes; SMARTJOYPLUS_FF = yes; THRUSTMASTER_FF = yes; @@ -715,7 +804,6 @@ let PM_TRACE_RTC = no; # Disable some expensive (?) features. ACCESSIBILITY = yes; # Accessibility support AUXDISPLAY = yes; # Auxiliary Display support - DONGLE = whenOlder "4.17" yes; # Serial dongle support HIPPI = yes; MTD_COMPLEX_MAPPINGS = yes; # needed for many devices @@ -732,35 +820,47 @@ let AIC79XX_DEBUG_ENABLE = no; AIC7XXX_DEBUG_ENABLE = no; AIC94XX_DEBUG = no; - B43_PCMCIA = { optional=true; tristate = whenOlder "4.4" "y";}; BLK_DEV_INTEGRITY = yes; - BLK_SED_OPAL = whenAtLeast "4.14" yes; + BLK_SED_OPAL = yes; BSD_PROCESS_ACCT_V3 = yes; + SERIAL_DEV_BUS = yes; # enables support for serial devices + SERIAL_DEV_CTRL_TTYPORT = yes; # enables support for TTY serial devices + + BT_HCIBTUSB_MTK = whenAtLeast "5.3" yes; # MediaTek protocol support + BT_HCIUART_QCA = yes; # Qualcomm Atheros protocol support + BT_HCIUART_SERDEV = yes; # required by BT_HCIUART_QCA + BT_HCIUART = module; # required for BT devices with serial port interface (QCA6390) BT_HCIUART_BCSP = option yes; BT_HCIUART_H4 = option yes; # UART (H4) protocol support BT_HCIUART_LL = option yes; BT_RFCOMM_TTY = 
option yes; # RFCOMM TTY support + BT_QCA = module; # enables QCA6390 bluetooth - CLEANCACHE = option yes; + # Removed on 5.17 as it was unused + # upstream: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0a4ee518185e902758191d968600399f3bc2be31 + CLEANCACHE = whenOlder "5.17" (option yes); CRASH_DUMP = option no; + FSCACHE_STATS = yes; + DVB_DYNAMIC_MINORS = option yes; # we use udev EFI_STUB = yes; # EFI bootloader in the bzImage itself + EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER = + whenOlder "6.2" (whenAtLeast "5.8" yes); # initrd kernel parameter for EFI CGROUPS = yes; # used by systemd FHANDLE = yes; # used by systemd SECCOMP = yes; # used by systemd >= 231 SECCOMP_FILTER = yes; # ditto POSIX_MQUEUE = yes; - FRONTSWAP = yes; + FRONTSWAP = whenOlder "6.6" yes; FUSION = yes; # Fusion MPT device support IDE = whenOlder "5.14" no; # deprecated IDE support, removed in 5.14 IDLE_PAGE_TRACKING = yes; - IRDA_ULTRA = whenOlder "4.17" yes; # Ultra (connectionless) protocol JOYSTICK_IFORCE_232 = { optional = true; tristate = whenOlder "5.3" "y"; }; # I-Force Serial joysticks and wheels JOYSTICK_IFORCE_USB = { optional = true; tristate = whenOlder "5.3" "y"; }; # I-Force USB joysticks and wheels @@ -780,25 +880,30 @@ let MEDIA_ATTACH = yes; MEGARAID_NEWGEN = yes; - MLX4_EN_VXLAN = whenOlder "4.8" yes; MLX5_CORE_EN = option yes; - NVME_MULTIPATH = whenAtLeast "4.15" yes; + NVME_MULTIPATH = yes; PSI = whenAtLeast "4.20" yes; - MODVERSIONS = whenOlder "4.9" yes; MOUSE_ELAN_I2C_SMBUS = yes; MOUSE_PS2_ELANTECH = yes; # Elantech PS/2 protocol extension + MOUSE_PS2_VMMOUSE = yes; MTRR_SANITIZER = yes; NET_FC = yes; # Fibre Channel driver support + # Needed for touchpads to work on some AMD laptops + PINCTRL_AMD = whenAtLeast "5.19" yes; # GPIO on Intel Bay Trail, for some Chromebook internal eMMC disks PINCTRL_BAYTRAIL = yes; + # GPIO for Braswell and Cherryview devices + # Needs to be built-in to for integrated keyboards to function properly + 
PINCTRL_CHERRYVIEW = yes; # 8 is default. Modern gpt tables on eMMC may go far beyond 8. MMC_BLOCK_MINORS = freeform "32"; REGULATOR = yes; # Voltage and Current Regulator Support RC_DEVICES = option yes; # Enable IR devices + RC_DECODERS = option yes; # Required for IR devices to work RT2800USB_RT53XX = yes; RT2800USB_RT55XX = yes; @@ -809,6 +914,9 @@ let SCSI_LOGGING = yes; # SCSI logging facility SERIAL_8250 = yes; # 8250/16550 and compatible serial support + SLAB_FREELIST_HARDENED = yes; + SLAB_FREELIST_RANDOM = yes; + SLIP_COMPRESSED = yes; # CSLIP compressed headers SLIP_SMART = yes; @@ -831,6 +939,8 @@ let # Disable the firmware helper fallback, udev doesn't implement it any more FW_LOADER_USER_HELPER_FALLBACK = option no; + FW_LOADER_COMPRESS = option yes; + HOTPLUG_PCI_ACPI = yes; # PCI hotplug using ACPI HOTPLUG_PCI_PCIE = yes; # PCI-Expresscard hotplug support @@ -845,9 +955,34 @@ let X86_AMD_PLATFORM_DEVICE = yes; X86_PLATFORM_DRIVERS_DELL = whenAtLeast "5.12" yes; + X86_PLATFORM_DRIVERS_HP = whenAtLeast "6.1" yes; + + LIRC = yes; + + SCHED_CORE = whenAtLeast "5.14" yes; + + LRU_GEN = whenAtLeast "6.1" yes; + LRU_GEN_ENABLED = whenAtLeast "6.1" yes; + + FSL_MC_UAPI_SUPPORT = mkIf (stdenv.hostPlatform.system == "aarch64-linux") (whenAtLeast "5.12" yes); + + ASHMEM = { optional = true; tristate = whenBetween "5.0" "5.18" "y";}; + ANDROID = { optional = true; tristate = whenBetween "5.0" "5.19" "y";}; + ANDROID_BINDER_IPC = { optional = true; tristate = whenAtLeast "5.0" "y";}; + ANDROID_BINDERFS = { optional = true; tristate = whenAtLeast "5.0" "y";}; + ANDROID_BINDER_DEVICES = { optional = true; freeform = whenAtLeast "5.0" "binder,hwbinder,vndbinder";}; + + TASKSTATS = yes; + TASK_DELAY_ACCT = yes; + TASK_XACCT = yes; + TASK_IO_ACCOUNTING = yes; - LIRC = mkMerge [ (whenOlder "4.16" module) (whenAtLeast "4.17" yes) ]; + # Fresh toolchains frequently break -Werror build for minor issues. 
+ WERROR = whenAtLeast "5.15" no; + # > CONFIG_KUNIT should not be enabled in a production environment. Enabling KUnit disables Kernel Address-Space Layout Randomization (KASLR), and tests may affect the state of the kernel in ways not suitable for production. + # https://www.kernel.org/doc/html/latest/dev-tools/kunit/start.html + KUNIT = whenAtLeast "5.5" no; } // optionalAttrs (stdenv.hostPlatform.system == "x86_64-linux" || stdenv.hostPlatform.system == "aarch64-linux") { # Enable CPU/memory hotplug support # Allows you to dynamically add & remove CPUs/memory to a VM client running NixOS without requiring a reboot @@ -864,7 +999,7 @@ let NR_CPUS = freeform "384"; } // optionalAttrs (stdenv.hostPlatform.system == "armv7l-linux" || stdenv.hostPlatform.system == "aarch64-linux") { # Enables support for the Allwinner Display Engine 2.0 - SUN8I_DE2_CCU = whenAtLeast "4.13" yes; + SUN8I_DE2_CCU = yes; # See comments on https://github.com/NixOS/nixpkgs/commit/9b67ea9106102d882f53d62890468071900b9647 CRYPTO_AEGIS128_SIMD = whenAtLeast "5.4" no; @@ -880,6 +1015,46 @@ let # Keeping it a built-in ensures it will be used if possible. FB_SIMPLE = yes; + # https://docs.kernel.org/arch/arm/mem_alignment.html + # tldr: + # when buggy userspace code emits illegal misaligned LDM, STM, + # LDRD and STRDs, the instructions trap, are caught, and then + # are emulated by the kernel. + # + # This is the default on armv7l, anyway, but it is explicitly + # enabled here for the sake of providing context for the + # aarch64 compat option which follows. + ALIGNMENT_TRAP = mkIf (stdenv.hostPlatform.system == "armv7l-linux") yes; + + # https://patchwork.kernel.org/project/linux-arm-kernel/patch/20220701135322.3025321-1-ardb@kernel.org/ + # tldr: + # when encountering alignment faults under aarch64, this option + # makes the kernel attempt to handle the fault by doing the + # same style of misaligned emulation that is performed under + # armv7l (see above option). 
+ # + # This minimizes the potential for aarch32 userspace to behave + # differently when run under aarch64 kernels compared to when + # it is run under an aarch32 kernel. + COMPAT_ALIGNMENT_FIXUPS = mkIf (stdenv.hostPlatform.system == "aarch64-linux") (whenAtLeast "6.1" yes); + } // optionalAttrs (versionAtLeast version "5.4" && (stdenv.hostPlatform.system == "x86_64-linux" || stdenv.hostPlatform.system == "aarch64-linux")) { + # Required for various hardware features on Chrome OS devices + CHROME_PLATFORMS = yes; + CHROMEOS_TBMC = module; + + CROS_EC = module; + + CROS_EC_I2C = module; + CROS_EC_SPI = module; + CROS_EC_LPC = module; + CROS_EC_ISHTP = module; + + CROS_KBD_LED_BACKLIGHT = module; + + TCG_TIS_SPI_CR50 = whenAtLeast "5.5" yes; + } // optionalAttrs (versionAtLeast version "5.4" && stdenv.hostPlatform.system == "x86_64-linux") { + CHROMEOS_LAPTOP = module; + CHROMEOS_PSTORE = module; }; }; in diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.11.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.11.patch deleted file mode 100644 index 38cc0532ba9..00000000000 --- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.11.patch +++ /dev/null @@ -1,784 +0,0 @@ -commit 827b86ad1dd21feed4c0b99faf6059f245f7dadb -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Misc preps for cgroup unified hierarchy interface - - Make the following changes in preparation for the cpu controller - interface implementation for the unified hierarchy. This patch - doesn't cause any functional differences. - - * s/cpu_stats_show()/cpu_cfs_stats_show()/ - - * s/cpu_files/cpu_legacy_files/ - - * Separate out cpuacct_stats_read() from cpuacct_stats_show(). While - at it, make the @val array u64 for consistency. 
- - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 3b31fc05a0f1..a1b95e83fa87 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -7174,7 +7174,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) - return ret; - } - --static int cpu_stats_show(struct seq_file *sf, void *v) -+static int cpu_cfs_stats_show(struct seq_file *sf, void *v) - { - struct task_group *tg = css_tg(seq_css(sf)); - struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -@@ -7214,7 +7214,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, - } - #endif /* CONFIG_RT_GROUP_SCHED */ - --static struct cftype cpu_files[] = { -+static struct cftype cpu_legacy_files[] = { - #ifdef CONFIG_FAIR_GROUP_SCHED - { - .name = "shares", -@@ -7235,7 +7235,7 @@ static struct cftype cpu_files[] = { - }, - { - .name = "stat", -- .seq_show = cpu_stats_show, -+ .seq_show = cpu_cfs_stats_show, - }, - #endif - #ifdef CONFIG_RT_GROUP_SCHED -@@ -7261,7 +7261,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .fork = cpu_cgroup_fork, - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, -- .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, - .early_init = true, - }; - -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index f95ab29a45d0..6151c23f722f 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -276,26 +276,33 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V) - return 0; - } - --static int cpuacct_stats_show(struct seq_file *sf, void *v) -+static void cpuacct_stats_read(struct cpuacct *ca, -+ u64 (*val)[CPUACCT_STAT_NSTATS]) - { -- struct cpuacct *ca = css_ca(seq_css(sf)); -- s64 val[CPUACCT_STAT_NSTATS]; - int cpu; -- int stat; - -- memset(val, 0, sizeof(val)); -+ 
memset(val, 0, sizeof(*val)); -+ - for_each_possible_cpu(cpu) { - u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; - -- val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; -- val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; -+ (*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; -+ (*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; - } -+} -+ -+static int cpuacct_stats_show(struct seq_file *sf, void *v) -+{ -+ u64 val[CPUACCT_STAT_NSTATS]; -+ int stat; -+ -+ cpuacct_stats_read(css_ca(seq_css(sf)), &val); - - for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { -- seq_printf(sf, "%s %lld\n", -+ seq_printf(sf, "%s %llu\n", - cpuacct_stat_desc[stat], - (long long)nsec_to_clock_t(val[stat])); - } - -commit fdb64d002b3a223ce4bb11aa4448a42050470052 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Implement interface for cgroup unified hierarchy - - While the cpu controller doesn't have any functional problems, there - are a couple interface issues which can be addressed in the v2 - interface. - - * cpuacct being a separate controller. This separation is artificial - and rather pointless as demonstrated by most use cases co-mounting - the two controllers. It also forces certain information to be - accounted twice. - - * Use of different time units. Writable control knobs use - microseconds, some stat fields use nanoseconds while other cpuacct - stat fields use centiseconds. - - * Control knobs which can't be used in the root cgroup still show up - in the root. - - * Control knob names and semantics aren't consistent with other - controllers. 
- - This patchset implements cpu controller's interface on the unified - hierarchy which adheres to the controller file conventions described - in Documentation/cgroups/unified-hierarchy.txt. Overall, the - following changes are made. - - * cpuacct is implicitly enabled and disabled by cpu and its information - is reported through "cpu.stat" which now uses microseconds for all - time durations. All time duration fields now have "_usec" appended - to them for clarity. While this doesn't solve the double accounting - immediately, once majority of users switch to v2, cpu can directly - account and report the relevant stats and cpuacct can be disabled on - the unified hierarchy. - - Note that cpuacct.usage_percpu is currently not included in - "cpu.stat". If this information is actually called for, it can be - added later. - - * "cpu.shares" is replaced with "cpu.weight" and operates on the - standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000). - The weight is scaled to scheduler weight so that 100 maps to 1024 - and the ratio relationship is preserved - if weight is W and its - scaled value is S, W / 100 == S / 1024. While the mapped range is a - bit smaller than the original scheduler weight range, the dead zones - on both sides are relatively small and cover a wider range than the - nice value mappings. This file doesn't make sense in the root - cgroup and isn't created on root. - - * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max" - which contains both quota and period. - - * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by - "cpu.rt.max" which contains both runtime and period. - - v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for - CFS bandwidth stats and also using raw division for u64. Use - CONFIG_CFS_BANDWIDTH and do_div() instead. - - The semantics of "cpu.rt.max" is not fully decided yet. Dropped - for now. 
- - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index a1b95e83fa87..f01d56e58a1b 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -7253,6 +7253,139 @@ static struct cftype cpu_legacy_files[] = { - { } /* Terminate */ - }; - -+static int cpu_stats_show(struct seq_file *sf, void *v) -+{ -+ cpuacct_cpu_stats_show(sf); -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ struct task_group *tg = css_tg(seq_css(sf)); -+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -+ u64 throttled_usec; -+ -+ throttled_usec = cfs_b->throttled_time; -+ do_div(throttled_usec, NSEC_PER_USEC); -+ -+ seq_printf(sf, "nr_periods %d\n" -+ "nr_throttled %d\n" -+ "throttled_usec %llu\n", -+ cfs_b->nr_periods, cfs_b->nr_throttled, -+ throttled_usec); -+ } -+#endif -+ return 0; -+} -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ u64 weight = scale_load_down(tg->shares); -+ -+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); -+} -+ -+static int cpu_weight_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 weight) -+{ -+ /* -+ * cgroup weight knobs should use the common MIN, DFL and MAX -+ * values which are 1, 100 and 10000 respectively. While it loses -+ * a bit of range on both ends, it maps pretty well onto the shares -+ * value used by scheduler and the round-trip conversions preserve -+ * the original value over the entire range. 
-+ */ -+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) -+ return -ERANGE; -+ -+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); -+ -+ return sched_group_set_shares(css_tg(css), scale_load(weight)); -+} -+#endif -+ -+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, -+ long period, long quota) -+{ -+ if (quota < 0) -+ seq_puts(sf, "max"); -+ else -+ seq_printf(sf, "%ld", quota); -+ -+ seq_printf(sf, " %ld\n", period); -+} -+ -+/* caller should put the current value in *@periodp before calling */ -+static int __maybe_unused cpu_period_quota_parse(char *buf, -+ u64 *periodp, u64 *quotap) -+{ -+ char tok[21]; /* U64_MAX */ -+ -+ if (!sscanf(buf, "%s %llu", tok, periodp)) -+ return -EINVAL; -+ -+ *periodp *= NSEC_PER_USEC; -+ -+ if (sscanf(tok, "%llu", quotap)) -+ *quotap *= NSEC_PER_USEC; -+ else if (!strcmp(tok, "max")) -+ *quotap = RUNTIME_INF; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+static int cpu_max_show(struct seq_file *sf, void *v) -+{ -+ struct task_group *tg = css_tg(seq_css(sf)); -+ -+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); -+ return 0; -+} -+ -+static ssize_t cpu_max_write(struct kernfs_open_file *of, -+ char *buf, size_t nbytes, loff_t off) -+{ -+ struct task_group *tg = css_tg(of_css(of)); -+ u64 period = tg_get_cfs_period(tg); -+ u64 quota; -+ int ret; -+ -+ ret = cpu_period_quota_parse(buf, &period, &quota); -+ if (!ret) -+ ret = tg_set_cfs_bandwidth(tg, period, quota); -+ return ret ?: nbytes; -+} -+#endif -+ -+static struct cftype cpu_files[] = { -+ { -+ .name = "stat", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_stats_show, -+ }, -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ { -+ .name = "weight", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_u64 = cpu_weight_read_u64, -+ .write_u64 = cpu_weight_write_u64, -+ }, -+#endif -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ .name = "max", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_max_show, 
-+ .write = cpu_max_write, -+ }, -+#endif -+ { } /* terminate */ -+}; -+ - struct cgroup_subsys cpu_cgrp_subsys = { - .css_alloc = cpu_cgroup_css_alloc, - .css_online = cpu_cgroup_css_online, -@@ -7262,7 +7395,15 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, - .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, - .early_init = true, -+#ifdef CONFIG_CGROUP_CPUACCT -+ /* -+ * cpuacct is enabled together with cpu on the unified hierarchy -+ * and its stats are reported through "cpu.stat". -+ */ -+ .depends_on = 1 << cpuacct_cgrp_id, -+#endif - }; - - #endif /* CONFIG_CGROUP_SCHED */ -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index 6151c23f722f..fc1cf13c3af1 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -347,6 +347,31 @@ static struct cftype files[] = { - { } /* terminate */ - }; - -+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */ -+void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+ struct cgroup_subsys_state *css; -+ u64 usage, val[CPUACCT_STAT_NSTATS]; -+ -+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys); -+ -+ usage = cpuusage_read(css, seq_cft(sf)); -+ cpuacct_stats_read(css_ca(css), &val); -+ -+ val[CPUACCT_STAT_USER] *= TICK_NSEC; -+ val[CPUACCT_STAT_SYSTEM] *= TICK_NSEC; -+ do_div(usage, NSEC_PER_USEC); -+ do_div(val[CPUACCT_STAT_USER], NSEC_PER_USEC); -+ do_div(val[CPUACCT_STAT_SYSTEM], NSEC_PER_USEC); -+ -+ seq_printf(sf, "usage_usec %llu\n" -+ "user_usec %llu\n" -+ "system_usec %llu\n", -+ usage, val[CPUACCT_STAT_USER], val[CPUACCT_STAT_SYSTEM]); -+ -+ css_put(css); -+} -+ - /* - * charge this task's execution time to its accounting group. 
- * -diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h -index ba72807c73d4..ddf7af466d35 100644 ---- a/kernel/sched/cpuacct.h -+++ b/kernel/sched/cpuacct.h -@@ -2,6 +2,7 @@ - - extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); - extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); -+extern void cpuacct_cpu_stats_show(struct seq_file *sf); - - #else - -@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val) - { - } - -+static inline void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+} -+ - #endif - -commit 8dde150866b8c433216105c50b7e889d5242d583 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Aug 5 12:41:01 2016 -0400 - - cgroup: add documentation regarding CPU controller cgroup v2 support - - Signed-off-by: Tejun Heo <tj@kernel.org> - -diff --git a/Documentation/cgroup-v2-cpu.txt b/Documentation/cgroup-v2-cpu.txt -new file mode 100644 -index 000000000000..1ed7032d4472 ---- /dev/null -+++ b/Documentation/cgroup-v2-cpu.txt -@@ -0,0 +1,368 @@ -+ -+ -+CPU Controller on Control Group v2 -+ -+August, 2016 Tejun Heo <tj@kernel.org> -+ -+ -+While most controllers have support for cgroup v2 now, the CPU -+controller support is not upstream yet due to objections from the -+scheduler maintainers on the basic designs of cgroup v2. This -+document explains the current situation as well as an interim -+solution, and details the disagreements and arguments. The latest -+version of this document can be found at the following URL. -+ -+ https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu -+ -+This document was posted to the linux-kernel and cgroup mailing lists. -+Unfortunately, no consensus was reached as of Oct, 2016. The thread -+can be found at the following URL. -+ -+ http://lkml.kernel.org/r/20160805170752.GK2542@mtj.duckdns.org -+ -+ -+CONTENTS -+ -+1. Current Situation and Interim Solution -+2. Disagreements and Arguments -+ 2-1. 
Contentious Restrictions -+ 2-1-1. Process Granularity -+ 2-1-2. No Internal Process Constraint -+ 2-2. Impact on CPU Controller -+ 2-2-1. Impact of Process Granularity -+ 2-2-2. Impact of No Internal Process Constraint -+ 2-3. Arguments for cgroup v2 -+3. Way Forward -+4. References -+ -+ -+1. Current Situation and Interim Solution -+ -+All objections from the scheduler maintainers apply to cgroup v2 core -+design, and there are no known objections to the specifics of the CPU -+controller cgroup v2 interface. The only blocked part is changes to -+expose the CPU controller interface on cgroup v2, which comprises the -+following two patches: -+ -+ [1] sched: Misc preps for cgroup unified hierarchy interface -+ [2] sched: Implement interface for cgroup unified hierarchy -+ -+The necessary changes are superficial and implement the interface -+files on cgroup v2. The combined diffstat is as follows. -+ -+ kernel/sched/core.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++-- -+ kernel/sched/cpuacct.c | 57 ++++++++++++------ -+ kernel/sched/cpuacct.h | 5 + -+ 3 files changed, 189 insertions(+), 22 deletions(-) -+ -+The patches are easy to apply and forward-port. The following git -+branch will always carry the two patches on top of the latest release -+of the upstream kernel. -+ -+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu -+ -+There also are versioned branches going back to v4.4. -+ -+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu-$KERNEL_VER -+ -+While it's difficult to tell whether the CPU controller support will -+be merged, there are crucial resource control features in cgroup v2 -+that are only possible due to the design choices that are being -+objected to, and every effort will be made to ease enabling the CPU -+controller cgroup v2 support out-of-tree for parties which choose to. -+ -+ -+2. 
Disagreements and Arguments -+ -+There have been several lengthy discussion threads [3][4] on LKML -+around the structural constraints of cgroup v2. The two that affect -+the CPU controller are process granularity and no internal process -+constraint. Both arise primarily from the need for common resource -+domain definition across different resources. -+ -+The common resource domain is a powerful concept in cgroup v2 that -+allows controllers to make basic assumptions about the structural -+organization of processes and controllers inside the cgroup hierarchy, -+and thus solve problems spanning multiple types of resources. The -+prime example for this is page cache writeback: dirty page cache is -+regulated through throttling buffered writers based on memory -+availability, and initiating batched write outs to the disk based on -+IO capacity. Tracking and controlling writeback inside a cgroup thus -+requires the direct cooperation of the memory and the IO controller. -+ -+This easily extends to other areas, such as CPU cycles consumed while -+performing memory reclaim or IO encryption. -+ -+ -+2-1. Contentious Restrictions -+ -+For controllers of different resources to work together, they must -+agree on a common organization. This uniform model across controllers -+imposes two contentious restrictions on the CPU controller: process -+granularity and the no-internal-process constraint. -+ -+ -+ 2-1-1. Process Granularity -+ -+ For memory, because an address space is shared between all threads -+ of a process, the terminal consumer is a process, not a thread. -+ Separating the threads of a single process into different memory -+ control domains doesn't make semantical sense. cgroup v2 ensures -+ that all controller can agree on the same organization by requiring -+ that threads of the same process belong to the same cgroup. -+ -+ There are other reasons to enforce process granularity. 
One -+ important one is isolating system-level management operations from -+ in-process application operations. The cgroup interface, being a -+ virtual filesystem, is very unfit for multiple independent -+ operations taking place at the same time as most operations have to -+ be multi-step and there is no way to synchronize multiple accessors. -+ See also [5] Documentation/cgroup-v2.txt, "R-2. Thread Granularity" -+ -+ -+ 2-1-2. No Internal Process Constraint -+ -+ cgroup v2 does not allow processes to belong to any cgroup which has -+ child cgroups when resource controllers are enabled on it (the -+ notable exception being the root cgroup itself). This is because, -+ for some resources, a resource domain (cgroup) is not directly -+ comparable to the terminal consumer (process/task) of said resource, -+ and so putting the two into a sibling relationship isn't meaningful. -+ -+ - Differing Control Parameters and Capabilities -+ -+ A cgroup controller has different resource control parameters and -+ capabilities from a terminal consumer, be that a task or process. -+ There are a couple cases where a cgroup control knob can be mapped -+ to a per-task or per-process API but they are exceptions and the -+ mappings aren't obvious even in those cases. -+ -+ For example, task priorities (also known as nice values) set -+ through setpriority(2) are mapped to the CPU controller -+ "cpu.shares" values. However, how exactly the two ranges map and -+ even the fact that they map to each other at all are not obvious. -+ -+ The situation gets further muddled when considering other resource -+ types and control knobs. IO priorities set through ioprio_set(2) -+ cannot be mapped to IO controller weights and most cgroup resource -+ control knobs including the bandwidth control knobs of the CPU -+ controller don't have counterparts in the terminal consumers. 
-+ -+ - Anonymous Resource Consumption -+ -+ For CPU, every time slice consumed from inside a cgroup, which -+ comprises most but not all of consumed CPU time for the cgroup, -+ can be clearly attributed to a specific task or process. Because -+ these two types of entities are directly comparable as consumers -+ of CPU time, it's theoretically possible to mix tasks and cgroups -+ on the same tree levels and let them directly compete for the time -+ quota available to their common ancestor. -+ -+ However, the same can't be said for resource types like memory or -+ IO: the memory consumed by the page cache, for example, can be -+ tracked on a per-cgroup level, but due to mismatches in lifetimes -+ of involved objects (page cache can persist long after processes -+ are gone), shared usages and the implementation overhead of -+ tracking persistent state, it can no longer be attributed to -+ individual processes after instantiation. Consequently, any IO -+ incurred by page cache writeback can be attributed to a cgroup, -+ but not to the individual consumers inside the cgroup. -+ -+ For memory and IO, this makes a resource domain (cgroup) an object -+ of a fundamentally different type than a terminal consumer -+ (process). A process can't be a first class object in the resource -+ distribution graph as its total resource consumption can't be -+ described without the containing resource domain. -+ -+ Disallowing processes in internal cgroups avoids competition between -+ cgroups and processes which cannot be meaningfully defined for these -+ resources. All resource control takes place among cgroups and a -+ terminal consumer interacts with the containing cgroup the same way -+ it would with the system without cgroup. -+ -+ Root cgroup is exempt from this constraint, which is in line with -+ how root cgroup is handled in general - it's excluded from cgroup -+ resource accounting and control. 
-+ -+ -+Enforcing process granularity and no internal process constraint -+allows all controllers to be on the same footing in terms of resource -+distribution hierarchy. -+ -+ -+2-2. Impact on CPU Controller -+ -+As indicated earlier, the CPU controller's resource distribution graph -+is the simplest. Every schedulable resource consumption can be -+attributed to a specific task. In addition, for weight based control, -+the per-task priority set through setpriority(2) can be translated to -+and from a per-cgroup weight. As such, the CPU controller can treat a -+task and a cgroup symmetrically, allowing support for any tree layout -+of cgroups and tasks. Both process granularity and the no internal -+process constraint restrict how the CPU controller can be used. -+ -+ -+ 2-2-1. Impact of Process Granularity -+ -+ Process granularity prevents tasks belonging to the same process to -+ be assigned to different cgroups. It was pointed out [6] that this -+ excludes the valid use case of hierarchical CPU distribution within -+ processes. -+ -+ To address this issue, the rgroup (resource group) [7][8][9] -+ interface, an extension of the existing setpriority(2) API, was -+ proposed, which is in line with other programmable priority -+ mechanisms and eliminates the risk of in-application configuration -+ and system configuration stepping on each other's toes. -+ Unfortunately, the proposal quickly turned into discussions around -+ cgroup v2 design decisions [4] and no consensus could be reached. -+ -+ -+ 2-2-2. Impact of No Internal Process Constraint -+ -+ The no internal process constraint disallows tasks from competing -+ directly against cgroups. 
Here is an excerpt from Peter Zijlstra -+ pointing out the issue [10] - R, L and A are cgroups; t1, t2, t3 and -+ t4 are tasks: -+ -+ -+ R -+ / | \ -+ t1 t2 A -+ / \ -+ t3 t4 -+ -+ -+ Is fundamentally different from: -+ -+ -+ R -+ / \ -+ L A -+ / \ / \ -+ t1 t2 t3 t4 -+ -+ -+ Because if in the first hierarchy you add a task (t5) to R, all of -+ its A will run at 1/4th of total bandwidth where before it had -+ 1/3rd, whereas with the second example, if you add our t5 to L, A -+ doesn't get any less bandwidth. -+ -+ -+ It is true that the trees are semantically different from each other -+ and the symmetric handling of tasks and cgroups is aesthetically -+ pleasing. However, it isn't clear what the practical usefulness of -+ a layout with direct competition between tasks and cgroups would be, -+ considering that number and behavior of tasks are controlled by each -+ application, and cgroups primarily deal with system level resource -+ distribution; changes in the number of active threads would directly -+ impact resource distribution. Real world use cases of such layouts -+ could not be established during the discussions. -+ -+ -+2-3. Arguments for cgroup v2 -+ -+There are strong demands for comprehensive hierarchical resource -+control across all major resources, and establishing a common resource -+hierarchy is an essential step. As with most engineering decisions, -+common resource hierarchy definition comes with its trade-offs. With -+cgroup v2, the trade-offs are in the form of structural constraints -+which, among others, restrict the CPU controller's space of possible -+configurations. -+ -+However, even with the restrictions, cgroup v2, in combination with -+rgroup, covers most of identified real world use cases while enabling -+new important use cases of resource control across multiple resource -+types that were fundamentally broken previously. 
-+ -+Furthermore, for resource control, treating resource domains as -+objects of a different type from terminal consumers has important -+advantages - it can account for resource consumptions which are not -+tied to any specific terminal consumer, be that a task or process, and -+allows decoupling resource distribution controls from in-application -+APIs. Even the CPU controller may benefit from it as the kernel can -+consume significant amount of CPU cycles in interrupt context or tasks -+shared across multiple resource domains (e.g. softirq). -+ -+Finally, it's important to note that enabling cgroup v2 support for -+the CPU controller doesn't block use cases which require the features -+which are not available on cgroup v2. Unlikely, but should anybody -+actually rely on the CPU controller's symmetric handling of tasks and -+cgroups, backward compatibility is and will be maintained by being -+able to disconnect the controller from the cgroup v2 hierarchy and use -+it standalone. This also holds for cpuset which is often used in -+highly customized configurations which might be a poor fit for common -+resource domains. -+ -+The required changes are minimal, the benefits for the target use -+cases are critical and obvious, and use cases which have to use v1 can -+continue to do so. -+ -+ -+3. Way Forward -+ -+cgroup v2 primarily aims to solve the problem of comprehensive -+hierarchical resource control across all major computing resources, -+which is one of the core problems of modern server infrastructure -+engineering. The trade-offs that cgroup v2 took are results of -+pursuing that goal and gaining a better understanding of the nature of -+resource control in the process. -+ -+I believe that real world usages will prove cgroup v2's model right, -+considering the crucial pieces of comprehensive resource control that -+cannot be implemented without common resource domains. 
This is not to -+say that cgroup v2 is fixed in stone and can't be updated; if there is -+an approach which better serves both comprehensive resource control -+and the CPU controller's flexibility, we will surely move towards -+that. It goes without saying that discussions around such approach -+should consider practical aspects of resource control as a whole -+rather than absolutely focusing on a particular controller. -+ -+Until such consensus can be reached, the CPU controller cgroup v2 -+support will be maintained out of the mainline kernel in an easily -+accessible form. If there is anything cgroup developers can do to -+ease the pain, please feel free to contact us on the cgroup mailing -+list at cgroups@vger.kernel.org. -+ -+ -+4. References -+ -+[1] http://lkml.kernel.org/r/20160105164834.GE5995@mtj.duckdns.org -+ [PATCH 1/2] sched: Misc preps for cgroup unified hierarchy interface -+ Tejun Heo <tj@kernel.org> -+ -+[2] http://lkml.kernel.org/r/20160105164852.GF5995@mtj.duckdns.org -+ [PATCH 2/2] sched: Implement interface for cgroup unified hierarchy -+ Tejun Heo <tj@kernel.org> -+ -+[3] http://lkml.kernel.org/r/1438641689-14655-4-git-send-email-tj@kernel.org -+ [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy -+ Tejun Heo <tj@kernel.org> -+ -+[4] http://lkml.kernel.org/r/20160407064549.GH3430@twins.programming.kicks-ass.net -+ Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP -+ Peter Zijlstra <peterz@infradead.org> -+ -+[5] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/cgroup-v2.txt -+ Control Group v2 -+ Tejun Heo <tj@kernel.org> -+ -+[6] http://lkml.kernel.org/r/CAPM31RJNy3jgG=DYe6GO=wyL4BPPxwUm1f2S6YXacQmo7viFZA@mail.gmail.com -+ Re: [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy -+ Paul Turner <pjt@google.com> -+ -+[7] http://lkml.kernel.org/r/20160105154503.GC5995@mtj.duckdns.org -+ [RFD] cgroup: thread granularity support for cpu 
controller -+ Tejun Heo <tj@kernel.org> -+ -+[8] http://lkml.kernel.org/r/1457710888-31182-1-git-send-email-tj@kernel.org -+ [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP -+ Tejun Heo <tj@kernel.org> -+ -+[9] http://lkml.kernel.org/r/20160311160522.GA24046@htj.duckdns.org -+ Example program for PRIO_RGRP -+ Tejun Heo <tj@kernel.org> -+ -+[10] http://lkml.kernel.org/r/20160407082810.GN3430@twins.programming.kicks-ass.net -+ Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource -+ Peter Zijlstra <peterz@infradead.org> diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch deleted file mode 100644 index 8f2418c9efc..00000000000 --- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch +++ /dev/null @@ -1,407 +0,0 @@ -commit e7cae741f6d645ac68fe8823ca6ef45dbbf6891b -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Misc preps for cgroup unified hierarchy interface - - Make the following changes in preparation for the cpu controller - interface implementation for the unified hierarchy. This patch - doesn't cause any functional differences. - - * s/cpu_stats_show()/cpu_cfs_stats_show()/ - - * s/cpu_files/cpu_legacy_files/ - - * Separate out cpuacct_stats_read() from cpuacct_stats_show(). While - at it, remove pointless cpuacct_stat_desc[] array. 
- - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 732e993..77f3ddd 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8512,7 +8512,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) - return ret; - } - --static int cpu_stats_show(struct seq_file *sf, void *v) -+static int cpu_cfs_stats_show(struct seq_file *sf, void *v) - { - struct task_group *tg = css_tg(seq_css(sf)); - struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -@@ -8552,7 +8552,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, - } - #endif /* CONFIG_RT_GROUP_SCHED */ - --static struct cftype cpu_files[] = { -+static struct cftype cpu_legacy_files[] = { - #ifdef CONFIG_FAIR_GROUP_SCHED - { - .name = "shares", -@@ -8573,7 +8573,7 @@ static struct cftype cpu_files[] = { - }, - { - .name = "stat", -- .seq_show = cpu_stats_show, -+ .seq_show = cpu_cfs_stats_show, - }, - #endif - #ifdef CONFIG_RT_GROUP_SCHED -@@ -8599,7 +8599,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .fork = cpu_cgroup_fork, - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, -- .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, - .early_init = 1, - }; - -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index dd7cbb5..42b2dd5 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -177,36 +177,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) - return 0; - } - --static const char * const cpuacct_stat_desc[] = { -- [CPUACCT_STAT_USER] = "user", -- [CPUACCT_STAT_SYSTEM] = "system", --}; -- --static int cpuacct_stats_show(struct seq_file *sf, void *v) -+static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp) - { -- struct cpuacct *ca = css_ca(seq_css(sf)); - 
int cpu; -- s64 val = 0; - -+ *userp = 0; - for_each_online_cpu(cpu) { - struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); -- val += kcpustat->cpustat[CPUTIME_USER]; -- val += kcpustat->cpustat[CPUTIME_NICE]; -+ *userp += kcpustat->cpustat[CPUTIME_USER]; -+ *userp += kcpustat->cpustat[CPUTIME_NICE]; - } -- val = cputime64_to_clock_t(val); -- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); - -- val = 0; -+ *sysp = 0; - for_each_online_cpu(cpu) { - struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); -- val += kcpustat->cpustat[CPUTIME_SYSTEM]; -- val += kcpustat->cpustat[CPUTIME_IRQ]; -- val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; -+ *sysp += kcpustat->cpustat[CPUTIME_SYSTEM]; -+ *sysp += kcpustat->cpustat[CPUTIME_IRQ]; -+ *sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ]; - } -+} - -- val = cputime64_to_clock_t(val); -- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); -+static int cpuacct_stats_show(struct seq_file *sf, void *v) -+{ -+ cputime64_t user, sys; - -+ cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys); -+ seq_printf(sf, "user %lld\n", cputime64_to_clock_t(user)); -+ seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys)); - return 0; - } - - -commit 1bb33e8a69f089f2d3f58a0e681d4ff352e11c97 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Implement interface for cgroup unified hierarchy - - While the cpu controller doesn't have any functional problems, there - are a couple interface issues which can be addressed in the v2 - interface. - - * cpuacct being a separate controller. This separation is artificial - and rather pointless as demonstrated by most use cases co-mounting - the two controllers. It also forces certain information to be - accounted twice. - - * Use of different time units. Writable control knobs use - microseconds, some stat fields use nanoseconds while other cpuacct - stat fields use centiseconds. 
- - * Control knobs which can't be used in the root cgroup still show up - in the root. - - * Control knob names and semantics aren't consistent with other - controllers. - - This patchset implements cpu controller's interface on the unified - hierarchy which adheres to the controller file conventions described - in Documentation/cgroups/unified-hierarchy.txt. Overall, the - following changes are made. - - * cpuacct is implictly enabled and disabled by cpu and its information - is reported through "cpu.stat" which now uses microseconds for all - time durations. All time duration fields now have "_usec" appended - to them for clarity. While this doesn't solve the double accounting - immediately, once majority of users switch to v2, cpu can directly - account and report the relevant stats and cpuacct can be disabled on - the unified hierarchy. - - Note that cpuacct.usage_percpu is currently not included in - "cpu.stat". If this information is actually called for, it can be - added later. - - * "cpu.shares" is replaced with "cpu.weight" and operates on the - standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000). - The weight is scaled to scheduler weight so that 100 maps to 1024 - and the ratio relationship is preserved - if weight is W and its - scaled value is S, W / 100 == S / 1024. While the mapped range is a - bit smaller than the orignal scheduler weight range, the dead zones - on both sides are relatively small and covers wider range than the - nice value mappings. This file doesn't make sense in the root - cgroup and isn't create on root. - - * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max" - which contains both quota and period. - - * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by - "cpu.rt.max" which contains both runtime and period. - - v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for - CFS bandwidth stats and also using raw division for u64. 
Use - CONFIG_CFS_BANDWITH and do_div() instead. - - The semantics of "cpu.rt.max" is not fully decided yet. Dropped - for now. - - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 77f3ddd..7aafe63 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8591,6 +8591,139 @@ static struct cftype cpu_legacy_files[] = { - { } /* terminate */ - }; - -+static int cpu_stats_show(struct seq_file *sf, void *v) -+{ -+ cpuacct_cpu_stats_show(sf); -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ struct task_group *tg = css_tg(seq_css(sf)); -+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -+ u64 throttled_usec; -+ -+ throttled_usec = cfs_b->throttled_time; -+ do_div(throttled_usec, NSEC_PER_USEC); -+ -+ seq_printf(sf, "nr_periods %d\n" -+ "nr_throttled %d\n" -+ "throttled_usec %llu\n", -+ cfs_b->nr_periods, cfs_b->nr_throttled, -+ throttled_usec); -+ } -+#endif -+ return 0; -+} -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ u64 weight = scale_load_down(tg->shares); -+ -+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); -+} -+ -+static int cpu_weight_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 weight) -+{ -+ /* -+ * cgroup weight knobs should use the common MIN, DFL and MAX -+ * values which are 1, 100 and 10000 respectively. While it loses -+ * a bit of range on both ends, it maps pretty well onto the shares -+ * value used by scheduler and the round-trip conversions preserve -+ * the original value over the entire range. 
-+ */ -+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) -+ return -ERANGE; -+ -+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); -+ -+ return sched_group_set_shares(css_tg(css), scale_load(weight)); -+} -+#endif -+ -+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, -+ long period, long quota) -+{ -+ if (quota < 0) -+ seq_puts(sf, "max"); -+ else -+ seq_printf(sf, "%ld", quota); -+ -+ seq_printf(sf, " %ld\n", period); -+} -+ -+/* caller should put the current value in *@periodp before calling */ -+static int __maybe_unused cpu_period_quota_parse(char *buf, -+ u64 *periodp, u64 *quotap) -+{ -+ char tok[21]; /* U64_MAX */ -+ -+ if (!sscanf(buf, "%s %llu", tok, periodp)) -+ return -EINVAL; -+ -+ *periodp *= NSEC_PER_USEC; -+ -+ if (sscanf(tok, "%llu", quotap)) -+ *quotap *= NSEC_PER_USEC; -+ else if (!strcmp(tok, "max")) -+ *quotap = RUNTIME_INF; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+static int cpu_max_show(struct seq_file *sf, void *v) -+{ -+ struct task_group *tg = css_tg(seq_css(sf)); -+ -+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); -+ return 0; -+} -+ -+static ssize_t cpu_max_write(struct kernfs_open_file *of, -+ char *buf, size_t nbytes, loff_t off) -+{ -+ struct task_group *tg = css_tg(of_css(of)); -+ u64 period = tg_get_cfs_period(tg); -+ u64 quota; -+ int ret; -+ -+ ret = cpu_period_quota_parse(buf, &period, &quota); -+ if (!ret) -+ ret = tg_set_cfs_bandwidth(tg, period, quota); -+ return ret ?: nbytes; -+} -+#endif -+ -+static struct cftype cpu_files[] = { -+ { -+ .name = "stat", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_stats_show, -+ }, -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ { -+ .name = "weight", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_u64 = cpu_weight_read_u64, -+ .write_u64 = cpu_weight_write_u64, -+ }, -+#endif -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ .name = "max", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_max_show, 
-+ .write = cpu_max_write, -+ }, -+#endif -+ { } /* terminate */ -+}; -+ - struct cgroup_subsys cpu_cgrp_subsys = { - .css_alloc = cpu_cgroup_css_alloc, - .css_free = cpu_cgroup_css_free, -@@ -8600,7 +8733,15 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, - .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, - .early_init = 1, -+#ifdef CONFIG_CGROUP_CPUACCT -+ /* -+ * cpuacct is enabled together with cpu on the unified hierarchy -+ * and its stats are reported through "cpu.stat". -+ */ -+ .depends_on = 1 << cpuacct_cgrp_id, -+#endif - }; - - #endif /* CONFIG_CGROUP_SCHED */ -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index 42b2dd5..b4d32a6 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -224,6 +224,30 @@ static struct cftype files[] = { - { } /* terminate */ - }; - -+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */ -+void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+ struct cgroup_subsys_state *css; -+ u64 usage, user, sys; -+ -+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys); -+ -+ usage = cpuusage_read(css, seq_cft(sf)); -+ cpuacct_stats_read(css_ca(css), &user, &sys); -+ -+ user *= TICK_NSEC; -+ sys *= TICK_NSEC; -+ do_div(usage, NSEC_PER_USEC); -+ do_div(user, NSEC_PER_USEC); -+ do_div(sys, NSEC_PER_USEC); -+ -+ seq_printf(sf, "usage_usec %llu\n" -+ "user_usec %llu\n" -+ "system_usec %llu\n", usage, user, sys); -+ -+ css_put(css); -+} -+ - /* - * charge this task's execution time to its accounting group. 
- * -diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h -index ed60562..44eace9 100644 ---- a/kernel/sched/cpuacct.h -+++ b/kernel/sched/cpuacct.h -@@ -2,6 +2,7 @@ - - extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); - extern void cpuacct_account_field(struct task_struct *p, int index, u64 val); -+extern void cpuacct_cpu_stats_show(struct seq_file *sf); - - #else - -@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *p, int index, u64 val) - { - } - -+static inline void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+} -+ - #endif diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch deleted file mode 100644 index 6f0904cbce9..00000000000 --- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.9.patch +++ /dev/null @@ -1,784 +0,0 @@ -commit 280858b0bb3384b9ec06b455e196b453888bd6b8 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Misc preps for cgroup unified hierarchy interface - - Make the following changes in preparation for the cpu controller - interface implementation for the unified hierarchy. This patch - doesn't cause any functional differences. - - * s/cpu_stats_show()/cpu_cfs_stats_show()/ - - * s/cpu_files/cpu_legacy_files/ - - * Separate out cpuacct_stats_read() from cpuacct_stats_show(). While - at it, make the @val array u64 for consistency. 
- - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 154fd689fe02..57472485b79c 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8705,7 +8705,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) - return ret; - } - --static int cpu_stats_show(struct seq_file *sf, void *v) -+static int cpu_cfs_stats_show(struct seq_file *sf, void *v) - { - struct task_group *tg = css_tg(seq_css(sf)); - struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -@@ -8745,7 +8745,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, - } - #endif /* CONFIG_RT_GROUP_SCHED */ - --static struct cftype cpu_files[] = { -+static struct cftype cpu_legacy_files[] = { - #ifdef CONFIG_FAIR_GROUP_SCHED - { - .name = "shares", -@@ -8766,7 +8766,7 @@ static struct cftype cpu_files[] = { - }, - { - .name = "stat", -- .seq_show = cpu_stats_show, -+ .seq_show = cpu_cfs_stats_show, - }, - #endif - #ifdef CONFIG_RT_GROUP_SCHED -@@ -8791,7 +8791,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .fork = cpu_cgroup_fork, - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, -- .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, - .early_init = true, - }; - -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index bc0b309c3f19..d1e5dd0b3a64 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -276,26 +276,33 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V) - return 0; - } - --static int cpuacct_stats_show(struct seq_file *sf, void *v) -+static void cpuacct_stats_read(struct cpuacct *ca, -+ u64 (*val)[CPUACCT_STAT_NSTATS]) - { -- struct cpuacct *ca = css_ca(seq_css(sf)); -- s64 val[CPUACCT_STAT_NSTATS]; - int cpu; -- int stat; - -- memset(val, 0, sizeof(val)); -+ 
memset(val, 0, sizeof(*val)); -+ - for_each_possible_cpu(cpu) { - u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; - -- val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; -- val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; -- val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; -+ (*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; -+ (*val)[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; -+ (*val)[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; - } -+} -+ -+static int cpuacct_stats_show(struct seq_file *sf, void *v) -+{ -+ u64 val[CPUACCT_STAT_NSTATS]; -+ int stat; -+ -+ cpuacct_stats_read(css_ca(seq_css(sf)), &val); - - for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { -- seq_printf(sf, "%s %lld\n", -+ seq_printf(sf, "%s %llu\n", - cpuacct_stat_desc[stat], - cputime64_to_clock_t(val[stat])); - } - -commit 015cbdcb90034fd566d00de9d3d405613da3cd26 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Implement interface for cgroup unified hierarchy - - While the cpu controller doesn't have any functional problems, there - are a couple interface issues which can be addressed in the v2 - interface. - - * cpuacct being a separate controller. This separation is artificial - and rather pointless as demonstrated by most use cases co-mounting - the two controllers. It also forces certain information to be - accounted twice. - - * Use of different time units. Writable control knobs use - microseconds, some stat fields use nanoseconds while other cpuacct - stat fields use centiseconds. - - * Control knobs which can't be used in the root cgroup still show up - in the root. - - * Control knob names and semantics aren't consistent with other - controllers. 
- - This patchset implements cpu controller's interface on the unified - hierarchy which adheres to the controller file conventions described - in Documentation/cgroups/unified-hierarchy.txt. Overall, the - following changes are made. - - * cpuacct is implictly enabled and disabled by cpu and its information - is reported through "cpu.stat" which now uses microseconds for all - time durations. All time duration fields now have "_usec" appended - to them for clarity. While this doesn't solve the double accounting - immediately, once majority of users switch to v2, cpu can directly - account and report the relevant stats and cpuacct can be disabled on - the unified hierarchy. - - Note that cpuacct.usage_percpu is currently not included in - "cpu.stat". If this information is actually called for, it can be - added later. - - * "cpu.shares" is replaced with "cpu.weight" and operates on the - standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000). - The weight is scaled to scheduler weight so that 100 maps to 1024 - and the ratio relationship is preserved - if weight is W and its - scaled value is S, W / 100 == S / 1024. While the mapped range is a - bit smaller than the orignal scheduler weight range, the dead zones - on both sides are relatively small and covers wider range than the - nice value mappings. This file doesn't make sense in the root - cgroup and isn't create on root. - - * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max" - which contains both quota and period. - - * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by - "cpu.rt.max" which contains both runtime and period. - - v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for - CFS bandwidth stats and also using raw division for u64. Use - CONFIG_CFS_BANDWITH and do_div() instead. - - The semantics of "cpu.rt.max" is not fully decided yet. Dropped - for now. 
- - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 57472485b79c..c0ae869f51c4 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8784,6 +8784,139 @@ static struct cftype cpu_legacy_files[] = { - { } /* terminate */ - }; - -+static int cpu_stats_show(struct seq_file *sf, void *v) -+{ -+ cpuacct_cpu_stats_show(sf); -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ struct task_group *tg = css_tg(seq_css(sf)); -+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -+ u64 throttled_usec; -+ -+ throttled_usec = cfs_b->throttled_time; -+ do_div(throttled_usec, NSEC_PER_USEC); -+ -+ seq_printf(sf, "nr_periods %d\n" -+ "nr_throttled %d\n" -+ "throttled_usec %llu\n", -+ cfs_b->nr_periods, cfs_b->nr_throttled, -+ throttled_usec); -+ } -+#endif -+ return 0; -+} -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ u64 weight = scale_load_down(tg->shares); -+ -+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); -+} -+ -+static int cpu_weight_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 weight) -+{ -+ /* -+ * cgroup weight knobs should use the common MIN, DFL and MAX -+ * values which are 1, 100 and 10000 respectively. While it loses -+ * a bit of range on both ends, it maps pretty well onto the shares -+ * value used by scheduler and the round-trip conversions preserve -+ * the original value over the entire range. 
-+ */ -+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) -+ return -ERANGE; -+ -+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); -+ -+ return sched_group_set_shares(css_tg(css), scale_load(weight)); -+} -+#endif -+ -+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, -+ long period, long quota) -+{ -+ if (quota < 0) -+ seq_puts(sf, "max"); -+ else -+ seq_printf(sf, "%ld", quota); -+ -+ seq_printf(sf, " %ld\n", period); -+} -+ -+/* caller should put the current value in *@periodp before calling */ -+static int __maybe_unused cpu_period_quota_parse(char *buf, -+ u64 *periodp, u64 *quotap) -+{ -+ char tok[21]; /* U64_MAX */ -+ -+ if (!sscanf(buf, "%s %llu", tok, periodp)) -+ return -EINVAL; -+ -+ *periodp *= NSEC_PER_USEC; -+ -+ if (sscanf(tok, "%llu", quotap)) -+ *quotap *= NSEC_PER_USEC; -+ else if (!strcmp(tok, "max")) -+ *quotap = RUNTIME_INF; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+static int cpu_max_show(struct seq_file *sf, void *v) -+{ -+ struct task_group *tg = css_tg(seq_css(sf)); -+ -+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); -+ return 0; -+} -+ -+static ssize_t cpu_max_write(struct kernfs_open_file *of, -+ char *buf, size_t nbytes, loff_t off) -+{ -+ struct task_group *tg = css_tg(of_css(of)); -+ u64 period = tg_get_cfs_period(tg); -+ u64 quota; -+ int ret; -+ -+ ret = cpu_period_quota_parse(buf, &period, &quota); -+ if (!ret) -+ ret = tg_set_cfs_bandwidth(tg, period, quota); -+ return ret ?: nbytes; -+} -+#endif -+ -+static struct cftype cpu_files[] = { -+ { -+ .name = "stat", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_stats_show, -+ }, -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ { -+ .name = "weight", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_u64 = cpu_weight_read_u64, -+ .write_u64 = cpu_weight_write_u64, -+ }, -+#endif -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ .name = "max", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_max_show, 
-+ .write = cpu_max_write, -+ }, -+#endif -+ { } /* terminate */ -+}; -+ - struct cgroup_subsys cpu_cgrp_subsys = { - .css_alloc = cpu_cgroup_css_alloc, - .css_released = cpu_cgroup_css_released, -@@ -8792,7 +8925,15 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, - .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, - .early_init = true, -+#ifdef CONFIG_CGROUP_CPUACCT -+ /* -+ * cpuacct is enabled together with cpu on the unified hierarchy -+ * and its stats are reported through "cpu.stat". -+ */ -+ .depends_on = 1 << cpuacct_cgrp_id, -+#endif - }; - - #endif /* CONFIG_CGROUP_SCHED */ -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index d1e5dd0b3a64..57f390514c39 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -347,6 +347,31 @@ static struct cftype files[] = { - { } /* terminate */ - }; - -+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */ -+void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+ struct cgroup_subsys_state *css; -+ u64 usage, val[CPUACCT_STAT_NSTATS]; -+ -+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys); -+ -+ usage = cpuusage_read(css, seq_cft(sf)); -+ cpuacct_stats_read(css_ca(css), &val); -+ -+ val[CPUACCT_STAT_USER] *= TICK_NSEC; -+ val[CPUACCT_STAT_SYSTEM] *= TICK_NSEC; -+ do_div(usage, NSEC_PER_USEC); -+ do_div(val[CPUACCT_STAT_USER], NSEC_PER_USEC); -+ do_div(val[CPUACCT_STAT_SYSTEM], NSEC_PER_USEC); -+ -+ seq_printf(sf, "usage_usec %llu\n" -+ "user_usec %llu\n" -+ "system_usec %llu\n", -+ usage, val[CPUACCT_STAT_USER], val[CPUACCT_STAT_SYSTEM]); -+ -+ css_put(css); -+} -+ - /* - * charge this task's execution time to its accounting group. 
- * -diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h -index ba72807c73d4..ddf7af466d35 100644 ---- a/kernel/sched/cpuacct.h -+++ b/kernel/sched/cpuacct.h -@@ -2,6 +2,7 @@ - - extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); - extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); -+extern void cpuacct_cpu_stats_show(struct seq_file *sf); - - #else - -@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *tsk, int index, u64 val) - { - } - -+static inline void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+} -+ - #endif - -commit 5019fe3d7ec456b58d451ef06fe1f81d7d9f28a9 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Aug 5 12:41:01 2016 -0400 - - cgroup: add documentation regarding CPU controller cgroup v2 support - - Signed-off-by: Tejun Heo <tj@kernel.org> - -diff --git a/Documentation/cgroup-v2-cpu.txt b/Documentation/cgroup-v2-cpu.txt -new file mode 100644 -index 000000000000..1ed7032d4472 ---- /dev/null -+++ b/Documentation/cgroup-v2-cpu.txt -@@ -0,0 +1,368 @@ -+ -+ -+CPU Controller on Control Group v2 -+ -+August, 2016 Tejun Heo <tj@kernel.org> -+ -+ -+While most controllers have support for cgroup v2 now, the CPU -+controller support is not upstream yet due to objections from the -+scheduler maintainers on the basic designs of cgroup v2. This -+document explains the current situation as well as an interim -+solution, and details the disagreements and arguments. The latest -+version of this document can be found at the following URL. -+ -+ https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu -+ -+This document was posted to the linux-kernel and cgroup mailing lists. -+Unfortunately, no consensus was reached as of Oct, 2016. The thread -+can be found at the following URL. -+ -+ http://lkml.kernel.org/r/20160805170752.GK2542@mtj.duckdns.org -+ -+ -+CONTENTS -+ -+1. Current Situation and Interim Solution -+2. Disagreements and Arguments -+ 2-1. 
Contentious Restrictions -+ 2-1-1. Process Granularity -+ 2-1-2. No Internal Process Constraint -+ 2-2. Impact on CPU Controller -+ 2-2-1. Impact of Process Granularity -+ 2-2-2. Impact of No Internal Process Constraint -+ 2-3. Arguments for cgroup v2 -+3. Way Forward -+4. References -+ -+ -+1. Current Situation and Interim Solution -+ -+All objections from the scheduler maintainers apply to cgroup v2 core -+design, and there are no known objections to the specifics of the CPU -+controller cgroup v2 interface. The only blocked part is changes to -+expose the CPU controller interface on cgroup v2, which comprises the -+following two patches: -+ -+ [1] sched: Misc preps for cgroup unified hierarchy interface -+ [2] sched: Implement interface for cgroup unified hierarchy -+ -+The necessary changes are superficial and implement the interface -+files on cgroup v2. The combined diffstat is as follows. -+ -+ kernel/sched/core.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++-- -+ kernel/sched/cpuacct.c | 57 ++++++++++++------ -+ kernel/sched/cpuacct.h | 5 + -+ 3 files changed, 189 insertions(+), 22 deletions(-) -+ -+The patches are easy to apply and forward-port. The following git -+branch will always carry the two patches on top of the latest release -+of the upstream kernel. -+ -+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu -+ -+There also are versioned branches going back to v4.4. -+ -+ git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git/cgroup-v2-cpu-$KERNEL_VER -+ -+While it's difficult to tell whether the CPU controller support will -+be merged, there are crucial resource control features in cgroup v2 -+that are only possible due to the design choices that are being -+objected to, and every effort will be made to ease enabling the CPU -+controller cgroup v2 support out-of-tree for parties which choose to. -+ -+ -+2. 
Disagreements and Arguments -+ -+There have been several lengthy discussion threads [3][4] on LKML -+around the structural constraints of cgroup v2. The two that affect -+the CPU controller are process granularity and no internal process -+constraint. Both arise primarily from the need for common resource -+domain definition across different resources. -+ -+The common resource domain is a powerful concept in cgroup v2 that -+allows controllers to make basic assumptions about the structural -+organization of processes and controllers inside the cgroup hierarchy, -+and thus solve problems spanning multiple types of resources. The -+prime example for this is page cache writeback: dirty page cache is -+regulated through throttling buffered writers based on memory -+availability, and initiating batched write outs to the disk based on -+IO capacity. Tracking and controlling writeback inside a cgroup thus -+requires the direct cooperation of the memory and the IO controller. -+ -+This easily extends to other areas, such as CPU cycles consumed while -+performing memory reclaim or IO encryption. -+ -+ -+2-1. Contentious Restrictions -+ -+For controllers of different resources to work together, they must -+agree on a common organization. This uniform model across controllers -+imposes two contentious restrictions on the CPU controller: process -+granularity and the no-internal-process constraint. -+ -+ -+ 2-1-1. Process Granularity -+ -+ For memory, because an address space is shared between all threads -+ of a process, the terminal consumer is a process, not a thread. -+ Separating the threads of a single process into different memory -+ control domains doesn't make semantical sense. cgroup v2 ensures -+ that all controller can agree on the same organization by requiring -+ that threads of the same process belong to the same cgroup. -+ -+ There are other reasons to enforce process granularity. 
One -+ important one is isolating system-level management operations from -+ in-process application operations. The cgroup interface, being a -+ virtual filesystem, is very unfit for multiple independent -+ operations taking place at the same time as most operations have to -+ be multi-step and there is no way to synchronize multiple accessors. -+ See also [5] Documentation/cgroup-v2.txt, "R-2. Thread Granularity" -+ -+ -+ 2-1-2. No Internal Process Constraint -+ -+ cgroup v2 does not allow processes to belong to any cgroup which has -+ child cgroups when resource controllers are enabled on it (the -+ notable exception being the root cgroup itself). This is because, -+ for some resources, a resource domain (cgroup) is not directly -+ comparable to the terminal consumer (process/task) of said resource, -+ and so putting the two into a sibling relationship isn't meaningful. -+ -+ - Differing Control Parameters and Capabilities -+ -+ A cgroup controller has different resource control parameters and -+ capabilities from a terminal consumer, be that a task or process. -+ There are a couple cases where a cgroup control knob can be mapped -+ to a per-task or per-process API but they are exceptions and the -+ mappings aren't obvious even in those cases. -+ -+ For example, task priorities (also known as nice values) set -+ through setpriority(2) are mapped to the CPU controller -+ "cpu.shares" values. However, how exactly the two ranges map and -+ even the fact that they map to each other at all are not obvious. -+ -+ The situation gets further muddled when considering other resource -+ types and control knobs. IO priorities set through ioprio_set(2) -+ cannot be mapped to IO controller weights and most cgroup resource -+ control knobs including the bandwidth control knobs of the CPU -+ controller don't have counterparts in the terminal consumers. 
-+ -+ - Anonymous Resource Consumption -+ -+ For CPU, every time slice consumed from inside a cgroup, which -+ comprises most but not all of consumed CPU time for the cgroup, -+ can be clearly attributed to a specific task or process. Because -+ these two types of entities are directly comparable as consumers -+ of CPU time, it's theoretically possible to mix tasks and cgroups -+ on the same tree levels and let them directly compete for the time -+ quota available to their common ancestor. -+ -+ However, the same can't be said for resource types like memory or -+ IO: the memory consumed by the page cache, for example, can be -+ tracked on a per-cgroup level, but due to mismatches in lifetimes -+ of involved objects (page cache can persist long after processes -+ are gone), shared usages and the implementation overhead of -+ tracking persistent state, it can no longer be attributed to -+ individual processes after instantiation. Consequently, any IO -+ incurred by page cache writeback can be attributed to a cgroup, -+ but not to the individual consumers inside the cgroup. -+ -+ For memory and IO, this makes a resource domain (cgroup) an object -+ of a fundamentally different type than a terminal consumer -+ (process). A process can't be a first class object in the resource -+ distribution graph as its total resource consumption can't be -+ described without the containing resource domain. -+ -+ Disallowing processes in internal cgroups avoids competition between -+ cgroups and processes which cannot be meaningfully defined for these -+ resources. All resource control takes place among cgroups and a -+ terminal consumer interacts with the containing cgroup the same way -+ it would with the system without cgroup. -+ -+ Root cgroup is exempt from this constraint, which is in line with -+ how root cgroup is handled in general - it's excluded from cgroup -+ resource accounting and control. 
-+ -+ -+Enforcing process granularity and no internal process constraint -+allows all controllers to be on the same footing in terms of resource -+distribution hierarchy. -+ -+ -+2-2. Impact on CPU Controller -+ -+As indicated earlier, the CPU controller's resource distribution graph -+is the simplest. Every schedulable resource consumption can be -+attributed to a specific task. In addition, for weight based control, -+the per-task priority set through setpriority(2) can be translated to -+and from a per-cgroup weight. As such, the CPU controller can treat a -+task and a cgroup symmetrically, allowing support for any tree layout -+of cgroups and tasks. Both process granularity and the no internal -+process constraint restrict how the CPU controller can be used. -+ -+ -+ 2-2-1. Impact of Process Granularity -+ -+ Process granularity prevents tasks belonging to the same process to -+ be assigned to different cgroups. It was pointed out [6] that this -+ excludes the valid use case of hierarchical CPU distribution within -+ processes. -+ -+ To address this issue, the rgroup (resource group) [7][8][9] -+ interface, an extension of the existing setpriority(2) API, was -+ proposed, which is in line with other programmable priority -+ mechanisms and eliminates the risk of in-application configuration -+ and system configuration stepping on each other's toes. -+ Unfortunately, the proposal quickly turned into discussions around -+ cgroup v2 design decisions [4] and no consensus could be reached. -+ -+ -+ 2-2-2. Impact of No Internal Process Constraint -+ -+ The no internal process constraint disallows tasks from competing -+ directly against cgroups. 
Here is an excerpt from Peter Zijlstra -+ pointing out the issue [10] - R, L and A are cgroups; t1, t2, t3 and -+ t4 are tasks: -+ -+ -+ R -+ / | \ -+ t1 t2 A -+ / \ -+ t3 t4 -+ -+ -+ Is fundamentally different from: -+ -+ -+ R -+ / \ -+ L A -+ / \ / \ -+ t1 t2 t3 t4 -+ -+ -+ Because if in the first hierarchy you add a task (t5) to R, all of -+ its A will run at 1/4th of total bandwidth where before it had -+ 1/3rd, whereas with the second example, if you add our t5 to L, A -+ doesn't get any less bandwidth. -+ -+ -+ It is true that the trees are semantically different from each other -+ and the symmetric handling of tasks and cgroups is aesthetically -+ pleasing. However, it isn't clear what the practical usefulness of -+ a layout with direct competition between tasks and cgroups would be, -+ considering that number and behavior of tasks are controlled by each -+ application, and cgroups primarily deal with system level resource -+ distribution; changes in the number of active threads would directly -+ impact resource distribution. Real world use cases of such layouts -+ could not be established during the discussions. -+ -+ -+2-3. Arguments for cgroup v2 -+ -+There are strong demands for comprehensive hierarchical resource -+control across all major resources, and establishing a common resource -+hierarchy is an essential step. As with most engineering decisions, -+common resource hierarchy definition comes with its trade-offs. With -+cgroup v2, the trade-offs are in the form of structural constraints -+which, among others, restrict the CPU controller's space of possible -+configurations. -+ -+However, even with the restrictions, cgroup v2, in combination with -+rgroup, covers most of identified real world use cases while enabling -+new important use cases of resource control across multiple resource -+types that were fundamentally broken previously. 
-+ -+Furthermore, for resource control, treating resource domains as -+objects of a different type from terminal consumers has important -+advantages - it can account for resource consumptions which are not -+tied to any specific terminal consumer, be that a task or process, and -+allows decoupling resource distribution controls from in-application -+APIs. Even the CPU controller may benefit from it as the kernel can -+consume significant amount of CPU cycles in interrupt context or tasks -+shared across multiple resource domains (e.g. softirq). -+ -+Finally, it's important to note that enabling cgroup v2 support for -+the CPU controller doesn't block use cases which require the features -+which are not available on cgroup v2. Unlikely, but should anybody -+actually rely on the CPU controller's symmetric handling of tasks and -+cgroups, backward compatibility is and will be maintained by being -+able to disconnect the controller from the cgroup v2 hierarchy and use -+it standalone. This also holds for cpuset which is often used in -+highly customized configurations which might be a poor fit for common -+resource domains. -+ -+The required changes are minimal, the benefits for the target use -+cases are critical and obvious, and use cases which have to use v1 can -+continue to do so. -+ -+ -+3. Way Forward -+ -+cgroup v2 primarily aims to solve the problem of comprehensive -+hierarchical resource control across all major computing resources, -+which is one of the core problems of modern server infrastructure -+engineering. The trade-offs that cgroup v2 took are results of -+pursuing that goal and gaining a better understanding of the nature of -+resource control in the process. -+ -+I believe that real world usages will prove cgroup v2's model right, -+considering the crucial pieces of comprehensive resource control that -+cannot be implemented without common resource domains. 
This is not to -+say that cgroup v2 is fixed in stone and can't be updated; if there is -+an approach which better serves both comprehensive resource control -+and the CPU controller's flexibility, we will surely move towards -+that. It goes without saying that discussions around such approach -+should consider practical aspects of resource control as a whole -+rather than absolutely focusing on a particular controller. -+ -+Until such consensus can be reached, the CPU controller cgroup v2 -+support will be maintained out of the mainline kernel in an easily -+accessible form. If there is anything cgroup developers can do to -+ease the pain, please feel free to contact us on the cgroup mailing -+list at cgroups@vger.kernel.org. -+ -+ -+4. References -+ -+[1] http://lkml.kernel.org/r/20160105164834.GE5995@mtj.duckdns.org -+ [PATCH 1/2] sched: Misc preps for cgroup unified hierarchy interface -+ Tejun Heo <tj@kernel.org> -+ -+[2] http://lkml.kernel.org/r/20160105164852.GF5995@mtj.duckdns.org -+ [PATCH 2/2] sched: Implement interface for cgroup unified hierarchy -+ Tejun Heo <tj@kernel.org> -+ -+[3] http://lkml.kernel.org/r/1438641689-14655-4-git-send-email-tj@kernel.org -+ [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy -+ Tejun Heo <tj@kernel.org> -+ -+[4] http://lkml.kernel.org/r/20160407064549.GH3430@twins.programming.kicks-ass.net -+ Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP -+ Peter Zijlstra <peterz@infradead.org> -+ -+[5] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/cgroup-v2.txt -+ Control Group v2 -+ Tejun Heo <tj@kernel.org> -+ -+[6] http://lkml.kernel.org/r/CAPM31RJNy3jgG=DYe6GO=wyL4BPPxwUm1f2S6YXacQmo7viFZA@mail.gmail.com -+ Re: [PATCH 3/3] sched: Implement interface for cgroup unified hierarchy -+ Paul Turner <pjt@google.com> -+ -+[7] http://lkml.kernel.org/r/20160105154503.GC5995@mtj.duckdns.org -+ [RFD] cgroup: thread granularity support for cpu 
controller -+ Tejun Heo <tj@kernel.org> -+ -+[8] http://lkml.kernel.org/r/1457710888-31182-1-git-send-email-tj@kernel.org -+ [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource group and PRIO_RGRP -+ Tejun Heo <tj@kernel.org> -+ -+[9] http://lkml.kernel.org/r/20160311160522.GA24046@htj.duckdns.org -+ Example program for PRIO_RGRP -+ Tejun Heo <tj@kernel.org> -+ -+[10] http://lkml.kernel.org/r/20160407082810.GN3430@twins.programming.kicks-ass.net -+ Re: [PATCHSET RFC cgroup/for-4.6] cgroup, sched: implement resource -+ Peter Zijlstra <peterz@infradead.org> diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md deleted file mode 100644 index 6d48fde6bee..00000000000 --- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/README.md +++ /dev/null @@ -1,21 +0,0 @@ -Patches for CPU Controller on Control Group v2 -=============================================== - -See Tejun Heo's [explanation][1] for why these patches are currently -out-of-tree. 
- -Generating the patches ------------------------ - -In a linux checkout, with remote tc-cgroup pointing to -git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git, your -nixpkgs checkout in the same directory as your linux checkout (or -modify the command accordingly), and setting `ver` to the appropriate -version: - -```shell -$ ver=4.7 -$ git log --reverse --patch v$ver..remotes/tc-cgroup/cgroup-v2-cpu-v$ver > ../nixpkgs/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/$ver.patch -``` - -[1]: https://git.kernel.org/cgit/linux/kernel/git/tj/cgroup.git/tree/Documentation/cgroup-v2-cpu.txt?h=cgroup-v2-cpu diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/default.nix b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/default.nix deleted file mode 100644 index 5bef5633aa0..00000000000 --- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/default.nix +++ /dev/null @@ -1,11 +0,0 @@ -let - ents = builtins.readDir ./.; -in builtins.listToAttrs (builtins.filter (x: x != null) (map (name: let - match = builtins.match "(.*)\\.patch" name; -in if match == null then null else { - name = builtins.head match; - value = { - name = "cpu-cgroup-v2-${name}"; - patch = ./. 
+ "/${name}"; - }; -}) (builtins.attrNames ents))) diff --git a/pkgs/os-specific/linux/kernel/gen-kheaders-metadata.patch b/pkgs/os-specific/linux/kernel/gen-kheaders-metadata.patch deleted file mode 100644 index 0639f8b4e8f..00000000000 --- a/pkgs/os-specific/linux/kernel/gen-kheaders-metadata.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 2cc99c9cdc8fde5e92e34f9655829449cebd3e00 Mon Sep 17 00:00:00 2001 -From: Dmitry Goldin <dgoldin+lkml@protonmail.ch> -Date: Fri, 4 Oct 2019 10:40:07 +0000 -Subject: kheaders: make headers archive reproducible - -In commit 43d8ce9d65a5 ("Provide in-kernel headers to make -extending kernel easier") a new mechanism was introduced, for kernels ->=5.2, which embeds the kernel headers in the kernel image or a module -and exposes them in procfs for use by userland tools. - -The archive containing the header files has nondeterminism caused by -header files metadata. This patch normalizes the metadata and utilizes -KBUILD_BUILD_TIMESTAMP if provided and otherwise falls back to the -default behaviour. - -In commit f7b101d33046 ("kheaders: Move from proc to sysfs") it was -modified to use sysfs and the script for generation of the archive was -renamed to what is being patched. - -Signed-off-by: Dmitry Goldin <dgoldin+lkml@protonmail.ch> -Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org> -Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> - ---- - -nixos note: This patch is from -https://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git/commit/?h=fixes&id=2cc99c9cdc8fde5e92e34f9655829449cebd3e00 -I commented out the documentation part here, so that it easily applies -to linux 5.2 and 5.3, which does not ship with the reproducible build -documentation yet, which only was introduced recently. 
- ---- - Documentation/kbuild/reproducible-builds.rst | 13 +++++++++---- - kernel/gen_kheaders.sh | 5 ++++- - 2 files changed, 13 insertions(+), 5 deletions(-) - -#diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst -#index ab92e98c89c8..503393854e2e 100644 -# --- a/Documentation/kbuild/reproducible-builds.rst -#+++ b/Documentation/kbuild/reproducible-builds.rst -#@@ -16,16 +16,21 @@ the kernel may be unreproducible, and how to avoid them. -# Timestamps -# ---------- -# -#-The kernel embeds a timestamp in two places: -#+The kernel embeds timestamps in three places: -# -# * The version string exposed by ``uname()`` and included in -# ``/proc/version`` -# -# * File timestamps in the embedded initramfs -# -#-By default the timestamp is the current time. This must be overridden -#-using the `KBUILD_BUILD_TIMESTAMP`_ variable. If you are building -#-from a git commit, you could use its commit date. -#+* If enabled via ``CONFIG_IKHEADERS``, file timestamps of kernel -#+ headers embedded in the kernel or respective module, -#+ exposed via ``/sys/kernel/kheaders.tar.xz`` -#+ -#+By default the timestamp is the current time and in the case of -#+``kheaders`` the various files' modification times. This must -#+be overridden using the `KBUILD_BUILD_TIMESTAMP`_ variable. -#+If you are building from a git commit, you could use its commit date. -# -# The kernel does *not* use the ``__DATE__`` and ``__TIME__`` macros, -# and enables warnings if they are used. If you incorporate external -diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh -index 9ff449888d9c..aff79e461fc9 100755 ---- a/kernel/gen_kheaders.sh -+++ b/kernel/gen_kheaders.sh -@@ -71,7 +71,10 @@ done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 - find $cpio_dir -type f -print0 | - xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' - --tar -Jcf $tarfile -C $cpio_dir/ . 
> /dev/null -+# Create archive and try to normalize metadata for reproducibility -+tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ -+ --owner=0 --group=0 --sort=name --numeric-owner \ -+ -Jcf $tarfile -C $cpio_dir/ . > /dev/null - - echo "$src_files_md5" > kernel/kheaders.md5 - echo "$obj_files_md5" >> kernel/kheaders.md5 --- -cgit 1.2-0.3.lf.el7 - diff --git a/pkgs/os-specific/linux/kernel/generate-config.pl b/pkgs/os-specific/linux/kernel/generate-config.pl index df807188f14..7e12ca5d96a 100644 --- a/pkgs/os-specific/linux/kernel/generate-config.pl +++ b/pkgs/os-specific/linux/kernel/generate-config.pl @@ -81,7 +81,7 @@ sub runConfig { my $question = $1; my $name = $2; my $alts = $3; my $answer = ""; # Build everything as a module if possible. - $answer = "m" if $autoModules && $alts =~ /\/m/ && !($preferBuiltin && $alts =~ /Y/); + $answer = "m" if $autoModules && $alts =~ qr{\A(\w/)+m/(\w/)*\?\z} && !($preferBuiltin && $alts =~ /Y/); $answer = $answers{$name} if defined $answers{$name}; print STDERR "QUESTION: $question, NAME: $name, ALTS: $alts, ANSWER: $answer\n" if $debug; print OUT "$answer\n"; diff --git a/pkgs/os-specific/linux/kernel/generic.nix b/pkgs/os-specific/linux/kernel/generic.nix index 7f4f0f2d6bb..df67005dd81 100644 --- a/pkgs/os-specific/linux/kernel/generic.nix +++ b/pkgs/os-specific/linux/kernel/generic.nix @@ -6,6 +6,7 @@ , gmp ? null , libmpc ? null , mpfr ? null +, pahole , lib , stdenv @@ -24,11 +25,16 @@ # Additional make flags passed to kbuild , extraMakeFlags ? [] +, # enables the options in ./common-config.nix; if `false` then only + # `structuredExtraConfig` is used + enableCommonConfig ? true + , # kernel intermediate config overrides, as a set structuredExtraConfig ? {} , # The version number used for the module directory - modDirVersion ? version + # If unspecified, this is determined automatically from the version. + modDirVersion ? 
null , # An attribute set whose attributes express the availability of # certain features in this kernel. E.g. `{iwlwifi = true;}' @@ -45,8 +51,7 @@ # symbolic name and `patch' is the actual patch. The patch may # optionally be compressed with gzip or bzip2. kernelPatches ? [] -, ignoreConfigErrors ? stdenv.hostPlatform.linux-kernel.name != "pc" || - stdenv.hostPlatform != stdenv.buildPlatform +, ignoreConfigErrors ? stdenv.hostPlatform.linux-kernel.name != "pc" , extraMeta ? {} , isZen ? false @@ -58,8 +63,9 @@ , preferBuiltin ? stdenv.hostPlatform.linux-kernel.preferBuiltin or false , kernelArch ? stdenv.hostPlatform.linuxArch , kernelTests ? [] +, nixosTests , ... -}: +}@args: # Note: this package is used for bootstrapping fetchurl, and thus # cannot use fetchpatch! All mutable patches (generated by GitHub or @@ -69,6 +75,18 @@ assert stdenv.isLinux; let + # Dirty hack to make sure that `version` & `src` have + # `<nixpkgs/pkgs/os-specific/linux/kernel/linux-x.y.nix>` as position + # when using `builtins.unsafeGetAttrPos`. + # + # This is to make sure that ofborg actually detects changes in the kernel derivation + # and pings all maintainers. + # + # For further context, see https://github.com/NixOS/nixpkgs/pull/143113#issuecomment-953319957 + basicArgs = builtins.removeAttrs + args + (lib.filter (x: ! (builtins.elem x [ "version" "src" ])) (lib.attrNames args)); + # Combine the `features' attribute sets of all the kernel patches. kernelFeatures = lib.foldr (x: y: (x.features or {}) // y) ({ iwlwifi = true; @@ -111,18 +129,17 @@ let depsBuildBuild = [ buildPackages.stdenv.cc ]; nativeBuildInputs = [ perl gmp libmpc mpfr ] - ++ lib.optionals (lib.versionAtLeast version "4.16") [ bison flex ]; + ++ lib.optionals (lib.versionAtLeast version "4.16") [ bison flex ] + ++ lib.optional (lib.versionAtLeast version "5.2") pahole; platformName = stdenv.hostPlatform.linux-kernel.name; # e.g. 
"defconfig" kernelBaseConfig = if defconfig != null then defconfig else stdenv.hostPlatform.linux-kernel.baseConfig; - # e.g. "bzImage" - kernelTarget = stdenv.hostPlatform.linux-kernel.target; makeFlags = lib.optionals (stdenv.hostPlatform.linux-kernel ? makeFlags) stdenv.hostPlatform.linux-kernel.makeFlags ++ extraMakeFlags; - prePatch = kernel.prePatch + '' + postPatch = kernel.postPatch + '' # Patch kconfig to print "###" after every question so that # generate-config.pl from the generic builder can answer them. sed -e '/fflush(stdout);/i\printf("###");' -i scripts/kconfig/conf.c @@ -168,7 +185,9 @@ let moduleStructuredConfig = (lib.evalModules { modules = [ module + ] ++ lib.optionals enableCommonConfig [ { settings = commonStructuredConfig; _file = "pkgs/os-specific/linux/kernel/common-config.nix"; } + ] ++ [ { settings = structuredExtraConfig; _file = "structuredExtraConfig"; } ] ++ structuredConfigFromPatches @@ -179,20 +198,37 @@ let }; }; # end of configfile derivation - kernel = (callPackage ./manual-config.nix { inherit buildPackages; }) { - inherit version modDirVersion src kernelPatches randstructSeed lib stdenv extraMakeFlags extraMeta configfile; + kernel = (callPackage ./manual-config.nix { inherit lib stdenv buildPackages; }) (basicArgs // { + inherit kernelPatches randstructSeed extraMakeFlags extraMeta configfile; + pos = builtins.unsafeGetAttrPos "version" args; config = { CONFIG_MODULES = "y"; CONFIG_FW_LOADER = "m"; }; - }; + } // lib.optionalAttrs (modDirVersion != null) { inherit modDirVersion; }); - passthru = { + passthru = basicArgs // { features = kernelFeatures; - inherit commonStructuredConfig structuredExtraConfig extraMakeFlags isZen isHardened isLibre modDirVersion; + inherit commonStructuredConfig structuredExtraConfig extraMakeFlags isZen isHardened isLibre; isXen = lib.warn "The isXen attribute is deprecated. All Nixpkgs kernels that support it now have Xen enabled." 
true; - kernelOlder = lib.versionOlder version; - kernelAtLeast = lib.versionAtLeast version; + + # Adds dependencies needed to edit the config: + # nix-shell '<nixpkgs>' -A linux.configEnv --command 'make nconfig' + configEnv = kernel.overrideAttrs (old: { + nativeBuildInputs = old.nativeBuildInputs or [] ++ (with buildPackages; [ + pkg-config ncurses + ]); + }); + passthru = kernel.passthru // (removeAttrs passthru [ "passthru" ]); - tests = kernelTests; + tests = let + overridableKernel = finalKernel // { + override = args: + lib.warn ( + "override is stubbed for NixOS kernel tests, not applying changes these arguments: " + + toString (lib.attrNames (if lib.isAttrs args then args else args {})) + ) overridableKernel; + }; + in [ (nixosTests.kernel-generic.passthru.testsForKernel overridableKernel) ] ++ kernelTests; }; -in lib.extendDerivation true passthru kernel + finalKernel = lib.extendDerivation true passthru kernel; +in finalKernel diff --git a/pkgs/os-specific/linux/kernel/genksyms-fix-segfault.patch b/pkgs/os-specific/linux/kernel/genksyms-fix-segfault.patch deleted file mode 100644 index 47ae77a5a54..00000000000 --- a/pkgs/os-specific/linux/kernel/genksyms-fix-segfault.patch +++ /dev/null @@ -1,19 +0,0 @@ -diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c -index 88632df..ba6cfa9 100644 ---- a/scripts/genksyms/genksyms.c -+++ b/scripts/genksyms/genksyms.c -@@ -233,11 +233,11 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, - free_list(last_enum_expr, NULL); - last_enum_expr = NULL; - enum_counter = 0; -- if (!name) -- /* Anonymous enum definition, nothing more to do */ -- return NULL; - } - -+ if (!name) -+ return NULL; -+ - h = crc32(name) % HASH_BUCKETS; - for (sym = symtab[h]; sym; sym = sym->hash_next) { - if (map_to_ns(sym->type) == map_to_ns(type) && diff --git a/pkgs/os-specific/linux/kernel/gpio-utils.nix b/pkgs/os-specific/linux/kernel/gpio-utils.nix index e28d838efb3..40e282bbf54 100644 --- 
a/pkgs/os-specific/linux/kernel/gpio-utils.nix +++ b/pkgs/os-specific/linux/kernel/gpio-utils.nix @@ -2,10 +2,9 @@ with lib; -assert versionAtLeast linux.version "4.6"; - stdenv.mkDerivation { - name = "gpio-utils-${linux.version}"; + pname = "gpio-utils"; + version = linux.version; inherit (linux) src makeFlags; diff --git a/pkgs/os-specific/linux/kernel/hardened/config.nix b/pkgs/os-specific/linux/kernel/hardened/config.nix index 20f9f5aaa14..92192eb79f8 100644 --- a/pkgs/os-specific/linux/kernel/hardened/config.nix +++ b/pkgs/os-specific/linux/kernel/hardened/config.nix @@ -8,7 +8,7 @@ # # See also <nixos/modules/profiles/hardened.nix> -{ lib, version }: +{ stdenv, lib, version }: with lib; with lib.kernel; @@ -20,10 +20,6 @@ assert (versionAtLeast version "4.9"); # Report BUG() conditions and kill the offending process. BUG = yes; - # Safer page access permissions (wrt. code injection). Default on >=4.11. - DEBUG_RODATA = whenOlder "4.11" yes; - DEBUG_SET_MODULE_RONX = whenOlder "4.11" yes; - # Mark LSM hooks read-only after init. SECURITY_WRITABLE_HOOKS n # conflicts with SECURITY_SELINUX_DISABLE y; disabling the latter # implicitly marks LSM hooks read-only after init. @@ -32,10 +28,10 @@ assert (versionAtLeast version "4.9"); # # We set SECURITY_WRITABLE_HOOKS n primarily for documentation purposes; the # config builder fails to detect that it has indeed been unset. - SECURITY_SELINUX_DISABLE = whenAtLeast "4.12" no; - SECURITY_WRITABLE_HOOKS = whenAtLeast "4.12" (option no); + SECURITY_SELINUX_DISABLE = whenOlder "6.4" no; # On 6.4: error: unused option: SECURITY_SELINUX_DISABLE + SECURITY_WRITABLE_HOOKS = option no; - STRICT_KERNEL_RWX = whenAtLeast "4.11" yes; + STRICT_KERNEL_RWX = yes; # Perform additional validation of commonly targeted structures. 
DEBUG_CREDENTIALS = yes; @@ -45,7 +41,7 @@ assert (versionAtLeast version "4.9"); DEBUG_SG = yes; SCHED_STACK_END_CHECK = yes; - REFCOUNT_FULL = whenBetween "4.13" "5.5" yes; + REFCOUNT_FULL = whenOlder "5.4.208" yes; # Randomize page allocator when page_alloc.shuffle=1 SHUFFLE_PAGE_ALLOCATOR = whenAtLeast "5.2" yes; @@ -68,11 +64,15 @@ assert (versionAtLeast version "4.9"); # Gather additional entropy at boot time for systems that may not have appropriate entropy sources. GCC_PLUGIN_LATENT_ENTROPY = yes; - GCC_PLUGIN_STRUCTLEAK = whenAtLeast "4.11" yes; # A port of the PaX structleak plugin - GCC_PLUGIN_STRUCTLEAK_BYREF_ALL = whenAtLeast "4.14" yes; # Also cover structs passed by address + GCC_PLUGIN_STRUCTLEAK = option yes; # A port of the PaX structleak plugin + GCC_PLUGIN_STRUCTLEAK_BYREF_ALL = option yes; # Also cover structs passed by address GCC_PLUGIN_STACKLEAK = whenAtLeast "4.20" yes; # A port of the PaX stackleak plugin - GCC_PLUGIN_RANDSTRUCT = whenAtLeast "4.13" yes; # A port of the PaX randstruct plugin - GCC_PLUGIN_RANDSTRUCT_PERFORMANCE = whenAtLeast "4.13" yes; + GCC_PLUGIN_RANDSTRUCT = whenOlder "5.19" yes; # A port of the PaX randstruct plugin + GCC_PLUGIN_RANDSTRUCT_PERFORMANCE = whenOlder "5.19" yes; + + # Same as GCC_PLUGIN_RANDSTRUCT*, but has been renamed to `RANDSTRUCT*` in 5.19. + RANDSTRUCT = whenAtLeast "5.19" yes; + RANDSTRUCT_PERFORMANCE = whenAtLeast "5.19" yes; # Disable various dangerous settings ACPI_CUSTOM_METHOD = no; # Allows writing directly to physical memory @@ -88,9 +88,13 @@ assert (versionAtLeast version "4.9"); INET_MPTCP_DIAG = option no; # Use -fstack-protector-strong (gcc 4.9+) for best stack canary coverage. - CC_STACKPROTECTOR_REGULAR = whenOlder "4.18" no; + CC_STACKPROTECTOR_REGULAR = lib.mkForce (whenOlder "4.18" no); CC_STACKPROTECTOR_STRONG = whenOlder "4.18" yes; # Detect out-of-bound reads/writes and use-after-free KFENCE = whenAtLeast "5.12" yes; + + # CONFIG_DEVMEM=n causes these to not exist anymore. 
+ STRICT_DEVMEM = option no; + IO_STRICT_DEVMEM = option no; } diff --git a/pkgs/os-specific/linux/kernel/hardened/patches.json b/pkgs/os-specific/linux/kernel/hardened/patches.json index 412e5041500..d8f8bb2fa73 100644 --- a/pkgs/os-specific/linux/kernel/hardened/patches.json +++ b/pkgs/os-specific/linux/kernel/hardened/patches.json @@ -1,32 +1,82 @@ { "4.14": { - "extra": "-hardened1", - "name": "linux-hardened-4.14.240-hardened1.patch", - "sha256": "0j5zp0f8s4w3f60yam2spg3bx56bdjvv0mh632zlhchz8rdk5zs4", - "url": "https://github.com/anthraxx/linux-hardened/releases/download/4.14.240-hardened1/linux-hardened-4.14.240-hardened1.patch" + "patch": { + "extra": "-hardened1", + "name": "linux-hardened-4.14.328-hardened1.patch", + "sha256": "1qq2l4nwhxgl4drx6isc1ly892kffjq4hqb4zadqs6sxvsdm7x57", + "url": "https://github.com/anthraxx/linux-hardened/releases/download/4.14.328-hardened1/linux-hardened-4.14.328-hardened1.patch" + }, + "sha256": "1igcpvnhwwrczfdsafmszvi0456k7f6j4cgpfw6v6afw09p95d8x", + "version": "4.14.328" }, "4.19": { - "extra": "-hardened1", - "name": "linux-hardened-4.19.198-hardened1.patch", - "sha256": "18c5j00xiwc0xn5klcrwazk6wvjiy3cixbfbrw4xj7zal9r5p6q9", - "url": "https://github.com/anthraxx/linux-hardened/releases/download/4.19.198-hardened1/linux-hardened-4.19.198-hardened1.patch" + "patch": { + "extra": "-hardened1", + "name": "linux-hardened-4.19.297-hardened1.patch", + "sha256": "1qj09bynl7ml880xpc2956jn0b1gmm77yf3jc45v3jq3610jhna4", + "url": "https://github.com/anthraxx/linux-hardened/releases/download/4.19.297-hardened1/linux-hardened-4.19.297-hardened1.patch" + }, + "sha256": "0c9xxqgv2i36hrr06dwz7f3idc04xpv0a5pxg08xdh03cnyf12cx", + "version": "4.19.297" }, "5.10": { - "extra": "-hardened1", - "name": "linux-hardened-5.10.52-hardened1.patch", - "sha256": "062a32rb1g5xk1npiz9fa114k7g4x9pmygycn3alc0phngjmvr98", - "url": "https://github.com/anthraxx/linux-hardened/releases/download/5.10.52-hardened1/linux-hardened-5.10.52-hardened1.patch" + 
"patch": { + "extra": "-hardened1", + "name": "linux-hardened-5.10.199-hardened1.patch", + "sha256": "10vwd5wygfnxpbz15bq56pjygba3vqqal0d7xry2bch4p444pp5f", + "url": "https://github.com/anthraxx/linux-hardened/releases/download/5.10.199-hardened1/linux-hardened-5.10.199-hardened1.patch" + }, + "sha256": "1h944syk7n6c4j1djlx19n77alzwbxcdza77c9ykicgfynhpgsm0", + "version": "5.10.199" }, - "5.12": { - "extra": "-hardened1", - "name": "linux-hardened-5.12.19-hardened1.patch", - "sha256": "1nr3922gd6il69k5cpp9g3knpy6yjb6jsmpi9k4v02bkvypg86dc", - "url": "https://github.com/anthraxx/linux-hardened/releases/download/5.12.19-hardened1/linux-hardened-5.12.19-hardened1.patch" + "5.15": { + "patch": { + "extra": "-hardened1", + "name": "linux-hardened-5.15.137-hardened1.patch", + "sha256": "19gs1w380qgvazwjwhxypizpfx71faa7hsji0x5cgyw6vxhi6l1b", + "url": "https://github.com/anthraxx/linux-hardened/releases/download/5.15.137-hardened1/linux-hardened-5.15.137-hardened1.patch" + }, + "sha256": "1xxjbxldrhmnh2q6rykpxyfbj8xqgl82q30n8sfavrzr14bb4jcp", + "version": "5.15.137" }, "5.4": { - "extra": "-hardened1", - "name": "linux-hardened-5.4.134-hardened1.patch", - "sha256": "0iay6dxwd1vqj02ljf0ghncrqpr6b0gby90xiza8kkk8wnh3r9hh", - "url": "https://github.com/anthraxx/linux-hardened/releases/download/5.4.134-hardened1/linux-hardened-5.4.134-hardened1.patch" + "patch": { + "extra": "-hardened1", + "name": "linux-hardened-5.4.259-hardened1.patch", + "sha256": "1w8ipflgisd127gmx6wyz8p5qfi8cfd2a5j2xgibspkf45nzfwi8", + "url": "https://github.com/anthraxx/linux-hardened/releases/download/5.4.259-hardened1/linux-hardened-5.4.259-hardened1.patch" + }, + "sha256": "195v4fidavzm637glj6580006mrcaygnbj4za874imb62bxf9rpz", + "version": "5.4.259" + }, + "6.1": { + "patch": { + "extra": "-hardened1", + "name": "linux-hardened-6.1.61-hardened1.patch", + "sha256": "0d9zhh32dx1q828q50kmznmsa6yinppbklhgg8ix7b7k23857ha6", + "url": 
"https://github.com/anthraxx/linux-hardened/releases/download/6.1.61-hardened1/linux-hardened-6.1.61-hardened1.patch" + }, + "sha256": "1kk4d7ph6pvgdrdmaklg15wf58nw9n7yqgkag7jdvqinzh99sb5d", + "version": "6.1.61" + }, + "6.4": { + "patch": { + "extra": "-hardened1", + "name": "linux-hardened-6.4.16-hardened1.patch", + "sha256": "10lydnnhhq9ynng1gfaqh1mncsb0dmr27zzcbygs1xigy2bl70n9", + "url": "https://github.com/anthraxx/linux-hardened/releases/download/6.4.16-hardened1/linux-hardened-6.4.16-hardened1.patch" + }, + "sha256": "0zgj1z97jyx7wf12zrnlcp0mj4cl43ais9qsy6dh1jwylf2fq9ln", + "version": "6.4.16" + }, + "6.5": { + "patch": { + "extra": "-hardened1", + "name": "linux-hardened-6.5.10-hardened1.patch", + "sha256": "0p2lj7ryiizr1sxvm2kgds3l8sg9fns35y2fcyqq61lg7ymzj1fi", + "url": "https://github.com/anthraxx/linux-hardened/releases/download/6.5.10-hardened1/linux-hardened-6.5.10-hardened1.patch" + }, + "sha256": "12sswml8jvabv6bqx35lg3jj6gq8jjk365rghjngdy5d0j34jpx1", + "version": "6.5.10" } } diff --git a/pkgs/os-specific/linux/kernel/hardened/update.py b/pkgs/os-specific/linux/kernel/hardened/update.py index e96ac9ca855..ce54c298075 100755 --- a/pkgs/os-specific/linux/kernel/hardened/update.py +++ b/pkgs/os-specific/linux/kernel/hardened/update.py @@ -1,5 +1,5 @@ #! /usr/bin/env nix-shell -#! nix-shell -i python -p "python38.withPackages (ps: [ps.PyGithub])" git gnupg +#! nix-shell -i python -p "python3.withPackages (ps: [ps.pygithub])" git gnupg # This is automatically called by ../update.sh. 
@@ -31,7 +31,12 @@ VersionComponent = Union[int, str] Version = List[VersionComponent] -Patch = TypedDict("Patch", {"name": str, "url": str, "sha256": str, "extra": str}) +PatchData = TypedDict("PatchData", {"name": str, "url": str, "sha256": str, "extra": str}) +Patch = TypedDict("Patch", { + "patch": PatchData, + "version": str, + "sha256": str, +}) @dataclass @@ -133,7 +138,15 @@ def fetch_patch(*, name: str, release_info: ReleaseInfo) -> Optional[Patch]: if not sig_ok: return None - return Patch(name=patch_filename, url=patch_url, sha256=sha256, extra=extra) + kernel_ver = re.sub(r"(.*)(-hardened[\d]+)$", r'\1', release_info.release.tag_name) + major = kernel_ver.split('.')[0] + sha256_kernel, _ = nix_prefetch_url(f"mirror://kernel/linux/kernel/v{major}.x/linux-{kernel_ver}.tar.xz") + + return Patch( + patch=PatchData(name=patch_filename, url=patch_url, sha256=sha256, extra=extra), + version=kernel_ver, + sha256=sha256_kernel + ) def parse_version(version_str: str) -> Version: @@ -180,21 +193,14 @@ with open(HARDENED_PATCHES_PATH) as patches_file: # Get the set of currently packaged kernel versions. 
kernel_versions = {} -for filename in os.listdir(NIXPKGS_KERNEL_PATH): - filename_match = re.fullmatch(r"linux-(\d+)\.(\d+)\.nix", filename) - if filename_match: - nix_version_expr = f""" - with import {NIXPKGS_PATH} {{}}; - (callPackage {NIXPKGS_KERNEL_PATH / filename} {{}}).version - """ - kernel_version_json = run( - "nix-instantiate", "--eval", "--json", "--expr", nix_version_expr, - ).stdout - kernel_version = parse_version(json.loads(kernel_version_json)) - if kernel_version < MIN_KERNEL_VERSION: - continue - kernel_key = major_kernel_version_key(kernel_version) - kernel_versions[kernel_key] = kernel_version +with open(NIXPKGS_KERNEL_PATH / "kernels-org.json") as kernel_versions_json: + kernel_versions = json.load(kernel_versions_json) + for kernel_branch_str in kernel_versions: + if kernel_branch_str == "testing": continue + kernel_branch = [int(i) for i in kernel_branch_str.split(".")] + if kernel_branch < MIN_KERNEL_VERSION: continue + kernel_version = [int(i) for i in kernel_versions[kernel_branch_str]["version"].split(".")] + kernel_versions[kernel_branch_str] = kernel_version # Remove patches for unpackaged kernel versions. for kernel_key in sorted(patches.keys() - kernel_versions.keys()): @@ -206,13 +212,26 @@ failures = False # Match each kernel version with the best patch version. releases = {} +i = 0 for release in repo.get_releases(): + # Dirty workaround to make sure that we don't run into issues because + # GitHub's API only allows fetching the last 1000 releases. + # It's not reliable to exit earlier because not every kernel minor may + # have hardened patches, hence the naive search below. + i += 1 + if i > 500: + break + version = parse_version(release.tag_name) # needs to look like e.g. 
5.6.3-hardened1 if len(version) < 4: continue + if not (isinstance(version[-2], int)): + continue + kernel_version = version[:-1] + kernel_key = major_kernel_version_key(kernel_version) try: packaged_kernel_version = kernel_versions[kernel_key] @@ -226,7 +245,7 @@ for release in repo.get_releases(): else: # Fall back to the latest patch for this major kernel version, # skipping patches for kernels newer than the packaged one. - if kernel_version > packaged_kernel_version: + if '.'.join(str(x) for x in kernel_version) > '.'.join(str(x) for x in packaged_kernel_version): continue elif ( kernel_key not in releases or releases[kernel_key].version < version @@ -245,7 +264,7 @@ for kernel_key in sorted(releases.keys()): old_version_str: Optional[str] = None update: bool try: - old_filename = patches[kernel_key]["name"] + old_filename = patches[kernel_key]["patch"]["name"] old_version_str = old_filename.replace("linux-hardened-", "").replace( ".patch", "" ) diff --git a/pkgs/os-specific/linux/kernel/htmldocs.nix b/pkgs/os-specific/linux/kernel/htmldocs.nix new file mode 100644 index 00000000000..ba641347c83 --- /dev/null +++ b/pkgs/os-specific/linux/kernel/htmldocs.nix @@ -0,0 +1,56 @@ +{ lib +, stdenv +, graphviz +, imagemagick +, linux_latest +, makeFontsConf +, perl +, python3 +, sphinx +, which +}: + +stdenv.mkDerivation { + pname = "linux-kernel-latest-htmldocs"; + + inherit (linux_latest) version src; + + postPatch = '' + patchShebangs \ + Documentation/sphinx/parse-headers.pl \ + scripts/{get_abi.pl,get_feat.pl,kernel-doc,sphinx-pre-install} + ''; + + FONTCONFIG_FILE = makeFontsConf { + fontDirectories = [ ]; + }; + + nativeBuildInputs = [ + graphviz + imagemagick + perl + python3.pkgs.sphinx + python3.pkgs.sphinx-rtd-theme + which + ]; + + preBuild = '' + export XDG_CACHE_HOME="$(mktemp -d)" + ''; + + makeFlags = [ "htmldocs" ]; + + installPhase = '' + mkdir -p $out/share/doc + mv Documentation/output $out/share/doc/linux-doc + cp -r Documentation/* 
$out/share/doc/linux-doc/ + ''; + + meta = with lib; { + description = "Linux kernel html documentation"; + homepage = "https://www.kernel.org/doc/htmldocs/"; + platforms = platforms.linux; + inherit (linux_latest.meta) license; + maintainers = with maintainers; [ ]; + }; +} diff --git a/pkgs/os-specific/linux/kernel/kernels-org.json b/pkgs/os-specific/linux/kernel/kernels-org.json new file mode 100644 index 00000000000..94ab60aa67b --- /dev/null +++ b/pkgs/os-specific/linux/kernel/kernels-org.json @@ -0,0 +1,38 @@ +{ + "testing": { + "version": "6.7-rc1", + "hash": "sha256:1a071vvmm08sp48d0arqzcmqnz5xdb1vflfhxcqwmpzaabjrgadk" + }, + "6.5": { + "version": "6.5.11", + "hash": "sha256:06dmb4hbwrms0lp4axphwgj8wbnzsym70sx55lxr501b53wlmqif" + }, + "6.1": { + "version": "6.1.62", + "hash": "sha256:1v453q4sf0j8708ivs1zmdf645hgimqvxfc8xz7czgnnmipn3zdr" + }, + "5.15": { + "version": "5.15.138", + "hash": "sha256:1ajaxy97gx0c9cdxiyxa49ykfsykir22i9abfrcizh71ci0yb15g" + }, + "5.10": { + "version": "5.10.200", + "hash": "sha256:012i41bj8rcqn0vhfxrwq3gg82nb6pp2cwq8n146wj47pwgrcbcx" + }, + "5.4": { + "version": "5.4.260", + "hash": "sha256:1zpbaipd2j3idj8h9iznlj0ywcq5nkhwj707a1f9ixf82h3q4c4q" + }, + "4.19": { + "version": "4.19.298", + "hash": "sha256:0mhgq6hdcls1af7nj999x1mds5b37s7vwin8nsb4q0lnx2y1da4x" + }, + "4.14": { + "version": "4.14.329", + "hash": "sha256:1dvb4xf0b7snabznl7bg7gga7ffdmywy8vr8q65pzl9yf6fnhdny" + }, + "6.6": { + "version": "6.6.1", + "hash": "sha256:0d42b1hbvv9w3y3q4wydr6il0g5a823n54a06p4p5vcpgkadf7ns" + } +} diff --git a/pkgs/os-specific/linux/kernel/linux-4.14.nix b/pkgs/os-specific/linux/kernel/linux-4.14.nix deleted file mode 100644 index ccecc433a4a..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-4.14.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ lib, buildPackages, fetchurl, perl, buildLinux, nixosTests, modDirVersionArg ? null, ... 
} @ args: - -with lib; - -buildLinux (args // rec { - version = "4.14.240"; - - # modDirVersion needs to be x.y.z, will automatically add .0 if needed - modDirVersion = if (modDirVersionArg == null) then concatStringsSep "." (take 3 (splitVersion "${version}.0")) else modDirVersionArg; - - # branchVersion needs to be x.y - extraMeta.branch = versions.majorMinor version; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v4.x/linux-${version}.tar.xz"; - sha256 = "1k65qwzlnqnh9ym0n2fxpa8nk2qwvykwhwgaixk3b7ndzmr8b6c8"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_4_14 ]; -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-4.19.nix b/pkgs/os-specific/linux/kernel/linux-4.19.nix deleted file mode 100644 index 4ed06ee2205..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-4.19.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ lib, buildPackages, fetchurl, perl, buildLinux, nixosTests, modDirVersionArg ? null, ... } @ args: - -with lib; - -buildLinux (args // rec { - version = "4.19.198"; - - # modDirVersion needs to be x.y.z, will automatically add .0 if needed - modDirVersion = if (modDirVersionArg == null) then concatStringsSep "." (take 3 (splitVersion "${version}.0")) else modDirVersionArg; - - # branchVersion needs to be x.y - extraMeta.branch = versions.majorMinor version; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v4.x/linux-${version}.tar.xz"; - sha256 = "13k0r6a4n8nbni64a18wqzy0pg4vn1zw2li78xrm78rqcrnah85y"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_4_19 ]; -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-4.4.nix b/pkgs/os-specific/linux/kernel/linux-4.4.nix deleted file mode 100644 index 6c2595386e0..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-4.4.nix +++ /dev/null @@ -1,14 +0,0 @@ -{ buildPackages, fetchurl, perl, buildLinux, nixosTests, stdenv, ... 
} @ args: - -buildLinux (args // rec { - version = "4.4.276"; - extraMeta.branch = "4.4"; - extraMeta.broken = stdenv.isAarch64; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v4.x/linux-${version}.tar.xz"; - sha256 = "1hf9h5kr1ws2lvinzq6cv7aps8af1kx4q8j4bsk2vv4i2zvmfr7y"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_4_4 ]; -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-4.9.nix b/pkgs/os-specific/linux/kernel/linux-4.9.nix deleted file mode 100644 index 0dc5cfeae6e..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-4.9.nix +++ /dev/null @@ -1,14 +0,0 @@ -{ buildPackages, fetchurl, perl, buildLinux, nixosTests, stdenv, ... } @ args: - -buildLinux (args // rec { - version = "4.9.276"; - extraMeta.branch = "4.9"; - extraMeta.broken = stdenv.isAarch64; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v4.x/linux-${version}.tar.xz"; - sha256 = "16jp05jhmqcp8lawqga69gxn1acdkxsskn3a6wf0635863fky3hv"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_4_9 ]; -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-5.10.nix b/pkgs/os-specific/linux/kernel/linux-5.10.nix deleted file mode 100644 index f59cca3e12f..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-5.10.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ lib, buildPackages, fetchurl, perl, buildLinux, nixosTests, modDirVersionArg ? null, ... } @ args: - -with lib; - -buildLinux (args // rec { - version = "5.10.52"; - - # modDirVersion needs to be x.y.z, will automatically add .0 if needed - modDirVersion = if (modDirVersionArg == null) then concatStringsSep "." 
(take 3 (splitVersion "${version}.0")) else modDirVersionArg; - - # branchVersion needs to be x.y - extraMeta.branch = versions.majorMinor version; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v5.x/linux-${version}.tar.xz"; - sha256 = "0ydf09wsg0pkjm9dk8y730ksg15p5rlbhq445zx8k191zah5g7kn"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_5_10 ]; -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-5.12.nix b/pkgs/os-specific/linux/kernel/linux-5.12.nix deleted file mode 100644 index e1e7aec2ce2..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-5.12.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ lib, buildPackages, fetchurl, perl, buildLinux, nixosTests, modDirVersionArg ? null, ... } @ args: - -with lib; - -buildLinux (args // rec { - version = "5.12.19"; - - # modDirVersion needs to be x.y.z, will automatically add .0 if needed - modDirVersion = if (modDirVersionArg == null) then concatStringsSep "." (take 3 (splitVersion "${version}.0")) else modDirVersionArg; - - # branchVersion needs to be x.y - extraMeta.branch = versions.majorMinor version; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v5.x/linux-${version}.tar.xz"; - sha256 = "0wscz736n13m833cd12lskn47r0b8ki4fhgpjnwga0jsab9iqf79"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_5_12 ]; -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-5.13.nix b/pkgs/os-specific/linux/kernel/linux-5.13.nix deleted file mode 100644 index bece15821a8..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-5.13.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ lib, buildPackages, fetchurl, perl, buildLinux, nixosTests, modDirVersionArg ? null, ... } @ args: - -with lib; - -buildLinux (args // rec { - version = "5.13.6"; - - # modDirVersion needs to be x.y.z, will automatically add .0 if needed - modDirVersion = if (modDirVersionArg == null) then concatStringsSep "." 
(take 3 (splitVersion "${version}.0")) else modDirVersionArg; - - # branchVersion needs to be x.y - extraMeta.branch = versions.majorMinor version; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v5.x/linux-${version}.tar.xz"; - sha256 = "0xjjl8dmilp425b1cp977v26qxlg1147gh54kni949pzxwh1fb56"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_5_13 ]; -} // (args.argsOverride or { })) diff --git a/pkgs/os-specific/linux/kernel/linux-5.4.nix b/pkgs/os-specific/linux/kernel/linux-5.4.nix deleted file mode 100644 index c4e08b685b5..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-5.4.nix +++ /dev/null @@ -1,20 +0,0 @@ -{ lib, buildPackages, fetchurl, perl, buildLinux, nixosTests, modDirVersionArg ? null, ... } @ args: - -with lib; - -buildLinux (args // rec { - version = "5.4.134"; - - # modDirVersion needs to be x.y.z, will automatically add .0 if needed - modDirVersion = if (modDirVersionArg == null) then concatStringsSep "." (take 3 (splitVersion "${version}.0")) else modDirVersionArg; - - # branchVersion needs to be x.y - extraMeta.branch = versions.majorMinor version; - - src = fetchurl { - url = "mirror://kernel/linux/kernel/v5.x/linux-${version}.tar.xz"; - sha256 = "0haqw1w6f8p330ydbsl7iml1x0qqrv63az6921p2a70n88b8dyy9"; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_5_4 ]; -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-cros.nix b/pkgs/os-specific/linux/kernel/linux-cros.nix deleted file mode 100644 index edaa33ac0fb..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-cros.nix +++ /dev/null @@ -1,38 +0,0 @@ -{ stdenv, lib, buildPackages, fetchFromGitiles, upstreamInfo, perl, buildLinux -, modDirVersionArg ? null -, ... 
} @ args: - -let - versionData = upstreamInfo.components."src/third_party/kernel/v5.4"; -in - -with lib; -with lib.kernel; - -buildLinux (args // rec { - inherit (versionData) version; - - # modDirVersion needs to be x.y.z, will automatically add .0 if needed - modDirVersion = - if modDirVersionArg == null - then concatStringsSep "." (take 3 (splitVersion "${version}.0")) - else modDirVersionArg; - - # branchVersion needs to be x.y - extraMeta.branch = versions.majorMinor version; - - src = fetchFromGitiles { inherit (versionData) name url rev sha256; }; - - updateScript = ../chromium-os/update.py; - - structuredExtraConfig = { - # Enabling this (the default) caused a build failure. If you can - # archieve a successful build with this enabled, go ahead and - # enable it. - VIDEO_INTEL_IPU6 = no; - - # RTW88_8822*E were being selected as Y when N/m/? are the only valid options - RTW88_8822BE = lib.mkForce module; - RTW88_8822CE = lib.mkForce module; - } // (args.structuredExtraConfig or {}); -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-hardkernel-4.14.nix b/pkgs/os-specific/linux/kernel/linux-hardkernel-4.14.nix deleted file mode 100644 index a64520ab893..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-hardkernel-4.14.nix +++ /dev/null @@ -1,33 +0,0 @@ -{ buildPackages, fetchFromGitHub, perl, buildLinux, libelf, util-linux, ... } @ args: - -buildLinux (args // rec { - version = "4.14.165-172"; - - # modDirVersion needs to be x.y.z. - modDirVersion = "4.14.165"; - - # branchVersion needs to be x.y. - extraMeta.branch = "4.14"; - - src = fetchFromGitHub { - owner = "hardkernel"; - repo = "linux"; - rev = version; - sha256 = "10ayqjjs2hxj1q7sb0mxa3gv75q28lznjha19rpxvig2fpi8015s"; - }; - - defconfig = "odroidxu4_defconfig"; - - # This extraConfig is (only) required because the gator module fails to build as-is. - extraConfig = '' - - GATOR n - - # This attempted fix applies correctly but does not fix the build. 
- #GATOR_MALI_MIDGARD_PATH ${src}/drivers/gpu/arm/midgard - - '' + (args.extraConfig or ""); - - extraMeta.platforms = [ "armv7l-linux" ]; - -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-libre.nix b/pkgs/os-specific/linux/kernel/linux-libre.nix index f02c1ad1250..9cf5f46cfb8 100644 --- a/pkgs/os-specific/linux/kernel/linux-libre.nix +++ b/pkgs/os-specific/linux/kernel/linux-libre.nix @@ -1,8 +1,8 @@ { stdenv, lib, fetchsvn, linux , scripts ? fetchsvn { url = "https://www.fsfla.org/svn/fsfla/software/linux-libre/releases/branches/"; - rev = "18191"; - sha256 = "0ggaccg7z540kh5if48v6sjy39xllzvznqx5srvrlycrs2r89iyr"; + rev = "19441"; + sha256 = "1z0x8cw9nr7qf5qh3xjf6rg20q0i79bg71lik847sabyb6vcrk0z"; } , ... }: @@ -14,9 +14,14 @@ let minor = lib.versions.minor linux.modDirVersion; patch = lib.versions.patch linux.modDirVersion; + # See http://linux-libre.fsfla.org/pub/linux-libre/releases + versionPrefix = if linux.kernelOlder "5.14" then + "gnu1" + else + "gnu"; in linux.override { argsOverride = { - modDirVersion = "${linux.modDirVersion}-gnu"; + modDirVersion = "${linux.modDirVersion}-${versionPrefix}"; isLibre = true; src = stdenv.mkDerivation { @@ -35,10 +40,8 @@ in linux.override { ''; }; - extraMeta.broken = true; - passthru.updateScript = ./update-libre.sh; - maintainers = [ lib.maintainers.qyliss ]; + maintainers = with lib.maintainers; [ qyliss ivar ]; }; } diff --git a/pkgs/os-specific/linux/kernel/linux-lqx.nix b/pkgs/os-specific/linux/kernel/linux-lqx.nix deleted file mode 100644 index 23a6b0b2d36..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-lqx.nix +++ /dev/null @@ -1,26 +0,0 @@ -{ lib, fetchFromGitHub, buildLinux, linux_zen, ... 
} @ args: - -let - version = "5.12.19"; - suffix = "lqx2"; -in - -buildLinux (args // { - modDirVersion = "${version}-${suffix}"; - inherit version; - isZen = true; - - src = fetchFromGitHub { - owner = "zen-kernel"; - repo = "zen-kernel"; - rev = "v${version}-${suffix}"; - sha256 = "sha256-r2DvKLlm1a1VuJwC81tRuRwCd6H21T3MsBAC3b9TUbs="; - }; - - extraMeta = { - branch = "5.12/master"; - maintainers = with lib.maintainers; [ atemu ]; - description = linux_zen.meta.description + " (Same as linux_zen but less aggressive release schedule)"; - }; - -} // (args.argsOverride or { })) diff --git a/pkgs/os-specific/linux/kernel/linux-mptcp-95.nix b/pkgs/os-specific/linux/kernel/linux-mptcp-95.nix deleted file mode 100644 index a6a8d4936d4..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-mptcp-95.nix +++ /dev/null @@ -1,27 +0,0 @@ -{ lib, buildPackages, fetchFromGitHub, perl, buildLinux, structuredExtraConfig ? {}, ... } @ args: -let - mptcpVersion = "0.95.1"; - modDirVersion = "4.19.126"; -in -buildLinux ({ - version = "${modDirVersion}-mptcp_v${mptcpVersion}"; - inherit modDirVersion; - - extraMeta = { - branch = "4.19"; - maintainers = with lib.maintainers; [ teto layus ]; - }; - - src = fetchFromGitHub { - owner = "multipath-tcp"; - repo = "mptcp"; - rev = "v${mptcpVersion}"; - sha256 = "sha256-J9UXhkI49cq83EtojLHieRtp8fT3LXTJNIqb+mUwZdM="; - }; - - structuredExtraConfig = lib.mkMerge [ - (import ./mptcp-config.nix { inherit lib; }) - structuredExtraConfig - ]; - -} // args) diff --git a/pkgs/os-specific/linux/kernel/linux-rpi.nix b/pkgs/os-specific/linux/kernel/linux-rpi.nix index 8ccf46b402b..1bea6197529 100644 --- a/pkgs/os-specific/linux/kernel/linux-rpi.nix +++ b/pkgs/os-specific/linux/kernel/linux-rpi.nix @@ -2,8 +2,8 @@ let # NOTE: raspberrypifw & raspberryPiWirelessFirmware should be updated with this - modDirVersion = "5.10.17"; - tag = "1.20210303"; + modDirVersion = "6.1.21"; + tag = "1.20230405"; in lib.overrideDerivation (buildLinux (args // { version 
= "${modDirVersion}-${tag}"; @@ -12,8 +12,8 @@ lib.overrideDerivation (buildLinux (args // { src = fetchFromGitHub { owner = "raspberrypi"; repo = "linux"; - rev = "raspberrypi-kernel_${tag}-1"; - sha256 = "0ffsllayl18ka4mgp4rdy9h0da5gy1n6g0kfvinvzdzabb5wzvrx"; + rev = tag; + hash = "sha256-ILwecHZ1BN6GhZAUB6/UwiN/rZ8gHndKON6DUhidtxI="; }; defconfig = { @@ -27,19 +27,11 @@ lib.overrideDerivation (buildLinux (args // { efiBootStub = false; } // (args.features or {}); - extraConfig = '' - # ../drivers/gpu/drm/ast/ast_mode.c:851:18: error: initialization of 'void (*)(struct drm_crtc *, struct drm_atomic_state *)' from incompatible pointer type 'void (*)(struct drm_crtc *, struct drm_crtc_state *)' [-Werror=incompatible-pointer-types] - # 851 | .atomic_flush = ast_crtc_helper_atomic_flush, - # | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # ../drivers/gpu/drm/ast/ast_mode.c:851:18: note: (near initialization for 'ast_crtc_helper_funcs.atomic_flush') - DRM_AST n - ''; - extraMeta = if (rpiVersion < 3) then { - platforms = with lib.platforms; [ arm ]; + platforms = with lib.platforms; arm; hydraPlatforms = []; } else { - platforms = with lib.platforms; [ arm aarch64 ]; + platforms = with lib.platforms; arm ++ aarch64; hydraPlatforms = [ "aarch64-linux" ]; }; } // (args.argsOverride or {}))) (oldAttrs: { @@ -70,6 +62,7 @@ lib.overrideDerivation (buildLinux (args // { '' + lib.optionalString (lib.elem stdenv.hostPlatform.system ["armv7l-linux"]) '' copyDTB bcm2709-rpi-2-b.dtb bcm2836-rpi-2-b.dtb '' + lib.optionalString (lib.elem stdenv.hostPlatform.system ["armv7l-linux" "aarch64-linux"]) '' + copyDTB bcm2710-rpi-zero-2.dtb bcm2837-rpi-zero-2.dtb copyDTB bcm2710-rpi-3-b.dtb bcm2837-rpi-3-b.dtb copyDTB bcm2710-rpi-3-b-plus.dtb bcm2837-rpi-3-a-plus.dtb copyDTB bcm2710-rpi-3-b-plus.dtb bcm2837-rpi-3-b-plus.dtb diff --git a/pkgs/os-specific/linux/kernel/linux-rt-5.10.nix b/pkgs/os-specific/linux/kernel/linux-rt-5.10.nix index 83b2fc05093..65ca352b53b 100644 --- 
a/pkgs/os-specific/linux/kernel/linux-rt-5.10.nix +++ b/pkgs/os-specific/linux/kernel/linux-rt-5.10.nix @@ -6,26 +6,25 @@ , ... } @ args: let - version = "5.10.52-rt47"; # updated by ./update-rt.sh + version = "5.10.199-rt97"; # updated by ./update-rt.sh branch = lib.versions.majorMinor version; kversion = builtins.elemAt (lib.splitString "-" version) 0; in buildLinux (args // { inherit version; # modDirVersion needs a patch number, change X.Y-rtZ to X.Y.0-rtZ. - modDirVersion = if (builtins.match "[^.]*[.][^.]*-.*" version) == null then version - else lib.replaceStrings ["-"] [".0-"] version; + modDirVersion = lib.versions.pad 3 version; src = fetchurl { url = "mirror://kernel/linux/kernel/v5.x/linux-${kversion}.tar.xz"; - sha256 = "0ydf09wsg0pkjm9dk8y730ksg15p5rlbhq445zx8k191zah5g7kn"; + sha256 = "1h944syk7n6c4j1djlx19n77alzwbxcdza77c9ykicgfynhpgsm0"; }; kernelPatches = let rt-patch = { name = "rt"; patch = fetchurl { url = "mirror://kernel/linux/kernel/projects/rt/${branch}/older/patch-${version}.patch.xz"; - sha256 = "1n71nbshma0gxyrifyymrd0wii1q0plj020amc0wdzzm27xs5k2k"; + sha256 = "13k7md0a63q4r5vqqvbszmg3kzp5np0hdaj1siyl4yvs9j78d03s"; }; }; in [ rt-patch ] ++ kernelPatches; diff --git a/pkgs/os-specific/linux/kernel/linux-rt-5.11.nix b/pkgs/os-specific/linux/kernel/linux-rt-5.15.nix index 5d1b14f1d0f..bc45a86905c 100644 --- a/pkgs/os-specific/linux/kernel/linux-rt-5.11.nix +++ b/pkgs/os-specific/linux/kernel/linux-rt-5.15.nix @@ -6,7 +6,7 @@ , ... 
} @ args: let - version = "5.11.4-rt11"; # updated by ./update-rt.sh + version = "5.15.137-rt71"; # updated by ./update-rt.sh branch = lib.versions.majorMinor version; kversion = builtins.elemAt (lib.splitString "-" version) 0; in buildLinux (args // { @@ -18,14 +18,14 @@ in buildLinux (args // { src = fetchurl { url = "mirror://kernel/linux/kernel/v5.x/linux-${kversion}.tar.xz"; - sha256 = "1i8dfw83ndaylwji7lazfckk113plvnz7kh1yppbfg35r6przrc8"; + sha256 = "1xxjbxldrhmnh2q6rykpxyfbj8xqgl82q30n8sfavrzr14bb4jcp"; }; kernelPatches = let rt-patch = { name = "rt"; patch = fetchurl { url = "mirror://kernel/linux/kernel/projects/rt/${branch}/older/patch-${version}.patch.xz"; - sha256 = "1az6cn9jj3bnjgwzzrjy1adnrnn06p2vzsnc1iib4xhs0sfr27hc"; + sha256 = "11zk02ni3b0l1wwrfvyc1q92bd9as61hwgbwlj42xv5gbpd39jlw"; }; }; in [ rt-patch ] ++ kernelPatches; diff --git a/pkgs/os-specific/linux/kernel/linux-rt-5.4.nix b/pkgs/os-specific/linux/kernel/linux-rt-5.4.nix index 4c49dc9c42a..22e07bfd0f5 100644 --- a/pkgs/os-specific/linux/kernel/linux-rt-5.4.nix +++ b/pkgs/os-specific/linux/kernel/linux-rt-5.4.nix @@ -6,7 +6,7 @@ , ... 
} @ args: let - version = "5.4.129-rt61"; # updated by ./update-rt.sh + version = "5.4.257-rt87"; # updated by ./update-rt.sh branch = lib.versions.majorMinor version; kversion = builtins.elemAt (lib.splitString "-" version) 0; in buildLinux (args // { @@ -14,14 +14,14 @@ in buildLinux (args // { src = fetchurl { url = "mirror://kernel/linux/kernel/v5.x/linux-${kversion}.tar.xz"; - sha256 = "1ps64gx85lmbriq445hd2hcv4g4b1d1cwf4r3nd90x6i2cj4c9j4"; + sha256 = "1w1x91slzg9ggakqhyxnmvz77v2cwfk8bz0knrpgz9qya9q5jxrf"; }; kernelPatches = let rt-patch = { name = "rt"; patch = fetchurl { url = "mirror://kernel/linux/kernel/projects/rt/${branch}/older/patch-${version}.patch.xz"; - sha256 = "0b3hp6a7afkjqd7an4hj423nq6flwzd42kjcyk4pifv5fx6c7pgq"; + sha256 = "0rgkk5ibagsyz9in12clzn7szsw1i3m96s8wy5yxwa26aaa2wki7"; }; }; in [ rt-patch ] ++ kernelPatches; diff --git a/pkgs/os-specific/linux/kernel/linux-rt-6.1.nix b/pkgs/os-specific/linux/kernel/linux-rt-6.1.nix new file mode 100644 index 00000000000..85c8a8b8a10 --- /dev/null +++ b/pkgs/os-specific/linux/kernel/linux-rt-6.1.nix @@ -0,0 +1,45 @@ +{ lib, buildLinux, fetchurl +, kernelPatches ? [ ] +, structuredExtraConfig ? {} +, extraMeta ? {} +, argsOverride ? {} +, ... } @ args: + +let + version = "6.1.59-rt16"; # updated by ./update-rt.sh + branch = lib.versions.majorMinor version; + kversion = builtins.elemAt (lib.splitString "-" version) 0; +in buildLinux (args // { + inherit version; + + # modDirVersion needs a patch number, change X.Y-rtZ to X.Y.0-rtZ. 
+ modDirVersion = if (builtins.match "[^.]*[.][^.]*-.*" version) == null then version + else lib.replaceStrings ["-"] [".0-"] version; + + src = fetchurl { + url = "mirror://kernel/linux/kernel/v6.x/linux-${kversion}.tar.xz"; + sha256 = "1860r1aan258yi2jq68bp1kdbcyy7ygc7d8g54wnc0vmqqj7fzv2"; + }; + + kernelPatches = let rt-patch = { + name = "rt"; + patch = fetchurl { + url = "mirror://kernel/linux/kernel/projects/rt/${branch}/older/patch-${version}.patch.xz"; + sha256 = "1cmgw6a8zlj89172mp85lxaksz1pvc155mj2fq59l1ry35gwb5q7"; + }; + }; in [ rt-patch ] ++ kernelPatches; + + structuredExtraConfig = with lib.kernel; { + PREEMPT_RT = yes; + # Fix error: unused option: PREEMPT_RT. + EXPERT = yes; # PREEMPT_RT depends on it (in kernel/Kconfig.preempt) + # Fix error: option not set correctly: PREEMPT_VOLUNTARY (wanted 'y', got 'n'). + PREEMPT_VOLUNTARY = lib.mkForce no; # PREEMPT_RT deselects it. + # Fix error: unused option: RT_GROUP_SCHED. + RT_GROUP_SCHED = lib.mkForce (option no); # Removed by sched-disable-rt-group-sched-on-rt.patch. + } // structuredExtraConfig; + + extraMeta = extraMeta // { + inherit branch; + }; +} // argsOverride) diff --git a/pkgs/os-specific/linux/kernel/linux-testing-bcachefs.nix b/pkgs/os-specific/linux/kernel/linux-testing-bcachefs.nix index a12633eb6d7..c58c4e67e4d 100644 --- a/pkgs/os-specific/linux/kernel/linux-testing-bcachefs.nix +++ b/pkgs/os-specific/linux/kernel/linux-testing-bcachefs.nix @@ -1,33 +1,46 @@ { lib +, stdenv , fetchpatch , kernel -, date ? "2021-07-08" -, commit ? "3693b2ca83ff9eda49660b31299d2bebe3a1075f" -, diffHash ? "1sfq3vwc2kxa761s292f2cqrm0vvqvkdx6drpyn5yaxwnapwidcw" +, commitDate ? "2023-06-28" +# bcachefs-tools stores the expected-revision in: +# https://evilpiepirate.org/git/bcachefs-tools.git/tree/.bcachefs_revision +# but this does not means that it'll be the latest-compatible revision +, currentCommit ? "4d2faeb4fb58c389dc9f76b8d5ae991ef4497e04" +, diffHash ? 
"sha256-DtMc8P4lTRzvS6PVvD7WtWEPsfnxIXSpqMsKKWs+edI=" , kernelPatches # must always be defined in bcachefs' all-packages.nix entry because it's also a top-level attribute supplied by callPackage , argsOverride ? {} , ... } @ args: - -kernel.override ( args // { +# NOTE: bcachefs-tools should be updated simultaneously to preserve compatibility +(kernel.override ( args // { argsOverride = { - version = "${kernel.version}-bcachefs-unstable-${date}"; + version = "${kernel.version}-bcachefs-unstable-${commitDate}"; + modDirVersion = kernel.modDirVersion; + extraMeta = { + homepage = "https://bcachefs.org/"; branch = "master"; - maintainers = with lib.maintainers; [ davidak chiiruno ]; - platforms = [ "x86_64-linux" ]; + maintainers = with lib.maintainers; [ davidak Madouura pedrohlc raitobezarius YellowOnion ]; }; } // argsOverride; + structuredExtraConfig = with lib.kernel; { + BCACHEFS_FS = module; + BCACHEFS_QUOTA = option yes; + BCACHEFS_POSIX_ACL = option yes; + # useful for bug reports + FTRACE = option yes; + }; + kernelPatches = [ { - name = "bcachefs-${commit}"; + name = "bcachefs-${currentCommit}"; + patch = fetchpatch { - name = "bcachefs-${commit}.diff"; - url = "https://evilpiepirate.org/git/bcachefs.git/rawdiff/?id=${commit}&id2=v${lib.versions.majorMinor kernel.version}"; + name = "bcachefs-${currentCommit}.diff"; + url = "https://evilpiepirate.org/git/bcachefs.git/rawdiff/?id=${currentCommit}&id2=v${lib.versions.majorMinor kernel.version}"; sha256 = diffHash; }; - extraConfig = "BCACHEFS_FS m"; } ] ++ kernelPatches; - -}) +})) diff --git a/pkgs/os-specific/linux/kernel/linux-testing.nix b/pkgs/os-specific/linux/kernel/linux-testing.nix deleted file mode 100644 index 4e2ef7b4652..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-testing.nix +++ /dev/null @@ -1,22 +0,0 @@ -{ lib, buildPackages, fetchurl, perl, buildLinux, nixosTests, modDirVersionArg ? null, ... 
} @ args: - -with lib; - -buildLinux (args // rec { - version = "5.13-rc6"; - extraMeta.branch = "5.12"; - - # modDirVersion needs to be x.y.z, will always add .0 - modDirVersion = if (modDirVersionArg == null) then builtins.replaceStrings ["-"] [".0-"] version else modDirVersionArg; - - src = fetchurl { - url = "https://git.kernel.org/torvalds/t/linux-${version}.tar.gz"; - sha256 = "sha256-PunFd6tOsmrsPItp2QX4TEVxHnvvi1BMSwWio/DTlMU="; - }; - - kernelTests = args.kernelTests or [ nixosTests.kernel-generic.linux_testing ]; - - # Should the testing kernels ever be built on Hydra? - extraMeta.hydraPlatforms = []; - -} // (args.argsOverride or {})) diff --git a/pkgs/os-specific/linux/kernel/linux-xanmod.nix b/pkgs/os-specific/linux/kernel/linux-xanmod.nix deleted file mode 100644 index 62e88d01853..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-xanmod.nix +++ /dev/null @@ -1,55 +0,0 @@ -{ lib, stdenv, buildLinux, fetchFromGitHub, ... } @ args: - -let - version = "5.13.7"; - release = "1"; - suffix = "xanmod${release}-cacule"; -in -buildLinux (args // rec { - inherit version; - modDirVersion = "${version}-${suffix}"; - - src = fetchFromGitHub { - owner = "xanmod"; - repo = "linux"; - rev = modDirVersion; - sha256 = "sha256-6SppDriZWzLu6Qye1e6ciiE+Ro63vDyabxfgWS/PTSo="; - }; - - structuredExtraConfig = with lib.kernel; { - # Preemptive Full Tickless Kernel at 500Hz - PREEMPT_VOLUNTARY = lib.mkForce no; - PREEMPT = lib.mkForce yes; - NO_HZ_FULL = yes; - HZ_500 = yes; - - # Google's Multigenerational LRU Framework - LRU_GEN = yes; - LRU_GEN_ENABLED = yes; - - # Google's BBRv2 TCP congestion Control - TCP_CONG_BBR2 = yes; - DEFAULT_BBR2 = yes; - - # FQ-PIE Packet Scheduling - NET_SCH_DEFAULT = yes; - DEFAULT_FQ_PIE = yes; - - # Graysky's additional CPU optimizations - CC_OPTIMIZE_FOR_PERFORMANCE_O3 = yes; - - # Android Ashmem and Binder IPC Driver as module for Anbox - ASHMEM = module; - ANDROID = yes; - ANDROID_BINDER_IPC = module; - ANDROID_BINDERFS = module; - 
ANDROID_BINDER_DEVICES = freeform "binder,hwbinder,vndbinder"; - }; - - extraMeta = { - branch = "5.13-cacule"; - maintainers = with lib.maintainers; [ fortuneteller2k ]; - description = "Built with custom settings and new features built to provide a stable, responsive and smooth desktop experience"; - broken = stdenv.isAarch64; - }; -} // (args.argsOverride or { })) diff --git a/pkgs/os-specific/linux/kernel/linux-zen.nix b/pkgs/os-specific/linux/kernel/linux-zen.nix deleted file mode 100644 index 2b91a259232..00000000000 --- a/pkgs/os-specific/linux/kernel/linux-zen.nix +++ /dev/null @@ -1,30 +0,0 @@ -{ lib, fetchFromGitHub, buildLinux, ... } @ args: - -let - version = "5.13.7"; - suffix = "zen1"; -in - -buildLinux (args // { - modDirVersion = "${version}-${suffix}"; - inherit version; - isZen = true; - - src = fetchFromGitHub { - owner = "zen-kernel"; - repo = "zen-kernel"; - rev = "v${version}-${suffix}"; - sha256 = "sha256-ZvB5Ejt9MXP4QK5cj9CGQgFJIfDV03IW5xcknCxDui0="; - }; - - structuredExtraConfig = with lib.kernel; { - ZEN_INTERACTIVE = yes; - }; - - extraMeta = { - branch = "5.13"; - maintainers = with lib.maintainers; [ atemu andresilva ]; - description = "Built using the best configuration and kernel sources for desktop, multimedia, and gaming workloads."; - }; - -} // (args.argsOverride or { })) diff --git a/pkgs/os-specific/linux/kernel/mac-nvme-t2.patch b/pkgs/os-specific/linux/kernel/mac-nvme-t2.patch deleted file mode 100644 index 2f1fa6a0dae..00000000000 --- a/pkgs/os-specific/linux/kernel/mac-nvme-t2.patch +++ /dev/null @@ -1,283 +0,0 @@ -diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c -index dd10cf78f2d3..8f006638452b 100644 ---- a/drivers/nvme/host/pci.c -+++ b/drivers/nvme/host/pci.c -@@ -28,8 +28,8 @@ - #include "trace.h" - #include "nvme.h" - --#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) --#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) -+#define SQ_SIZE(q) ((q)->q_depth * sizeof(struct 
nvme_command)) -+#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) - - #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) - -@@ -1344,16 +1344,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) - - static void nvme_free_queue(struct nvme_queue *nvmeq) - { -- dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth), -+ dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq), - (void *)nvmeq->cqes, nvmeq->cq_dma_addr); - if (!nvmeq->sq_cmds) - return; - - if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { - pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), -- nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth)); -+ nvmeq->sq_cmds, SQ_SIZE(nvmeq)); - } else { -- dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth), -+ dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq), - nvmeq->sq_cmds, nvmeq->sq_dma_addr); - } - } -@@ -1433,12 +1433,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, - } - - static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, -- int qid, int depth) -+ int qid) - { - struct pci_dev *pdev = to_pci_dev(dev->dev); - - if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { -- nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth)); -+ nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq)); - if (nvmeq->sq_cmds) { - nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, - nvmeq->sq_cmds); -@@ -1447,11 +1447,11 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, - return 0; - } - -- pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth)); -+ pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq)); - } - } - -- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), -+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq), - &nvmeq->sq_dma_addr, GFP_KERNEL); - if (!nvmeq->sq_cmds) - return -ENOMEM; -@@ -1465,12 +1465,13 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) - if 
(dev->ctrl.queue_count > qid) - return 0; - -- nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth), -+ nvmeq->q_depth = depth; -+ nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq), - &nvmeq->cq_dma_addr, GFP_KERNEL); - if (!nvmeq->cqes) - goto free_nvmeq; - -- if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth)) -+ if (nvme_alloc_sq_cmds(dev, nvmeq, qid)) - goto free_cqdma; - - nvmeq->dev = dev; -@@ -1479,15 +1480,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) - nvmeq->cq_head = 0; - nvmeq->cq_phase = 1; - nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; -- nvmeq->q_depth = depth; - nvmeq->qid = qid; - dev->ctrl.queue_count++; - - return 0; - - free_cqdma: -- dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, -- nvmeq->cq_dma_addr); -+ dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes, -+ nvmeq->cq_dma_addr); - free_nvmeq: - return -ENOMEM; - } -@@ -1515,7 +1515,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) - nvmeq->cq_head = 0; - nvmeq->cq_phase = 1; - nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; -- memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); -+ memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq)); - nvme_dbbuf_init(dev, nvmeq, qid); - dev->online_queues++; - wmb(); /* ensure the first interrupt sees the initialization */ -diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c -index cc09b81fc7f4..716ebe87a2b8 100644 ---- a/drivers/nvme/host/core.c -+++ b/drivers/nvme/host/core.c -@@ -1986,6 +1986,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) - ctrl->ctrl_config = NVME_CC_CSS_NVM; - ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; - ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; -+ /* Use default IOSQES. 
We'll update it later if needed */ - ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; - ctrl->ctrl_config |= NVME_CC_ENABLE; - -@@ -2698,6 +2699,30 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) - ctrl->hmmin = le32_to_cpu(id->hmmin); - ctrl->hmminds = le32_to_cpu(id->hmminds); - ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); -+ -+ /* Grab required IO queue size */ -+ ctrl->iosqes = id->sqes & 0xf; -+ if (ctrl->iosqes < NVME_NVM_IOSQES) { -+ dev_err(ctrl->device, -+ "unsupported required IO queue size %d\n", ctrl->iosqes); -+ ret = -EINVAL; -+ goto out_free; -+ } -+ /* -+ * If our IO queue size isn't the default, update the setting -+ * in CC:IOSQES. -+ */ -+ if (ctrl->iosqes != NVME_NVM_IOSQES) { -+ ctrl->ctrl_config &= ~(0xfu << NVME_CC_IOSQES_SHIFT); -+ ctrl->ctrl_config |= ctrl->iosqes << NVME_CC_IOSQES_SHIFT; -+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, -+ ctrl->ctrl_config); -+ if (ret) { -+ dev_err(ctrl->device, -+ "error updating CC register\n"); -+ goto out_free; -+ } -+ } - } - - ret = nvme_mpath_init(ctrl, id); -diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h -index 716a876119c8..34ef35fcd8a5 100644 ---- a/drivers/nvme/host/nvme.h -+++ b/drivers/nvme/host/nvme.h -@@ -244,6 +244,7 @@ struct nvme_ctrl { - u32 hmmin; - u32 hmminds; - u16 hmmaxd; -+ u8 iosqes; - - /* Fabrics only */ - u16 sqsize; -diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c -index 8f006638452b..54b35ea4af88 100644 ---- a/drivers/nvme/host/pci.c -+++ b/drivers/nvme/host/pci.c -@@ -28,7 +28,7 @@ - #include "trace.h" - #include "nvme.h" - --#define SQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_command)) -+#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) - #define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) - - #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) -@@ -162,7 +162,7 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) - struct nvme_queue { - struct nvme_dev *dev; - spinlock_t sq_lock; -- struct 
nvme_command *sq_cmds; -+ void *sq_cmds; - /* only used for poll queues: */ - spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; - volatile struct nvme_completion *cqes; -@@ -178,6 +178,7 @@ struct nvme_queue { - u16 last_cq_head; - u16 qid; - u8 cq_phase; -+ u8 sqes; - unsigned long flags; - #define NVMEQ_ENABLED 0 - #define NVMEQ_SQ_CMB 1 -@@ -488,7 +489,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, - bool write_sq) - { - spin_lock(&nvmeq->sq_lock); -- memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); -+ memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes), -+ cmd, sizeof(*cmd)); - if (++nvmeq->sq_tail == nvmeq->q_depth) - nvmeq->sq_tail = 0; - nvme_write_sq_db(nvmeq, write_sq); -@@ -1465,6 +1467,7 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) - if (dev->ctrl.queue_count > qid) - return 0; - -+ nvmeq->sqes = qid ? dev->ctrl.iosqes : NVME_NVM_ADMSQES; - nvmeq->q_depth = depth; - nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq), - &nvmeq->cq_dma_addr, GFP_KERNEL); -diff --git a/include/linux/nvme.h b/include/linux/nvme.h -index 01aa6a6c241d..7af18965fb57 100644 ---- a/include/linux/nvme.h -+++ b/include/linux/nvme.h -@@ -141,6 +141,7 @@ enum { - * (In bytes and specified as a power of two (2^n)). 
- */ - #define NVME_NVM_IOSQES 6 -+#define NVME_NVM_ADMSQES 6 - #define NVME_NVM_IOCQES 4 - - enum { -diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c -index 716ebe87a2b8..480ea24d8cf4 100644 ---- a/drivers/nvme/host/core.c -+++ b/drivers/nvme/host/core.c -@@ -2701,7 +2701,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) - ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); - - /* Grab required IO queue size */ -- ctrl->iosqes = id->sqes & 0xf; -+ if (ctrl->quirks & NVME_QUIRK_128_BYTES_SQES) -+ ctrl->iosqes = 7; -+ else -+ ctrl->iosqes = id->sqes & 0xf; - if (ctrl->iosqes < NVME_NVM_IOSQES) { - dev_err(ctrl->device, - "unsupported required IO queue size %d\n", ctrl->iosqes); -diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h -index 34ef35fcd8a5..b2a78d08b984 100644 ---- a/drivers/nvme/host/nvme.h -+++ b/drivers/nvme/host/nvme.h -@@ -92,6 +92,16 @@ enum nvme_quirks { - * Broken Write Zeroes. - */ - NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9), -+ -+ /* -+ * Use only one interrupt vector for all queues -+ */ -+ NVME_QUIRK_SINGLE_VECTOR = (1 << 10), -+ -+ /* -+ * Use non-standard 128 bytes SQEs. 
-+ */ -+ NVME_QUIRK_128_BYTES_SQES = (1 << 11), - }; - - /* -diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c -index 54b35ea4af88..ab2358137419 100644 ---- a/drivers/nvme/host/pci.c -+++ b/drivers/nvme/host/pci.c -@@ -2080,6 +2080,9 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) - dev->io_queues[HCTX_TYPE_DEFAULT] = 1; - dev->io_queues[HCTX_TYPE_READ] = 0; - -+ if (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) -+ irq_queues = 1; -+ - return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); - } -@@ -3037,6 +3040,9 @@ static const struct pci_device_id nvme_id_table[] = { - { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, - { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, - { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, -+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), -+ .driver_data = NVME_QUIRK_SINGLE_VECTOR | -+ NVME_QUIRK_128_BYTES_SQES }, - { 0, } - }; - MODULE_DEVICE_TABLE(pci, nvme_id_table); diff --git a/pkgs/os-specific/linux/kernel/mainline.nix b/pkgs/os-specific/linux/kernel/mainline.nix new file mode 100644 index 00000000000..4e1d5b8a9e8 --- /dev/null +++ b/pkgs/os-specific/linux/kernel/mainline.nix @@ -0,0 +1,27 @@ +{ branch, lib, fetchurl, fetchzip, buildLinux, ... 
} @ args: + +let + allKernels = builtins.fromJSON (builtins.readFile ./kernels-org.json); + thisKernel = allKernels.${branch}; + inherit (thisKernel) version; + + src = + # testing kernels are a special case because they don't have tarballs on the CDN + if branch == "testing" + then fetchzip { + url = "https://git.kernel.org/torvalds/t/linux-${version}.tar.gz"; + inherit (thisKernel) hash; + } + else fetchurl { + url = "mirror://kernel/linux/kernel/v${lib.versions.major version}.x/linux-${version}.tar.xz"; + inherit (thisKernel) hash; + }; + + args' = (builtins.removeAttrs args ["branch"]) // { + inherit src version; + + modDirVersion = lib.versions.pad 3 version; + extraMeta.branch = branch; + } // (args.argsOverride or {}); +in +buildLinux args' diff --git a/pkgs/os-specific/linux/kernel/manual-config.nix b/pkgs/os-specific/linux/kernel/manual-config.nix index 77add0aef53..2ba31fbc978 100644 --- a/pkgs/os-specific/linux/kernel/manual-config.nix +++ b/pkgs/os-specific/linux/kernel/manual-config.nix @@ -1,9 +1,12 @@ -{ lib, buildPackages, runCommand, nettools, bc, bison, flex, perl, rsync, gmp, libmpc, mpfr, openssl -, libelf, cpio, elfutils, zstd, gawk, python3Minimal -, writeTextFile +{ lib, stdenv, buildPackages, runCommand, nettools, bc, bison, flex, perl, rsync, gmp, libmpc, mpfr, openssl +, libelf, cpio, elfutils, zstd, python3Minimal, zlib, pahole, kmod, ubootTools +, fetchpatch }: let + lib_ = lib; + stdenv_ = stdenv; + readConfig = configfile: import (runCommand "config.nix" {} '' echo "{" > "$out" while IFS='=' read key val; do @@ -13,16 +16,16 @@ let done < "${configfile}" echo "}" >> $out '').outPath; -in { - lib, - # Allow overriding stdenv on each buildLinux call - stdenv, +in lib.makeOverridable ({ # The kernel version version, + # Position of the Linux build expression + pos ? null, # Additional kernel make flags extraMakeFlags ? [], - # The version of the kernel module directory - modDirVersion ? 
version, + # The name of the kernel module directory + # Needs to be X.Y.Z[-extra], so pad with zeros if needed. + modDirVersion ? lib.versions.pad 3 version, # The kernel source (tarball, git checkout, etc.) src, # a list of { name=..., patch=..., extraConfig=...} patches @@ -35,7 +38,7 @@ in { # Custom seed used for CONFIG_GCC_PLUGIN_RANDSTRUCT if enabled. This is # automatically extended with extra per-version and per-config values. randstructSeed ? "", - # Use defaultMeta // extraMeta + # Extra meta attributes extraMeta ? {}, # for module compatibility @@ -46,7 +49,7 @@ in { # Whether to utilize the controversial import-from-derivation feature to parse the config allowImportFromDerivation ? false, # ignored - features ? null, + features ? null, lib ? lib_, stdenv ? stdenv_, }: let @@ -54,19 +57,13 @@ let hasAttr getAttr optional optionals optionalString optionalAttrs maintainers platforms; # Dependencies that are required to build kernel modules - moduleBuildDependencies = optional (lib.versionAtLeast version "4.14") libelf; - - installkernel = writeTextFile { name = "installkernel"; executable=true; text = '' - #!${stdenv.shell} -e - mkdir -p $4 - cp -av $2 $4 - cp -av $3 $4 - ''; }; - - commonMakeFlags = [ - "O=$(buildRoot)" - ] ++ lib.optionals (stdenv.hostPlatform.linux-kernel ? 
makeFlags) - stdenv.hostPlatform.linux-kernel.makeFlags; + moduleBuildDependencies = [ + pahole + perl + libelf + # module makefiles often run uname commands to find out the kernel version + (buildPackages.deterministic-uname.override { inherit modDirVersion; }) + ] ++ optional (lib.versionAtLeast version "5.13") zstd; drvAttrs = config_: kernelConf: kernelPatches: configfile: let @@ -88,17 +85,17 @@ let isModular = config.isYes "MODULES"; - installsFirmware = (config.isEnabled "FW_LOADER") && - (isModular || (config.isDisabled "FIRMWARE_IN_KERNEL")) && - (lib.versionOlder version "4.14"); + buildDTBs = kernelConf.DTB or false; + in (optionalAttrs isModular { outputs = [ "out" "dev" ]; }) // { - passthru = { + passthru = rec { inherit version modDirVersion config kernelPatches configfile moduleBuildDependencies stdenv; inherit isZen isHardened isLibre; isXen = lib.warn "The isXen attribute is deprecated. All Nixpkgs kernels that support it now have Xen enabled." true; - kernelOlder = lib.versionOlder version; - kernelAtLeast = lib.versionAtLeast version; + baseVersion = lib.head (lib.splitString "-rc" version); + kernelOlder = lib.versionOlder baseVersion; + kernelAtLeast = lib.versionAtLeast baseVersion; }; inherit src; @@ -106,16 +103,22 @@ let patches = map (p: p.patch) kernelPatches # Required for deterministic builds along with some postPatch magic. - ++ optional (lib.versionAtLeast version "4.13") ./randstruct-provide-seed.patch - # Fixes determinism by normalizing metadata for the archive of kheaders - ++ optional (lib.versionAtLeast version "5.2" && lib.versionOlder version "5.4") ./gen-kheaders-metadata.patch; - - prePatch = '' - for mf in $(find -name Makefile -o -name Makefile.include -o -name install.sh); do - echo "stripping FHS paths in \`$mf'..." 
- sed -i "$mf" -e 's|/usr/bin/||g ; s|/bin/||g ; s|/sbin/||g' - done - sed -i Makefile -e 's|= depmod|= ${buildPackages.kmod}/bin/depmod|' + ++ optional (lib.versionOlder version "5.19") ./randstruct-provide-seed.patch + ++ optional (lib.versionAtLeast version "5.19") ./randstruct-provide-seed-5.19.patch + # Linux 5.12 marked certain PowerPC-only symbols as GPL, which breaks + # OpenZFS; this was fixed in Linux 5.19 so we backport the fix + # https://github.com/openzfs/zfs/pull/13367 + ++ optional (lib.versionAtLeast version "5.12" && + lib.versionOlder version "5.19" && + stdenv.hostPlatform.isPower) + (fetchpatch { + url = "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/patch/?id=d9e5c3e9e75162f845880535957b7fd0b4637d23"; + hash = "sha256-bBOyJcP6jUvozFJU0SPTOf3cmnTQ6ZZ4PlHjiniHXLU="; + }); + + postPatch = '' + # Ensure that depmod gets resolved through PATH + sed -i Makefile -e 's|= /sbin/depmod|= depmod|' # Don't include a (random) NT_GNU_BUILD_ID, to make the build more deterministic. # This way kernels can be bit-by-bit reproducible depending on settings @@ -123,20 +126,29 @@ let # See also https://kernelnewbies.org/BuildId sed -i Makefile -e 's|--build-id=[^ ]*|--build-id=none|' - patchShebangs scripts - ''; + # Some linux-hardened patches now remove certain files in the scripts directory, so the file may not exist. + [[ -f scripts/ld-version.sh ]] && patchShebangs scripts/ld-version.sh - postPatch = '' # Set randstruct seed to a deterministic but diversified value. Note: # we could have instead patched gen-random-seed.sh to take input from # the buildFlags, but that would require also patching the kernel's # toplevel Makefile to add a variable export. This would be likely to # cause future patch conflicts. 
- if [ -f scripts/gcc-plugins/gen-random-seed.sh ]; then - substituteInPlace scripts/gcc-plugins/gen-random-seed.sh \ - --replace NIXOS_RANDSTRUCT_SEED \ - $(echo ${randstructSeed}${src} ${configfile} | sha256sum | cut -d ' ' -f 1 | tr -d '\n') - fi + for file in scripts/gen-randstruct-seed.sh scripts/gcc-plugins/gen-random-seed.sh; do + if [ -f "$file" ]; then + substituteInPlace "$file" \ + --replace NIXOS_RANDSTRUCT_SEED \ + $(echo ${randstructSeed}${src} ${placeholder "configfile"} | sha256sum | cut -d ' ' -f 1 | tr -d '\n') + break + fi + done + + patchShebangs scripts + + # also patch arch-specific install scripts + for i in $(find arch -name install.sh); do + patchShebangs "$i" + done ''; configurePhase = '' @@ -165,7 +177,6 @@ let exit 1 fi - # Note: we can get rid of this once http://permalink.gmane.org/gmane.linux.kbuild.devel/13800 is merged. buildFlagsArray+=("KBUILD_BUILD_TIMESTAMP=$(date -u -d @$SOURCE_DATE_EPOCH)") cd $buildRoot @@ -175,18 +186,69 @@ let "KBUILD_BUILD_VERSION=1-NixOS" kernelConf.target "vmlinux" # for "perf" and things like that - ] - ++ optional isModular "modules" + ] ++ optional isModular "modules" + ++ optionals buildDTBs ["dtbs" "DTC_FLAGS=-@"] ++ extraMakeFlags; installFlags = [ - "INSTALLKERNEL=${installkernel}" "INSTALL_PATH=$(out)" ] ++ (optional isModular "INSTALL_MOD_PATH=$(out)") - ++ optional installsFirmware "INSTALL_FW_PATH=$(out)/lib/firmware"; - - preInstall = '' + ++ optionals buildDTBs ["dtbs_install" "INSTALL_DTBS_PATH=$(out)/dtbs"]; + + preInstall = let + # All we really need to do here is copy the final image and System.map to $out, + # and use the kernel's modules_install, firmware_install, dtbs_install, etc. targets + # for the rest. Easy, right? 
+ # + # Unfortunately for us, the obvious way of getting the built image path, + # make -s image_name, does not work correctly, because some architectures + # (*cough* aarch64 *cough*) change KBUILD_IMAGE on the fly in their install targets, + # so we end up attempting to install the thing we didn't actually build. + # + # Thankfully, there's a way out that doesn't involve just hardcoding everything. + # + # The kernel has an install target, which runs a pretty simple shell script + # (located at scripts/install.sh or arch/$arch/boot/install.sh, depending on + # which kernel version you're looking at) that tries to do something sensible. + # + # (it would be great to hijack this script immediately, as it has all the + # information we need passed to it and we don't need it to try and be smart, + # but unfortunately, the exact location of the scripts differs between kernel + # versions, and they're seemingly not considered to be public API at all) + # + # One of the ways it tries to discover what "something sensible" actually is + # is by delegating to what's supposed to be a user-provided install script + # located at ~/bin/installkernel. + # + # (the other options are: + # - a distribution-specific script at /sbin/installkernel, + # which we can't really create in the sandbox easily + # - an architecture-specific script at arch/$arch/boot/install.sh, + # which attempts to guess _something_ and usually guesses very wrong) + # + # More specifically, the install script exec's into ~/bin/installkernel, if one + # exists, with the following arguments: + # + # $1: $KERNELRELEASE - full kernel version string + # $2: $KBUILD_IMAGE - the final image path + # $3: System.map - path to System.map file, seemingly hardcoded everywhere + # $4: $INSTALL_PATH - path to the destination directory as specified in installFlags + # + # $2 is exactly what we want, so hijack the script and use the knowledge given to it + # by the makefile overlords for our own nefarious ends. 
+ # + # Note that the makefiles specifically look in ~/bin/installkernel, and + # writeShellScriptBin writes the script to <store path>/bin/installkernel, + # so HOME needs to be set to just the store path. + # + # FIXME: figure out a less roundabout way of doing this. + installkernel = buildPackages.writeShellScriptBin "installkernel" '' + cp -av $2 $4 + cp -av $3 $4 + ''; + in '' installFlagsArray+=("-j$NIX_BUILD_CORES") + export HOME=${installkernel} ''; # Some image types need special install targets (e.g. uImage is installed with make uinstall) @@ -197,11 +259,7 @@ let else "install")) ]; - postInstall = (optionalString installsFirmware '' - mkdir -p $out/lib/firmware - '') + (if (kernelConf.DTB or false) then '' - make $makeFlags "''${makeFlagsArray[@]}" dtbs dtbs_install INSTALL_DTBS_PATH=$out/dtbs - '' else "") + (if isModular then '' + postInstall = optionalString isModular '' mkdir -p $dev cp vmlinux $dev/ if [ -z "''${dontStrip-}" ]; then @@ -210,7 +268,7 @@ let make modules_install $makeFlags "''${makeFlagsArray[@]}" \ $installFlags "''${installFlagsArray[@]}" unlink $out/lib/modules/${modDirVersion}/build - unlink $out/lib/modules/${modDirVersion}/source + rm -f $out/lib/modules/${modDirVersion}/source mkdir -p $dev/lib/modules/${modDirVersion}/{build,source} @@ -261,8 +319,7 @@ let find . 
-type f -name '*.lds' -print0 | xargs -0 -r chmod u-w # Keep root and arch-specific Makefiles - chmod u-w Makefile - chmod u-w arch/$arch/Makefile* + chmod u-w Makefile arch/"$arch"/Makefile* # Keep whole scripts dir chmod u-w -R scripts @@ -272,13 +329,7 @@ let # Delete empty directories find -empty -type d -delete - - # Remove reference to kmod - sed -i Makefile -e 's|= ${buildPackages.kmod}/bin/depmod|= depmod|' - '' else optionalString installsFirmware '' - make firmware_install $makeFlags "''${makeFlagsArray[@]}" \ - $installFlags "''${installFlagsArray[@]}" - ''); + ''; requiredSystemFeatures = [ "big-parallel" ]; @@ -291,17 +342,19 @@ let + ")"); license = lib.licenses.gpl2Only; homepage = "https://www.kernel.org/"; - repositories.git = "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git"; - maintainers = [ + maintainers = lib.teams.linux-kernel.members ++ [ maintainers.thoughtpolice ]; platforms = platforms.linux; + badPlatforms = + lib.optionals (lib.versionOlder version "4.15") [ "riscv32-linux" "riscv64-linux" ] ++ + lib.optional (lib.versionOlder version "5.19") "loongarch64-linux"; timeout = 14400; # 4 hours } // extraMeta; }; in -assert (lib.versionAtLeast version "4.14" && lib.versionOlder version "5.8") -> libelf != null; +assert lib.versionOlder version "5.8" -> libelf != null; assert lib.versionAtLeast version "5.8" -> elfutils != null; stdenv.mkDerivation ((drvAttrs config stdenv.hostPlatform.linux-kernel kernelPatches configfile) // { @@ -311,25 +364,26 @@ stdenv.mkDerivation ((drvAttrs config stdenv.hostPlatform.linux-kernel kernelPat enableParallelBuilding = true; depsBuildBuild = [ buildPackages.stdenv.cc ]; - nativeBuildInputs = [ perl bc nettools openssl rsync gmp libmpc mpfr gawk zstd python3Minimal ] - ++ optional (stdenv.hostPlatform.linux-kernel.target == "uImage") buildPackages.ubootTools - ++ optional (lib.versionAtLeast version "4.14" && lib.versionOlder version "5.8") libelf - # Removed util-linuxMinimal since 
it should not be a dependency. + nativeBuildInputs = [ perl bc nettools openssl rsync gmp libmpc mpfr zstd python3Minimal kmod ubootTools ] + ++ optional (lib.versionOlder version "5.8") libelf ++ optionals (lib.versionAtLeast version "4.16") [ bison flex ] - ++ optional (lib.versionAtLeast version "5.2") cpio + ++ optionals (lib.versionAtLeast version "5.2") [ cpio pahole zlib ] ++ optional (lib.versionAtLeast version "5.8") elfutils ; hardeningDisable = [ "bindnow" "format" "fortify" "stackprotector" "pic" "pie" ]; # Absolute paths for compilers avoid any PATH-clobbering issues. - makeFlags = commonMakeFlags ++ [ + makeFlags = [ + "O=$(buildRoot)" "CC=${stdenv.cc}/bin/${stdenv.cc.targetPrefix}cc" "HOSTCC=${buildPackages.stdenv.cc}/bin/${buildPackages.stdenv.cc.targetPrefix}cc" + "HOSTLD=${buildPackages.stdenv.cc.bintools}/bin/${buildPackages.stdenv.cc.targetPrefix}ld" "ARCH=${stdenv.hostPlatform.linuxArch}" - ] ++ lib.optional (stdenv.hostPlatform != stdenv.buildPlatform) [ + ] ++ lib.optionals (stdenv.hostPlatform != stdenv.buildPlatform) [ "CROSS_COMPILE=${stdenv.cc.targetPrefix}" - ] ++ extraMakeFlags; + ] ++ (stdenv.hostPlatform.linux-kernel.makeFlags or []) + ++ extraMakeFlags; karch = stdenv.hostPlatform.linuxArch; -}) +} // (optionalAttrs (pos != null) { inherit pos; }))) diff --git a/pkgs/os-specific/linux/kernel/p9-fixes.patch b/pkgs/os-specific/linux/kernel/p9-fixes.patch deleted file mode 100644 index f6061b60667..00000000000 --- a/pkgs/os-specific/linux/kernel/p9-fixes.patch +++ /dev/null @@ -1,85 +0,0 @@ -diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c ---- a/fs/9p/vfs_inode.c -+++ b/fs/9p/vfs_inode.c -@@ -483,6 +483,9 @@ static int v9fs_test_inode(struct inode *inode, void *data) - - if (v9inode->qid.type != st->qid.type) - return 0; -+ -+ if (v9inode->qid.path != st->qid.path) -+ return 0; - return 1; - } - -diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c ---- a/fs/9p/vfs_inode_dotl.c -+++ b/fs/9p/vfs_inode_dotl.c -@@ -87,6 +87,9 @@ 
static int v9fs_test_inode_dotl(struct inode *inode, void *data) - - if (v9inode->qid.type != st->qid.type) - return 0; -+ -+ if (v9inode->qid.path != st->qid.path) -+ return 0; - return 1; - } - -diff --git a/net/9p/client.c b/net/9p/client.c -index 3ce672af1596..f1c8ad373f90 100644 ---- a/net/9p/client.c -+++ b/net/9p/client.c -@@ -749,8 +749,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) - } - again: - /* Wait for the response */ -- err = wait_event_interruptible(*req->wq, -- req->status >= REQ_STATUS_RCVD); -+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD); - - /* - * Make sure our req is coherent with regard to updates in other -diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c -index f24b25c25106..f3a4efcf1456 100644 ---- a/net/9p/trans_virtio.c -+++ b/net/9p/trans_virtio.c -@@ -286,8 +286,8 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) - if (err == -ENOSPC) { - chan->ring_bufs_avail = 0; - spin_unlock_irqrestore(&chan->lock, flags); -- err = wait_event_interruptible(*chan->vc_wq, -- chan->ring_bufs_avail); -+ err = wait_event_killable(*chan->vc_wq, -+ chan->ring_bufs_avail); - if (err == -ERESTARTSYS) - return err; - -@@ -327,7 +327,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, - * Other zc request to finish here - */ - if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { -- err = wait_event_interruptible(vp_wq, -+ err = wait_event_killable(vp_wq, - (atomic_read(&vp_pinned) < chan->p9_max_pages)); - if (err == -ERESTARTSYS) - return err; -@@ -471,8 +471,8 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, - if (err == -ENOSPC) { - chan->ring_bufs_avail = 0; - spin_unlock_irqrestore(&chan->lock, flags); -- err = wait_event_interruptible(*chan->vc_wq, -- chan->ring_bufs_avail); -+ err = wait_event_killable(*chan->vc_wq, -+ chan->ring_bufs_avail); - if (err == -ERESTARTSYS) - goto err_out; - -@@ -489,8 +489,7 @@ p9_virtio_zc_request(struct 
p9_client *client, struct p9_req_t *req, - virtqueue_kick(chan->vq); - spin_unlock_irqrestore(&chan->lock, flags); - p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); -- err = wait_event_interruptible(*req->wq, -- req->status >= REQ_STATUS_RCVD); -+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD); - /* - * Non kernel buffers are pinned, unpin them - */ diff --git a/pkgs/os-specific/linux/kernel/patches.nix b/pkgs/os-specific/linux/kernel/patches.nix index f41cedca0f6..5d4ebc214dc 100644 --- a/pkgs/os-specific/linux/kernel/patches.nix +++ b/pkgs/os-specific/linux/kernel/patches.nix @@ -19,6 +19,16 @@ patch = ./bridge-stp-helper.patch; }; + # Reverts the buggy commit causing https://bugzilla.kernel.org/show_bug.cgi?id=217802 + dell_xps_regression = { + name = "dell_xps_regression"; + patch = fetchpatch { + name = "Revert-101bd907b424-misc-rtsx-judge-ASPM-Mode-to-set.patch"; + url = "https://raw.githubusercontent.com/openSUSE/kernel-source/1b02b1528a26f4e9b577e215c114d8c5e773ee10/patches.suse/Revert-101bd907b424-misc-rtsx-judge-ASPM-Mode-to-set.patch"; + sha256 = "sha256-RHJdQ4p0msTOVPR+/dYiKuwwEoG9IpIBqT4dc5cJjf8="; + }; + }; + request_key_helper = { name = "request-key-helper"; patch = ./request-key-helper.patch; @@ -29,62 +39,21 @@ patch = ./request-key-helper-updated.patch; }; - p9_fixes = - { name = "p9-fixes"; - patch = ./p9-fixes.patch; - }; - modinst_arg_list_too_long = { name = "modinst-arglist-too-long"; patch = ./modinst-arg-list-too-long.patch; }; - genksyms_fix_segfault = - { name = "genksyms-fix-segfault"; - patch = ./genksyms-fix-segfault.patch; - }; - - cpu-cgroup-v2 = import ./cpu-cgroup-v2-patches; - hardened = let - mkPatch = kernelVersion: src: { + mkPatch = kernelVersion: { version, sha256, patch }: let src = patch; in { name = lib.removeSuffix ".patch" src.name; patch = fetchurl (lib.filterAttrs (k: v: k != "extra") src); extra = src.extra; + inherit version sha256; }; - patches = builtins.fromJSON (builtins.readFile 
./hardened/patches.json); + patches = lib.importJSON ./hardened/patches.json; in lib.mapAttrs mkPatch patches; - # https://bugzilla.kernel.org/show_bug.cgi?id=197591#c6 - iwlwifi_mvm_support_version_7_scan_req_umac_fw_command = rec { - name = "iwlwifi_mvm_support_version_7_scan_req_umac_fw_command"; - patch = fetchpatch { - name = name + ".patch"; - url = "https://bugzilla.kernel.org/attachment.cgi?id=260597"; - sha256 = "09096npxpgvlwdz3pb3m9brvxh7vy0xc9z9p8hh85xyczyzcsjhr"; - }; - }; - - # https://github.com/NixOS/nixpkgs/issues/42755 - xen-netfront_fix_mismatched_rtnl_unlock = rec { - name = "xen-netfront_fix_mismatched_rtnl_unlock"; - patch = fetchpatch { - name = name + ".patch"; - url = "https://github.com/torvalds/linux/commit/cb257783c2927b73614b20f915a91ff78aa6f3e8.patch"; - sha256 = "0xhblx2j8wi3kpnfpgjjwlcwdry97ji2aaq54r3zirk5g5p72zs8"; - }; - }; - - # https://github.com/NixOS/nixpkgs/issues/42755 - xen-netfront_update_features_after_registering_netdev = rec { - name = "xen-netfront_update_features_after_registering_netdev"; - patch = fetchpatch { - name = name + ".patch"; - url = "https://github.com/torvalds/linux/commit/45c8184c1bed1ca8a7f02918552063a00b909bf5.patch"; - sha256 = "1l8xq02rd7vakxg52xm9g4zng0ald866rpgm8kjlh88mwwyjkrwv"; - }; - }; - # Adapted for Linux 5.4 from: # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=04896832c94aae4842100cafb8d3a73e1bed3a45 rtl8761b_support = @@ -96,10 +65,4 @@ name = "export-rt-sched-migrate"; patch = ./export-rt-sched-migrate.patch; }; - - # patches from https://lkml.org/lkml/2019/7/15/1748 - mac_nvme_t2 = rec { - name = "mac_nvme_t2"; - patch = ./mac-nvme-t2.patch; - }; } diff --git a/pkgs/os-specific/linux/kernel/perf.nix b/pkgs/os-specific/linux/kernel/perf.nix deleted file mode 100644 index b58bca352e6..00000000000 --- a/pkgs/os-specific/linux/kernel/perf.nix +++ /dev/null @@ -1,83 +0,0 @@ -{ lib, stdenv, kernel, elfutils, python2, python3, perl, newt, slang, asciidoc, xmlto, 
makeWrapper -, docbook_xsl, docbook_xml_dtd_45, libxslt, flex, bison, pkg-config, libunwind, binutils -, libiberty, audit, libbfd, libopcodes, openssl, systemtap, numactl -, zlib -, withGtk ? false, gtk2 -, withZstd ? true, zstd -, withLibcap ? true, libcap -}: - -with lib; - -assert versionAtLeast kernel.version "3.12"; - -stdenv.mkDerivation { - name = "perf-linux-${kernel.version}"; - - inherit (kernel) src; - - preConfigure = '' - cd tools/perf - - substituteInPlace Makefile \ - --replace /usr/include/elfutils $elfutils/include/elfutils - - for x in util/build-id.c util/dso.c; do - substituteInPlace $x --replace /usr/lib/debug /run/current-system/sw/lib/debug - done - - if [ -f bash_completion ]; then - sed -i 's,^have perf,_have perf,' bash_completion - fi - ''; - - makeFlags = ["prefix=$(out)" "WERROR=0"] ++ kernel.makeFlags; - - hardeningDisable = [ "format" ]; - - # perf refers both to newt and slang - nativeBuildInputs = [ - asciidoc xmlto docbook_xsl docbook_xml_dtd_45 libxslt - flex bison libiberty audit makeWrapper pkg-config python3 - ]; - buildInputs = [ - elfutils newt slang libunwind libbfd zlib openssl systemtap.stapBuild numactl - libopcodes python3 perl - ] ++ lib.optional withGtk gtk2 - ++ (if (versionAtLeast kernel.version "4.19") then [ python3 ] else [ python2 ]) - ++ lib.optional withZstd zstd - ++ lib.optional withLibcap libcap; - - # Note: we don't add elfutils to buildInputs, since it provides a - # bad `ld' and other stuff. 
- NIX_CFLAGS_COMPILE = toString [ - "-Wno-error=cpp" - "-Wno-error=bool-compare" - "-Wno-error=deprecated-declarations" - "-DOBJDUMP_PATH=\"${binutils}/bin/objdump\"" - "-Wno-error=stringop-truncation" - ]; - - postPatch = '' - patchShebangs scripts - ''; - - doCheck = false; # requires "sparse" - doInstallCheck = false; # same - - separateDebugInfo = true; - installFlags = [ "install" "install-man" "ASCIIDOC8=1" "prefix=$(out)" ]; - - preFixup = '' - wrapProgram $out/bin/perf \ - --prefix PATH : "${binutils}/bin" - ''; - - meta = { - homepage = "https://perf.wiki.kernel.org/"; - description = "Linux tools to profile with performance counters"; - maintainers = with lib.maintainers; [viric]; - platforms = with lib.platforms; linux; - broken = kernel.kernelOlder "5"; - }; -} diff --git a/pkgs/os-specific/linux/kernel/perf/default.nix b/pkgs/os-specific/linux/kernel/perf/default.nix new file mode 100644 index 00000000000..ad8f2608d93 --- /dev/null +++ b/pkgs/os-specific/linux/kernel/perf/default.nix @@ -0,0 +1,166 @@ +{ lib +, stdenv +, fetchpatch +, fetchurl +, kernel +, elfutils +, python3 +, perl +, newt +, slang +, asciidoc +, xmlto +, makeWrapper +, docbook_xsl +, docbook_xml_dtd_45 +, libxslt +, flex +, bison +, pkg-config +, libunwind +, binutils-unwrapped +, libiberty +, audit +, libbfd +, libbfd_2_38 +, libopcodes +, libopcodes_2_38 +, libpfm +, libtraceevent +, openssl +, systemtap +, numactl +, zlib +, babeltrace +, withGtk ? false +, gtk2 +, withZstd ? true +, zstd +, withLibcap ? 
true +, libcap +}: +let + d3-flame-graph-templates = stdenv.mkDerivation rec { + pname = "d3-flame-graph-templates"; + version = "4.1.3"; + + src = fetchurl { + url = "https://registry.npmjs.org/d3-flame-graph/-/d3-flame-graph-${version}.tgz"; + sha256 = "sha256-W5/Vh5jarXUV224aIiTB2TnBFYT3naEIcG2945QjY8Q="; + }; + + installPhase = '' + install -D -m 0755 -t $out/share/d3-flame-graph/ ./dist/templates/* + ''; + }; +in + +stdenv.mkDerivation { + pname = "perf-linux"; + version = kernel.version; + + inherit (kernel) src; + + postPatch = '' + # Linux scripts + patchShebangs scripts + patchShebangs tools/perf/check-headers.sh + '' + lib.optionalString (lib.versionAtLeast kernel.version "6.3") '' + # perf-specific scripts + patchShebangs tools/perf/pmu-events + '' + '' + cd tools/perf + + for x in util/build-id.c util/dso.c; do + substituteInPlace $x --replace /usr/lib/debug /run/current-system/sw/lib/debug + done + + '' + lib.optionalString (lib.versionAtLeast kernel.version "5.8") '' + substituteInPlace scripts/python/flamegraph.py \ + --replace "/usr/share/d3-flame-graph/d3-flamegraph-base.html" \ + "${d3-flame-graph-templates}/share/d3-flame-graph/d3-flamegraph-base.html" + + '' + lib.optionalString (lib.versionAtLeast kernel.version "6.0") '' + patchShebangs pmu-events/jevents.py + ''; + + makeFlags = [ "prefix=$(out)" "WERROR=0" "ASCIIDOC8=1" ] ++ kernel.makeFlags + ++ lib.optional (!withGtk) "NO_GTK2=1" + ++ lib.optional (!withZstd) "NO_LIBZSTD=1" + ++ lib.optional (!withLibcap) "NO_LIBCAP=1"; + + hardeningDisable = [ "format" ]; + + # perf refers both to newt and slang + nativeBuildInputs = [ + asciidoc + xmlto + docbook_xsl + docbook_xml_dtd_45 + libxslt + flex + bison + libiberty + audit + makeWrapper + pkg-config + python3 + ]; + + buildInputs = [ + elfutils + newt + slang + libtraceevent + libunwind + zlib + openssl + numactl + python3 + perl + babeltrace + ] ++ (if (lib.versionAtLeast kernel.version "5.19") + then [ libbfd libopcodes ] + else [ libbfd_2_38 
libopcodes_2_38 ]) + ++ lib.optional (lib.meta.availableOn stdenv.hostPlatform systemtap) systemtap.stapBuild + ++ lib.optional withGtk gtk2 + ++ lib.optional withZstd zstd + ++ lib.optional withLibcap libcap + ++ lib.optional (lib.versionAtLeast kernel.version "5.8") libpfm + ++ lib.optional (lib.versionAtLeast kernel.version "6.0") python3.pkgs.setuptools; + + env.NIX_CFLAGS_COMPILE = toString [ + "-Wno-error=cpp" + "-Wno-error=bool-compare" + "-Wno-error=deprecated-declarations" + "-Wno-error=stringop-truncation" + ]; + + doCheck = false; # requires "sparse" + + installTargets = [ "install" "install-man" ]; + + # TODO: Add completions based on perf-completion.sh + postInstall = '' + # Same as perf. Remove. + rm -f $out/bin/trace + ''; + + separateDebugInfo = true; + + preFixup = '' + # Pull in 'objdump' into PATH to make annotations work. + # The embedded Python interpreter will search PATH to calculate the Python path configuration(Should be fixed by upstream). + # Add python.interpreter to PATH for now. 
+ wrapProgram $out/bin/perf \ + --prefix PATH : ${lib.makeBinPath [ binutils-unwrapped python3 ]} + ''; + + meta = with lib; { + homepage = "https://perf.wiki.kernel.org/"; + description = "Linux tools to profile with performance counters"; + maintainers = with maintainers; [ viric ]; + platforms = platforms.linux; + broken = kernel.kernelOlder "5"; + }; +} diff --git a/pkgs/os-specific/linux/kernel/randstruct-provide-seed-5.19.patch b/pkgs/os-specific/linux/kernel/randstruct-provide-seed-5.19.patch new file mode 100644 index 00000000000..5ca897a76bf --- /dev/null +++ b/pkgs/os-specific/linux/kernel/randstruct-provide-seed-5.19.patch @@ -0,0 +1,13 @@ +diff --git a/scripts/gen-randstruct-seed.sh b/scripts/gen-randstruct-seed.sh +index 61017b36c464..7bb494dd2e18 100755 +--- a/scripts/gen-randstruct-seed.sh ++++ b/scripts/gen-randstruct-seed.sh +@@ -1,7 +1,7 @@ + #!/bin/sh + # SPDX-License-Identifier: GPL-2.0 + +-SEED=$(od -A n -t x8 -N 32 /dev/urandom | tr -d ' \n') ++SEED="NIXOS_RANDSTRUCT_SEED" + echo "$SEED" > "$1" + HASH=$(echo -n "$SEED" | sha256sum | cut -d" " -f1) + echo "#define RANDSTRUCT_HASHED_SEED \"$HASH\"" > "$2" diff --git a/pkgs/os-specific/linux/kernel/update-mainline.py b/pkgs/os-specific/linux/kernel/update-mainline.py new file mode 100755 index 00000000000..30b9ebec984 --- /dev/null +++ b/pkgs/os-specific/linux/kernel/update-mainline.py @@ -0,0 +1,130 @@ +#!/usr/bin/env nix-shell +#!nix-shell -i python3 -p "python3.withPackages (ps: [ ps.beautifulsoup4 ps.lxml ])" +import json +import os +import pathlib +import subprocess +import sys +import urllib.request +from dataclasses import dataclass +from enum import Enum + +from bs4 import BeautifulSoup, NavigableString, Tag + +HERE = pathlib.Path(__file__).parent +ROOT = HERE.parent.parent.parent.parent +VERSIONS_FILE = HERE / "kernels-org.json" + + +class KernelNature(Enum): + MAINLINE = 1 + STABLE = 2 + LONGTERM = 3 + + +@dataclass +class KernelRelease: + nature: KernelNature + version: str + branch: 
str
+    date: str
+    link: str
+    eol: bool = False
+
+
+def parse_release(release: Tag) -> KernelRelease | None:
+    """Parse one <tr> of the kernel.org release table into a KernelRelease.
+
+    Returns None when the first column does not name a KernelNature member.
+    """
+    columns: list[Tag] = list(release.find_all("td"))
+    try:
+        nature = KernelNature[columns[0].get_text().rstrip(":").upper()]
+    except KeyError:
+        return None
+
+    # Drop an optional trailing "[EOL]" marker. str.rstrip(" [EOL]") would
+    # treat its argument as a set of characters, not as a suffix.
+    version = columns[1].get_text().rstrip().removesuffix("[EOL]").rstrip()
+    date = columns[2].get_text()
+    link = columns[3].find("a")
+    if link is not None and isinstance(link, Tag):
+        link = link.attrs.get("href")
+    assert link is not None, f"link for kernel {version} is non-existent"
+    eol = bool(release.find(class_="eolkernel"))
+
+    return KernelRelease(
+        nature=nature,
+        branch=get_branch(version),
+        version=version,
+        date=date,
+        link=link,
+        eol=eol,
+    )
+
+
+def get_branch(version: str) -> str:
+    """Return "testing" for release candidates, otherwise "<major>.<minor>"."""
+    # This is a testing kernel.
+    if "rc" in version:
+        return "testing"
+    major, minor, *_ = version.split(".")
+    return f"{major}.{minor}"
+
+
+def get_hash(kernel: KernelRelease):
+    """Prefetch the release with nix-prefetch-url and return "sha256:<hash>"."""
+    # Releases on the "testing" branch are prefetched with --unpack.
+    args = ["--unpack"] if kernel.branch == "testing" else []
+
+    digest = (
+        subprocess.check_output(["nix-prefetch-url", kernel.link] + args)
+        .decode()
+        .strip()
+    )
+    return f"sha256:{digest}"
+
+
+def commit(message):
+    """Commit VERSIONS_FILE with the given message; raises if git fails."""
+    return subprocess.check_call(["git", "commit", "-m", message, VERSIONS_FILE])
+
+
+def main():
+    """Update VERSIONS_FILE from the release table published on kernel.org.
+
+    The file is rewritten after every changed branch, and committed right
+    away when the COMMIT environment variable is "1", so that each kernel
+    bump ends up in its own commit.
+    """
+    # Close the HTTP response as soon as the page has been parsed.
+    with urllib.request.urlopen("https://kernel.org/") as kernel_org:
+        soup = BeautifulSoup(kernel_org.read().decode(), "lxml")
+    release_table = soup.find(id="releases")
+    if not release_table or isinstance(release_table, NavigableString):
+        print(release_table, file=sys.stderr)
+        print("Failed to find the release table on https://kernel.org", file=sys.stderr)
+        sys.exit(1)
+
+    releases = release_table.find_all("tr")
+    parsed_releases = [
+        parsed for parsed in map(parse_release, releases) if parsed is not None
+    ]
+    with VERSIONS_FILE.open() as fd:
+        all_kernels = json.load(fd)
+
+    for kernel in parsed_releases:
+        # parse_release() already derived the branch from the version.
+        branch = kernel.branch
+        nixpkgs_branch = branch.replace(".", "_")
+
+        old_version = all_kernels.get(branch, {}).get("version")
+        if old_version == kernel.version:
+            print(f"linux_{nixpkgs_branch}: {kernel.version} is latest, skipping...")
+            continue
+
+        if old_version is None:
+            message = f"linux_{nixpkgs_branch}: init at {kernel.version}"
+        else:
+            message = f"linux_{nixpkgs_branch}: {old_version} -> {kernel.version}"
+
+        print(message, file=sys.stderr)
+
+        all_kernels[branch] = {
+            "version": kernel.version,
+            "hash": get_hash(kernel),
+        }
+
+        with VERSIONS_FILE.open("w") as fd:
+            json.dump(all_kernels, fd, indent=4)
+            fd.write("\n")  # makes editorconfig happy
+
+        if os.environ.get("COMMIT") == "1":
+            commit(message)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pkgs/os-specific/linux/kernel/update-rt.sh b/pkgs/os-specific/linux/kernel/update-rt.sh index ccb01793342..a9e0577fae9 100755 --- a/pkgs/os-specific/linux/kernel/update-rt.sh +++ b/pkgs/os-specific/linux/kernel/update-rt.sh @@ -38,6 +38,7 @@ latest-rt-version() { curl -sL "$mirror/projects/rt/$branch/sha256sums.asc" | sed -ne '/.patch.xz/ { s/.*patch-\(.*\).patch.xz/\1/p}' | grep -v '\-rc' | + sort --version-sort | tail -n 1 } diff --git a/pkgs/os-specific/linux/kernel/update-zen.py b/pkgs/os-specific/linux/kernel/update-zen.py new file mode 100755 index 00000000000..3c51f806d8f --- /dev/null +++ b/pkgs/os-specific/linux/kernel/update-zen.py @@ -0,0 +1,122 @@ +#! /usr/bin/env nix-shell +#!
nix-shell -i python3 -p python3 nix nix-prefetch-git
+
+import fileinput
+import json
+import os
+import sys
+import re
+import subprocess
+
+from datetime import datetime
+from urllib.request import urlopen, Request
+
+
+def panic(exc):
+    """Abort the script by raising an Exception carrying the given message."""
+    raise Exception(exc)
+
+
+DIR = os.path.dirname(os.path.abspath(__file__))
+HEADERS = {'Accept': 'application/vnd.github.v3+json'}
+
+
+def github_api_request(endpoint):
+    """GET https://api.github.com/<endpoint> and return the decoded JSON body."""
+    base_url = 'https://api.github.com/'
+    request = Request(base_url + endpoint, headers=HEADERS)
+    with urlopen(request) as http_response:
+        return json.loads(http_response.read().decode('utf-8'))
+
+
+def get_commit_date(repo, sha):
+    """Return 'unstable-<YYYY-MM-DD>' built from the committer date of a commit."""
+    # Same endpoint as before, fetched through the shared request helper.
+    commit = github_api_request(f'repos/{repo}/commits/{sha}')
+    # The trailing 'Z' is removed before the timestamp is parsed.
+    date = commit['commit']['committer']['date'].rstrip('Z')
+    date = datetime.fromisoformat(date).date().isoformat()
+    return 'unstable-' + date
+
+
+def nix_prefetch_git(url, rev):
+    """Prefetches the requested Git revision (incl. submodules) of the given repository URL."""
+    print(f'nix-prefetch-git {url} {rev}')
+    out = subprocess.check_output([
+        'nix-prefetch-git', '--quiet',
+        '--url', url,
+        '--rev', rev,
+        '--fetch-submodules'])
+    return json.loads(out)['sha256']
+
+
+def nix_prefetch_url(url, unpack=False):
+    """Prefetches the content of the given URL."""
+    print(f'nix-prefetch-url {url}')
+    options = ['--type', 'sha256']
+    if unpack:
+        options += ['--unpack']
+    out = subprocess.check_output(['nix-prefetch-url'] + options + [url])
+    return out.decode('utf-8').rstrip()
+
+
+def update_file(relpath, variant, version, suffix, sha256):
+    """Rewrite the version/suffix/sha256 lines tagged '#<variant>' in place.
+
+    Lines are matched with any leading whitespace, like read_file() does,
+    and the indentation found on the line is written back unchanged.
+    """
+    file_path = os.path.join(DIR, relpath)
+    replacements = [
+        (re.compile(fr'^(\s*){key} = ".+"; #{variant}'), key, value)
+        for key, value in (
+            ('version', version),
+            ('suffix', suffix),
+            ('sha256', sha256),
+        )
+    ]
+    # With inplace=True, everything printed below is written back to the file.
+    with fileinput.FileInput(file_path, inplace=True) as f:
+        for line in f:
+            result = line
+            for regex, key, value in replacements:
+                # A callable replacement keeps the matched indentation and
+                # avoids backslash processing of the new value.
+                result = regex.sub(
+                    lambda m, key=key, value=value:
+                        f'{m.group(1)}{key} = "{value}"; #{variant}',
+                    result)
+            print(result, end='')
+
+
+def read_file(relpath, variant):
+    """Return (version, suffix) read from the lines tagged '#<variant>'.
+
+    Either element is None when no matching line was found.
+    """
+    file_path = os.path.join(DIR, relpath)
+    re_version = re.compile(fr'^\s*version = "(.+)"; #{variant}')
+    re_suffix = re.compile(fr'^\s*suffix = "(.+)"; #{variant}')
+    version = None
+    suffix = None
+    with fileinput.FileInput(file_path, mode='r') as f:
+        for line in f:
+            version_match = re_version.match(line)
+            if version_match:
+                version = version_match.group(1)
+                continue
+
+            suffix_match = re_suffix.match(line)
+            if suffix_match:
+                suffix = suffix_match.group(1)
+                continue
+
+            if version and suffix:
+                break
+    return version, suffix
+
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        panic("Update variant expected")
+    variant = sys.argv[1]
+    if variant not in ("zen", "lqx"):
+        panic(f"Unexpected variant instead of 'zen' or 'lqx': {sys.argv[1]}")
+    pattern = re.compile(fr"v(\d+\.\d+\.?\d*)-({variant}\d+)")
+    zen_tags = github_api_request('repos/zen-kernel/zen-kernel/releases')
+    for tag in zen_tags:
+        zen_match = pattern.match(tag['tag_name'])
+        if zen_match:
+            zen_tag = zen_match.group(0)
+            zen_version = zen_match.group(1)
+            zen_suffix = zen_match.group(2)
+            break
+    else:
+        # Without this, the code below would fail with a NameError on zen_version.
+        panic(f"No release tag matching {pattern.pattern!r} found for variant '{variant}'")
+    old_version, old_suffix = read_file('zen-kernels.nix', variant)
+    if old_version != zen_version or old_suffix != zen_suffix:
+        zen_hash = nix_prefetch_git('https://github.com/zen-kernel/zen-kernel.git', zen_tag)
+        update_file('zen-kernels.nix', variant, zen_version, zen_suffix, zen_hash)
diff --git a/pkgs/os-specific/linux/kernel/update.sh b/pkgs/os-specific/linux/kernel/update.sh index 560edced36e..37e1cc1a5cd 100755 --- a/pkgs/os-specific/linux/kernel/update.sh +++ b/pkgs/os-specific/linux/kernel/update.sh @@ -1,68 +1,14 @@ #!/usr/bin/env bash -set -e +cd "$(dirname "$(readlink -f "$0")")" || exit -# Get the latest versions from kernel.org -LINUXSED='s/.*linux-\([0-9]\+\(.[0-9]\+\)*\).*/\1/p' -KDATA="$(curl -s https://www.kernel.org | sed -n -e '/Download complete/p')" -VERSIONS=($(sed -n -e $LINUXSED <<< "$KDATA" | sort -Vr)) +echo "Update linux (mainline)" +COMMIT=1 ./update-mainline.py || echo "update-mainline failed with exit code $?" -# Remove mainline version if there is a stable update -# Note due to sorting these two will always exist at the bottom -if grep -q "^${VERSIONS[1]}" <<< "${VERSIONS[0]}"; then - VERSIONS=(${VERSIONS[@]:0:1} ${VERSIONS[@]:2}) -fi +echo "Update linux-rt" +COMMIT=1 ./update-rt.sh || echo "update-rt failed with exit code $?" -# Inspect each file and see if it has the latest version -NIXPKGS="$(git rev-parse --show-toplevel)" -ls $NIXPKGS/pkgs/os-specific/linux/kernel | while read FILE; do - KERNEL="$(sed -n -e $LINUXSED <<< "$FILE")" - [ -z "$KERNEL" ] && continue +echo "Update linux-libre" +COMMIT=1 ./update-libre.sh || echo "update-libre failed with exit code $?"
- # Find the matching new kernel version - MATCHING="" - for V in "${VERSIONS[@]}"; do - if grep -q "^$KERNEL" <<< "$V"; then - MATCHING="$V" - break - fi - done - if [ -z "$MATCHING" ]; then - echo "Out-of-support $KERNEL" - continue - fi - - # Inspect the nix expression to check for changes - DATA="$(<$NIXPKGS/pkgs/os-specific/linux/kernel/$FILE)" - URL="$(sed -n -e 's/.*url = "\(.*\)";.*/\1/p' <<< "$DATA" | sed -e "s/\${version}/$MATCHING/g")" - OLDVER=$(sed -n -e 's/.*version = "\(.*\)".*/\1/p' <<< "$DATA") - if [ "$OLDVER" = "$V" ]; then - echo "No updates for $KERNEL" - continue - fi - - # Download the new file for the hash - if ! HASH="$(nix-prefetch-url $URL 2>/dev/null)"; then - echo "Failed to get hash of $URL" - continue - fi - sed -i -e "s/sha256 = \".*\"/sha256 = \"$HASH\"/g" $NIXPKGS/pkgs/os-specific/linux/kernel/$FILE - - # Rewrite the expression - sed -i -e '/version = /d' $NIXPKGS/pkgs/os-specific/linux/kernel/$FILE - sed -i -e "\#buildLinux (args // rec {#a \ version = \"$V\";" $NIXPKGS/pkgs/os-specific/linux/kernel/$FILE - - # Commit the changes - git add -u $NIXPKGS/pkgs/os-specific/linux/kernel/$FILE - git commit -m "linux: $OLDVER -> $V" >/dev/null 2>&1 - - echo "Updated $OLDVER -> $V" -done - -# Update linux-rt -COMMIT=1 $NIXPKGS/pkgs/os-specific/linux/kernel/update-rt.sh - -# Update linux-libre -COMMIT=1 $NIXPKGS/pkgs/os-specific/linux/kernel/update-libre.sh - -# Update linux-hardened -COMMIT=1 $NIXPKGS/pkgs/os-specific/linux/kernel/hardened/update.py +echo "Update linux-hardened" +COMMIT=1 ./hardened/update.py || echo "update-hardened failed with exit code $?" diff --git a/pkgs/os-specific/linux/kernel/xanmod-kernels.nix b/pkgs/os-specific/linux/kernel/xanmod-kernels.nix new file mode 100644 index 00000000000..4f967734d5e --- /dev/null +++ b/pkgs/os-specific/linux/kernel/xanmod-kernels.nix @@ -0,0 +1,56 @@ +{ lib, stdenv, fetchFromGitHub, buildLinux, ... } @ args: + +let + # These names are how they are designated in https://xanmod.org. 
+ + # NOTE: When updating these, please also take a look at the changes done to + # kernel config in the xanmod version commit + ltsVariant = { + version = "6.1.62"; + hash = "sha256-fo5OQ/MZ+QVdCmLzX0OgFUBedfqrkqp+Ev081RVdtWw="; + variant = "lts"; + }; + + mainVariant = { + version = "6.5.11"; + hash = "sha256-1bb5LG6JvqX5eNSe2Xyu86HxaqkUVkKUf1H3T7bFkGE="; + variant = "main"; + }; + + xanmodKernelFor = { version, suffix ? "xanmod1", hash, variant }: buildLinux (args // rec { + inherit version; + modDirVersion = lib.versions.pad 3 "${version}-${suffix}"; + + src = fetchFromGitHub { + owner = "xanmod"; + repo = "linux"; + rev = modDirVersion; + inherit hash; + }; + + structuredExtraConfig = with lib.kernel; { + # Google's BBRv3 TCP congestion Control + TCP_CONG_BBR = yes; + DEFAULT_BBR = yes; + + # WineSync driver for fast kernel-backed Wine + WINESYNC = module; + + # Preemptive Full Tickless Kernel at 250Hz + HZ = freeform "250"; + HZ_250 = yes; + HZ_1000 = no; + }; + + extraMeta = { + branch = lib.versions.majorMinor version; + maintainers = with lib.maintainers; [ moni lovesegfault atemu shawn8901 zzzsy ]; + description = "Built with custom settings and new features built to provide a stable, responsive and smooth desktop experience"; + broken = stdenv.isAarch64; + }; + } // (args.argsOverride or { })); +in +{ + lts = xanmodKernelFor ltsVariant; + main = xanmodKernelFor mainVariant; +} diff --git a/pkgs/os-specific/linux/kernel/zen-kernels.nix b/pkgs/os-specific/linux/kernel/zen-kernels.nix new file mode 100644 index 00000000000..456a6c7c27d --- /dev/null +++ b/pkgs/os-specific/linux/kernel/zen-kernels.nix @@ -0,0 +1,116 @@ +{ lib, stdenv, fetchFromGitHub, buildLinux, ... 
} @ args: + +let + # comments with variant added for update script + # ./update-zen.py zen + zenVariant = { + version = "6.6.1"; #zen + suffix = "zen1"; #zen + sha256 = "13m820wggf6pkp351w06mdn2lfcwbn08ydwksyxilqb88vmr0lpq"; #zen + isLqx = false; + }; + # ./update-zen.py lqx + lqxVariant = { + version = "6.5.11"; #lqx + suffix = "lqx2"; #lqx + sha256 = "0rak2ald95bwb5qlp8pf2g93a0gkv8rypiv5s8dpds3cilwmxrg9"; #lqx + isLqx = true; + }; + zenKernelsFor = { version, suffix, sha256, isLqx }: buildLinux (args // { + inherit version; + modDirVersion = lib.versions.pad 3 "${version}-${suffix}"; + isZen = true; + + src = fetchFromGitHub { + owner = "zen-kernel"; + repo = "zen-kernel"; + rev = "v${version}-${suffix}"; + inherit sha256; + }; + + # This is based on the following sources: + # - zen: https://gitlab.archlinux.org/archlinux/packaging/packages/linux-zen/-/blob/main/config + # - lqx: https://github.com/damentz/liquorix-package/blob/6.4/master/linux-liquorix/debian/config/kernelarch-x86/config-arch-64 + # - Liquorix features: https://liquorix.net/ + # The list below is not exhaustive, so the kernels probably doesn't match + # the upstream, but should bring most of the improvements that will be + # expected by users + structuredExtraConfig = with lib.kernel; { + # Zen Interactive tuning + ZEN_INTERACTIVE = yes; + + # FQ-Codel Packet Scheduling + NET_SCH_DEFAULT = yes; + DEFAULT_FQ_CODEL = yes; + DEFAULT_NET_SCH = freeform "fq_codel"; + + # Preempt (low-latency) + PREEMPT = lib.mkOverride 60 yes; + PREEMPT_VOLUNTARY = lib.mkOverride 60 no; + + # Preemptible tree-based hierarchical RCU + TREE_RCU = yes; + PREEMPT_RCU = yes; + RCU_EXPERT = yes; + TREE_SRCU = yes; + TASKS_RCU_GENERIC = yes; + TASKS_RCU = yes; + TASKS_RUDE_RCU = yes; + TASKS_TRACE_RCU = yes; + RCU_STALL_COMMON = yes; + RCU_NEED_SEGCBLIST = yes; + RCU_FANOUT = freeform "64"; + RCU_FANOUT_LEAF = freeform "16"; + RCU_BOOST = yes; + RCU_BOOST_DELAY = freeform "500"; + RCU_NOCB_CPU = yes; + RCU_LAZY = yes; + + # 
Futex WAIT_MULTIPLE implementation for Wine / Proton Fsync. + FUTEX = yes; + FUTEX_PI = yes; + + # Preemptive Full Tickless Kernel at 1000Hz + HZ = freeform "1000"; + HZ_1000 = yes; + } // lib.optionalAttrs (isLqx) { + # Google's BBRv3 TCP congestion Control + TCP_CONG_BBR = yes; + DEFAULT_BBR = yes; + DEFAULT_TCP_CONG = freeform "bbr"; + + # PDS Process Scheduler + SCHED_ALT = yes; + SCHED_PDS = yes; + + # Swap storage is compressed with LZ4 using zswap + ZSWAP_COMPRESSOR_DEFAULT_LZ4 = yes; + ZSWAP_COMPRESSOR_DEFAULT = freeform "lz4"; + + # Fix error: unused option: XXX. + CFS_BANDWIDTH = lib.mkForce (option no); + PSI = lib.mkForce (option no); + RT_GROUP_SCHED = lib.mkForce (option no); + SCHED_AUTOGROUP = lib.mkForce (option no); + SCHED_CORE = lib.mkForce (option no); + + # ERROR: modpost: "sched_numa_hop_mask" [drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.ko] undefined! + MLX5_CORE = no; + }; + + passthru.updateScript = [ ./update-zen.py (if isLqx then "lqx" else "zen") ]; + + extraMeta = { + branch = lib.versions.majorMinor version + "/master"; + maintainers = with lib.maintainers; [ thiagokokada jerrysm64 ]; + description = "Built using the best configuration and kernel sources for desktop, multimedia, and gaming workloads." + + lib.optionalString isLqx " (Same as linux_zen, but less aggressive release schedule and additional extra config)"; + broken = stdenv.isAarch64; + }; + + } // (args.argsOverride or { })); +in +{ + zen = zenKernelsFor zenVariant; + lqx = zenKernelsFor lqxVariant; +} |