diff options
author | Robin Gloster <mail@glob.in> | 2017-08-11 19:11:48 +0200 |
---|---|---|
committer | Robin Gloster <mail@glob.in> | 2017-08-11 19:13:09 +0200 |
commit | 05b8cae9ec88156f46d9b98424643b6208664222 (patch) | |
tree | a338375857defea922108d3789254ceac5077368 | |
parent | 9f3f575ab3eb4cd6b5f8de20e6f1a5374fedef2c (diff) | |
download | nixpkgs-05b8cae9ec88156f46d9b98424643b6208664222.tar nixpkgs-05b8cae9ec88156f46d9b98424643b6208664222.tar.gz nixpkgs-05b8cae9ec88156f46d9b98424643b6208664222.tar.bz2 nixpkgs-05b8cae9ec88156f46d9b98424643b6208664222.tar.lz nixpkgs-05b8cae9ec88156f46d9b98424643b6208664222.tar.xz nixpkgs-05b8cae9ec88156f46d9b98424643b6208664222.tar.zst nixpkgs-05b8cae9ec88156f46d9b98424643b6208664222.zip |
linux: remove unused kernel patches
-rw-r--r-- | pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch | 407 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/crc-regression.patch | 24 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/multithreaded-rsapubkey-asn1.patch | 45 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/no-xsave.patch | 85 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/patches.nix | 72 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/perf.diff | 18 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/perf.nix | 1 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/ubuntu-fan-4.4.patch | 1240 | ||||
-rw-r--r-- | pkgs/os-specific/linux/kernel/ubuntu-unprivileged-overlayfs.patch | 69 |
9 files changed, 0 insertions, 1961 deletions
diff --git a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch b/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch deleted file mode 100644 index 8f2418c9efc..00000000000 --- a/pkgs/os-specific/linux/kernel/cpu-cgroup-v2-patches/4.4.patch +++ /dev/null @@ -1,407 +0,0 @@ -commit e7cae741f6d645ac68fe8823ca6ef45dbbf6891b -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Misc preps for cgroup unified hierarchy interface - - Make the following changes in preparation for the cpu controller - interface implementation for the unified hierarchy. This patch - doesn't cause any functional differences. - - * s/cpu_stats_show()/cpu_cfs_stats_show()/ - - * s/cpu_files/cpu_legacy_files/ - - * Separate out cpuacct_stats_read() from cpuacct_stats_show(). While - at it, remove pointless cpuacct_stat_desc[] array. - - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 732e993..77f3ddd 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8512,7 +8512,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) - return ret; - } - --static int cpu_stats_show(struct seq_file *sf, void *v) -+static int cpu_cfs_stats_show(struct seq_file *sf, void *v) - { - struct task_group *tg = css_tg(seq_css(sf)); - struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -@@ -8552,7 +8552,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, - } - #endif /* CONFIG_RT_GROUP_SCHED */ - --static struct cftype cpu_files[] = { -+static struct cftype cpu_legacy_files[] = { - #ifdef CONFIG_FAIR_GROUP_SCHED - { - .name = "shares", -@@ -8573,7 +8573,7 @@ static struct cftype cpu_files[] = { - }, - { - .name = "stat", -- .seq_show = cpu_stats_show, -+ .seq_show = cpu_cfs_stats_show, - }, - #endif - 
#ifdef CONFIG_RT_GROUP_SCHED -@@ -8599,7 +8599,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .fork = cpu_cgroup_fork, - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, -- .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, - .early_init = 1, - }; - -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index dd7cbb5..42b2dd5 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -177,36 +177,33 @@ static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) - return 0; - } - --static const char * const cpuacct_stat_desc[] = { -- [CPUACCT_STAT_USER] = "user", -- [CPUACCT_STAT_SYSTEM] = "system", --}; -- --static int cpuacct_stats_show(struct seq_file *sf, void *v) -+static void cpuacct_stats_read(struct cpuacct *ca, u64 *userp, u64 *sysp) - { -- struct cpuacct *ca = css_ca(seq_css(sf)); - int cpu; -- s64 val = 0; - -+ *userp = 0; - for_each_online_cpu(cpu) { - struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); -- val += kcpustat->cpustat[CPUTIME_USER]; -- val += kcpustat->cpustat[CPUTIME_NICE]; -+ *userp += kcpustat->cpustat[CPUTIME_USER]; -+ *userp += kcpustat->cpustat[CPUTIME_NICE]; - } -- val = cputime64_to_clock_t(val); -- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); - -- val = 0; -+ *sysp = 0; - for_each_online_cpu(cpu) { - struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); -- val += kcpustat->cpustat[CPUTIME_SYSTEM]; -- val += kcpustat->cpustat[CPUTIME_IRQ]; -- val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; -+ *sysp += kcpustat->cpustat[CPUTIME_SYSTEM]; -+ *sysp += kcpustat->cpustat[CPUTIME_IRQ]; -+ *sysp += kcpustat->cpustat[CPUTIME_SOFTIRQ]; - } -+} - -- val = cputime64_to_clock_t(val); -- seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); -+static int cpuacct_stats_show(struct seq_file *sf, void *v) -+{ -+ cputime64_t user, sys; - -+ cpuacct_stats_read(css_ca(seq_css(sf)), &user, &sys); -+ seq_printf(sf, "user %lld\n", 
cputime64_to_clock_t(user)); -+ seq_printf(sf, "system %lld\n", cputime64_to_clock_t(sys)); - return 0; - } - - -commit 1bb33e8a69f089f2d3f58a0e681d4ff352e11c97 -Author: Tejun Heo <tj@kernel.org> -Date: Fri Mar 11 07:31:23 2016 -0500 - - sched: Implement interface for cgroup unified hierarchy - - While the cpu controller doesn't have any functional problems, there - are a couple interface issues which can be addressed in the v2 - interface. - - * cpuacct being a separate controller. This separation is artificial - and rather pointless as demonstrated by most use cases co-mounting - the two controllers. It also forces certain information to be - accounted twice. - - * Use of different time units. Writable control knobs use - microseconds, some stat fields use nanoseconds while other cpuacct - stat fields use centiseconds. - - * Control knobs which can't be used in the root cgroup still show up - in the root. - - * Control knob names and semantics aren't consistent with other - controllers. - - This patchset implements cpu controller's interface on the unified - hierarchy which adheres to the controller file conventions described - in Documentation/cgroups/unified-hierarchy.txt. Overall, the - following changes are made. - - * cpuacct is implictly enabled and disabled by cpu and its information - is reported through "cpu.stat" which now uses microseconds for all - time durations. All time duration fields now have "_usec" appended - to them for clarity. While this doesn't solve the double accounting - immediately, once majority of users switch to v2, cpu can directly - account and report the relevant stats and cpuacct can be disabled on - the unified hierarchy. - - Note that cpuacct.usage_percpu is currently not included in - "cpu.stat". If this information is actually called for, it can be - added later. - - * "cpu.shares" is replaced with "cpu.weight" and operates on the - standard scale defined by CGROUP_WEIGHT_MIN/DFL/MAX (1, 100, 10000). 
- The weight is scaled to scheduler weight so that 100 maps to 1024 - and the ratio relationship is preserved - if weight is W and its - scaled value is S, W / 100 == S / 1024. While the mapped range is a - bit smaller than the orignal scheduler weight range, the dead zones - on both sides are relatively small and covers wider range than the - nice value mappings. This file doesn't make sense in the root - cgroup and isn't create on root. - - * "cpu.cfs_quota_us" and "cpu.cfs_period_us" are replaced by "cpu.max" - which contains both quota and period. - - * "cpu.rt_runtime_us" and "cpu.rt_period_us" are replaced by - "cpu.rt.max" which contains both runtime and period. - - v2: cpu_stats_show() was incorrectly using CONFIG_FAIR_GROUP_SCHED for - CFS bandwidth stats and also using raw division for u64. Use - CONFIG_CFS_BANDWITH and do_div() instead. - - The semantics of "cpu.rt.max" is not fully decided yet. Dropped - for now. - - Signed-off-by: Tejun Heo <tj@kernel.org> - Cc: Ingo Molnar <mingo@redhat.com> - Cc: Peter Zijlstra <peterz@infradead.org> - Cc: Li Zefan <lizefan@huawei.com> - Cc: Johannes Weiner <hannes@cmpxchg.org> - -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 77f3ddd..7aafe63 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8591,6 +8591,139 @@ static struct cftype cpu_legacy_files[] = { - { } /* terminate */ - }; - -+static int cpu_stats_show(struct seq_file *sf, void *v) -+{ -+ cpuacct_cpu_stats_show(sf); -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ struct task_group *tg = css_tg(seq_css(sf)); -+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; -+ u64 throttled_usec; -+ -+ throttled_usec = cfs_b->throttled_time; -+ do_div(throttled_usec, NSEC_PER_USEC); -+ -+ seq_printf(sf, "nr_periods %d\n" -+ "nr_throttled %d\n" -+ "throttled_usec %llu\n", -+ cfs_b->nr_periods, cfs_b->nr_throttled, -+ throttled_usec); -+ } -+#endif -+ return 0; -+} -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+static u64 cpu_weight_read_u64(struct 
cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ u64 weight = scale_load_down(tg->shares); -+ -+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); -+} -+ -+static int cpu_weight_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 weight) -+{ -+ /* -+ * cgroup weight knobs should use the common MIN, DFL and MAX -+ * values which are 1, 100 and 10000 respectively. While it loses -+ * a bit of range on both ends, it maps pretty well onto the shares -+ * value used by scheduler and the round-trip conversions preserve -+ * the original value over the entire range. -+ */ -+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) -+ return -ERANGE; -+ -+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); -+ -+ return sched_group_set_shares(css_tg(css), scale_load(weight)); -+} -+#endif -+ -+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, -+ long period, long quota) -+{ -+ if (quota < 0) -+ seq_puts(sf, "max"); -+ else -+ seq_printf(sf, "%ld", quota); -+ -+ seq_printf(sf, " %ld\n", period); -+} -+ -+/* caller should put the current value in *@periodp before calling */ -+static int __maybe_unused cpu_period_quota_parse(char *buf, -+ u64 *periodp, u64 *quotap) -+{ -+ char tok[21]; /* U64_MAX */ -+ -+ if (!sscanf(buf, "%s %llu", tok, periodp)) -+ return -EINVAL; -+ -+ *periodp *= NSEC_PER_USEC; -+ -+ if (sscanf(tok, "%llu", quotap)) -+ *quotap *= NSEC_PER_USEC; -+ else if (!strcmp(tok, "max")) -+ *quotap = RUNTIME_INF; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+ -+#ifdef CONFIG_CFS_BANDWIDTH -+static int cpu_max_show(struct seq_file *sf, void *v) -+{ -+ struct task_group *tg = css_tg(seq_css(sf)); -+ -+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); -+ return 0; -+} -+ -+static ssize_t cpu_max_write(struct kernfs_open_file *of, -+ char *buf, size_t nbytes, loff_t off) -+{ -+ struct task_group *tg = css_tg(of_css(of)); -+ 
u64 period = tg_get_cfs_period(tg); -+ u64 quota; -+ int ret; -+ -+ ret = cpu_period_quota_parse(buf, &period, &quota); -+ if (!ret) -+ ret = tg_set_cfs_bandwidth(tg, period, quota); -+ return ret ?: nbytes; -+} -+#endif -+ -+static struct cftype cpu_files[] = { -+ { -+ .name = "stat", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_stats_show, -+ }, -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ { -+ .name = "weight", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_u64 = cpu_weight_read_u64, -+ .write_u64 = cpu_weight_write_u64, -+ }, -+#endif -+#ifdef CONFIG_CFS_BANDWIDTH -+ { -+ .name = "max", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .seq_show = cpu_max_show, -+ .write = cpu_max_write, -+ }, -+#endif -+ { } /* terminate */ -+}; -+ - struct cgroup_subsys cpu_cgrp_subsys = { - .css_alloc = cpu_cgroup_css_alloc, - .css_free = cpu_cgroup_css_free, -@@ -8600,7 +8733,15 @@ struct cgroup_subsys cpu_cgrp_subsys = { - .can_attach = cpu_cgroup_can_attach, - .attach = cpu_cgroup_attach, - .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, - .early_init = 1, -+#ifdef CONFIG_CGROUP_CPUACCT -+ /* -+ * cpuacct is enabled together with cpu on the unified hierarchy -+ * and its stats are reported through "cpu.stat".
-+ */ -+ .depends_on = 1 << cpuacct_cgrp_id, -+#endif - }; - - #endif /* CONFIG_CGROUP_SCHED */ -diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c -index 42b2dd5..b4d32a6 100644 ---- a/kernel/sched/cpuacct.c -+++ b/kernel/sched/cpuacct.c -@@ -224,6 +224,30 @@ static struct cftype files[] = { - { } /* terminate */ - }; - -+/* used to print cpuacct stats in cpu.stat on the unified hierarchy */ -+void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+ struct cgroup_subsys_state *css; -+ u64 usage, user, sys; -+ -+ css = cgroup_get_e_css(seq_css(sf)->cgroup, &cpuacct_cgrp_subsys); -+ -+ usage = cpuusage_read(css, seq_cft(sf)); -+ cpuacct_stats_read(css_ca(css), &user, &sys); -+ -+ user *= TICK_NSEC; -+ sys *= TICK_NSEC; -+ do_div(usage, NSEC_PER_USEC); -+ do_div(user, NSEC_PER_USEC); -+ do_div(sys, NSEC_PER_USEC); -+ -+ seq_printf(sf, "usage_usec %llu\n" -+ "user_usec %llu\n" -+ "system_usec %llu\n", usage, user, sys); -+ -+ css_put(css); -+} -+ - /* - * charge this task's execution time to its accounting group. - * -diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h -index ed60562..44eace9 100644 ---- a/kernel/sched/cpuacct.h -+++ b/kernel/sched/cpuacct.h -@@ -2,6 +2,7 @@ - - extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); - extern void cpuacct_account_field(struct task_struct *p, int index, u64 val); -+extern void cpuacct_cpu_stats_show(struct seq_file *sf); - - #else - -@@ -14,4 +15,8 @@ cpuacct_account_field(struct task_struct *p, int index, u64 val) - { - } - -+static inline void cpuacct_cpu_stats_show(struct seq_file *sf) -+{ -+} -+ - #endif diff --git a/pkgs/os-specific/linux/kernel/crc-regression.patch b/pkgs/os-specific/linux/kernel/crc-regression.patch deleted file mode 100644 index 623713d16a6..00000000000 --- a/pkgs/os-specific/linux/kernel/crc-regression.patch +++ /dev/null @@ -1,24 +0,0 @@ -See https://github.com/NixOS/nixpkgs/issues/6231 - -v3.14.31:crypto/crc32c.c is missing the MODULE_ALIAS_CRYPTO("crc32c"). 
-That's probably because crypto/crc32c.c was renamed to -crypto/crc32c_generic.c in commit -06e5a1f29819759392239669beb2cad27059c8ec and therefore fell through -the cracks when backporting commit -5d26a105b5a73e5635eae0629b42fa0a90e07b7b. - -So the affected kernels (all that backported the "crypto-" prefix -patches) need this additional patch: - -diff --git a/crypto/crc32c.c b/crypto/crc32c.c -index 06f7018c9d95..aae5829eb681 100644 ---- a/crypto/crc32c.c -+++ b/crypto/crc32c.c -@@ -167,6 +167,7 @@ static void __exit crc32c_mod_fini(void) - module_init(crc32c_mod_init); - module_exit(crc32c_mod_fini); - -+MODULE_ALIAS_CRYPTO("crc32c"); - MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>"); - MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); - MODULE_LICENSE("GPL"); diff --git a/pkgs/os-specific/linux/kernel/multithreaded-rsapubkey-asn1.patch b/pkgs/os-specific/linux/kernel/multithreaded-rsapubkey-asn1.patch deleted file mode 100644 index 9f5790862b6..00000000000 --- a/pkgs/os-specific/linux/kernel/multithreaded-rsapubkey-asn1.patch +++ /dev/null @@ -1,45 +0,0 @@ - -From Yang Shi <> -Subject [PATCH] crypto: rsa - fix a potential race condition in build -Date Fri, 2 Dec 2016 15:41:04 -0800 - - -When building kernel with RSA enabled with multithreaded, the below -compile failure might be caught: - -| /buildarea/kernel-source/crypto/rsa_helper.c:18:28: fatal error: rsapubkey-asn1.h: No such file or directory -| #include "rsapubkey-asn1.h" -| ^ -| compilation terminated. -| CC crypto/rsa-pkcs1pad.o -| CC crypto/algboss.o -| CC crypto/testmgr.o -| make[3]: *** [/buildarea/kernel-source/scripts/Makefile.build:289: crypto/rsa_helper.o] Error 1 -| make[3]: *** Waiting for unfinished jobs.... 
-| make[2]: *** [/buildarea/kernel-source/Makefile:969: crypto] Error 2 -| make[1]: *** [Makefile:150: sub-make] Error 2 -| make: *** [Makefile:24: __sub-make] Error 2 - -The header file is not generated before rsa_helper is compiled, so -adding dependency to avoid such issue. - -Signed-off-by: Yang Shi <yang.shi@windriver.com> - ---- - crypto/Makefile | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/crypto/Makefile b/crypto/Makefile -index 99cc64a..8db39f9 100644 ---- a/crypto/Makefile -+++ b/crypto/Makefile -@@ -40,6 +40,7 @@ obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o - - $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h - $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h -+$(obj)/rsa_helper.o: $(obj)/rsa_helper.c $(obj)/rsaprivkey-asn1.h - clean-files += rsapubkey-asn1.c rsapubkey-asn1.h - clean-files += rsaprivkey-asn1.c rsaprivkey-asn1.h - --- -2.0.2 diff --git a/pkgs/os-specific/linux/kernel/no-xsave.patch b/pkgs/os-specific/linux/kernel/no-xsave.patch deleted file mode 100644 index dde96d7dccd..00000000000 --- a/pkgs/os-specific/linux/kernel/no-xsave.patch +++ /dev/null @@ -1,85 +0,0 @@ ---- a/arch/x86/xen/enlighten.c -+++ b/arch/x86/xen/enlighten.c -@@ -168,21 +168,23 @@ static void __init xen_banner(void) - xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); - } - -+static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; -+static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; -+ - static void xen_cpuid(unsigned int *ax, unsigned int *bx, - unsigned int *cx, unsigned int *dx) - { -+ unsigned maskecx = ~0; - unsigned maskedx = ~0; - - /* - * Mask out inconvenient features, to try and disable as many - * unsupported kernel subsystems as possible. 
- */ -- if (*ax == 1) -- maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ -- (1 << X86_FEATURE_ACPI) | /* disable ACPI */ -- (1 << X86_FEATURE_MCE) | /* disable MCE */ -- (1 << X86_FEATURE_MCA) | /* disable MCA */ -- (1 << X86_FEATURE_ACC)); /* thermal monitoring */ -+ if (*ax == 1) { -+ maskecx = cpuid_leaf1_ecx_mask; -+ maskedx = cpuid_leaf1_edx_mask; -+ } - - asm(XEN_EMULATE_PREFIX "cpuid" - : "=a" (*ax), -@@ -190,9 +192,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, - "=c" (*cx), - "=d" (*dx) - : "0" (*ax), "2" (*cx)); -+ -+ *cx &= maskecx; - *dx &= maskedx; - } - -+static __init void xen_init_cpuid_mask(void) -+{ -+ unsigned int ax, bx, cx, dx; -+ -+ cpuid_leaf1_edx_mask = -+ ~((1 << X86_FEATURE_MCE) | /* disable MCE */ -+ (1 << X86_FEATURE_MCA) | /* disable MCA */ -+ (1 << X86_FEATURE_ACC)); /* thermal monitoring */ -+ -+ if (!xen_initial_domain()) -+ cpuid_leaf1_edx_mask &= -+ ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ -+ (1 << X86_FEATURE_ACPI)); /* disable ACPI */ -+ -+ ax = 1; -+ xen_cpuid(&ax, &bx, &cx, &dx); -+ -+ /* cpuid claims we support xsave; try enabling it to see what happens */ -+ if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { -+ unsigned long cr4; -+ -+ set_in_cr4(X86_CR4_OSXSAVE); -+ -+ cr4 = read_cr4(); -+ -+ if ((cr4 & X86_CR4_OSXSAVE) == 0) -+ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); -+ -+ clear_in_cr4(X86_CR4_OSXSAVE); -+ } -+} -+ - static void xen_set_debugreg(int reg, unsigned long val) - { - HYPERVISOR_set_debugreg(reg, val); -@@ -903,6 +939,8 @@ asmlinkage void __init xen_start_kernel(void) - - xen_init_irq_ops(); - -+ xen_init_cpuid_mask(); -+ - #ifdef CONFIG_X86_LOCAL_APIC - /* - * set up the basic apic ops. 
diff --git a/pkgs/os-specific/linux/kernel/patches.nix b/pkgs/os-specific/linux/kernel/patches.nix index 4c78928a99a..e5f430b4d82 100644 --- a/pkgs/os-specific/linux/kernel/patches.nix +++ b/pkgs/os-specific/linux/kernel/patches.nix @@ -21,12 +21,6 @@ in rec { - multithreaded_rsapubkey = - { - name = "multithreaded-rsapubkey-asn1.patch"; - patch = ./multithreaded-rsapubkey-asn1.patch; - }; - bridge_stp_helper = { name = "bridge-stp-helper"; patch = ./bridge-stp-helper.patch; @@ -37,12 +31,6 @@ rec { patch = ./p9-fixes.patch; }; - no_xsave = - { name = "no-xsave"; - patch = ./no-xsave.patch; - features.noXsave = true; - }; - mips_fpureg_emu = { name = "mips-fpureg-emulation"; patch = ./mips-fpureg-emulation.patch; @@ -63,22 +51,6 @@ rec { patch = ./modinst-arg-list-too-long.patch; }; - ubuntu_fan_4_4 = - { name = "ubuntu-fan"; - patch = ./ubuntu-fan-4.4.patch; - }; - - ubuntu_unprivileged_overlayfs = - { name = "ubuntu-unprivileged-overlayfs"; - patch = ./ubuntu-unprivileged-overlayfs.patch; - }; - - tuxonice_3_10 = makeTuxonicePatch { - version = "2013-11-07"; - kernelVersion = "3.10.18"; - sha256 = "00b1rqgd4yr206dxp4mcymr56ymbjcjfa4m82pxw73khj032qw3j"; - }; - grsecurity_testing = throw '' Upstream has ceased free support for grsecurity/PaX. @@ -87,11 +59,6 @@ rec { for more information. 
''; - crc_regression = - { name = "crc-backport-regression"; - patch = ./crc-regression.patch; - }; - genksyms_fix_segfault = { name = "genksyms-fix-segfault"; patch = ./genksyms-fix-segfault.patch; @@ -107,47 +74,8 @@ rec { patch = ./chromiumos-patches/no-link-restrictions.patch; }; - chromiumos_mfd_fix_dependency = - { name = "mfd_fix_dependency"; - patch = ./chromiumos-patches/mfd-fix-dependency.patch; - }; - - hiddev_CVE_2016_5829 = - { name = "hiddev_CVE_2016_5829"; - patch = fetchpatch { - url = "https://sources.debian.net/data/main/l/linux/4.6.3-1/debian/patches/bugfix/all/HID-hiddev-validate-num_values-for-HIDIOCGUSAGES-HID.patch"; - sha256 = "14rm1qr87p7a5prz8g5fwbpxzdp3ighj095x8rvhm8csm20wspyy"; - }; - }; - cpu-cgroup-v2 = import ./cpu-cgroup-v2-patches; - lguest_entry-linkage = - { name = "lguest-asmlinkage.patch"; - patch = fetchpatch { - url = "https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git" - + "/patch/drivers/lguest/x86/core.c?id=cdd77e87eae52"; - sha256 = "04xlx6al10cw039av6jkby7gx64zayj8m1k9iza40sw0fydcfqhc"; - }; - }; - - packet_fix_race_condition_CVE_2016_8655 = - { name = "packet_fix_race_condition_CVE_2016_8655.patch"; - patch = fetchpatch { - url = "https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/patch/?id=84ac7260236a49c79eede91617700174c2c19b0c"; - sha256 = "19viqjjgq8j8jiz5yhgmzwhqvhwv175q645qdazd1k69d25nv2ki"; - }; - }; - - panic_on_icmp6_frag_CVE_2016_9919 = rec - { name = "panic_on_icmp6_frag_CVE_2016_9919.patch"; - patch = fetchpatch { - inherit name; - url = "https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/patch/?id=79dc7e3f1cd323be4c81aa1a94faa1b3ed987fb2"; - sha256 = "0mps33r4mnwiy0bmgrzgqkrk59yya17v6kzpv9024g4xlz61rk8p"; - }; - }; - DCCP_double_free_vulnerability_CVE-2017-6074 = rec { name = "DCCP_double_free_vulnerability_CVE-2017-6074.patch"; patch = fetchpatch { diff --git a/pkgs/os-specific/linux/kernel/perf.diff b/pkgs/os-specific/linux/kernel/perf.diff deleted file mode 100644 
index 88d0381784f..00000000000 --- a/pkgs/os-specific/linux/kernel/perf.diff +++ /dev/null @@ -1,18 +0,0 @@ ---- perf/config/utilities.mak.orig 2014-01-25 14:55:32.573320370 +0000 -+++ perf/config/utilities.mak 2014-01-25 15:13:34.174337760 +0000 -@@ -186,9 +186,14 @@ - endif - TRY_CC_MSG=echo " CHK $(3)" 1>&2; - -+define newline -+ -+ -+endef -+ - try-cc = $(shell sh -c \ - 'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \ - $(TRY_CC_MSG) \ -- echo "$(1)" | \ -+ echo -e "$(subst $(newline),\\n,$(1))" | tee _test.c | \ - $(CC) -x c - $(2) -o "$$TMP" $(TRY_CC_OUTPUT) && echo y; \ - rm -f "$$TMP"') diff --git a/pkgs/os-specific/linux/kernel/perf.nix b/pkgs/os-specific/linux/kernel/perf.nix index b6f1f7d9f8a..f01f3928950 100644 --- a/pkgs/os-specific/linux/kernel/perf.nix +++ b/pkgs/os-specific/linux/kernel/perf.nix @@ -16,7 +16,6 @@ stdenv.mkDerivation { preConfigure = '' cd tools/perf sed -i s,/usr/include/elfutils,$elfutils/include/elfutils, Makefile - ${optionalString (versionOlder kernel.version "3.13") "patch -p1 < ${./perf.diff}"} [ -f bash_completion ] && sed -i 's,^have perf,_have perf,' bash_completion export makeFlags="DESTDIR=$out $makeFlags" ''; diff --git a/pkgs/os-specific/linux/kernel/ubuntu-fan-4.4.patch b/pkgs/os-specific/linux/kernel/ubuntu-fan-4.4.patch deleted file mode 100644 index 39150ad790d..00000000000 --- a/pkgs/os-specific/linux/kernel/ubuntu-fan-4.4.patch +++ /dev/null @@ -1,1240 +0,0 @@ -From e64058be3b97c5bd3e034fc4ece21e306ef6f90b Mon Sep 17 00:00:00 2001 -From: Jay Vosburgh <jay.vosburgh@canonical.com> -Date: Wed, 1 Apr 2015 16:11:09 -0700 -Subject: [PATCH] UBUNTU: SAUCE: fan: tunnel multiple mapping mode (v3) - -Switch to a single tunnel for all mappings, this removes the limitations -on how many mappings each tunnel can handle, and therefore how many Fan -slices each local address may hold. - -NOTE: This introduces a new kernel netlink interface which needs updated -iproute2 support. 
- -BugLink: http://bugs.launchpad.net/bugs/1470091 -Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com> -Signed-off-by: Andy Whitcroft <apw@canonical.com> -Signed-off-by: Tim Gardner <tim.gardner@canonical.com> - -Conflicts: - include/net/ip_tunnels.h ---- - include/net/ip_tunnels.h | 15 ++++ - include/uapi/linux/if_tunnel.h | 20 +++++ - net/ipv4/ip_tunnel.c | 7 +- - net/ipv4/ipip.c | 186 +++++++++++++++++++++++++++++++++++++++-- - 4 files changed, 222 insertions(+), 6 deletions(-) - -diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h -index 62a750a..47fec59 100644 ---- a/include/net/ip_tunnels.h -+++ b/include/net/ip_tunnels.h -@@ -91,6 +91,19 @@ struct ip_tunnel_dst { - }; - - struct metadata_dst; -+/* A fan overlay /8 (250.0.0.0/8, for example) maps to exactly one /16 -+ * underlay (10.88.0.0/16, for example). Multiple local addresses within -+ * the /16 may be used, but a particular overlay may not span -+ * multiple underlay subnets. -+ * -+ * We store one underlay, indexed by the overlay's high order octet. 
-+ */ -+#define FAN_OVERLAY_CNT 256 -+ -+struct ip_tunnel_fan { -+/* u32 __rcu *map;*/ -+ u32 map[FAN_OVERLAY_CNT]; -+}; - - struct ip_tunnel { - struct ip_tunnel __rcu *next; -@@ -123,6 +136,7 @@ struct ip_tunnel { - #endif - struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ - unsigned int prl_count; /* # of entries in PRL */ -+ struct ip_tunnel_fan fan; - int ip_tnl_net_id; - struct gro_cells gro_cells; - bool collect_md; -@@ -143,6 +157,7 @@ struct ip_tunnel { - #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) - - #define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) -+#define TUNNEL_FAN __cpu_to_be16(0x4000) - - struct tnl_ptk_info { - __be16 flags; -diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h -index af4de90..85a3e4b 100644 ---- a/include/uapi/linux/if_tunnel.h -+++ b/include/uapi/linux/if_tunnel.h -@@ -57,6 +57,10 @@ enum { - IFLA_IPTUN_ENCAP_FLAGS, - IFLA_IPTUN_ENCAP_SPORT, - IFLA_IPTUN_ENCAP_DPORT, -+ -+ __IFLA_IPTUN_VENDOR_BREAK, /* Ensure new entries do not hit the below. 
*/ -+ IFLA_IPTUN_FAN_MAP = 33, -+ - __IFLA_IPTUN_MAX, - }; - #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) -@@ -132,4 +136,20 @@ enum { - }; - - #define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) -+ -+enum { -+ IFLA_FAN_UNSPEC, -+ IFLA_FAN_MAPPING, -+ __IFLA_FAN_MAX, -+}; -+ -+#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1) -+ -+struct ip_tunnel_fan_map { -+ __be32 underlay; -+ __be32 overlay; -+ __u16 underlay_prefix; -+ __u16 overlay_prefix; -+}; -+ - #endif /* _UAPI_IF_TUNNEL_H_ */ -diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c -index cbb51f3..7a6174b 100644 ---- a/net/ipv4/ip_tunnel.c -+++ b/net/ipv4/ip_tunnel.c -@@ -1110,6 +1110,11 @@ out: - } - EXPORT_SYMBOL_GPL(ip_tunnel_newlink); - -+static int ip_tunnel_is_fan(struct ip_tunnel *tunnel) -+{ -+ return tunnel->parms.i_flags & TUNNEL_FAN; -+} -+ - int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p) - { -@@ -1119,7 +1124,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); - - if (dev == itn->fb_tunnel_dev) -- return -EINVAL; -+ return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL; - - t = ip_tunnel_find(itn, p, dev->type); - -diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c -index a09fb0d..56e8984 100644 ---- a/net/ipv4/ipip.c -+++ b/net/ipv4/ipip.c -@@ -107,6 +107,7 @@ - #include <linux/init.h> - #include <linux/netfilter_ipv4.h> - #include <linux/if_ether.h> -+#include <linux/inetdevice.h> - - #include <net/sock.h> - #include <net/ip.h> -@@ -208,6 +209,40 @@ drop: - return 0; - } - -+static int ipip_tunnel_is_fan(struct ip_tunnel *tunnel) -+{ -+ return tunnel->parms.i_flags & TUNNEL_FAN; -+} -+ -+/* -+ * Determine fan tunnel endpoint to send packet to, based on the inner IP -+ * address. 
For an overlay (inner) address Y.A.B.C, the transformation is -+ * F.G.A.B, where "F" and "G" are the first two octets of the underlay -+ * network (the network portion of a /16), "A" and "B" are the low order -+ * two octets of the underlay network host (the host portion of a /16), -+ * and "Y" is a configured first octet of the overlay network. -+ * -+ * E.g., underlay host 10.88.3.4 with an overlay of 99 would host overlay -+ * subnet 99.3.4.0/24. An overlay network datagram from 99.3.4.5 to -+ * 99.6.7.8, would be directed to underlay host 10.88.6.7, which hosts -+ * overlay network 99.6.7.0/24. -+ */ -+static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph) -+{ -+ unsigned int overlay; -+ u32 daddr, underlay; -+ -+ daddr = ntohl(ip_hdr(skb)->daddr); -+ overlay = daddr >> 24; -+ underlay = tunnel->fan.map[overlay]; -+ if (!underlay) -+ return -EINVAL; -+ -+ *iph = tunnel->parms.iph; -+ iph->daddr = htonl(underlay | ((daddr >> 8) & 0x0000ffff)); -+ return 0; -+} -+ - /* - * This function assumes it is being called from dev_queue_xmit() - * and that skb is filled properly by that function. 
-@@ -215,7 +250,8 @@ drop: - static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) - { - struct ip_tunnel *tunnel = netdev_priv(dev); -- const struct iphdr *tiph = &tunnel->parms.iph; -+ const struct iphdr *tiph; -+ struct iphdr fiph; - - if (unlikely(skb->protocol != htons(ETH_P_IP))) - goto tx_error; -@@ -224,6 +260,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) - if (IS_ERR(skb)) - goto out; - -+ if (ipip_tunnel_is_fan(tunnel)) { -+ if (ipip_build_fan_iphdr(tunnel, skb, &fiph)) -+ goto tx_error; -+ tiph = &fiph; -+ } else { -+ tiph = &tunnel->parms.iph; -+ } -+ - skb_set_inner_ipproto(skb, IPPROTO_IPIP); - - ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); -@@ -375,21 +419,88 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], - return ret; - } - -+static void ipip_fan_free_map(struct ip_tunnel *t) -+{ -+ memset(&t->fan.map, 0, sizeof(t->fan.map)); -+} -+ -+static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map) -+{ -+ u32 overlay, overlay_mask, underlay, underlay_mask; -+ -+ if ((map->underlay_prefix && map->underlay_prefix != 16) || -+ (map->overlay_prefix && map->overlay_prefix != 8)) -+ return -EINVAL; -+ -+ overlay = ntohl(map->overlay); -+ overlay_mask = ntohl(inet_make_mask(map->overlay_prefix)); -+ -+ underlay = ntohl(map->underlay); -+ underlay_mask = ntohl(inet_make_mask(map->underlay_prefix)); -+ -+ if ((overlay & ~overlay_mask) || (underlay & ~underlay_mask)) -+ return -EINVAL; -+ -+ if (!(overlay & overlay_mask) && (underlay & underlay_mask)) -+ return -EINVAL; -+ -+ t->parms.i_flags |= TUNNEL_FAN; -+ -+ /* Special case: overlay 0 and underlay 0 clears all mappings */ -+ if (!overlay && !underlay) { -+ ipip_fan_free_map(t); -+ return 0; -+ } -+ -+ overlay >>= (32 - map->overlay_prefix); -+ t->fan.map[overlay] = underlay; -+ -+ return 0; -+} -+ -+ -+static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t, -+ struct ip_tunnel_parm 
*parms) -+{ -+ struct ip_tunnel_fan_map *map; -+ struct nlattr *attr; -+ int rem, rv; -+ -+ if (!data[IFLA_IPTUN_FAN_MAP]) -+ return 0; -+ -+ if (parms->iph.daddr) -+ return -EINVAL; -+ -+ nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) { -+ map = nla_data(attr); -+ rv = ipip_fan_set_map(t, map); -+ if (rv) -+ return rv; -+ } -+ -+ return 0; -+} -+ - static int ipip_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) - { - struct ip_tunnel_parm p; - struct ip_tunnel_encap ipencap; -+ struct ip_tunnel *t = netdev_priv(dev); -+ int err; - - if (ipip_netlink_encap_parms(data, &ipencap)) { -- struct ip_tunnel *t = netdev_priv(dev); -- int err = ip_tunnel_encap_setup(t, &ipencap); -+ err = ip_tunnel_encap_setup(t, &ipencap); - - if (err < 0) - return err; - } - - ipip_netlink_parms(data, &p); -+ err = ipip_netlink_fan(data, t, &p); -+ if (err < 0) -+ return err; - return ip_tunnel_newlink(dev, tb, &p); - } - -@@ -398,16 +509,20 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], - { - struct ip_tunnel_parm p; - struct ip_tunnel_encap ipencap; -+ struct ip_tunnel *t = netdev_priv(dev); -+ int err; - - if (ipip_netlink_encap_parms(data, &ipencap)) { -- struct ip_tunnel *t = netdev_priv(dev); -- int err = ip_tunnel_encap_setup(t, &ipencap); -+ err = ip_tunnel_encap_setup(t, &ipencap); - - if (err < 0) - return err; - } - - ipip_netlink_parms(data, &p); -+ err = ipip_netlink_fan(data, t, &p); -+ if (err < 0) -+ return err; - - if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) -@@ -439,6 +554,8 @@ static size_t ipip_get_size(const struct net_device *dev) - nla_total_size(2) + - /* IFLA_IPTUN_ENCAP_DPORT */ - nla_total_size(2) + -+ /* IFLA_IPTUN_FAN_MAP */ -+ nla_total_size(sizeof(struct ip_tunnel_fan_map)) * 256 + - 0; - } - -@@ -466,6 +583,29 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) - 
tunnel->encap.flags)) - goto nla_put_failure; - -+ if (tunnel->parms.i_flags & TUNNEL_FAN) { -+ struct nlattr *fan_nest; -+ int i; -+ -+ fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP); -+ if (!fan_nest) -+ goto nla_put_failure; -+ for (i = 0; i < 256; i++) { -+ if (tunnel->fan.map[i]) { -+ struct ip_tunnel_fan_map map; -+ -+ map.underlay = htonl(tunnel->fan.map[i]); -+ map.underlay_prefix = 16; -+ map.overlay = htonl(i << 24); -+ map.overlay_prefix = 8; -+ if (nla_put(skb, IFLA_FAN_MAPPING, -+ sizeof(map), &map)) -+ goto nla_put_failure; -+ } -+ } -+ nla_nest_end(skb, fan_nest); -+ } -+ - return 0; - - nla_put_failure: -@@ -483,6 +623,9 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { - [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, - [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, - [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, -+ -+ [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY }, -+ [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED }, - }; - - static struct rtnl_link_ops ipip_link_ops __read_mostly = { -@@ -523,6 +666,23 @@ static struct pernet_operations ipip_net_ops = { - .size = sizeof(struct ip_tunnel_net), - }; - -+#ifdef CONFIG_SYSCTL -+static struct ctl_table_header *ipip_fan_header; -+static unsigned int ipip_fan_version = 3; -+ -+static struct ctl_table ipip_fan_sysctls[] = { -+ { -+ .procname = "version", -+ .data = &ipip_fan_version, -+ .maxlen = sizeof(ipip_fan_version), -+ .mode = 0444, -+ .proc_handler = proc_dointvec, -+ }, -+ {}, -+}; -+ -+#endif /* CONFIG_SYSCTL */ -+ - static int __init ipip_init(void) - { - int err; -@@ -541,9 +701,22 @@ static int __init ipip_init(void) - if (err < 0) - goto rtnl_link_failed; - -+#ifdef CONFIG_SYSCTL -+ ipip_fan_header = register_net_sysctl(&init_net, "net/fan", -+ ipip_fan_sysctls); -+ if (!ipip_fan_header) { -+ err = -ENOMEM; -+ goto sysctl_failed; -+ } -+#endif /* CONFIG_SYSCTL */ -+ - out: - return err; - -+#ifdef CONFIG_SYSCTL -+sysctl_failed: -+ 
rtnl_link_unregister(&ipip_link_ops); -+#endif /* CONFIG_SYSCTL */ - rtnl_link_failed: - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); - xfrm_tunnel_failed: -@@ -553,6 +726,9 @@ xfrm_tunnel_failed: - - static void __exit ipip_fini(void) - { -+#ifdef CONFIG_SYSCTL -+ unregister_net_sysctl_table(ipip_fan_header); -+#endif /* CONFIG_SYSCTL */ - rtnl_link_unregister(&ipip_link_ops); - if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) - pr_info("%s: can't deregister tunnel\n", __func__); --- -2.7.4 - -From 14aba409d044e3a314c09c650e1c42de699700b8 Mon Sep 17 00:00:00 2001 -From: Jay Vosburgh <jay.vosburgh@canonical.com> -Date: Wed, 11 Nov 2015 13:04:50 +0000 -Subject: [PATCH] UBUNTU: SAUCE: fan: add VXLAN implementation - -Generify the fan mapping support and utilise that to implement fan -mappings over vxlan transport. - -Expose the existance of this functionality (when the module is loaded) -via an additional sysctl marker. - -Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com> -[apw@canonical.com: added feature marker for fan over vxlan.] 
-Signed-off-by: Andy Whitcroft <apw@canonical.com> ---- - drivers/net/vxlan.c | 245 +++++++++++++++++++++++++++++++++++++++++ - include/net/ip_tunnels.h | 19 +++- - include/net/vxlan.h | 2 + - include/uapi/linux/if_link.h | 1 + - include/uapi/linux/if_tunnel.h | 2 +- - net/ipv4/ip_tunnel.c | 7 +- - net/ipv4/ipip.c | 242 +++++++++++++++++++++++++++++++--------- - 7 files changed, 453 insertions(+), 65 deletions(-) - -diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c -index 405a7b6..a17cfd0 100644 ---- a/drivers/net/vxlan.c -+++ b/drivers/net/vxlan.c -@@ -23,6 +23,7 @@ - #include <linux/udp.h> - #include <linux/igmp.h> - #include <linux/etherdevice.h> -+#include <linux/inetdevice.h> - #include <linux/if_ether.h> - #include <linux/if_vlan.h> - #include <linux/hash.h> -@@ -106,6 +107,167 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) - ip_tunnel_collect_metadata(); - } - -+static struct ip_fan_map *vxlan_fan_find_map(struct vxlan_dev *vxlan, __be32 daddr) -+{ -+ struct ip_fan_map *fan_map; -+ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) { -+ if (fan_map->overlay == -+ (daddr & inet_make_mask(fan_map->overlay_prefix))) { -+ rcu_read_unlock(); -+ return fan_map; -+ } -+ } -+ rcu_read_unlock(); -+ -+ return NULL; -+} -+ -+static void vxlan_fan_flush_map(struct vxlan_dev *vxlan) -+{ -+ struct ip_fan_map *fan_map; -+ -+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) { -+ list_del_rcu(&fan_map->list); -+ kfree_rcu(fan_map, rcu); -+ } -+} -+ -+static int vxlan_fan_del_map(struct vxlan_dev *vxlan, __be32 overlay) -+{ -+ struct ip_fan_map *fan_map; -+ -+ fan_map = vxlan_fan_find_map(vxlan, overlay); -+ if (!fan_map) -+ return -ENOENT; -+ -+ list_del_rcu(&fan_map->list); -+ kfree_rcu(fan_map, rcu); -+ -+ return 0; -+} -+ -+static int vxlan_fan_add_map(struct vxlan_dev *vxlan, struct ifla_fan_map *map) -+{ -+ __be32 overlay_mask, underlay_mask; -+ struct ip_fan_map *fan_map; -+ -+ overlay_mask = 
inet_make_mask(map->overlay_prefix); -+ underlay_mask = inet_make_mask(map->underlay_prefix); -+ -+ netdev_dbg(vxlan->dev, "vfam: map: o %x/%d u %x/%d om %x um %x\n", -+ map->overlay, map->overlay_prefix, -+ map->underlay, map->underlay_prefix, -+ overlay_mask, underlay_mask); -+ -+ if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask)) -+ return -EINVAL; -+ -+ if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask)) -+ return -EINVAL; -+ -+ /* Special case: overlay 0 and underlay 0: flush all mappings */ -+ if (!map->overlay && !map->underlay) { -+ vxlan_fan_flush_map(vxlan); -+ return 0; -+ } -+ -+ /* Special case: overlay set and underlay 0: clear map for overlay */ -+ if (!map->underlay) -+ return vxlan_fan_del_map(vxlan, map->overlay); -+ -+ if (vxlan_fan_find_map(vxlan, map->overlay)) -+ return -EEXIST; -+ -+ fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL); -+ fan_map->underlay = map->underlay; -+ fan_map->overlay = map->overlay; -+ fan_map->underlay_prefix = map->underlay_prefix; -+ fan_map->overlay_mask = ntohl(overlay_mask); -+ fan_map->overlay_prefix = map->overlay_prefix; -+ -+ list_add_tail_rcu(&fan_map->list, &vxlan->fan.fan_maps); -+ -+ return 0; -+} -+ -+static int vxlan_parse_fan_map(struct nlattr *data[], struct vxlan_dev *vxlan) -+{ -+ struct ifla_fan_map *map; -+ struct nlattr *attr; -+ int rem, rv; -+ -+ nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) { -+ map = nla_data(attr); -+ rv = vxlan_fan_add_map(vxlan, map); -+ if (rv) -+ return rv; -+ } -+ -+ return 0; -+} -+ -+static int vxlan_fan_build_rdst(struct vxlan_dev *vxlan, struct sk_buff *skb, -+ struct vxlan_rdst *fan_rdst) -+{ -+ struct ip_fan_map *f_map; -+ union vxlan_addr *va; -+ u32 daddr, underlay; -+ struct arphdr *arp; -+ void *arp_ptr; -+ struct ethhdr *eth; -+ struct iphdr *iph; -+ -+ eth = eth_hdr(skb); -+ switch (eth->h_proto) { -+ case htons(ETH_P_IP): -+ iph = ip_hdr(skb); -+ if (!iph) -+ return -EINVAL; -+ daddr = iph->daddr; -+ break; 
-+ case htons(ETH_P_ARP): -+ arp = arp_hdr(skb); -+ if (!arp) -+ return -EINVAL; -+ arp_ptr = arp + 1; -+ netdev_dbg(vxlan->dev, -+ "vfbr: arp sha %pM sip %pI4 tha %pM tip %pI4\n", -+ arp_ptr, arp_ptr + skb->dev->addr_len, -+ arp_ptr + skb->dev->addr_len + 4, -+ arp_ptr + (skb->dev->addr_len * 2) + 4); -+ arp_ptr += (skb->dev->addr_len * 2) + 4; -+ memcpy(&daddr, arp_ptr, 4); -+ break; -+ default: -+ netdev_dbg(vxlan->dev, "vfbr: unknown eth p %x\n", eth->h_proto); -+ return -EINVAL; -+ } -+ -+ f_map = vxlan_fan_find_map(vxlan, daddr); -+ if (!f_map) -+ return -EINVAL; -+ -+ daddr = ntohl(daddr); -+ underlay = ntohl(f_map->underlay); -+ if (!underlay) -+ return -EINVAL; -+ -+ memset(fan_rdst, 0, sizeof(*fan_rdst)); -+ va = &fan_rdst->remote_ip; -+ va->sa.sa_family = AF_INET; -+ fan_rdst->remote_vni = vxlan->default_dst.remote_vni; -+ va->sin.sin_addr.s_addr = htonl(underlay | -+ ((daddr & ~f_map->overlay_mask) >> -+ (32 - f_map->overlay_prefix - -+ (32 - f_map->underlay_prefix)))); -+ netdev_dbg(vxlan->dev, "vfbr: daddr %x ul %x dst %x\n", -+ daddr, underlay, va->sin.sin_addr.s_addr); -+ -+ return 0; -+} -+ - #if IS_ENABLED(CONFIG_IPV6) - static inline - bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) -@@ -2029,6 +2191,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, - goto rt_tx_error; - } - -+ if (fan_has_map(&vxlan->fan) && rt->rt_flags & RTCF_LOCAL) { -+ netdev_dbg(dev, "discard fan to localhost %pI4\n", -+ &dst->sin.sin_addr.s_addr); -+ ip_rt_put(rt); -+ goto tx_free; -+ } -+ - /* Bypass encapsulation if the destination is local */ - if (rt->rt_flags & RTCF_LOCAL && - !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { -@@ -2169,6 +2338,20 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) - return NETDEV_TX_OK; - } - -+ if (fan_has_map(&vxlan->fan)) { -+ struct vxlan_rdst fan_rdst; -+ -+ netdev_dbg(vxlan->dev, "vxlan_xmit p %x d %pM\n", -+ eth->h_proto, eth->h_dest); -+ if 
(vxlan_fan_build_rdst(vxlan, skb, &fan_rdst)) { -+ dev->stats.tx_dropped++; -+ kfree_skb(skb); -+ return NETDEV_TX_OK; -+ } -+ vxlan_xmit_one(skb, dev, &fan_rdst, 0); -+ return NETDEV_TX_OK; -+ } -+ - f = vxlan_find_mac(vxlan, eth->h_dest); - did_rsc = false; - -@@ -2532,6 +2715,8 @@ static void vxlan_setup(struct net_device *dev) - - for (h = 0; h < FDB_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&vxlan->fdb_head[h]); -+ -+ INIT_LIST_HEAD(&vxlan->fan.fan_maps); - } - - static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { -@@ -2881,6 +3066,7 @@ EXPORT_SYMBOL_GPL(vxlan_dev_create); - static int vxlan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) - { -+ struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_config conf; - int err; - -@@ -2899,6 +3085,12 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, - conf.remote_ip.sa.sa_family = AF_INET6; - } - -+ if (data[IFLA_VXLAN_FAN_MAP]) { -+ err = vxlan_parse_fan_map(data, vxlan); -+ if (err) -+ return err; -+ } -+ - if (data[IFLA_VXLAN_LOCAL]) { - conf.saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]); - conf.saddr.sa.sa_family = AF_INET; -@@ -3037,6 +3229,7 @@ static size_t vxlan_get_size(const struct net_device *dev) - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */ - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ - nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */ -+ nla_total_size(sizeof(struct ip_fan_map) * 256) + - 0; - } - -@@ -3083,6 +3276,26 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) - } - } - -+ if (fan_has_map(&vxlan->fan)) { -+ struct nlattr *fan_nest; -+ struct ip_fan_map *fan_map; -+ -+ fan_nest = nla_nest_start(skb, IFLA_VXLAN_FAN_MAP); -+ if (!fan_nest) -+ goto nla_put_failure; -+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) { -+ struct ifla_fan_map map; -+ -+ map.underlay = fan_map->underlay; -+ 
map.underlay_prefix = fan_map->underlay_prefix; -+ map.overlay = fan_map->overlay; -+ map.overlay_prefix = fan_map->overlay_prefix; -+ if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map)) -+ goto nla_put_failure; -+ } -+ nla_nest_end(skb, fan_nest); -+ } -+ - if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || - nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || - nla_put_u8(skb, IFLA_VXLAN_LEARNING, -@@ -3201,6 +3414,22 @@ static __net_init int vxlan_init_net(struct net *net) - return 0; - } - -+#ifdef CONFIG_SYSCTL -+static struct ctl_table_header *vxlan_fan_header; -+static unsigned int vxlan_fan_version = 4; -+ -+static struct ctl_table vxlan_fan_sysctls[] = { -+ { -+ .procname = "vxlan", -+ .data = &vxlan_fan_version, -+ .maxlen = sizeof(vxlan_fan_version), -+ .mode = 0444, -+ .proc_handler = proc_dointvec, -+ }, -+ {}, -+}; -+#endif /* CONFIG_SYSCTL */ -+ - static void __net_exit vxlan_exit_net(struct net *net) - { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); -@@ -3256,7 +3485,20 @@ static int __init vxlan_init_module(void) - if (rc) - goto out3; - -+#ifdef CONFIG_SYSCTL -+ vxlan_fan_header = register_net_sysctl(&init_net, "net/fan", -+ vxlan_fan_sysctls); -+ if (!vxlan_fan_header) { -+ rc = -ENOMEM; -+ goto sysctl_failed; -+ } -+#endif /* CONFIG_SYSCTL */ -+ - return 0; -+#ifdef CONFIG_SYSCTL -+sysctl_failed: -+ rtnl_link_unregister(&vxlan_link_ops); -+#endif /* CONFIG_SYSCTL */ - out3: - unregister_netdevice_notifier(&vxlan_notifier_block); - out2: -@@ -3269,6 +3511,9 @@ late_initcall(vxlan_init_module); - - static void __exit vxlan_cleanup_module(void) - { -+#ifdef CONFIG_SYSCTL -+ unregister_net_sysctl_table(vxlan_fan_header); -+#endif /* CONFIG_SYSCTL */ - rtnl_link_unregister(&vxlan_link_ops); - unregister_netdevice_notifier(&vxlan_notifier_block); - destroy_workqueue(vxlan_wq); -diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h -index 47fec59..28a38e5 100644 ---- a/include/net/ip_tunnels.h -+++ b/include/net/ip_tunnels.h 
-@@ -100,9 +100,18 @@ struct metadata_dst; - */ - #define FAN_OVERLAY_CNT 256 - -+struct ip_fan_map { -+ __be32 underlay; -+ __be32 overlay; -+ u16 underlay_prefix; -+ u16 overlay_prefix; -+ u32 overlay_mask; -+ struct list_head list; -+ struct rcu_head rcu; -+}; -+ - struct ip_tunnel_fan { --/* u32 __rcu *map;*/ -- u32 map[FAN_OVERLAY_CNT]; -+ struct list_head fan_maps; - }; - - struct ip_tunnel { -@@ -157,7 +166,11 @@ struct ip_tunnel { - #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) - - #define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) --#define TUNNEL_FAN __cpu_to_be16(0x4000) -+ -+static inline int fan_has_map(const struct ip_tunnel_fan *fan) -+{ -+ return !list_empty(&fan->fan_maps); -+} - - struct tnl_ptk_info { - __be16 flags; -diff --git a/include/net/vxlan.h b/include/net/vxlan.h -index e289ada..542f421 100644 ---- a/include/net/vxlan.h -+++ b/include/net/vxlan.h -@@ -161,6 +161,8 @@ struct vxlan_dev { - struct vxlan_rdst default_dst; /* default destination */ - u32 flags; /* VXLAN_F_* in vxlan.h */ - -+ struct ip_tunnel_fan fan; -+ - struct timer_list age_timer; - spinlock_t hash_lock; - unsigned int addrcnt; -diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h -index 5ad5737..6cde3bf 100644 ---- a/include/uapi/linux/if_link.h -+++ b/include/uapi/linux/if_link.h -@@ -443,6 +443,7 @@ enum { - IFLA_VXLAN_GBP, - IFLA_VXLAN_REMCSUM_NOPARTIAL, - IFLA_VXLAN_COLLECT_METADATA, -+ IFLA_VXLAN_FAN_MAP = 33, - __IFLA_VXLAN_MAX - }; - #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h -index 85a3e4b..d36b150 100644 ---- a/include/uapi/linux/if_tunnel.h -+++ b/include/uapi/linux/if_tunnel.h -@@ -145,7 +145,7 @@ enum { - - #define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1) - --struct ip_tunnel_fan_map { -+struct ifla_fan_map { - __be32 underlay; - __be32 overlay; - __u16 underlay_prefix; -diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c -index 
7a6174b..c821bf1 100644 ---- a/net/ipv4/ip_tunnel.c -+++ b/net/ipv4/ip_tunnel.c -@@ -1110,11 +1110,6 @@ out: - } - EXPORT_SYMBOL_GPL(ip_tunnel_newlink); - --static int ip_tunnel_is_fan(struct ip_tunnel *tunnel) --{ -- return tunnel->parms.i_flags & TUNNEL_FAN; --} -- - int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p) - { -@@ -1124,7 +1119,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); - - if (dev == itn->fb_tunnel_dev) -- return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL; -+ return fan_has_map(&tunnel->fan) ? 0 : -EINVAL; - - t = ip_tunnel_find(itn, p, dev->type); - -diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c -index 56e8984..3877b0e 100644 ---- a/net/ipv4/ipip.c -+++ b/net/ipv4/ipip.c -@@ -108,6 +108,7 @@ - #include <linux/netfilter_ipv4.h> - #include <linux/if_ether.h> - #include <linux/inetdevice.h> -+#include <linux/rculist.h> - - #include <net/sock.h> - #include <net/ip.h> -@@ -209,37 +210,144 @@ drop: - return 0; - } - --static int ipip_tunnel_is_fan(struct ip_tunnel *tunnel) -+static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr) - { -- return tunnel->parms.i_flags & TUNNEL_FAN; -+ struct ip_fan_map *fan_map; -+ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) { -+ if (fan_map->overlay == -+ (daddr & inet_make_mask(fan_map->overlay_prefix))) { -+ rcu_read_unlock(); -+ return fan_map; -+ } -+ } -+ rcu_read_unlock(); -+ -+ return NULL; - } - --/* -- * Determine fan tunnel endpoint to send packet to, based on the inner IP -- * address. 
For an overlay (inner) address Y.A.B.C, the transformation is -- * F.G.A.B, where "F" and "G" are the first two octets of the underlay -- * network (the network portion of a /16), "A" and "B" are the low order -- * two octets of the underlay network host (the host portion of a /16), -- * and "Y" is a configured first octet of the overlay network. -+/* Determine fan tunnel endpoint to send packet to, based on the inner IP -+ * address. -+ * -+ * Given a /8 overlay and /16 underlay, for an overlay (inner) address -+ * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first -+ * two octets of the underlay network (the network portion of a /16), "A" -+ * and "B" are the low order two octets of the underlay network host (the -+ * host portion of a /16), and "Y" is a configured first octet of the -+ * overlay network. -+ * -+ * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would -+ * host overlay subnet 99.3.4.0/24. An overlay network datagram from -+ * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7, -+ * which hosts overlay network subnet 99.6.7.0/24. This transformation is -+ * described in detail further below. -+ * -+ * Using netmasks for the overlay and underlay other than /8 and /16, as -+ * shown above, can yield larger (or smaller) overlay subnets, with the -+ * trade-off of allowing fewer (or more) underlay hosts to participate. -+ * -+ * The size of each overlay network subnet is defined by the total of the -+ * network mask of the overlay plus the size of host portion of the -+ * underlay network. In the above example, /8 + /16 = /24. -+ * -+ * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In -+ * this case, the network portion of the underlay is 10.99.224.0/20, and -+ * the host portion is 0.0.14.5 (12 bits). To determine the overlay -+ * network subnet, the 12 bits of host portion are left shifted 12 bits -+ * (/20 - /8) and ORed with the overlay subnet prefix. 
This yields an -+ * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by -+ * 12 bits underlay. This yields 12 bits in the overlay network portion, -+ * allowing for 4094 addresses in each overlay network subnet. The -+ * trade-off is that fewer hosts may participate in the underlay network, -+ * as its host address size has shrunk from 16 bits (65534 addresses) in -+ * the first example to 12 bits (4094 addresses) here. -+ * -+ * For fewer hosts per overlay subnet (permitting a larger number of -+ * underlay hosts to participate), the underlay netmask may be made -+ * smaller. -+ * -+ * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion -+ * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift -+ * the 20 bits of host by 4 (so that it's highest order bit is adjacent to -+ * the lowest order bit of the /8 overlay). This yields an overlay subnet -+ * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of -+ * the underlay). This provides more addresses for the underlay network -+ * (approximately 2^20), but each host's segment of the overlay provides -+ * only 4 bits of addresses (14 usable). -+ * -+ * It is also possible to adjust the overlay subnet. -+ * -+ * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider -+ * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left -+ * shifted 15 bits (/20 - /5), yielding an overlay network of -+ * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an -+ * overlay network of 242.107.128.0/17. -+ * -+ * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for -+ * underlay host 10.224.220.10, the underlay host portion (.10) is left -+ * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18. -+ * This would permit 254 addresses on the underlay, with each overlay -+ * segment providing approximately 2^14 - 2 addresses (16382). 
-+ * -+ * For packets being encapsulated, the overlay network destination IP -+ * address is deconstructed into its overlay and underlay-derived -+ * portions. The underlay portion (determined by the overlay mask and -+ * overlay subnet mask) is right shifted according to the size of the -+ * underlay network mask. This value is then ORed with the network -+ * portion of the underlay network to produce the underlay network -+ * destination for the encapsulated datagram. -+ * -+ * For example, using the initial example of underlay 10.88.3.4/16 and -+ * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay -+ * subnet 99.3.4.0/24 with specfic host 99.3.4.5. A datagram from -+ * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion -+ * of the address extracted. This is a number of bits equal to underlay -+ * network host portion. In the destination address, the highest order of -+ * these bits is one bit lower than the lowest order bit from the overlay -+ * network mask. -+ * -+ * Using the sample value, 99.6.7.8, the overlay mask is /8, and the -+ * underlay mask is /16 (leaving 16 bits for the host portion). The bits -+ * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8 -+ * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of -+ * which is 1 bit lower than the lowest order overlay address bit). - * -- * E.g., underlay host 10.88.3.4 with an overlay of 99 would host overlay -- * subnet 99.3.4.0/24. An overlay network datagram from 99.3.4.5 to -- * 99.6.7.8, would be directed to underlay host 10.88.6.7, which hosts -- * overlay network 99.6.7.0/24. -+ * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7. -+ * This value is then ORed with the underlay network portion, -+ * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for -+ * the encapuslated datagram. 
-+ * -+ * Another transform using the final example: overlay 100.64.0.0/10 and -+ * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1 -+ * sending a datagram to 100.66.200.5. In this case, 8 bits (the host -+ * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay -+ * prefix are masked off, yielding 0.2.192.0. This is right shifted 14 -+ * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay -+ * network portion and the underlay host portion) bits, yielding 0.0.0.11. -+ * This is ORed with the underlay network portion, 10.224.220.0/24, giving -+ * the underlay destination of 10.224.220.11 for overlay destination -+ * 100.66.200.5. - */ - static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph) - { -- unsigned int overlay; -+ struct ip_fan_map *f_map; - u32 daddr, underlay; - -+ f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr); -+ if (!f_map) -+ return -ENOENT; -+ - daddr = ntohl(ip_hdr(skb)->daddr); -- overlay = daddr >> 24; -- underlay = tunnel->fan.map[overlay]; -+ underlay = ntohl(f_map->underlay); - if (!underlay) - return -EINVAL; - - *iph = tunnel->parms.iph; -- iph->daddr = htonl(underlay | ((daddr >> 8) & 0x0000ffff)); -+ iph->daddr = htonl(underlay | -+ ((daddr & ~f_map->overlay_mask) >> -+ (32 - f_map->overlay_prefix - -+ (32 - f_map->underlay_prefix)))); - return 0; - } - -@@ -260,7 +368,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) - if (IS_ERR(skb)) - goto out; - -- if (ipip_tunnel_is_fan(tunnel)) { -+ if (fan_has_map(&tunnel->fan)) { - if (ipip_build_fan_iphdr(tunnel, skb, &fiph)) - goto tx_error; - tiph = &fiph; -@@ -325,6 +433,8 @@ static const struct net_device_ops ipip_netdev_ops = { - - static void ipip_tunnel_setup(struct net_device *dev) - { -+ struct ip_tunnel *t = netdev_priv(dev); -+ - dev->netdev_ops = &ipip_netdev_ops; - - dev->type = ARPHRD_TUNNEL; -@@ -336,6 +446,7 @@ static void ipip_tunnel_setup(struct 
net_device *dev) - dev->features |= IPIP_FEATURES; - dev->hw_features |= IPIP_FEATURES; - ip_tunnel_setup(dev, ipip_net_id); -+ INIT_LIST_HEAD(&t->fan.fan_maps); - } - - static int ipip_tunnel_init(struct net_device *dev) -@@ -419,41 +530,65 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], - return ret; - } - --static void ipip_fan_free_map(struct ip_tunnel *t) -+static void ipip_fan_flush_map(struct ip_tunnel *t) - { -- memset(&t->fan.map, 0, sizeof(t->fan.map)); -+ struct ip_fan_map *fan_map; -+ -+ list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) { -+ list_del_rcu(&fan_map->list); -+ kfree_rcu(fan_map, rcu); -+ } - } - --static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map) -+static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay) - { -- u32 overlay, overlay_mask, underlay, underlay_mask; -+ struct ip_fan_map *fan_map; - -- if ((map->underlay_prefix && map->underlay_prefix != 16) || -- (map->overlay_prefix && map->overlay_prefix != 8)) -- return -EINVAL; -+ fan_map = ipip_fan_find_map(t, overlay); -+ if (!fan_map) -+ return -ENOENT; -+ -+ list_del_rcu(&fan_map->list); -+ kfree_rcu(fan_map, rcu); - -- overlay = ntohl(map->overlay); -- overlay_mask = ntohl(inet_make_mask(map->overlay_prefix)); -+ return 0; -+} - -- underlay = ntohl(map->underlay); -- underlay_mask = ntohl(inet_make_mask(map->underlay_prefix)); -+static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map) -+{ -+ __be32 overlay_mask, underlay_mask; -+ struct ip_fan_map *fan_map; - -- if ((overlay & ~overlay_mask) || (underlay & ~underlay_mask)) -- return -EINVAL; -+ overlay_mask = inet_make_mask(map->overlay_prefix); -+ underlay_mask = inet_make_mask(map->underlay_prefix); - -- if (!(overlay & overlay_mask) && (underlay & underlay_mask)) -+ if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask)) - return -EINVAL; - -- t->parms.i_flags |= TUNNEL_FAN; -+ if (!(map->overlay & overlay_mask) && (map->underlay & 
underlay_mask)) -+ return -EINVAL; - -- /* Special case: overlay 0 and underlay 0 clears all mappings */ -- if (!overlay && !underlay) { -- ipip_fan_free_map(t); -+ /* Special case: overlay 0 and underlay 0: flush all mappings */ -+ if (!map->overlay && !map->underlay) { -+ ipip_fan_flush_map(t); - return 0; - } -+ -+ /* Special case: overlay set and underlay 0: clear map for overlay */ -+ if (!map->underlay) -+ return ipip_fan_del_map(t, map->overlay); -+ -+ if (ipip_fan_find_map(t, map->overlay)) -+ return -EEXIST; -+ -+ fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL); -+ fan_map->underlay = map->underlay; -+ fan_map->overlay = map->overlay; -+ fan_map->underlay_prefix = map->underlay_prefix; -+ fan_map->overlay_mask = ntohl(overlay_mask); -+ fan_map->overlay_prefix = map->overlay_prefix; - -- overlay >>= (32 - map->overlay_prefix); -- t->fan.map[overlay] = underlay; -+ list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps); - - return 0; - } -@@ -462,7 +597,7 @@ static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map) - static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t, - struct ip_tunnel_parm *parms) - { -- struct ip_tunnel_fan_map *map; -+ struct ifla_fan_map *map; - struct nlattr *attr; - int rem, rv; - -@@ -474,7 +609,7 @@ static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t, - - nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) { - map = nla_data(attr); -- rv = ipip_fan_set_map(t, map); -+ rv = ipip_fan_add_map(t, map); - if (rv) - return rv; - } -@@ -555,7 +690,7 @@ static size_t ipip_get_size(const struct net_device *dev) - /* IFLA_IPTUN_ENCAP_DPORT */ - nla_total_size(2) + - /* IFLA_IPTUN_FAN_MAP */ -- nla_total_size(sizeof(struct ip_tunnel_fan_map)) * 256 + -+ nla_total_size(sizeof(struct ifla_fan_map)) * 256 + - 0; - } - -@@ -583,25 +718,22 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) - tunnel->encap.flags)) - goto nla_put_failure; - -- if 
(tunnel->parms.i_flags & TUNNEL_FAN) { -+ if (fan_has_map(&tunnel->fan)) { - struct nlattr *fan_nest; -- int i; -+ struct ip_fan_map *fan_map; - - fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP); - if (!fan_nest) - goto nla_put_failure; -- for (i = 0; i < 256; i++) { -- if (tunnel->fan.map[i]) { -- struct ip_tunnel_fan_map map; -- -- map.underlay = htonl(tunnel->fan.map[i]); -- map.underlay_prefix = 16; -- map.overlay = htonl(i << 24); -- map.overlay_prefix = 8; -- if (nla_put(skb, IFLA_FAN_MAPPING, -- sizeof(map), &map)) -- goto nla_put_failure; -- } -+ list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) { -+ struct ifla_fan_map map; -+ -+ map.underlay = fan_map->underlay; -+ map.underlay_prefix = fan_map->underlay_prefix; -+ map.overlay = fan_map->overlay; -+ map.overlay_prefix = fan_map->overlay_prefix; -+ if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map)) -+ goto nla_put_failure; - } - nla_nest_end(skb, fan_nest); - } --- -2.7.4 - diff --git a/pkgs/os-specific/linux/kernel/ubuntu-unprivileged-overlayfs.patch b/pkgs/os-specific/linux/kernel/ubuntu-unprivileged-overlayfs.patch deleted file mode 100644 index cfa8009e71e..00000000000 --- a/pkgs/os-specific/linux/kernel/ubuntu-unprivileged-overlayfs.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 7415cb7b31569e9266229d4ebc79ccec4841ab04 Mon Sep 17 00:00:00 2001 -From: Serge Hallyn <serge.hallyn@ubuntu.com> -Date: Fri, 7 Feb 2014 09:32:46 -0600 -Subject: [PATCH] UBUNTU: SAUCE: Overlayfs: allow unprivileged mounts - -Unprivileged mounting, here, refers to root in a non-initial user -namespace performing the mount. In particular, it requires -CAP_SYS_ADMIN toward the task's mounts namespace, alleviating -the concerns of manipulating mount environment for setuid-root -binaries on the host. - -We refuse unprivileged mounting of most filesystem types because -we do not trust the in-kernel superblock parsers to correctly -handle malicious input. 
- -However, overlayfs does not parse any user-provided data other -than the pathnames passed in. Therefore unprivileged mounting -of overlayfs should be safe. - -Allowing unprivileged mounting of overlayfs filesystems would -allow Ubuntu Trusty users to create overlayfs-based container -snapshots, which would be a huge usability improvement. - -This patch enables unprivileged mounting of overlayfs. - -I tested a few simple combinations, and found that, when -doing (the equivalent of) - -mount -t overlayfs -oupperdir=u,lowerdir=l l t - -(u for upper, l for lower, t for target), - -1. overlayfs mount is always allowed, regardless of ownership -of u, l, or t. However - -2. Creation of new files is allowed so long as u is owned by - T. Otherwise, regardless of ownerships of l and t it is - denied. (This is expected; t was the mountpoint and - 'disapears', so its ownership is irrelevant) - -3. modification of a file 'hithere' which is in l but not yet - in u, and which is not owned by T, is not allowed, even if - writes to u are allowed. This may be a bug in overlayfs, - but it is safe behavior. It also will not cause a problem - for lxc since lxc will ensure that files are mapped into T's - namespace. - -Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> -Signed-off-by: Tim Gardner <tim.gardner@canonical.com> -Signed-off-by: Andy Whitcroft <apw@canonical.com> ---- - fs/overlayfs/super.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 9473e79..50890c2 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -668,6 +668,7 @@ static struct file_system_type ovl_fs_type = { - .name = "overlayfs", - .mount = ovl_mount, - .kill_sb = kill_anon_super, -+ .fs_flags = FS_USERNS_MOUNT, - }; - MODULE_ALIAS_FS("overlayfs"); - --- -2.1.0.rc1 - |