Compare commits

..

No commits in common. "a1e2db03f237a1c22e25ac6f1b8a649ea4143594" and "60849f1c5d5c35a611c97faef28f13cf8e89fcb3" have entirely different histories.

13 changed files with 27716 additions and 48 deletions

View file

@ -1,2 +1,2 @@
games-util/steam-launcher ValveSteamLicense
net-im/discord all-rights-reserved
media-sound/spotify Spotify

View file

@ -0,0 +1,151 @@
From d50977b164e708bf523a35ef53315355528c3ca6 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Mon, 16 Sep 2019 04:53:20 +0200
Subject: [PATCH] ZEN: Add sysctl and CONFIG to disallow unprivileged
CLONE_NEWUSER
Our default behavior continues to match the vanilla kernel.
---
include/linux/user_namespace.h | 4 ++++
init/Kconfig | 16 ++++++++++++++++
kernel/fork.c | 14 ++++++++++++++
kernel/sysctl.c | 12 ++++++++++++
kernel/user_namespace.c | 7 +++++++
5 files changed, 53 insertions(+)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c485..87b20e2ee27445 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -148,6 +148,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
@@ -181,6 +183,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
struct ns_common *ns_get_owner(struct ns_common *ns);
#else
+#define unprivileged_userns_clone 0
+
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
return &init_user_ns;
diff --git a/init/Kconfig b/init/Kconfig
index 94125d3b6893c7..9f7139b536f638 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1247,6 +1247,22 @@ config USER_NS
If unsure, say N.
+config USER_NS_UNPRIVILEGED
+ bool "Allow unprivileged users to create namespaces"
+ default y
+ depends on USER_NS
+ help
+ When disabled, unprivileged users will not be able to create
+ new namespaces. Allowing users to create their own namespaces
+ has been part of several recent local privilege escalation
+ exploits, so if you need user namespaces but are
+ paranoid^Wsecurity-conscious you want to disable this.
+
+ This setting can be overridden at runtime via the
+ kernel.unprivileged_userns_clone sysctl.
+
+ If unsure, say Y.
+
config PID_NS
bool "PID Namespaces"
default y
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d59..ff601cb7a1fae0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -98,6 +98,10 @@
#include <linux/io_uring.h>
#include <linux/bpf.h>
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -2008,6 +2012,10 @@ static __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
+ if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3166,6 +3174,12 @@ int ksys_unshare(unsigned long unshare_flags)
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
+ if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+ err = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto bad_unshare_out;
+ }
+
err = check_unshare_flags(unshare_flags);
if (err)
goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c6d9dec11b749d..9a4514ad481b21 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -81,6 +81,9 @@
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
/* shared constants to be used in various sysctls */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1659,6 +1662,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "unprivileged_userns_clone",
+ .data = &unprivileged_userns_clone,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 54211dbd516c57..16ca0c1516298d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -22,6 +22,13 @@
#include <linux/bsearch.h>
#include <linux/sort.h>
+/* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
+int unprivileged_userns_clone;
+#endif
+
static struct kmem_cache *user_ns_cachep __ro_after_init;
static DEFINE_MUTEX(userns_state_mutex);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,822 @@
From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Wed, 4 Jul 2018 04:30:08 +0200
Subject: [PATCH 01/17] glitched
---
init/Makefile | 2 +-
1 file changed, 1 insertions(+), 1 deletions(-)
diff --git a/init/Makefile b/init/Makefile
index baf3ab8d9d49..854e32e6aec7 100755
--- a/init/Makefile
+++ b/init/Makefile
@@ -19,7 +19,7 @@ else
# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
- utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "$(build-timestamp)" | cut -b -64); \
+ utsver=$$(echo '$(pound)'"$(build-version)" $(smp-flag-y) $(preempt-flag-y) "siina" "$(build-timestamp)" | cut -b -64); \
echo '$(pound)'define UTS_VERSION \""$${utsver}"\"
#
--
2.28.0
From c304f43d14e98d4bf1215fc10bc5012f554bdd8a Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 16:59:22 +0000
Subject: [PATCH 02/17] dcache: cache_pressure = 50 decreases the rate at which
VFS caches are reclaimed
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
fs/dcache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 361ea7ab30ea..0c5cf69b241a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@
* If no ancestor relationship:
* arbitrary, since it's serialized on rename_lock
*/
-int sysctl_vfs_cache_pressure __read_mostly = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 50;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
--
2.28.0
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f788cd61df21..2bfbb4213707 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -15,9 +15,9 @@ __read_mostly int scheduler_running;
/*
* part of the period that we allow rt tasks to run in us.
- * default: 0.95s
+ * XanMod default: 0.98s
*/
-int sysctl_sched_rt_runtime = 950000;
+int sysctl_sched_rt_runtime = 980000;
#ifdef CONFIG_SYSCTL
static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
--
2.28.0
From acc49f33a10f61dc66c423888cbb883ba46710e4 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:41:29 +0000
Subject: [PATCH 04/17] scripts: disable the localversion "+" tag of a git repo
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
scripts/setlocalversion | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11..0552d8b9f582 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -54,7 +54,7 @@ scm_version()
# If only the short version is requested, don't bother
# running further git commands
if $short; then
- echo "+"
+ #echo "+"
return
fi
# If we are past the tagged commit, we pretty print it.
--
2.28.0
From 360c6833e07cc9fdef5746f6bc45bdbc7212288d Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
Date: Fri, 26 Oct 2018 11:22:33 +0100
Subject: [PATCH 06/17] infiniband: Fix __read_overflow2 error with -O3
inlining
---
drivers/infiniband/core/addr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba83..6efc4f907f58 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -820,6 +820,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
union {
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
+ struct sockaddr_ib _sockaddr_ib;
} sgid_addr, dgid_addr;
int ret;
--
2.28.0
From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001
From: Etienne Juvigny <Ti3noU@gmail.com>
Date: Mon, 3 Sep 2018 17:36:25 +0200
Subject: [PATCH 07/17] Add Zenify option
---
init/Kconfig | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 3ae8678e1145..da708eed0f1e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK
menu "General setup"
+config ZENIFY
+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience"
+ default y
+ help
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
+
+ --- Virtual Memory Subsystem ---------------------------
+
+ Mem dirty before bg writeback..: 10 % -> 20 %
+ Mem dirty before sync writeback: 20 % -> 50 %
+
+ --- Block Layer ----------------------------------------
+
+ Queue depth...............: 128 -> 512
+ Default MQ scheduler......: mq-deadline -> bfq
+
+ --- CFS CPU Scheduler ----------------------------------
+
+ Scheduling latency.............: 6 -> 3 ms
+ Minimal granularity............: 0.75 -> 0.3 ms
+ Wakeup granularity.............: 1 -> 0.5 ms
+ CPU migration cost.............: 0.5 -> 0.25 ms
+ Bandwidth slice size...........: 5 -> 3 ms
+ Ondemand fine upscaling limit..: 95 % -> 85 %
+
+ --- MuQSS CPU Scheduler --------------------------------
+
+ Scheduling interval............: 6 -> 3 ms
+ ISO task max realtime use......: 70 % -> 25 %
+ Ondemand coarse upscaling limit: 80 % -> 45 %
+ Ondemand fine upscaling limit..: 95 % -> 45 %
+
config BROKEN
bool
--
2.28.0
From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001
From: Steven Barrett <damentz@liquorix.net>
Date: Sun, 16 Jan 2011 18:57:32 -0600
Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control
4.4: In my tests YeAH dramatically slowed down transfers over a WLAN,
reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10
seconds (netperf TCP_STREAM) including long stalls.
Be careful when choosing this. ~heftig
---
net/ipv4/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..bfb55ef7ebbe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -691,6 +691,9 @@ choice
config DEFAULT_VEGAS
bool "Vegas" if TCP_CONG_VEGAS=y
+ config DEFAULT_YEAH
+ bool "YeAH" if TCP_CONG_YEAH=y
+
config DEFAULT_VENO
bool "Veno" if TCP_CONG_VENO=y
@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG
default "htcp" if DEFAULT_HTCP
default "hybla" if DEFAULT_HYBLA
default "vegas" if DEFAULT_VEGAS
+ default "yeah" if DEFAULT_YEAH
default "westwood" if DEFAULT_WESTWOOD
default "veno" if DEFAULT_VENO
default "reno" if DEFAULT_RENO
--
2.28.0
From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 28 Nov 2018 19:01:27 -0600
Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag
strategy
For some reason, the default strategy to respond to THP fault fallbacks
is still just madvise, meaning stall if the program wants transparent
hugepages, but don't trigger a background reclaim / compaction if THP
begins to fail allocations. This creates a snowball affect where we
still use the THP code paths, but we almost always fail once a system
has been active and busy for a while.
The option "defer" was created for interactive systems where THP can
still improve performance. If we have to fallback to a regular page due
to an allocation failure or anything else, we will trigger a background
reclaim and compaction so future THP attempts succeed and previous
attempts eventually have their smaller pages combined without stalling
running applications.
We still want madvise to stall applications that explicitely want THP,
so defer+madvise _does_ make a ton of sense. Make it the default for
interactive systems, especially if the kernel maintainer left
transparent hugepages on "always".
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
---
mm/huge_memory.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 74300e337c3c..9277f22c10a7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
+#ifdef CONFIG_ZENIFY
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
+#else
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
+#endif
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
--
2.28.0
From 2b65a1329cb220b43c19c4d0de5833fae9e2b22d Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Wed, 24 Oct 2018 16:58:52 -0300
Subject: [PATCH 10/17] net/sched: allow configuring cake qdisc as default
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
net/sched/Kconfig | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..6a922bca9f39 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -471,6 +471,9 @@ choice
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ
+ config DEFAULT_CAKE
+ bool "Common Applications Kept Enhanced" if NET_SCH_CAKE
+
config DEFAULT_PFIFO_FAST
bool "Priority FIFO Fast"
endchoice
@@ -481,6 +484,7 @@ config DEFAULT_NET_SCH
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
default "sfq" if DEFAULT_SFQ
+ default "cake" if DEFAULT_CAKE
default "pfifo_fast"
endif
--
2.28.0
From 90240bcd90a568878738e66c0d45bed3e38e347b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Fri, 19 Apr 2019 12:33:38 +0200
Subject: [PATCH 12/17] Set vm.max_map_count to 262144 by default
The value is still pretty low, and AMD64-ABI and ELF extended numbering
supports that, so we should be fine on modern x86 systems.
This fixes crashes in some applications using more than 65535 vmas (also
affects some windows games running in wine, such as Star Citizen).
---
include/linux/mm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc05c3588aa3..b0cefe94920d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,8 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define MAPCOUNT_ELF_CORE_MARGIN (5)
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT (262144)
extern int sysctl_max_map_count;
--
2.28.0
From 3a34034dba5efe91bcec491efe8c66e8087f509b Mon Sep 17 00:00:00 2001
From: Tk-Glitch <ti3nou@gmail.com>
Date: Mon, 27 Jul 2020 00:19:18 +0200
Subject: [PATCH 13/17] mm: bump DEFAULT_MAX_MAP_COUNT
Some games such as Detroit: Become Human tend to be very crash prone with
lower values.
---
include/linux/mm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b0cefe94920d..890165099b07 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -190,7 +190,7 @@ static inline void __mm_zero_struct_page(struct page *page)
* not a hard limit any more. Although some userspace tools can be surprised by
* that.
*/
-#define DEFAULT_MAX_MAP_COUNT (262144)
+#define DEFAULT_MAX_MAP_COUNT (16777216)
extern int sysctl_max_map_count;
--
2.28.0
From 977812938da7c7226415778c340832141d9278b7 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 25 Nov 2019 15:13:06 -0300
Subject: [PATCH 14/17] elevator: set default scheduler to bfq for blk-mq
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
---
block/elevator.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c
index 4eab3d70e880..79669aa39d79 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -623,19 +623,19 @@ static inline bool elv_support_iosched(struct request_queue *q)
}
/*
- * For single queue devices, default to using mq-deadline. If we have multiple
- * queues or mq-deadline is not available, default to "none".
+ * For single queue devices, default to using bfq. If we have multiple
+ * queues or bfq is not available, default to "none".
*/
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
return NULL;
if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags))
return NULL;
- return elevator_find_get(q, "mq-deadline");
+ return elevator_find_get(q, "bfq");
}
/*
--
2.28.0
From 3c229f434aca65c4ca61772bc03c3e0370817b92 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 3 Aug 2020 17:05:04 +0000
Subject: [PATCH 16/17] mm: set 2 megabytes for address_space-level file
read-ahead pages size
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
include/linux/pagemap.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..007dea784451 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -655,7 +655,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE)
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
struct file *, pgoff_t index, unsigned long req_count);
--
2.28.0
From 716f41cf6631f3a85834dcb67b4ce99185b6387f Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Wed, 15 Jan 2020 20:43:56 -0600
Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter
If intel-pstate is compiled into the kernel, it will preempt the loading
of acpi-cpufreq so you can take advantage of hardware p-states without
any friction.
However, intel-pstate is not completely superior to cpufreq's ondemand
for one reason. There's no concept of an up_threshold property.
In ondemand, up_threshold essentially reduces the maximum utilization to
compare against, allowing you to hit max frequencies and turbo boost
from a much lower core utilization.
With intel-pstate, you have the concept of minimum and maximum
performance, but no tunable that lets you define, maximum frequency
means 50% core utilization. For just this oversight, there's reasons
you may want ondemand.
Lets support setting "enable" in kernel boot parameters. This lets
kernel maintainers include "intel_pstate=disable" statically in the
static boot parameters, but let users of the kernel override this
selection.
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
drivers/cpufreq/intel_pstate.c | 2 ++
2 files changed, 5 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..3e92fee81e33 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1857,6 +1857,9 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ enable
+ Enable intel_pstate in-case "disable" was passed
+ previously in the kernel boot parameters
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 36a469150ff9..aee891c9b78a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;
+ if (!strcmp(str, "enable"))
+ no_load = 0;
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
--
2.28.0
From 379cbab18b5c75c622b93e2c5abdfac141fe9654 Mon Sep 17 00:00:00 2001
From: Kenny Levinsen <kl@kl.wtf>
Date: Sun, 27 Dec 2020 14:43:13 +0000
Subject: [PATCH] ZEN: Input: evdev - use call_rcu when detaching client
Significant time was spent on synchronize_rcu in evdev_detach_client
when applications closed evdev devices. Switching VT away from a
graphical environment commonly leads to mass input device closures,
which could lead to noticable delays on systems with many input devices.
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
client struct till after the RCU grace period instead of blocking the
calling application.
While this does not solve all slow evdev fd closures, it takes care of a
good portion of them, including this simple test:
#include <fcntl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
int idx, fd;
const char *path = "/dev/input/event0";
for (idx = 0; idx < 1000; idx++) {
if ((fd = open(path, O_RDWR)) == -1) {
return -1;
}
close(fd);
}
return 0;
}
Time to completion of above test when run locally:
Before: 0m27.111s
After: 0m0.018s
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
---
drivers/input/evdev.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 95f90699d2b17b..2b10fe29d2c8d9 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -46,6 +46,7 @@ struct evdev_client {
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
+ struct rcu_head rcu;
enum input_clock_type clk_type;
bool revoked;
unsigned long *evmasks[EV_CNT];
@@ -377,13 +378,22 @@ static void evdev_attach_client(struct evdev *evdev,
spin_unlock(&evdev->client_lock);
}
+static void evdev_reclaim_client(struct rcu_head *rp)
+{
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
+ unsigned int i;
+ for (i = 0; i < EV_CNT; ++i)
+ bitmap_free(client->evmasks[i]);
+ kvfree(client);
+}
+
static void evdev_detach_client(struct evdev *evdev,
struct evdev_client *client)
{
spin_lock(&evdev->client_lock);
list_del_rcu(&client->node);
spin_unlock(&evdev->client_lock);
- synchronize_rcu();
+ call_rcu(&client->rcu, evdev_reclaim_client);
}
static int evdev_open_device(struct evdev *evdev)
@@ -436,7 +446,6 @@ static int evdev_release(struct inode *inode, struct file *file)
{
struct evdev_client *client = file->private_data;
struct evdev *evdev = client->evdev;
- unsigned int i;
mutex_lock(&evdev->mutex);
@@ -448,11 +457,6 @@ static int evdev_release(struct inode *inode, struct file *file)
evdev_detach_client(evdev, client);
- for (i = 0; i < EV_CNT; ++i)
- bitmap_free(client->evmasks[i]);
-
- kvfree(client);
-
evdev_close_device(evdev);
return 0;
@@ -495,7 +499,6 @@ static int evdev_open(struct inode *inode, struct file *file)
err_free_client:
evdev_detach_client(evdev, client);
- kvfree(client);
return error;
}
From 2aafb56f20e4b63d8c4af172fe9d017c64bc4129 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:11 -0700
Subject: [PATCH] ZEN: mm: Lower the non-hugetlbpage pageblock size to reduce
scheduling delays
The page allocator processes free pages in groups of pageblocks, where
the size of a pageblock is typically quite large (1024 pages without
hugetlbpage support). Pageblocks are processed atomically with the zone
lock held, which can cause severe scheduling delays on both the CPU
going through the pageblock and any other CPUs waiting to acquire the
zone lock. A frequent offender is move_freepages_block(), which is used
by rmqueue() for page allocation.
As it turns out, there's no requirement for pageblocks to be so large,
so the pageblock order can simply be reduced to ease the scheduling
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
reasonable setting to ensure non-costly page allocation requests can
still be serviced without always needing to free up more than one
pageblock's worth of pages at a time.
This has a noticeable effect on overall system latency when memory
pressure is elevated. The various mm functions which operate on
pageblocks no longer appear in the preemptoff tracer, where previously
they would spend up to 100 ms on a mobile arm64 CPU processing a
pageblock with preemption disabled and the zone lock held.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
include/linux/pageblock-flags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 5f1ae07d724b88..97cda629c9e909 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -48,7 +48,7 @@ extern unsigned int pageblock_order;
#else /* CONFIG_HUGETLB_PAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
-#define pageblock_order MAX_PAGE_ORDER
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
#endif /* CONFIG_HUGETLB_PAGE */
From f22bc56be85e69c71c8e36041193856bb8b01525 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Wed, 20 Oct 2021 20:50:32 -0700
Subject: [PATCH] ZEN: mm: Don't hog the CPU and zone lock in rmqueue_bulk()
There is noticeable scheduling latency and heavy zone lock contention
stemming from rmqueue_bulk's single hold of the zone lock while doing
its work, as seen with the preemptoff tracer. There's no actual need for
rmqueue_bulk() to hold the zone lock the entire time; it only does so
for supposed efficiency. As such, we can relax the zone lock and even
reschedule when IRQs are enabled in order to keep the scheduling delays
and zone lock contention at bay. Forward progress is still guaranteed,
as the zone lock can only be relaxed after page removal.
With this change, rmqueue_bulk() no longer appears as a serious offender
in the preemptoff tracer, and system latency is noticeably improved.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
mm/page_alloc.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0b0397e29ee4c..87a983a356530c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3118,15 +3119,16 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}
/*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
unsigned long flags;
- int i;
+ const bool can_resched = !preempt_count() && !irqs_disabled();
+ int i, allocated = 0, last_mod = 0;
/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
spin_lock(&zone->lock);
@@ -3137,6 +3138,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;
+ /* Reschedule and ease the contention on the lock if needed */
+ if (i + 1 < count && ((can_resched && need_resched()) ||
+ spin_needbreak(&zone->lock))) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES,
+ -((i + 1 - last_mod) << order));
+ last_mod = i + 1;
+ spin_unlock(&zone->lock);
+ if (can_resched)
+ cond_resched();
+ spin_lock(&zone->lock);
+ }
+
if (unlikely(check_pcp_refill(page, order)))
continue;
@@ -3163,7 +3176,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* on i. Do not confuse with 'allocated' which is the number of
* pages added to the pcp list.
*/
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock(&zone->lock);
return allocated;
}
From 6329525a0fa10cd13f39b76948b1296150f75c95 Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 29 Aug 2022 16:47:26 +0000
Subject: [PATCH 14/16] XANMOD: Makefile: Disable GCC vectorization on trees
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
Makefile | 3 +++
1 file changed, 3 insertions(+)
diff --git a/Makefile b/Makefile
index 3f6628780eb2..35a5ae1ede42 100644
--- a/Makefile
+++ b/Makefile
@@ -1069,6 +1069,9 @@ endif
KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+# disable GCC vectorization on trees
+KBUILD_CFLAGS += $(call cc-option, -fno-tree-vectorize)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += -fno-strict-overflow
--
2.39.1
From f997578464b2c4c63e7bd1afbfef56212ee44f2d Mon Sep 17 00:00:00 2001
From: Etienne JUVIGNY <ti3nou@gmail.com>
Date: Mon, 6 Mar 2023 13:54:09 +0100
Subject: Don't add -dirty versioning on unclean trees
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index ca5795e16..ad0d94477 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -85,12 +85,12 @@ scm_version()
# git-diff-index does not refresh the index, so it may give misleading
# results.
# See git-update-index(1), git-diff-index(1), and git-status(1).
- if {
- git --no-optional-locks status -uno --porcelain 2>/dev/null ||
- git diff-index --name-only HEAD
- } | read dummy; then
- printf '%s' -dirty
- fi
+ #if {
+ # git --no-optional-locks status -uno --porcelain 2>/dev/null ||
+ # git diff-index --name-only HEAD
+ #} | read dummy; then
+ # printf '%s' -dirty
+ #fi
}
collect_files()
From 1cf70fdd26245554ab30234722338d8160dff394 Mon Sep 17 00:00:00 2001
From: Steven Barrett <steven@liquorix.net>
Date: Sat, 21 May 2022 15:15:09 -0500
Subject: [PATCH] ZEN: INTERACTIVE: dm-crypt: Disable workqueues for crypto ops
Queueing in dm-crypt for crypto operations reduces performance on modern
systems. As discussed in an article from Cloudflare, they discovered
that queuing was introduced because the crypto subsystem used to be
synchronous. Since it's now asynchronous, we get double queueing when
using the subsystem through dm-crypt. This is obviously undesirable and
reduces throughput and increases latency.
Disable queueing when using our Zen Interactive configuration.
Fixes: https://github.com/zen-kernel/zen-kernel/issues/282
tkg: Config switch changed to our local "ZENIFY" toggle
---
drivers/md/dm-crypt.c | 5 +++++
init/Kconfig | 1 +
2 files changed, 6 insertions(+)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 2ae8560b6a14ad..cb49218030c88b 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -3242,6 +3242,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+#ifdef CONFIG_ZENIFY
+ set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
+ set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
+#endif
+
ret = crypt_ctr_cipher(ti, argv[0], argv[1]);
if (ret < 0)
goto bad;

View file

@ -0,0 +1,117 @@
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_250
+ default HZ_500
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -39,6 +39,13 @@ choice
on SMP and NUMA systems and exactly dividing by both PAL and
NTSC frame rates for video and multimedia work.
+ config HZ_500
+ bool "500 HZ"
+ help
+ 500 Hz is a balanced timer frequency. Provides fast interactivity
+ on desktops with great smoothness without increasing CPU power
+ consumption and sacrificing the battery life on laptops.
+
config HZ_1000
bool "1000 HZ"
help
@@ -52,6 +59,7 @@ config HZ
default 100 if HZ_100
default 250 if HZ_250
default 300 if HZ_300
+ default 500 if HZ_500
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 2a202a846757..1d9c7ed79b11 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -4,7 +4,7 @@
choice
prompt "Timer frequency"
- default HZ_500
+ default HZ_750
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 Hz but 100 Hz may be more
@@ -46,6 +46,13 @@ choice
on desktops with great smoothness without increasing CPU power
consumption and sacrificing the battery life on laptops.
+ config HZ_750
+ bool "750 HZ"
+ help
+ 750 Hz is a good timer frequency for desktops. Provides fast
+ interactivity with great smoothness without sacrificing too
+ much throughput.
+
config HZ_1000
bool "1000 HZ"
help
@@ -60,6 +67,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 500 if HZ_500
+ default 750 if HZ_750
default 1000 if HZ_1000
config SCHED_HRTICK
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 6b423eebfd5d..61e3271675d6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -21,10 +21,10 @@
#include "cpufreq_ondemand.h"
/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_SAMPLING_DOWN_FACTOR (1)
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
+#define DEF_SAMPLING_DOWN_FACTOR (5)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
-#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_UP_THRESHOLD (63)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
From cba31b19f8c38696b13ba48e0e8b6dbe747d6bae Mon Sep 17 00:00:00 2001
From: Alexandre Frade <admfrade@gmail.com>
Date: Mon, 29 Jan 2018 17:31:25 +0000
Subject: [PATCH 10/16] XANMOD: mm/vmscan: vm_swappiness = 30 decreases the
amount of swapping
Signed-off-by: Alexandre Frade <admfrade@gmail.com>
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5b7b8d4f5297..549684b29418 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -190,7 +190,7 @@ struct scan_control {
/*
* From 0 .. 200. Higher means more swappy.
*/
-int vm_swappiness = 60;
+int vm_swappiness = 30;
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
--
2.39.1

View file

@ -0,0 +1,433 @@
From 3d95a728e5570d4aedad19d699fd23db695f1ada Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Mon, 7 Oct 2024 11:53:40 +0200
Subject: [PATCH 5/8] ksm
Signed-off-by: Peter Jung <admin@ptr1337.dev>
---
arch/alpha/kernel/syscalls/syscall.tbl | 3 +
arch/arm/tools/syscall.tbl | 3 +
arch/m68k/kernel/syscalls/syscall.tbl | 3 +
arch/microblaze/kernel/syscalls/syscall.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n32.tbl | 3 +
arch/mips/kernel/syscalls/syscall_n64.tbl | 3 +
arch/mips/kernel/syscalls/syscall_o32.tbl | 3 +
arch/parisc/kernel/syscalls/syscall.tbl | 3 +
arch/powerpc/kernel/syscalls/syscall.tbl | 3 +
arch/s390/kernel/syscalls/syscall.tbl | 3 +
arch/sh/kernel/syscalls/syscall.tbl | 3 +
arch/sparc/kernel/syscalls/syscall.tbl | 3 +
arch/x86/entry/syscalls/syscall_32.tbl | 3 +
arch/x86/entry/syscalls/syscall_64.tbl | 3 +
arch/xtensa/kernel/syscalls/syscall.tbl | 3 +
include/linux/syscalls.h | 3 +
include/uapi/asm-generic/unistd.h | 9 +-
kernel/sys.c | 138 ++++++++++++++++++
kernel/sys_ni.c | 3 +
scripts/syscall.tbl | 3 +
.../arch/powerpc/entry/syscalls/syscall.tbl | 3 +
.../perf/arch/s390/entry/syscalls/syscall.tbl | 3 +
22 files changed, 206 insertions(+), 1 deletion(-)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 74720667fe09..e6a11f3c0a2e 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -502,3 +502,6 @@
570 common lsm_set_self_attr sys_lsm_set_self_attr
571 common lsm_list_modules sys_lsm_list_modules
572 common mseal sys_mseal
+573 common process_ksm_enable sys_process_ksm_enable
+574 common process_ksm_disable sys_process_ksm_disable
+575 common process_ksm_status sys_process_ksm_status
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 23c98203c40f..10a3099decbe 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -477,3 +477,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 22a3cbd4c602..12d2c7594bf0 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -462,3 +462,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 2b81a6bd78b2..e2a93c856eed 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -468,3 +468,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 953f5b7dc723..b921fbf56fa6 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -401,3 +401,6 @@
460 n32 lsm_set_self_attr sys_lsm_set_self_attr
461 n32 lsm_list_modules sys_lsm_list_modules
462 n32 mseal sys_mseal
+463 n32 process_ksm_enable sys_process_ksm_enable
+464 n32 process_ksm_disable sys_process_ksm_disable
+465 n32 process_ksm_status sys_process_ksm_status
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 1464c6be6eb3..8d7f9ddd66f4 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -377,3 +377,6 @@
460 n64 lsm_set_self_attr sys_lsm_set_self_attr
461 n64 lsm_list_modules sys_lsm_list_modules
462 n64 mseal sys_mseal
+463 n64 process_ksm_enable sys_process_ksm_enable
+464 n64 process_ksm_disable sys_process_ksm_disable
+465 n64 process_ksm_status sys_process_ksm_status
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 2439a2491cff..9d6142739954 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -450,3 +450,6 @@
460 o32 lsm_set_self_attr sys_lsm_set_self_attr
461 o32 lsm_list_modules sys_lsm_list_modules
462 o32 mseal sys_mseal
+463 o32 process_ksm_enable sys_process_ksm_enable
+464 o32 process_ksm_disable sys_process_ksm_disable
+465 o32 process_ksm_status sys_process_ksm_status
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 66dc406b12e4..9d46476fd908 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -461,3 +461,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index ebae8415dfbb..16f71bc2f6f0 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -553,3 +553,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 01071182763e..7394bad8178e 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -465,3 +465,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index c55fd7696d40..b9fc31221b87 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -466,3 +466,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index cfdfb3707c16..0d79fd772854 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -508,3 +508,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 534c74b14fab..c546a30575f1 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -468,3 +468,6 @@
460 i386 lsm_set_self_attr sys_lsm_set_self_attr
461 i386 lsm_list_modules sys_lsm_list_modules
462 i386 mseal sys_mseal
+463 i386 process_ksm_enable sys_process_ksm_enable
+464 i386 process_ksm_disable sys_process_ksm_disable
+465 i386 process_ksm_status sys_process_ksm_status
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 7093ee21c0d1..0fcd10ba8dfe 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -386,6 +386,9 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 67083fc1b2f5..c1aecee4ad9b 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -433,3 +433,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5758104921e6..cc9c4fac2412 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -818,6 +818,9 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
size_t vlen, int behavior, unsigned int flags);
asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_enable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_disable(int pidfd, unsigned int flags);
+asmlinkage long sys_process_ksm_status(int pidfd, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 5bf6148cac2b..613e559ad6e0 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -841,8 +841,15 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
#define __NR_mseal 462
__SYSCALL(__NR_mseal, sys_mseal)
+#define __NR_process_ksm_enable 463
+__SYSCALL(__NR_process_ksm_enable, sys_process_ksm_enable)
+#define __NR_process_ksm_disable 464
+__SYSCALL(__NR_process_ksm_disable, sys_process_ksm_disable)
+#define __NR_process_ksm_status 465
+__SYSCALL(__NR_process_ksm_status, sys_process_ksm_status)
+
#undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 466
/*
* 32 bit systems traditionally used different
diff --git a/kernel/sys.c b/kernel/sys.c
index 4da31f28fda8..fcd3aeaddd05 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2791,6 +2791,144 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return error;
}
+#ifdef CONFIG_KSM
+enum pkc_action {
+ PKSM_ENABLE = 0,
+ PKSM_DISABLE,
+ PKSM_STATUS,
+};
+
+static long do_process_ksm_control(int pidfd, enum pkc_action action)
+{
+ long ret;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ unsigned int f_flags;
+
+ task = pidfd_get_task(pidfd, &f_flags);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+
+ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
+ /* Require CAP_SYS_NICE for influencing process performance. */
+ if (!capable(CAP_SYS_NICE)) {
+ ret = -EPERM;
+ goto release_mm;
+ }
+
+ if (mmap_write_lock_killable(mm)) {
+ ret = -EINTR;
+ goto release_mm;
+ }
+
+ switch (action) {
+ case PKSM_ENABLE:
+ ret = ksm_enable_merge_any(mm);
+ break;
+ case PKSM_DISABLE:
+ ret = ksm_disable_merge_any(mm);
+ break;
+ case PKSM_STATUS:
+ ret = !!test_bit(MMF_VM_MERGE_ANY, &mm->flags);
+ break;
+ }
+
+ mmap_write_unlock(mm);
+
+release_mm:
+ mmput(mm);
+release_task:
+ put_task_struct(task);
+out:
+ return ret;
+}
+#endif /* CONFIG_KSM */
+
+SYSCALL_DEFINE2(process_ksm_enable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_ENABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_disable, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_DISABLE);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+SYSCALL_DEFINE2(process_ksm_status, int, pidfd, unsigned int, flags)
+{
+#ifdef CONFIG_KSM
+ if (flags != 0)
+ return -EINVAL;
+
+ return do_process_ksm_control(pidfd, PKSM_STATUS);
+#else /* CONFIG_KSM */
+ return -ENOSYS;
+#endif /* CONFIG_KSM */
+}
+
+#ifdef CONFIG_KSM
+static ssize_t process_ksm_enable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_enable);
+}
+static struct kobj_attribute process_ksm_enable_attr = __ATTR_RO(process_ksm_enable);
+
+static ssize_t process_ksm_disable_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_disable);
+}
+static struct kobj_attribute process_ksm_disable_attr = __ATTR_RO(process_ksm_disable);
+
+static ssize_t process_ksm_status_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", __NR_process_ksm_status);
+}
+static struct kobj_attribute process_ksm_status_attr = __ATTR_RO(process_ksm_status);
+
+static struct attribute *process_ksm_sysfs_attrs[] = {
+ &process_ksm_enable_attr.attr,
+ &process_ksm_disable_attr.attr,
+ &process_ksm_status_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group process_ksm_sysfs_attr_group = {
+ .attrs = process_ksm_sysfs_attrs,
+ .name = "process_ksm",
+};
+
+static int __init process_ksm_sysfs_init(void)
+{
+ return sysfs_create_group(kernel_kobj, &process_ksm_sysfs_attr_group);
+}
+subsys_initcall(process_ksm_sysfs_init);
+#endif /* CONFIG_KSM */
+
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
struct getcpu_cache __user *, unused)
{
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c00a86931f8c..d82213d68522 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,6 +186,9 @@ COND_SYSCALL(mincore);
COND_SYSCALL(madvise);
COND_SYSCALL(process_madvise);
COND_SYSCALL(process_mrelease);
+COND_SYSCALL(process_ksm_enable);
+COND_SYSCALL(process_ksm_disable);
+COND_SYSCALL(process_ksm_status);
COND_SYSCALL(remap_file_pages);
COND_SYSCALL(mbind);
COND_SYSCALL(get_mempolicy);
diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
index 845e24eb372e..227d9cc12365 100644
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -403,3 +403,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index ebae8415dfbb..16f71bc2f6f0 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -553,3 +553,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 01071182763e..7394bad8178e 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -465,3 +465,6 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal sys_mseal
+463 common process_ksm_enable sys_process_ksm_enable sys_process_ksm_enable
+464 common process_ksm_disable sys_process_ksm_disable sys_process_ksm_disable
+465 common process_ksm_status sys_process_ksm_status sys_process_ksm_status
--
2.47.0.rc0

View file

@ -0,0 +1,193 @@
From cdeab384f48dd9c88e2dff2e9ad8d57dca1a1b1c Mon Sep 17 00:00:00 2001
From: Mark Weiman <mark.weiman@markzz.com>
Date: Sun, 12 Aug 2018 11:36:21 -0400
Subject: [PATCH] pci: Enable overrides for missing ACS capabilities
This an updated version of Alex Williamson's patch from:
https://lkml.org/lkml/2013/5/30/513
Original commit message follows:
PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that
allows us to control whether transactions are allowed to be redirected
in various subnodes of a PCIe topology. For instance, if two
endpoints are below a root port or downsteam switch port, the
downstream port may optionally redirect transactions between the
devices, bypassing upstream devices. The same can happen internally
on multifunction devices. The transaction may never be visible to the
upstream devices.
One upstream device that we particularly care about is the IOMMU. If
a redirection occurs in the topology below the IOMMU, then the IOMMU
cannot provide isolation between devices. This is why the PCIe spec
encourages topologies to include ACS support. Without it, we have to
assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation.
Unfortunately, far too many topologies do not support ACS to make this
a steadfast requirement. Even the latest chipsets from Intel are only
sporadically supporting ACS. We have trouble getting interconnect
vendors to include the PCIe spec required PCIe capability, let alone
suggested features.
Therefore, we need to add some flexibility. The pcie_acs_override=
boot option lets users opt-in specific devices or sets of devices to
assume ACS support. The "downstream" option assumes full ACS support
on root ports and downstream switch ports. The "multifunction"
option assumes the subset of ACS features available on multifunction
endpoints and upstream switch ports are supported. The "id:nnnn:nnnn"
option enables ACS support on devices matching the provided vendor
and device IDs, allowing more strategic ACS overrides. These options
may be combined in any order. A maximum of 16 id specific overrides
are available. It's suggested to use the most limited set of options
necessary to avoid completely disabling ACS across the topology.
Note to hardware vendors, we have facilities to permanently quirk
specific devices which enforce isolation but not provide an ACS
capability. Please contact me to have your devices added and save
your customers the hassle of this boot option.
Signed-off-by: Mark Weiman <mark.weiman@markzz.com>
---
.../admin-guide/kernel-parameters.txt | 9 ++
drivers/pci/quirks.c | 101 ++++++++++++++++++
2 files changed, 110 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aefd358a5ca3..173b3596fd9e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3190,6 +3190,15 @@
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
+ pcie_acs_override =
+ [PCIE] Override missing PCIe ACS support for:
+ downstream
+ All downstream ports - full ACS capabilities
+ multifunction
+ All multifunction devices - multifunction ACS subset
+ id:nnnn:nnnn
+ Specific device - full ACS capabilities
+ Specified as vid:did (vendor/device ID) in hex
noioapicquirk [APIC] Disable all boot interrupt quirks.
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 4700d24e5d55..8f7a3d7fd9c1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3372,6 +3372,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
}
+static bool acs_on_downstream;
+static bool acs_on_multifunction;
+
+#define NUM_ACS_IDS 16
+struct acs_on_id {
+ unsigned short vendor;
+ unsigned short device;
+};
+static struct acs_on_id acs_on_ids[NUM_ACS_IDS];
+static u8 max_acs_id;
+
+static __init int pcie_acs_override_setup(char *p)
+{
+ if (!p)
+ return -EINVAL;
+
+ while (*p) {
+ if (!strncmp(p, "downstream", 10))
+ acs_on_downstream = true;
+ if (!strncmp(p, "multifunction", 13))
+ acs_on_multifunction = true;
+ if (!strncmp(p, "id:", 3)) {
+ char opt[5];
+ int ret;
+ long val;
+
+ if (max_acs_id >= NUM_ACS_IDS - 1) {
+ pr_warn("Out of PCIe ACS override slots (%d)\n",
+ NUM_ACS_IDS);
+ goto next;
+ }
+
+ p += 3;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].vendor = val;
+
+ p += strcspn(p, ":");
+ if (*p != ':') {
+ pr_warn("PCIe ACS invalid ID\n");
+ goto next;
+ }
+
+ p++;
+ snprintf(opt, 5, "%s", p);
+ ret = kstrtol(opt, 16, &val);
+ if (ret) {
+ pr_warn("PCIe ACS ID parse error %d\n", ret);
+ goto next;
+ }
+ acs_on_ids[max_acs_id].device = val;
+ max_acs_id++;
+ }
+next:
+ p += strcspn(p, ",");
+ if (*p == ',')
+ p++;
+ }
+
+ if (acs_on_downstream || acs_on_multifunction || max_acs_id)
+ pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n");
+
+ return 0;
+}
+early_param("pcie_acs_override", pcie_acs_override_setup);
+
+static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags)
+{
+ int i;
+
+ /* Never override ACS for legacy devices or devices with ACS caps */
+ if (!pci_is_pcie(dev) ||
+ pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS))
+ return -ENOTTY;
+
+ for (i = 0; i < max_acs_id; i++)
+ if (acs_on_ids[i].vendor == dev->vendor &&
+ acs_on_ids[i].device == dev->device)
+ return 1;
+
+ switch (pci_pcie_type(dev)) {
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ case PCI_EXP_TYPE_ROOT_PORT:
+ if (acs_on_downstream)
+ return 1;
+ break;
+ case PCI_EXP_TYPE_ENDPOINT:
+ case PCI_EXP_TYPE_UPSTREAM:
+ case PCI_EXP_TYPE_LEG_END:
+ case PCI_EXP_TYPE_RC_END:
+ if (acs_on_multifunction && dev->multifunction)
+ return 1;
+ }
+
+ return -ENOTTY;
+}
/*
* Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
* The device will throw a Link Down error on AER-capable systems and
@@ -5102,6 +5102,7 @@
{ PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs },
/* Wangxun nics */
{ PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs },
+ { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides },
{ 0 }
};

View file

@ -0,0 +1,166 @@
From b70e738f08403950aa3053c36b98c6b0eeb0eb90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Mon, 25 Oct 2021 09:49:42 -0300
Subject: [PATCH] futex: Add entry point for FUTEX_WAIT_MULTIPLE (opcode 31)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add an option to wait on multiple futexes using the old interface, that
uses opcode 31 through futex() syscall. Do that by just translation the
old interface to use the new code. This allows old and stable versions
of Proton to still use fsync in new kernel releases.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
---
include/uapi/linux/futex.h | 13 +++++++
kernel/futex/syscalls.c | 75 +++++++++++++++++++++++++++++++++++++-
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 71a5df8d2689..d375ab21cbf8 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -22,6 +22,7 @@
#define FUTEX_WAIT_REQUEUE_PI 11
#define FUTEX_CMP_REQUEUE_PI 12
#define FUTEX_LOCK_PI2 13
+#define FUTEX_WAIT_MULTIPLE 31
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
@@ -68,6 +69,18 @@ struct futex_waitv {
__u32 __reserved;
};
+/**
+ * struct futex_wait_block - Block of futexes to be waited for
+ * @uaddr: User address of the futex
+ * @val: Futex value expected by userspace
+ * @bitset: Bitset for the optional bitmasked wakeup
+ */
+struct futex_wait_block {
+ __u32 __user *uaddr;
+ __u32 val;
+ __u32 bitset;
+};
+
/*
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 6f91a07a6a83..2f4d4c04ede2 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -158,6 +158,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
case FUTEX_LOCK_PI2:
case FUTEX_WAIT_BITSET:
case FUTEX_WAIT_REQUEUE_PI:
+ case FUTEX_WAIT_MULTIPLE:
return true;
}
return false;
@@ -170,13 +171,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
return -EINVAL;
*t = timespec64_to_ktime(*ts);
- if (cmd == FUTEX_WAIT)
+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
*t = ktime_add_safe(ktime_get(), *t);
else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
return 0;
}
+/**
+ * futex_read_wait_block - Read an array of futex_wait_block from userspace
+ * @uaddr: Userspace address of the block
+ * @count: Number of blocks to be read
+ *
+ * This function creates and allocate an array of futex_q (we zero it to
+ * initialize the fields) and then, for each futex_wait_block element from
+ * userspace, fill a futex_q element with proper values.
+ */
+inline struct futex_vector *futex_read_wait_block(u32 __user *uaddr, u32 count)
+{
+ unsigned int i;
+ struct futex_vector *futexv;
+ struct futex_wait_block fwb;
+ struct futex_wait_block __user *entry =
+ (struct futex_wait_block __user *)uaddr;
+
+ if (!count || count > FUTEX_WAITV_MAX)
+ return ERR_PTR(-EINVAL);
+
+ futexv = kcalloc(count, sizeof(*futexv), GFP_KERNEL);
+ if (!futexv)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < count; i++) {
+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) {
+ kfree(futexv);
+ return ERR_PTR(-EFAULT);
+ }
+
+ futexv[i].w.flags = FUTEX_32;
+ futexv[i].w.val = fwb.val;
+ futexv[i].w.uaddr = (uintptr_t) (fwb.uaddr);
+ futexv[i].q = futex_q_init;
+ }
+
+ return futexv;
+}
+
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
+ struct hrtimer_sleeper *to);
+
+int futex_opcode_31(ktime_t *abs_time, u32 __user *uaddr, int count)
+{
+ int ret;
+ struct futex_vector *vs;
+ struct hrtimer_sleeper *to = NULL, timeout;
+
+ to = futex_setup_timer(abs_time, &timeout, 0, 0);
+
+ vs = futex_read_wait_block(uaddr, count);
+
+ if (IS_ERR(vs))
+ return PTR_ERR(vs);
+
+ ret = futex_wait_multiple(vs, count, abs_time ? to : NULL);
+ kfree(vs);
+
+ if (to) {
+ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
+ }
+
+ return ret;
+}
+
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
const struct __kernel_timespec __user *, utime,
u32 __user *, uaddr2, u32, val3)
@@ -196,6 +263,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
+ if (cmd == FUTEX_WAIT_MULTIPLE)
+ return futex_opcode_31(tp, uaddr, val);
+
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
@@ -392,6 +462,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
tp = &t;
}
+ if (cmd == FUTEX_WAIT_MULTIPLE)
+ return futex_opcode_31(tp, uaddr, val);
+
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
#endif /* CONFIG_COMPAT_32BIT_TIME */
--
2.33.1

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,6 @@
app-emulation/winetricks
app-emulation/protontricks
games-util/gamemode
# winetricks-related stuff
app-arch/cabextract
app-arch/p7zip

View file

@ -2,16 +2,40 @@
app-misc/jq
app-shells/zsh
# replacement for ls
sys-apps/eza
dev-libs/bemenu
# for python neovim things
dev-python/pynvim
gui-wm/swayfx::criminallycute
gui-apps/grim
gui-apps/slurp
gui-apps/wl-clipboard
gui-apps/swaybg
#gui-apps/swayidle
#gui-apps/swaylock
gui-apps/mako
x11-misc/qt5ct
gui-apps/qt6ct
x11-themes/kvantum
# For mouse cursor shenanigans
gnome-base/gsettings-desktop-schemas
media-gfx/ristretto
# for thumbnailing
xfce-base/tumbler
# For media keys
media-sound/playerctl
sys-fs/fuse:0
x11-drivers/xf86-input-libinput
x11-drivers/xf86-video-amdgpu
x11-drivers/xf86-video-ati
x11-misc/xdg-user-dirs
x11-terms/kitty
x11-misc/pcmanfm-qt
gui-libs/xdg-desktop-portal-wlr
# required for screen sharing
sys-apps/xdg-desktop-portal-gnome

View file

@ -1,41 +0,0 @@
@fonts
app-misc/jq
app-shells/zsh
dev-libs/bemenu
gui-wm/swayfx::criminallycute
gui-apps/grim
gui-apps/slurp
gui-apps/wl-clipboard
gui-apps/swaybg
#gui-apps/swayidle
#gui-apps/swaylock
gui-apps/mako
x11-misc/qt5ct
gui-apps/qt6ct
x11-themes/kvantum
# For mouse cursor shenanigans
gnome-base/gsettings-desktop-schemas
media-gfx/ristretto
# for thumbnailing
xfce-base/tumbler
# For media keys
media-sound/playerctl
sys-fs/fuse:0
x11-drivers/xf86-input-libinput
x11-drivers/xf86-video-amdgpu
x11-drivers/xf86-video-ati
x11-misc/xdg-user-dirs
x11-terms/kitty
x11-misc/pcmanfm-qt
gui-libs/xdg-desktop-portal-wlr
# required for screen sharing
sys-apps/xdg-desktop-portal-gnome