Replace the uses of __wake_up_locked with wake_up_locked Signed-off-by: Matthew Wilcox <willy@linux.intel.com> --- fs/eventpoll.c | 11 ++++------- 1 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 34f68f3..81c04ab 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -656,8 +656,7 @@ is_linked: * wait list. */ if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | - TASK_INTERRUPTIBLE); + wake_up_locked(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; @@ -780,7 +779,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE); + wake_up_locked(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } @@ -854,8 +853,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | - TASK_INTERRUPTIBLE); + wake_up_locked(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } @@ -978,8 +976,7 @@ errxit: * wait list (delayed after we release the lock). */ if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | - TASK_INTERRUPTIBLE); + wake_up_locked(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } -- 1.4.4.2 -
Set TASK_WAKEKILL for TASK_STOPPED and TASK_TRACED, add TASK_KILLABLE and use TASK_WAKEKILL in signal_wake_up() Signed-off-by: Matthew Wilcox <willy@linux.intel.com> --- include/linux/sched.h | 22 ++++++++++++++-------- kernel/signal.c | 8 ++++---- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index fc28637..d1e32ec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -170,27 +170,33 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_STOPPED 4 -#define TASK_TRACED 8 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 /* in tsk->exit_state */ #define EXIT_ZOMBIE 16 #define EXIT_DEAD 32 /* in tsk->state again */ #define TASK_DEAD 64 +#define TASK_WAKEKILL 128 + +/* Convenience macros for the sake of set_task_state */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) /* Convenience macros for the sake of wake_up */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | TASK_STOPPED | TASK_TRACED) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) /* get_task_state() */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ - TASK_UNINTERRUPTIBLE | TASK_STOPPED | \ - TASK_TRACED) + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED) -#define is_task_traced(task) ((task->state & TASK_TRACED) != 0) -#define is_task_stopped(task) ((task->state & TASK_STOPPED) != 0) +#define is_task_traced(task) ((task->state & __TASK_TRACED) != 0) +#define is_task_stopped(task) ((task->state & __TASK_STOPPED) != 0) #define is_task_stopped_or_traced(task) \ - ((task->state & (TASK_STOPPED | TASK_TRACED)) != 0) + ((task->state & (__TASK_STOPPED ...
Use TASK_KILLABLE to allow wait_on_retry_sync_kiocb to return -EINTR.
All callers then check the return value and break out of their loops.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
fs/read_write.c | 17 ++++++++++++-----
1 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 124693e..3196a3b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -218,14 +218,15 @@ Einval:
return -EINVAL;
}
-static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
+static int wait_on_retry_sync_kiocb(struct kiocb *iocb)
{
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_KILLABLE);
if (!kiocbIsKicked(iocb))
schedule();
else
kiocbClearKicked(iocb);
__set_current_state(TASK_RUNNING);
+ return fatal_signal_pending(current) ? -EINTR : 0;
}
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
@@ -242,7 +243,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
- wait_on_retry_sync_kiocb(&kiocb);
+ ret = wait_on_retry_sync_kiocb(&kiocb);
+ if (ret)
+ break;
}
if (-EIOCBQUEUED == ret)
@@ -300,7 +303,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
- wait_on_retry_sync_kiocb(&kiocb);
+ ret = wait_on_retry_sync_kiocb(&kiocb);
+ if (ret)
+ break;
}
if (-EIOCBQUEUED == ret)
@@ -466,7 +471,9 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
- wait_on_retry_sync_kiocb(&kiocb);
+ ret = wait_on_retry_sync_kiocb(&kiocb);
+ if (ret)
+ break;
}
if (ret == -EIOCBQUEUED)
--
1.4.4.2
-
This won't work because "sync" kiocbs are a nasty hack that don't follow
the (also nasty) refcounting patterns of the aio core.
-EIOCBRETRY means that aio_{read,write}() has taken on the "IO" kiocb
reference and has ensured that call kick_iocb() will be called in the
future.
Usually kick_iocb() would queue the kiocb to have its ki_retry method
called by the kernel aio threads while holding that reference. But
"sync" kiocbs are on-stack and aren't reference counted. kick_iocb() magic:
/* sync iocbs are easy: they can only ever be executing from a
* single context. */
if (is_sync_kiocb(iocb)) {
kiocbSetKicked(iocb);
wake_up_process(iocb->ki_obj.tsk);
return;
}
So, with this patch, if we catch a signal and return from
wait_on_retry_sync_kiocb() and return from do_sync_{read,write}() then
that on-stack sync kiocb is going to be long gone when kick_iocb() goes
to work with it.
So the first step would be to make sync kiocbs real refcounted
structures so that kick_iocb() could find that the sync submitter has
disappeared.
But then we have to worry about leaving retrying operations in flight
after the sync submitter has returned from their system call. They
might be VERY SURPRISED to find that a read() implemented with
do_sync_read() is still writing into their userspace pointer after the
syscall was interrupted by a signal.
This leads us to the possibility of working with the ki_cancel method to
stop a pending operation if a signal is caught from a sync submitter.
In practice, nothing sets ki_cancel.
And finally, this code will not be run in a solely mainline kernel. The
only thing in mainline that returns -EIOCBRETRY is the goofy usb gadget.
It has both ->{read,write} and ->aio_{read,write} file op methods so
vfs_{read,write}() will never call do_sync_{read,write}(). Sure,
out-of-tree aio providers (SDP?) might get caught up in this.
(Ha ha! Welcome to ...and associated infrastructure such as sync_page_killable and
fatal_signal_pending. Use lock_page_killable in do_generic_mapping_read()
to allow us to kill `cat' of a file on an NFS-mounted filesystem.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
include/linux/pagemap.h | 14 ++++++++++++++
include/linux/sched.h | 9 ++++++++-
kernel/signal.c | 5 +++++
mm/filemap.c | 25 +++++++++++++++++++++----
4 files changed, 48 insertions(+), 5 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index db8a410..4b62a10 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -157,6 +157,7 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}
extern void FASTCALL(__lock_page(struct page *page));
+extern int FASTCALL(__lock_page_killable(struct page *page));
extern void FASTCALL(__lock_page_nosync(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
@@ -171,6 +172,19 @@ static inline void lock_page(struct page *page)
}
/*
+ * lock_page_killable is like lock_page but can be interrupted by fatal
+ * signals. It returns 0 if it locked the page and -EINTR if it was
+ * killed while waiting.
+ */
+static inline int lock_page_killable(struct page *page)
+{
+ might_sleep();
+ if (TestSetPageLocked(page))
+ return __lock_page_killable(page);
+ return 0;
+}
+
+/*
* lock_page_nosync should only be used if we can't pin the page's inode.
* Doesn't play quite so well with block device plugging.
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d1e32ec..7ccf92a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1856,7 +1856,14 @@ static inline int signal_pending(struct task_struct *p)
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
-
+
+extern int FASTCALL(__fatal_signal_pending(struct task_struct *p));
+
+static inline int fatal_signal_pending(struct task_struct *p)
+{
+ return ...whoa, big change. What exactly are the semantics here? If the process has actually been killed (ie: we know that userspace won't be running again) then we break out of a lock_page() and allow the process to exit? ie: it's basically invisible to userspace? If so, it sounds OK. I guess. We're still screwed if the process is doing a synchronous write and lots of other scenarios. How well has this been tested? Have the NFS guys had a think about it? Why does it return -EIO from read() and not -EINTR? -
The actual conversions should also be relatively useful groundwork I don't think it will matter in too many situations. If the process is doing a synchronous write, nothing is guaranteed until the syscall returns success... -
Abstracting away direct uses of TASK_ flags allows us to change the
definitions of the task flags more easily.
Also restructure do_wait() a little
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
arch/ia64/kernel/perfmon.c | 4 +-
fs/proc/array.c | 7 +---
fs/proc/base.c | 2 +-
include/linux/sched.h | 15 +++++++
include/linux/wait.h | 11 +++--
kernel/exit.c | 90 +++++++++++++++++++------------------------
kernel/power/process.c | 6 +-
kernel/ptrace.c | 8 ++--
kernel/sched.c | 15 +++----
kernel/signal.c | 6 +-
kernel/wait.c | 2 +-
11 files changed, 82 insertions(+), 84 deletions(-)
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 59169bf..c2ca724 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2631,7 +2631,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
*/
if (task == current) return 0;
- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
+ if (!is_task_stopped_or_traced(task)) {
DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
return -EBUSY;
}
@@ -4792,7 +4792,7 @@ recheck:
* the task must be stopped.
*/
if (PFM_CMD_STOPPED(cmd)) {
- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
+ if (!is_task_stopped_or_traced(task)) {
DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
return -EBUSY;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 63c95af..0bf1fa0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -141,12 +141,7 @@ static const char *task_state_array[] = {
static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state & (TASK_RUNNING |
- TASK_INTERRUPTIBLE |
- TASK_UNINTERRUPTIBLE |
- TASK_STOPPED |
- ...umm, spose so. There's an excellent chance that a millionth of our monkeys will come along and wreck this diff during the next two months. In which case I might end up dropping bits of it, we'll see. It would be much better if this patch was a series of patches: one to add the new infrastructure and one-per-subsystem to introduce users of the infrastructure. That way I wouldn't have to go mucking around in the internals of the patch and remembering to tell you when I tossed bits of it out, and which bits those were. This shouldn't come as news to an old-timer :( -
On Wed, 24 Oct 2007 08:24:55 -0400 I have dropped this hunk because the file which it is patching is removed by the (newly-added-to-mm) git-perfmon.patch. I can't immediately find any corresponding code which was readded in a different place by git-perfmon so it looks like this code was simply zapped. Of course, if git-perfmon doesn't merge in 2.6.25 then I'll end up merging your patch but accidentally leaving 2.6.25's arch/ia64/kernel/perfmon.c unpatched. It looks like that'll be non-fatal. This isn't going to go very well and I might end up having to drop this whole patch series and ask for a refactored one. We'll see. -
^^^^^^^^^^^^^^^^^^^^^^^^^^ I think this is horrible. Are you going to add full blown static inline -
No, because we don't need them. Just stopped, traced, stopped_or_traced. -- Intel are signing my paycheques ... these opinions are still mine "Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step." -
1) please change 'is' and 'task' around so that it reads nicer: if (task_is_stopped(t)) instead of the tongue-twister: if (is_task_stopped(t)) 2) please change task_is_loadavg() to something more sensible - i didnt know what it meant when i first saw it in -mm's sched.c. task_is_uninterruptible() would be the logical choice ... Ingo --
It's not obvious to me that "can't be interrupted by a signal" is the same thing as "contributes to loadavg". Indeed, I think we would all like to see a day where there's no such thing as an uninterruptible sleep, but we'll still want at least some of those tasks contributing to loadavg. You're right that task_is_loadavg() doesn't make much sense. task_contributes_to_load() is a better name, but only marginally. I've considered task_performing_io(), and name the corresponding bit __TASK_PERFORMING_IO (or even just __TASK_DOING_IO or __TASK_IO), but it's not just doing IO that makes a task contribute to loadavg. I have no good suggestions here ;-( -- Intel are signing my paycheques ... these opinions are still mine "Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step." --
| Greg KH | Og dreams of kernels |
| Jens Axboe | [PATCH 31/33] Fusion: sg chaining support |
| Arnd Bergmann | Re: finding your own dead "CONFIG_" variables |
| Mark Brown | [PATCH 2/2] Subject: natsemi: Allow users to disable workaround for DspCfg reset |
| Tony Breeds | [LGUEST] Look in object dir for .config |
git: | |
| Brian Downing | Re: Git in a Nutshell guide |
| John Benes | Re: master has some toys |
| Matthias Lederhofer | [PATCH 4/7] introduce GIT_WORK_TREE to specify the work tree |
| Alexander Sulfrian | [RFC/PATCH] RE: git calls SSH_ASKPASS even if DISPLAY is not set |
| Junio C Hamano | Re: Rss produced by git is not valid xml? |
| Linux Kernel Mailing List | iSeries: fix section mismatch in iseries_veth |
| Linux Kernel Mailing List | ixbge: remove TX lock and redo TX accounting. |
| Linux |
