#ifndef _STP_UTRACE_H
#define _STP_UTRACE_H
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/sched.h>
#include <linux/binfmts.h>
/*
* Event bits passed to utrace_set_events().
* These appear in &struct task_struct.@utrace_flags
* and &struct utrace_engine.@flags.
*/
enum utrace_events {
_UTRACE_EVENT_QUIESCE, /* Thread is available for examination. */
_UTRACE_EVENT_REAP, /* Zombie reaped, no more tracing possible. */
_UTRACE_EVENT_CLONE, /* Successful clone/fork/vfork just done. */
_UTRACE_EVENT_EXEC, /* Successful execve just completed. */
_UTRACE_EVENT_DEATH, /* Thread has died. */
_UTRACE_EVENT_SYSCALL_ENTRY, /* User entered kernel for system call. */
_UTRACE_EVENT_SYSCALL_EXIT, /* Returning to user after system call. */
_UTRACE_NEVENTS
};
#define UTRACE_EVENT(type) (1UL << _UTRACE_EVENT_##type)
/*
* Both kinds of syscall events; these call the @report_syscall_entry()
* and @report_syscall_exit() callbacks, respectively.
*/
#define UTRACE_EVENT_SYSCALL \
(UTRACE_EVENT(SYSCALL_ENTRY) | UTRACE_EVENT(SYSCALL_EXIT))
/*
* The event reports triggered synchronously by task death.
*/
#define _UTRACE_DEATH_EVENTS (UTRACE_EVENT(DEATH) | UTRACE_EVENT(QUIESCE))
/*
* Flags for utrace_attach_task().
*/
#define UTRACE_ATTACH_MATCH_OPS 0x0001 /* Match engines on ops. */
#define UTRACE_ATTACH_MATCH_DATA 0x0002 /* Match engines on data. */
#define UTRACE_ATTACH_MATCH_MASK 0x000f
#define UTRACE_ATTACH_CREATE 0x0010 /* Attach a new engine. */
#define UTRACE_ATTACH_EXCLUSIVE 0x0020 /* Refuse if existing match. */
/**
* struct utrace_engine - per-engine structure
* @ops: &struct utrace_engine_ops pointer passed to utrace_attach_task()
* @data: engine-private &void * passed to utrace_attach_task()
* @flags: event mask set by utrace_set_events() plus internal flag bits
*
* The task itself never has to worry about engines detaching while
* it's doing event callbacks. These structures are removed from the
* task's active list only when it's stopped, or by the task itself.
*
* utrace_engine_get() and utrace_engine_put() maintain a reference count.
* When it drops to zero, the structure is freed. One reference is held
* implicitly while the engine is attached to its task.
*/
struct utrace_engine {
/* private: */
struct kref kref;
void (*release)(void *);
struct list_head entry;
/* public: */
const struct utrace_engine_ops *ops;
void *data;
unsigned long flags;
};
static int utrace_init(void);
static int utrace_exit(void);
static void utrace_shutdown(void);
/**
* utrace_engine_get - acquire a reference on a &struct utrace_engine
* @engine: &struct utrace_engine pointer
*
* You must hold a reference on @engine, and you get another.
*/
static inline void utrace_engine_get(struct utrace_engine *engine)
{
kref_get(&engine->kref);
}
static void __utrace_engine_release(struct kref *);
/**
* utrace_engine_put - release a reference on a &struct utrace_engine
* @engine: &struct utrace_engine pointer
*
* You must hold a reference on @engine, and you lose that reference.
* If it was the last one, @engine becomes an invalid pointer.
*/
static inline void utrace_engine_put(struct utrace_engine *engine)
{
kref_put(&engine->kref, __utrace_engine_release);
}
/**
* struct utrace_engine_ops - tracing engine callbacks
*
* Each @report_*() callback corresponds to an %UTRACE_EVENT(*) bit.
* utrace_set_events() calls on @engine choose which callbacks will
* be made to @engine from @task.
*
* Most callbacks take an @action argument, giving the resume action
* chosen by other tracing engines. All callbacks take an @engine
* argument. The @report_reap callback takes a @task argument that
* might or might not be @current. All other @report_* callbacks
* report an event in the @current task.
*
* For some calls, @action also includes bits specific to that event
* and utrace_resume_action() is used to extract the resume action.
* This shows what would happen if @engine wasn't there, or will if
* the callback's return value uses %UTRACE_RESUME. This always
* starts as %UTRACE_RESUME when no other tracing is being done on
* this task.
*
* All return values contain &enum utrace_resume_action bits. For
* some calls, other bits specific to that kind of event are added to
* the resume action bits with OR. These are the same bits used in
* the @action argument. The resume action returned by a callback
* does not override previous engines' choices, it only says what
* @engine wants done. What @current actually does is the action that's
* most constrained among the choices made by all attached engines.
* See utrace_control() for more information on the actions.
*
* When %UTRACE_STOP is used in @report_syscall_entry, then @current
* stops before attempting the system call. In this case, another
* @report_syscall_entry callback will follow after @current resumes
* if %UTRACE_REPORT or %UTRACE_INTERRUPT was returned by some
* callback or passed to utrace_control(). In a second or later
* callback, %UTRACE_SYSCALL_RESUMED is set in the @action argument to
* indicate a repeat callback still waiting to attempt the same system
* call invocation. This repeat callback gives each engine an
* opportunity to reexamine registers another engine might have
* changed while @current was held in %UTRACE_STOP.
*
* In other cases, the resume action does not take effect until @current
* is ready to check for signals and return to user mode. If there
* are more callbacks to be made, the last round of calls determines
* the final action. A @report_quiesce callback with @event zero
* will always be the last one made before
* @current resumes. Only %UTRACE_STOP is "sticky"--if @engine returned
* %UTRACE_STOP then @current stays stopped unless @engine returns
* different from a following callback.
*
* The report_death() and report_reap() callbacks do not take @action
* arguments, and only %UTRACE_DETACH is meaningful in the return value
* from a report_death() callback. None of the resume actions applies
* to a dead thread.
*
* All @report_*() hooks are called with no locks held, in a generally
* safe environment when we will be returning to user mode soon (or just
* entered the kernel). It is fine to block for memory allocation and
* the like, but all hooks are asynchronous and must not block on
* external events! If you want the thread to block, use %UTRACE_STOP
* in your hook's return value; then later wake it up with utrace_control().
*
* @report_quiesce:
* Requested by %UTRACE_EVENT(%QUIESCE).
* This does not indicate any event, but just that @current is in a
* safe place for examination. This call is made before each specific
* event callback, except for @report_reap. The @event argument gives
* the %UTRACE_EVENT(@which) value for the event occurring. This
* callback might be made for events @engine has not requested, if
* some other engine is tracing the event; calling utrace_set_events()
* call here can request the immediate callback for this occurrence of
* @event. @event is zero when there is no other event, @current is
* now ready to check for signals and return to user mode, and some
* engine has used %UTRACE_REPORT or %UTRACE_INTERRUPT to request
* this callback.
*
* @report_clone:
* Requested by %UTRACE_EVENT(%CLONE).
* Event reported for parent, before the new task @child might run.
* @clone_flags gives the flags used in the clone system call, or
* equivalent flags for a fork() or vfork() system call. This
* function can use utrace_attach_task() on @child. Then passing
* %UTRACE_STOP to utrace_control() on @child here keeps the child
* stopped before it ever runs in user mode, %UTRACE_REPORT or
* %UTRACE_INTERRUPT ensures a callback from @child before it
* starts in user mode.
*
* @report_exec:
* Requested by %UTRACE_EVENT(%EXEC).
* An execve system call has succeeded and the new program is about to
* start running. The initial user register state is handy to be tweaked
* directly in @regs. @fmt and @bprm gives the details of this exec.
*
* @report_syscall_entry:
* Requested by %UTRACE_EVENT(%SYSCALL_ENTRY).
* Thread has entered the kernel to request a system call.
* The user register state is handy to be tweaked directly in @regs.
* The @action argument contains an &enum utrace_syscall_action,
* use utrace_syscall_action() to extract it. The return value
* overrides the last engine's action for the system call.
* If the final action is %UTRACE_SYSCALL_ABORT, no system call
* is made. The details of the system call being attempted can
* be fetched here with syscall_get_nr() and syscall_get_arguments().
* The parameter registers can be changed with syscall_set_arguments().
* See above about the %UTRACE_SYSCALL_RESUMED flag in @action.
* Use %UTRACE_REPORT in the return value to guarantee you get
* another callback (with %UTRACE_SYSCALL_RESUMED flag) in case
* @current stops with %UTRACE_STOP before attempting the system call.
*
* @report_syscall_exit:
* Requested by %UTRACE_EVENT(%SYSCALL_EXIT).
* Thread is about to leave the kernel after a system call request.
* The user register state is handy to be tweaked directly in @regs.
* The results of the system call attempt can be examined here using
* syscall_get_error() and syscall_get_return_value(). It is safe
* here to call syscall_set_return_value() or syscall_rollback().
*
* @report_death:
* Requested by %UTRACE_EVENT(%DEATH).
* Thread is really dead now. It might be reaped by its parent at
* any time, or self-reap immediately. Though the actual reaping
* may happen in parallel, a report_reap() callback will always be
* ordered after a report_death() callback.
*
* @report_reap:
* Requested by %UTRACE_EVENT(%REAP).
* Called when someone reaps the dead task (parent, init, or self).
* This means the parent called wait, or else this was a detached
* thread or a process whose parent ignores SIGCHLD.
* No more callbacks are made after this one.
* The engine is always detached.
* There is nothing more a tracing engine can do about this thread.
* After this callback, the @engine pointer will become invalid.
* The @task pointer may become invalid if get_task_struct() hasn't
* been used to keep it alive.
* An engine should always request this callback if it stores the
* @engine pointer or stores any pointer in @engine->data, so it
* can clean up its data structures.
* Unlike other callbacks, this can be called from the parent's context
* rather than from the traced thread itself--it must not delay the
* parent by blocking.
*
* @release:
* If not %NULL, this is called after the last utrace_engine_put()
* call for a &struct utrace_engine, which could be implicit after
* a %UTRACE_DETACH return from another callback. Its argument is
* the engine's @data member.
*/
struct utrace_engine_ops {
u32 (*report_quiesce)(u32 action, struct utrace_engine *engine,
unsigned long event);
u32 (*report_clone)(u32 action, struct utrace_engine *engine,
unsigned long clone_flags,
struct task_struct *child);
u32 (*report_exec)(u32 action, struct utrace_engine *engine,
const struct linux_binfmt *fmt,
const struct linux_binprm *bprm,
struct pt_regs *regs);
u32 (*report_syscall_entry)(u32 action, struct utrace_engine *engine,
struct pt_regs *regs);
u32 (*report_syscall_exit)(u32 action, struct utrace_engine *engine,
struct pt_regs *regs);
u32 (*report_death)(struct utrace_engine *engine,
bool group_dead, int signal);
void (*report_reap)(struct utrace_engine *engine,
struct task_struct *task);
void (*release)(void *data);
};
/*
* These are the exported entry points for tracing engines to use.
* See stp_utrace.c for their kerneldoc comments with interface details.
*/
static struct utrace_engine *utrace_attach_task(struct task_struct *, int,
const struct utrace_engine_ops *,
void *);
static int __must_check utrace_set_events(struct task_struct *,
struct utrace_engine *,
unsigned long eventmask);
static int __must_check utrace_barrier(struct task_struct *,
struct utrace_engine *);
/*
* Version number of the API defined in this file. This will change
* whenever a tracing engine's code would need some updates to keep
* working. We maintain this here for the benefit of tracing engine code
* that is developed concurrently with utrace API improvements before they
* are merged into the kernel, making LINUX_VERSION_CODE checks unwieldy.
*/
#define UTRACE_API_VERSION 20110727
/**
* enum utrace_resume_action - engine's choice of action for a traced task
* @UTRACE_STOP: Stay quiescent after callbacks.
* @UTRACE_INTERRUPT: Make quiesce callback soon.
* @UTRACE_REPORT: Make some callback soon.
* @UTRACE_RESUME: Resume normally in user mode.
* @UTRACE_DETACH: Detach my engine (implies %UTRACE_RESUME).
*
* See utrace_control() for detailed descriptions of each action. This is
* encoded in the @action argument and the return value for every callback
* with a &u32 return value.
*
* The order of these is important. When there is more than one engine,
* each supplies its choice and the smallest value prevails.
*/
enum utrace_resume_action {
UTRACE_STOP,
UTRACE_INTERRUPT,
UTRACE_REPORT,
UTRACE_RESUME,
UTRACE_DETACH,
UTRACE_RESUME_MAX
};
#define UTRACE_RESUME_BITS (ilog2(UTRACE_RESUME_MAX) + 1)
#define UTRACE_RESUME_MASK ((1 << UTRACE_RESUME_BITS) - 1)
/**
* utrace_resume_action - &enum utrace_resume_action from callback action
* @action: &u32 callback @action argument or return value
*
* This extracts the &enum utrace_resume_action from @action,
* which is the @action argument to a &struct utrace_engine_ops
* callback or the return value from one.
*/
static inline enum utrace_resume_action utrace_resume_action(u32 action)
{
return action & UTRACE_RESUME_MASK;
}
#endif /* _STP_UTRACE_H */