runtime/linux/uprobes/uprobes.c - systemtap

#include <linux/utrace.h>
#ifndef UTRACE_ACTION_RESUME

/*
* Assume the kernel is running the 2008 version of utrace.
* Skip the code in this file and instead use uprobes 2.
*/
#include "../uprobes2/uprobes.c"

#else    /* uprobes 1 (based on original utrace) */

/*
*  Userspace Probes (UProbes)
*  kernel/uprobes_core.c
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (C) IBM Corporation, 2006
*/
#include <linux/types.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/err.h>
#include <linux/kref.h>
#include <linux/utrace.h>
#define UPROBES_IMPLEMENTATION 1
#include "uprobes.h"
#include <linux/tracehook.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <asm/tracehook.h>
#include <asm/errno.h>
#include <asm/mman.h>

#define SET_ENGINE_FLAGS    1
#define CLEAR_ENGINE_FLAGS    0

#define MAX_SSOL_SLOTS        1024

#ifdef NO_ACCESS_PROCESS_VM_EXPORT
static int __access_process_vm(struct task_struct *tsk, unsigned long addr,
    void *buf, int len, int write);
#define access_process_vm __access_process_vm
#else
extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
    void *buf, int len, int write);
#endif
static int utask_fake_quiesce(struct uprobe_task *utask);
static void uprobe_release_ssol_vma(struct uprobe_process *uproc);

static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
    struct uprobe_task *utask);
static void uretprobe_handle_return(struct pt_regs *regs,
    struct uprobe_task *utask);
static void uretprobe_set_trampoline(struct uprobe_process *uproc,
    struct task_struct *tsk);
static void zap_uretprobe_instances(struct uprobe *u,
    struct uprobe_process *uproc);

typedef void (*uprobe_handler_t)(struct uprobe*, struct pt_regs*);
#define URETPROBE_HANDLE_ENTRY ((uprobe_handler_t)-1L)
#define is_uretprobe(u) ((u)->handler == URETPROBE_HANDLE_ENTRY)
/* Point utask->active_probe at this while running uretprobe handler. */
static struct uprobe_probept uretprobe_trampoline_dummy_probe;

/* Table of currently probed processes, hashed by tgid. */
static struct hlist_head uproc_table[UPROBE_TABLE_SIZE];

/* Protects uproc_table during uprobe (un)registration */
static DEFINE_MUTEX(uproc_mutex);

/* Table of uprobe_tasks, hashed by task_struct pointer. */
static struct hlist_head utask_table[UPROBE_TABLE_SIZE];
static DEFINE_SPINLOCK(utask_table_lock);

#define lock_uproc_table() mutex_lock(&uproc_mutex)
#define unlock_uproc_table() mutex_unlock(&uproc_mutex)

#define lock_utask_table(flags) spin_lock_irqsave(&utask_table_lock, (flags))
#define unlock_utask_table(flags) \
    spin_unlock_irqrestore(&utask_table_lock, (flags))

/* p_uprobe_utrace_ops = &uprobe_utrace_ops.  Fwd refs are a pain w/o this. */
static const struct utrace_engine_ops *p_uprobe_utrace_ops;

struct deferred_registration {
    struct list_head list;
    struct uprobe *uprobe;
    int regflag;    /* 0 - unregister, 1 - register */
    enum uprobe_type type;
};

/*
* Calling a signal handler cancels single-stepping, so uprobes delays
* calling the handler, as necessary, until after single-stepping is completed.
*/
struct delayed_signal {
    struct list_head list;
    siginfo_t info;
};

static struct uprobe_task *uprobe_find_utask_locked(struct task_struct *tsk)
{
    struct hlist_head *head;
    struct hlist_node *node;
    struct uprobe_task *utask;

    head = &utask_table[hash_ptr(tsk, UPROBE_HASH_BITS)];
    hlist_for_each_entry(utask, node, head, hlist) {
        if (utask->tsk == tsk)
            return utask;
    }
    return NULL;
}

static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
{
    struct uprobe_task *utask;
    unsigned long flags;

    lock_utask_table(flags);
    utask = uprobe_find_utask_locked(tsk);
    unlock_utask_table(flags);
    return utask;
}

static void uprobe_hash_utask(struct uprobe_task *utask)
{
    struct hlist_head *head;
    unsigned long flags;

    INIT_HLIST_NODE(&utask->hlist);
    lock_utask_table(flags);
    head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
    hlist_add_head(&utask->hlist, head);
    unlock_utask_table(flags);
}

static void uprobe_unhash_utask(struct uprobe_task *utask)
{
    unsigned long flags;

    lock_utask_table(flags);
    hlist_del(&utask->hlist);
    unlock_utask_table(flags);
}

static struct uprobe_process *uprobe_get_process(struct uprobe_process *uproc)
{
    if (atomic_inc_not_zero(&uproc->refcount))
        return uproc;
    return NULL;
}

/*
* Decrement uproc's refcount in a situation where we "know" it can't
* reach zero.  It's OK to call this with uproc locked.  Compare with
* uprobe_put_process().
*/
static inline void uprobe_decref_process(struct uprobe_process *uproc)
{
    if (atomic_dec_and_test(&uproc->refcount))
        BUG();
}

/*
* Runs with the uproc_mutex held.  Returns with uproc ref-counted and
* write-locked.
*
* Around exec time, briefly, it's possible to have one (finished) uproc
* for the old image and one for the new image.  We find the latter.
*/
static struct uprobe_process *uprobe_find_process(pid_t tgid)
{
    struct hlist_head *head;
    struct hlist_node *node;
    struct uprobe_process *uproc;

    head = &uproc_table[hash_long(tgid, UPROBE_HASH_BITS)];
    hlist_for_each_entry(uproc, node, head, hlist) {
        if (uproc->tgid == tgid && !uproc->finished) {
            uproc = uprobe_get_process(uproc);
            if (uproc)
                down_write(&uproc->rwsem);
            return uproc;
        }
    }
    return NULL;
}

/*
* In the given uproc's hash table of probepoints, find the one with the
* specified virtual address.  Runs with uproc->rwsem locked.
*/
static struct uprobe_probept *uprobe_find_probept(struct uprobe_process *uproc,
        unsigned long vaddr)
{
    struct uprobe_probept *ppt;
    struct hlist_node *node;
    struct hlist_head *head = &uproc->uprobe_table[hash_long(vaddr,
        UPROBE_HASH_BITS)];

    hlist_for_each_entry(ppt, node, head, ut_node) {
        if (ppt->vaddr == vaddr && ppt->state != UPROBE_DISABLED)
            return ppt;
    }
    return NULL;
}

/*
* set_bp: Store a breakpoint instruction at ppt->vaddr.
* Returns BP_INSN_SIZE on success.
*
* NOTE: BREAKPOINT_INSTRUCTION on all archs is the same size as
* uprobe_opcode_t.
*/
static int set_bp(struct uprobe_probept *ppt, struct task_struct *tsk)
{
    uprobe_opcode_t bp_insn = BREAKPOINT_INSTRUCTION;
    return access_process_vm(tsk, ppt->vaddr, &bp_insn, BP_INSN_SIZE, 1);
}

/*
* set_orig_insn:  For probepoint ppt, replace the breakpoint instruction
* with the original opcode.  Returns BP_INSN_SIZE on success.
*/
static int set_orig_insn(struct uprobe_probept *ppt, struct task_struct *tsk)
{
    return access_process_vm(tsk, ppt->vaddr, &ppt->opcode, BP_INSN_SIZE,
        1);
}

static void bkpt_insertion_failed(struct uprobe_probept *ppt, const char *why)
{
    printk(KERN_ERR "Can't place uprobe at pid %d vaddr %#lx: %s\n",
            ppt->uproc->tgid, ppt->vaddr, why);
}

/*
* Save a copy of the original instruction (so it can be single-stepped
* out of line), insert the breakpoint instruction, and awake
* register_uprobe().
*/
static void insert_bkpt(struct uprobe_probept *ppt, struct task_struct *tsk)
{
    struct uprobe_kimg *uk;
    long result = 0;
    int len;

    if (!tsk) {
        /* No surviving tasks associated with ppt->uproc */
        result = -ESRCH;
        goto out;
    }

    /*
     * If access_process_vm() transfers fewer bytes than the maximum
     * instruction size, assume that the probed instruction is smaller
     * than the max and near the end of the last page of instructions.
     * But there must be room at least for a breakpoint-size instruction.
     */
    len = access_process_vm(tsk, ppt->vaddr, ppt->insn, MAX_UINSN_BYTES, 0);
    if (len < BP_INSN_SIZE) {
        bkpt_insertion_failed(ppt,
            "error reading original instruction");
        result = -EIO;
        goto out;
    }
    memcpy(&ppt->opcode, ppt->insn, BP_INSN_SIZE);
    if (ppt->opcode == BREAKPOINT_INSTRUCTION) {
        /*
         * To avoid filling up the log file with complaints
         * about breakpoints already existing, don't log this
         * error.
         */
        //bkpt_insertion_failed(ppt, "bkpt already exists at that addr");
        result = -EEXIST;
        goto out;
    }

    if ((result = arch_validate_probed_insn(ppt, tsk)) < 0) {
        bkpt_insertion_failed(ppt, "instruction type cannot be probed");
        goto out;
    }

    len = set_bp(ppt, tsk);
    if (len < BP_INSN_SIZE) {
        bkpt_insertion_failed(ppt, "failed to insert bkpt instruction");
        result = -EIO;
        goto out;
    }
out:
    ppt->state = (result ? UPROBE_DISABLED : UPROBE_BP_SET);
    list_for_each_entry(uk, &ppt->uprobe_list, list)
        uk->status = result;
    wake_up_all(&ppt->waitq);
}

static void remove_bkpt(struct uprobe_probept *ppt, struct task_struct *tsk)
{
    int len;

    if (tsk) {
        len = set_orig_insn(ppt, tsk);
        if (len < BP_INSN_SIZE) {
            printk(KERN_ERR
                "Error removing uprobe at pid %d vaddr %#lx:"
                " can't restore original instruction\n",
                tsk->tgid, ppt->vaddr);
            /*
             * This shouldn't happen, since we were previously
             * able to write the breakpoint at that address.
             * There's not much we can do besides let the
             * process die with a SIGTRAP the next time the
             * breakpoint is hit.
             */
        }
    }
    /* Wake up unregister_uprobe(). */
    ppt->state = UPROBE_DISABLED;
    wake_up_all(&ppt->waitq);
}

/*
* Runs with all of uproc's threads quiesced and uproc->rwsem write-locked.
* As specified, insert or remove the breakpoint instruction for each
* uprobe_probept on uproc's pending list.
* tsk = one of the tasks associated with uproc -- NULL if there are
* no surviving threads.
* It's OK for uproc->pending_uprobes to be empty here.  It can happen
* if a register and an unregister are requested (by different probers)
* simultaneously for the same pid/vaddr.
* Note that the current task may be a thread in uproc, or it may be
* a task running [un]register_uprobe() (or both).
*/
static void handle_pending_uprobes(struct uprobe_process *uproc,
    struct task_struct *tsk)
{
    struct uprobe_probept *ppt, *tmp;

    list_for_each_entry_safe(ppt, tmp, &uproc->pending_uprobes, pd_node) {
        switch (ppt->state) {
        case UPROBE_INSERTING:
            insert_bkpt(ppt, tsk);
            break;
        case UPROBE_REMOVING:
            remove_bkpt(ppt, tsk);
            break;
        default:
            BUG();
        }
        list_del(&ppt->pd_node);
    }
}

static void utask_adjust_flags(struct uprobe_task *utask, int set,
    unsigned long flags)
{
    unsigned long newflags, oldflags;

    newflags = oldflags = utask->engine->flags;

    if (set)
        newflags |= flags;
    else
        newflags &= ~flags;

    if (newflags != oldflags)
        utrace_set_flags(utask->tsk, utask->engine, newflags);
}

static inline void clear_utrace_quiesce(struct uprobe_task *utask)
{
    utask_adjust_flags(utask, CLEAR_ENGINE_FLAGS,
            UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE));
}

/* Opposite of quiesce_all_threads().  Same locking applies. */
static void rouse_all_threads(struct uprobe_process *uproc)
{
    struct uprobe_task *utask;

    list_for_each_entry(utask, &uproc->thread_list, list) {
        if (utask->quiescing) {
            utask->quiescing = 0;
            if (utask->state == UPTASK_QUIESCENT) {
                utask->state = UPTASK_RUNNING;
                uproc->n_quiescent_threads--;
                clear_utrace_quiesce(utask);
            }
        }
    }
    /* Wake any threads that decided to sleep rather than quiesce. */
    wake_up_all(&uproc->waitq);
}

/*
* If all of uproc's surviving threads have quiesced, do the necessary
* breakpoint insertions or removals and then un-quiesce everybody.
* tsk is a surviving thread, or NULL if there is none.  Runs with
* uproc->rwsem write-locked.
*/
static void check_uproc_quiesced(struct uprobe_process *uproc,
        struct task_struct *tsk)
{
    if (uproc->n_quiescent_threads >= uproc->nthreads) {
        handle_pending_uprobes(uproc, tsk);
        rouse_all_threads(uproc);
    }
}

/*
* Quiesce all threads in the specified process -- e.g., prior to
* breakpoint insertion.  Runs with uproc->rwsem write-locked.
* Returns the number of threads that haven't died yet.
*/
static int quiesce_all_threads(struct uprobe_process *uproc,
        struct uprobe_task **cur_utask_quiescing)
{
    struct uprobe_task *utask;
    struct task_struct *survivor = NULL;    // any survivor
    int survivors = 0;

    *cur_utask_quiescing = NULL;
    list_for_each_entry(utask, &uproc->thread_list, list) {
        survivor = utask->tsk;
        survivors++;
        if (!utask->quiescing) {
            /*
             * If utask is currently handling a probepoint, it'll
             * check utask->quiescing and quiesce when it's done.
             */
            utask->quiescing = 1;
            if (utask->tsk == current)
                *cur_utask_quiescing = utask;
            else if (utask->state == UPTASK_RUNNING) {
                utask->quiesce_master = current;
                utask_adjust_flags(utask, SET_ENGINE_FLAGS,
                    UTRACE_ACTION_QUIESCE
                    | UTRACE_EVENT(QUIESCE));
                utask->quiesce_master = NULL;
            }
        }
    }
    /*
     * If any task was already quiesced (in utrace's opinion) when we
     * called utask_adjust_flags() on it, uprobe_report_quiesce() was
     * called, but wasn't in a position to call check_uproc_quiesced().
     */
    check_uproc_quiesced(uproc, survivor);
    return survivors;
}

static void utask_free_uretprobe_instances(struct uprobe_task *utask)
{
    struct uretprobe_instance *ri;
    struct hlist_node *r1, *r2;

    hlist_for_each_entry_safe(ri, r1, r2, &utask->uretprobe_instances,
            hlist) {
        hlist_del(&ri->hlist);
        kfree(ri);
        uprobe_decref_process(utask->uproc);
    }
}

/* Called with utask->uproc write-locked. */
static void uprobe_free_task(struct uprobe_task *utask)
{
    struct deferred_registration *dr, *d;
    struct delayed_signal *ds, *ds2;

    /*    printk(KERN_INFO "uprobe_free_task %p (tid %ld), caller %pS, ctid %ld\n", utask, utask->tsk->pid, _RET_IP_, current->pid); */

    /*
     * Do this first, since a utask that's still in the utask_table
     * is assumed (e.g., by uprobe_report_exit) to be valid.
     */
    uprobe_unhash_utask(utask);
    list_del(&utask->list);
    list_for_each_entry_safe(dr, d, &utask->deferred_registrations, list) {
        list_del(&dr->list);
        kfree(dr);
    }

    list_for_each_entry_safe(ds, ds2, &utask->delayed_signals, list) {
        list_del(&ds->list);
        kfree(ds);
    }
    utask_free_uretprobe_instances(utask);

    kfree(utask);
}

/* Runs with uproc_mutex held and uproc->rwsem write-locked. */
static void uprobe_free_process(struct uprobe_process *uproc)
{
    struct uprobe_task *utask, *tmp;
    struct uprobe_ssol_area *area = &uproc->ssol_area;

    /* printk(KERN_INFO "uprobe_free_process %p (pid %ld), caller %pS, ctid %ld\n", uproc, uproc->tgid, _RET_IP_, current->pid); */

    if (!uproc->finished)
        uprobe_release_ssol_vma(uproc);
    if (area->slots)
        kfree(area->slots);
    if (!hlist_unhashed(&uproc->hlist))
        hlist_del(&uproc->hlist);
    list_for_each_entry_safe(utask, tmp, &uproc->thread_list, list) {
        /*
         * utrace_detach() is OK here (required, it seems) even if
         * utask->tsk == current and we're in a utrace callback.
         */
        if (utask->engine)
            utrace_detach(utask->tsk, utask->engine);
        uprobe_free_task(utask);
    }
    up_write(&uproc->rwsem);    // So kfree doesn't complain
    /* printk(KERN_INFO "uprobe_free_process zap %p\n", uproc); */
    kfree(uproc);
}

/*
* Decrement uproc's ref count.  If it's zero, free uproc and return 1.
* Else return 0.  If uproc is locked, don't call this; use
* uprobe_decref_process().
*
* If we free uproc, we also decrement the ref-count on the uprobes
* module, if any.  If somebody is doing "rmmod --wait uprobes", this
* function could schedule removal of the module.  Therefore, don't call
* this function and then sleep in uprobes code, unless you know you'll
* return with the module ref-count > 0.
*/
static int uprobe_put_process(struct uprobe_process *uproc)
{
    int freed = 0;
    if (atomic_dec_and_test(&uproc->refcount)) {
        lock_uproc_table();
        down_write(&uproc->rwsem);
        if (unlikely(atomic_read(&uproc->refcount) != 0)) {
            /*
             * This works because uproc_mutex is held any
             * time the ref count can go from 0 to 1 -- e.g.,
             * register_uprobe() snuck in with a new probe,
             * or a callback such as uprobe_report_exit()
             * just started.
             */
            up_write(&uproc->rwsem);
        } else {
            uprobe_free_process(uproc);
            freed = 1;
        }
        unlock_uproc_table();
    }
    if (freed)
        module_put(THIS_MODULE);
    return freed;
}

static struct uprobe_kimg *uprobe_mk_kimg(struct uprobe *u)
{
    struct uprobe_kimg *uk = (struct uprobe_kimg*)kzalloc(sizeof *uk,
        GFP_USER);
    if (unlikely(!uk))
        return ERR_PTR(-ENOMEM);
    u->kdata = uk;
    uk->uprobe = u;
    uk->ppt = NULL;
    INIT_LIST_HEAD(&uk->list);
    uk->status = -EBUSY;
    return uk;
}

/*
* Allocate a uprobe_task object for t and add it to uproc's list.
* Called with t "got" and uproc->rwsem write-locked.  Called in one of
* the following cases:
* - before setting the first uprobe in t's process
* - we're in uprobe_report_clone() and t is the newly added thread
* Returns:
* - pointer to new uprobe_task on success
* - NULL if t dies before we can utrace_attach it
* - negative errno otherwise
*/
static struct uprobe_task *uprobe_add_task(struct task_struct *t,
        struct uprobe_process *uproc)
{
    struct uprobe_task *utask;
    struct utrace_attached_engine *engine;

    utask = (struct uprobe_task *)kzalloc(sizeof *utask, GFP_USER);
    if (unlikely(utask == NULL))
        return ERR_PTR(-ENOMEM);

    utask->tsk = t;
    utask->state = UPTASK_RUNNING;
    utask->quiescing = 0;
    utask->uproc = uproc;
    utask->active_probe = NULL;
    utask->doomed = 0;
    INIT_HLIST_HEAD(&utask->uretprobe_instances);
    INIT_LIST_HEAD(&utask->deferred_registrations);
    INIT_LIST_HEAD(&utask->delayed_signals);
    INIT_LIST_HEAD(&utask->list);
    list_add_tail(&utask->list, &uproc->thread_list);
    uprobe_hash_utask(utask);

    engine = utrace_attach(t, UTRACE_ATTACH_CREATE, p_uprobe_utrace_ops,
        utask);
    if (IS_ERR(engine)) {
        long err = PTR_ERR(engine);
        printk("uprobes: utrace_attach failed, returned %ld\n", err);
        uprobe_free_task(utask);
        if (err == -ESRCH)
             return NULL;
        return ERR_PTR(err);
    }
    utask->engine = engine;
    /*
     * Always watch for traps, clones, execs and exits. Caller must
     * set any other engine flags.
     */
    utask_adjust_flags(utask, SET_ENGINE_FLAGS,
            UTRACE_EVENT(SIGNAL) | UTRACE_EVENT(SIGNAL_IGN) |
            UTRACE_EVENT(SIGNAL_CORE) | UTRACE_EVENT(EXEC) |
            UTRACE_EVENT(CLONE) | UTRACE_EVENT(EXIT));
    /*
     * Note that it's OK if t dies just after utrace_attach, because
     * with the engine in place, the appropriate report_* callback
     * should handle it after we release uproc->rwsem.
     */
    return utask;
}

/* See comment in uprobe_mk_process(). */
static struct task_struct *find_next_thread_to_add(struct uprobe_process *uproc,
        struct task_struct *start)
{
    struct task_struct *t;
    struct uprobe_task *utask;

    read_lock(&tasklist_lock);
    t = start;
    do {
        if (unlikely(t->flags & PF_EXITING))
            goto dont_add;
        list_for_each_entry(utask, &uproc->thread_list, list) {
            if (utask->tsk == t)
                /* Already added */
                goto dont_add;
        }
        /* Found thread/task to add. */
        get_task_struct(t);
        read_unlock(&tasklist_lock);
        return t;
dont_add:
        t = next_thread(t);
    } while (t != start);

    read_unlock(&tasklist_lock);
    return NULL;
}

/*
 * Create a per-process uproc struct.
 * at_fork: indicates that uprobe_mk_process() was called from the fork
 * context of a probed process; see uprobe_fork_uproc() for details.
 *
 * Runs with uproc_mutex held.
 * Returns with uproc->rwsem write-locked when not called
 * from fork context.
 */
static struct uprobe_process *uprobe_mk_process(struct task_struct *p,
                        bool at_fork)
{
    struct uprobe_process *uproc;
    struct uprobe_task *utask;
    struct task_struct *add_me;
    int i;
    long err;

    uproc = (struct uprobe_process *)kzalloc(sizeof *uproc, GFP_USER);
    if (unlikely(uproc == NULL))
        return ERR_PTR(-ENOMEM);

    /* Initialize fields */
    atomic_set(&uproc->refcount, 1);
    init_rwsem(&uproc->rwsem);
    if (!at_fork)
        /* not called from fork context. */
        down_write(&uproc->rwsem);
    init_waitqueue_head(&uproc->waitq);
    for (i = 0; i < UPROBE_TABLE_SIZE; i++)
        INIT_HLIST_HEAD(&uproc->uprobe_table[i]);
    uproc->nppt = 0;
    INIT_LIST_HEAD(&uproc->pending_uprobes);
    INIT_LIST_HEAD(&uproc->thread_list);
    uproc->nthreads = 0;
    uproc->n_quiescent_threads = 0;
    INIT_HLIST_NODE(&uproc->hlist);
    uproc->tgid = p->tgid;
    uproc->finished = 0;
    uproc->uretprobe_trampoline_addr = NULL;

    uproc->ssol_area.insn_area = NULL;
    uproc->ssol_area.initialized = 0;
    mutex_init(&uproc->ssol_area.setup_mutex);
    /* Initialize rest of area in uprobe_init_ssol(). */
#ifdef CONFIG_UPROBES_SSOL
    uproc->sstep_out_of_line = 1;
#else
    uproc->sstep_out_of_line = 0;
#endif

    /*
     * Create and populate one utask per thread in this process.  We
     * can't call uprobe_add_task() while holding tasklist_lock, so we:
     *    1. Lock task list.
     *    2. Find the next task, add_me, in this process that's not
     *    already on uproc's thread_list.  (Start search at previous
     *    one found.)
     *    3. Unlock task list.
     *    4. uprobe_add_task(add_me, uproc)
     *    Repeat 1-4 'til we have utasks for all tasks.
     */
    add_me = p;
    while ((add_me = find_next_thread_to_add(uproc, add_me)) != NULL) {
        utask = uprobe_add_task(add_me, uproc);
        put_task_struct(add_me);
        if (IS_ERR(utask)) {
            err = PTR_ERR(utask);
            goto fail;
        }
        if (utask)
            uproc->nthreads++;
    }

    if (uproc->nthreads == 0) {
        /* All threads -- even p -- are dead. */
        err = -ESRCH;
        goto fail;
    }
    return uproc;

fail:
    uprobe_free_process(uproc);
    return ERR_PTR(err);
}

/*
* Creates a uprobe_probept and connects it to uk and uproc.  Runs with
* uproc->rwsem write-locked.
*/
static struct uprobe_probept *uprobe_add_probept(struct uprobe_kimg *uk,
    struct uprobe_process *uproc)
{
    struct uprobe_probept *ppt;

    ppt = (struct uprobe_probept *)kzalloc(sizeof *ppt, GFP_USER);
    if (unlikely(ppt == NULL))
        return ERR_PTR(-ENOMEM);
    init_waitqueue_head(&ppt->waitq);
    mutex_init(&ppt->ssil_mutex);
    mutex_init(&ppt->slot_mutex);
    ppt->slot = NULL;

    /* Connect to uk. */
    INIT_LIST_HEAD(&ppt->uprobe_list);
    list_add_tail(&uk->list, &ppt->uprobe_list);
    uk->ppt = ppt;
    uk->status = -EBUSY;
    ppt->vaddr = uk->uprobe->vaddr;

    /* Connect to uproc. */
    ppt->state = UPROBE_INSERTING;
    ppt->uproc = uproc;
    INIT_LIST_HEAD(&ppt->pd_node);
    list_add_tail(&ppt->pd_node, &uproc->pending_uprobes);
    INIT_HLIST_NODE(&ppt->ut_node);
    hlist_add_head(&ppt->ut_node,
        &uproc->uprobe_table[hash_long(ppt->vaddr, UPROBE_HASH_BITS)]);
    uproc->nppt++;
    uprobe_get_process(uproc);
    return ppt;
}

/* ppt is going away.  Free its slot (if it owns one) in the SSOL area. */
static void uprobe_free_slot(struct uprobe_probept *ppt)
{
    struct uprobe_ssol_slot *slot = ppt->slot;
    if (slot) {
        down_write(&slot->rwsem);
        if (slot->owner == ppt) {
            unsigned long flags;
            struct uprobe_ssol_area *area = &ppt->uproc->ssol_area;
            spin_lock_irqsave(&area->lock, flags);
            slot->state = SSOL_FREE;
            slot->owner = NULL;
            area->nfree++;
            spin_unlock_irqrestore(&area->lock, flags);
        }
        up_write(&slot->rwsem);
    }
}

/*
* Runs with ppt->uproc write-locked.  Frees ppt and decrements the ref count
* on ppt->uproc (but ref count shouldn't hit 0).
*/
static void uprobe_free_probept(struct uprobe_probept *ppt)
{
    struct uprobe_process *uproc = ppt->uproc;
    uprobe_free_slot(ppt);
    hlist_del(&ppt->ut_node);
    uproc->nppt--;
    kfree(ppt);
    uprobe_decref_process(uproc);
}

static void uprobe_free_kimg(struct uprobe_kimg *uk)
{
    uk->uprobe->kdata = NULL;
    kfree(uk);
}

/*
* Runs with uprobe_process write-locked.
* Note that we never free u, because the user owns that.
*/
static void purge_uprobe(struct uprobe_kimg *uk)
{
    struct uprobe_probept *ppt = uk->ppt;
    list_del(&uk->list);
    uprobe_free_kimg(uk);
    if (list_empty(&ppt->uprobe_list))
        uprobe_free_probept(ppt);
}

/* TODO: Avoid code duplication with uprobe_validate_vaddr(). */
static int uprobe_validate_vma(struct task_struct *t, unsigned long vaddr)
{
    struct vm_area_struct *vma;
    struct mm_struct *mm;
    int ret = 0;

    mm = get_task_mm(t);
    if (!mm)
        return -EINVAL;
    down_read(&mm->mmap_sem);
    vma = find_vma(mm, vaddr);
    if (!vma || vaddr < vma->vm_start)
        ret = -ENOENT;
    else if (!(vma->vm_flags & VM_EXEC))
        ret = -EFAULT;
    up_read(&mm->mmap_sem);
    mmput(mm);
    return ret;
}

/* Probed address must be in an executable VM area, outside the SSOL area. */
static int uprobe_validate_vaddr(struct task_struct *p, unsigned long vaddr,
    struct uprobe_process *uproc)
{
    struct vm_area_struct *vma;
    struct mm_struct *mm = p->mm;
    if (!mm)
        return -EINVAL;
    down_read(&mm->mmap_sem);
    vma = find_vma(mm, vaddr);
    if (!vma || vaddr < vma->vm_start || !(vma->vm_flags & VM_EXEC)
        || vma->vm_start == (unsigned long) uproc->ssol_area.insn_area) {
        up_read(&mm->mmap_sem);
        return -EINVAL;
    }
    up_read(&mm->mmap_sem);
    return 0;
}

static struct task_struct *uprobe_get_task(pid_t pid)
{
    struct task_struct *p;
    rcu_read_lock();
    p = find_task_by_pid(pid);
    if (p)
        get_task_struct(p);
    rcu_read_unlock();
    return p;
}

/* Runs with utask->uproc read-locked.  Returns -EINPROGRESS on success. */
static int defer_registration(struct uprobe *u, int regflag,
        struct uprobe_task *utask)
{
    struct deferred_registration *dr =
        kmalloc(sizeof(struct deferred_registration), GFP_USER);
    if (!dr)
        return -ENOMEM;
    dr->type = (is_uretprobe(u) ? UPTY_URETPROBE : UPTY_UPROBE);
    dr->uprobe = u;
    dr->regflag = regflag;
    INIT_LIST_HEAD(&dr->list);
    list_add_tail(&dr->list, &utask->deferred_registrations);
    return -EINPROGRESS;
}

/* See Documentation/uprobes.txt. */
int register_uprobe(struct uprobe *u)
{
    struct task_struct *p;
    struct uprobe_process *uproc;
    struct uprobe_kimg *uk;
    struct uprobe_probept *ppt;
    struct uprobe_task *cur_utask, *cur_utask_quiescing = NULL;
    int survivors, ret = 0, uproc_is_new = 0;
    if (!u || !u->handler)
        return -EINVAL;

    p = uprobe_get_task(u->pid);
    if (!p)
        return -ESRCH;

    cur_utask = uprobe_find_utask(current);
    if (cur_utask && cur_utask->active_probe) {
        /*
         * Called from handler; cur_utask->uproc is read-locked.
         * Do this registration later.
         */
        put_task_struct(p);
        return defer_registration(u, 1, cur_utask);
    }

    /* Get the uprobe_process for this pid, or make a new one. */
    lock_uproc_table();
    uproc = uprobe_find_process(p->tgid);

    if (uproc)
        unlock_uproc_table();
    else {
        /* Creating a new uprobe_process.  Ref-count the module. */
        if (!try_module_get(THIS_MODULE)) {
            /* uprobes.ko is being removed. */
            ret = -ENOSYS;
            unlock_uproc_table();
            goto fail_tsk;
        }
        uproc = uprobe_mk_process(p, 0);
        if (IS_ERR(uproc)) {
            ret = (int) PTR_ERR(uproc);
            unlock_uproc_table();
            module_put(THIS_MODULE);
            goto fail_tsk;
        }
        /* Hold uproc_mutex until we've added uproc to uproc_table. */
        uproc_is_new = 1;
    }

    if (is_uretprobe(u) && IS_ERR(uproc->uretprobe_trampoline_addr)) {
        /* Previously failed to set up trampoline. */
        ret = -ENOMEM;
        goto fail_uproc;
    }

    if ((ret = uprobe_validate_vaddr(p, u->vaddr, uproc)) < 0)
        goto fail_uproc;

    if (u->kdata) {
        /*
         * Probe is already/still registered.  This is the only
         * place we return -EBUSY to the user.
         */
        ret = -EBUSY;
        goto fail_uproc;
    }

    uk = uprobe_mk_kimg(u);
    if (IS_ERR(uk)) {
        ret = (int) PTR_ERR(uk);
        goto fail_uproc;
    }

    /* See if we already have a probepoint at the vaddr. */
    ppt = (uproc_is_new ? NULL : uprobe_find_probept(uproc, u->vaddr));
    if (ppt) {
        /* Breakpoint is already in place, or soon will be. */
        uk->ppt = ppt;
        list_add_tail(&uk->list, &ppt->uprobe_list);
        switch (ppt->state) {
        case UPROBE_INSERTING:
            uk->status = -EBUSY;    // in progress
            if (uproc->tgid == current->tgid) {
                cur_utask_quiescing = cur_utask;
                BUG_ON(!cur_utask_quiescing);
            }
            break;
        case UPROBE_REMOVING:
            /* Wait!  Don't remove that bkpt after all! */
            ppt->state = UPROBE_BP_SET;
            list_del(&ppt->pd_node);  // Remove from pending list.
            wake_up_all(&ppt->waitq); // Wake unregister_uprobe().
            /*FALLTHROUGH*/
        case UPROBE_BP_SET:
            uk->status = 0;
            break;
        default:
            BUG();
        }
        up_write(&uproc->rwsem);
        put_task_struct(p);
        if (uk->status == 0) {
            uprobe_put_process(uproc);
            return 0;
        }
        goto await_bkpt_insertion;
    } else {
        ppt = uprobe_add_probept(uk, uproc);
        if (IS_ERR(ppt)) {
            ret = (int) PTR_ERR(ppt);
            goto fail_uk;
        }
    }

    if (uproc_is_new) {
        hlist_add_head(&uproc->hlist,
            &uproc_table[hash_long(uproc->tgid, UPROBE_HASH_BITS)]);
        unlock_uproc_table();
    }
    put_task_struct(p);
    survivors = quiesce_all_threads(uproc, &cur_utask_quiescing);

    if (survivors == 0) {
        purge_uprobe(uk);
        up_write(&uproc->rwsem);
        uprobe_put_process(uproc);
        return -ESRCH;
    }
    up_write(&uproc->rwsem);

await_bkpt_insertion:
    if (cur_utask_quiescing)
        /* Current task is probing its own process. */
        (void) utask_fake_quiesce(cur_utask_quiescing);
    else
        wait_event(ppt->waitq, ppt->state != UPROBE_INSERTING);
    ret = uk->status;
    if (ret != 0) {
        down_write(&uproc->rwsem);
        purge_uprobe(uk);
        up_write(&uproc->rwsem);
    }
    uprobe_put_process(uproc);
    return ret;

fail_uk:
    uprobe_free_kimg(uk);

fail_uproc:
    if (uproc_is_new) {
        uprobe_free_process(uproc);
        unlock_uproc_table();
        module_put(THIS_MODULE);
    } else {
        up_write(&uproc->rwsem);
        uprobe_put_process(uproc);
    }

fail_tsk:
    put_task_struct(p);
    return ret;
}
EXPORT_SYMBOL_GPL(register_uprobe);
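
/*
 * Example (a sketch, not part of this build): the minimal client side
 * of register_uprobe().  The struct uprobe fields used here (pid,
 * vaddr, handler) match their uses above; everything prefixed my_ is
 * an illustrative placeholder.
 *
 *    static void my_handler(struct uprobe *u, struct pt_regs *regs)
 *    {
 *        printk(KERN_INFO "probe hit at %#lx\n", u->vaddr);
 *    }
 *
 *    static struct uprobe my_probe;
 *
 *    my_probe.pid = my_target_tid;     // a thread in the probed process
 *    my_probe.vaddr = my_probe_addr;   // must be in an executable vma
 *    my_probe.handler = my_handler;
 *    ret = register_uprobe(&my_probe); // 0 on success, else -errno
 */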

void __unregister_uprobe(struct uprobe *u, bool remove_bkpt)
{
    struct task_struct *p;
    struct uprobe_process *uproc;
    struct uprobe_kimg *uk;
    struct uprobe_probept *ppt;
    struct uprobe_task *cur_utask, *cur_utask_quiescing = NULL;

    if (!u)
        return;
    p = uprobe_get_task(u->pid);
    if (!p)
        return;

    cur_utask = uprobe_find_utask(current);
    if (cur_utask && cur_utask->active_probe) {
        /* Called from handler; uproc is read-locked; do this later */
        put_task_struct(p);
        (void) defer_registration(u, 0, cur_utask);
        return;
    }

    /*
     * Lock uproc before walking the graph, in case the process we're
     * probing is exiting.
     */
    lock_uproc_table();
    uproc = uprobe_find_process(p->tgid);
    unlock_uproc_table();
    put_task_struct(p);
    if (!uproc)
        return;

    uk = (struct uprobe_kimg *)u->kdata;
    if (!uk)
        /*
         * This probe was never successfully registered, or
         * has already been unregistered.
         */
        goto done;
    if (uk->status == -EBUSY)
        /* Looks like register or unregister is already in progress. */
        goto done;
    ppt = uk->ppt;

    list_del(&uk->list);
    uprobe_free_kimg(uk);

    if (is_uretprobe(u))
        zap_uretprobe_instances(u, uproc);

    if (!list_empty(&ppt->uprobe_list))
        goto done;

    /* The last uprobe at ppt's probepoint is being unregistered. */
    if (!remove_bkpt) {
        uprobe_free_probept(ppt);
        goto done;
    }

    /* Queue the breakpoint for removal. */
    ppt->state = UPROBE_REMOVING;
    list_add_tail(&ppt->pd_node, &uproc->pending_uprobes);

    (void) quiesce_all_threads(uproc, &cur_utask_quiescing);
    up_write(&uproc->rwsem);
    if (cur_utask_quiescing)
        /* Current task is probing its own process. */
        (void) utask_fake_quiesce(cur_utask_quiescing);
    else
        wait_event(ppt->waitq, ppt->state != UPROBE_REMOVING);

    if (likely(ppt->state == UPROBE_DISABLED)) {
        down_write(&uproc->rwsem);
        uprobe_free_probept(ppt);
        /* else somebody else's register_uprobe() resurrected ppt. */
        up_write(&uproc->rwsem);
    }
    uprobe_put_process(uproc);
    return;

done:
    up_write(&uproc->rwsem);
    uprobe_put_process(uproc);
}

/* See Documentation/uprobes.txt. */
void unregister_uprobe(struct uprobe *u)
{
    __unregister_uprobe(u, true);
}
EXPORT_SYMBOL_GPL(unregister_uprobe);

void unmap_uprobe(struct uprobe *u)
{
    __unregister_uprobe(u, false);
}
EXPORT_SYMBOL_GPL(unmap_uprobe);
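
/*
 * Example (a sketch): registering a uretprobe.  struct uretprobe embeds
 * a struct uprobe as member .u (see the container_of() in
 * uprobe_run_def_regs() below), and register_uretprobe() is declared in
 * uprobes.h.  The handler member and its signature are assumptions
 * based on that header; my_* names are placeholders.
 *
 *    static void my_ret_handler(struct uretprobe_instance *ri,
 *                               struct pt_regs *regs)
 *    {
 *        ...
 *    }
 *
 *    static struct uretprobe my_rp;
 *
 *    my_rp.u.pid = my_target_tid;
 *    my_rp.u.vaddr = my_func_entry;    // address of the function entry
 *    my_rp.handler = my_ret_handler;   // runs when the function returns
 *    ret = register_uretprobe(&my_rp);
 */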

/* Find a surviving thread in uproc.  Runs with uproc->rwsem locked. */
static struct task_struct *find_surviving_thread(struct uprobe_process *uproc)
{
    struct uprobe_task *utask;

    list_for_each_entry(utask, &uproc->thread_list, list)
        return utask->tsk;
    return NULL;
}

/*
 * Run all the deferred_registrations previously queued by the current utask.
 * Runs with no locks or mutexes held.  The current utask's uprobe_process
 * is ref-counted, so it won't disappear as a result of the
 * unregister_u*probe() calls made here.
 */
static void uprobe_run_def_regs(struct list_head *drlist)
{
    struct deferred_registration *dr, *d;

    list_for_each_entry_safe(dr, d, drlist, list) {
        int result = 0;
        struct uprobe *u = dr->uprobe;

        if (dr->type == UPTY_URETPROBE) {
            struct uretprobe *rp =
                container_of(u, struct uretprobe, u);
            if (dr->regflag)
                result = register_uretprobe(rp);
            else
                unregister_uretprobe(rp);
        } else {
            if (dr->regflag)
                result = register_uprobe(u);
            else
                unregister_uprobe(u);
        }
        if (u && u->registration_callback)
            u->registration_callback(u, dr->regflag, dr->type,
                    result);
        list_del(&dr->list);
        kfree(dr);
    }
}
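
/*
 * A sketch of that flow from the prober's side: [un]register_uprobe()
 * called from inside a handler returns -EINPROGRESS (via
 * defer_registration() above), and the final status arrives later
 * through u->registration_callback.  The callback's parameter types are
 * inferred from the call above; my_* names are placeholders.
 *
 *    static void my_reg_cb(struct uprobe *u, int regflag,
 *            enum uprobe_type type, int result)
 *    {
 *        if (regflag && result != 0)
 *            printk(KERN_ERR "deferred registration at %#lx"
 *                " failed: %d\n", u->vaddr, result);
 *    }
 *
 *    // In a uprobe handler:
 *    new_probe->registration_callback = my_reg_cb;
 *    ret = register_uprobe(new_probe);    // returns -EINPROGRESS here
 */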

/*
* Functions for allocation of the SSOL area, and the instruction slots
* therein
*/

/*
* We leave the SSOL vma in place even after all the probes are gone.
* We used to remember its address in current->mm->context.uprobes_ssol_area,
* but adding that field to mm_context broke KAPI compatibility.
* Instead, when we shut down the uproc for lack of probes, we "tag" the vma
* for later identification.  This is not particularly robust, but it's
* no more vulnerable to ptrace or mprotect mischief than any other part
* of the address space.
*/
#define UPROBES_SSOL_VMA_TAG \
    "This is the SSOL area for uprobes.  Mess with it at your own risk."
#define UPROBES_SSOL_TAGSZ ((int)sizeof(UPROBES_SSOL_VMA_TAG))

/*
* Searching downward from ceiling address (0 signifies top of memory),
* find the next vma whose flags indicate it could be an SSOL area.
* Return its address, or 0 for no match.
*/
static unsigned long find_next_possible_ssol_vma(unsigned long ceiling)
{
    struct mm_struct *mm = current->mm;
    struct rb_node *rb_node;
    struct vm_area_struct *vma;
    unsigned long good_flags = VM_EXEC | VM_DONTEXPAND;
    unsigned long bad_flags = VM_WRITE | VM_GROWSDOWN | VM_GROWSUP;
    unsigned long addr = 0;

    down_read(&mm->mmap_sem);
    for (rb_node = rb_last(&mm->mm_rb); rb_node; rb_node = rb_prev(rb_node)) {
        vma = rb_entry(rb_node, struct vm_area_struct, vm_rb);
        if (ceiling && vma->vm_start >= ceiling)
            continue;
        if ((vma->vm_flags & good_flags) != good_flags)
            continue;
        if ((vma->vm_flags & bad_flags) != 0)
            continue;
        addr = vma->vm_start;
        break;
    }
    up_read(&mm->mmap_sem);
    return addr;
}

static noinline unsigned long find_old_ssol_vma(void)
{
    unsigned long addr;
    unsigned long ceiling = 0;    // top of memory
    char buf[UPROBES_SSOL_TAGSZ];
    while ((addr = find_next_possible_ssol_vma(ceiling)) != 0) {
        ceiling = addr;
        if (copy_from_user(buf, (const void __user*)addr,
                        UPROBES_SSOL_TAGSZ))
            continue;
        if (!strcmp(buf, UPROBES_SSOL_VMA_TAG))
            return addr;
    }
    return 0;
}

/*
* Mmap nbytes bytes for the uprobes SSOL area for the current process.
* Returns the address of the area, or a negative errno.
* This approach was suggested by Roland McGrath.
*/
static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes)
{
    unsigned long addr;
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma;

    BUG_ON(nbytes & ~PAGE_MASK);
    if ((addr = find_old_ssol_vma()) != 0)
        return addr;

    down_write(&mm->mmap_sem);
    /*
     * Find the end of the top mapping and skip a page.
     * If there is no space for PAGE_SIZE above
     * that, mmap will ignore our address hint.
     */
    vma = rb_entry(rb_last(&mm->mm_rb), struct vm_area_struct, vm_rb);
    addr = vma->vm_end + PAGE_SIZE;
    addr = do_mmap_pgoff(NULL, addr, nbytes, PROT_EXEC,
                    MAP_PRIVATE|MAP_ANONYMOUS, 0);
    if (addr & ~PAGE_MASK) {
        up_write(&mm->mmap_sem);
        printk(KERN_ERR "Uprobes failed to allocate a vma for"
            " pid/tgid %d/%d for single-stepping out of line.\n",
            current->pid, current->tgid);
        return addr;
    }

    vma = find_vma(mm, addr);
    BUG_ON(!vma);
    /*
     * Don't expand vma on mremap().  Allow vma to be copied on
     * fork() -- see uprobe_fork_uproc().
     */
    vma->vm_flags |= VM_DONTEXPAND;

    up_write(&mm->mmap_sem);
    return addr;
}

/**
* uprobe_init_ssol -- initialize per-process area for single stepping
* out-of-line.
* @uproc:    probed process
* @tsk:    probed task: must be current if @insn_area is %NULL
* @insn_area:    virtual address of the already-established SSOL vma --
* see uprobe_fork_uproc().
*
* Returns with @uproc->ssol_area.insn_area pointing to the initialized
* area, or set to a negative errno.
*/
static void uprobe_init_ssol(struct uprobe_process *uproc,
    struct task_struct *tsk, __user uprobe_opcode_t *insn_area)
{
    struct uprobe_ssol_area *area = &uproc->ssol_area;
    struct uprobe_ssol_slot *slot;
    int i;
    char *slot_addr;    // Simplify pointer arithmetic

    /* Trampoline setup will either fail or succeed here. */
    uproc->uretprobe_trampoline_addr = ERR_PTR(-ENOMEM);

    if (insn_area) {
        BUG_ON(IS_ERR(insn_area));
        area->insn_area = insn_area;
    } else {
        BUG_ON(tsk != current);
        area->insn_area =
            (uprobe_opcode_t *) uprobe_setup_ssol_vma(PAGE_SIZE);
        if (IS_ERR(area->insn_area))
            return;
    }

    area->nfree = area->nslots = PAGE_SIZE / MAX_UINSN_BYTES;
    if (area->nslots > MAX_SSOL_SLOTS)
        area->nfree = area->nslots = MAX_SSOL_SLOTS;
    area->slots = (struct uprobe_ssol_slot *)
        kzalloc(sizeof(struct uprobe_ssol_slot) * area->nslots,
                                GFP_USER);
    if (!area->slots) {
        area->insn_area = ERR_PTR(-ENOMEM);
        return;
    }
    mutex_init(&area->populate_mutex);
    spin_lock_init(&area->lock);
    area->next_slot = 0;
    slot_addr = (char*) area->insn_area;
    for (i = 0; i < area->nslots; i++) {
        slot = &area->slots[i];
        init_rwsem(&slot->rwsem);
        slot->state = SSOL_FREE;
        slot->owner = NULL;
        slot->last_used = 0;
        slot->insn = (__user uprobe_opcode_t *) slot_addr;
        slot_addr += MAX_UINSN_BYTES;
    }
    uretprobe_set_trampoline(uproc, tsk);
}

/*
* Verify that the SSOL area has been set up for uproc.  Returns a
* pointer to the SSOL area, or a negative errno if we couldn't set it up.
*/
static __user uprobe_opcode_t
            *uprobe_verify_ssol(struct uprobe_process *uproc)
{
    struct uprobe_ssol_area *area = &uproc->ssol_area;

    if (unlikely(!area->initialized)) {
        /* First time through for this probed process */
        mutex_lock(&uproc->ssol_area.setup_mutex);
        if (likely(!area->initialized)) {
            /* Nobody snuck in and set things up ahead of us. */
            uprobe_init_ssol(uproc, current, NULL);
            area->initialized = 1;
        }
        mutex_unlock(&uproc->ssol_area.setup_mutex);
    }
    return area->insn_area;
}

/*
* uproc is going away, but the process lives on.  Tag the SSOL vma so a new
* uproc can reuse it if more probes are requested.
*/
static void uprobe_release_ssol_vma(struct uprobe_process *uproc)
{
    unsigned long addr;
    struct task_struct *tsk;
    static const char *buf = UPROBES_SSOL_VMA_TAG;
    int nb;

    /* No need to muck with dying image's mm_struct. */
    BUG_ON(uproc->finished);
    addr = (unsigned long) uproc->ssol_area.insn_area;
    if (!addr || IS_ERR_VALUE(addr))
        return;
    tsk = find_surviving_thread(uproc);
    if (!tsk)
        return;
    nb = access_process_vm(tsk, addr, (void*)buf, UPROBES_SSOL_TAGSZ, 1);
    if (nb != UPROBES_SSOL_TAGSZ)
        printk(KERN_ERR "Failed to tag uprobes SSOL vma: "
            "pid/tgid=%d/%d, vaddr=%#lx\n", tsk->pid, tsk->tgid,
            addr);
}

static inline int advance_slot(int slot, struct uprobe_ssol_area *area)
{
    /* Slot 0 is reserved for uretprobe trampoline. */
    slot++;
    if (unlikely(slot >= area->nslots))
        slot = 1;
    return slot;
}

/*
* Return the slot number of the least-recently-used slot in the
* neighborhood of area->next_slot.  Limit the number of slots we test
* to keep it fast.  Nobody dies if this isn't the best choice.
*/
static int uprobe_lru_insn_slot(struct uprobe_ssol_area *area)
{
#define MAX_LRU_TESTS 10
    struct uprobe_ssol_slot *s;
    int lru_slot = -1;
    unsigned long lru_time = ULONG_MAX;
    int nr_lru_tests = 0;
    int slot = area->next_slot;
    do {
        s = &area->slots[slot];
        if (likely(s->state == SSOL_ASSIGNED)) {
            if (lru_time > s->last_used) {
                lru_time = s->last_used;
                lru_slot = slot;
            }
            if (++nr_lru_tests >= MAX_LRU_TESTS)
                break;
        }
        slot = advance_slot(slot, area);
    } while (slot != area->next_slot);

    if (unlikely(lru_slot < 0))
        /* All slots are in the act of being stolen.  Join the melee. */
        return area->next_slot;
    else
        return lru_slot;
}

/*
* Choose an instruction slot and take it.  Choose a free slot if there is one.
* Otherwise choose the least-recently-used slot.  Returns with slot
* read-locked and containing the desired instruction.  Runs with
* ppt->slot_mutex locked.
*/
static struct uprobe_ssol_slot
        *uprobe_take_insn_slot(struct uprobe_probept *ppt)
{
    struct uprobe_process *uproc = ppt->uproc;
    struct uprobe_ssol_area *area = &uproc->ssol_area;
    struct uprobe_ssol_slot *s;
    int len, slot;
    unsigned long flags;

    spin_lock_irqsave(&area->lock, flags);

    if (area->nfree) {
        for (slot = 0; slot < area->nslots; slot++) {
            if (area->slots[slot].state == SSOL_FREE) {
                area->nfree--;
                goto found_slot;
            }
        }
        /* Shouldn't get here.  Fix nfree and get on with life. */
        area->nfree = 0;
    }
    slot = uprobe_lru_insn_slot(area);

found_slot:
    area->next_slot = advance_slot(slot, area);
    s = &area->slots[slot];
    s->state = SSOL_BEING_STOLEN;

    spin_unlock_irqrestore(&area->lock, flags);

    /* Wait for current users of slot to finish. */
    down_write(&s->rwsem);
    ppt->slot = s;
    s->owner = ppt;
    s->last_used = jiffies;
    s->state = SSOL_ASSIGNED;
    /* Copy the original instruction to the chosen slot. */
    mutex_lock(&area->populate_mutex);
    len = access_process_vm(current, (unsigned long)s->insn,
                     ppt->insn, MAX_UINSN_BYTES, 1);
    mutex_unlock(&area->populate_mutex);
    if (unlikely(len < MAX_UINSN_BYTES)) {
        up_write(&s->rwsem);
        printk(KERN_ERR "Failed to copy instruction at %#lx"
            " to SSOL area (%#lx)\n", ppt->vaddr,
            (unsigned long) area->slots);
        return NULL;
    }
    /* Let other threads single-step in this slot. */
    downgrade_write(&s->rwsem);
    return s;
}

/* ppt doesn't own a slot.  Get one for ppt, and return it read-locked. */
static struct uprobe_ssol_slot
        *uprobe_find_insn_slot(struct uprobe_probept *ppt)
{
    struct uprobe_ssol_slot *slot;

    mutex_lock(&ppt->slot_mutex);
    slot = ppt->slot;
    if (unlikely(slot && slot->owner == ppt)) {
        /* Looks like another thread snuck in and got a slot for us. */
        down_read(&slot->rwsem);
        if (likely(slot->owner == ppt)) {
            slot->last_used = jiffies;
            mutex_unlock(&ppt->slot_mutex);
            return slot;
        }
        /* ... but then somebody stole it. */
        up_read(&slot->rwsem);
    }
    slot = uprobe_take_insn_slot(ppt);
    mutex_unlock(&ppt->slot_mutex);
    return slot;
}

/*
* Ensure that ppt owns an instruction slot for single-stepping.
* Returns with the slot read-locked and ppt->slot pointing at it.
*/
static
struct uprobe_ssol_slot *uprobe_get_insn_slot(struct uprobe_probept *ppt)
{
    struct uprobe_ssol_slot *slot;

retry:
    slot = ppt->slot;
    if (unlikely(!slot))
        return uprobe_find_insn_slot(ppt);

    down_read(&slot->rwsem);
    if (unlikely(slot != ppt->slot)) {
        up_read(&slot->rwsem);
        goto retry;
    }
    if (unlikely(slot->owner != ppt)) {
        up_read(&slot->rwsem);
        return uprobe_find_insn_slot(ppt);
    }
    slot->last_used = jiffies;
    return slot;
}

/*
* utrace engine report callbacks
*/

/*
* We've been asked to quiesce, but aren't in a position to do so.
* This could happen in either of the following cases:
*
* 1) Our own thread is doing a register or unregister operation --
* e.g., as called from a u[ret]probe handler or a non-uprobes utrace
* callback.  We can't wait_event() for ourselves in [un]register_uprobe().
*
* 2) We've been asked to quiesce, but we hit a probepoint first.  Now
* we're in the report_signal callback, having handled the probepoint.
* We'd like to just set the UTRACE_ACTION_QUIESCE and
* UTRACE_EVENT(QUIESCE) flags and coast into quiescence.  Unfortunately,
* it's possible to hit a probepoint again before we quiesce.  When
* processing the SIGTRAP, utrace would call uprobe_report_quiesce(),
* which must decline to take any action so as to avoid removing the
* uprobe just hit.  As a result, we could keep hitting breakpoints
* and never quiescing.
*
* So here we do essentially what we'd prefer to do in uprobe_report_quiesce().
* If we're the last thread to quiesce, handle_pending_uprobes() and
* rouse_all_threads().  Otherwise, pretend we're quiescent and sleep until
* the last quiescent thread handles that stuff and then wakes us.
*
* Called and returns with no mutexes held.  Returns 1 if we free utask->uproc,
* else 0.
*/
static int utask_fake_quiesce(struct uprobe_task *utask)
{
    struct uprobe_process *uproc = utask->uproc;
    enum uprobe_task_state prev_state = utask->state;

    down_write(&uproc->rwsem);

    /* In case we're somehow set to quiesce for real... */
    clear_utrace_quiesce(utask);

    if (uproc->n_quiescent_threads == uproc->nthreads-1) {
        /* We're the last thread to "quiesce." */
        handle_pending_uprobes(uproc, utask->tsk);
        rouse_all_threads(uproc);
        up_write(&uproc->rwsem);
        return 0;
    } else {
        utask->state = UPTASK_SLEEPING;
        uproc->n_quiescent_threads++;
        up_write(&uproc->rwsem);
        /* We ref-count sleepers. */
        uprobe_get_process(uproc);

        wait_event(uproc->waitq, !utask->quiescing);

        down_write(&uproc->rwsem);
        utask->state = prev_state;
        uproc->n_quiescent_threads--;
        up_write(&uproc->rwsem);

        /*
         * If uproc's last uprobe has been unregistered, and
         * unregister_uprobe() woke up before we did, it's up
         * to us to free uproc.
         */
        return uprobe_put_process(uproc);
    }
}

/* Prepare to single-step ppt's probed instruction inline. */
static inline void uprobe_pre_ssin(struct uprobe_task *utask,
    struct uprobe_probept *ppt, struct pt_regs *regs)
{
    int len;
    arch_reset_ip_for_sstep(regs);
    mutex_lock(&ppt->ssil_mutex);
    len = set_orig_insn(ppt, utask->tsk);
    if (unlikely(len != BP_INSN_SIZE)) {
        printk("Failed to temporarily restore original "
            "instruction for single-stepping: "
            "pid/tgid=%d/%d, vaddr=%#lx\n",
            utask->tsk->pid, utask->tsk->tgid, ppt->vaddr);
        utask->doomed = 1;
    }
}

/* Prepare to continue execution after single-stepping inline. */
static inline void uprobe_post_ssin(struct uprobe_task *utask,
    struct uprobe_probept *ppt)
{
    int len = set_bp(ppt, utask->tsk);
    if (unlikely(len != BP_INSN_SIZE)) {
        printk("Couldn't restore bp: pid/tgid=%d/%d, addr=%#lx\n",
            utask->tsk->pid, utask->tsk->tgid, ppt->vaddr);
        ppt->state = UPROBE_DISABLED;
    }
    mutex_unlock(&ppt->ssil_mutex);
}

/* uprobe_pre_ssout() and uprobe_post_ssout() are architecture-specific. */

/*
* Delay delivery of the indicated signal until after single-step.
* Otherwise single-stepping will be cancelled as part of calling
* the signal handler.
*/
static u32 uprobe_delay_signal(struct uprobe_task *utask, siginfo_t *info)
{
    struct delayed_signal *ds = kmalloc(sizeof(*ds), GFP_USER);
    if (ds) {
        ds->info = *info;
        INIT_LIST_HEAD(&ds->list);
        list_add_tail(&ds->list, &utask->delayed_signals);
    }
    return UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN |
            UTRACE_ACTION_SINGLESTEP | UTRACE_ACTION_NEWSTATE;
}

static void uprobe_inject_delayed_signals(struct list_head *delayed_signals)
{
    struct delayed_signal *ds, *tmp;
    list_for_each_entry_safe(ds, tmp, delayed_signals, list) {
        send_sig_info(ds->info.si_signo, &ds->info, current);
        list_del(&ds->list);
        kfree(ds);
    }
}

/*
* Signal callback:
*
* We get called here with:
*    state = UPTASK_RUNNING => we are here due to a breakpoint hit
*        - Read-lock the process
*        - Figure out which probepoint, based on regs->IP
*        - Set state = UPTASK_BP_HIT
*        - Reset regs->IP to beginning of the insn, if necessary
*        - Invoke handler for each uprobe at this probepoint
*        - Set singlestep in motion (UTRACE_ACTION_SINGLESTEP),
*            with state = UPTASK_SSTEP
*
*    state = UPTASK_SSTEP => here after single-stepping
*        - Validate we are here per the state machine
*        - Clean up after single-stepping
*        - Set state = UPTASK_RUNNING
*        - Read-unlock the process
*        - If it's time to quiesce, take appropriate action.
*        - If the handler(s) we ran called [un]register_uprobe(),
*            complete those via uprobe_run_def_regs().
*
*    state = ANY OTHER STATE
*        - Not our signal, pass it on (UTRACE_ACTION_RESUME)
* Note: Intermediate states such as UPTASK_POST_SSTEP help
* uprobe_report_exit() decide what to unlock if we die.
*/
static u32 uprobe_report_signal(struct utrace_attached_engine *engine,
        struct task_struct *tsk, struct pt_regs *regs, u32 action,
        siginfo_t *info, const struct k_sigaction *orig_ka,
        struct k_sigaction *return_ka)
{
    struct uprobe_task *utask;
    struct uprobe_probept *ppt;
    struct uprobe_process *uproc;
    struct uprobe_kimg *uk;
    u32 ret;
    unsigned long probept;
    int hit_uretprobe_trampoline = 0;
    int registrations_deferred = 0;
    int uproc_freed = 0;
    struct list_head delayed_signals;

    utask = (struct uprobe_task *)rcu_dereference(engine->data);
    BUG_ON(!utask);

    /*
     * info will be null if we're called with action=UTRACE_SIGNAL_HANDLER,
     * which means that single-stepping has been disabled so a signal
     * handler can be called in the probed process.  That should never
     * happen because we intercept and delay handled signals (action =
     * UTRACE_ACTION_RESUME) until after we're done single-stepping.
     */
    BUG_ON(!info);
    if (action == UTRACE_ACTION_RESUME && utask->active_probe &&
                    info->si_signo != SSTEP_SIGNAL)
        return uprobe_delay_signal(utask, info);

    if (info->si_signo != BREAKPOINT_SIGNAL &&
                    info->si_signo != SSTEP_SIGNAL)
        goto no_interest;

    /*
     * Set up the SSOL area if it's not already there.  We do this
     * here because we have to do it before handling the first
     * probepoint hit, the probed process has to do it, and this may
     * be the first time our probed process runs uprobes code.
     *
     * We need the SSOL area for the uretprobe trampoline even if
     * this architecture doesn't single-step out of line.
     */
    uproc = utask->uproc;
#ifdef CONFIG_UPROBES_SSOL
    if (uproc->sstep_out_of_line &&
            unlikely(IS_ERR(uprobe_verify_ssol(uproc))))
        uproc->sstep_out_of_line = 0;
#elif defined(CONFIG_URETPROBES)
    (void) uprobe_verify_ssol(uproc);
#endif

    switch (utask->state) {
    case UPTASK_RUNNING:
        if (info->si_signo != BREAKPOINT_SIGNAL)
            goto no_interest;
        down_read(&uproc->rwsem);
        clear_utrace_quiesce(utask);
        probept = arch_get_probept(regs);

        hit_uretprobe_trampoline = (probept == (unsigned long)
            uproc->uretprobe_trampoline_addr);
        if (hit_uretprobe_trampoline) {
            uretprobe_handle_return(regs, utask);
            goto bkpt_done;
        }

        ppt = uprobe_find_probept(uproc, probept);
        if (!ppt) {
            up_read(&uproc->rwsem);
            goto no_interest;
        }
        utask->active_probe = ppt;
        utask->state = UPTASK_BP_HIT;

        if (likely(ppt->state == UPROBE_BP_SET)) {
            list_for_each_entry(uk, &ppt->uprobe_list, list) {
                struct uprobe *u = uk->uprobe;
                if (is_uretprobe(u))
                    uretprobe_handle_entry(u, regs, utask);
                else if (u->handler)
                    u->handler(u, regs);
            }
        }

        if (uprobe_emulate_insn(regs, ppt))
            goto bkpt_done;

        utask->state = UPTASK_PRE_SSTEP;
#ifdef CONFIG_UPROBES_SSOL
        if (uproc->sstep_out_of_line)
            uprobe_pre_ssout(utask, ppt, regs);
        else
#endif
            uprobe_pre_ssin(utask, ppt, regs);
        if (unlikely(utask->doomed))
            do_exit(SIGSEGV);
        utask->state = UPTASK_SSTEP;
        /*
         * No other engines must see this signal, and the
         * signal shouldn't be passed on either.
         */
        ret = UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN |
            UTRACE_ACTION_SINGLESTEP | UTRACE_ACTION_NEWSTATE;
        break;
    case UPTASK_SSTEP:
        if (info->si_signo != SSTEP_SIGNAL)
            goto no_interest;
        ppt = utask->active_probe;
        BUG_ON(!ppt);
        utask->state = UPTASK_POST_SSTEP;
#ifdef CONFIG_UPROBES_SSOL
        if (uproc->sstep_out_of_line)
            uprobe_post_ssout(utask, ppt, regs);
        else
#endif
            uprobe_post_ssin(utask, ppt);
bkpt_done:
        /* Note: Can come here after running uretprobe handlers */
        if (unlikely(utask->doomed))
            do_exit(SIGSEGV);

        utask->active_probe = NULL;

        if (!list_empty(&utask->deferred_registrations)) {
            /*
             * Make sure utask doesn't go away before we run
             * the deferred registrations.  This also keeps
             * the module from getting unloaded before we're
             * ready.
             */
            registrations_deferred = 1;
            uprobe_get_process(uproc);
        }

        /*
         * Delayed signals are a little different.  We want
         * them delivered even if all the probes get unregistered
         * and uproc and utask go away.  So disconnect the list
         * from utask and make it a local list.
         */
        INIT_LIST_HEAD(&delayed_signals);
        list_splice_init(&utask->delayed_signals, &delayed_signals);

        ret = UTRACE_ACTION_HIDE | UTRACE_SIGNAL_IGN
            | UTRACE_ACTION_NEWSTATE;
        utask->state = UPTASK_RUNNING;
        if (utask->quiescing) {
            up_read(&uproc->rwsem);
            uproc_freed |= utask_fake_quiesce(utask);
        } else
            up_read(&uproc->rwsem);

        if (hit_uretprobe_trampoline)
            /*
             * It's possible that the uretprobe_instance
             * we just recycled was the last reason for
             * keeping uproc around.
             */
            uproc_freed |= uprobe_put_process(uproc);

        if (registrations_deferred) {
            uprobe_run_def_regs(&utask->deferred_registrations);
            uproc_freed |= uprobe_put_process(uproc);
        }

        uprobe_inject_delayed_signals(&delayed_signals);

        if (uproc_freed)
            ret |= UTRACE_ACTION_DETACH;
        break;
    default:
        goto no_interest;
    }
    return ret;

no_interest:
    return UTRACE_ACTION_RESUME;
}

/*
* utask_quiesce_pending_sigtrap: The utask entered the quiesce callback
* through the signal delivery path, apparently. Check if the associated
* signal happened due to a uprobe hit.
*
* Called with utask->uproc write-locked.  Returns 1 if quiesce was
* entered with SIGTRAP pending due to a uprobe hit, 0 if not, and
* -EIO if the task's registers could not be read.
*/
static int utask_quiesce_pending_sigtrap(struct uprobe_task *utask)
{
    const struct utrace_regset_view *view;
    const struct utrace_regset *regset;
    struct uprobe_probept *ppt;
    unsigned long insn_ptr;

    view = utrace_native_view(utask->tsk);
    regset = utrace_regset(utask->tsk, utask->engine, view, 0);
    if (unlikely(regset == NULL))
        return -EIO;

    if ((*regset->get)(utask->tsk, regset,
            SLOT_IP(utask->tsk) * regset->size,
            regset->size, &insn_ptr, NULL) != 0)
        return -EIO;

    if (regset->size != sizeof(insn_ptr)) {
        /* Assume 32-bit app and 64-bit kernel. */
        u32 *insn_ptr32 = (u32*) &insn_ptr;
        BUG_ON(regset->size != sizeof(u32));
        insn_ptr = *insn_ptr32;
    }

    ppt = uprobe_find_probept(utask->uproc, ARCH_BP_INST_PTR(insn_ptr));
    return (ppt != NULL);
}

/*
* Quiesce callback: The associated process has one or more breakpoint
* insertions or removals pending.  If we're the last thread in this
* process to quiesce, do the insertion(s) and/or removal(s).
*/
static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine,
        struct task_struct *tsk)
{
    struct uprobe_task *utask;
    struct uprobe_process *uproc;

    rcu_read_lock();
    utask = (struct uprobe_task *)rcu_dereference(engine->data);
    BUG_ON(!utask);
    uproc = uprobe_get_process(utask->uproc);
    rcu_read_unlock();

    if (!uproc)
        return UTRACE_ACTION_DETACH|UTRACE_ACTION_RESUME;

    if (current == utask->quiesce_master) {
        /*
         * tsk was already quiescent when quiesce_all_threads()
         * called utrace_set_flags(), which in turn brought us
         * here.  uproc is already locked.  Do as little as possible
         * and get out.
         */
        utask->state = UPTASK_QUIESCENT;
        uproc->n_quiescent_threads++;
        return UTRACE_ACTION_RESUME;
    }

    BUG_ON(utask->active_probe);

    down_write(&uproc->rwsem);

        /* printk(KERN_INFO "uprobe_report_quiesce2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

    /*
     * When a thread hits a breakpoint or single-steps, utrace calls
     * this quiesce callback before our signal callback.  We must
     * let uprobe_report_signal() handle the uprobe hit and THEN
     * quiesce, because (a) there's a chance that we're quiescing
     * in order to remove that very uprobe, and (b) there's a tiny
     * chance that even though that uprobe isn't marked for removal
     * now, it may be before all threads manage to quiesce.
     */
    if (!utask->quiescing || utask_quiesce_pending_sigtrap(utask) == 1) {
        clear_utrace_quiesce(utask);
        goto done;
    }

    utask->state = UPTASK_QUIESCENT;
    uproc->n_quiescent_threads++;
    check_uproc_quiesced(uproc, tsk);
done:
    up_write(&uproc->rwsem);
    uprobe_put_process(uproc);
        /* printk(KERN_INFO "uprobe_report_quiesce3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */
    return UTRACE_ACTION_RESUME;
}

/*
* uproc's process is exiting or exec-ing, so zap all the (now irrelevant)
* probepoints and uretprobe_instances.  Runs with uproc->rwsem write-locked.
* Caller must ref-count uproc before calling this function, to ensure that
* uproc doesn't get freed in the middle of this.
*/
static void uprobe_cleanup_process(struct uprobe_process *uproc)
{
    int i;
    struct uprobe_probept *ppt;
    struct hlist_node *pnode1, *pnode2;
    struct hlist_head *head;
    struct uprobe_kimg *uk, *unode;
    struct uprobe_task *utask;

    uproc->finished = 1;

    for (i = 0; i < UPROBE_TABLE_SIZE; i++) {
        head = &uproc->uprobe_table[i];
        hlist_for_each_entry_safe(ppt, pnode1, pnode2, head, ut_node) {
            if (ppt->state == UPROBE_INSERTING ||
                    ppt->state == UPROBE_REMOVING) {
                /*
                 * This task is (exec/exit)ing with
                 * a [un]register_uprobe pending.
                 * [un]register_uprobe will free ppt.
                 */
                ppt->state = UPROBE_DISABLED;
                list_del(&ppt->pd_node);
                list_for_each_entry_safe(uk, unode,
                           &ppt->uprobe_list, list)
                    uk->status = -ESRCH;
                wake_up_all(&ppt->waitq);
            } else if (ppt->state == UPROBE_BP_SET) {
                list_for_each_entry_safe(uk, unode,
                           &ppt->uprobe_list, list) {
                    list_del(&uk->list);
                    uprobe_free_kimg(uk);
                }
                uprobe_free_probept(ppt);
            /* else */
                /*
                 * If ppt is UPROBE_DISABLED, assume that
                 * [un]register_uprobe() has been notified
                 * and will free it soon.
                 */
            }
        }
    }

    /*
     * Free uretprobe_instances.  This is a nop on exit, since all
     * the uprobe_tasks are already gone.  We do this here on exec
     * (as opposed to letting uprobe_free_process() take care of it)
     * because uprobe_free_process() never gets called if we don't
     * tick down the ref count here (PR #7082).
     */
    list_for_each_entry(utask, &uproc->thread_list, list)
        utask_free_uretprobe_instances(utask);
}

/*
* Exit callback: The associated task/thread is exiting.
*/
static u32 uprobe_report_exit(struct utrace_attached_engine *engine,
        struct task_struct *tsk, long orig_code, long *code)
{
    struct uprobe_task *utask;
    struct uprobe_process *uproc = NULL;
    struct uprobe_probept *ppt;
    int utask_quiescing;

    utask = (struct uprobe_task *)rcu_dereference(engine->data);
    if (utask)
        uproc = uprobe_get_process(utask->uproc);

    if (!utask || !uproc)
        /* uprobe_free_process() has probably clobbered utask->uproc. */
        return UTRACE_ACTION_DETACH;

        /* printk(KERN_INFO "uprobe_report_exit %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

    ppt = utask->active_probe;
    if (ppt) {
        if (utask->state == UPTASK_TRAMPOLINE_HIT)
            printk(KERN_WARNING "Task died during uretprobe return:"
                "  pid/tgid = %d/%d\n", tsk->pid, tsk->tgid);
        else
            printk(KERN_WARNING "Task died at uprobe probepoint:"
                "  pid/tgid = %d/%d, probepoint = %#lx\n",
                tsk->pid, tsk->tgid, ppt->vaddr);
        /* Mutex cleanup depends on where we died and SSOL vs. SSIL. */
        if (uproc->sstep_out_of_line) {
            if (utask->state == UPTASK_SSTEP
                    && ppt->slot && ppt->slot->owner == ppt)
                up_read(&ppt->slot->rwsem);
        } else {
            switch (utask->state) {
            case UPTASK_PRE_SSTEP:
            case UPTASK_SSTEP:
            case UPTASK_POST_SSTEP:
                mutex_unlock(&ppt->ssil_mutex);
                break;
            default:
                break;
            }
        }
        up_read(&uproc->rwsem);
        if (utask->state == UPTASK_TRAMPOLINE_HIT)
            uprobe_decref_process(uproc);
    }

    down_write(&uproc->rwsem);

        /* printk(KERN_INFO "uprobe_report_exit2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

    utask_quiescing = utask->quiescing;
    uprobe_free_task(utask);

    uproc->nthreads--;
    if (uproc->nthreads) {
        if (utask_quiescing)
            /*
             * In case other threads are waiting for
             * us to quiesce...
             */
            check_uproc_quiesced(uproc,
                       find_surviving_thread(uproc));
    } else {
        /*
         * We were the last remaining thread - clean up the uprobe
         * remnants a la unregister_uprobe(). We don't have to
         * remove the breakpoints, though.
         */
        uprobe_cleanup_process(uproc);
    }
    up_write(&uproc->rwsem);
        /* printk(KERN_INFO "uprobe_report_exit3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

    uprobe_put_process(uproc);

    return UTRACE_ACTION_DETACH;
}

/*
* Duplicate the FIFO of uretprobe_instances from parent_utask into
* child_utask.  Zap the uretprobe pointer, since all we care about is
* vectoring to the proper return address.  Where there are multiple
* uretprobe_instances for the same function instance, copy only the
* one that contains the real return address.
*/
static int uprobe_fork_uretprobe_instances(struct uprobe_task *parent_utask,
                    struct uprobe_task *child_utask)
{
    struct uprobe_process *parent_uproc = parent_utask->uproc;
    struct uprobe_process *child_uproc = child_utask->uproc;
    uprobe_opcode_t __user *trampoline_addr =
                child_uproc->uretprobe_trampoline_addr;
    struct hlist_node *tmp, *tail;
    struct uretprobe_instance *pri, *cri;

    BUG_ON(trampoline_addr != parent_uproc->uretprobe_trampoline_addr);

    /* Since there's no hlist_add_tail()... */
    tail = NULL;
    hlist_for_each_entry(pri, tmp, &parent_utask->uretprobe_instances,
                                hlist) {
        if (pri->ret_addr == (unsigned long) trampoline_addr)
            continue;
        cri = kmalloc(sizeof(*cri), GFP_USER);
        if (!cri)
            return -ENOMEM;
        cri->rp = NULL;
        cri->ret_addr = pri->ret_addr;
        cri->sp = pri->sp;
        INIT_HLIST_NODE(&cri->hlist);
        if (tail)
            hlist_add_after(tail, &cri->hlist);
        else
            hlist_add_head(&cri->hlist,
                &child_utask->uretprobe_instances);
        tail = &cri->hlist;

        /* Ref-count uretprobe_instances. */
        uprobe_get_process(child_uproc);
    }
    BUG_ON(hlist_empty(&child_utask->uretprobe_instances));
    return 0;
}

/*
* A probed process is forking, and at least one function in the
* call stack has a uretprobe on it.  Since the child inherits the
* call stack, it's possible that the child could attempt to return
* through the uretprobe trampoline.  Create a uprobe_process for
* the child, initialize its SSOL vma (which has been cloned from
* the parent), and clone the parent's list of uretprobe_instances.
*
* Called with uproc_table locked and parent_uproc->rwsem write-locked.
*
* (On architectures where it's easy to keep track of where in the
* stack the return addresses are stored, we could just poke the real
* return addresses back into the child's stack.  We use this more
* general solution.)
*/
static int uprobe_fork_uproc(struct uprobe_process *parent_uproc,
                struct uprobe_task *parent_utask,
                struct task_struct *child_tsk)
{
    int ret = 0;
    struct uprobe_process *child_uproc;
    struct uprobe_task *child_utask;

    BUG_ON(!parent_uproc->uretprobe_trampoline_addr ||
            IS_ERR(parent_uproc->uretprobe_trampoline_addr));

    ret = uprobe_validate_vma(child_tsk,
            (unsigned long) parent_uproc->ssol_area.insn_area);
    if (ret) {
        int ret2;
        printk(KERN_ERR "uprobes: Child %d failed to inherit"
            " parent %d's SSOL vma at %pError = %d\n",
            child_tsk->pid, parent_utask->tsk->pid,
            parent_uproc->ssol_area.insn_area, ret);
        ret2 = uprobe_validate_vma(parent_utask->tsk,
            (unsigned long) parent_uproc->ssol_area.insn_area);
        if (ret2 != 0)
            printk(KERN_ERR "uprobes: Parent %d's SSOL vma"
                " is no longer valid.  Error = %d\n",
                parent_utask->tsk->pid, ret2);
        return ret;
    }

    if (!try_module_get(THIS_MODULE))
        return -ENOSYS;
    child_uproc = uprobe_mk_process(child_tsk, 1);
    if (IS_ERR(child_uproc)) {
        ret = (int) PTR_ERR(child_uproc);
        module_put(THIS_MODULE);
        return ret;
    }

    mutex_lock(&child_uproc->ssol_area.setup_mutex);
    uprobe_init_ssol(child_uproc, child_tsk,
                parent_uproc->ssol_area.insn_area);
    child_uproc->ssol_area.initialized = 1;
    mutex_unlock(&child_uproc->ssol_area.setup_mutex);

    child_utask = uprobe_find_utask(child_tsk);
    BUG_ON(!child_utask);
    ret = uprobe_fork_uretprobe_instances(parent_utask, child_utask);

    hlist_add_head(&child_uproc->hlist,
            &uproc_table[hash_long(child_uproc->tgid,
            UPROBE_HASH_BITS)]);

    uprobe_decref_process(child_uproc);
    return ret;
}

/*
* Clone callback: The current task has spawned a thread/process.
*
* NOTE: For now, we don't pass on uprobes from the parent to the
* child.  We do, however, clear the parent's breakpoints from the
* child's address space.
*
* TODO:
*    - Provide option for child to inherit uprobes.
*/
static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
        struct task_struct *parent, unsigned long clone_flags,
        struct task_struct *child)
{
    int len;
    struct uprobe_process *uproc;
    struct uprobe_task *ptask, *ctask;

    ptask = (struct uprobe_task *)rcu_dereference(engine->data);
    uproc = ptask->uproc;

        /* printk(KERN_INFO "uprobe_report_clone %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */
    /*
     * Lock uproc so no new uprobes can be installed 'til all
     * report_clone activities are completed.  Lock uproc_table
     * in case we have to run uprobe_fork_uproc().
     */
    lock_uproc_table();
    down_write(&uproc->rwsem);
    get_task_struct(child);

        /* printk(KERN_INFO "uprobe_report_clone2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

    if (clone_flags & (CLONE_THREAD|CLONE_VM)) {
        /* New thread in the same process (CLONE_THREAD) or
         * processes sharing the same memory space (CLONE_VM). */
        ctask = uprobe_add_task(child, uproc);
        BUG_ON(!ctask);
        if (IS_ERR(ctask))
            goto done;
        uproc->nthreads++;
        /*
         * FIXME: Handle the case where uproc is quiescing
         * (assuming it's possible to clone while quiescing).
         */
    } else {
        /*
         * New process spawned by parent.  Remove the probepoints
         * in the child's text.
         *
         * It's not necessary to quiesce the child, as we are assured
         * by utrace that this callback happens *before* the child
         * gets to run userspace.
         *
         * We also hold the uproc->rwsem for the parent - so no
         * new uprobes will be registered 'til we return.
         */
        int i;
        struct uprobe_probept *ppt;
        struct hlist_node *node;
        struct hlist_head *head;

        for (i = 0; i < UPROBE_TABLE_SIZE; i++) {
            head = &uproc->uprobe_table[i];
            hlist_for_each_entry(ppt, node, head, ut_node) {
                len = set_orig_insn(ppt, child);
                if (len != BP_INSN_SIZE) {
                    /* Ratelimit this? */
                    printk(KERN_ERR "Pid %d forked %d;"
                        " failed to remove probepoint"
                        " at %#lx in child\n",
                        parent->pid, child->pid,
                        ppt->vaddr);
                }
            }
        }

        if (!hlist_empty(&ptask->uretprobe_instances))
            (void) uprobe_fork_uproc(uproc, ptask, child);
    }

done:
    put_task_struct(child);
    up_write(&uproc->rwsem);
    unlock_uproc_table();
    return UTRACE_ACTION_RESUME;
}

/*
* Exec callback: The associated process called execve() or friends
*
* The new program is about to start running, so there is no
* possibility of hitting a uprobe from the previous user address
* space.
*
* NOTE:
*    Typically, this process would have passed through the clone
*    callback, where the necessary action *should* have been
*    taken. However, if we still end up at this callback:
*        - We don't have to clear the uprobes - memory image
*          will be overlaid.
*        - We have to free up uprobe resources associated with
*          this process.
*/
static u32 uprobe_report_exec(struct utrace_attached_engine *engine,
        struct task_struct *tsk, const struct linux_binprm *bprm,
        struct pt_regs *regs)
{
    struct uprobe_process *uproc = NULL;
    struct uprobe_task *utask;
    u32 ret = UTRACE_ACTION_RESUME;

    utask = (struct uprobe_task *)rcu_dereference(engine->data);
    if (utask)
        uproc = uprobe_get_process(utask->uproc);

    if (!utask || !uproc)
        /* uprobe_free_process() has probably clobbered utask->uproc. */
        return UTRACE_ACTION_DETACH;

        /* printk(KERN_INFO "uprobe_report_exec %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

    /*
     * Only cleanup if we're the last thread.  If we aren't,
     * uprobe_report_exit() will handle cleanup.
     *
     * One instance of this can happen if vfork() was called,
     * creating 2 tasks that share the same memory space
     * (CLONE_VFORK|CLONE_VM).  In this case we don't want to
     * remove the probepoints from the child, since that would
     * also remove them from the parent.  Instead, just detach
     * as if this were a simple thread exit.
     */
    down_write(&uproc->rwsem);
    if (uproc->nthreads == 1) {
        uprobe_cleanup_process(uproc);
        /*
         * If [un]register_uprobe() is in progress, cancel the
         * quiesce.  Otherwise, utrace_report_exec() might
         * call uprobe_report_exec() while the
         * [un]register_uprobe thread is freeing the uproc.
         */
        clear_utrace_quiesce(utask);
    } else {
        uprobe_free_task(utask);
        uproc->nthreads--;
        ret = UTRACE_ACTION_DETACH;
    }
    up_write(&uproc->rwsem);
        /* printk(KERN_INFO "uprobe_report_exec2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid); */

    /* If any [un]register_uprobe is pending, it'll clean up. */
    if (uprobe_put_process(uproc))
        ret = UTRACE_ACTION_DETACH;

        /* printk(KERN_INFO "uprobe_report_exec4 %p %ld=%ld ret=%lu\n", uproc, uproc->tgid, current->pid, (unsigned long)ret); */

    return ret;
}

static const struct utrace_engine_ops uprobe_utrace_ops =
{
    .report_quiesce = uprobe_report_quiesce,
    .report_signal = uprobe_report_signal,
    .report_exit = uprobe_report_exit,
    .report_clone = uprobe_report_clone,
    .report_exec = uprobe_report_exec
};

static int __init init_uprobes(void)
{
    int i;

    for (i = 0; i < UPROBE_TABLE_SIZE; i++) {
        INIT_HLIST_HEAD(&uproc_table[i]);
        INIT_HLIST_HEAD(&utask_table[i]);
    }

    p_uprobe_utrace_ops = &uprobe_utrace_ops;
    return 0;
}

static void __exit exit_uprobes(void)
{
}

module_init(init_uprobes);
module_exit(exit_uprobes);

#ifdef CONFIG_URETPROBES

/* Returns true if ri_sp lies outside the stack (beyond cursp). */
static inline bool compare_stack_ptrs(unsigned long cursp,
        unsigned long ri_sp)
{
#ifdef CONFIG_STACK_GROWSUP
    if (cursp < ri_sp)
        return true;
#else
    if (cursp > ri_sp)
        return true;
#endif
    return false;
}

/*
* A longjmp may cause one or more uretprobed functions to terminate without
* returning.  Those functions' uretprobe_instances need to be recycled.
* We detect this when any uretprobed function is subsequently called
* or returns.  A bypassed uretprobe_instance's stack_ptr is beyond the
* current stack.
*/
static inline void uretprobe_bypass_instances(unsigned long cursp,
                struct uprobe_task *utask)
{
    struct hlist_node *r1, *r2;
    struct uretprobe_instance *ri;
    struct hlist_head *head = &utask->uretprobe_instances;

    hlist_for_each_entry_safe(ri, r1, r2, head, hlist) {
        if (compare_stack_ptrs(cursp, ri->sp)) {
            hlist_del(&ri->hlist);
            kfree(ri);
            uprobe_decref_process(utask->uproc);
        } else
            return;
    }
}

/* Called when the entry-point probe u is hit. */
static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
    struct uprobe_task *utask)
{
    struct uretprobe_instance *ri;
    unsigned long trampoline_addr;

    if (IS_ERR(utask->uproc->uretprobe_trampoline_addr))
        return;
    trampoline_addr = (unsigned long)
        utask->uproc->uretprobe_trampoline_addr;
    ri = kmalloc(sizeof(*ri), GFP_USER);
    if (!ri)
        return;
    ri->ret_addr = arch_hijack_uret_addr(trampoline_addr, regs, utask);
    if (likely(ri->ret_addr)) {
        ri->sp = arch_predict_sp_at_ret(regs, utask->tsk);
        uretprobe_bypass_instances(ri->sp, utask);
        ri->rp = container_of(u, struct uretprobe, u);
        INIT_HLIST_NODE(&ri->hlist);
        hlist_add_head(&ri->hlist, &utask->uretprobe_instances);
        /* We ref-count outstanding uretprobe_instances. */
        uprobe_get_process(utask->uproc);
    } else
        kfree(ri);
}

/*
* For each uretprobe_instance pushed onto the LIFO for the function
* instance that's now returning, call the handler, free the ri, and
* decrement the uproc's ref count.  Caller ref-counts uproc, so we
* should never hit zero in this function.
*
* Returns the original return address.
*
* TODO: Handle longjmp out of uretprobed function.
*/
static unsigned long uretprobe_run_handlers(struct uprobe_task *utask,
        struct pt_regs *regs, unsigned long trampoline_addr)
{
    unsigned long ret_addr, cur_sp;
    struct hlist_head *head = &utask->uretprobe_instances;
    struct uretprobe_instance *ri;
    struct hlist_node *r1, *r2;

    cur_sp = arch_get_cur_sp(regs);
    uretprobe_bypass_instances(cur_sp, utask);
    hlist_for_each_entry_safe(ri, r1, r2, head, hlist) {
        if (ri->rp && ri->rp->handler)
            ri->rp->handler(ri, regs);
        ret_addr = ri->ret_addr;
        hlist_del(&ri->hlist);
        kfree(ri);
        uprobe_decref_process(utask->uproc);
        if (ret_addr != trampoline_addr)
            /*
             * This is the first ri (chronologically) pushed for
             * this particular instance of the probed function.
             */
            return ret_addr;
    }
    printk(KERN_ERR "No uretprobe instance with original return address!"
        " pid/tgid=%d/%d", utask->tsk->pid, utask->tsk->tgid);
    utask->doomed = 1;
    return 0;
}

/* Called when the uretprobe trampoline is hit. */
static void uretprobe_handle_return(struct pt_regs *regs,
    struct uprobe_task *utask)
{
    unsigned long orig_ret_addr;
    /* Delay recycling of uproc until end of uprobe_report_signal() */
    uprobe_get_process(utask->uproc);
    utask->state = UPTASK_TRAMPOLINE_HIT;
    utask->active_probe = &uretprobe_trampoline_dummy_probe;
    orig_ret_addr = uretprobe_run_handlers(utask, regs,
        (unsigned long) utask->uproc->uretprobe_trampoline_addr);
    arch_restore_uret_addr(orig_ret_addr, regs);
}

int register_uretprobe(struct uretprobe *rp)
{
    if (!rp || !rp->handler)
        return -EINVAL;
    rp->u.handler = URETPROBE_HANDLE_ENTRY;
    return register_uprobe(&rp->u);
}
EXPORT_SYMBOL_GPL(register_uretprobe);
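
/*
 * Example sketch (illustrative, not compiled): a minimal client module
 * registering a uretprobe.  This assumes the struct uprobe fields (pid,
 * vaddr) and the uretprobe handler signature declared in uprobes.h; the
 * pid and vaddr values below are placeholders.
 */
#if 0
static void my_ret_handler(struct uretprobe_instance *ri,
        struct pt_regs *regs)
{
    /* ri->ret_addr holds the original (pre-hijack) return address. */
    printk(KERN_INFO "probed function returning to %#lx\n", ri->ret_addr);
}

static struct uretprobe my_rp = {
    .handler = my_ret_handler,
};

static int __init my_client_init(void)
{
    my_rp.u.pid = 1234;            /* placeholder: probed tgid */
    my_rp.u.vaddr = 0x08048500;    /* placeholder: probed function entry */
    return register_uretprobe(&my_rp);
}

static void __exit my_client_exit(void)
{
    unregister_uretprobe(&my_rp);
}
#endif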

/*
* The uretprobe containing u is being unregistered.  Its uretprobe_instances
* have to hang around 'til their associated instances return (but we can't
* run rp's handler).  Zap ri->rp for each one to indicate unregistration.
*
* Runs with uproc write-locked.
*/
static void zap_uretprobe_instances(struct uprobe *u,
        struct uprobe_process *uproc)
{
    struct uprobe_task *utask;
    struct uretprobe *rp = container_of(u, struct uretprobe, u);

    if (!uproc)
        return;

    list_for_each_entry(utask, &uproc->thread_list, list) {
        struct hlist_node *r;
        struct uretprobe_instance *ri;

        hlist_for_each_entry(ri, r, &utask->uretprobe_instances, hlist)
            if (ri->rp == rp)
                ri->rp = NULL;
    }
}

void unregister_uretprobe(struct uretprobe *rp)
{
    if (!rp)
        return;
    unregister_uprobe(&rp->u);
}
EXPORT_SYMBOL_GPL(unregister_uretprobe);

void unmap_uretprobe(struct uretprobe *rp)
{
    if (!rp)
        return;
    unmap_uprobe(&rp->u);
}
EXPORT_SYMBOL_GPL(unmap_uretprobe);
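
/*
 * Example sketch (illustrative, not compiled): if the client learns that
 * the probed text has been unmapped (e.g., the library was dlclose'd),
 * it can call unmap_uretprobe() instead of unregister_uretprobe(), so
 * uprobes doesn't try to restore the original instruction in text that
 * no longer exists.  my_rp is the uretprobe from the earlier example.
 */
#if 0
static void my_client_text_gone(void)
{
    unmap_uretprobe(&my_rp);
}
#endif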

/*
* uproc->ssol_area has been successfully set up.  Establish the
* uretprobe trampoline in slot 0.
*/
static void uretprobe_set_trampoline(struct uprobe_process *uproc,
                    struct task_struct *tsk)
{
    uprobe_opcode_t bp_insn = BREAKPOINT_INSTRUCTION;
    struct uprobe_ssol_area *area = &uproc->ssol_area;
    struct uprobe_ssol_slot *slot = &area->slots[0];

    if (access_process_vm(tsk, (unsigned long) slot->insn,
            &bp_insn, BP_INSN_SIZE, 1) == BP_INSN_SIZE) {
        uproc->uretprobe_trampoline_addr = slot->insn;
        slot->state = SSOL_RESERVED;
        area->next_slot = 1;
        area->nfree--;
    } else {
        printk(KERN_ERR "uretprobes disabled for pid %d:"
            " cannot set uretprobe trampoline at %p\n",
            uproc->tgid, slot->insn);
    }
}

static inline unsigned long lookup_uretprobe(struct hlist_node *r,
                         struct uprobe_process *uproc,
                         unsigned long pc,
                         unsigned long sp)
{
    struct uretprobe_instance *ret_inst;
    unsigned long trampoline_addr;

    if (IS_ERR(uproc->uretprobe_trampoline_addr))
        return pc;
    trampoline_addr = (unsigned long)uproc->uretprobe_trampoline_addr;
    if (pc != trampoline_addr)
        return pc;
    hlist_for_each_entry_from(ret_inst, r, hlist) {
        if (ret_inst->ret_addr == trampoline_addr)
            continue;
        /* The first instance whose saved sp equals, or lies beyond,
           the given sp must be the one for this frame. */
        if (ret_inst->sp == sp || compare_stack_ptrs(ret_inst->sp, sp))
            return ret_inst->ret_addr;
    }
    printk(KERN_ERR "Original return address for trampoline not found at "
           "0x%lx pid/tgid=%d/%d\n", sp, current->pid, current->tgid);
    return 0;
}

unsigned long uprobe_get_pc(struct uretprobe_instance *ri, unsigned long pc,
            unsigned long sp)
{
    struct uretprobe *rp;
    struct uprobe_kimg *uk;
    struct uprobe_task *utask;
    struct uprobe_process *uproc;
    struct hlist_node *r;

    if (ri == GET_PC_URETPROBE_NONE) {
        utask = uprobe_find_utask(current);
        if (!utask)
            return 0;
        uproc = utask->uproc;
        r = utask->uretprobe_instances.first;
    } else {
        rp = ri->rp;
        uk = (struct uprobe_kimg *)rp->u.kdata;
        if (!uk)
            return 0;
        uproc = uk->ppt->uproc;
        r = &ri->hlist;
    }
    return lookup_uretprobe(r, uproc, pc, sp);
}

EXPORT_SYMBOL_GPL(uprobe_get_pc);
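
/*
 * Example sketch (illustrative, not compiled): a uretprobe handler that
 * wants the caller's true pc, e.g. for a stack trace.  If an unwound pc
 * turns out to be the trampoline address, uprobe_get_pc() maps it back
 * to the original return address.  get_unwound_pc()/get_unwound_sp()
 * are hypothetical stand-ins for the client's unwinder.
 */
#if 0
static void my_ret_handler(struct uretprobe_instance *ri,
        struct pt_regs *regs)
{
    unsigned long pc = get_unwound_pc(regs);    /* hypothetical */
    unsigned long sp = get_unwound_sp(regs);    /* hypothetical */
    unsigned long real_pc = uprobe_get_pc(ri, pc, sp);

    if (real_pc)
        printk(KERN_INFO "caller pc = %#lx\n", real_pc);
}
#endif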

unsigned long uprobe_get_pc_task(struct task_struct *task, unsigned long pc,
                 unsigned long sp)
{
    struct uprobe_task *utask;
    struct uprobe_process *uproc;
    unsigned long result;

    utask = uprobe_find_utask(task);
    if (!utask) {
        return pc;
    } else if (current == task && utask->active_probe) {
        /* everything's locked. */
        return uprobe_get_pc(GET_PC_URETPROBE_NONE, pc, sp);
    }
    uproc = utask->uproc;
    down_read(&uproc->rwsem);
    result = lookup_uretprobe(utask->uretprobe_instances.first, uproc, pc,
                  sp);
    up_read(&uproc->rwsem);
    return result;
}

EXPORT_SYMBOL_GPL(uprobe_get_pc_task);
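
/*
 * Example sketch (illustrative, not compiled): fixing up one frame while
 * unwinding a possibly-probed task other than current.
 * uprobe_get_pc_task() does its own locking, so the caller just passes
 * the candidate pc and sp for the frame.
 */
#if 0
static unsigned long fixup_frame_pc(struct task_struct *tsk,
        unsigned long pc, unsigned long sp)
{
    /* Returns pc unchanged unless pc is the trampoline address. */
    return uprobe_get_pc_task(tsk, pc, sp);
}
#endif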

#else    /* ! CONFIG_URETPROBES */

static void uretprobe_handle_entry(struct uprobe *u, struct pt_regs *regs,
    struct uprobe_task *utask)
{
}
static void uretprobe_handle_return(struct pt_regs *regs,
    struct uprobe_task *utask)
{
}
static void uretprobe_set_trampoline(struct uprobe_process *uproc,
                    struct task_struct *tsk)
{
}
static void zap_uretprobe_instances(struct uprobe *u,
    struct uprobe_process *uproc)
{
}
#endif /* CONFIG_URETPROBES */

#ifdef NO_ACCESS_PROCESS_VM_EXPORT
/*
* Some kernel versions export everything that uprobes.ko needs except
* access_process_vm, so we copied and pasted it here.  Fortunately,
* everything it calls is exported.
*/
#include <linux/pagemap.h>
#include <asm/cacheflush.h>
static int __access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
    struct mm_struct *mm;
    struct vm_area_struct *vma;
    struct page *page;
    void *old_buf = buf;

    mm = get_task_mm(tsk);
    if (!mm)
        return 0;

    down_read(&mm->mmap_sem);
    /* ignore errors, just check how much was successfully transferred */
    while (len) {
        int bytes, ret, offset;
        void *maddr;

        ret = get_user_pages(tsk, mm, addr, 1,
                write, 1, &page, &vma);
        if (ret <= 0)
            break;

        bytes = len;
        offset = addr & (PAGE_SIZE-1);
        if (bytes > PAGE_SIZE-offset)
            bytes = PAGE_SIZE-offset;

        maddr = kmap(page);
        if (write) {
            copy_to_user_page(vma, page, addr,
                      maddr + offset, buf, bytes);
            set_page_dirty_lock(page);
        } else {
            copy_from_user_page(vma, page, addr,
                        buf, maddr + offset, bytes);
        }
        kunmap(page);
        page_cache_release(page);
        len -= bytes;
        buf += bytes;
        addr += bytes;
    }
    up_read(&mm->mmap_sem);
    mmput(mm);

    return buf - old_buf;
}
#endif
#include "uprobes_arch.c"
MODULE_LICENSE("GPL");

#endif    /* uprobes 1 (based on original utrace) */