runtime/task_finder_vma.c - systemtap

Source code

#ifndef TASK_FINDER_VMA_C
#define TASK_FINDER_VMA_C

#include <linux/list.h>
#include <linux/jhash.h>

#include <linux/fs.h>
#include <linux/dcache.h>

#include "stp_helper_lock.h"

// __stp_tf_vma_lock protects the hash table.
// Documentation/spinlocks.txt suggests we can be a bit more clever
// if we guarantee that in interrupt context we only read, not write,
// the data structures. We should never change the hash table or its
// contents in interrupt context (which should only ever call
// stap_find_vma_map_info to retrieve stored vma info). So we might
// want to look into that if this seems a bottleneck.
static STP_DEFINE_RWLOCK(__stp_tf_vma_lock);

#define __STP_TF_HASH_BITS 4
#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS)

#ifndef TASK_FINDER_VMA_ENTRY_PATHLEN
#define TASK_FINDER_VMA_ENTRY_PATHLEN 64
#elif TASK_FINDER_VMA_ENTRY_PATHLEN < 8
#error "gimme a little more TASK_FINDER_VMA_ENTRY_PATHLEN"
#endif


struct __stp_tf_vma_entry {
    struct hlist_node hlist;

    pid_t pid;
    unsigned long vm_start;
    unsigned long vm_end;
    char path[TASK_FINDER_VMA_ENTRY_PATHLEN]; /* mm path name, if known */

    // User data (possibly stp_module)
    void *user;
};

static struct hlist_head *__stp_tf_vma_map;

// __stp_tf_vma_new_entry(): Returns a newly allocated entry, or NULL.
// Must only be called from user context.
// ... except, with inode-uprobes / task-finder2, it can be called from
// random tracepoints, so we cannot sleep after all.
static struct __stp_tf_vma_entry *
__stp_tf_vma_new_entry(void)
{
    struct __stp_tf_vma_entry *entry;
    size_t size = sizeof (struct __stp_tf_vma_entry);
#ifdef CONFIG_UTRACE
    entry = (struct __stp_tf_vma_entry *) _stp_kmalloc_gfp(size,
                                                    STP_ALLOC_SLEEP_FLAGS);
#else
    entry = (struct __stp_tf_vma_entry *) _stp_kmalloc_gfp(size,
                                                    STP_ALLOC_FLAGS);
#endif
    return entry;
}

// __stp_tf_vma_release_entry(): Frees an entry.
static void
__stp_tf_vma_release_entry(struct __stp_tf_vma_entry *entry)
{
    _stp_kfree (entry);
}

// stap_initialize_vma_map(): Allocate and initialize the vma map
// hash table.  Should be called before any of the other
// stap_*_vma_map functions.  Since it runs before any other function
// is called, it doesn't need any locking.  Must be called from user
// context since it allocates memory.
static int
stap_initialize_vma_map(void)
{
    size_t size = sizeof(struct hlist_head) * __STP_TF_TABLE_SIZE;
    struct hlist_head *map = (struct hlist_head *) _stp_kzalloc_gfp(size,
                            STP_ALLOC_SLEEP_FLAGS);
    if (map == NULL)
        return -ENOMEM;

    __stp_tf_vma_map = map;
    return 0;
}
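
/* Usage sketch (illustrative; the init path shown is hypothetical):
 * the map is typically set up once during module startup, before any
 * probes can fire, e.g.:
 *
 *	rc = stap_initialize_vma_map();
 *	if (rc != 0)
 *		return rc;	// -ENOMEM: give up loading
 */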

// stap_destroy_vma_map(): Unconditionally destroys all vma entries
// and the map itself.  Nothing should be using the map anymore, so
// this doesn't take any locks and just frees all items.
static void
stap_destroy_vma_map(void)
{
    if (__stp_tf_vma_map != NULL) {
        int i;
        for (i = 0; i < __STP_TF_TABLE_SIZE; i++) {
            struct hlist_head *head = &__stp_tf_vma_map[i];
            struct hlist_node *node;
            struct hlist_node *n;
            struct __stp_tf_vma_entry *entry = NULL;

            if (hlist_empty(head))
                continue;

            stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) {
                hlist_del(&entry->hlist);
                __stp_tf_vma_release_entry(entry);
            }
        }
        _stp_kfree(__stp_tf_vma_map);
    }
}


// __stp_tf_vma_map_hash(): Compute the vma map hash.
static inline u32
__stp_tf_vma_map_hash(struct task_struct *tsk)
{
    return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1));
}
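
/* For example (illustrative): with __STP_TF_HASH_BITS == 4 the table
 * has 16 buckets, so every entry for a task with pid 1234 lands in
 * bucket jhash_1word(1234, 0) & 15, regardless of its vm_start. */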

// Get vma_entry if the vma is present in the vma map hash table.
// Returns NULL if not present.  The __stp_tf_vma_lock must be read locked
// before calling this function.
static struct __stp_tf_vma_entry *
__stp_tf_get_vma_map_entry_internal(struct task_struct *tsk,
                    unsigned long vm_start)
{
    struct hlist_head *head;
    struct hlist_node *node;
    struct __stp_tf_vma_entry *entry;

    head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
    stap_hlist_for_each_entry(entry, node, head, hlist) {
        if (tsk->pid == entry->pid
            && vm_start == entry->vm_start) {
            return entry;
        }
    }
    return NULL;
}

// Get vma_entry if the vma with the given vm_end is present in the vma map
// hash table for the tsk.  Returns NULL if not present.
// The __stp_tf_vma_lock must be read locked before calling this function.
static struct __stp_tf_vma_entry *
__stp_tf_get_vma_map_entry_end_internal(struct task_struct *tsk,
                    unsigned long vm_end)
{
    struct hlist_head *head;
    struct hlist_node *node;
    struct __stp_tf_vma_entry *entry;

    head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
    stap_hlist_for_each_entry(entry, node, head, hlist) {
        if (tsk->pid == entry->pid
            && vm_end == entry->vm_end) {
            return entry;
        }
    }
    return NULL;
}


// Add the vma info to the vma map hash table.
// The path name is copied into the entry, so the caller need not
// keep it alive.  Can allocate memory, so needs to be called
// only from user context.
static int
stap_add_vma_map_info(struct task_struct *tsk,
              unsigned long vm_start, unsigned long vm_end,
              const char *path, void *user)
{
    struct hlist_head *head;
    struct __stp_tf_vma_entry *entry;
    struct __stp_tf_vma_entry *new_entry;
    unsigned long flags;

    // Take a write lock, since we are most likely going to write
    // after reading. But reserve a new entry first outside the lock.
    new_entry = __stp_tf_vma_new_entry();
    stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
    entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
    if (entry != NULL) {
        stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
        if (new_entry)
            __stp_tf_vma_release_entry(new_entry);
        return -EBUSY;    /* Already there */
    }

    if (!new_entry) {
        stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
        return -ENOMEM;
    }

    // Fill in the info
    entry = new_entry;
    entry->pid = tsk->pid;
    entry->vm_start = vm_start;
    entry->vm_end = vm_end;
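    /* The path buffer holds TASK_FINDER_VMA_ENTRY_PATHLEN bytes.  If
     * the path doesn't fit, keep its tail and mark the truncation with
     * a leading "...": e.g. with the default length of 64, a 100-char
     * path is stored as "..." plus its last 60 characters. */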
    if (strlen(path) >= TASK_FINDER_VMA_ENTRY_PATHLEN - 3) {
        strncpy(entry->path, "...", TASK_FINDER_VMA_ENTRY_PATHLEN);
        strlcpy(entry->path + 3,
                &path[strlen(path) - TASK_FINDER_VMA_ENTRY_PATHLEN + 4],
                TASK_FINDER_VMA_ENTRY_PATHLEN - 3);
    } else {
        strlcpy(entry->path, path, TASK_FINDER_VMA_ENTRY_PATHLEN);
    }
    entry->user = user;

    head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
    hlist_add_head(&entry->hlist, head);
    stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
    return 0;
}
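
/* Usage sketch (illustrative only; __stp_tf_report_mmap_cb and its
 * arguments are hypothetical, standing in for a task-finder mmap
 * callback).  -EBUSY simply means the region was already recorded: */
#ifdef STP_TF_VMA_EXAMPLE
static void
__stp_tf_report_mmap_cb(struct task_struct *tsk, unsigned long addr,
                        unsigned long length, const char *path, void *user)
{
    int rc = stap_add_vma_map_info(tsk, addr, addr + length, path, user);
    if (rc != 0 && rc != -EBUSY)
        _stp_warn("couldn't record vma 0x%lx-0x%lx: %d\n",
                  addr, addr + length, rc);
}
#endif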

// Extend the vma info vm_end in the vma map hash table if there is already
// a vma_info which ends precisely where this new one starts for the given
// task. Returns zero on success, -ESRCH if no existing matching entry could
// be found.
static int
stap_extend_vma_map_info(struct task_struct *tsk,
             unsigned long vm_start, unsigned long vm_end)
{
    struct __stp_tf_vma_entry *entry;

    unsigned long flags;
    int res = -ESRCH; // Entry not there or doesn't match.

    // Take a write lock, since we are most likely going to write
    // to the entry after reading, if its vm_end matches our vm_start.
    stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
    entry = __stp_tf_get_vma_map_entry_end_internal(tsk, vm_start);
    if (entry != NULL) {
        entry->vm_end = vm_end;
        res = 0;
    }
    stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
    return res;
}
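
/* Example (illustrative): if 0x400000-0x401000 was recorded earlier for
 * this task, stap_extend_vma_map_info(tsk, 0x401000, 0x403000) grows
 * that entry to 0x400000-0x403000, merging the adjacent segments. */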


// Remove the vma entry from the vma hash table.
// Returns -ESRCH if the entry isn't present.
static int
stap_remove_vma_map_info(struct task_struct *tsk, unsigned long vm_start)
{
    struct __stp_tf_vma_entry *entry;
    int rc = -ESRCH;

    // Take a write lock since we are most likely going to delete
    // after reading.
    unsigned long flags;
    stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
    entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
    if (entry != NULL) {
        hlist_del(&entry->hlist);
        __stp_tf_vma_release_entry(entry);
        rc = 0;
    }
    stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
    return rc;
}

// Finds vma info if the vma is present in the vma map hash table for
// a given task and address (between vm_start and vm_end).
// Returns -ESRCH if not present.  The __stp_tf_vma_lock must *not* be
// locked before calling this function.
static int
stap_find_vma_map_info(struct task_struct *tsk, unsigned long addr,
               unsigned long *vm_start, unsigned long *vm_end,
               const char **path, void **user)
{
    struct hlist_head *head;
    struct hlist_node *node;
    struct __stp_tf_vma_entry *entry;
    struct __stp_tf_vma_entry *found_entry = NULL;
    int rc = -ESRCH;
    unsigned long flags;

    if (__stp_tf_vma_map == NULL)
        return rc;

    stp_read_lock_irqsave(&__stp_tf_vma_lock, flags);
    head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
    stap_hlist_for_each_entry(entry, node, head, hlist) {
        if (tsk->pid == entry->pid
            && addr >= entry->vm_start
            && addr < entry->vm_end) {
            found_entry = entry;
            break;
        }
    }
    if (found_entry != NULL) {
        if (vm_start != NULL)
            *vm_start = found_entry->vm_start;
        if (vm_end != NULL)
            *vm_end = found_entry->vm_end;
        if (path != NULL)
            *path = found_entry->path;
        if (user != NULL)
            *user = found_entry->user;
        rc = 0;
    }
    stp_read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
    return rc;
}
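
/* Usage sketch (illustrative; the helper below is hypothetical).  This
 * is the read-only lookup that is safe to call from interrupt context: */
#ifdef STP_TF_VMA_EXAMPLE
static const char *
__stp_tf_addr_to_path(struct task_struct *tsk, unsigned long addr)
{
    const char *path = NULL;
    if (stap_find_vma_map_info(tsk, addr, NULL, NULL, &path, NULL) == 0)
        return path;    /* valid only while the entry stays in the map */
    return NULL;
}
#endif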

// Finds vma info if the vma is present in the vma map hash table for
// a given task with the given user handle.
// Returns -ESRCH if not present.  The __stp_tf_vma_lock must *not* be
// locked before calling this function.
static int
stap_find_vma_map_info_user(struct task_struct *tsk, void *user,
                unsigned long *vm_start, unsigned long *vm_end,
                const char **path)
{
    struct hlist_head *head;
    struct hlist_node *node;
    struct __stp_tf_vma_entry *entry;
    struct __stp_tf_vma_entry *found_entry = NULL;
    int rc = -ESRCH;
    unsigned long flags;

    if (__stp_tf_vma_map == NULL)
        return rc;

    stp_read_lock_irqsave(&__stp_tf_vma_lock, flags);
    head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
    stap_hlist_for_each_entry(entry, node, head, hlist) {
        if (tsk->pid == entry->pid
            && user == entry->user) {
            found_entry = entry;
            break;
        }
    }
    if (found_entry != NULL) {
        if (vm_start != NULL)
            *vm_start = found_entry->vm_start;
        if (vm_end != NULL)
            *vm_end = found_entry->vm_end;
        if (path != NULL)
            *path = found_entry->path;
        rc = 0;
    }
    stp_read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
    return rc;
}

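// stap_drop_vma_maps(): Remove and free all vma entries recorded for
// the given task.  Takes the write lock.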
static int
stap_drop_vma_maps(struct task_struct *tsk)
{
    struct hlist_head *head;
    struct hlist_node *node;
    struct hlist_node *n;
    struct __stp_tf_vma_entry *entry;

    unsigned long flags;
    stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
    head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
    stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) {
        if (tsk->pid == entry->pid) {
            hlist_del(&entry->hlist);
            __stp_tf_vma_release_entry(entry);
        }
    }
    stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
    return 0;
}

/* Find the main executable for this mm.
 * NB: mmap_sem should be held already. */
static struct file*
stap_find_exe_file(struct mm_struct* mm)
{
    /* VM_EXECUTABLE was killed in kernel commit e9714acf, but in kernels
     * that new we can just use mm->exe_file anyway.  (PR14712)  */
#ifdef VM_EXECUTABLE
    struct vm_area_struct *vma;
    for (vma = mm->mmap; vma; vma = vma->vm_next)
        if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
            return vma->vm_file;
    return NULL;
#else
    return mm->exe_file;
#endif
}
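
/* Usage sketch (illustrative): the caller must already hold the mmap
 * semaphore, e.g.:
 *
 *	down_read(&mm->mmap_sem);
 *	file = stap_find_exe_file(mm);
 *	if (file)
 *		path = d_path(&file->f_path, buf, buflen);
 *	up_read(&mm->mmap_sem);
 */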

#endif /* TASK_FINDER_VMA_C */