runtime/kp_events.c - ktap

Source code

  1. /*
  2. * kp_events.c - ktap events management (registration, destruction, event callbacks)
  3. *
  4. * This file is part of ktap by Jovi Zhangwei.
  5. *
  6. * Copyright (C) 2012-2013 Jovi Zhangwei <jovi.zhangwei@gmail.com>.
  7. *
  8. * ktap is free software; you can redistribute it and/or modify it
  9. * under the terms and conditions of the GNU General Public License,
  10. * version 2, as published by the Free Software Foundation.
  11. *
  12. * ktap is distributed in the hope it will be useful, but WITHOUT
  13. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14. * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  15. * more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along with
  18. * this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  20. */

  21. #include <linux/module.h>
  22. #include <linux/ctype.h>
  23. #include <linux/slab.h>
  24. #include <linux/version.h>
  25. #include <asm/syscall.h>
  26. #include "../include/ktap_types.h"
  27. #include "ktap.h"
  28. #include "kp_obj.h"
  29. #include "kp_str.h"
  30. #include "kp_transport.h"
  31. #include "kp_vm.h"
  32. #include "kp_events.h"

  33. const char *kp_event_tostr(ktap_state_t *ks)
  34. {
  35.     struct ktap_event_data *e = ks->current_event;
  36.     struct ftrace_event_call *call;
  37.     struct trace_iterator *iter;
  38.     struct trace_event *ev;
  39.     enum print_line_t ret = TRACE_TYPE_NO_CONSUME;
  40.     static const char *dummy_msg = "argstr_not_available";

  41.     /* need to check that the current context is a valid tracing context */
  42.     if (!ks->current_event) {
  43.         kp_error(ks, "cannot stringify event str in invalid context\n");
  44.         return NULL;
  45.     }

  46.     /* check if it was stringified before */
  47.     if (ks->current_event->argstr)
  48.         return getstr(ks->current_event->argstr);

  49.     /* timer events and raw tracepoints don't have an associated argstr */
  50.     if (e->event->type == KTAP_EVENT_TYPE_PERF && e->event->perf->tp_event)
  51.         call = e->event->perf->tp_event;
  52.     else
  53.         return dummy_msg;

  54.     /* Simulate the iterator */

  55.     /*
  56.      * use a temp percpu buffer as the trace_iterator;
  57.      * we cannot reuse the print_buffer because we may be called from printf.
  58.      */
  59.     iter = kp_this_cpu_temp_buffer(ks);

  60.     trace_seq_init(&iter->seq);
  61.     iter->ent = e->data->raw->data;

  62.     ev = &(call->event);
  63.     if (ev)
  64.         ret = ev->funcs->trace(iter, 0, ev);

  65.     if (ret != TRACE_TYPE_NO_CONSUME) {
  66.         struct trace_seq *s = &iter->seq;
  67.         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;

  68.         s->buffer[len] = '\0';
  69.         return &s->buffer[0];
  70.     }

  71.     return dummy_msg;
  72. }

  73. /* return the interned string representation ('argstr') of the current event */
  74. const ktap_str_t *kp_event_stringify(ktap_state_t *ks)
  75. {
  76.     const char *str;
  77.     ktap_str_t *ts;

  78.     /* check if it was stringified before */
  79.     if (ks->current_event->argstr)
  80.         return ks->current_event->argstr;

  81.     str = kp_event_tostr(ks);
  82.     if (!str)
  83.         return NULL;

  84.     ts = kp_str_newz(ks, str);
  85.     ks->current_event->argstr = ts;
  86.     return ts;
  87. }
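
/*
 * Illustrative sketch (not part of the original file; the function name is
 * hypothetical): how a library function could expose the cached event string
 * to script code, mirroring the push pattern used elsewhere in ktap
 * (set_* on ks->top, then incr_top).  Kept under #if 0 like the other
 * disabled code in this file.
 */
#if 0
static void example_push_argstr(ktap_state_t *ks)
{
    const ktap_str_t *ts = kp_event_stringify(ks);

    if (ts)
        set_string(ks->top, ts);    /* push the interned argstr */
    else
        set_nil(ks->top);           /* invalid context or OOM */
    incr_top(ks);
}
#endif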

  88. /*
  89. * This definition must be kept in sync with kernel/trace/trace.h.
  90. * TODO: export this struct from the kernel
  91. */
  92. struct ftrace_event_field {
  93.     struct list_head        link;
  94.     const char              *name;
  95.     const char              *type;
  96.     int                     filter_type;
  97.     int                     offset;
  98.     int                     size;
  99.     int                     is_signed;
  100. };

  101. static struct list_head *get_fields(struct ftrace_event_call *event_call)
  102. {
  103.     if (!event_call->class->get_fields)
  104.         return &event_call->class->fields;
  105.     return event_call->class->get_fields(event_call);
  106. }

  107. void kp_event_getarg(ktap_state_t *ks, ktap_val_t *ra, int idx)
  108. {
  109.     struct ktap_event_data *e = ks->current_event;
  110.     struct ktap_event *event = e->event;
  111.     struct ktap_event_field *event_fields = &event->fields[idx];

  112.     switch (event_fields->type)  {
  113.     case KTAP_EVENT_FIELD_TYPE_INT: {
  114.         struct trace_entry *entry = e->data->raw->data;
  115.         void *value = (unsigned char *)entry + event_fields->offset;
  116.         int n = *(int *)value;
  117.         set_number(ra, n);
  118.         return;
  119.         }
  120.     case KTAP_EVENT_FIELD_TYPE_LONG: {
  121.         struct trace_entry *entry = e->data->raw->data;
  122.         void *value = (unsigned char *)entry + event_fields->offset;
  123.         long n = *(long *)value;
  124.         set_number(ra, n);
  125.         return;
  126.         }
  127.     case KTAP_EVENT_FIELD_TYPE_STRING: {
  128.         struct trace_entry *entry = e->data->raw->data;
  129.         ktap_str_t *ts;
  130.         void *value = (unsigned char *)entry + event_fields->offset;
  131.         ts = kp_str_newz(ks, (char *)value);
  132.         if (ts)
  133.             set_string(ra, ts);
  134.         else
  135.             set_nil(ra);
  136.         return;
  137.         }
  138.     case KTAP_EVENT_FIELD_TYPE_CONST: {
  139.         set_number(ra, (ktap_number)event_fields->offset);
  140.         return;
  141.         }
  142.     case KTAP_EVENT_FIELD_TYPE_REGESTER: {
  143.         unsigned long *reg = (unsigned long *)((u8 *)e->regs +
  144.                     event_fields->offset);
  145.         set_number(ra, *reg);
  146.         return;
  147.         }
  148.     case KTAP_EVENT_FIELD_TYPE_NIL:
  149.         set_nil(ra);
  150.         return;
  151.     case KTAP_EVENT_FIELD_TYPE_INVALID:
  152.         kp_error(ks, "the field type is not supported yet\n");
  153.         set_nil(ra);
  154.         return;
  155.     }
  156. }

  157. /* init all fields of the event, for quick arg1..arg9 access */
  158. static int init_event_fields(ktap_state_t *ks, struct ktap_event *event)
  159. {
  160.     struct ftrace_event_call *event_call = event->perf->tp_event;
  161.     struct ktap_event_field *event_fields = &event->fields[0];
  162.     struct ftrace_event_field *field;
  163.     struct list_head *head;
  164.     int idx = 0, n = 0;

  165.     /* only init fields for tracepoint events, not timer events */
  166.     if (!event_call)
  167.         return 0;

  168.     /* intern probename */
  169.     event->name = kp_str_newz(ks, event_call->name);
  170.     if (unlikely(!event->name))
  171.         return -ENOMEM;

  172.     head = get_fields(event_call);
  173.     list_for_each_entry_reverse(field, head, link) {
  174.         if (n++ == 9) {
  175.             /*
  176.              * Some events have more than 9 fields; just ignore
  177.              * the remaining fields for now.
  178.              *
  179.              * TODO: support accessing all fields of a tracepoint event
  180.              *
  181.              * Examples: mce:mce_record, ext4:ext4_writepages, ...
  182.              */
  183.             return 0;
  184.         }

  185.         event_fields[idx].offset = field->offset;

  186.         if (field->size == 4) {
  187.             event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_INT;
  188.             idx++;
  189.             continue;
  190.         } else if (field->size == 8) {
  191.             event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_LONG;
  192.             idx++;
  193.             continue;
  194.         }
  195.         if (!strncmp(field->type, "char", 4)) {
  196.             event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_STRING;
  197.             idx++;
  198.             continue;
  199.         }

  200.         /* TODO: add more type checks */
  201.         event_fields[idx++].type = KTAP_EVENT_FIELD_TYPE_INVALID;
  202.     }

  203.     /* init all remaining fields as NIL */
  204.     while (idx < 9)
  205.         event_fields[idx++].type = KTAP_EVENT_FIELD_TYPE_NIL;

  206.     return 0;
  207. }

  208. static inline void call_probe_closure(ktap_state_t *mainthread,
  209.                       ktap_func_t *fn,
  210.                       struct ktap_event_data *e, int rctx)
  211. {
  212.     ktap_state_t *ks;
  213.     ktap_val_t *func;

  214.     ks = kp_vm_new_thread(mainthread, rctx);
  215.     set_func(ks->top, fn);
  216.     func = ks->top;
  217.     incr_top(ks);

  218.     ks->current_event = e;

  219.     kp_vm_call(ks, func, 0);

  220.     ks->current_event = NULL;
  221.     kp_vm_exit_thread(ks);
  222. }

  223. /*
  224. * Callback tracing function for the perf event subsystem.
  225. *
  226. * To keep ktap reentrant, we don't disable irqs in the callback function,
  227. * the same as perf and ftrace. To be reentrant, we need percpu data
  228. * that is isolated per context (irq/softirq/nmi/process).
  229. *
  230. * The recursion check here mainly guards against corrupting the
  231. * ktap_state_t from the timer closure callback. For tracepoint
  232. * recursion, the perf core already handles it.
  233. *
  234. * Note that the tracepoint handler is called with rcu_read_lock held.
  235. */
  236. static void perf_callback(struct perf_event *perf_event,
  237.                struct perf_sample_data *data,
  238.                struct pt_regs *regs)
  239. {
  240.     struct ktap_event *event;
  241.     struct ktap_event_data e;
  242.     ktap_state_t *ks;
  243.     int rctx;

  244.     event = perf_event->overflow_handler_context;
  245.     ks = event->ks;

  246.     if (unlikely(ks->stop))
  247.         return;

  248.     rctx = get_recursion_context(ks);
  249.     if (unlikely(rctx < 0))
  250.         return;

  251.     e.event = event;
  252.     e.data = data;
  253.     e.regs = regs;
  254.     e.argstr = NULL;

  255.     call_probe_closure(ks, event->fn, &e, rctx);

  256.     put_recursion_context(ks, rctx);
  257. }

  258. /*
  259. * Generic ktap event creation function (based on the perf callback),
  260. * used for tracepoints/kprobe/uprobe/profile-timer/hw_breakpoint/pmu.
  261. */
  262. int kp_event_create(ktap_state_t *ks, struct perf_event_attr *attr,
  263.             struct task_struct *task, const char *filter,
  264.             ktap_func_t *fn)
  265. {
  266.     struct ktap_event *event;
  267.     struct perf_event *perf_event;
  268.     void *callback = perf_callback;
  269.     int cpu, ret;

  270.     if (G(ks)->parm->dry_run)
  271.         callback = NULL;

  272.     /*
  273.      * don't start tracing until ktap_wait; the reasons are:
  274.      * 1) some events may hit before the filter is applied
  275.      * 2) it is simpler to manage the tracing thread
  276.      * 3) it avoids racing with the mainthread.
  277.      *
  278.      * Another way is to set attr.disabled to 1, then call perf_event_enable
  279.      * after the filter is applied; however, perf_event_enable was not
  280.      * exported in kernels older than 3.3, so we dropped that method.
  281.      */
  282.     ks->stop = 1;

  283.     for_each_cpu(cpu, G(ks)->cpumask) {
  284.         event = kzalloc(sizeof(struct ktap_event), GFP_KERNEL);
  285.         if (!event)
  286.             return -ENOMEM;

  287.         event->type = KTAP_EVENT_TYPE_PERF;
  288.         event->ks = ks;
  289.         event->fn = fn;
  290.         perf_event = perf_event_create_kernel_counter(attr, cpu, task,
  291.                                   callback, event);
  292.         if (IS_ERR(perf_event)) {
  293.             int err = PTR_ERR(perf_event);
  294.             kp_error(ks, "unable to register perf event: "
  295.                      "[cpu: %d; id: %d; err: %d]\n",
  296.                      cpu, attr->config, err);
  297.             kfree(event);
  298.             return err;
  299.         }

  300.         if (attr->type == PERF_TYPE_TRACEPOINT) {
  301.             const char *name = perf_event->tp_event->name;
  302.             kp_verbose_printf(ks, "enable perf event: "
  303.                           "[cpu: %d; id: %d; name: %s; "
  304.                           "filter: %s; pid: %d]\n",
  305.                           cpu, attr->config, name, filter,
  306.                           task ? task_tgid_vnr(task) : -1);
  307.         } else if (attr->type == PERF_TYPE_SOFTWARE &&
  308.              attr->config == PERF_COUNT_SW_CPU_CLOCK) {
  309.             kp_verbose_printf(ks, "enable profile event: "
  310.                           "[cpu: %d; sample_period: %d]\n",
  311.                           cpu, attr->sample_period);
  312.         } else {
  313.             kp_verbose_printf(ks, "unknown perf event type\n");
  314.         }

  315.         event->perf = perf_event;
  316.         INIT_LIST_HEAD(&event->list);
  317.         list_add_tail(&event->list, &G(ks)->events_head);

  318.         if (init_event_fields(ks, event)) {
  319.             kp_error(ks, "unable to init event fields, id %d\n",
  320.                     attr->config);
  321.             perf_event_release_kernel(event->perf);
  322.             list_del(&event->list);
  323.             kfree(event);
  324.             return -ENOMEM;
  325.         }

  326.         if (!filter)
  327.             continue;

  328.         ret = kp_ftrace_profile_set_filter(perf_event, attr->config,
  329.                            filter);
  330.         if (ret) {
  331.             kp_error(ks, "unable to set event filter: "
  332.                      "[id: %d; filter: %s; ret: %d]\n",
  333.                      attr->config, filter, ret);
  334.             perf_event_release_kernel(event->perf);
  335.             list_del(&event->list);
  336.             kfree(event);
  337.             return ret;
  338.         }
  339.     }

  340.     return 0;
  341. }
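
/*
 * Illustrative sketch (not part of the original file; function name and the
 * 'tp_id' parameter are hypothetical): one plausible way a caller could
 * register a tracepoint event through kp_event_create().  The tracepoint id
 * is assumed to have been resolved from tracefs by the userspace compiler
 * and passed down; kept under #if 0 like the other disabled code here.
 */
#if 0
static int example_create_tracepoint_event(ktap_state_t *ks, u64 tp_id,
                                           ktap_func_t *fn)
{
    struct perf_event_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.type = PERF_TYPE_TRACEPOINT;   /* perf tracepoint event */
    attr.config = tp_id;                /* id from the tracefs 'id' file */
    attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
                       PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD;
    attr.sample_period = 1;             /* fire the callback on every hit */
    attr.size = sizeof(attr);

    /* task == NULL: trace system-wide; filter == NULL: no event filter */
    return kp_event_create(ks, &attr, NULL, NULL, fn);
}
#endif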

  342. /*
  343. * The tracepoint_probe_register functions changed their prototype to take
  344. * a 'struct tracepoint', which makes it hard to refer to a tracepoint by
  345. * name. Also, this ktap raw tracepoint interface is not encouraged for use,
  346. * so it is disabled for now.
  347. */
  348. #if 0
  349. /*
  350. * Ignore the function prototype here, just use the first argument.
  351. */
  352. static void probe_callback(void *__data)
  353. {
  354.     struct ktap_event *event = __data;
  355.     ktap_state_t *ks = event->ks;
  356.     struct ktap_event_data e;
  357.     struct pt_regs regs; /* pt_regs may be too large for the stack */
  358.     int rctx;

  359.     if (unlikely(ks->stop))
  360.         return;

  361.     rctx = get_recursion_context(ks);
  362.     if (unlikely(rctx < 0))
  363.         return;

  364.     perf_fetch_caller_regs(&regs);

  365.     e.event = event;
  366.     e.regs = &regs;
  367.     e.argstr = NULL;

  368.     call_probe_closure(ks, event->fn, &e, rctx);

  369.     put_recursion_context(ks, rctx);
  370. }

  371. /*
  372. * syscall events handling
  373. */

  374. static DEFINE_MUTEX(syscall_trace_lock);
  375. static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
  376. static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
  377. static int sys_refcount_enter;
  378. static int sys_refcount_exit;

  379. static int get_syscall_num(const char *name)
  380. {
  381.     int i;

  382.     for (i = 0; i < NR_syscalls; i++) {
  383.         if (syscalls_metadata[i] &&
  384.             !strcmp(name, syscalls_metadata[i]->name + 4))
  385.             return i;
  386.     }
  387.     return -1;
  388. }

  389. static void trace_syscall_enter(void *data, struct pt_regs *regs, long id)
  390. {
  391.     struct ktap_event *event = data;
  392.     ktap_state_t *ks = event->ks;
  393.     struct ktap_event_data e;
  394.     int syscall_nr;
  395.     int rctx;

  396.     if (unlikely(ks->stop))
  397.         return;

  398.     syscall_nr = syscall_get_nr(current, regs);
  399.     if (unlikely(syscall_nr < 0))
  400.         return;
  401.     if (!test_bit(syscall_nr, enabled_enter_syscalls))
  402.         return;

  403.     rctx = get_recursion_context(ks);
  404.     if (unlikely(rctx < 0))
  405.         return;

  406.     e.event = event;
  407.     e.regs = regs;
  408.     e.argstr = NULL;

  409.     call_probe_closure(ks, event->fn, &e, rctx);

  410.     put_recursion_context(ks, rctx);
  411. }

  412. static void trace_syscall_exit(void *data, struct pt_regs *regs, long id)
  413. {
  414.     struct ktap_event *event = data;
  415.     ktap_state_t *ks = event->ks;
  416.     struct ktap_event_data e;
  417.     int syscall_nr;
  418.     int rctx;

  419.     syscall_nr = syscall_get_nr(current, regs);
  420.     if (unlikely(syscall_nr < 0))
  421.         return;
  422.     if (!test_bit(syscall_nr, enabled_exit_syscalls))
  423.         return;

  424.     if (unlikely(ks->stop))
  425.         return;

  426.     rctx = get_recursion_context(ks);
  427.     if (unlikely(rctx < 0))
  428.         return;

  429.     e.event = event;
  430.     e.regs = regs;
  431.     e.argstr = NULL;

  432.     call_probe_closure(ks, event->fn, &e, rctx);

  433.     put_recursion_context(ks, rctx);
  434. }

  435. /* called in dry-run mode, used to compare overhead with a normal vm call */
  436. static void dry_run_callback(void *data, struct pt_regs *regs, long id)
  437. {

  438. }

  439. static void init_syscall_event_fields(struct ktap_event *event, int is_enter)
  440. {
  441.     struct ftrace_event_call *event_call;
  442.     struct ktap_event_field *event_fields = &event->fields[0];
  443.     struct syscall_metadata *meta = syscalls_metadata[event->syscall_nr];
  444.     int idx = 0;

  445.     event_call = is_enter ? meta->enter_event : meta->exit_event;

  446.     event_fields[0].type = KTAP_EVENT_FIELD_TYPE_CONST;
  447.     event_fields[0].offset = event->syscall_nr;

  448.     if (!is_enter) {
  449. #ifdef CONFIG_X86_64
  450.         event_fields[1].type = KTAP_EVENT_FIELD_TYPE_REGESTER;
  451.         event_fields[1].offset = offsetof(struct pt_regs, ax);
  452. #endif
  453.         return;
  454.     }

  455.     while (idx++ < meta->nb_args) {
  456.         event_fields[idx].type = KTAP_EVENT_FIELD_TYPE_REGESTER;
  457. #ifdef CONFIG_X86_64
  458.         switch (idx) {
  459.         case 1:
  460.             event_fields[idx].offset = offsetof(struct pt_regs, di);
  461.             break;
  462.         case 2:
  463.             event_fields[idx].offset = offsetof(struct pt_regs, si);
  464.             break;
  465.         case 3:
  466.             event_fields[idx].offset = offsetof(struct pt_regs, dx);
  467.             break;
  468.         case 4:
  469.             event_fields[idx].offset =
  470.                         offsetof(struct pt_regs, r10);
  471.             break;
  472.         case 5:
  473.             event_fields[idx].offset = offsetof(struct pt_regs, r8);
  474.             break;
  475.         case 6:
  476.             event_fields[idx].offset = offsetof(struct pt_regs, r9);
  477.             break;
  478.         }
  479. #else
  480. #warning "syscall tracepoint register access is not supported on this arch, use 'trace syscalls:* {}' instead"
  481.         break;
  482. #endif
  483.     }

  484.     /* init all remaining fields as NIL */
  485.     while (idx < 9)
  486.         event_fields[idx++].type = KTAP_EVENT_FIELD_TYPE_NIL;
  487. }

  488. static int syscall_event_register(ktap_state_t *ks, const char *event_name,
  489.                   struct ktap_event *event)
  490. {
  491.     int syscall_nr = 0, is_enter = 0;
  492.     void *callback = NULL;
  493.     int ret = 0;

  494.     if (!strncmp(event_name, "sys_enter_", 10)) {
  495.         is_enter = 1;
  496.         event->type = KTAP_EVENT_TYPE_SYSCALL_ENTER;
  497.         syscall_nr = get_syscall_num(event_name + 10);
  498.         callback = trace_syscall_enter;
  499.     } else if (!strncmp(event_name, "sys_exit_", 9)) {
  500.         is_enter = 0;
  501.         event->type = KTAP_EVENT_TYPE_SYSCALL_EXIT;
  502.         syscall_nr = get_syscall_num(event_name + 9);
  503.         callback = trace_syscall_exit;
  504.     }

  505.     if (G(ks)->parm->dry_run)
  506.         callback = dry_run_callback;

  507.     if (syscall_nr < 0)
  508.         return -1;

  509.     event->syscall_nr = syscall_nr;

  510.     init_syscall_event_fields(event, is_enter);

  511.     mutex_lock(&syscall_trace_lock);
  512.     if (is_enter) {
  513.         if (!sys_refcount_enter)
  514.             ret = register_trace_sys_enter(callback, event);
  515.         if (!ret) {
  516.             set_bit(syscall_nr, enabled_enter_syscalls);
  517.             sys_refcount_enter++;
  518.         }
  519.     } else {
  520.         if (!sys_refcount_exit)
  521.             ret = register_trace_sys_exit(callback, event);
  522.         if (!ret) {
  523.             set_bit(syscall_nr, enabled_exit_syscalls);
  524.             sys_refcount_exit++;
  525.         }
  526.     }
  527.     mutex_unlock(&syscall_trace_lock);

  528.     return ret;
  529. }

  530. static int syscall_event_unregister(ktap_state_t *ks, struct ktap_event *event)
  531. {
  532.     int ret = 0;
  533.     void *callback;

  534.     if (event->type == KTAP_EVENT_TYPE_SYSCALL_ENTER)
  535.         callback = trace_syscall_enter;
  536.     else
  537.         callback = trace_syscall_exit;

  538.     if (G(ks)->parm->dry_run)
  539.         callback = dry_run_callback;

  540.     mutex_lock(&syscall_trace_lock);
  541.     if (event->type == KTAP_EVENT_TYPE_SYSCALL_ENTER) {
  542.         sys_refcount_enter--;
  543.         clear_bit(event->syscall_nr, enabled_enter_syscalls);
  544.         if (!sys_refcount_enter)
  545.             unregister_trace_sys_enter(callback, event);
  546.     } else {
  547.         sys_refcount_exit--;
  548.         clear_bit(event->syscall_nr, enabled_exit_syscalls);
  549.         if (!sys_refcount_exit)
  550.             unregister_trace_sys_exit(callback, event);
  551.     }
  552.     mutex_unlock(&syscall_trace_lock);

  553.     return ret;
  554. }

  555. /*
  556. * Register a tracepoint event directly, not based on the perf callback.
  557. *
  558. * This tracing method is faster than the perf callback because it
  559. * doesn't need to write trace data into any temp buffer, and its
  560. * code path is much shorter.
  561. */
  562. int kp_event_create_tracepoint(ktap_state_t *ks, const char *event_name,
  563.                    ktap_func_t *fn)
  564. {
  565.     struct ktap_event *event;
  566.     void *callback = probe_callback;
  567.     int is_syscall = 0;
  568.     int ret;

  569.     if (G(ks)->parm->dry_run)
  570.         callback = NULL;

  571.     if (!strncmp(event_name, "sys_enter_", 10) ||
  572.         !strncmp(event_name, "sys_exit_", 9))
  573.         is_syscall = 1;

  574.     event = kzalloc(sizeof(struct ktap_event), GFP_KERNEL);
  575.     if (!event)
  576.         return -ENOMEM;

  577.     event->ks = ks;
  578.     event->fn = fn;
  579.     event->name = kp_str_newz(ks, event_name);
  580.     if (unlikely(!event->name)) {
  581.         kfree(event);
  582.         return -ENOMEM;
  583.     }

  584.     INIT_LIST_HEAD(&event->list);
  585.     list_add_tail(&event->list, &G(ks)->events_head);

  586.     if (is_syscall) {
  587.         ret = syscall_event_register(ks, event_name, event);
  588.     } else {
  589.         event->type = KTAP_EVENT_TYPE_TRACEPOINT;
  590.         ret = tracepoint_probe_register(event_name, callback, event);
  591.     }

  592.     if (ret) {
  593.         kp_error(ks, "register tracepoint %s failed, ret: %d\n",
  594.                 event_name, ret);
  595.         list_del(&event->list);
  596.         kfree(event);
  597.         return ret;
  598.     }
  599.     return 0;
  600. }

  601. #endif

  602. /* kprobe handler */
  603. static int __kprobes pre_handler_kprobe(struct kprobe *p, struct pt_regs *regs)
  604. {
  605.     struct ktap_event *event = container_of(p, struct ktap_event, kp);
  606.     ktap_state_t *ks = event->ks;
  607.     struct ktap_event_data e;
  608.     int rctx;

  609.     if (unlikely(ks->stop))
  610.         return 0;

  611.     rctx = get_recursion_context(ks);
  612.     if (unlikely(rctx < 0))
  613.         return 0;

  614.     e.event = event;
  615.     e.regs = regs;
  616.     e.argstr = NULL;

  617.     call_probe_closure(ks, event->fn, &e, rctx);

  618.     put_recursion_context(ks, rctx);
  619.     return 0;
  620. }

  621. /*
  622. * Register a kprobe event directly, not based on the perf callback.
  623. *
  624. * This tracing method is faster than the perf callback because it
  625. * doesn't need to write trace data into any temp buffer, and its
  626. * code path is much shorter.
  627. */
  628. int kp_event_create_kprobe(ktap_state_t *ks, const char *event_name,
  629.                ktap_func_t *fn)
  630. {
  631.     struct ktap_event *event;
  632.     void *callback = pre_handler_kprobe;
  633.     int ret;

  634.     if (G(ks)->parm->dry_run)
  635.         callback = NULL;

  636.     event = kzalloc(sizeof(struct ktap_event), GFP_KERNEL);
  637.     if (!event)
  638.         return -ENOMEM;

  639.     event->ks = ks;
  640.     event->fn = fn;
  641.     event->name = kp_str_newz(ks, event_name);
  642.     if (unlikely(!event->name)) {
  643.         kfree(event);
  644.         return -ENOMEM;
  645.     }

  646.     INIT_LIST_HEAD(&event->list);
  647.     list_add_tail(&event->list, &G(ks)->events_head);

  648.     event->type = KTAP_EVENT_TYPE_KPROBE;

  649.     event->kp.symbol_name = event_name;
  650.     event->kp.pre_handler = callback;
  651.     ret = register_kprobe(&event->kp);
  652.     if (ret) {
  653.         kp_error(ks, "register kprobe event %s failed, ret: %d\n",
  654.                 event_name, ret);
  655.         list_del(&event->list);
  656.         kfree(event);
  657.         return ret;
  658.     }
  659.     return 0;
  660. }
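
/*
 * Illustrative sketch (not part of the original file; the probed symbol is
 * only an example): attaching a ktap closure to a kernel function by symbol
 * name via the direct kprobe path above.  The symbol must be resolvable by
 * the kprobe core (e.g. visible in kallsyms and not blacklisted).  Kept
 * under #if 0 like the other disabled code in this file.
 */
#if 0
static int example_probe_symbol(ktap_state_t *ks, ktap_func_t *fn)
{
    /* event_name doubles as the probed symbol and the event's name */
    return kp_event_create_kprobe(ks, "do_sys_open", fn);
}
#endif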


  661. static void events_destroy(ktap_state_t *ks)
  662. {
  663.     struct ktap_event *event;
  664.     struct list_head *tmp, *pos;
  665.     struct list_head *head = &G(ks)->events_head;

  666.     list_for_each(pos, head) {
  667.         event = container_of(pos, struct ktap_event,
  668.                        list);
  669.         if (event->type == KTAP_EVENT_TYPE_PERF)
  670.             perf_event_release_kernel(event->perf);
  671. #if 0
  672.         else if (event->type == KTAP_EVENT_TYPE_TRACEPOINT)
  673.             tracepoint_probe_unregister(getstr(event->name),
  674.                             probe_callback, event);
  675.         else if (event->type == KTAP_EVENT_TYPE_SYSCALL_ENTER ||
  676.              event->type == KTAP_EVENT_TYPE_SYSCALL_EXIT)
  677.             syscall_event_unregister(ks, event);
  678. #endif
  679.         else if (event->type == KTAP_EVENT_TYPE_KPROBE)
  680.             unregister_kprobe(&event->kp);
  681.     }
  682.     /*
  683.      * Ensure our callback won't be called anymore. The buffers
  684.      * will be freed after that.
  685.      */
  686.     tracepoint_synchronize_unregister();

  687.     list_for_each_safe(pos, tmp, head) {
  688.         event = container_of(pos, struct ktap_event,
  689.                        list);
  690.         list_del(&event->list);
  691.         kfree(event);
  692.     }
  693. }

  694. void kp_events_exit(ktap_state_t *ks)
  695. {
  696.     if (!G(ks)->trace_enabled)
  697.         return;

  698.     events_destroy(ks);

  699.     /* call trace_end_closure after all events are unregistered */
  700.     if ((G(ks)->state != KTAP_ERROR) && G(ks)->trace_end_closure) {
  701.         G(ks)->state = KTAP_TRACE_END;
  702.         set_func(ks->top, G(ks)->trace_end_closure);
  703.         incr_top(ks);
  704.         kp_vm_call(ks, ks->top - 1, 0);
  705.         G(ks)->trace_end_closure = NULL;
  706.     }

  707.     G(ks)->trace_enabled = 0;
  708. }

  709. int kp_events_init(ktap_state_t *ks)
  710. {
  711.     G(ks)->trace_enabled = 1;
  712.     return 0;
  713. }