runtime/kp_transport.c - ktap

Global variables defined

    ftrace_find_event, tracing_pipe_fops

Data types defined

    struct ktap_trace_iterator, enum ktap_trace_type, struct ktap_ftrace_entry

Functions defined

    _trace_seq_to_user, _trace_seq_puts, trace_empty, trace_consume, ns2usecs,
    trace_print_timestamp, print_trace_fmt, print_trace_stack, print_trace_fn,
    print_trace_bputs, print_trace_line, peek_next_entry, __find_next_entry,
    trace_find_next_entry_inc, poll_wait_pipe, tracing_wait_pipe, tracing_read_pipe,
    tracing_open_pipe, tracing_release_pipe, kp_transport_print_kstack,
    kp_transport_event_write, kp_transport_write, kp_printf, __kp_puts, __kp_bputs,
    kp_transport_exit, kp_transport_init

Macros defined

    KTAP_TRACE_ITER, TRACE_BUF_SIZE_DEFAULT

Source code

/*
 * kp_transport.c - ktap transport functionality
 *
 * This file is part of ktap by Jovi Zhangwei.
 *
 * Copyright (C) 2012-2013 Jovi Zhangwei <jovi.zhangwei@gmail.com>.
 *
 * ktap is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * ktap is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <linux/debugfs.h>
#include <linux/ftrace_event.h>
#include <linux/stacktrace.h>
#include <linux/clocksource.h>
#include <asm/uaccess.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include "../include/ktap_types.h"
#include "ktap.h"
#include "kp_events.h"
#include "kp_transport.h"

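/*
 * Per-reader iterator state: wraps the kernel's struct trace_iterator
 * together with the ktap ring buffer it drains and the per-session
 * print_timestamp option.
 */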
struct ktap_trace_iterator {
    struct ring_buffer    *buffer;
    int            print_timestamp;
    void            *private;

    struct trace_iterator    iter;
};

enum ktap_trace_type {
    __TRACE_FIRST_TYPE = 0,

    TRACE_FN = 1, /* must be same as ftrace definition in kernel */
    TRACE_PRINT,
    TRACE_BPUTS,
    TRACE_STACK,
    TRACE_USER_STACK,

    __TRACE_LAST_TYPE,
};

#define KTAP_TRACE_ITER(iter)    \
    container_of(iter, struct ktap_trace_iterator, iter)

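/* Copy the unread portion of a trace_seq to userspace; -EBUSY means nothing is pending. */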
static
ssize_t _trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
{
    int len;
    int ret;

    if (!cnt)
        return 0;

    if (s->len <= s->readpos)
        return -EBUSY;

    len = s->len - s->readpos;
    if (cnt > len)
        cnt = len;
    ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
    if (ret == cnt)
        return -EFAULT;

    cnt -= ret;

    s->readpos += cnt;
    return cnt;
}

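/* Append a string to a trace_seq, setting the full flag if it does not fit. */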
int _trace_seq_puts(struct trace_seq *s, const char *str)
{
    int len = strlen(str);

    if (s->full)
        return 0;

    if (len > ((PAGE_SIZE - 1) - s->len)) {
        s->full = 1;
        return 0;
    }

    memcpy(s->buffer + s->len, str, len);
    s->len += len;

    return len;
}

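/* Return 1 when no online CPU has unconsumed entries in the ring buffer. */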
static int trace_empty(struct trace_iterator *iter)
{
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);
    int cpu;

    for_each_online_cpu(cpu) {
        if (!ring_buffer_empty_cpu(ktap_iter->buffer, cpu))
            return 0;
    }

    return 1;
}

static void trace_consume(struct trace_iterator *iter)
{
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);

    ring_buffer_consume(ktap_iter->buffer, iter->cpu, &iter->ts,
                &iter->lost_events);
}

unsigned long long ns2usecs(cycle_t nsec)
{
    nsec += 500;
    do_div(nsec, 1000);
    return nsec;
}

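/* Prefix the current entry with its timestamp, formatted as seconds.microseconds. */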
static int trace_print_timestamp(struct trace_iterator *iter)
{
    struct trace_seq *s = &iter->seq;
    unsigned long long t;
    unsigned long secs, usec_rem;

    t = ns2usecs(iter->ts);
    usec_rem = do_div(t, USEC_PER_SEC);
    secs = (unsigned long)t;

    return trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
}

/* TODO: export the kernel function ftrace_find_event in the future, and make this lookup faster */
static struct trace_event *(*ftrace_find_event)(int type);

static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);
    struct trace_entry *entry = iter->ent;
    struct trace_event *ev;

    ev = ftrace_find_event(entry->type);

    if (ktap_iter->print_timestamp && !trace_print_timestamp(iter))
        return TRACE_TYPE_PARTIAL_LINE;

    if (ev) {
        int ret = ev->funcs->trace(iter, 0, ev);

        /* overwrite the trailing '\n' */
        iter->seq.buffer[iter->seq.len - 1] = '\0';
        iter->seq.len--;
        return ret;
    }

    return TRACE_TYPE_PARTIAL_LINE;
}

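/* Render a TRACE_STACK entry: a header line followed by one resolved symbol per frame. */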
static enum print_line_t print_trace_stack(struct trace_iterator *iter)
{
    struct trace_entry *entry = iter->ent;
    struct stack_trace trace;
    char str[KSYM_SYMBOL_LEN];
    int i;

    trace.entries = (unsigned long *)(entry + 1);
    trace.nr_entries = (iter->ent_size - sizeof(*entry)) /
               sizeof(unsigned long);

    if (!_trace_seq_puts(&iter->seq, "<stack trace>\n"))
        return TRACE_TYPE_PARTIAL_LINE;

    for (i = 0; i < trace.nr_entries; i++) {
        unsigned long p = trace.entries[i];

        if (p == ULONG_MAX)
            break;

        sprint_symbol(str, p);
        if (!trace_seq_printf(&iter->seq, " => %s\n", str))
            return TRACE_TYPE_PARTIAL_LINE;
    }

    return TRACE_TYPE_HANDLED;
}

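/* Layout of a TRACE_FN entry: the common header followed by ip and parent_ip. */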
struct ktap_ftrace_entry {
    struct trace_entry entry;
    unsigned long ip;
    unsigned long parent_ip;
};

static enum print_line_t print_trace_fn(struct trace_iterator *iter)
{
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);
    struct ktap_ftrace_entry *field = (struct ktap_ftrace_entry *)iter->ent;
    char str[KSYM_SYMBOL_LEN];

    if (ktap_iter->print_timestamp && !trace_print_timestamp(iter))
        return TRACE_TYPE_PARTIAL_LINE;

    sprint_symbol(str, field->ip);
    if (!_trace_seq_puts(&iter->seq, str))
        return TRACE_TYPE_PARTIAL_LINE;

    if (!_trace_seq_puts(&iter->seq, " <- "))
        return TRACE_TYPE_PARTIAL_LINE;

    sprint_symbol(str, field->parent_ip);
    if (!_trace_seq_puts(&iter->seq, str))
        return TRACE_TYPE_PARTIAL_LINE;

    return TRACE_TYPE_HANDLED;
}

static enum print_line_t print_trace_bputs(struct trace_iterator *iter)
{
    if (!_trace_seq_puts(&iter->seq,
                (const char *)(*(unsigned long *)(iter->ent + 1))))
        return TRACE_TYPE_PARTIAL_LINE;

    return TRACE_TYPE_HANDLED;
}

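/* Dispatch the current entry to the printer that matches its trace type. */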
static enum print_line_t print_trace_line(struct trace_iterator *iter)
{
    struct trace_entry *entry = iter->ent;
    char *str = (char *)(entry + 1);

    if (entry->type == TRACE_PRINT) {
        if (!trace_seq_printf(&iter->seq, "%s", str))
            return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
    }

    if (entry->type == TRACE_BPUTS)
        return print_trace_bputs(iter);

    if (entry->type == TRACE_STACK)
        return print_trace_stack(iter);

    if (entry->type == TRACE_FN)
        return print_trace_fn(iter);

    return print_trace_fmt(iter);
}

static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
        unsigned long *lost_events)
{
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);
    struct ring_buffer_event *event;

    event = ring_buffer_peek(ktap_iter->buffer, cpu, ts, lost_events);
    if (event) {
        iter->ent_size = ring_buffer_event_length(event);
        return ring_buffer_event_data(event);
    }

    return NULL;
}

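/*
 * Peek at every online CPU's buffer and return the entry with the
 * smallest timestamp, so output is merged in time order.
 */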
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
          unsigned long *missing_events, u64 *ent_ts)
{
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);
    struct ring_buffer *buffer = ktap_iter->buffer;
    struct trace_entry *ent, *next = NULL;
    unsigned long lost_events = 0, next_lost = 0;
    u64 next_ts = 0, ts;
    int next_cpu = -1;
    int next_size = 0;
    int cpu;

    for_each_online_cpu(cpu) {
        if (ring_buffer_empty_cpu(buffer, cpu))
            continue;

        ent = peek_next_entry(iter, cpu, &ts, &lost_events);
        /*
         * Pick the entry with the smallest timestamp:
         */
        if (ent && (!next || ts < next_ts)) {
            next = ent;
            next_cpu = cpu;
            next_ts = ts;
            next_lost = lost_events;
            next_size = iter->ent_size;
        }
    }

    iter->ent_size = next_size;

    if (ent_cpu)
        *ent_cpu = next_cpu;

    if (ent_ts)
        *ent_ts = next_ts;

    if (missing_events)
        *missing_events = next_lost;

    return next;
}

/* Find the next real entry, and increment the iterator to the next entry */
static void *trace_find_next_entry_inc(struct trace_iterator *iter)
{
    iter->ent = __find_next_entry(iter, &iter->cpu,
                      &iter->lost_events, &iter->ts);
    if (iter->ent)
        iter->idx++;

    return iter->ent ? iter : NULL;
}

static void poll_wait_pipe(void)
{
    set_current_state(TASK_INTERRUPTIBLE);
    /* sleep for 100 msecs, and try again. */
    schedule_timeout(HZ / 10);
}

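/*
 * Block the reader, polling every 100 ms, until data arrives; bail out
 * with -EINTR once wait_user is set and the buffer stays empty.
 */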
static int tracing_wait_pipe(struct file *filp)
{
    struct trace_iterator *iter = filp->private_data;
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);
    ktap_state_t *ks = ktap_iter->private;

    while (trace_empty(iter)) {

        if ((filp->f_flags & O_NONBLOCK)) {
            return -EAGAIN;
        }

        mutex_unlock(&iter->mutex);

        poll_wait_pipe();

        mutex_lock(&iter->mutex);

        if (G(ks)->wait_user && trace_empty(iter))
            return -EINTR;
    }

    return 1;
}

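/*
 * read() handler for the trace pipe: wait for data, format entries into
 * iter->seq via print_trace_line(), consume them, and copy the resulting
 * text out to the user buffer.
 */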
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt,
          loff_t *ppos)
{
    struct trace_iterator *iter = filp->private_data;
    ssize_t sret;

    /* return any leftover data */
    sret = _trace_seq_to_user(&iter->seq, ubuf, cnt);
    if (sret != -EBUSY)
        return sret;
    /*
     * Avoid more than one consumer on a single file descriptor.
     * This is just a matter of trace coherency; the ring buffer itself
     * is protected.
     */
    mutex_lock(&iter->mutex);

waitagain:
    sret = tracing_wait_pipe(filp);
    if (sret <= 0)
        goto out;

    /* stop when tracing is finished */
    if (trace_empty(iter)) {
        sret = 0;
        goto out;
    }

    if (cnt >= PAGE_SIZE)
        cnt = PAGE_SIZE - 1;

    /* reset all but tr, trace, and overruns */
    memset(&iter->seq, 0,
           sizeof(struct trace_iterator) -
           offsetof(struct trace_iterator, seq));
    iter->pos = -1;

    while (trace_find_next_entry_inc(iter) != NULL) {
        enum print_line_t ret;
        int len = iter->seq.len;

        ret = print_trace_line(iter);
        if (ret == TRACE_TYPE_PARTIAL_LINE) {
            /* don't print partial lines */
            iter->seq.len = len;
            break;
        }
        if (ret != TRACE_TYPE_NO_CONSUME)
            trace_consume(iter);

        if (iter->seq.len >= cnt)
            break;

        /*
         * Setting the full flag means we reached the trace_seq buffer
         * size and should have left via the partial-output condition
         * above; one of the trace_seq_* functions is not being used
         * properly.
         */
        WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
              iter->ent->type);
    }

    /* Now copy what we have to the user */
    sret = _trace_seq_to_user(&iter->seq, ubuf, cnt);
    if (iter->seq.readpos >= iter->seq.len)
        trace_seq_init(&iter->seq);

    /*
     * If there was nothing to send to the user, despite consuming trace
     * entries, go back and wait for more entries.
     */
    if (sret == -EBUSY)
        goto waitagain;

out:
    mutex_unlock(&iter->mutex);

    return sret;
}

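/* open() handler: allocate the per-reader iterator and bind it to this ktap instance's buffer. */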
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
    struct ktap_trace_iterator *ktap_iter;
    ktap_state_t *ks = inode->i_private;

    /* create a buffer to store the information to pass to userspace */
    ktap_iter = kzalloc(sizeof(*ktap_iter), GFP_KERNEL);
    if (!ktap_iter)
        return -ENOMEM;

    ktap_iter->private = ks;
    ktap_iter->buffer = G(ks)->buffer;
    ktap_iter->print_timestamp = G(ks)->parm->print_timestamp;
    mutex_init(&ktap_iter->iter.mutex);
    filp->private_data = &ktap_iter->iter;

    nonseekable_open(inode, filp);

    return 0;
}

static int tracing_release_pipe(struct inode *inode, struct file *file)
{
    struct trace_iterator *iter = file->private_data;
    struct ktap_trace_iterator *ktap_iter = KTAP_TRACE_ITER(iter);

    mutex_destroy(&iter->mutex);
    kfree(ktap_iter);
    return 0;
}

static const struct file_operations tracing_pipe_fops = {
    .open        = tracing_open_pipe,
    .read        = tracing_read_pipe,
    .splice_read    = NULL,
    .release    = tracing_release_pipe,
    .llseek        = no_llseek,
};

/*
 * Preemption is disabled in ring_buffer_lock_reserve.
 *
 * The implementation is similar to the kernel function __ftrace_trace_stack.
 */
void kp_transport_print_kstack(ktap_state_t *ks, uint16_t depth, uint16_t skip)
{
    struct ring_buffer *buffer = G(ks)->buffer;
    struct ring_buffer_event *event;
    struct trace_entry *entry;
    int size;

    size = depth * sizeof(unsigned long);
    event = ring_buffer_lock_reserve(buffer, sizeof(*entry) + size);
    if (!event) {
        KTAP_STATS(ks)->events_missed += 1;
        return;
    } else {
        struct stack_trace trace;

        entry = ring_buffer_event_data(event);
        tracing_generic_entry_update(entry, 0, 0);
        entry->type = TRACE_STACK;

        trace.nr_entries = 0;
        trace.skip = skip;
        trace.max_entries = depth;
        trace.entries = (unsigned long *)(entry + 1);
        save_stack_trace(&trace);

        ring_buffer_unlock_commit(buffer, event);
    }
}

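/* Copy a raw event payload straight into the transport ring buffer. */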
void kp_transport_event_write(ktap_state_t *ks, struct ktap_event_data *e)
{
    struct ring_buffer *buffer = G(ks)->buffer;
    struct ring_buffer_event *event;
    struct trace_entry *ev_entry = e->data->raw->data;
    struct trace_entry *entry;
    int entry_size = e->data->raw->size;

    event = ring_buffer_lock_reserve(buffer, entry_size +
                     sizeof(struct ftrace_event_call *));
    if (!event) {
        KTAP_STATS(ks)->events_missed += 1;
        return;
    } else {
        entry = ring_buffer_event_data(event);

        memcpy(entry, ev_entry, entry_size);

        ring_buffer_unlock_commit(buffer, event);
    }
}

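/* Write an arbitrary chunk of output as a TRACE_PRINT entry. */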
void kp_transport_write(ktap_state_t *ks, const void *data, size_t length)
{
    struct ring_buffer *buffer = G(ks)->buffer;
    struct ring_buffer_event *event;
    struct trace_entry *entry;
    int size;

    size = sizeof(struct trace_entry) + length;

    event = ring_buffer_lock_reserve(buffer, size);
    if (!event) {
        KTAP_STATS(ks)->events_missed += 1;
        return;
    } else {
        entry = ring_buffer_event_data(event);

        tracing_generic_entry_update(entry, 0, 0);
        entry->type = TRACE_PRINT;
        memcpy(entry + 1, data, length);

        ring_buffer_unlock_commit(buffer, event);
    }
}

/* general print function */
void kp_printf(ktap_state_t *ks, const char *fmt, ...)
{
    char buff[1024];
    va_list args;
    int len;

    va_start(args, fmt);
    len = vscnprintf(buff, 1024, fmt, args);
    va_end(args);

    buff[len] = '\0';
    kp_transport_write(ks, buff, len + 1);
}

void __kp_puts(ktap_state_t *ks, const char *str)
{
    kp_transport_write(ks, str, strlen(str) + 1);
}

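/*
 * Fast puts: only the pointer to the string is stored in the ring buffer,
 * so the string must remain valid until it is printed (constant strings).
 */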
void __kp_bputs(ktap_state_t *ks, const char *str)
{
    struct ring_buffer *buffer = G(ks)->buffer;
    struct ring_buffer_event *event;
    struct trace_entry *entry;
    int size;

    size = sizeof(struct trace_entry) + sizeof(unsigned long *);

    event = ring_buffer_lock_reserve(buffer, size);
    if (!event) {
        KTAP_STATS(ks)->events_missed += 1;
        return;
    } else {
        entry = ring_buffer_event_data(event);

        tracing_generic_entry_update(entry, 0, 0);
        entry->type = TRACE_BPUTS;
        *(unsigned long *)(entry + 1) = (unsigned long)str;

        ring_buffer_unlock_commit(buffer, event);
    }
}

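/* Tear down the transport: free the ring buffer and remove the debugfs pipe file. */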
void kp_transport_exit(ktap_state_t *ks)
{
    if (G(ks)->buffer)
        ring_buffer_free(G(ks)->buffer);
    debugfs_remove(G(ks)->trace_pipe_dentry);
}

#define TRACE_BUF_SIZE_DEFAULT    1441792UL /* 16384 * 88 (sizeof(entry)) */

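/*
 * Set up the transport: resolve ftrace_find_event via kallsyms, allocate
 * the ring buffer, and expose it as trace_pipe_<tgid> in the given debugfs
 * directory.
 */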
int kp_transport_init(ktap_state_t *ks, struct dentry *dir)
{
    struct ring_buffer *buffer;
    struct dentry *dentry;
    char filename[32] = {0};

#ifdef CONFIG_PPC64
    ftrace_find_event = (void *)kallsyms_lookup_name(".ftrace_find_event");
#else
    ftrace_find_event = (void *)kallsyms_lookup_name("ftrace_find_event");
#endif
    if (!ftrace_find_event) {
        printk("ktap: cannot lookup ftrace_find_event in kallsyms\n");
        return -EINVAL;
    }

    buffer = ring_buffer_alloc(TRACE_BUF_SIZE_DEFAULT, RB_FL_OVERWRITE);
    if (!buffer)
        return -ENOMEM;

    sprintf(filename, "trace_pipe_%d", (int)task_tgid_vnr(current));

    dentry = debugfs_create_file(filename, 0444, dir,
                     ks, &tracing_pipe_fops);
    if (!dentry) {
        pr_err("ktapvm: cannot create trace_pipe file in debugfs\n");
        ring_buffer_free(buffer);
        return -1;
    }

    G(ks)->buffer = buffer;
    G(ks)->trace_pipe_dentry = dentry;

    return 0;
}
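
The file above only provides the kernel side of the transport; output is consumed by reading the per-session trace_pipe_<pid> file that kp_transport_init() creates. A minimal sketch of a userspace reader is shown below, assuming debugfs is mounted at /sys/kernel/debug and that the directory passed to kp_transport_init() is the ktapvm instance directory; the path and PID used here are hypothetical.

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        /* hypothetical default path; pass the real trace_pipe_<pid> path as argv[1] */
        const char *path = (argc > 1) ? argv[1] : "/sys/kernel/debug/ktap/trace_pipe_1234";
        char buf[4096];
        ssize_t n;
        int fd = open(path, O_RDONLY);    /* blocking reads; see tracing_wait_pipe() */

        if (fd < 0) {
            perror("open");
            return 1;
        }

        /* read() blocks until formatted trace text is available, then mirrors it to stdout */
        while ((n = read(fd, buf, sizeof(buf))) > 0)
            fwrite(buf, 1, (size_t)n, stdout);

        close(fd);
        return 0;
    }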