ruby.c - ctags-5.8

Global variables defined

Data types defined

Functions defined

Source code

/*
*   $Id: ruby.c 571 2007-06-24 23:32:14Z elliotth $
*
*   Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
*   Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
*   Copyright (c) 2004 Elliott Hughes <enh@acm.org>
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for Ruby language
*   files.
*/

/*
*   INCLUDE FILES
*/
#include "general.h"  /* must always come first */

#include <string.h>

#include "entry.h"
#include "parse.h"
#include "read.h"
#include "vstring.h"

/*
*   DATA DECLARATIONS
*/
/*
* Kinds of tag this parser can produce. The non-negative values double as
* indices into the RubyKinds table; K_UNDEFINED means "no tag".
*/
typedef enum {
    K_UNDEFINED = -1,
    K_CLASS     = 0,
    K_METHOD    = 1,
    K_MODULE    = 2,
    K_SINGLETON = 3
} rubyKind;

/*
*   DATA DEFINITIONS
*/
/*
* Table of the tag kinds this parser reports, in rubyKind order so that
* RubyKinds [kind] yields the letter and name used by emitRubyTag.
*
* Each entry needs four separate initializers. A missing comma between the
* last two strings makes the compiler concatenate them into a single name
* (e.g. "classclasses") and leaves the final field zero-initialized.
*/
static kindOption RubyKinds [] = {
    { TRUE, 'c', "class", "classes" },
    { TRUE, 'f', "method", "methods" },
    { TRUE, 'm', "module", "modules" },
    { TRUE, 'F', "singleton method", "singleton methods" }
};

/* Stack of the scopes entered so far; created and destroyed by findRubyTags. */
static stringList* nesting = NULL;

/*
*   FUNCTION DEFINITIONS
*/

/*
* Builds the dotted scope name described by 'list'.
*
* 'list' holds one entry per scope entered so far. Entries for anonymous
* scopes ('if' statements and the like) are empty strings and contribute
* nothing; entries for modules and classes hold the module or class name.
* The non-empty entries are joined with '.' in list order.
*
* Returns a newly created vString which the caller must delete.
*/
static vString* stringListToScope (const stringList* list)
{
    vString* const scope = vStringNew ();
    const unsigned int count = stringListCount (list);
    unsigned int emitted = 0;
    unsigned int index;

    for (index = 0; index < count; ++index)
    {
        vString* const item = stringListItem (list, index);
        const char* separator;

        if (vStringLength (item) == 0)
        {
            /* Anonymous scope: not part of the visible scope name. */
            continue;
        }

        /* Only names after the first one are preceded by a dot. */
        separator = (emitted > 0) ? "." : "";
        ++emitted;

        vStringCatS (scope, separator);
        vStringCatS (scope, vStringValue (item));
    }
    return scope;
}

/*
* Attempts to advance 's' past 'literal'.
*
* The match only counts if 'literal' is followed by the end of the string,
* whitespace or an opening parenthesis, i.e. if it is a complete token.
*
* Returns TRUE if it did, FALSE (and leaves 's' where it was) otherwise.
*/
static boolean canMatch (const unsigned char** s, const char* literal)
{
    const size_t literal_length = strlen (literal);
    unsigned char next_char;

    if (strncmp ((const char*) *s, literal, literal_length) != 0)
    {
        return FALSE;
    }

    /* The byte following the literal may only be inspected once the
     * literal itself is known to be present: on a shorter line, indexing
     * 'literal_length' bytes ahead would read beyond the terminating NUL.
     */
    next_char = *(*s + literal_length);

    /* Additionally check that we're at the end of a token. */
    if ( ! (next_char == 0 || isspace (next_char) || next_char == '('))
    {
        return FALSE;
    }
    *s += literal_length;
    return TRUE;
}

/*
* Tries each Ruby operator method name in turn at the position '*cp'.
* On the first one that canMatch accepts, the operator is appended to
* 'name' and TRUE is returned ('*cp' has then been advanced by canMatch).
* Returns FALSE if none of them matches.
*/
static boolean parseRubyOperator (vString* name, const unsigned char** cp)
{
    /* Zero-terminated list of the operators that may be defined as methods. */
    static const char* RUBY_OPERATORS[] = {
        "[]", "[]=",
        "**",
        "!", "~", "+@", "-@",
        "*", "/", "%",
        "+", "-",
        ">>", "<<",
        "&",
        "^", "|",
        "<=", "<", ">", ">=",
        "<=>", "==", "===", "!=", "=~", "!~",
        "`",
        0
    };
    const char* const* candidate;

    for (candidate = RUBY_OPERATORS; *candidate != 0; ++candidate)
    {
        if ( ! canMatch (cp, *candidate))
        {
            continue;
        }
        vStringCatS (name, *candidate);
        return TRUE;
    }
    return FALSE;
}

/*
* Creates a tag entry called 'name' of kind 'kind', scoped to whatever the
* current nesting describes, and then records 'name' as the newest scope.
* 'name' is cleared before returning so the caller can reuse it.
*/
static void emitRubyTag (vString* name, rubyKind kind)
{
    const kindOption* const option = &RubyKinds [kind];
    tagEntryInfo entry;
    vString* enclosing;

    vStringTerminate (name);
    enclosing = stringListToScope (nesting);

    initTagEntry (&entry, vStringValue (name));
    entry.kindName = option->name;
    entry.kind = option->letter;
    if (vStringLength (enclosing) != 0)
    {
        /* 'enclosing' must stay alive until the entry has been written. */
        entry.extensionFields.scope [0] = "class";
        entry.extensionFields.scope [1] = vStringValue (enclosing);
    }
    makeTagEntry (&entry);

    /* The definition just tagged becomes the innermost scope. */
    stringListAdd (nesting, vStringNewCopy (name));

    vStringDelete (enclosing);
    vStringClear (name);
}

/*
* Reports whether 'ch' occurs in the NUL-terminated string 'list'.
* The lookup is done with strchr, so a 'ch' of 0 finds the terminator.
*/
static boolean charIsIn (char ch, const char* list)
{
    const char* const hit = strchr (list, ch);
    return hit != 0;
}

/*
* Moves '*cp' forward to the first character that is not whitespace.
* The terminating NUL is not whitespace, so the scan stops there at the
* latest.
*/
static void skipWhitespace (const unsigned char** cp)
{
    const unsigned char* cursor = *cp;

    while (isspace (*cursor))
    {
        ++cursor;
    }
    *cp = cursor;
}

/*
* Copies the characters forming an identifier from *cp into
* name, leaving *cp pointing to the character after the identifier.
*
* 'kind' is the kind of definition the caller expects. The return value is
* the kind actually found:
*   - K_UNDEFINED for an anonymous class such as "class << HTTP"
*     (nothing is copied into 'name');
*   - K_SINGLETON when a method name of the form "receiver.method" is seen
*     (only the part after the period ends up in 'name');
*   - otherwise 'kind' itself.
* 'name' may be left empty if no identifier characters are present.
*/
static rubyKind parseIdentifier (
        const unsigned char** cp, vString* name, rubyKind kind)
{
    /* Method names are slightly different to class and variable names.
     * A method name may optionally end with a question mark, exclamation
     * point or equals sign. These are all part of the name, for singleton
     * methods just as for ordinary ones ("def self.valid?").
     * A method name may also contain a period if it's a singleton method;
     * once that period has been consumed no further period is accepted.
     */
    const boolean is_method = (kind == K_METHOD || kind == K_SINGLETON);
    const char* also_ok;

    if (kind == K_METHOD)
    {
        also_ok = "_.?!=";
    }
    else if (kind == K_SINGLETON)
    {
        also_ok = "_?!=";
    }
    else
    {
        also_ok = "_";
    }

    skipWhitespace (cp);

    /* Check for an anonymous (singleton) class such as "class << HTTP". */
    if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
    {
        return K_UNDEFINED;
    }

    /* Check for operators such as "def []=(key, val)". */
    if (is_method)
    {
        if (parseRubyOperator (name, cp))
        {
            return kind;
        }
    }

    /* Copy the identifier into 'name'. The explicit NUL test is needed
     * because charIsIn would report the terminator as a member.
     */
    while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok)))
    {
        char last_char = **cp;

        vStringPut (name, last_char);
        ++*cp;

        /* Recognize singleton methods: what has been read so far was the
         * receiver, so discard it and parse the real name.
         */
        if (kind == K_METHOD && last_char == '.')
        {
            vStringTerminate (name);
            vStringClear (name);
            return parseIdentifier (cp, name, K_SINGLETON);
        }

        /* Recognize characters which mark the end of a method name. */
        if (is_method && charIsIn (last_char, "?!="))
        {
            break;
        }
    }
    return kind;
}

/*
* Called with '*cp' just past a "module", "class" or "def" keyword.
* If the keyword is followed by whitespace, parses the name that follows
* and emits a tag of the kind actually found (which also pushes the name
* as a new scope). If no usable name is found, no tag is emitted but an
* unnamed scope is still pushed, because the construct is closed by its own
* "end" and that "end" must not pop the enclosing scope.
*/
static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
{
    if (isspace (**cp))
    {
        vString *name = vStringNew ();
        rubyKind actual_kind = parseIdentifier (cp, name, expected_kind);

        if (actual_kind == K_UNDEFINED || vStringLength (name) == 0)
        {
            /*
            * What kind of tags should we create for code like this?
            *
            *    %w(self.clfloor clfloor).each do |name|
            *        module_eval <<-"end;"
            *            def #{name}(x, y=1)
            *                q, r = x.divmod(y)
            *                q = q.to_i
            *                return q, r
            *            end
            *        end;
            *    end
            *
            * Or this?
            *
            *    class << HTTP
            *
            * For now, we don't create any. The block is still a scope of
            * its own, though, so record it as an anonymous one to keep the
            * nesting balanced with the "end" that closes it.
            */
            stringListAdd (nesting, vStringNewInit (""));
        }
        else
        {
            emitRubyTag (name, actual_kind);
        }
        vStringDelete (name);
    }
}

static void enterUnnamedScope (void)
{
    stringListAdd (nesting, vStringNewInit (""));
}

static void findRubyTags (void)
{
    const unsigned char *line;
    boolean inMultiLineComment = FALSE;

    nesting = stringListNew ();

    /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
    * You could perfectly well write:
    *
    *  def
    *  method
    *   puts("hello")
    *  end
    *
    * if you wished, and this function would fail to recognize anything.
    */
    while ((line = fileReadLine ()) != NULL)
    {
        const unsigned char *cp = line;

        if (canMatch (&cp, "=begin"))
        {
            inMultiLineComment = TRUE;
            continue;
        }
        if (canMatch (&cp, "=end"))
        {
            inMultiLineComment = FALSE;
            continue;
        }

        skipWhitespace (&cp);

        /* Avoid mistakenly starting a scope for modifiers such as
        *
        *   return if <exp>
        *
        * FIXME: this is fooled by code such as
        *
        *   result = if <exp>
        *               <a>
        *            else
        *               <b>
        *            end
        *
        * FIXME: we're also fooled if someone does something heinous such as
        *
        *   puts("hello") \
        *       unless <exp>
        */
        if (canMatch (&cp, "case") || canMatch (&cp, "for") ||
            canMatch (&cp, "if") || canMatch (&cp, "unless") ||
            canMatch (&cp, "while"))
        {
            enterUnnamedScope ();
        }

        /*
        * "module M", "class C" and "def m" should only be at the beginning
        * of a line.
        */
        if (canMatch (&cp, "module"))
        {
            readAndEmitTag (&cp, K_MODULE);
        }
        else if (canMatch (&cp, "class"))
        {
            readAndEmitTag (&cp, K_CLASS);
        }
        else if (canMatch (&cp, "def"))
        {
            readAndEmitTag (&cp, K_METHOD);
        }

        while (*cp != '\0')
        {
            /* FIXME: we don't cope with here documents,
            * or regular expression literals, or ... you get the idea.
            * Hopefully, the restriction above that insists on seeing
            * definitions at the starts of lines should keep us out of
            * mischief.
            */
            if (inMultiLineComment || isspace (*cp))
            {
                ++cp;
            }
            else if (*cp == '#')
            {
                /* FIXME: this is wrong, but there *probably* won't be a
                * definition after an interpolated string (where # doesn't
                * mean 'comment').
                */
                break;
            }
            else if (canMatch (&cp, "begin") || canMatch (&cp, "do"))
            {
                enterUnnamedScope ();
            }
            else if (canMatch (&cp, "end") && stringListCount (nesting) > 0)
            {
                /* Leave the most recent scope. */
                vStringDelete (stringListLast (nesting));
                stringListRemoveLast (nesting);
            }
            else if (*cp == '"')
            {
                /* Skip string literals.
                 * FIXME: should cope with escapes and interpolation.
                 */
                do {
                    ++cp;
                } while (*cp != 0 && *cp != '"');
            }
            else if (*cp != '\0')
            {
                do
                    ++cp;
                while (isalnum (*cp) || *cp == '_');
            }
        }
    }
    stringListDelete (nesting);
}

/*
* Creates the parser definition for Ruby: the tag kinds from RubyKinds,
* the file extensions "rb" and "ruby", and findRubyTags as the parse
* routine.
*/
extern parserDefinition* RubyParser (void)
{
    static const char *const extensions [] = { "rb", "ruby", NULL };
    parserDefinition* const parser = parserNew ("Ruby");

    parser->parser     = findRubyTags;
    parser->extensions = extensions;
    parser->kinds      = RubyKinds;
    parser->kindCount  = KIND_COUNT (RubyKinds);
    return parser;
}

/* vi:set tabstop=4 shiftwidth=4: */