[ Back to the overview Matrix ]

Test case : Tokens using ANSI/ISO C

Lines used: 321

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*
 * True when k may appear in an identifier, digits excluded.  The argument
 * is parenthesized so that an expression argument is not regrouped by the
 * surrounding operators.  It is still evaluated twice, so it must not have
 * side effects.
 */
#define isIdent(k) (isalpha(k) || (k) == '_')

/* Running total of the tokens recognized so far; printed by main(). */
static int TokenCount = 0;
/*
 * Nonzero while main() is allowed to recognize comment openers.  GetNext()
 * clears it when a string or character literal starts and sets it again the
 * next time it runs.
 */
static int CommentsAllowed = 1;

/*
 * A state is a function that consumes one character and returns the state
 * to use for the next character.  A function type cannot name itself as its
 * own return type, so states return the generic function pointer genfuncptr
 * and the caller converts the result back to State.
 */
typedef void (*genfuncptr)(void);
typedef genfuncptr (*State)(int);

/* Used as NEXT(expr); -- returns expr converted to the generic pointer type. */
#define NEXT       return (genfuncptr)

/*
 * Feeds character k to state s and returns the following state; a NULL
 * state stays NULL.  This is a real function so that RUN evaluates its
 * state argument exactly once: the previous textual macro expanded the
 * argument twice, so RUN(RUN(s, a), b) called s(a) two times and counted
 * tokens twice.
 */
static State RunState(State s, int k)
{
    return (s != NULL) ? (State) s(k) : NULL;
}
#define RUN(s, k)  RunState((s), (k))

genfuncptr Newline(int);
genfuncptr DirectiveHash(int);
genfuncptr DirAlpha(int);
genfuncptr DirSpace(int);
genfuncptr DirInclude(int);
genfuncptr Ident(int);
genfuncptr IdentEscape(int);
genfuncptr PPNumber(int);
genfuncptr PPNumberExp(int);
genfuncptr Dot(int);
genfuncptr Dot2(int);
genfuncptr Ell(int);
genfuncptr String(int);
genfuncptr StringEscape(int);
genfuncptr CharLit(int);
genfuncptr CharLitEscape(int);
genfuncptr LeftShift(int);
genfuncptr RightShift(int);
genfuncptr PercentOp(int);
genfuncptr PercentColonOp(int);
genfuncptr PerCoPerOp(int);
genfuncptr GetNext(int);

/*
 * Characters that, when they follow the operator character just read, are
 * absorbed into the same token.  Read by DoubleByteOp(); set through
 * DOUBLEBYTEOP() immediately before that state is selected.
 */
static const char *DoubleByteOpMatch = NULL;
genfuncptr DoubleByteOp(int k);
#define DOUBLEBYTEOP(m) ((DoubleByteOpMatch = (m)), DoubleByteOp)


/*
 * Input is pushed, one character at a time, through layered phases:
 * trigraph replacement, then line splicing, then comment removal, and
 * finally the tokenizer state machine.  Each phase hands its output to the
 * next one, so a character released late by an early phase (a literal '?'
 * or a backslash that turned out not to start a splice) still passes through
 * comment handling and stays in order behind any character a later phase is
 * holding back.
 */
struct Scanner {
    State state;     /* current tokenizer state */
    int questions;   /* '?' characters held back, 0 to 2 */
    int backslash;   /* a backslash is held back awaiting a possible newline */
    int slash;       /* a '/' is held back awaiting a possible comment opener */
    int star;        /* inside a block comment and the previous char was '*' */
    int ccomment;    /* inside a block comment */
    int cppcomment;  /* inside a line comment */
};

/* Hands one fully preprocessed character to the tokenizer state machine. */
static void EmitChar(struct Scanner *sc, int k)
{
    sc->state = RUN(sc->state, k);
}

/* Comment removal: drops comment text, replacing a block comment by a space. */
static void CommentPhase(struct Scanner *sc, int k)
{
    if (CommentsAllowed) {
        if (sc->ccomment) {
            if (sc->star && k == '/') {
                sc->star = 0;
                sc->ccomment = 0;
                /* Each comment is replaced by a single space. */
                EmitChar(sc, ' ');
            } else {
                sc->star = (k == '*');
            }
            return;
        }
        if (sc->cppcomment) {
            if (k != '\n') return;
            /* The newline ends the line comment and is itself passed on. */
            sc->cppcomment = 0;
        } else if (sc->slash) {
            sc->slash = 0;
            if (k == '*') { sc->ccomment = 1; return; }
            if (k == '/') { sc->cppcomment = 1; return; }
            /* Not a comment after all: release the held-back slash first. */
            EmitChar(sc, '/');
        } else if (k == '/') {
            sc->slash = 1;
            return;
        }
    }
    EmitChar(sc, k);
}

/* Line splicing: removes every backslash that is directly followed by a newline. */
static void SplicePhase(struct Scanner *sc, int k)
{
    if (sc->backslash) {
        sc->backslash = 0;
        if (k == '\n') return;
        /* An ordinary backslash: release it before the current character. */
        CommentPhase(sc, '\\');
    }
    if (k == '\\') {
        sc->backslash = 1;
        return;
    }
    CommentPhase(sc, k);
}

/* Returns the replacement for the trigraph ending in k, or 0 if there is none. */
static int TrigraphChar(int k)
{
    switch (k) {
        case '=':  return '#';
        case '(':  return '[';
        case '/':  return '\\';
        case ')':  return ']';
        case '\'': return '^';
        case '<':  return '{';
        case '!':  return '|';
        case '>':  return '}';
        case '-':  return '~';
        default:   return 0;
    }
}

/* Trigraph replacement: holds back up to two '?' until the next char decides. */
static void TrigraphPhase(struct Scanner *sc, int k)
{
    int pending = sc->questions;
    int repl;

    if (k == '?') {
        /* With two already held, the oldest can no longer start a trigraph. */
        if (pending == 2) SplicePhase(sc, '?');
        else sc->questions = pending + 1;
        return;
    }

    sc->questions = 0;
    if (pending == 2) {
        repl = TrigraphChar(k);
        if (repl != 0) {
            SplicePhase(sc, repl);
            return;
        }
    }
    /* No trigraph: release the held-back question marks one at a time. */
    while (pending-- > 0) SplicePhase(sc, '?');
    SplicePhase(sc, k);
}

/*
 * End of input: releases characters the phases are still holding back and
 * then supplies a closing newline so that a token in progress is counted
 * even when the input does not end with a newline.  Nothing is supplied
 * while an unterminated block comment is open.
 */
static void FlushPending(struct Scanner *sc)
{
    int pending = sc->questions;

    sc->questions = 0;
    while (pending-- > 0) SplicePhase(sc, '?');
    if (sc->backslash) {
        sc->backslash = 0;
        CommentPhase(sc, '\\');
    }
    if (sc->slash) {
        sc->slash = 0;
        EmitChar(sc, '/');
    }
    if (!sc->ccomment) EmitChar(sc, '\n');
}

/*
 * Reads standard input, runs it through the tokenizer and prints the number
 * of tokens found.  Returns EXIT_FAILURE, after printing an error message,
 * if the state machine ever yields a NULL state.
 */
int main(void)
{
    struct Scanner sc = { Newline, 0, 0, 0, 0, 0, 0 };
    int k;

    while ((k = getchar()) != EOF)
    {
        TrigraphPhase(&sc, k);
        if (sc.state == NULL) {
            puts("Error in input!");
            return EXIT_FAILURE;
        }
    }

    FlushPending(&sc);
    if (sc.state == NULL) {
        puts("Error in input!");
        return EXIT_FAILURE;
    }

    printf("%d\n", TokenCount);
    return EXIT_SUCCESS;
}


genfuncptr Newline(int k)
{
    if (isspace(k)) NEXT(Newline);
    if (k == '#') { ++TokenCount; NEXT(DirectiveHash); }
    NEXT(GetNext(k));
}

genfuncptr DirectiveHash(int k)
{
    if (k == '\n') NEXT(Newline);
    if (isspace(k)) NEXT(DirectiveHash);
    if (isalnum(k)) NEXT(DirAlpha);
    NEXT(GetNext(k));
}

genfuncptr DirAlpha(int k)
{
    if (isalnum(k)) NEXT(DirAlpha);
    ++TokenCount;
    if (k == '\n') NEXT(Newline);
    if (isspace(k)) NEXT(DirSpace);
    if (k == '<') NEXT(DirInclude);
    NEXT(GetNext(k));
}

genfuncptr DirSpace(int k)
{
    if (k == '\n') NEXT(Newline);
    if (isspace(k)) NEXT(DirSpace);
    if (k == '<') NEXT(DirInclude);
    NEXT(GetNext(k));
}

genfuncptr DirInclude(int k)
{
    if (k == '\n') {
        ++TokenCount;
        NEXT(Newline);
    }
    if (k == '>') {
        ++TokenCount;
        NEXT(GetNext);
    }
    NEXT(DirInclude);
}

genfuncptr Ident(int k)
{
    if (isIdent(k) || isdigit(k)) NEXT(Ident);
    if (k == '\\') NEXT(IdentEscape);
    ++TokenCount;
    NEXT(GetNext(k));
}

genfuncptr IdentEscape(int k)
{
    /* Universal character names can appear in identifiers. */
    if (strchr("Uu", k)) NEXT(Ident);
    TokenCount += 2;
    NEXT(GetNext(k));
}

/*
 * State inside a preprocessing number.
 *
 * A preprocessing number is extended by digits, letters, underscores and
 * periods, and by a sign that directly follows e, E, p or P.  A period
 * therefore keeps the scanner in this state, so that "1.f" and "1.e5" are
 * each a single token.  Any other character ends the number, which is then
 * counted, and is passed on as the start of the next token.
 */
genfuncptr PPNumber(int k)
{
    /* An exponent letter may be followed by a sign belonging to the number. */
    if (k == 'E' || k == 'e' || k == 'P' || k == 'p') NEXT(PPNumberExp);
    if (k == '.' || k == '_' || isalnum(k)) NEXT(PPNumber);
    ++TokenCount;
    NEXT(GetNext(k));
}

/*
 * State after an e, E, p or P inside a preprocessing number.
 *
 * A '+' or '-' here is part of the number.  Any other character is handed
 * to PPNumber(), which either continues the number or ends and counts it.
 * The number must not be counted here as well: doing so made "1e5" and
 * "0xEF" count as two tokens each.
 */
genfuncptr PPNumberExp(int k)
{
    if (k == '+' || k == '-') NEXT(PPNumber);
    NEXT(PPNumber(k));
}

genfuncptr Dot(int k)
{
    if (isdigit(k)) NEXT(PPNumber);
    if (k == '.') NEXT(Dot2);
    ++TokenCount;
    NEXT(GetNext(k));
}

genfuncptr Dot2(int k)
{
    /* Three dots: ellipsis */
    ++TokenCount;
    if (k == '.') NEXT(GetNext);
    /* Two dots: two tokens (yes, it's impossible in standard C) */
    ++TokenCount;
    NEXT(GetNext(k));
}

genfuncptr String(int k)
{
    if (k == '\"') {
        ++TokenCount;
        NEXT(GetNext);
    }
    if (k == '\\') NEXT(StringEscape);
    if (k == '\n') {
        /* Whoops!  Unterminated constant!  Let it go. */
        ++TokenCount;
        NEXT(GetNext);
    }
    NEXT(String);
}

genfuncptr StringEscape(int k)
{
    NEXT(String);
}

genfuncptr CharLit(int k)
{
    if (k == '\'') {
        ++TokenCount;
        NEXT(GetNext);
    }
    if (k == '\\') NEXT(CharLitEscape);
    if (k == '\n') {
        /* Whoops!  Unterminated constant!  Let it go. */
        ++TokenCount;
        NEXT(GetNext);
    }
    NEXT(CharLit);
}

genfuncptr CharLitEscape(int k)
{
    NEXT(CharLit);
}

genfuncptr Ell(int k)
{
    /* Handle wide-character strings correctly */
    if (k == '\'') NEXT(CharLit);
    if (k == '\"') NEXT(String);
    NEXT(Ident(k));
}

genfuncptr DoubleByteOp(int k)
{
    ++TokenCount;
    if (strchr(DoubleByteOpMatch, k)) NEXT(GetNext);
    NEXT(GetNext(k));
}

genfuncptr LeftShift(int k)
{
    if (k == '<') NEXT(DOUBLEBYTEOP("="));
    NEXT(DOUBLEBYTEOP("<=:%")(k));
}

genfuncptr RightShift(int k)
{
    if (k == '>') NEXT(DOUBLEBYTEOP("="));
    NEXT(DOUBLEBYTEOP(">=")(k));
}

genfuncptr PercentOp(int k)
{
    /* Percent is used in the %: and %:%: digraphs. */
    if (k == ':') NEXT(PercentColonOp);
    NEXT(DOUBLEBYTEOP(">=")(k));
}

genfuncptr PercentColonOp(int k)
{
    if (k == '%') NEXT(PerCoPerOp);
    ++TokenCount;
    NEXT(GetNext(k));
}

genfuncptr PerCoPerOp(int k)
{
    ++TokenCount;
    if (strchr(":", k)) NEXT(GetNext);
    NEXT(DOUBLEBYTEOP(">=")(k));
}


/*
 * State between tokens: classifies the first character of the next token
 * and selects the state that will scan the rest of it.
 *
 * Single-character punctuators and unrecognized characters are counted
 * right here.  Every other token is counted by the state that detects its
 * end.  Comment recognition is enabled on entry and disabled again when a
 * string or character literal begins.
 */
genfuncptr GetNext(int k)
{
    /* Turn comments back on, by the way! */
    CommentsAllowed = 1;
    /*
     * Only an uppercase L prefixes a wide literal.  A lowercase l is an
     * ordinary identifier character, so l"x" is an identifier followed by
     * a string: two tokens, not one.
     */
    if (k == 'L') NEXT(Ell);
    if (isIdent(k)) NEXT(Ident);
    if (isdigit(k)) NEXT(PPNumber);
    if (k == '\n') NEXT(Newline);
    if (isspace(k)) NEXT(GetNext);
    if (k == '.') NEXT(Dot);
    /* Operators that may be extended by one of the listed characters. */
    if (strchr("*/=!^", k)) NEXT(DOUBLEBYTEOP("="));
    if (k == '<') NEXT(LeftShift);
    if (k == '>') NEXT(RightShift);
    if (k == '+') NEXT(DOUBLEBYTEOP("+="));
    if (k == '-') NEXT(DOUBLEBYTEOP("-=>"));
    if (k == '&') NEXT(DOUBLEBYTEOP("&="));
    if (k == '|') NEXT(DOUBLEBYTEOP("|="));
    if (k == '#') NEXT(DOUBLEBYTEOP("#"));
    if (k == '%') NEXT(PercentOp);
    if (k == ':') NEXT(DOUBLEBYTEOP(">"));
    /* Punctuators that are always a complete token by themselves. */
    if (strchr("{}()[],;?~", k)) {
        ++TokenCount;
        NEXT(GetNext);
    }
    /* Comment openers inside a literal are ordinary characters. */
    if (k == '\"') { CommentsAllowed = 0; NEXT(String); }
    if (k == '\'') { CommentsAllowed = 0; NEXT(CharLit); }

    /* Handle unknown characters by counting them as tokens. */
    ++TokenCount;
    NEXT(GetNext);
}



Contributed by Arthur J. O'Dwyer at andrew.cmu.edu