/* string_tokenizer.c
 *
 * Notes: strsep combined with a linked list seems like the way to go, but
 *        strsep still destroys the original string. So the idea is to work on
 *        a copy of the string and let strsep do its job. One pitfall of a
 *        linked list is that accessing data is not as flexible as with arrays.
 *
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libgen.h>

/* Forward declarations for the functions defined later in this file. */

/* demo: allocates an array of string buffers and prints two of them */
void str2dArrTest(void);
/* demo: prints the strtok tokens of its first argument, which gets modified */
void strTokenizerTestPrint(char *, const char *);
/* counts tokens by running strtok over a private copy of the string */
size_t strStrtokNumOfTokens(const char *, const char *);
/* returns a NULL-terminated array of token copies (string, delimiters,
 * maximum token length), or NULL when there is nothing to return */
char **strTokenize(const char *, const char *, const size_t);
/* releases the token strings of an array returned by strTokenize */
void strTokenizeFree(char **);
/* tokenizes with strTokenize and prints every token */
void strTokenizeAndPrint(const char *, const char *, const size_t);
/* returns 1 when the character is one of the delimiters, 0 otherwise;
 * NOTE(review): the extern declaration paired with the inline definition below
 * should make this translation unit emit the external definition (C99 inline
 * rules) - confirm against the compiler in use */
extern inline int isDelim(const char, const char *);
/* counts tokens by scanning the string with isDelim, without strtok */
size_t numOfTokens(const char *, const char *);
/* strsep is declared (and defined further down) only when _WIN32 is defined */
#ifdef _WIN32
char *strsep(char**, const char*);
#endif

/* test helpers */
void areNumOfTokensFuncConsistent(const char *, const char *);
void testTokenizer(void);
void strsepTest(void);

/* Node of a singly linked list of tokens.
 *
 * tok points into the buffer that was tokenized (it is not a separate
 * allocation). The list ends with a sentinel node whose tok and next are
 * both NULL.
 */
struct tokenNode
{
    const char *tok;
    struct tokenNode *next;
};

/* Splits str in place with strsep and returns the tokens as a linked list.
 *
 * This function assumes that a copy of the str buffer has already been made,
 * because strsep overwrites delimiters in str with '\0'.
 *
 * Every token is pushed onto the front of the list, so the tokens come back
 * in reverse order: the returned head is the last token and the final node is
 * the sentinel (tok == NULL, next == NULL). As with strsep itself, adjacent
 * delimiters produce empty-string tokens.
 *
 * Returns NULL when str or delim is NULL. Prints a diagnostic and exits when
 * an allocation fails. The caller owns the nodes and must free each of them;
 * the token strings belong to str and must not be freed.
 */
struct tokenNode *strTokenizeLinkedList(char *str, const char *delim)
{
    if (!str || !delim)
        return NULL;

    struct tokenNode *head = malloc(sizeof *head);
    if (!head)
    {
        fprintf(stderr, "strTokenizeLinkedList failed, %s:%d\n", __FILE__, __LINE__);
        perror("malloc");
        exit(-1);
    }

    /* the sentinel must be fully initialised: callers stop walking the list
     * when they see tok == NULL */
    head->tok = NULL;
    head->next = NULL;

    char *tok;
    while ((tok = strsep(&str, delim)) != NULL)
    {
        struct tokenNode *newNode = malloc(sizeof *newNode);
        if (!newNode)
        {
            fprintf(stderr, "strTokenizeLinkedList failed, %s:%d\n", __FILE__, __LINE__);
            perror("malloc");
            exit(-1);
        }

        newNode->tok = tok;
        newNode->next = head;

        head = newNode;
    }

    return head;
}

#ifdef _WIN32
/* strsep replacement, compiled only when _WIN32 is defined.
 *
 * Returns the token that starts at *stringp and ends at the first character
 * found in delim. That delimiter is overwritten with '\0' and *stringp is
 * moved just past it. When no delimiter is left, the rest of the string is
 * the token and *stringp becomes NULL. Returns NULL when stringp or *stringp
 * is NULL.
 */
char *strsep(char **stringp, const char *delim)
{
    char *token;
    char *separator;

    if (stringp == NULL)
        return NULL;

    token = *stringp;
    if (token == NULL)
        return NULL;

    separator = strpbrk(token, delim);
    if (separator != NULL)
    {
        *separator = '\0';
        *stringp = separator + 1;
    }
    else
    {
        *stringp = NULL;
    }

    return token;
}
#endif

/* Demonstrates strsep: splits a fixed sentence on spaces and prints every
 * word on its own line. Returns silently if the sentence cannot be copied.
 */
void strsepTest(void)
{
    enum { MAX_WORDS = 100 };
    char *words[MAX_WORDS];
    int count = 0;

    char *buffer = strdup("The quick brown fox jumps over the lazy dog");
    if (!buffer)
        return;

    /* strsep advances the pointer it is handed and finally sets it to NULL,
     * so tokenize through a separate cursor and keep buffer for free() */
    char *cursor = buffer;
    char *word;

    /* check the capacity first so no token is consumed and then dropped */
    while (count < MAX_WORDS && (word = strsep(&cursor, " ")) != NULL)
        words[count++] = word;

    for (int i = 0; i < count; i++)
        printf("%s\n", words[i]);

    /* the words point into buffer, so release it only after printing */
    free(buffer);
}

/* Demonstrates an array of individually allocated string buffers: allocates
 * NUM_STRINGS buffers, copies "Hello" and ", world!" into the first two,
 * prints them back to back (no trailing newline) and releases every buffer.
 * Prints a diagnostic and exits if an allocation fails.
 */
void str2dArrTest(void)
{
    enum { NUM_STRINGS = 200, STRING_CAPACITY = 31 };
    char *str_arr[NUM_STRINGS];

    /* str_arr already provides storage for the pointers; only the string
     * buffers themselves need to come from the heap */
    for (size_t i = 0; i < NUM_STRINGS; i++)
    {
        str_arr[i] = malloc(STRING_CAPACITY);
        if (!str_arr[i])
        {
            fprintf(stderr, "str2dArrTest failed, %s:%d\n", __FILE__, __LINE__);
            perror("malloc");
            exit(-1);
        }
    }

    /* both literals fit comfortably in STRING_CAPACITY bytes */
    strcpy(str_arr[0], "Hello");
    strcpy(str_arr[1], ", world!");

    printf("%s", str_arr[0]);
    printf("%s", str_arr[1]);

    for (size_t i = 0; i < NUM_STRINGS; i++)
        free(str_arr[i]);
}

/* Prints str, then each of its strtok tokens followed by a space.
 *
 * Warning: str is modified, because strtok writes '\0' over the delimiters
 * it finds.
 */
void strTokenizerTestPrint(char *str, const char *delim)
{
    printf("splitting string \"%s\" into tokens: \n", str);

    for (char *token = strtok(str, delim); token != NULL; token = strtok(NULL, delim))
        printf("%s ", token);
}

/* Counts the tokens strtok finds in str when splitting on the characters of
 * delim. The count is taken on a private heap copy, so str itself is left
 * untouched. Returns 0 when str or delim is NULL. Prints a diagnostic and
 * exits if the copy cannot be allocated.
 */
size_t strStrtokNumOfTokens(const char *str, const char *delim)
{
    if (str == NULL || delim == NULL)
        return 0;

    /* strtok writes into its input, so run it over a scratch copy */
    const size_t bytes = strlen(str) + 1;
    char *scratch = (char *) malloc(bytes);
    if (scratch == NULL)
    {
        fprintf(stderr, "strStrtokNumOfTokens failed, %s:%d\n", __FILE__, __LINE__);
        perror("malloc");
        exit(-1);
    }
    memcpy(scratch, str, bytes);

    size_t count = 0;
    char *token = strtok(scratch, delim);
    while (token != NULL)
    {
        count++;
        token = strtok(NULL, delim);
    }

    free(scratch);

    return count;
}

/* Finds the next token at or after *cursor.
 *
 * Skips leading delimiter characters, stores the token length in *len,
 * advances *cursor to just past the token and returns the token start.
 * Returns NULL (leaving *len and *cursor alone) when nothing but delimiters
 * remains.
 */
static const char *strTokenizeNextToken(const char **cursor, const char *delim, size_t *len)
{
    const char *start = *cursor + strspn(*cursor, delim);

    if (*start == '\0')
        return NULL;

    *len = strcspn(start, delim);
    *cursor = start + *len;

    return start;
}

/* Returns a tokenized array of strings / char pointers whose last element is
 * a NULL pointer.
 *
 * str is split on any character of delim; runs of delimiters are skipped, so
 * no empty tokens are produced. str is only read, never modified. Each token
 * is copied into its own buffer of maxTokenSize + 1 bytes; a token longer
 * than maxTokenSize characters is truncated to maxTokenSize characters.
 *
 * Returns NULL when str or delim is NULL, when maxTokenSize is outside
 * 1..100, or when str contains no tokens. Prints a diagnostic and exits if an
 * allocation fails. The caller owns the array and every token buffer in it.
 */
char **strTokenize(const char *str, const char *delim, const size_t maxTokenSize)
{
    if (!str || !delim)
        return NULL;

    if (maxTokenSize < 1 || maxTokenSize > 100) /* keep it sane */
        return NULL;

    /* first pass: count the tokens so the pointer array can be sized */
    size_t tokensNum = 0;
    size_t len = 0;
    const char *cursor = str;
    while (strTokenizeNextToken(&cursor, delim, &len) != NULL)
        tokensNum++;

    if (tokensNum == 0)
        return NULL;

    char **tokensArr = calloc(tokensNum + 1, sizeof *tokensArr);
    if (!tokensArr)
    {
        fprintf(stderr, "strTokenize failed, %s:%d\n", __FILE__, __LINE__);
        perror("malloc");
        exit(-1);
    }

    /* second pass: copy every token, truncating to maxTokenSize characters */
    cursor = str;
    for (size_t i = 0; i < tokensNum; i++)
    {
        const char *tok = strTokenizeNextToken(&cursor, delim, &len);

        if (len > maxTokenSize)
            len = maxTokenSize;

        tokensArr[i] = malloc(maxTokenSize + 1);
        if (!tokensArr[i])
        {
            fprintf(stderr, "strTokenize failed, %s:%d\n", __FILE__, __LINE__);
            perror("malloc");
            exit(-1);
        }

        memcpy(tokensArr[i], tok, len);
        tokensArr[i][len] = '\0';
    }

    /* explicit terminator; callers iterate until they hit this NULL */
    tokensArr[tokensNum] = NULL;

    return tokensArr;
}

/* Releases a token array such as the one returned by strTokenize: every
 * token string up to the terminating NULL entry, and then the pointer array
 * itself. Passing NULL is a no-op. After this call tokenArr must not be used
 * or freed again.
 */
void strTokenizeFree(char **tokenArr)
{
    if (!tokenArr)
        return;

    for (size_t i = 0; tokenArr[i] != NULL; i++)
        free(tokenArr[i]);

    /* the array holding the pointers is a heap allocation too */
    free(tokenArr);
}

/* Tokenizes strings with strTokenize (delimiters delim, tokens limited to
 * maxTokenLen characters) and prints each token, numbered from 1, on its own
 * line. Prints an error and exits when strTokenize returns NULL. The tokens
 * are handed to strTokenizeFree before returning.
 */
void strTokenizeAndPrint(const char *strings, const char *delim, const size_t maxTokenLen)
{
    char **tokens = strTokenize(strings, delim, maxTokenLen);
    if (tokens == NULL)
    {
        fprintf(stderr, "strTokenizeAndPrint failed\n");
        exit(-1);
    }

    int position = 0;
    for (char **current = tokens; *current != NULL; current++)
    {
        position++;
        printf("token %d: \"%s\"\n", position, *current);
    }

    strTokenizeFree(tokens);
}

/* Reports whether c is one of the characters in delim.
 *
 * Returns 1 on a match and 0 otherwise. A NULL delim matches nothing, and
 * the terminating '\0' of delim is never treated as a delimiter.
 */
inline int isDelim(const char c, const char *delim)
{
    if (delim == NULL)
        return 0;

    for (const char *candidate = delim; *candidate != '\0'; ++candidate)
    {
        if (*candidate == c)
            return 1;
    }

    return 0;
}

/* Counts the tokens in str, where a token is a maximal run of characters for
 * which isDelim reports 0. A token is counted at each transition from a
 * non-delimiter to a delimiter, plus one more if the string ends inside a
 * token. Returns 0 when str or delim is NULL or str is empty.
 */
size_t numOfTokens(const char *str, const char *delim)
{
    if (str == NULL || delim == NULL || *str == '\0')
        return 0;

    size_t count = 0;
    int prevIsDelim = isDelim(*str, delim);

    for (const char *p = str + 1; *p != '\0'; ++p)
    {
        const int currIsDelim = isDelim(*p, delim);

        /* a delimiter right after a non-delimiter closes a token */
        if (currIsDelim && !prevIsDelim)
            count++;

        prevIsDelim = currIsDelim;
    }

    /* the last token has no closing delimiter when the string ends inside it */
    return prevIsDelim ? count : count + 1;
}

/* test: checks that strStrtokNumOfTokens and numOfTokens report the same
 * number of tokens for str split on delim. Prints a PASS line to stdout when
 * they agree, otherwise a FAIL line with both counts to stderr.
 */
void areNumOfTokensFuncConsistent(const char *str, const char *delim)
{
    const size_t strtokCount = strStrtokNumOfTokens(str, delim);
    const size_t scanCount = numOfTokens(str, delim);

    if (strtokCount == scanCount)
        printf("PASS: numOfTokens and strStrtokNumOfTokens are consistent!\n");
    else
        /* %zu is the conversion for size_t; %lu is only right where size_t
         * happens to be unsigned long */
        fprintf(stderr, "FAIL: numOfTokens and strStrtokNumOfTokens are inconsistent!\n"
                        "numOfTokens found %zu tokens, strStrtokNumOfTokens found "
                        "%zu tokens\n", scanCount, strtokCount);
}

/* Runs the tokenizer over a fixed sample paragraph, splitting on space,
 * comma and period: prints the tokens (limited to 31 characters each) and
 * then checks that both token counters agree on that paragraph.
 */
void testTokenizer(void)
{
    static const char separators[] = " ,.";
    static const char sampleText[] =
        "You've reached the website for Arch Linux"
        ", a lightweight and flexible Linux® distribution that tries to Keep It Simple."
        "Currently we have official packages optimized for the i686 and x86-64 architectures."
        "We complement our official package sets with a community-operated package "
        "repository that grows in size and quality each and every day.";

    strTokenizeAndPrint(sampleText, separators, 31);
    areNumOfTokensFuncConsistent(sampleText, separators);
}

/* Entry point: tokenizes a sample sentence on spaces with
 * strTokenizeLinkedList and prints the tokens in list order, releasing each
 * node once it has been printed.
 */
int main(int argc, char **argv)
{
#if 0
    strTokenizeAndPrint("v 1//2", " /", 21);
    printf("dirname is: \"%s\", basename is: \"%s\"\n", dirname(argv[0]), basename(argv[0]));
#endif

    /* argc and argv are only referenced by the disabled block above */
    (void) argc;
    (void) argv;

    char str[] = "oh noes I will get modified by strsep, I'm so scared";

    struct tokenNode *node = strTokenizeLinkedList(str, " ");

    /* the list is expected to end with a node whose tok is NULL */
    while (node != NULL && node->tok != NULL)
    {
        struct tokenNode *next = node->next;

        printf("token: %s\n", node->tok);

        /* tok points into str, so only the node itself is heap memory */
        free(node);
        node = next;
    }

    /* release the terminating node (free(NULL) is a no-op) */
    free(node);

    return 0;
}