Word Count - Segmentation Fault during test runs

I am getting Segmentation Fault during my test runs. My code works fine on my local machine with sample string. Below is the function code.

#include "word_count.h"
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>

int word_count = 0;
word_count_word_t words[MAX_WORDS];

int count_words(const char *sentence, word_count_word_t *words);
void add_word(char *word, word_count_word_t *words);

int count_words(const char *sentence, word_count_word_t *words) {
    if (!sentence)
        return 0;
    char *ptr = (char *)sentence; 

    while (*ptr) {
        *ptr = tolower(*ptr);
        if (*ptr == '\'' && (char)*(ptr + 1) != 's' && !isalpha((char)*(ptr - 1)) ) {
            *ptr = ',';
        }
    }
    const char delimiters[] = "!.?:, \"\t\n\0";

    char* token = strtok((char *)sentence, delimiters);
 
    while (token != NULL) {
        if (strlen(token) > MAX_WORD_LENGTH)
        {
            return EXCESSIVE_LENGTH_WORD;
        }

        if (word_count > MAX_WORDS)
        {
            return EXCESSIVE_NUMBER_OF_WORDS;
        }
        add_word(token, words);
        token = strtok(NULL, delimiters);
    }
    return word_count;
}

void add_word(char *word, word_count_word_t *words)
{

    for (int i=0; i<MAX_WORDS && i<=word_count; i++)
    {
        if (strcmp(word, words[i].text) == 0)
        {
            words[i].count++;
            return;
        }
    }

    strcpy(words[word_count].text, word);
    words[word_count].count = 1;
    word_count < MAX_WORDS ? word_count++ : word_count;

    return;
}

Can you also show the result of the segmentation fault?

image

This function takes a pointer to a constant string (sentence).
It removes the constness by converting that const char * to a char *.
Then it modifies the string through ptr.
That invokes undefined behavior (UB) and (in this case) the program crashes.


Also:

What if the sentence starts with "'s"? At first ptr points to the beginning ('s') and accessing the byte before the string (*(ptr - 1)) invokes UB.


Also:

It the sentence is not empty this loop does not end.
I think there’s a ++ptr missing.

Thanks for the feedback, and I changed the code, so now I am not getting Segmentation Fault, but test is getting wrong unique_words, see screenshot below from Test Framework and from my local machine:

#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

int unique_words  = 0;
word_count_word_t words[MAX_WORDS];

int count_words(const char *sentence, word_count_word_t *words);
void add_word(char *word, word_count_word_t *words);

int count_words(const char *sentence, word_count_word_t *words)
{
    if (!sentence)
        return 0;

    char *ptr1 = (char *)malloc(strlen(sentence)+1);
    char *ptr2 = ptr1;
    memcpy(ptr1,sentence,strlen(sentence)+1);
    while (*ptr1)
    {
        *ptr1 = tolower(*ptr1);
        if (*ptr1 == '\'' && (char)*(ptr1 + 1) != 's' && !isalpha((char)*(ptr1 - 1)) )
        {
            *ptr1 = ',';
        }
        ptr1++;
    }
    const char delimiters[] = " @$%^&!.?:,\"\t\n\r\0";
    

    for (char *token = strtok(ptr2, delimiters); token != NULL; token = strtok(NULL, delimiters))
    {
        if (strlen(token) > MAX_WORD_LENGTH)
        {
            return EXCESSIVE_LENGTH_WORD;
        }

        if (unique_words > MAX_WORDS)
        {
            return EXCESSIVE_NUMBER_OF_WORDS;
        }
        add_word(token, words);
    }
    return unique_words;
}

void add_word(char *word, word_count_word_t *words)
{
    for (int i=0; i<unique_words; i++)
    {
        if (strcmp(word, words[i].text) == 0)
        {
            words[i].count++;
            return;
        }
    }
    strcpy(words[unique_words].text, word);
    words[unique_words].count = 1;
    unique_words < MAX_WORDS ? unique_words++ : unique_words;

    return;
}

image

There are several issues at play.

Print the current word and the value of unique_words in the function add_words().

That should give you an idea of two or three of the issues.

Below is the output of before and after update, and it looks ok to me. What/Where is the issue. In my local run it works fine. I have pasted my code from CodeBlocks.
one fish two fish red fish blue fish
Word to update: one
Before Update
unique_words: 0
After Update
unique_words: 1
0: 1, one
============
Word to update: fish
Before Update
unique_words: 1
0: 1, one
After Update
unique_words: 2
0: 1, one
1: 1, fish
============
Word to update: two
Before Update
unique_words: 2
0: 1, one
1: 1, fish
After Update
unique_words: 3
0: 1, one
1: 1, fish
2: 1, two
============
Word to update: fish
Before Update
unique_words: 3
0: 1, one
1: 1, fish
2: 1, two
After Update
unique_words: 3
0: 1, one
1: 2, fish
2: 1, two
============
Word to update: red
Before Update
unique_words: 3
0: 1, one
1: 2, fish
2: 1, two
After Update
unique_words: 4
0: 1, one
1: 2, fish
2: 1, two
3: 1, red
============
Word to update: fish
Before Update
unique_words: 4
0: 1, one
1: 2, fish
2: 1, two
3: 1, red
After Update
unique_words: 4
0: 1, one
1: 3, fish
2: 1, two
3: 1, red
============
Word to update: blue
Before Update
unique_words: 4
0: 1, one
1: 3, fish
2: 1, two
3: 1, red
After Update
unique_words: 5
0: 1, one
1: 3, fish
2: 1, two
3: 1, red
4: 1, blue
============
Word to update: fish
Before Update
unique_words: 5
0: 1, one
1: 3, fish
2: 1, two
3: 1, red
4: 1, blue
After Update
unique_words: 5
0: 1, one
1: 4, fish
2: 1, two
3: 1, red
4: 1, blue
============

#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

#define MAX_WORDS       20   // at most MAX_WORDS can be found in the test input string
#define MAX_WORD_LENGTH 50   // no individual word can exceed this length

// results structure
typedef struct word_count_word
{
    char text[MAX_WORD_LENGTH +
              1];   // allow for the string to be null-terminated
    int count;
} word_count_word_t;

#define EXCESSIVE_LENGTH_WORD -1
#define EXCESSIVE_NUMBER_OF_WORDS -2

int unique_words = 0;
word_count_word_t words[MAX_WORDS];

int count_words(const char *sentence, word_count_word_t *words);
void add_word(char *word, word_count_word_t *words);
void print_words(word_count_word_t *words);

int count_words(const char *sentence, word_count_word_t *words)
{
    if (!sentence)
        return 0;

    char *ptr1 = (char *)malloc(strlen(sentence)+1);
    char *ptr2 = ptr1;
    memcpy(ptr1,sentence,strlen(sentence)+1);
    while (*ptr1)
    {
        *ptr1 = tolower(*ptr1);
        if (*ptr1 == '\'' && (char)*(ptr1 + 1) != 's' && !isalpha((char)*(ptr1 - 1)) )
        {
            *ptr1 = ',';
        }
        printf("%c", (char)*ptr1);
        ptr1++;
    }
    printf("\n%s\n", sentence);

    const char delimiters[] = " @$%^&!.?:,\"\t\n\r\0";

    for (char *word = strtok(ptr2, delimiters); word != NULL; word = strtok(NULL, delimiters))
    {
        if (strlen(word) > MAX_WORD_LENGTH)
        {
            return EXCESSIVE_LENGTH_WORD;
        }

        if (unique_words > MAX_WORDS)
        {
            return EXCESSIVE_NUMBER_OF_WORDS;
        }
        
        printf("Word to update: %s\n", word);
        printf("Before Update\n");
        printf("unique_words: %d\n", unique_words);
        print_words(words);

        add_word(word, words);
        
        printf("After Update\n");
        printf("unique_words: %d\n", unique_words);
        print_words(words);
        printf("============\n");
    }

    return unique_words;
}

void add_word(char *word, word_count_word_t *words)
{
    for (int i=0; i<unique_words; i++)
    {
        if (strcmp(word, words[i].text) == 0)
        {
            words[i].count++;

            return;
        }
    }

    strcpy(words[unique_words].text, word);
    words[unique_words].count = 1;
//    unique_words < MAX_WORDS ? unique_words++ : unique_words;
    unique_words++;

    return;
}

void print_words(word_count_word_t *words)
{
    for (int i=0; i<unique_words; i++)
    {
        printf("%d: %d, %s\n", i, words[i].count, words[i].text);
    }
}

int main()
{

//    char phrase[] = "\"That's the password: 'PASSWORD 123'!\", cried the Special Agent.\nSo I fled.";
    char phrase[] = "one fish two fish red fish blue fish";

    printf("unique words: %d\n", count_words(phrase, (word_count_word_t *)&words));
    print_words(words);
    return 0;
}

This is a stand-alone program that runs count_words() exactly once.
Try printing the current word and the current value of unique_words (and only those) in add_word(), and do that in the online editor.

You can also run make memcheck to check for issues related to pointers and memory. In this case I see this output.

$ CC=clang-17 make memcheck
Compiling memcheck
AddressSanitizer:DEADLYSIGNAL
=================================================================
==717360==ERROR: AddressSanitizer: SEGV on unknown address 0x555ce705e9c0 (pc 0x555ce7049e9d bp 0x7ffc1565d060 sp 0x7ffc1565cf00 T0)
==717360==The signal is caused by a WRITE memory access.
    #0 0x555ce7049e9d in count_words /home/sudhackar/.exercism/c/word-count/./word_count.c:19:14
    #1 0x555ce70498cb in test_count_one_word /home/sudhackar/.exercism/c/word-count/./test_word_count.c:68:24
    #2 0x555ce7049466 in UnityDefaultTestRun /home/sudhackar/.exercism/c/word-count/test-framework/unity.c:1837:9
    #3 0x555ce70496a2 in main /home/sudhackar/.exercism/c/word-count/./test_word_count.c:488:4
    #4 0x7f5943a280cf in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
    #5 0x7f5943a28188 in __libc_start_main csu/../csu/libc-start.c:360:3
    #6 0x555ce6f6a324 in _start (/home/sudhackar/.exercism/c/word-count/memcheck.out+0x2d324) (BuildId: b14ed48eb4b47ef7b65d24c9332bb982fc64f3f5)

AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /home/sudhackar/.exercism/c/word-count/./word_count.c:19:14 in count_words
==717360==ABORTING
make: *** [makefile:28: memcheck] Error 1