How to make a file word counter in C
I need to write a C program that reads a set of keywords from a file and stores them in a list. Then, for each keyword, it should loop through a set of emails and count how many times that keyword appears in each email.
Note: I'm not allowed to use global variables.
My code is kind of a mess as it is, but here it is:
#define _GNU_SOURCE
#include <ctype.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacity, in bytes, of the keyword buffer (terminating NUL included). */
#define MAXKEYWORDLENGTH 64

/* One keyword together with the numbers gathered for it. */
typedef struct {
    char keyword[MAXKEYWORDLENGTH]; /* keyword text */
    int keywordCount;               /* occurrence counter for this keyword */
    int stdev;                      /* standard-deviation slot; not written by the code shown here */
} keywordData;
/* Per-run email bookkeeping; holds a single integer counter. */
typedef struct {
    int emailCount; /* email counter; not referenced by the code shown here */
} emailData;
/*
 * Reads the keyword count stored as the first integer of `filename`.
 *
 * Returns the count, or 0 when `filename` is NULL, the file cannot be
 * opened, the first token is not an integer, or the integer is negative.
 * Returning 0 on failure lets callers that loop `for (i = 0; i < count; i++)`
 * simply do nothing instead of iterating over garbage.
 */
int fetchKeywordNumber(const char* filename)
{
    if (filename == NULL) {
        return 0;
    }

    FILE *keywords = fopen(filename, "r");
    if (keywords == NULL) {
        return 0;
    }

    int keywordNumber = 0;
    /* A failed or negative parse must not leak an indeterminate value. */
    if (fscanf(keywords, "%d", &keywordNumber) != 1 || keywordNumber < 0) {
        keywordNumber = 0;
    }

    fclose(keywords);
    return keywordNumber;
}
/*
 * Loads the keyword list from `filename`.
 *
 * File layout: the first token is the number of keywords, followed by that
 * many whitespace-separated keywords.
 *
 * Returns a heap-allocated, zero-initialised array with one entry per
 * announced keyword; the caller owns it and must free() it. Returns NULL
 * when `filename` is NULL, the file cannot be opened, the count is missing
 * or not positive, or allocation fails. If the file holds fewer keywords
 * than announced, the remaining entries stay empty strings. Keywords longer
 * than the buffer are truncated to fit.
 */
keywordData *fetchKeywords(const char* filename)
{
    if (filename == NULL) {
        return NULL;
    }

    FILE *keywords = fopen(filename, "r");
    if (keywords == NULL) {
        return NULL;
    }

    keywordData *kd = NULL;
    int keywordNumber = 0;

    /* The count is read from this same stream so the keywords follow it. */
    if (fscanf(keywords, "%d", &keywordNumber) == 1 && keywordNumber > 0) {
        /* calloc zeroes the counters and leaves unread keywords as "". */
        kd = calloc((size_t)keywordNumber, sizeof *kd);
    }

    if (kd != NULL) {
        /* Build "%<N>s" with N = buffer size - 1 so %s cannot overflow. */
        char format[16];
        snprintf(format, sizeof format, "%%%zus", sizeof kd->keyword - 1);

        for (int i = 0; i < keywordNumber; i++) {
            if (fscanf(keywords, format, kd[i].keyword) != 1) {
                break; /* fewer keywords than announced */
            }
            /* Drop the tail of an over-long keyword so it is not mistaken
               for the next keyword; matches nothing for normal keywords. */
            if (fscanf(keywords, "%*[^ \t\n\v\f\r]") == EOF) {
                break;
            }
        }
    }

    fclose(keywords);
    return kd;
}
/* Returns 1 when `word` matches the beginning of `text`, ignoring case. */
static int startsWithNoCase(const char *text, const char *word)
{
    for (; *word != '\0'; text++, word++) {
        /* The NUL ending `text` never equals a non-NUL `word` byte, so this
           comparison also stops the scan at the end of `text`. */
        if (tolower((unsigned char)*text) != tolower((unsigned char)*word)) {
            return 0;
        }
    }
    return 1;
}

/*
 * Counts case-insensitive occurrences of `wordname` in the body of the email
 * stored in `filename`. The body is everything after the first "Body:"
 * marker; the marker itself is not searched. Overlapping matches are each
 * counted, and every match is reported on stdout.
 *
 * `wordname` is a NUL-terminated string (the original declared it as a single
 * `char`, which cannot hold a keyword). `wordIndex` is accepted for
 * compatibility with existing callers and is not used.
 *
 * Returns the number of matches, or 0 when an argument is NULL, the keyword
 * is empty, the file cannot be read, memory runs out, or the email has no
 * "Body:" marker.
 */
int countWord(const char* filename, const char *wordname, int wordIndex)
{
    static const char marker[] = "Body:";

    (void)wordIndex;

    /* An empty keyword would match at every position; treat it as no match. */
    if (filename == NULL || wordname == NULL || wordname[0] == '\0') {
        return 0;
    }

    FILE *email = fopen(filename, "r");
    if (email == NULL) {
        return 0;
    }

    long emailSize = -1;
    if (fseek(email, 0, SEEK_END) == 0) {
        emailSize = ftell(email);
    }
    if (emailSize < 0 || fseek(email, 0, SEEK_SET) != 0) {
        fclose(email);
        return 0;
    }

    /* Size arithmetic is done in size_t so a huge ftell() value cannot
       overflow a signed long; malloc simply fails instead. */
    char *string = malloc((size_t)emailSize + 1);
    if (string == NULL) {
        fclose(email);
        return 0;
    }

    /* Terminate after the bytes actually read, which may be fewer than
       emailSize (short read, or text-mode translation). */
    size_t bytesRead = fread(string, 1, (size_t)emailSize, email);
    fclose(email);
    string[bytesRead] = '\0';

    int keywordCount = 0;
    const char *bodyPos = strstr(string, marker);
    if (bodyPos != NULL) {
        /* A portable scan is used instead of strcasestr(), which is a
           non-standard extension. */
        for (const char *next = bodyPos + (sizeof marker - 1); *next != '\0'; next++) {
            if (startsWithNoCase(next, wordname)) {
                keywordCount++;
                printf("Found a keyword: %s, occurence number: %d\n", wordname, keywordCount);
            }
        }
    }

    free(string);
    return keywordCount;
}
/*
 * Runs countWord() for `wordName` on every entry of the email directory and
 * adds the results together.
 *
 * `wordName` is a NUL-terminated keyword (the original declared it as a
 * single `char`, which cannot hold a keyword).
 *
 * Returns a heap-allocated, zero-initialised array of `wordIndex + 1`
 * entries in which only entry `wordIndex` is filled in: its keywordCount is
 * the sum of the counts over all emails. The caller owns the array and must
 * free() it. Returns NULL when `wordName` is NULL, `wordIndex` is negative,
 * the directory cannot be opened, or allocation fails.
 */
keywordData *emailLoop(const char *wordName, int wordIndex)
{
    static const char emailDir[] = "/home/student/bitdefender-challenge/check_baliza/data/emails/";

    if (wordName == NULL || wordIndex < 0) {
        return NULL;
    }

    /* Open the directory before allocating so a failure leaks nothing. */
    DIR *dr = opendir(emailDir);
    if (dr == NULL) {
        fprintf(stderr, "Could not open email directory %s\n", emailDir);
        return NULL;
    }

    keywordData *kd = calloc((size_t)wordIndex + 1, sizeof *kd);
    if (kd == NULL) {
        closedir(dr);
        return NULL;
    }

    struct dirent *de;
    while ((de = readdir(dr)) != NULL) {
        if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) {
            continue;
        }

        /* Room for the directory prefix plus a 255-byte entry name. */
        char path[sizeof emailDir + 256];
        int written = snprintf(path, sizeof path, "%s%s", emailDir, de->d_name);
        if (written < 0 || (size_t)written >= sizeof path) {
            continue; /* name does not fit; skip rather than open a wrong path */
        }

        /* Accumulate: plain assignment would keep only the last email. */
        kd[wordIndex].keywordCount += countWord(path, wordName, wordIndex);
    }

    closedir(dr);
    return kd;
}
int main()
{
char* keywordFilename = "/home/student/bitdefender-challenge/check_baliza/data/keywords";
char* emailFilename = "/home/student/bitdefender-challenge/check_baliza/data/emails/40";
// keywordData* result = readEmail(emailFilename, fetchKeywords(keywordFilename));
int keywordCount = fetchKeywordNumber(keywordFilename);
keywordData *result = fetchKeywords(keywordFilename);
for(int i = 0; i < keywordCount; i++)
{
emailLoop(result[i].keyword, i);
}
return 0;
}
I'd like some advice on how to make this count the number of occurrences of each keyword in each email and compute the standard deviation for each keyword. My output needs to look like this:
<word> <total number of occurrences found in all emails> <standard deviation>
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
