'Unexpected Values Being Read Into Memory with getline() - C++
I wrote this program for an intro to C++ course. My issue is that unexpected values are being stored in memory. I assume it has to do with input.getline() or the way certain characters are stored, but I don't know enough about what is happening "under the hood" to fix it.
Specifically, certain characters like apostrophes and quotation marks appear to not read as their hex ASCII counterparts.
I'm pretty certain the issue lies in the lines
input.getline(raw_paragraph, MAX_PARAGRAPH_CHARS);
charCount = strlen(raw_paragraph);
Below I've included the complete code, a screenshot of the Memory from Visual Studio 2022, the test case, and the program output .
Thank you in advance!
#pragma warning(disable : 4996) //DEV
/**************************************************************************************
Header Content
**************************************************************************************/
// Includes and namespaces ------------------------------------------------------------
#include <cstdlib> // Defines functions such as exit().
#include <cstring> // Defines functions such as strcmp, etc.
#include <fstream> // Supports file I/O
#include <iostream> // Supports terminal I/O
using namespace std;
// Constants Declared -----------------------------------------------------------------
// Maximum allowable space for input / Defines space for memory allocation
const int MAX_WORD_CHARS = 50; // Longest word = 50 chars
const int MAX_WORDS = 1000; // Longest paragraph = 1000 words
const int MAX_PARAGRAPH_CHARS = 50000; // 50 * 1000
// "to be" Semantics
const char TO[] = "to";
const char BE[] = "be";
const int NUM_TO_BE_VERBS = 5; // Qty of "to be verbs below
const char TO_BE_VERBS[NUM_TO_BE_VERBS][MAX_WORD_CHARS] =
{ "am", "are", "is", "was", "were" };
// Conjunctions
const int NUM_CONJUNCTIONS = 7; // Qty objects in CONJUNCTIONS below.
const char CONJUNCTIONS[NUM_CONJUNCTIONS][MAX_WORD_CHARS] =
{ "for", "and", "nor", "but", "or", "yet", "so" };
// Punctuation
const int NUM_PUNCTUATIONS = 4;
const char PUNCTUATIONS[NUM_PUNCTUATIONS] = { '.', ',', '?', '!' };
// Functions Declared -----------------------------------------------------------------
int countComplex(char a[][MAX_WORD_CHARS], int b);
int countSentences(char a[], int b);
int count_to_be_verbs(char a[][MAX_WORD_CHARS], int wc);
void init_array(char* a);
void modify_tokens(char a[][MAX_WORD_CHARS], int wc);
int tokenizeParagraph(char p[], char tp[][MAX_WORD_CHARS]);
/**************************************************************************************
Begin Main
**************************************************************************************/
int main()
{
// Format Output
cout.setf(ios::fixed);
cout.setf(ios::showpoint);
cout.precision(1);
// Create input space for user's file request
char filename[256]; // Stores the user defined filename containing plaintext
init_array(filename);
char raw_paragraph[MAX_PARAGRAPH_CHARS]; // Stores plaintext from filename
init_array(raw_paragraph);
// Declare Variables
int charCount = 0; // Number of chars contained in input file except eof.
int complex_count; // Number of complex sentences
int sentenceCount = 0; // Total number of sentences in input
int simpleSent = 0; // Number of simple sentences in input
int to_be_count; // Number of instances of "to be" verbs in input.
int wordCount = 0; // Number of words in input
double averageWordsPerSentence;
// Asks the user for the name of an input file which contains a paragraph
cout << "Enter a filename: ";
cin.getline(filename, 256);
// Try to load the file in filename:
ifstream input;
input.open(filename);
// If file does not exist, cout error then exit(1)
if (input.fail())
{
cout << "Input file " << filename << " does not exist." << endl;
cout << "Thank you for using the English Analyzer." << endl;
exit(1);
}
// If file is empty, cout "Input file _____ is empty." Then exit(1)
char c;
input.get(c);
if (input.eof())
{
cout << "File " << filename << " is empty." << endl;
cout << "Thank you for using the English Analyzer." << endl;
exit(1);
}
else
input.putback(c);
// Store plaintext from file to raw_paragraph
input.getline(raw_paragraph, MAX_PARAGRAPH_CHARS);
// Close ifstream input, will not need it again.
input.close();
// Allocate memory for the output of tokenizeParagraph
char tkn_para[MAX_WORDS][MAX_WORD_CHARS];
// Count chars
charCount = strlen(raw_paragraph);
// Tokenize paragraph, count words
wordCount = tokenizeParagraph(raw_paragraph, tkn_para);
// Count Sentences
sentenceCount = countSentences(raw_paragraph, charCount);
// Average words per sentence
averageWordsPerSentence = double(wordCount) / double(sentenceCount);
// Count Complex Sentences
complex_count = countComplex(tkn_para, wordCount);
// Calculate Simple Sentences
simpleSent = sentenceCount - complex_count;
// Count "to be" verbs
modify_tokens(tkn_para, wordCount);
to_be_count = count_to_be_verbs(tkn_para, wordCount);
// Cout results
cout << "Number of Characters: " << charCount << endl;
cout << "Number of words: " << wordCount << endl;
cout << "Number of sentences: " << sentenceCount << endl;
cout << "Average number words in a sentence: " << averageWordsPerSentence << endl;
cout << "Number of simple sentences: " << simpleSent << endl;
cout << "Number of \"to be\" verbs: " << to_be_count << endl;
}
/**************************************************************************************
Function Definitions
**************************************************************************************/
int countComplex(char a[][MAX_WORD_CHARS], int b)
{
// counter will keep the number of complex sentences found.
int counter = 0;
// For each word in tkn_para,
for (int i = 0; i < b; i++)
{
// If a comma is at the end of tkn_m,
int s = strlen(a[i]) -1;
if (a[i][s] == ',')
{
// For each word in CONJUNCTIONS
for (int x = 0; x < NUM_CONJUNCTIONS; x++)
{
// If the words match,
if (strcmp(a[i + 1], CONJUNCTIONS[x]) == 0)
{
// Increment counter
counter++;
// If a word from a has already been matched, there
// is no reason to try to compare it to more items
// from CONJUNCTIONS. Therefore,
break;
}
}
}
}
// After all iteration has been completed:
return(counter);
}
int countSentences(char a[], int b)
{
int counter = 0;
// For each char in a[]
for (int i = 0; i < b; i++)
{
// If a[i] is an end of sentence punctuation,
if (a[i] == '.' || a[i] == '?' || a[i] == '!')
// Increment counter
counter++;
}
return counter;
}
int count_to_be_verbs(char a[][MAX_WORD_CHARS], int wc)
{
int counter = 0;
// For each word in a:
for (int i = 0; i < wc; i++)
// For each word in TO_BE_VERBS:
for (int y = 0; y < NUM_TO_BE_VERBS; y++)
{
// If words match:
if (strcmp(a[i], TO_BE_VERBS[y]) == 0)
counter++;
}
// For loop checks for "to" token followed by "be"
for (int i = 0; i < wc; i++)
if (strcmp(a[i], TO) == 0 && strcmp(a[i + 1], BE) == 0)
counter++;
return(counter);
}
void init_array(char* a)
{
// For every char in a:
for (int i = 0; i < strlen(a); i++)
// Set the value of a[i] to NULL
a[i] = NULL;
}
void modify_tokens(char a[][MAX_WORD_CHARS], int wc)
{
// For each word in a:
for (int i = 0; i < wc; i++)
{
// Does the computation once instead of 4 times below.
int s = strlen(a[i]) -1;
// Converts first char if uppercase, into lowercase
if (int('@') < a[i][0] && a[i][0] < int('['))
a[i][0] = a[i][0] + 32;
// Convert last char, if punctuation mark, into NULL
if (a[i][s] == ',' || a[i][s] == '!' || a[i][s] == '?' || a[i][s] == '.')
a[i][s] = NULL;
}
}
int tokenizeParagraph(char p[], char tp[][MAX_WORD_CHARS])
{
int i = 0;
char* cPtr;
cPtr = strtok(p, " \n\t");
while (cPtr != NULL)
{
strcpy(tp[i], cPtr);
i++;
cPtr = strtok(NULL, " \n\t");
}
return(i);
}
Solution 1:[1]
Turns out that the issue has to do with copying the test case into MS Word or another like application. There are character equivalents to apostrophes and quotation marks that "lean" left or right. Those characters are actually distinct and are responsible for the memory values I've been encountering. It suggests to me that a future iteration of the code would have to parse the raw input for those types of characters and replace them.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Undercaffinated |
