Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

Develop a C program to read and process an input text file, and retrieve the following information and save to file. number of lines number

Develop a C program that reads and processes an input text file, retrieves the following information, and saves it to a file:

number of lines

number of total words

number of distinct words

distinct words and their frequencies

Specifically, write myword.h, containing the following structure definitions and function headers, and myword.c, containing the implementations of the functions. You are required to use your str_trim() and lower_case() functions from Q1, and to use other string library functions such as strtok(), strcmp() and strcpy() in the implementation.

structure definitions:

1. typedef struct word { char word[MAX_WORD]; int frequency; } WORD; typedef struct words { int line_count; int total_word_count; int distinct_word_count; } WORDSTATS;

2. int process_word(char *filename, WORD *words, WORDSTATS *wordstats) opens and reads the text file named by filename line by line. For each line, it extracts each word and checks whether it is already in the WORD array words: if so, it increases that word's frequency by 1; otherwise it inserts the word at the end of the array with frequency 1. It also updates total_word_count and distinct_word_count. It returns 0 if the operation is successful, 1 otherwise.

3. int save_file(char *filename, WORD *words, WORDSTATS *wordstats) saves the processed word data passed in words and wordstats to the file named by filename.

Use the provided main program myword_main.c to test your program with this input text file textdata.txt. The output file and format should be like word_report.txt, the screen output is like the following.

What's Given:

myword_main.c:

/* -------------------------------------------------- Project: cp264-a3q2 File: myword_main.c, a public test driver Author: HBF Version: 2023-01-26 -------------------------------------------------- */ #include #include #include "mystring.h" #include "myword.h"

/* Forward declarations of the two helper routines that main() calls. */
void display_wordstats_words(WORDSTATS *wordstats, WORD *words); void read_display_file(char *filename);

/*
 * Test driver entry point.
 *
 * Usage: program [input_file [output_file]]
 *
 * Processes the input text file, prints the collected statistics, saves them
 * to the output file and then echoes that file back to the screen.
 *
 * Returns 0 on success, 1 if the input cannot be processed or the report
 * cannot be written.
 */
int main(int argc, char *args[]) {
    char default_infile[] = "textdata.txt";     // default input file name
    char default_outfile[] = "word_report.txt"; // default output file name

    // Use the argument strings directly instead of copying them into
    // fixed-size buffers, so an over-long path cannot overflow anything.
    char *infilename = (argc >= 2) ? args[1] : default_infile;
    char *outfilename = (argc >= 3) ? args[2] : default_outfile;

    WORD words[MAX_WORDS] = {0};
    WORDSTATS wordstats = {0};

    if (process_word(infilename, words, &wordstats) != 0) {
        fprintf(stderr, "cannot process input file: %s\n", infilename);
        return 1;
    }
    display_wordstats_words(&wordstats, words);

    if (save_file(outfilename, words, &wordstats) != 0) {
        fprintf(stderr, "cannot write report file: %s\n", outfilename);
        return 1;
    }

    read_display_file(outfilename);

    return 0;
}

/*
 * Print the collected statistics to stdout, one "name:value" pair per line.
 *
 * wordstats - counters to print (line, total word and distinct word counts)
 * words     - array of word entries; the first wordstats->distinct_word_count
 *             entries are printed as "word:frequency"
 */
void display_wordstats_words(WORDSTATS *wordstats, WORD *words) {
    // Summary section, preceded by a blank line.
    printf("\n%s:%s\n", "word stats", "value");
    printf("%s:%d\n", "line count", wordstats->line_count);
    printf("%s:%d\n", "total word count", wordstats->total_word_count);
    printf("%s:%d\n", "distinct word count", wordstats->distinct_word_count);

    // Per-word section, in the order the entries are stored in the array.
    printf("\n%s:%s\n", "distinct words", "frequency");
    for (int i = 0; i < wordstats->distinct_word_count; i++) {
        printf("%s:%d\n", words[i].word, words[i].frequency);
    }
}

/*
 * Print the contents of a text file to stdout.
 *
 * filename - path of the file to display
 *
 * Prints "no file" and returns if the file cannot be opened; otherwise prints
 * a "file contents" heading followed by the file's text.
 */
void read_display_file(char *filename) {
    FILE *fp = (filename != NULL) ? fopen(filename, "r") : NULL;
    if (fp == NULL) {
        printf("\nno file\n");
        return; // nothing to read; never hand a NULL stream to fgets/fclose
    }

    printf("\nfile contents\n");

    char line[1000];
    while (fgets(line, sizeof line, fp) != NULL) {
        printf("%s", line);
    }

    fclose(fp);
}

textdata.txt:

 THIS IS THE FIRST TEST. This is the second test. CP264 Data Structures

word_report.txt

word stats:value line count:3 total word count:13 distinct word count:9 distinct words:frequency this:2 is:2 the:2 first:1 test:2 second:1 cp264:1 data:1 structures:1

Public Test:

word stats:value line count:3 total word count:13 distinct word count:9 distinct words:frequency this:2 is:2 the:2 first:1 test:2 second:1 cp264:1 data:1 structures:1 file contents word stats:value line count:3 total word count:13 distinct word count:9 distinct words:frequency this:2 is:2 the:2 first:1 test:2 second:1 cp264:1 data:1 structures:1

Expert Answer

This solution was written by a subject matter expert. It's designed to help students like you learn core concepts.

Step-by-step

1st step

All steps

Answer only

Step 1/2

Here is an example implementation of the myword.h and myword.c files:

Implementation for myword.h:

#ifndef MYWORD_H

#define MYWORD_H

#include "mystring.h"

/* Size in bytes of the character buffer inside each WORD entry. */
#define MAX_WORD 20

/* Number of WORD entries in the array the test driver allocates. */
#define MAX_WORDS 1000

/* One distinct word together with its occurrence count. */
typedef struct word {

char word[MAX_WORD]; /* word text, stored as a C string */

int frequency; /* number of occurrences recorded for this word */

} WORD;

/* Aggregate counters collected while processing a text file. */
typedef struct words {

int line_count; /* number of lines read from the file */

int total_word_count; /* number of word tokens seen, duplicates included */

int distinct_word_count; /* number of entries in use in the WORD array */

} WORDSTATS;

/*
 * Read the text file `filename` line by line, split each line into words and
 * record them in `words`, updating the counters in `wordstats`.
 * Returns 0 on success, 1 on failure (for example when the file cannot be
 * opened).
 */
int process_word(char *filename, WORD *words, WORDSTATS *wordstats);

/*
 * Write the counters in `wordstats` and the first
 * wordstats->distinct_word_count entries of `words` to the file `filename`.
 * Returns 0 on success, 1 on failure (for example when the file cannot be
 * opened for writing).
 */
int save_file(char *filename, WORD *words, WORDSTATS *wordstats);

#endif

Explanation for step 1

The program defines a header file "myword.h" for counting and storing words in a text file.

The header file includes another header file "mystring.h".

MAX_WORD and MAX_WORDS constants are defined with a value of 20 and 1000 respectively.

The "WORD" structure is defined to store a single word and its frequency in the text file.

The "WORDSTATS" structure is defined to store the line count, total word count, and distinct word count in the text file.

The "process_word" function takes a filename, a pointer to an array of "WORD" structures, and a pointer to a "WORDSTATS" structure as arguments, and returns an integer.

The "save_file" function takes a filename, a pointer to an array of "WORD" structures, and a pointer to a "WORDSTATS" structure as arguments and returns an integer.

Step 2/2

Implementation for myword.c:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mystring.h"
#include "myword.h"

/* Size in bytes of the buffer used to read one line of input with fgets(). */
#define MAX_LINE 1000

/*
 * Read a text file line by line and accumulate word statistics.
 *
 * filename  - path of the text file to read
 * words     - caller-provided array of MAX_WORDS entries; each distinct word
 *             is appended in order of first appearance
 * wordstats - counters to update; they are added to, not reset, so the caller
 *             must zero them before the first call
 *
 * Every token is passed through str_trim() and lower_case() (from mystring.h;
 * presumably they normalise the string in place - confirm against that
 * header) before it is looked up. Words too long for a WORD entry are
 * truncated to fit. Input lines longer than MAX_LINE - 1 characters are read
 * in several pieces and therefore counted as several lines.
 *
 * Returns 0 on success. Returns 1 if an argument is NULL, the file cannot be
 * opened, a read error occurs, or the words array fills up (in which case the
 * extra distinct words are counted in total_word_count but not stored).
 */
int process_word(char *filename, WORD *words, WORDSTATS *wordstats) {
    // Separators: whitespace (fgets keeps the line terminator) plus common
    // punctuation, so "TEST." and "test" are recognised as the same word.
    static const char delimiters[] = " \t\r\n.,;:!?";

    // Longest word, excluding the terminator, that fits in one entry.
    const size_t max_len = sizeof words[0].word - 1;

    if (filename == NULL || words == NULL || wordstats == NULL) {
        return 1;
    }

    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        return 1;
    }

    int status = 0;
    char line[MAX_LINE];

    while (fgets(line, MAX_LINE, fp) != NULL) {
        wordstats->line_count++;

        for (char *token = strtok(line, delimiters); token != NULL;
             token = strtok(NULL, delimiters)) {
            str_trim(token);
            lower_case(token);

            // Skip anything that normalised to an empty string.
            if (token[0] == '\0') {
                continue;
            }

            // Truncate before the lookup so the stored key and the
            // comparison key are always the same string.
            if (strlen(token) > max_len) {
                token[max_len] = '\0';
            }

            wordstats->total_word_count++;

            // Linear search for an existing entry.
            int i = 0;
            while (i < wordstats->distinct_word_count &&
                   strcmp(token, words[i].word) != 0) {
                i++;
            }

            if (i < wordstats->distinct_word_count) {
                words[i].frequency++;
            } else if (wordstats->distinct_word_count < MAX_WORDS) {
                // New word: append it to the end of the array.
                strcpy(words[i].word, token);
                words[i].frequency = 1;
                wordstats->distinct_word_count++;
            } else {
                // Table is full: report failure instead of writing past it.
                status = 1;
            }
        }
    }

    // fgets returns NULL for both end-of-file and read errors.
    if (ferror(fp)) {
        status = 1;
    }

    fclose(fp);

    return status;
}

/*
 * Write the word statistics report to a text file.
 *
 * filename  - path of the report file; it is created or overwritten
 * words     - array of word entries; the first wordstats->distinct_word_count
 *             entries are written
 * wordstats - counters to write
 *
 * The report has one "name:value" pair per line: a summary section, a blank
 * line, then one "word:frequency" line per distinct word.
 *
 * Returns 0 on success, 1 if an argument is NULL, the file cannot be opened,
 * or writing/closing the file fails.
 */
int save_file(char *filename, WORD *words, WORDSTATS *wordstats) {
    if (filename == NULL || words == NULL || wordstats == NULL) {
        return 1;
    }

    FILE *fp = fopen(filename, "w");
    if (fp == NULL) {
        return 1;
    }

    // Summary section
    fprintf(fp, "word stats:value\n");
    fprintf(fp, "line count:%d\n", wordstats->line_count);
    fprintf(fp, "total word count:%d\n", wordstats->total_word_count);
    fprintf(fp, "distinct word count:%d\n", wordstats->distinct_word_count);

    // Per-word section, separated from the summary by a blank line
    fprintf(fp, "\ndistinct words:frequency\n");
    for (int i = 0; i < wordstats->distinct_word_count; i++) {
        fprintf(fp, "%s:%d\n", words[i].word, words[i].frequency);
    }

    // A failed write sets the stream's error flag; fclose flushes buffered
    // data, so its result has to be checked as well.
    int failed = ferror(fp) != 0;
    if (fclose(fp) != 0) {
        failed = 1;
    }

    return failed ? 1 : 0;
}

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Visual C# And Databases

Authors: Philip Conrod, Lou Tylee

16th Edition

1951077083, 978-1951077082

More Books

Students also viewed these Databases questions

Question

What are the salient product features of CFD?

Answered: 1 week ago