Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

Feel free to solve this with a different approach; I would like to see other ways of going at this as well. This is the assignment:

feel free to solve with a different approach i would like to see other ways of going at this as well.

this is the assignment:

Develop a C program which reads and processes an input text file like this:

 THIS IS THE FIRST TEST. 
 This is the second test. 

It retrieves information:

  • number of lines
  • number of all words
  • number of distinct non-common words
  • non-common words and their frequencies

It outputs the retrieved information to a file like this:

Line count 2

Word count 10

Keyword count 3

Keyword frequency

first 1

test 2

second 1

Specifically, write C source programs myword.h and myword.c containing the following:

  1. structure definitions:

/* One keyword together with the number of times it has been seen. */
typedef struct word {
    char word[30];  /* NUL-terminated keyword text */
    int frequency;  /* occurrence count for this keyword */
} WORD;

/* Everything collected from one input text file. */
typedef struct words {
    WORD word_array[1000];  /* distinct non-common words with their frequencies */
    int line_count;         /* number of lines */
    int word_count;         /* number of all words */
    int keyword_count;      /* number of distinct non-common words */
} WORDSUMMARY;

  2. void set_stopword(char *filename, char *stopwords[]); this function reads stop words from the common-word file named by filename and puts them in the stop word dictionary, a data structure made of an array of 26 strings. Each string holds all stop words starting with the same letter of the alphabet, separated by commas. The array of strings is passed as the parameter char *stopwords[]; stopwords[i] holds the pointer to the i-th string.
  3. int contain_word(char *stopwords[], char *word); this function checks if the given word is contained in the stop word dictionary char *stopwords[]; it returns 1 if true, otherwise 0.
  4. int str_contain_word(char *str, char *word); this function checks if the given word is contained in the given string str; it returns 1 if yes and 0 otherwise. For example, if str is "the,this,that," and word is "this", then it returns 1.
  5. int process_word(char *filename, WORDSUMMARY *words, char *stopwords[]); this function opens the text file whose name is passed in filename and reads it line by line. For each line, it gets each word; if the word is not a stop word, it checks whether the word is already in the array words->word_array. If yes, it increases the word's frequency by 1; otherwise it inserts the word at the end of word_array and sets its frequency to 1. Meanwhile, it updates the count information.
  6. int save_to_file(char *filename, WORDSUMMARY *words); this function saves the data of WORDSUMMARY words to the file whose name is passed in filename, in the specified format.

Use the provided main function program to test

main.c

#include <stdio.h>
#include <string.h>
#include "mystring.h"
#include "myword.h"
 
/* Copy src into dst (capacity cap bytes). Returns 1 on success, 0 if src does not fit. */
static int copy_filename(char *dst, size_t cap, const char *src) {
    size_t len = strlen(src);

    if (len >= cap) {
        return 0;
    }
    memcpy(dst, src, len + 1); /* +1 copies the terminating NUL */
    return 1;
}

/*
 * Test driver: loads the stop words, processes the input text, saves the
 * summary, then echoes the generated output file to stdout.
 *
 * Usage: prog [infile [outfile [stopwordfile]]]
 * Arguments that are not given keep the defaults below.
 *
 * Returns 0 normally (including when the output file cannot be reopened,
 * as before) and 1 when a file name argument is too long for its buffer.
 */
int main(int argc, char *args[]) {
    char infilename[40] = "textdata.txt"; //default input file name
    char outfilename[40] = "result.txt"; //default output file name
    char stopwordfilename[40] = "common-english-words.txt"; //default stop word file name
    char *targets[3];
    int i;

    targets[0] = infilename;
    targets[1] = outfilename;
    targets[2] = stopwordfilename;

    /* Bounded copy: an over-long argument used to overflow the 40-byte buffers. */
    for (i = 1; i < argc && i <= 3; i++) {
        if (!copy_filename(targets[i - 1], sizeof infilename, args[i])) {
            fprintf(stderr, "file name too long: %s\n", args[i]);
            return 1;
        }
    }

    //stop word dictionary as array of strings
    char stopword_arrays[26][100] = { 0 };
    char *stopwords[26];
    for (i = 0; i < 26; i++) {
        stopwords[i] = &stopword_arrays[i][0];
    }

    set_stopword(stopwordfilename, stopwords);
    printf("loading stop words done\n");

    WORDSUMMARY wordsummary = {0};
    process_word(infilename, &wordsummary, stopwords);
    printf("word processing done\n");

    save_to_file(outfilename, &wordsummary);
    printf("saving result to file done\n");

    FILE *fp = fopen(outfilename, "r");
    if (fp == NULL) {
        printf("output file does not exist\n");
        return 0;
    }

    /*
     * Loop on the result of fgets itself. Testing feof() before reading
     * re-prints the previous buffer once more when the final fgets fails.
     */
    char buf[100];
    while (fgets(buf, sizeof(buf), fp) != NULL) {
        printf("%s", buf);
    }
    fclose(fp);
    return 0;
}

mystring.c

#include "mystring.h"

/*
 * Count the ASCII letters ('a'-'z' and 'A'-'Z') in a NUL-terminated string.
 *
 * s: string to scan; may be NULL.
 * Returns the number of letters, or -1 when s is NULL.
 */
int letter_count(char *s) {
    int letters = 0;
    int i;

    if (!s) {
        return -1;
    }

    for (i = 0; s[i] != '\0'; i++) {
        char c = s[i];
        int is_lower = (c >= 'a' && c <= 'z');
        int is_upper = (c >= 'A' && c <= 'Z');

        if (is_lower || is_upper) {
            letters++;
        }
    }
    return letters;
}

/*
 * Count the words in a string, where a word is a maximal run of characters
 * other than the space character ' '.
 *
 * s: NUL-terminated string; may be NULL.
 * Returns the number of words; 0 for NULL or an empty string.
 */
int word_count(char *s) {
    int words = 0;
    char prev = ' '; /* treat the start of the string as if preceded by a space */

    if (s == NULL) {
        return 0;
    }

    for (; *s != '\0'; s++) {
        /* a word starts at every non-space that follows a space */
        if (*s != ' ' && prev == ' ') {
            words++;
        }
        prev = *s;
    }
    return words;
}

/*
 * Convert every ASCII upper-case letter of a string to lower case, in place.
 * All other characters are left untouched.
 *
 * s: NUL-terminated, writable string; NULL is ignored.
 */
void lower_case(char *s) {
    char *c;

    if (!s) {
        return;
    }

    for (c = s; *c != '\0'; ++c) {
        if ('A' <= *c && *c <= 'Z') {
            *c = (char)(*c + ('a' - 'A')); /* ASCII case offset is 32 */
        }
    }
}

/*
 * Normalise the spaces of a string in place: leading spaces are removed,
 * every run of spaces is collapsed to one space, and a trailing space is
 * removed. Only the space character ' ' is considered.
 *
 * s: NUL-terminated, writable string; NULL or an empty string is left alone.
 *
 * A string consisting only of spaces becomes the empty string. The previous
 * version wrote a NUL one byte before the buffer in that case and left the
 * spaces in place.
 */
void trim(char *s) {
    char *src;
    char *dst;

    if (s == NULL || *s == '\0') {
        return;
    }

    dst = s;
    for (src = s; *src != '\0'; src++) {
        /* keep non-spaces, and the first space that follows a non-space */
        if (*src != ' ' || (src > s && *(src - 1) != ' ')) {
            *dst++ = *src;
        }
    }

    /* at most one trailing space survives the loop; drop it if present */
    if (dst > s && *(dst - 1) == ' ') {
        dst--;
    }
    *dst = '\0';
}

mystring.h

#ifndef MYSTRING
#define MYSTRING

/* The header names were missing after "#include"; the standard I/O and
 * string headers are what the code built on this header relies on
 * (fgets/fprintf, strtok/strcat/strstr). */
#include <stdio.h>
#include <string.h>

/* Count the letters a-z / A-Z in s; returns -1 when s is NULL. */
int letter_count(char *s);

/* Count the space-separated words in s; returns 0 for NULL or "". */
int word_count(char *s);

/* Convert the upper-case letters A-Z of s to lower case, in place. */
void lower_case(char *s);

/* Remove leading/trailing spaces of s and collapse runs of spaces, in place. */
void trim(char *s);

#endif /* MYSTRING */

myword.h

#ifndef MYWORD
#define MYWORD

#include "mystring.h"

#define MAX_WORD 30         /* size of one stored word, including the NUL */
#define MAX_LINE_LEN 1000   /* size of the buffer used to read one line */
#define MAX_WORDS 1000      /* capacity of WORDSUMMARY.word_array */

/* One keyword together with the number of times it has been seen. */
typedef struct word {
    char word[MAX_WORD];
    int frequency;
} WORD;

/* Everything collected from one input text file. */
typedef struct words {
    WORD word_array[MAX_WORDS]; /* distinct non-stop words, in order of first appearance */
    int line_count;             /* number of lines read */
    int word_count;             /* number of all words */
    int keyword_count;          /* number of used entries in word_array */
} WORDSUMMARY;

/*
 * Read the stop words from the file `filename` into the dictionary
 * `stopwords`: an array of 26 caller-provided strings, where stopwords[i]
 * collects the words starting with the i-th letter, separated by commas.
 */
void set_stopword(char *filename, char *stopwords[]);

/*
 * Check whether `word` is in the stop word dictionary `stopwords`.
 * Returns 1 if it is, 0 otherwise.
 */
int contain_word(char *stopwords[], char *word);

/*
 * Check whether `word` is one of the comma-separated words of `str`,
 * e.g. str "the,this,that," and word "this" give 1.
 * Returns 1 if it is, 0 otherwise.
 */
int str_contain_word(char *str, char *word);

/*
 * Read the text file `filename` line by line and update `words`: the
 * line and word counts, and for every word that is not a stop word either
 * its frequency or a new entry at the end of words->word_array.
 */
int process_word(char *filename, WORDSUMMARY *words, char *stopwords[]);

/*
 * Write the counts and the keyword frequencies of `words` to the file
 * `filename`.
 */
int save_to_file(char *filename, WORDSUMMARY *words);

#endif /* MYWORD */

The code where I need help is myword.c:

myword.c

#include "myword.h"

/*
 * Load the stop word dictionary from a text file.
 *
 * filename:  path of the stop word file; words may be separated by commas,
 *            periods, spaces, tabs or line endings.
 * stopwords: array of 26 writable, NUL-terminated strings supplied by the
 *            caller. Each word is lower-cased and appended, followed by a
 *            comma, to the string of its first letter (index 0 for 'a',
 *            25 for 'z'), e.g. "the,this,that,".
 *
 * The function cannot see the capacity of the destination strings, so the
 * caller must make them large enough for the file being loaded.
 * Tokens that do not start with a letter are skipped. If the file cannot be
 * opened, a message is printed with perror() and the dictionary is left
 * unchanged.
 */
void set_stopword(char *filename, char *stopwords[])
{
    /* '\r' and '\n' must be delimiters, otherwise the last word of each
     * line keeps its line ending and can never be matched. */
    const char delimiters[] = " .,\t\r\n";
    char line[1000];
    char *token;
    char *c;
    FILE *fp;
    int i;

    if (filename == NULL || stopwords == NULL) {
        return;
    }

    fp = fopen(filename, "r");
    if (fp == NULL) {
        perror(filename);
        return;
    }

    while (fgets(line, sizeof line, fp) != NULL) {
        for (token = strtok(line, delimiters);
             token != NULL;
             token = strtok(NULL, delimiters)) {

            /* store lower case so lookups of lower-cased text match */
            for (c = token; *c != '\0'; c++) {
                if (*c >= 'A' && *c <= 'Z') {
                    *c = (char)(*c + ('a' - 'A'));
                }
            }

            /* only a-z maps to one of the 26 buckets */
            if (*token < 'a' || *token > 'z') {
                continue;
            }

            i = *token - 'a';
            if (stopwords[i] == NULL) {
                continue;
            }
            strcat(stopwords[i], token);
            strcat(stopwords[i], ",");
        }
    }

    fclose(fp);
}

// Check whether word is contained in the stop word dictionary stopwords[].
// The dictionary has 26 strings, one per first letter 'a'..'z'; the lookup
// is delegated to str_contain_word() on the string of word's first letter.
//
// Returns 1 if word is a stop word, 0 otherwise. A NULL or empty word, a
// NULL dictionary, or a word that does not start with 'a'..'z' gives 0;
// the first-letter check keeps the array index inside 0..25.
int contain_word(char *stopwords[], char *word)
{
    if (stopwords == NULL || word == NULL) {
        return 0;
    }

    /* also rejects the empty string, whose first character is '\0' */
    if (*word < 'a' || *word > 'z') {
        return 0;
    }

    return str_contain_word(stopwords[*word - 'a'], word);
}

// Check whether word is one of the comma-separated entries of str,
// e.g. str "the,this,that," contains "the", "this" and "that" but not "th".
//
// Returns 1 if word is an entry of str, 0 otherwise (also for a NULL
// argument or an empty word).
//
// The entries are compared in place, so the word may have any length and
// the first entry is matched even though no comma precedes it.
int str_contain_word(char *str, char *word)
{
    const char *entry;
    const char *comma;
    size_t word_len;
    size_t entry_len;

    if (str == NULL || word == NULL || *word == '\0') {
        return 0;
    }

    word_len = strlen(word);
    entry = str;
    while (*entry != '\0') {
        comma = strchr(entry, ',');
        entry_len = (comma != NULL) ? (size_t)(comma - entry) : strlen(entry);

        if (entry_len == word_len && strncmp(entry, word, word_len) == 0) {
            return 1;
        }
        if (comma == NULL) {
            break; /* last entry had no trailing comma */
        }
        entry = comma + 1;
    }
    return 0;
}

/*
 * Read a text file line by line and accumulate its statistics in *words.
 *
 * filename:  path of the text file to read.
 * words:     summary to update; the caller is expected to zero-initialise
 *            it before the first call. Counts are added to what is there.
 * stopwords: stop word dictionary as filled by set_stopword().
 *
 * Each line is lower-cased and split into words at the delimiters below.
 * Every word increments words->word_count. A word that is not a stop word
 * either has its frequency incremented, if it is already in
 * words->word_array, or is appended with frequency 1, and then
 * words->keyword_count is incremented. Words longer than MAX_WORD - 1
 * characters are truncated to that length; new words are dropped once
 * MAX_WORDS entries are in use.
 *
 * Returns 1 on success, 0 if an argument is NULL or the file cannot be
 * opened.
 */
int process_word(char *filename, WORDSUMMARY *words, char *stopwords[])
{
    /* '\r' and '\n' are delimiters so line endings never stick to a word */
    const char delimiters[] = " .,;:!()&?-\t\r\n\"\'";
    char line[MAX_LINE_LEN];
    char *word_token;
    FILE *fp;
    int i;

    if (filename == NULL || words == NULL) {
        return 0;
    }

    fp = fopen(filename, "r");
    if (fp == NULL) {
        return 0;
    }

    while (fgets(line, MAX_LINE_LEN, fp) != NULL) {
        words->line_count++;
        lower_case(line); // function in mystring.c
        trim(line);       // function in mystring.c

        for (word_token = strtok(line, delimiters);
             word_token != NULL;
             word_token = strtok(NULL, delimiters)) {

            words->word_count++;

            if (contain_word(stopwords, word_token) != 0) {
                continue; /* stop words are counted but not recorded */
            }

            /* truncate before searching so a long word always compares
             * equal to its stored (truncated) copy */
            if (strlen(word_token) >= MAX_WORD) {
                word_token[MAX_WORD - 1] = '\0';
            }

            for (i = 0; i < words->keyword_count; i++) {
                if (strcmp(words->word_array[i].word, word_token) == 0) {
                    break;
                }
            }

            if (i < words->keyword_count) {
                words->word_array[i].frequency++;
            } else if (words->keyword_count < MAX_WORDS) {
                strcpy(words->word_array[i].word, word_token); /* fits: truncated above */
                words->word_array[i].frequency = 1;
                words->keyword_count++;
            }
        }
    }

    fclose(fp);
    return 1;
}

/*
 * Write the summary *words to a text file.
 *
 * filename: path of the file to create or overwrite.
 * words:    summary to save.
 *
 * Output, one item per line: line count, word count, keyword count, a
 * "Keyword frequency" header, then each keyword with its frequency in the
 * order stored in words->word_array.
 *
 * Returns 1 on success, 0 if an argument is NULL, the file cannot be
 * opened, or a write error is reported.
 */
int save_to_file(char *filename, WORDSUMMARY *words)
{
    FILE *fp;
    int i;
    int ok;

    if (filename == NULL || words == NULL) {
        return 0;
    }

    fp = fopen(filename, "w");
    if (fp == NULL) {
        return 0;
    }

    fprintf(fp, "%-20s %8d\n", "Line count", words->line_count);
    fprintf(fp, "%-20s %8d\n", "Word count", words->word_count);
    fprintf(fp, "%-20s %8d\n", "Keyword count", words->keyword_count);
    fprintf(fp, "%-18s %10s\n", "Keyword", "frequency");

    /* the MAX_WORDS bound guards against a corrupted keyword_count */
    for (i = 0; i < words->keyword_count && i < MAX_WORDS; i++) {
        fprintf(fp, "%-20s %8d\n",
                words->word_array[i].word, words->word_array[i].frequency);
    }

    ok = !ferror(fp);
    if (fclose(fp) != 0) { /* fclose flushes; a failure means data was lost */
        ok = 0;
    }
    return ok;
}

Step by Step Solution

There are 3 Steps involved in it

Step: 1

blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Current Trends In Database Technology Edbt 2004 Workshops Edbt 2004 Workshops Phd Datax Pim P2panddb And Clustweb Heraklion Crete Greece March 2004 Revised Selected Papers Lncs 3268

Authors: Wolfgang Lindner ,Marco Mesiti ,Can Turker ,Yannis Tzitzikas ,Athena Vakali

2005th Edition

3540233059, 978-3540233053

More Books

Students also viewed these Databases questions