Ich schrieb ein Programm, das tut, was ich denke, dass Sie suchen. Ich änderte die structs Ich dachte an vor:
typedef node node_t;
struct node {
char *word;
int num_lines;
int *location;
int *frequency;
node_t *next;
};
diese Weise können die Knoten Zeiger auf Arrays von int
enthalten den Ort und die Frequenzinformationen zu speichern. Knoten und Speicher für die Wortstrings, Standortarrays und Frequenzarrays werden alle dynamisch zugewiesen. Hier ist der Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXLINE 1000
#define MAXWORD 30
typedef struct node node_t;
struct node {
char *word;
int num_lines;
int *location;
int *frequency;
node_t *next;
};
void strip(char *pln);
void normalize_word(char *pstr);
struct node * update_word(char *pwd, int lnum, struct node *phead);
struct node * find_in_list(char *pwd, struct node *phead);
int find_line_pair(int lnum, struct node *pwn);
int list_len(struct node *phead);
int num_pairs(struct node *phead);
int main(int argc, char *argv[])
{
FILE *fp;
struct node *head, *current;
char *pline, *pword;
char line[MAXLINE + 1];
char word[MAXWORD + 1];
int i, n, line_count = 0;
head = NULL;
if (argc < 2) {
fprintf(stderr, "Usage: %s filename\n", argv[0]);
exit(EXIT_FAILURE);
} else {
if ((fp = fopen(argv[1], "r")) == NULL) {
fprintf(stderr, "Unable to open file %s\n", argv[1]);
exit(EXIT_FAILURE);
}
}
/* Read in lines and process words */
pline = line;
pword = word;
while (fgets(pline, MAXLINE, fp) != NULL) {
++line_count;
strip(pline);
while ((pword = strtok(pline, " ")) != NULL) {
normalize_word(pword);
if (*pword != '\0') // don't add empty words
head = update_word(pword, line_count, head);
pline = NULL;
}
pline = line;
}
/* Display list contents */
printf("There are %d words in the text file,\n",
list_len(head));
printf("and %d pairs of (line, occurrences)\n",
num_pairs(head));
printf("Word: pairs\n");
current = head;
while (current != NULL) {
n = current->num_lines;
printf("%s:", current->word);
for (i = 0; i < n; i++) {
printf(" %d, %d;",
current->location[i], current->frequency[i]);
}
putchar('\n');
current = current->next;
}
/* Cleanup */
// close file
if (fclose(fp) != 0)
fprintf(stderr, "Error closing file %s\n", argv[1]);
// free all allocated memory
current = head;
while (current != NULL) {
free(current->word);
free(current->location);
free(current->frequency);
current = current->next;
free(head);
head = current;
}
return 0;
}
/* Remove trailing newlines */
void strip(char *pln)
{
while (*pln != '\0') {
if (*pln == '\n')
*pln = '\0';
++pln;
}
}
/* Convert word to lowercase and remove trailing
* non-alphanumeric characters */
void normalize_word(char *pstr)
{
int i = 0;
char ch;
while ((ch = pstr[i]) != '\0') {
pstr[i] = tolower(ch);
++i;
}
while ((--i >= 0) && !isalnum(pstr[i])) {
pstr[i] = '\0';
continue;
}
}
/* Update existing word node or create a new one, and return
* a pointer to the head of the list */
struct node * update_word(char *pwd, int lnum, struct node *phead)
{
struct node *found, *newnode;
char *pword;
int *ploc, *pfreq;
int index;
/* Modify existing node if word is in list */
if ((found = find_in_list(pwd, phead)) != NULL) {
// add new (location, freq) pair if word not in found line
if ((index = find_line_pair(lnum, found)) == -1) {
index = found->num_lines; // index for new pair
found->num_lines += 1; // increment number of lines
ploc = realloc(found->location, (index + 1) * sizeof(int));
pfreq = realloc(found->frequency, (index + 1) * sizeof(int));
ploc[index] = lnum; // new location
pfreq[index] = 1; // found once in this line so far
found->location = ploc; // point to new location array
found->frequency = pfreq; // point to new frequency array
}
else { // update frequency in existing line
found->frequency[index] += 1;
}
/* Set up a new node */
} else {
// allocate memory for new node
newnode = malloc(sizeof(struct node));
// allocate memory for string pointed to from node
pword = malloc((strlen (pwd) + 1) * sizeof(char));
strcpy(pword, pwd);
newnode->word = pword; // set word pointer
newnode->num_lines = 1; // only one line so far
ploc = malloc(sizeof(int));
pfreq = malloc(sizeof(int));
*ploc = lnum; // location was passed by caller
*pfreq = 1; // only one occurrence so far
newnode->location = ploc;
newnode->frequency = pfreq;
if (phead == NULL) { // if wordlist is empty
newnode->next = NULL; // only/last link in the list
phead = newnode; // newnode is the head
} else {
newnode->next = phead; // insert newnode at front of list
phead = newnode;
}
}
return phead;
}
/* Return pointer to node containing word, or NULL */
struct node * find_in_list(char *pwd, struct node *phead)
{
struct node *current = phead;
while (current != NULL) {
if (strcmp(current->word, pwd) == 0)
return current; // word already in list
current = current->next;
}
return NULL; // word not found
}
/* Return index of existing line location, or -1 */
int find_line_pair(int lnum, struct node *pwn)
{
int n = pwn->num_lines;
int index = 0;
while (index < n) {
if (pwn->location[index] == lnum)
return index; // word already found in this line
++index;
}
return -1; // word not yet found in this line
}
/* Find number of nodes in linked list */
int list_len(struct node *phead)
{
int length = 0;
struct node *current = phead;
while (current != NULL) {
++length;
current = current->next;
}
return length;
}
/* Find number of (line, occurrence) pairs */
int num_pairs(struct node *phead)
{
int num = 0;
struct node *current = phead;
while (current != NULL) {
num += current->num_lines;
current = current->next;
}
return num;
}
Hinweis: ich dies aus der vorherigen Version in der update_word()
Funktion geändert.Der ursprüngliche Code hat am Ende der Liste einen neuen Knoten eingefügt, sodass die resultierende Liste Wörter in der Reihenfolge ihres ersten Auftretens im Eingabetext enthielt. Diese Version fügt einen neuen Knoten am Anfang der Liste ein, so dass die resultierende Liste Wörter in umgekehrter Reihenfolge ihrer ersten Erscheinung enthält. Dies beschleunigt Knoten Einfügen und vereinfacht den Knoten-Platzhalter-Code aus:
current = phead;
while (current->next != NULL) // find tail
current = current->next;
current->next = newnode; // add newnode to end
zu:
newnode->next = phead; // insert newnode at front of list
habe ich keinen Zweifel, dass der Code verbessert werden kann, aber das zu funktionieren scheint. Ich würde nicht sagen, dass das genau so einfach ist, aber relativ einfach. Ich lief es gegen diese Textdatei:
Three blind mice. Three blind mice.
See how they run. See how they run.
They all ran after the farmer's wife,
Who cut off their tails with a carving knife,
Did you ever see such a sight in your life,
As three blind mice?
Hier sind die Ergebnisse:
There are 31 words in the text file,
and 37 pairs of (line, occurrences)
Word: pairs
as: 6, 1;
life: 5, 1;
your: 5, 1;
in: 5, 1;
sight: 5, 1;
such: 5, 1;
ever: 5, 1;
you: 5, 1;
did: 5, 1;
knife: 4, 1;
carving: 4, 1;
a: 4, 1; 5, 1;
with: 4, 1;
tails: 4, 1;
their: 4, 1;
off: 4, 1;
cut: 4, 1;
who: 4, 1;
wife: 3, 1;
farmer's: 3, 1;
the: 3, 1;
after: 3, 1;
ran: 3, 1;
all: 3, 1;
run: 2, 2;
they: 2, 2; 3, 1;
how: 2, 2;
see: 2, 2; 5, 1;
mice: 1, 2; 6, 1;
blind: 1, 2; 6, 1;
three: 1, 2; 6, 1;
Es tut mir leid, wenn meine Frage keinen Sinn ergibt, ich versuche es jetzt klarer zu machen. – RoadRunner
Sie können die (nicht so) brandneue Dokumentation hier versuchen: http://stackoverflow.com/documentation/c/560/linked-lists#t=20160927151829078285 – deamentiaemundi
Danke @deamentiaemundi, yeah nur aus dem Lesen, dass ich fühle Zweifach verknüpfte Listen wären für diese Art von Problem am besten geeignet. – RoadRunner