統計文本中各英文單詞出現次數,並按詞頻逆序排列。
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define SIZE 50
/*
 * One node of the singly linked word list.
 *
 * Nodes are created by InitWord() (the head node, with an empty string and
 * count 0) and by AddWord() (one node per distinct word, starting at count 1).
 */
typedef struct words
{
char str[SIZE]; // word text, NUL-terminated; SIZE is 50 in this file, so at most 49 characters fit
int count; // occurrences recorded for this word
struct words *next; // following node, or NULL at the end of the list
}Words;
/*
 * Create an empty word list.
 *
 * Allocates the head node and initialises it with an empty string, a count
 * of 0 and no successor.
 *
 * Returns the head node, or NULL if the allocation fails. The whole list,
 * head included, is released with free_Word().
 */
Words* InitWord(void)
{
    /* The cast is unnecessary in C; it is kept so the file also builds as C++. */
    Words *headWord = (Words*)malloc(sizeof *headWord);

    if (headWord == NULL) {
        return NULL;
    }
    headWord->str[0] = '\0';
    headWord->count = 0;
    headWord->next = NULL;
    return headWord;
}
/*
 * Insert a new word at the front of the list with an occurrence count of 1.
 *
 * headWord  head node of the list; the new node becomes headWord->next
 * str       NUL-terminated word to store; text that does not fit in the
 *           node's buffer is truncated (the stored string is always
 *           NUL-terminated)
 *
 * Does nothing when either argument is NULL. If memory cannot be allocated
 * a message is written to stderr and the list is left unchanged.
 */
void AddWord(Words *headWord,const char *str)
{
    Words *newWord;

    if (headWord == NULL || str == NULL) {
        return;
    }

    /* The cast is unnecessary in C; it is kept so the file also builds as C++. */
    newWord = (Words*)malloc(sizeof *newWord);
    if (newWord == NULL) {
        fprintf(stderr, "AddWord: out of memory\n");
        return;
    }

    /* Bounded copy: never writes past newWord->str. */
    snprintf(newWord->str, sizeof newWord->str, "%s", str);
    newWord->count = 1;

    /* Head insertion; the previous first node (or NULL) becomes the successor. */
    newWord->next = headWord->next;
    headWord->next = newWord;
}
/*
 * Look up str in the list and, if it is present, count one more occurrence.
 *
 * headWord  head node of the list (its own string is not compared)
 * str       NUL-terminated word to look for; the comparison is exact and
 *           case-sensitive (strcmp)
 *
 * Returns 1 when the word was found and its count incremented, 0 when it is
 * not in the list. The list is never modified structurally here: inserting
 * a missing word is left to the caller, including when the list is empty.
 */
int CheckStr(Words *headWord,const char *str)
{
    Words *p;

    if (headWord == NULL || str == NULL) {
        return 0;
    }
    for (p = headWord->next; p != NULL; p = p->next) {
        if (strcmp(p->str, str) == 0) {
            ++p->count;
            return 1;
        }
    }
    return 0;
}
/*
 * Read the next word from fp.
 *
 * A word starts with a letter and continues through letters and apostrophes
 * (so "I'm" is one word); any other character ends it. Characters before
 * the first letter are skipped. Apostrophes left at the end of the word,
 * e.g. from a closing quote, are dropped so that "hello'" and "hello" are
 * the same word.
 *
 * fp    stream to read from; it is first positioned at offset *plen
 * plen  in: offset from the start of the file at which to begin;
 *       out: offset just past the word and the character that ended it
 *       (left unchanged when no word is found)
 * str   receives the NUL-terminated word; must have room for WORD_CAP bytes.
 *       Longer words are truncated to WORD_CAP - 1 characters and the rest
 *       of the word is consumed and discarded. Not written when no word is
 *       found.
 *
 * Returns the new *plen (non-zero) when a word was read, 0 when the end of
 * the file was reached without finding one.
 */
int ReadWord(FILE *fp,int *plen,char *str)
{
    /*
     * Capacity of the caller's buffer. Callers in this file pass char[SIZE];
     * the fallback only matters if the function is built without that macro.
     */
#ifdef SIZE
    enum { WORD_CAP = SIZE };
#else
    enum { WORD_CAP = 50 };
#endif
    unsigned char ch;   /* unsigned char, so it can be passed to isalpha() as is */
    int len = 0;        /* characters stored in str so far */

    /* A failed seek is not treated as an error: reading continues from the current position. */
    fseek(fp, *plen, SEEK_SET);

    while (fread(&ch, sizeof ch, 1, fp) == 1) {
        int is_letter = isalpha(ch) != 0;

        if (len == 0 && !is_letter) {
            continue;               /* still before the word: skip separators */
        }
        if (!is_letter && ch != '\'') {
            break;                  /* delimiter: the word is complete */
        }
        if (len < WORD_CAP - 1) {
            str[len++] = (char)ch;  /* excess characters are read but not stored */
        }
    }

    if (len == 0) {
        return 0;                   /* end of file, nothing read */
    }

    /* The first stored character is always a letter, so len stays >= 1. */
    while (str[len - 1] == '\'') {
        --len;
    }
    str[len] = '\0';

    *plen = (int)ftell(fp);
    return *plen;
}
/*
 * Read every word of a text file into the list.
 *
 * Each word returned by ReadWord() is looked up with CheckStr(); a word that
 * is already present has its count incremented there, a new word is
 * inserted with AddWord().
 *
 * headWord  head node of the list to fill
 * name      path of the file to read
 *
 * If the file cannot be opened, the reason is reported on stderr and the
 * list is left unchanged. The file is only ever opened for reading, so a
 * missing file is never created and an existing one is never truncated.
 */
void ReadFile(Words *headWord,const char *name)
{
    FILE *fp;
    int len = 0;        /* offset of the read position within the file */
    char str[SIZE];     /* receives one word at a time */

    if (name == NULL) {
        return;
    }
    fp = fopen(name, "r");
    if (fp == NULL) {
        perror(name);
        return;
    }

    while (ReadWord(fp, &len, str) != 0) {
        if (CheckStr(headWord, str) == 0) {
            AddWord(headWord, str);     /* not seen before: add it */
        }
    }
    fclose(fp);
}
/*
 * Exchange the contents (word text and count) of two list nodes.
 *
 * Only the payload moves; the nodes stay where they are in the list, so no
 * next pointers are touched. The two arguments are expected to be
 * different nodes.
 */
void SwapWord(Words *posWord,Words *posNext)
{
    char held[SIZE]="";
    int heldCount = posWord->count;

    /* counts */
    posWord->count = posNext->count;
    posNext->count = heldCount;

    /* word text, via a temporary buffer */
    strcpy(held, posNext->str);
    strcpy(posNext->str, posWord->str);
    strcpy(posWord->str, held);
}
/*
 * Sort the list by count, highest first, using bubble-sort passes.
 *
 * Adjacent nodes are compared from the front of the list and their contents
 * exchanged with SwapWord() when the later one has the larger count. Because
 * the comparison is strict, nodes with equal counts are never exchanged.
 * Sorting stops after the first pass that makes no exchange.
 */
void WordsSort(Words *headWord)
{
    Words *first = headWord->next;
    Words *boundary = NULL;     /* node at which the previous pass stopped; NULL before the first pass */

    if (first == NULL) {
        return;                 /* empty list */
    }

    do {
        int swapped = 0;
        Words *left;
        Words *right;

        for (left = first, right = first->next;
             right != NULL;
             left = left->next, right = right->next) {
            if (right->count > left->count) {
                SwapWord(left, right);
                swapped = 1;
            }
            /* Reached the node just before the previous stopping point:
               move the boundary one node towards the front and end the pass. */
            if (right->next == boundary) {
                boundary = right;
                break;
            }
        }

        if (!swapped) {
            break;              /* a full pass without exchanges: already in order */
        }
    } while (boundary != first);
}
/*
 * Print the word-frequency table for one file to stdout.
 *
 * headWord  head node of the list, printed in its current order
 * name      file name shown in the table heading
 *
 * Prints a heading, then one line per word (the word left-aligned in a
 * 50-column field, followed by its count) for at most MAX_ROWS words, then
 * a blank line. Prints nothing at all when the list is empty.
 */
void PrintWords(Words *headWord,const char *name)
{
    enum { MAX_ROWS = 200 };
    Words *node = headWord->next;
    int shown;

    if (node == NULL) {
        return;
    }

    printf("\t<<%s>>中出現的最高頻率的單詞:\n",name);
    printf("\t單詞:\t\t\t\t\t\t出現次數:\n");
    for (shown = 0; node != NULL && shown < MAX_ROWS; ++shown, node = node->next) {
        printf("\t%-50s%d\n", node->str, node->count);
    }
    printf("\n");
}
/*
 * Release every node of the list, starting with headNode itself.
 *
 * Passing NULL is allowed and does nothing. The caller's pointer is
 * dangling afterwards and must not be used again.
 */
void free_Word(Words* headNode)
{
    Words *cur = headNode;

    while (cur != NULL) {
        Words *following = cur->next;   /* read the link before the node is freed */
        free(cur);
        cur = following;
    }
}
/*
 * Count the words of a text file and print them by descending frequency.
 *
 * Usage: program [file]
 * Without an argument the first entry of bookName is read, as before.
 *
 * Returns 0 on success, EXIT_FAILURE if the word list cannot be created.
 */
int main(int argc, char *argv[])
{
    /* Sample inputs: The Little Prince, this source file, a test file, Harry Potter. */
    char bookName[][100] = {"小王子.txt","CountWords.cpp","test.txt","Harry Potter and The Half-Blood Prince.txt" };
    char *name = (argc > 1) ? argv[1] : bookName[0];
    Words *headWord = InitWord();       /* create the list head */

    if (headWord == NULL) {
        fprintf(stderr, "cannot create the word list\n");
        return EXIT_FAILURE;
    }

    ReadFile(headWord, name);           /* read and count the words */
    WordsSort(headWord);                /* highest count first */
    PrintWords(headWord, name);         /* print the table */
    free_Word(headWord);                /* release the list */
    return 0;
}
文件下載:
小王子.txt
鏈接:https://wwa.lanzous.com/icWhOe8z34j
Harry Potter and The Half-Blood Prince.txt
鏈接:https://wwa.lanzous.com/inix8e8z33i