[單鏈表]統計文本中英文單詞出現次數，並輸出前200個單詞及其出現次數

原創

2020-07-06 01:55

統計文本中各英文單詞出現次數，並按詞頻逆序排列。

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define SIZE 50

// One node of the singly linked word list: a word plus how often it was seen.
typedef struct words
{
	char str[SIZE];	// the word as a NUL-terminated string (at most SIZE-1 characters)
	int count;	// occurrence count for str (0 in the head node)
	struct words *next;	// following node, NULL at the end of the list
}Words;


// Create the list: allocate the head (sentinel) node.
// The head stores no word itself (empty string, count 0); entries are linked
// behind it through head->next.
// Returns the new head node; release the whole list with free_Word().
// If the allocation fails, an error is printed to stderr and the program exits,
// because nothing useful can be done without a list.
Words* InitWord(void)
{
	Words *headWord = (Words*)malloc(sizeof *headWord);
	if(headWord == NULL)
	{
		fprintf(stderr,"InitWord: out of memory\n");
		exit(EXIT_FAILURE);
	}
	headWord->str[0] = '\0';
	headWord->count = 0;
	headWord->next = NULL;
	return headWord;
}

// Add a new word with count 1 at the front of the list (directly behind the head).
//   headWord: head (sentinel) node of the list
//   str:      word to store; it is copied, and truncated if it does not fit
//             into the node's fixed-size buffer
// If the allocation fails, an error is printed to stderr and the program exits.
void AddWord(Words *headWord,const char *str)
{
	Words *newWord = (Words*)malloc(sizeof *newWord);
	if(newWord == NULL)
	{
		fprintf(stderr,"AddWord: out of memory\n");
		exit(EXIT_FAILURE);
	}
	// Bounded copy: always NUL-terminated, never writes past newWord->str.
	snprintf(newWord->str,sizeof newWord->str,"%s",str);
	newWord->count = 1;
	// Head insertion: the old first node (NULL for an empty list) becomes the successor.
	newWord->next = headWord->next;
	headWord->next = newWord;
}

// Count one occurrence of str if the list can account for it.
// Returns 1 when the word has been counted: either a node holding str was found
// and its count incremented, or the list was empty and str was added as the
// first word.
// Returns 0 when the list is non-empty and holds no node for str; nothing is
// modified in that case.
int CheckStr(Words *headWord,const char *str)
{
	Words *node;

	// Empty list: the word becomes the first entry right away.
	if(headWord->next == NULL)
	{
		AddWord(headWord,str);
		return 1;
	}

	for(node = headWord->next; node != NULL; node = node->next)
	{
		if(strcmp(node->str,str) == 0)
		{
			node->count++;	// known word: one more occurrence
			return 1;
		}
	}
	return 0;	// reached the end without a match
}

#ifndef SIZE
#define SIZE 50	// fallback only; normally provided by the file-level definition
#endif

// Read the next word from fp, starting at byte offset *plen.
//   fp:   stream to read from
//   plen: in: offset (from the start of the file) to resume at;
//         out: offset just behind the consumed input, as reported by ftell()
//   str:  receives the word as a NUL-terminated string; must have room for
//         SIZE bytes
// A word starts with a letter and continues with letters and apostrophes
// (e.g. I'm); anything in front of the first letter is skipped. Words longer
// than SIZE-1 characters are truncated: the surplus characters are consumed
// but not stored, so the caller's buffer is never overrun.
// Returns the updated *plen (non-zero) when a word was read, 0 when no further
// word exists.
int ReadWord(FILE *fp,int *plen,char *str)
{
	int ch;		// fgetc() yields an unsigned char value or EOF, safe for isalpha()
	int i = 0;	// number of characters stored in str
	int started = 0;	// set once the first letter of a word was seen

	fseek(fp, *plen, SEEK_SET);
	while((ch = fgetc(fp)) != EOF)
	{
		int letter = (isalpha(ch) != 0);
		if(!started)
		{
			if(!letter)	continue;	// skip everything in front of the word
			started = 1;
		}
		else if(!letter && ch != '\'')
		{
			break;	// first character behind the word ends it (and is consumed)
		}
		if(i < SIZE - 1)	str[i++] = (char)ch;	// keep room for the terminator
	}
	if(!started)	return 0;	// hit end of file without finding a word
	str[i] = '\0';
	*plen = (int)ftell(fp);	// offset from the start of the file
	return *plen;
}

// Read the file `name` word by word and tally every word in the list.
//   headWord: head (sentinel) node of the word list
//   name:     path of the text file to read
// Each word delivered by ReadWord() is passed to CheckStr(); when CheckStr()
// reports 0 (word not in the list yet) the word is added with AddWord().
// If the file cannot be opened at all, the reason is printed with perror()
// and the list is left unchanged.
void ReadFile(Words *headWord,char *name)
{
	FILE *fp = fopen(name,"r");
	int len = 0;		// current read offset inside the file
	char str[SIZE];

	if(fp == NULL)
	{
		// Opening for reading failed: fall back to "w+" (read/write), which
		// creates the file when it is missing and truncates it when it exists,
		// so the loop below simply finds no words.
		// NOTE(review): this leaves an empty file behind for a mistyped name -
		// confirm that this fallback is really wanted.
		fp = fopen(name,"w+");
	}
	if(fp == NULL)
	{
		// Neither mode worked; continuing would hand NULL to the stdio calls.
		perror(name);
		return;
	}

	while(ReadWord(fp,&len,str) != 0)
	{
		if(CheckStr(headWord,str) == 0)	// word not in the list yet: add it
		{
			AddWord(headWord,str);
		}
	}

	fclose(fp);
}

// Exchange the contents (word text and count) of two nodes.
// Only the payload moves; the next pointers, and therefore the positions of
// the nodes in the list, stay as they are.
void SwapWord(Words *posWord,Words *posNext)
{
	int savedCount = posWord->count;
	char savedStr[SIZE] = "";

	// counts
	posWord->count = posNext->count;
	posNext->count = savedCount;

	// word text, via the temporary buffer
	strcpy(savedStr, posWord->str);
	strcpy(posWord->str, posNext->str);
	strcpy(posNext->str, savedStr);
}
// Sort the list by count, largest first (bubble sort on the node contents).
// Neighbours are exchanged with SwapWord() only when the left count is strictly
// smaller, so words with equal counts keep their relative order.
void WordsSort(Words *headWord)
{
	Words *sortedFrom = NULL;	// first node of the already sorted tail (NULL: none yet)
	Words *cur;
	int swapped;

	if(headWord->next == NULL)	return;	// empty list

	do
	{
		swapped = 0;
		// One pass over the unsorted front part; stops at the node whose
		// successor already belongs to the sorted tail.
		for(cur = headWord->next; cur->next != sortedFrom; cur = cur->next)
		{
			if(cur->count < cur->next->count)
			{
				SwapWord(cur,cur->next);
				swapped = 1;
			}
		}
		sortedFrom = cur;	// the pass moved the smallest remaining count here
	} while(swapped);	// a pass without exchanges means the list is sorted
}

// Print the word ranking: a title containing `name`, a column header, then one
// line per word with its count, for at most the first 200 nodes of the list.
// Prints nothing at all when the list is empty.
void PrintWords(Words *headWord,const char *name)
{
	Words *node;
	int shown;

	if(headWord->next == NULL)	return;

	printf("\t<<%s>>中出現的最高頻率的單詞:\n",name);
	printf("\t單詞：\t\t\t\t\t\t出現次數：\n");
	for(node = headWord->next, shown = 0; node != NULL && shown < 200; node = node->next, ++shown)
	{
		printf("\t%-50s%d\n",node->str,node->count);
	}
	printf("\n");
}

// Release every node of the list, starting with headNode itself.
// Passing NULL is allowed and does nothing.
void free_Word(Words* headNode)
{
	while(headNode != NULL)
	{
		Words *following = headNode->next;	// remember the successor before freeing
		free(headNode);
		headNode = following;
	}
}



// Count the words of one text file and print the ranking.
int main()
{
	// Candidate input files; only the first entry is processed.
	// (The second entry is this program's own source file, the last one a Harry Potter text.)
	char bookName[][100] = {"小王子.txt","CountWords.cpp","test.txt","Harry Potter and The Half-Blood Prince.txt" };
	char *target = bookName[0];
	Words *headWord = InitWord();	// create the list head

	ReadFile(headWord,target);	// read the file and tally the words
	WordsSort(headWord);		// order by count, largest first
	PrintWords(headWord,target);	// print the ranking
	free_Word(headWord);		// release the list
	return 0;
}

文件下載：
小王子.txt
鏈接：https://wwa.lanzous.com/icWhOe8z34j
Harry Potter and The Half-Blood Prince.txt
鏈接：https://wwa.lanzous.com/inix8e8z33i

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

[單鏈表]統計文本中英文單詞出現次數，並輸出前200個單詞及其出現次數

Spring Cloud 部署時如何使用 Kubernetes 作爲註冊中心和配置中心

[單鏈表]統計文本中英文單詞出現次數，並輸出前200個單詞及其出現次數

關於字符讀取，過濾回車的問題。

第 1 章：路由概念考試題——專業知識-標準分數

高級數據結構 | 創建二叉樹 —遞歸與非遞歸實現：先序中序創建、中序後序創建 ...

python學習筆記（六）——異常處理

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結