#pragma once
// Byte/character helper macros. Both are defined only when IsHz has not been
// defined already, so an earlier definition of IsHz suppresses HzCode here too.
#ifndef IsHz
// IsHz(x): non-zero when x lies in 0x81..0xA0 or 0xAA..0xFE (0xA1..0xA9 is excluded).
// x must be a non-negative byte value; a negative (signed char) value never matches.
// NOTE(review): presumably the lead-byte ranges of a double-byte Chinese
// encoding (GBK?) with the symbol rows left out -- confirm.
#define IsHz(x) (((x)>=0x81 && (x)<=0xA0)||((x)>=0xAA && (x)<=0xFE))
// HzCode(x): ((x & 0x7F) << 8) + (x >> 8). For an unsigned 16-bit x the result
// is always below 0x8000. When x is two consecutive bytes b0,b1 read as a
// little-endian 16-bit value, this equals ((b0 & 0x7F) << 8) + b1.
#define HzCode(x) ((((x)&0x7F)<<8)+((x)>>8))
#endif
// _IndexBuffer_Size: number of unsigned long entries in each IndexMaker cache
// slot (the constructor advances its buffer cursor by this amount per slot).
// Debug builds use a much smaller value (0x80 vs 0x1000).
// NOTE(review): presumably to keep debug memory use low and to exercise the
// flush-to-file path sooner -- confirm.
#ifdef _DEBUG
#define _IndexBuffer_Size 0x80
#else
#define _IndexBuffer_Size 0x1000
#endif
class IndexMaker
{
private:
struct IndexMakerBuf //詞庫緩衝數組
{
char nWords[3]; //[WordMaxLen];
unsigned long _Last_FileNum;
unsigned long * _Data;
unsigned long * DataPtr;
unsigned long * DataEpr;
} _WordsIndex[0x8000];
unsigned long *_Diskbuf;
unsigned long _filenum; //文件號
char _mOutput_Path[512]; //索引輸出路徑
char* _mOutPath_EndPtr; //指向索引路徑末端
public:
long __Error;
//function
~IndexMaker(void)
{
if (_Diskbuf)
{
free(_Diskbuf);
}
}
IndexMaker(char *OutPutDir)//cache set:default=840, unsigned long CacheSet
{
_filenum=1;
__Error=NULL;
strcpy(_mOutput_Path,OutPutDir);
_mOutPath_EndPtr=_mOutput_Path+strlen(_mOutput_Path);
unsigned long *tbuf=_Diskbuf=(unsigned long *)calloc(0x8000*_IndexBuffer_Size,4);
if (!_Diskbuf) { __Error=_Error_Malloc;return ;}
memset(_WordsIndex,0,sizeof(_WordsIndex));
for(unsigned long i=0;i<0x8000;i++)
{
IndexMakerBuf *tWi=_WordsIndex+i;
*(unsigned short *)(tWi->nWords)=(unsigned short)(((i>>8)|0x80)+((i&0xFF)<<8));
tWi->nWords[2]=NULL;
tWi->_Last_FileNum=NULL;
tWi->DataPtr=tWi->_Data=tbuf;
tbuf+=_IndexBuffer_Size;
tWi->DataEpr=tbuf;
}
}
long _MakeIndex(DiskBuffer *m_Buffer)
{
char *m_FileText=NULL;
while (m_Buffer->_PopData(&m_FileText,&_filenum)) _fenci(m_FileText);
for(unsigned long i=0;i<0x8000;i++)
{
if (_WordsIndex[i].DataPtr!=_WordsIndex[i]._Data) _Write2File(_WordsIndex+i);
}
return __Error;
}
private:
void _Write2File(IndexMakerBuf *nDataCache)
{
strcpy(_mOutPath_EndPtr,nDataCache->nWords);//char name
FILE *outFile;
if (NULL==(outFile=fopen(_mOutput_Path,"ab+")))
{
printf("error write to file %s %lx ",_mOutput_Path,*_mOutPath_EndPtr);
}
else
{
fwrite(nDataCache->_Data,4,(nDataCache->DataPtr)-(nDataCache->_Data),outFile);
fclose(outFile);
}
nDataCache->DataPtr=nDataCache->_Data;
}
void _fenci(char *mTextCharPtr)
{
mTextCharPtr--;//起始位置減一,使第一個位置爲一而不是零
char * bptChar=mTextCharPtr;
while (*(bptChar))
{
if ((*bptChar)<0)
{
if (IsHz(*(unsigned char *)(bptChar)))
{
//_Write2Cache(_WordsIndex+HzCode(*(unsigned short *)(bptChar)),(unsigned long)(bptChar-mTextCharPtr));
IndexMakerBuf *tDataCache=_WordsIndex+HzCode(*(unsigned short *)(bptChar));
unsigned long pushData=(unsigned long)(bptChar-mTextCharPtr);
//void _Write2Cache(IndexMakerBuf *tDataCache,unsigned long pushData)
//{
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
if (tDataCache->_Last_FileNum!=_filenum)
{
*(tDataCache->DataPtr)=_filenum;
tDataCache->_Last_FileNum=_filenum;
tDataCache->DataPtr++;
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
}
*(tDataCache->DataPtr)=pushData;
tDataCache->DataPtr++;
//}
//end function _Write2Cache
}
bptChar+=2;
}
else bptChar++;
}
//*//_WriteZero();
//for(unsigned long i=1;i<0x7F00;i++) //Powered by barenx
IndexMakerBuf *nDataCacheEpr=_WordsIndex+0x7F00;
for(IndexMakerBuf *nDataCache=_WordsIndex+1;nDataCache<nDataCacheEpr;nDataCache++)
{
if (nDataCache->_Last_FileNum==_filenum)
{
if (nDataCache->DataPtr==nDataCache->DataEpr) _Write2File(nDataCache);
*(nDataCache->DataPtr)=NULL;
(nDataCache->DataPtr)++;
}
}
//end _WriteZero();*/
}
};