// 漢字字索引 (Chinese character index)

#pragma once

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../DiskBuf.h"


#ifndef IsHz
// IsHz(x): non-zero when the byte value x lies in 0x81..0xA0 or 0xAA..0xFE.
// IndexMaker applies it to the first byte of a two-byte character
// (presumably the GBK lead-byte ranges that hold Chinese characters - TODO confirm).
// x is expanded up to four times, so pass an expression without side effects.
#define IsHz(x) ((0x81<=(x) && (x)<=0xA0)||(0xAA<=(x) && (x)<=0xFE))
// HzCode(x): turns a 16-bit value x into a table index by adding
// (low 7 bits of x) * 256 to (x shifted right by 8).
#define HzCode(x) (((x)>>8)+(((x)&0x7F)<<8))
#endif

// _IndexBuffer_Size: number of slots in each per-character buffer
// (IndexMaker allocates 0x8000 such buffers in one block).
// Builds with _DEBUG defined use the small value
// (presumably so the flush-to-file path is hit sooner - TODO confirm).
#ifndef _DEBUG
#define _IndexBuffer_Size 0x1000
#else
#define _IndexBuffer_Size 0x80
#endif

class IndexMaker
{
private:
    
struct IndexMakerBuf    //詞庫緩衝數組
    {
        
char nWords[3];        //[WordMaxLen];
        unsigned long _Last_FileNum;
        unsigned 
long * _Data;
        unsigned 
long * DataPtr;
        unsigned 
long * DataEpr;
    }
 _WordsIndex[0x8000];

    unsigned 
long *_Diskbuf;
    unsigned 
long _filenum;            //文件號
    char _mOutput_Path[512];        //索引輸出路徑
    char* _mOutPath_EndPtr;            //指向索引路徑末端
public:
    
long __Error;
    
//function
    ~IndexMaker(void)
    
{
        
if (_Diskbuf)
        
{
            free(_Diskbuf);
        }

    }


    IndexMaker(
char *OutPutDir)//cache set:default=840, unsigned long CacheSet
    {
        _filenum
=1;
        __Error
=NULL;

        strcpy(_mOutput_Path,OutPutDir);
        _mOutPath_EndPtr
=_mOutput_Path+strlen(_mOutput_Path);

        unsigned 
long *tbuf=_Diskbuf=(unsigned long *)calloc(0x8000*_IndexBuffer_Size,4);
        
if (!_Diskbuf) { __Error=_Error_Malloc;return ;}
        
        memset(_WordsIndex,
0,sizeof(_WordsIndex));
        
for(unsigned long i=0;i<0x8000;i++)
        
{
            IndexMakerBuf 
*tWi=_WordsIndex+i;
            
*(unsigned short *)(tWi->nWords)=(unsigned short)(((i>>8)|0x80)+((i&0xFF)<<8));
            tWi
->nWords[2]=NULL;
            tWi
->_Last_FileNum=NULL;
            tWi
->DataPtr=tWi->_Data=tbuf;
            tbuf
+=_IndexBuffer_Size;
            tWi
->DataEpr=tbuf;
        }

    }
        

    
long _MakeIndex(DiskBuffer *m_Buffer)
    
{
        
char *m_FileText=NULL;
        
while (m_Buffer->_PopData(&m_FileText,&_filenum)) _fenci(m_FileText);
        
for(unsigned long i=0;i<0x8000;i++)
        
{
            
if (_WordsIndex[i].DataPtr!=_WordsIndex[i]._Data) _Write2File(_WordsIndex+i);
        }

        
return __Error;
    }

private:
    
void _Write2File(IndexMakerBuf *nDataCache)
    
{
        strcpy(_mOutPath_EndPtr,nDataCache
->nWords);//char name
        FILE *outFile;
        
if (NULL==(outFile=fopen(_mOutput_Path,"ab+")))
        
{
            printf(
"error write to file %s %lx ",_mOutput_Path,*_mOutPath_EndPtr);
        }

        
else
        
{
            fwrite(nDataCache
->_Data,4,(nDataCache->DataPtr)-(nDataCache->_Data),outFile);
            fclose(outFile);
        }

        nDataCache
->DataPtr=nDataCache->_Data;
    }


    
void _fenci(char *mTextCharPtr)
    
{
        mTextCharPtr
--;//起始位置減一,使第一個位置爲一而不是零
        char * bptChar=mTextCharPtr;
        
while (*(bptChar)) 
        
{
            
if ((*bptChar)<0)
            
{
                
if (IsHz(*(unsigned char *)(bptChar)))
                
{
                    
//_Write2Cache(_WordsIndex+HzCode(*(unsigned short *)(bptChar)),(unsigned long)(bptChar-mTextCharPtr));
                    IndexMakerBuf *tDataCache=_WordsIndex+HzCode(*(unsigned short *)(bptChar));
                    unsigned 
long pushData=(unsigned long)(bptChar-mTextCharPtr);
                    
//void _Write2Cache(IndexMakerBuf *tDataCache,unsigned long pushData)
                    
//{
                        if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
                        
if (tDataCache->_Last_FileNum!=_filenum)
                        
{
                            
*(tDataCache->DataPtr)=_filenum;
                            tDataCache
->_Last_FileNum=_filenum;
                            tDataCache
->DataPtr++;
                            
if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache);
                        }

                        
*(tDataCache->DataPtr)=pushData;
                        tDataCache
->DataPtr++;
                    
//}
                    
//end function _Write2Cache
                }

                bptChar
+=2;
            }

            
else bptChar++;
        }

        
//*//_WriteZero();
        
//for(unsigned long i=1;i<0x7F00;i++) //Powered by barenx
        IndexMakerBuf *nDataCacheEpr=_WordsIndex+0x7F00;
        
for(IndexMakerBuf *nDataCache=_WordsIndex+1;nDataCache<nDataCacheEpr;nDataCache++)
        
{
            
if (nDataCache->_Last_FileNum==_filenum)
            
{
                
if (nDataCache->DataPtr==nDataCache->DataEpr) _Write2File(nDataCache);
                
*(nDataCache->DataPtr)=NULL;
                (nDataCache
->DataPtr)++;
            }

        }

        
//end _WriteZero();*/
    }

}
;

// Powered by barenx
 

// 發表評論
// 所有評論
// 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
// 相關文章