coreseek使用教程
前提依賴
- ubuntu 16.04
- coreseek 4.1 下載
- libiconv 下載
- 必備的其他依賴包:
sudo apt install automake libtool libxml2-dev libexpat1-dev
sudo apt install mysql-client mysql-server libmysqlclient-dev
sudo apt install python-dev
安裝mmseg分詞工具
cd mmseg-3.2.14
./bootstrap
./configure --prefix=/usr/local/mmseg3
sudo make && sudo make install
安裝libiconv 或者安裝1.13.1
wget -O - http://blog.atime.me/static/resource/libiconv-glibc-2.16.patch.gz | gzip -d - | patch -p0
cd libiconv-1.14
./configure
make && sudo make install
sudo ldconfig
安裝csft
- configure.ac中去掉AM_INIT_AUTOMAKE裏的-Werror
- buildconf.sh中automake後加入 --add-missing
wget -O - http:
sh buildconf.sh
./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql --with-python LIBS=-liconv
make && sudo make install
安裝libsphinxclient
sh buildconf.sh
./configure
make && sudo make install
安裝sphinx擴展
- 下載源碼點擊Browse Source,在最下面找到支持php7的源碼
./configure
make && sudo make install
centos下安裝教程
yum install make gcc gcc++ gcc-c++ libtool autoconf automake imake mysql-devel libxml2-devel expat-devel python-devel
cd mmseg-3.2.14
./configure --prefix=/usr/local/mmseg3
make && sudo make install
- 安裝csft
configure.ac中去掉AM_INIT_AUTOMAKE裏的-Werror
buildconf.sh中automake後加入 --add-missing
修改src/sphinxexpr.cpp,ExprEval 改爲 this->ExprEval
sh buildconf.sh
./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql --with-python
make && sudo make install
sh buildconf.sh
./configure
make && sudo make install
- 安裝sphinx擴展
下載源碼點擊Browse Source,在最下面找到支持php7的源碼
./configure
make && sudo make install
python 數據源設置
python
{
path = /usr/local/coreseek/etc/pysource
path = /usr/local/coreseek/etc/pysource/csft_demo
}
source python
{
type = python
name = csft_demo.MainSource
}
index python
{
source = python
path = /usr/local/coreseek/var/data/python
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
html_strip = 0
charset_dictpath = /usr/local/mmseg3/etc
charset_type = zh_cn.utf-8
}
indexer
{
mem_limit = 128M
}
searchd
{
listen = 9312
read_timeout = 5
max_children = 30
max_matches = 1000
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
pid_file = /usr/local/coreseek/var/log/searchd_python.pid
log = /usr/local/coreseek/var/log/searchd_python.log
query_log = /usr/local/coreseek/var/log/query_python.log
binlog_path =
}
class MainSource(object):
    """Demo Python data source that serves three hard-coded documents.

    The public methods (GetScheme, GetFieldOrder, Connected, NextDocument)
    are presumably the hooks the coreseek indexer calls on a
    ``type = python`` source -- confirm against the coreseek documentation.
    After each successful NextDocument call the current row is exposed as
    attributes on the instance (docid, id, subject, context, published,
    author_id).
    """

    def __init__(self, conf):
        """Store the configuration and load the in-memory sample rows.

        Args:
            conf: Configuration object supplied by the caller. It is only
                stored on the instance; this class never reads it.
        """
        self.conf = conf
        # Index into self.data of the next row NextDocument will emit.
        self.idx = 0
        self.data = [
            {'id': 1, 'subject': u"愚人節最佳蠱惑爆料 谷歌300億美元收購百度", 'context': u'1111', 'published': 1270131607, 'author_id': 1},
            {'id': 2, 'subject': u'Twitter主頁改版 推普通用戶消息增加趨勢話題', 'context': u'2222', 'published': 1270135548, 'author_id': 1},
            {'id': 3, 'subject': u'死都要上!Opera Mini 體驗版搶先試用', 'context': u'3333', 'published': 1270094460, 'author_id': 2},
        ]

    def GetScheme(self):
        """Return the field schema as a list of (name, options) pairs.

        'id' is flagged as the document id, 'subject' and 'context' are
        text fields, 'published' and 'author_id' are integer fields.
        """
        return [
            ('id', {'docid': True, }),
            ('subject', {'type': 'text'}),
            ('context', {'type': 'text'}),
            ('published', {'type': 'integer'}),
            ('author_id', {'type': 'integer'}),
        ]

    def GetFieldOrder(self):
        """Return the field ordering list (a single entry, 'subject')."""
        # NOTE(review): ('subject') is a plain string, not a 1-tuple, so this
        # returns ['subject']. Kept as-is; confirm which shape coreseek
        # expects before changing it.
        return [('subject')]

    def Connected(self):
        """Hook with nothing to do: the sample data lives in memory."""
        pass

    def NextDocument(self, err=None):
        """Load the next sample row into attributes on this instance.

        Args:
            err: Not read by this implementation. It defaults to None so
                the method can also be called with no argument; callers
                that pass a value keep working unchanged.

        Returns:
            True if a row was loaded, False once all rows are consumed.
        """
        if self.idx >= len(self.data):
            return False

        item = self.data[self.idx]
        self.docid = self.id = item['id']
        # Text fields are handed over UTF-8 encoded rather than as unicode.
        self.subject = item['subject'].encode('utf-8')
        self.context = item['context'].encode('utf-8')
        self.published = item['published']
        self.author_id = item['author_id']
        self.idx += 1
        return True
if __name__ == "__main__":
    # Stand-alone smoke test: walk every sample document and print its id
    # and subject, without going through the indexer.
    conf = {}
    source = MainSource(conf)
    source.Connected()
    # NextDocument declares an ``err`` parameter that it never reads; pass
    # None explicitly so the call does not raise TypeError.
    while source.NextDocument(None):
        # A parenthesised single-argument print produces the same output as
        # the print statement on Python 2 and also parses on Python 3.
        print("id=%d, subject=%s" % (source.docid, source.subject))