'''
Interface developed for Baidu, with assorted requirements: write an XML file. It is not particularly hard.
'''
import codecs
import datetime
import os
import traceback
import xml
import xml.dom.minidom as minidom
def covert_to_unicode(msg):
    """Return *msg* as unicode text.

    Unicode text is returned unchanged; a byte string is decoded as UTF-8.
    Works on both Python 2 (``unicode`` / ``str``) and Python 3
    (``str`` / ``bytes``).

    Args:
        msg: unicode text, or a UTF-8 encoded byte string.

    Returns:
        The unicode text.

    Raises:
        ValueError: *msg* is a byte string that is not valid UTF-8.
        TypeError: *msg* is neither unicode text nor a byte string.
        (Both are subclasses of ``Exception``, which is what this function
        used to raise, so existing ``except Exception`` handlers still work.)
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, which
    # avoids naming ``unicode`` directly (a NameError on Python 3).
    text_type = type(u'')
    if isinstance(msg, text_type):
        return msg
    # ``bytes`` is an alias of ``str`` on Python 2.6+.
    if isinstance(msg, bytes):
        try:
            return msg.decode('utf-8')
        except UnicodeDecodeError as errinfo:
            raise ValueError('%s,%s' % (errinfo, str(msg)))
    # Wrap in a 1-tuple so that a tuple-valued msg formats correctly.
    raise TypeError('%s 必須爲str或unicode類型' % (msg,))
class CreateCovXml(object):
    """Build a ``urlset`` XML document in memory and write it to a file.

    Call order: ``begin_cov()`` once, ``add_cov(...)`` once per entry, then
    ``end_cov()`` to write the document to the path given to the constructor.
    """

    def __init__(self, cov_xml_path):
        """Store the output path; the document itself is made by begin_cov()."""
        self.__cov_path = cov_xml_path
        self.__dom = None
        self.__root = None

    def _covert_code(self, msg):
        """Return *msg* as unicode text by delegating to covert_to_unicode()."""
        return covert_to_unicode(msg)

    def __create_new_node(self, node_name, node_text=None):
        """Create a detached element, optionally containing a text node.

        Args:
            node_name: tag name of the new element.
            node_text: text content; when ``None`` the element is left empty.

        Returns:
            The new element (not yet attached to any parent).

        Raises:
            Exception: begin_cov() has not created the document yet.
        """
        if self.__dom is None:
            raise Exception('創建結點時,document對象還不存在')
        new_node = self.__dom.createElement(self._covert_code(node_name))
        if node_text is not None:
            text_node = self.__dom.createTextNode(self._covert_code(node_text))
            new_node.appendChild(text_node)
        return new_node

    def begin_cov(self):
        """Create the XML document and its ``urlset`` root element.

        Raises:
            Exception: the document could not be created; the underlying
                traceback is printed first.
        """
        try:
            impl = minidom.getDOMImplementation()
            self.__dom = impl.createDocument(None, u'urlset', None)
            self.__root = self.__dom.documentElement
        except Exception:
            traceback.print_exc()
            raise Exception('創建coverage xml根結點失敗')

    def add_cov(self, url, title, priority, Horizontalpicture1,
                dayRetrievalstatus, channel, Largeblock, smallblock,
                blocknumber):
        """Append one ``<url>`` entry to the root element.

        The entry has the shape
        ``url > (loc, lastmod, changefreq, priority, data > display > ...)``.
        Each argument becomes the text of the same-named element (``url``
        fills the ``<url>`` element inside ``<display>``); every other
        element is created empty.

        Raises:
            Exception: begin_cov() has not been called, or (from the text
                conversion helper) a value is not convertible to unicode.
        """
        if self.__root is None:
            raise Exception('創建結點時,root結點不存在對象不存不存在')
        new_node = self.__create_new_node

        # (tag, text) pairs in document order; None means an empty element.
        # NOTE(review): <loc> is left empty even though `url` is available;
        # confirm this is intended.
        entry_fields = (
            ("loc", None),
            ("lastmod", None),
            ("changefreq", None),
            ("priority", priority),
        )
        display_fields = (
            ("channel", channel),
            ("Largeblock", Largeblock),
            ("smallblock", smallblock),
            ("blocknumber", blocknumber),
            ("title", title),
            ("subtitle", None),
            ("url", url),
            ("Horizontalpicture1", Horizontalpicture1),
            ("Horizontalpicture2", None),
            ("Horizontalpicture3", None),
            ("singer", None),
            ("area", None),
            ("timelength", None),
            ("episode", None),
            ("updatetime", None),
            ("briefintroduction", None),
            ("daySearchvolume", None),
            ("dayRetrievalstatus", dayRetrievalstatus),
            ("sitename", None),
        )

        entry = new_node("url")
        for tag, text in entry_fields:
            entry.appendChild(new_node(tag, text))
        data = new_node("data")
        display = new_node("display")
        for tag, text in display_fields:
            display.appendChild(new_node(tag, text))
        data.appendChild(display)
        entry.appendChild(data)

        # Attach last, so a failure while building leaves the root untouched.
        self.__root.appendChild(entry)

    def end_cov(self):
        """Write the document to the output path as UTF-8.

        Returns:
            True on success.

        Raises:
            Exception: writing failed or begin_cov() was never called; the
                underlying traceback is printed first.
        """
        try:
            # Check before opening so an existing file is not truncated
            # when there is no document to write.
            if self.__dom is None:
                raise ValueError('begin_cov() must be called before end_cov()')
            # The with-block closes the file even if writexml() raises.
            with open(self.__cov_path, 'wb') as out_file:
                writer = codecs.getwriter('utf-8')(out_file)
                self.__dom.writexml(writer, encoding='utf-8')
            return True
        except Exception:
            traceback.print_exc()
            raise Exception('寫coverage.xml文件出錯')
# Usage example: write one entry per link into "<result_filename>.xml".
# NOTE(review): result_filename, result_linkinfos and basePath are not
# defined in the visible code; presumably supplied by surrounding code.
test = CreateCovXml(result_filename+'.xml')
test.begin_cov()
priority_count = 0
# priority is the 1-based position of the link, passed as a string.
for priority_count, linkInfo in enumerate(result_linkinfos, 1):
    test.add_cov(
        url=linkInfo.getTarget(),
        title=linkInfo.getTitle(),
        priority=str(priority_count),
        Horizontalpicture1=linkInfo.getDetailPicUrl(),
        # Fixed values shared by every entry.
        dayRetrievalstatus="1",
        channel="資訊",
        Largeblock="帶圖區",
        smallblock="焦點小圖",
        blocknumber="3-16",
    )
test.end_cov()
os.chdir(basePath)