- # -*- coding:UTF-8 -*-
- import sys
- from time import sleep
- import win32com.client
- from win32com.client import DispatchEx
# Python 2 only: make UTF-8 the implicit str<->unicode codec for the rest of
# the script.  ``sys.setdefaultencoding`` is removed from ``sys`` at
# interpreter start-up, so ``reload(sys)`` is needed to get it back.  The
# standard streams are captured first and re-installed afterwards so that
# whatever stream objects were in place before the reload stay in place.
# Python 3 has neither ``reload`` as a builtin nor ``setdefaultencoding`` (its
# default is already UTF-8), so the whole dance is skipped there.
stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding("utf-8")
    sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr
class COM_IE:
    """Drive an Internet Explorer window through COM automation.

    The constructor attaches to an already-open window whose location equals
    ``url`` or, when there is none, starts a new browser and navigates to it.
    The remaining methods wait for the page to load and read or modify DOM
    nodes that are located by tag name plus an attribute value.

    Attributes:
        url: Address passed to the constructor.
        Visible: Value assigned to the ``Visible`` property of windows
            created by ``NewIE`` (initially 1).
        ie: The browser COM object returned by ``openIE``.
        document: ``body.innerHTML`` captured by the last ``WaitIE`` call.
        text: ``body.innerText`` captured by the last ``WaitIE`` call.
        charset: Document charset captured by the last ``WaitIE`` call.
    """

    # CLSID used to obtain the collection of currently open windows.
    SHELL_WINDOWS_CLSID = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'

    # ``ReadyState`` value that signals a completely loaded document.
    READYSTATE_COMPLETE = 4

    # Attributes collected for each node when no specific one is requested.
    DEFAULT_ATTRS = ("id", "nodeName", "nodeType", "nodeValue", "className",
                     "innerHTML", "innerText", "href", "name", "title",
                     "type", "value")

    def __init__(self, url=None):
        """Attach to, or open, a browser window showing ``url``."""
        self.url = url
        self.Visible = 1
        self.ie = self.openIE(url)
        self.document = ""
        self.text = ""
        self.charset = None

    def ExistIE(self, url):
        """Return the open window whose ``LocationURL`` equals ``url``.

        Returns None when no open window matches.
        """
        # Iterating an empty collection simply yields nothing, so no
        # separate emptiness check is needed.
        for ie in DispatchEx(self.SHELL_WINDOWS_CLSID):
            if ie.LocationURL == url:
                return ie
        return None

    def NewIE(self, url):
        """Start a new browser, apply ``self.Visible`` and load ``url``."""
        ie = DispatchEx("InternetExplorer.Application")
        ie.Visible = self.Visible
        ie.Navigate(url)
        return ie

    def openIE(self, url):
        """Return an existing window showing ``url``, else a new one."""
        ie = self.ExistIE(url)
        if ie is None:
            ie = self.NewIE(url)
        return ie

    def WaitIE(self, timeout=None, poll_interval=1):
        """Block until the page has finished loading, then snapshot it.

        Polls ``ie.ReadyState`` until it equals ``READYSTATE_COMPLETE`` and
        then stores the document charset, body HTML and body text in
        ``self.charset``, ``self.document`` and ``self.text``.

        Args:
            timeout: Maximum number of seconds to wait, or None (the
                default) to wait indefinitely.  Elapsed time is estimated
                from the number of polls performed.
            poll_interval: Seconds to sleep between two polls.

        Raises:
            RuntimeError: If ``timeout`` elapses before loading completes.
        """
        waited = 0
        while self.ie.ReadyState != self.READYSTATE_COMPLETE:
            if timeout is not None and waited >= timeout:
                raise RuntimeError(
                    "page not loaded after %s seconds" % timeout)
            sleep(poll_interval)
            waited += poll_interval
        doc = self.ie.Document
        self.charset = doc.charset
        page_body = doc.body
        self.document = page_body.innerHTML
        self.text = page_body.innerText

    def SetVisible(self, visible=None):
        """Push the visibility flag to the browser window.

        Args:
            visible: New value for ``self.Visible``; None (the default)
                re-applies the current value.
        """
        if visible is not None:
            self.Visible = visible
        self.ie.Visible = self.Visible

    def Visible(self):
        """Apply ``self.Visible`` to the browser window.

        ``__init__`` stores the flag in an instance attribute of the same
        name, which hides this method on every normally constructed
        instance; call ``SetVisible`` instead.  The definition is kept so
        that the class-level name still exists.
        """
        self.SetVisible()

    def GetBody(self):
        """Wait for the page to load and return the document body."""
        self.WaitIE()
        return self.ie.Document.body

    def GetNodes(self, parentNode, tag):
        """Return all ``tag`` elements below ``parentNode`` as a list.

        Example::

            coldiv = ie.GetNodes(body, "div")
        """
        # Iterate explicitly to turn the returned collection into a list.
        return [node for node in parentNode.getElementsByTagName(tag)]

    def NodeByAttr(self, Nodes, nodeattr, nodeval):
        """Return the first node whose ``nodeattr`` equals ``nodeval``.

        The attribute value is converted with ``str`` before comparison.
        Returns None when no node matches.

        Example::

            div_id_editor = ie.NodeByAttr(coldiv, "id", "editor_ifr")
        """
        for node in Nodes:
            if str(node.getAttribute(nodeattr)) == nodeval:
                return node
        return None

    def _FindNode(self, body, node_type, node_attr, node_attr_val):
        """Locate a single node by tag name and attribute value.

        Raises:
            AttributeError: If no node matches.  This is the exception type
                callers previously got from touching the missing node, now
                with a message that names what was searched for.
        """
        tags = self.GetNodes(body, node_type)
        node = self.NodeByAttr(tags, node_attr, node_attr_val)
        if node is None:
            raise AttributeError("no <%s> node with %s=%r" %
                                 (node_type, node_attr, node_attr_val))
        return node

    def SetNodeHtml(self, body, node_type, node_attr, node_attr_val,
                    node_inner_html):
        """Assign ``node_inner_html`` to the matching node's ``innerHTML``."""
        node = self._FindNode(body, node_type, node_attr, node_attr_val)
        node.innerHTML = node_inner_html

    def SetNodeVal(self, body, node_type, node_attr, node_attr_val,
                   node_value):
        """Assign ``node_value`` to the matching node's ``value``."""
        node = self._FindNode(body, node_type, node_attr, node_attr_val)
        node.value = node_value

    def NodeClick(self, body, node_type, node_attr, node_attr_val):
        """Call ``click()`` on the matching node."""
        self._FindNode(body, node_type, node_attr, node_attr_val).click()

    def GetNodeHtml(self, body, node_type, node_attr, node_attr_val):
        """Return the matching node's ``innerHTML``."""
        return self._FindNode(
            body, node_type, node_attr, node_attr_val).innerHTML

    def GetNodeVal(self, body, node_type, node_attr, node_attr_val):
        """Return the matching node's ``value``."""
        return self._FindNode(
            body, node_type, node_attr, node_attr_val).value

    # multiple nodes
    def NodesByAttr(self, Nodes, nodeattr=None, nodeval=None):
        """Collect attribute values from several nodes.

        Args:
            Nodes: Iterable of nodes to inspect.
            nodeattr: Attribute to read.  When empty or None, every
                attribute in ``DEFAULT_ATTRS`` is read from every node.
            nodeval: When given (and non-empty), only nodes whose
                ``nodeattr`` value, converted with ``str``, equals it are
                included.

        Returns:
            A list with one dict per selected node, mapping attribute name
            to the value returned by ``getAttribute``.

        Example::

            hrefs = ie.NodesByAttr(anchors, "href")
        """
        value_list = []
        for node in Nodes:
            if not nodeattr:
                value_list.append(
                    {attr: node.getAttribute(attr)
                     for attr in self.DEFAULT_ATTRS})
            elif not nodeval or str(node.getAttribute(nodeattr)) == nodeval:
                value_list.append({nodeattr: node.getAttribute(nodeattr)})
        return value_list

    # multiple nodes
    def GetNodesVal(self, body, node_type, node_attr=None, node_val=None):
        """Run ``NodesByAttr`` over every ``node_type`` element in ``body``."""
        tags = self.GetNodes(body, node_type)
        return self.NodesByAttr(tags, node_attr, node_val)

    def Quit(self):
        """Close the browser window."""
        self.ie.Quit()
if __name__ == "__main__":
    # Demo: open a blog page and list the text and target of every link.
    url = "http://blog.csdn.net/agoago_2009/"
    IE = COM_IE(url)
    try:
        BODY = IE.GetBody()
        # Passing only the tag name makes each returned dict carry the full
        # default attribute set, which includes "innerText" and "href".
        # To collect just the link targets instead:
        #     a_list = IE.GetNodesVal(BODY, "a", "href")
        a_list = IE.GetNodesVal(BODY, "a")
        for a in a_list:
            # Single-argument form: valid both as a Python 2 print statement
            # and as a Python 3 function call.
            print("%s %s" % (a.get("innerText"), a.get("href")))

        # Alternative demo that fills in and submits the search form:
        #     IE.SetNodeVal(BODY, "input", "id", "inputSearch", "COM")
        #     IE.NodeClick(BODY, "input", "id", "btnSubmit")
        #     IE.WaitIE()
        #     print(IE.document.strip()[:100])
        #     print(IE.charset)
        #     print(IE.text.strip()[:100])

        # Keep the browser open until the user presses Enter.
        raw_input('quit')
    finally:
        # Close the browser even when the scraping above fails, so that no
        # stray browser process is left behind.
        IE.Quit()
python : COM-IE 操作2
發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。