應一同學要求,寫了個刷新智聯簡歷的腳本,省得他一邊奔波於面試還要一邊刷簡歷。
主要工具還是神器casperjs!
1.casperjs程序:
1>訪問自制的一個iframe組合頁(驗證碼識別頁+智聯登錄頁)
2>把智聯頁的驗證碼截圖並寫入驗證碼識別頁並提交
3>把驗證碼識別頁返回的字符填入智聯頁的驗證碼框、用戶名密碼也填入相應位置,點擊登錄按鈕
4>等待頁面跳轉並點擊刷新按鈕
5>循環點擊刷新按鈕
[dongsong@localhost js_study]$ cat zhilian.js
// Passed as the `verbose` option when the casper instance is created in main_navigation().
var logPrint = true;
// Path prefix for captured verification-code images; a generated file name is appended
// directly to it, so it must end with a slash.
var icDir = '/tmp/raw_codes/';
/**
 * Build a random GUID-shaped identifier: lowercase hex digits grouped 8-4-4-4-12.
 *
 * The value comes from Math.random(), so it is suitable for unique file names
 * but not for anything security-sensitive.
 *
 * @returns {string} A 36-character string such as "81358a65-119e-c715-53e5-0617fdbf31c9".
 */
function GUID ()
{
    // One random 16-bit group rendered as exactly four hex digits.
    // Number#toString(16) drops leading zeros (0x00ab -> "ab"), so left-pad
    // to keep every identifier the same length.
    var S4 = function ()
    {
        var hex = Math.floor(
            Math.random() * 0x10000 /* 65536 */
        ).toString(16);
        return ("0000" + hex).slice(-4);
    };
    return (
        S4() + S4() + "-" +
        S4() + "-" +
        S4() + "-" +
        S4() + "-" +
        S4() + S4() + S4()
    );
}
function debug(st) {
require('utils').dump(st);
}
/**
 * Make sure the required command-line options were supplied.
 *
 * When `--username`, `--passwd` or `--starturl` is missing, a usage message is
 * printed and casper is asked to exit with status 1 so that shell callers can
 * tell a usage error from a successful run.
 *
 * @param {Object} casper - Casper instance; only `casper.cli.has` and `casper.exit` are used.
 * @param {string} fileName - Script name shown in the usage message.
 */
function check_arg(casper, fileName)
{
    var required = ["username", "passwd", "starturl"];
    var i;
    for (i = 0; i < required.length; i++) {
        if (!casper.cli.has(required[i])) {
            console.log("\nUsage:\n\tcasperjs " + fileName + " --starturl=http://www.centos6.com:9000/mainapp/customized_iframe --username=xx --passwd=xx\n");
            // Non-zero status: a missing argument is a failure, not a normal exit.
            casper.exit(1);
            return;
        }
    }
}
/**
 * One iteration of the refresh loop; must be invoked with a casper instance as `this`.
 *
 * Schedules a 10-second wait followed by a click on the resume-refresh link,
 * then calls run() with this same function as its argument so the cycle repeats.
 */
function refresh()
{
    var refreshLink = 'a[title="簡歷刷新"]';
    var clickRefreshLink = function () {
        this.click(refreshLink);
        this.log('refreshed my resume');
    };

    this.wait(10000, clickRefreshLink);
    this.run(refresh);
}
/**
 * Build the casper navigation and start it.
 *
 * Steps queued, in order:
 *   1. Open the start URL (an iframe page) with the resume index page as `aimUrl`.
 *   2. Wait for the verification-code image in child frame 0, capture it to a
 *      file and submit that file through `form#icform` in child frame 1.
 *   3. Wait for an `h` element in child frame 1, read the recognized code from
 *      it, fill the login form in child frame 0 and click the login button.
 *   4. Wait for the resume-refresh link and click it.
 * The suite is then run with `refresh` as the completion callback, which keeps
 * clicking the refresh link.
 *
 * Reads `--starturl`, `--username` and `--passwd` from the command line.
 */
function main_navigation()
{
    var codeImageSelector = 'img[id="vimg"]';
    var loginFormSelector = 'form[name="frmLogin"]';
    var refreshSelector = 'a[title="簡歷刷新"]';

    // Create the casper instance.
    var casper = require("casper").create({
        verbose: logPrint,
        logLevel: "debug",
        onError: function(self, m) {
            this.capture("error.png");
            console.log("FATAL:" + m);
            self.exit();
        },
        onTimeout: function(self) { this.log('entire casper navigation timeout', 'error'); },
        // The `timeout` option is expressed in milliseconds: cap the whole run at one day.
        timeout: 24 * 60 * 60 * 1000
    });

    // Validate the command line.
    check_arg(casper, "zhilian.js");

    // casper.cli.get() casts values (e.g. "0123" becomes the number 123), which
    // would corrupt numeric-looking credentials; prefer the raw strings when available.
    var rawCli = (casper.cli.raw && typeof casper.cli.raw.get === "function") ? casper.cli.raw : casper.cli;
    var starturl = casper.cli.get("starturl");
    var userName = String(rawCli.get("username"));
    var passWd = String(rawCli.get("passwd"));

    // Request the iframe page that combines the target site and the code-recognition page.
    casper.start(starturl + '?aimUrl=' + encodeURIComponent("http://my.zhaopin.com/myzhaopin/resume_index.asp"),
        function then() { this.page.switchToChildFrame(0); }
    );

    // Wait for the verification-code image, capture it and submit it for recognition.
    casper.waitFor(function check() {
            if (!this.visible(codeImageSelector)) {
                this.log('identifying code does not appear');
                return false;
            }
            this.log('identifying code appears');
            var icFile = icDir + GUID() + '.png';
            this.captureSelector(icFile, codeImageSelector);
            // Move from child frame 0 to child frame 1, which holds the upload form.
            this.page.switchToParentFrame();
            this.page.switchToChildFrame(1);
            this.fill('form#icform', {'picfile': icFile}, true);
            return true;
        },
        function then() {},
        function onTimeout() { this.log('timeout: oauth form still exists and identifying code does not appear', 'error'); },
        30000
    );

    // Wait for the recognized characters, then fill and submit the login form.
    casper.waitFor(function check() {
            if (!this.exists('h')) {
                this.log('waiting for recognized string', 'debug');
                return false;
            }
            var recognizedCode = this.fetchText('h');
            this.log('result code is ' + recognizedCode);
            // Back to child frame 0, which holds the login form.
            this.page.switchToParentFrame();
            this.page.switchToChildFrame(0);
            this.fill(loginFormSelector, {
                'loginname': userName,
                'password': passWd,
                'Validate': recognizedCode
            }, false);
            this.click('input[id="loginbutton"]');
            return true;
        },
        function then() { this.capture('filled.png'); },
        function onTimeout() { this.log('timeout: waiting for recognized string', 'error'); },
        1000000
    );

    // Wait for the page reached after login and click the refresh link once.
    casper.waitForSelector(refreshSelector,
        function then() {
            this.capture('to_click.png');
            this.click(refreshSelector);
        },
        function onTimeout() { this.log('wait form failed', 'error'); },
        10000
    );

    // Periodic refresh: an earlier attempt that nested wait() inside a waitFor()
    // check function proved unreliable, so the loop is driven by the run()
    // completion callback instead.
    casper.run(refresh);
}
// Script entry point: build and launch the navigation.
main_navigation();
結果:
[dongsong@localhost js_study]$ casperjs zhilian.js --username="[email protected]" --passwd="xxxx" --starturl="http://localhost:9000/mainapp/customized_iframe"
[info] [phantom] Starting...
[info] [phantom] Execution timeout set to 86400000ms
[info] [phantom] Running suite: 5 steps
[debug] [phantom] opening url: http://localhost:9000/mainapp/customized_iframe?aimUrl=http%3A%2F%2Fmy.zhaopin.com%2Fmyzhaopin%2Fresume_index.asp, HTTP GET
[debug] [phantom] Navigation requested: url=http://localhost:9000/mainapp/customized_iframe?aimUrl=http://my.zhaopin.com/myzhaopin/resume_index.asp, type=Other, lock=true, isMainFrame=true
[debug] [phantom] url changed to "http://localhost:9000/mainapp/customized_iframe?aimUrl=http://my.zhaopin.com/myzhaopin/resume_index.asp"
[debug] [phantom] Navigation requested: url=http://my.zhaopin.com/myzhaopin/resume_index.asp, type=Other, lock=true, isMainFrame=false
[debug] [phantom] Navigation requested: url=http://localhost:9000/mainapp/fileform, type=Other, lock=true, isMainFrame=false
[debug] [phantom] Navigation requested: url=http://my.zhaopin.com/loginmgr/login.asp?BkUrl=/myzhaopin/resume_index.asp?, type=Other, lock=true, isMainFrame=false
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step 2/5 http://localhost:9000/mainapp/customized_iframe?aimUrl=http://my.zhaopin.com/myzhaopin/resume_index.asp (HTTP 200)
[info] [phantom] Step 2/5: done in 9751ms.
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step 3/5 http://my.zhaopin.com/loginmgr/login.asp?BkUrl=/myzhaopin/resume_index.asp? (HTTP 200)
[info] [phantom] Step 3/5: done in 9853ms.
[debug] [phantom] identifying code appears
[debug] [phantom] Capturing page to /home/dongsong/js_study/whole_for_render_completed.png
[info] [phantom] Capture saved to /home/dongsong/js_study/whole_for_render_completed.png
[debug] [phantom] Capturing page to /tmp/raw_codes/81358a65-119e-c715-53e5-617fdbf31c9.png with clipRect {"height":27,"left":941,"top":223,"width":72}
[info] [phantom] Capture saved to /tmp/raw_codes/81358a65-119e-c715-53e5-617fdbf31c9.png
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [remote] attempting to fetch form element from selector: 'form#icform'
[debug] [remote] Set "picfile" field value to /tmp/raw_codes/81358a65-119e-c715-53e5-617fdbf31c9.png
[info] [remote] submitting form to /mainapp/recognize, HTTP POST
[info] [phantom] waitFor() finished in 352ms.
[debug] [phantom] Navigation requested: url=http://localhost:9000/mainapp/recognize, type=FormSubmitted, lock=true, isMainFrame=false
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step 4/6 http://localhost:9000/mainapp/recognize (HTTP 200)
[info] [phantom] Step 4/6: done in 62320ms.
[info] [phantom] Step 5/6 http://localhost:9000/mainapp/recognize (HTTP 200)
[info] [phantom] Step 5/6: done in 62418ms.
[debug] [phantom] result code is dl5k
[info] [remote] attempting to fetch form element from selector: 'form[name="frmLogin"]'
[debug] [remote] Set "loginname" field value to [email protected]
[info] [remote] attempting to fetch form element from selector: 'form[name="frmLogin"]'
[debug] [remote] Set "password" field value to ********
[info] [remote] attempting to fetch form element from selector: 'form[name="frmLogin"]'
[debug] [remote] Set "Validate" field value to dl5k
[debug] [phantom] Mouse event 'click' on selector: input[id="loginbutton"]
[info] [phantom] waitFor() finished in 201ms.
[info] [phantom] Step 6/7 http://my.zhaopin.com/loginmgr/login.asp?BkUrl=/myzhaopin/resume_index.asp? (HTTP 200)
[debug] [phantom] Capturing page to /home/dongsong/js_study/filled.png
[info] [phantom] Capture saved to /home/dongsong/js_study/filled.png
[info] [phantom] Step 6/7: done in 62941ms.
[info] [phantom] Step 7/7 http://my.zhaopin.com/loginmgr/login.asp?BkUrl=/myzhaopin/resume_index.asp? (HTTP 200)
[info] [phantom] Step 7/7: done in 62946ms.
[debug] [phantom] Navigation requested: url=http://my.zhaopin.com/loginmgr/loginproc.asp, type=FormSubmitted, lock=true, isMainFrame=false
[debug] [phantom] Successfully injected Casper client-side utilities
[debug] [phantom] Navigation requested: url=http://my.zhaopin.com/myzhaopin/resume_index.asp?, type=Other, lock=true, isMainFrame=false
[debug] [phantom] Successfully injected Casper client-side utilities
[debug] [phantom] Navigation requested: url=http://my.zhaopin.com/MYZHAOPIN/new_register_tracking.asp, type=Other, lock=true, isMainFrame=false
[info] [phantom] waitFor() finished in 6367ms.
[info] [phantom] Step 8/8 http://my.zhaopin.com/myzhaopin/resume_index.asp? (HTTP 200)
[debug] [phantom] Capturing page to /home/dongsong/js_study/to_click.png
[info] [phantom] Capture saved to /home/dongsong/js_study/to_click.png
[debug] [phantom] Mouse event 'click' on selector: a[title="簡歷刷新"]
[debug] [phantom] Navigation requested: url=http://my.zhaopin.com/myzhaopin/resume_index.asp?#, type=LinkClicked, lock=true, isMainFrame=false
[info] [phantom] Step 8/8: done in 72774ms.
[info] [phantom] Done 8 steps in 72774ms
[info] [phantom] Running suite: 9 steps
[info] [phantom] Step 9/9 http://my.zhaopin.com/myzhaopin/resume_index.asp?# (HTTP 200)
[info] [phantom] Step 9/9: done in 72876ms.
[info] [phantom] wait() finished waiting for 10000ms.
[debug] [phantom] Mouse event 'click' on selector: a[title="簡歷刷新"]
[debug] [phantom] refreshed my resume
[info] [phantom] Done 9 steps in 82898ms
[info] [phantom] Running suite: 10 steps
[info] [phantom] Step 10/10 http://my.zhaopin.com/myzhaopin/resume_index.asp?# (HTTP 200)
[info] [phantom] Step 10/10: done in 83000ms.
[info] [phantom] wait() finished waiting for 10000ms.
[debug] [phantom] Mouse event 'click' on selector: a[title="簡歷刷新"]
[debug] [phantom] refreshed my resume
[info] [phantom] Done 10 steps in 93041ms
[info] [phantom] Running suite: 11 steps
[info] [phantom] Step 11/11 http://my.zhaopin.com/myzhaopin/resume_index.asp?# (HTTP 200)
[info] [phantom] Step 11/11: done in 93144ms.
^C
2.後臺驗證碼識別程序1
提供前述的iframe組合頁面
提供驗證碼提交的頁面:收到post來的圖片文件後通過郵件發送到目標郵箱並掃描數據庫該圖片對應的破解字符,掃描到字符就返回給瀏覽器
3.後臺驗證碼識別程序2
登錄發送驗證碼的郵箱,定時(30s)掃描新郵件,並把驗證碼的破解字符更新到數據庫
4.驗證碼的破解:需要人爲讀取郵件中的驗證碼圖片,並用普通字符回覆郵件
2和3的程序涉及文件比較多就不貼出來了,按照上述思想自己寫是沒多大難度的。
貼一個刷新的效果圖: