經過以上的鋪墊,我們就差最後一步了——破解!首先我們來分析一下要做的事情:
1、加載包含驗證碼的頁面,當然是用我們前面講的PhantomJS來加載啦,因爲極驗驗證碼是依賴於js渲染的,我們必須等頁面完全渲染完成後再執行拖動
2、收集一些頁面的參數發送到java後臺服務計算滑塊的目標位移並接收結果
3、通過js模擬鼠標事件來實現滑塊的移動
4、輸出驗證結果
好,讓我們一步步來講解如何實現上面的目標。
我們首先新建一個js文件,就叫做geetest_refresh.js好了,我們首先寫一些樣板代碼,比如創建對象,日誌處理和接收傳進來的參數:
var utils = require('utils');

// Single CasperJS instance shared by every step of this script.
var casper = require('casper').create({
    // Settings applied to the underlying WebPage instance.
    pageSettings: {
        userAgent: "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36",
        javascriptEnabled: true,
        XSSAuditingEnabled: true,
        loadImages: true,
        loadPlugins: false
    },
    // Default timeout, in milliseconds, for the wait* family.
    waitTimeout: 10000,
    exitOnError: false,
    // Minimum log level is "debug"; with verbose off the log entries are
    // not echoed to the console as they are produced.
    logLevel: "debug",
    verbose: false,
    httpStatusHandlers: {
        404: function () {
            console.log(404);
        }
    },
    // Surface alert() calls made by the page.
    onAlert: function (message) {
        console.log(message);
    },
    // Report the error and stop the script.
    onError: function (instance, message) {
        console.log("FATAL:" + message);
        instance.exit();
    },
    onDie: function () {
        console.log('dieing');
    },
    onLoadError: function (instance, failedUrl) {
        console.log(failedUrl + ' can\'t be loaded');
    },
    onStepTimeout: function () {
        console.log('timeout');
    },
    // The remaining hooks are intentionally no-ops.
    onPageInitialized: function () {
    },
    onResourceReceived: function () {
    },
    onResourceRequested: function () {
    },
    onStepComplete: function () {
    }
});

// Forward console output of the remote page into Casper's log at info level.
casper.on('remote.message', function (text) {
    this.log(text, 'info');
});

// Positional command-line arguments.
var pageUrl = casper.cli.get(0);             // URL of the page to load
var deltaResolveServer = casper.cli.get(1);  // address of the slider-offset solving service

// Script-wide state.
var id = new Date().getTime();  // millisecond timestamp, used later in the screenshot file name
var pageParam = null;           // filled in once the slice parameters have been collected
然後是實現第一個目標:加載並渲染頁面(這裏還對頁面做了一個判斷,是否包含極驗驗證碼):
// Open the target page, then pause for a fixed five seconds so that the
// page's scripts have time to render.
casper.start(pageUrl).then(function () {
    this.wait(5000, function () {
    });
});

// Stop right away when the page does not contain the slider knob element.
casper.then(function () {
    var hasSliderKnob = this.exists(".gt_slider_knob");
    if (hasSliderKnob) {
        return;
    }
    this.echo("頁面中不存在極驗驗證碼模塊");
    this.exit();
});
第二個目標:收集參數請求滑塊位置:
// Wait until both slice sets (52 elements each) are present, then collect
// every slice's image URL and background position into pageParam.
casper.waitFor(function check() {
    return this.evaluate(function () {
        // Both slice sets must be complete before the images can be rebuilt.
        return (document.querySelectorAll('.gt_cut_bg_slice').length === 52) &&
            (document.querySelectorAll('.gt_cut_fullbg_slice').length === 52);
    });
}, function then() {
    var self = this;
    this.echo("頁面渲染成功!");

    // Capture group 1 is the image URL, group 2 the background position.
    var styleReg = new RegExp("background-image: url\\((.*?)\\); background-position: (.*?);");

    /**
     * Reads the inline style of every element matching `selector` and splits
     * it into parallel arrays of image URLs and background positions.
     *
     * @param {string} selector CSS selector of the slice elements.
     * @param {boolean} echoUnmatched When true, styles that do not match the
     *        expected pattern are echoed for diagnosis.
     * @returns {{sources: string[], positions: string[]}}
     */
    var collectSlices = function (selector, echoUnmatched) {
        var sources = [];
        var positions = [];
        var styles = self.getElementsAttribute(selector, 'style');
        for (var i = 0; i < styles.length; i++) {
            var match = styleReg.exec(styles[i]);
            if (match != null) {
                sources.push(match[1]);
                positions.push(match[2]);
            } else if (echoUnmatched) {
                self.echo(styles[i]);
            }
        }
        return {sources: sources, positions: positions};
    };

    var fullbg = collectSlices('.gt_cut_fullbg_slice', true);
    var bg = collectSlices('.gt_cut_bg_slice', false);

    pageParam = {
        fullbgSrcArray: fullbg.sources,
        fullbgPositionArray: fullbg.positions,
        bgSrcArray: bg.sources,
        bgPositionArray: bg.positions,
        itemWidth: 10,      // width of one slice, in pixels
        itemHeight: 58,     // height of one slice, in pixels
        lineItemCount: 26   // number of slices per row of the puzzle
    };
}, function onTimeout() {
    this.echo("等待渲染超時!");
    // The original called this.exist(), which is not a Casper method and
    // would throw instead of terminating the script.
    this.exit();
}, 10000);

// Target horizontal displacement of the slider, set by the step below.
var deltaX = 0;

// Send the collected parameters to the solving service and store the
// displacement it returns.
casper.then(function () {
    if (pageParam == null) {
        this.echo("收集圖片參數失敗!");
        this.exit();
        // exit() does not unwind this function, so return explicitly to
        // avoid posting "null" parameters to the service.
        return;
    }
    this.echo("開始請求滑塊位置");
    var result = casper.evaluate(function (url, param) {
        // Synchronous AJAX call issued from inside the page context.
        return JSON.parse(__utils__.sendAJAX(url, 'POST', param, false));
    }, deltaResolveServer, {"params": JSON.stringify(pageParam)});
    if (result != null && result.status == 1) {
        deltaX = result.data.deltaX;
        this.echo("滑塊位置求解成功:" + JSON.stringify(result.data));
    } else {
        this.echo("請求滑塊位置失敗:" + JSON.stringify(result));
        this.exit();
    }
});
其中滑塊位置求解後臺服務也就是一個Spring的controller而已:
package com.yay.geetestIdentification.controller; import com.alibaba.fastjson.JSON; import com.yay.geetestIdentification.model.RestFulResult; import com.yay.geetestIdentification.utils.ImageUtils; import org.apache.commons.collections.MapUtils; import org.apache.commons.collections.map.HashedMap; import org.springframework.util.Assert; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RestController; import javax.servlet.http.HttpServletResponse; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.Map; @RestController public class CaptchaController { @RequestMapping(value = "resolveGeetestSlicePosition", method = RequestMethod.POST) public RestFulResult resolveGeetestSlicePosition(HttpServletResponse response, String params) { response.addHeader("Access-Control-Allow-Origin", "*"); Map<String, Object> paramMap = (Map<String, Object>) JSON.parseObject(params, Map.class); if (paramMap == null) return RestFulResult.failure("參數不能爲空!"); List<String> fullbgSrcList = (List<String>) paramMap.get("fullbgSrcArray"); List<String> fullbgPositionList = (List<String>) paramMap.get("fullbgPositionArray"); List<String> bgSrcList = (List<String>) paramMap.get("bgSrcArray"); List<String> bgPositionList = (List<String>) paramMap.get("bgPositionArray"); int itemWidth = MapUtils.getIntValue(paramMap, "itemWidth"); int itemHeight = MapUtils.getIntValue(paramMap, "itemHeight"); int lineItemCount = MapUtils.getIntValue(paramMap, "lineItemCount"); try { Assert.notEmpty(fullbgSrcList); Assert.notEmpty(fullbgPositionList); Assert.notEmpty(bgSrcList); Assert.notEmpty(bgPositionList); Assert.isTrue(fullbgSrcList.size() == 52); Assert.isTrue(bgSrcList.size() == 52); Assert.isTrue(itemWidth > 0); Assert.isTrue(lineItemCount > 0); Assert.isTrue(itemHeight > 0); String tmpFolder = System.getProperty("user.dir") + "/tmp/"; File file = new 
File(tmpFolder); if (!file.exists() && !file.isDirectory()) file.mkdir(); String identification = String.valueOf(System.currentTimeMillis()); String imageSubfix = "jpg"; List<String[]> fullbgPointList = new ArrayList<>(); for (String positionStr : fullbgPositionList) { fullbgPointList.add(positionStr.replace("px", "").split(" ")); } List<String[]> bgPointList = new ArrayList<>(); for (String positionStr : bgPositionList) { bgPointList.add(positionStr.replace("px", "").split(" ")); } String fullbgImagePath = tmpFolder + identification + "_fullbg." + imageSubfix; String bgImagePath = tmpFolder + identification + "_bg." + imageSubfix; if (ImageUtils.combineImages(fullbgSrcList, fullbgPointList, lineItemCount, itemWidth, itemHeight, fullbgImagePath, imageSubfix) && ImageUtils.combineImages(bgSrcList, bgPointList, lineItemCount, itemWidth, itemHeight, bgImagePath, imageSubfix)) { int deltaX = ImageUtils.findXDiffRectangeOfTwoImage(fullbgImagePath, bgImagePath); //刪除緩存的圖片 deleteImage(fullbgImagePath); deleteImage(bgImagePath); Map<String, Object> resultMap = new HashedMap(); resultMap.put("deltaX", deltaX); resultMap.put("deltaY", 0); return RestFulResult.success(resultMap); } else { return RestFulResult.failure("合成圖片失敗!"); } } catch (Exception ex) { return RestFulResult.failure(ex.getMessage()); } } private void deleteImage(String fullbgImagePath) { File file = new File(fullbgImagePath); // 路徑爲文件且不爲空則進行刪除 if (file.isFile() && file.exists()) { file.delete(); } } }
第三個目標,實現滑塊移動到目標位置:
// Index of the trajectory used for this run; it becomes part of the
// screenshot file name.
var currentTrailIndex = 0;

casper.then(function () {
    if (deltaX <= 0) {
        this.echo("滑塊目標位移爲0:處理失敗");
        this.exit();
    }
    this.echo("開始移動滑塊,目標位移爲 " + deltaX);

    // Everything inside evaluate() runs in the page context.
    currentTrailIndex = this.evaluate(function (selector, deltaX) {
        // Builds a bubbling, non-cancelable mouse event at the given client coordinates.
        function createEvent(eventName, ofsx, ofsy) {
            var mouseEvent = document.createEvent('MouseEvents');
            mouseEvent.initMouseEvent(eventName, true, false, null, 0, 0, 0, ofsx, ofsy,
                false, false, false, false, 0, null);
            return mouseEvent;
        }

        // Trajectory data was deliberately left out by the article's author.
        // Entries are read below as [dx, dy, delayMs].
        var trailArray = [
        ];
        var trailIndex = Math.round(Math.random() * (trailArray.length - 1));
        var steps = trailArray[trailIndex];
        console.log('當前使用軌跡路徑:' + (trailIndex + 1));

        // Distance to travel: the solved displacement minus 7, capped at 200.
        var delta = Math.min(deltaX - 7, 200);

        // Element that receives the synthetic mouse events.
        var knob = document.querySelector(selector);
        var knobRect = knob.getBoundingClientRect();
        var startX = knobRect.left + 20;
        var startY = knobRect.top + 18;
        var nowX = startX;
        var nowY = startY;
        console.log("startX:" + startX);
        console.log("startY:" + startY);

        // Replays the trajectory one entry at a time, each after its own
        // delay, until the target x is (almost) reached.
        function moveToTarget(loopRec) {
            setTimeout(function () {
                nowX += steps[loopRec][0];
                nowY += steps[loopRec][1];
                knob.dispatchEvent(createEvent('mousemove', nowX, nowY));
                console.log("當前滑塊位置:" + knob.getBoundingClientRect().left);
                if (nowX > (startX + delta - 2)) {
                    // Snap to the exact target and release the button.
                    knob.dispatchEvent(createEvent('mousemove', startX + delta, nowY));
                    knob.dispatchEvent(createEvent('mouseup', startX + delta, nowY));
                    console.log("最終滑塊位置:" + knob.getBoundingClientRect().left);
                    return;
                }
                moveToTarget(loopRec + 1);
            }, steps[loopRec][2]);
        }

        knob.dispatchEvent(createEvent("mousedown", startX, startY));
        moveToTarget(2);
        return trailIndex;
    }, ".gt_slider_knob", deltaX);
}).then(function () {
    // Wait for the status text to change, then report it and take a screenshot.
    casper.waitForSelectorTextChange('.gt_info_type', function () {
        var status = this.fetchText('.gt_info_type');
        this.echo("驗證結果:" + status);
        // Screenshot of the current page, kept for later review.
        this.capture(status.replace(":", "_") + id + "_" + currentTrailIndex + '.png');
        var passed = status.indexOf("通過") > -1;
        if (passed && this.exists('#verify')) {
            this.click("#verify");
            this.echo("點擊成功");
        }
    }, function () {
        this.echo("等待滑塊移動超時!");
    }, 10000);
});
代碼中的trailArray 保存着到目標位移的移動軌跡數據,也就是說先到哪個位置,再到哪個位置……。大家都知道極驗驗證碼最難的就是對這個軌跡做了行爲檢測來區分人和機器人,因此這個數據相當重要,爲了不給極驗團隊帶來太多麻煩,我這裏就省略了,畢竟人家也要吃飯啦。
最後一個目標,執行以上的操作並返回結果:
// Start executing the steps queued above.
casper.run();
沒錯,就一行代碼,上面腳本中的所有輸出文字都可以在java代碼中接收,然後判斷是否驗證成功,而且可以把驗證結果的網頁截圖保存下來:
/**
 * Launches the CasperJS script and decides from its captured output whether
 * the page can be treated as verified.
 *
 * @param pageUrl             URL of the page to open
 * @param domain              forwarded to the script after the solver address
 * @param cookies             forwarded to the script after the domain
 * @param jsFileName          script file handed to the launcher
 * @param deltaResolveAddress address of the slider-offset solving service
 * @return true when the output contains "驗證通過" or "不存在極驗驗證碼";
 *         false otherwise, including when the launcher returns null
 */
private static boolean startIdentification(String pageUrl, String domain, String cookies, String jsFileName, String deltaResolveAddress) {
    String output = CasperjsProgramManager.launch(
            jsFileName,
            pageUrl,
            deltaResolveAddress,
            domain,
            cookies,
            " web-security=no",
            "ignore-ssl-errors=true");
    logger.info("驗證碼識別結果:\r\n" + output);
    if (output == null) {
        return false;
    }
    boolean verified = output.contains("驗證通過");
    return verified || output.contains("不存在極驗驗證碼");
}
運行結果:
[info] [phantom] Step then 7/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 頁面渲染成功! [info] [phantom] Step then 7/10: done in 69935ms. [info] [phantom] Step anonymous 8/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 開始請求滑塊位置 [debug] [remote] sendAJAX(): Using HTTP method: 'POST' 滑塊位置求解成功:{"deltaX":119,"deltaY":0} [info] [phantom] Step anonymous 8/10: done in 80502ms. [info] [phantom] Step anonymous 9/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 開始移動滑塊,目標位移爲 119 [info] [phantom] 當前使用軌跡路徑:2 [info] [phantom] startX:51.03125 [info] [phantom] startY:292 [info] [phantom] Step anonymous 9/10: done in 80514ms. [info] [phantom] Step anonymous 10/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) [info] [phantom] Step anonymous 10/10: done in 80528ms. [info] [phantom] Step _step 11/11 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) [info] [phantom] Step _step 11/11: done in 80547ms. 
[info] [phantom] 當前滑塊位置:33.03125 [info] [phantom] 當前滑塊位置:33.03125 [info] [phantom] 當前滑塊位置:34.03125 [info] [phantom] 當前滑塊位置:35.03125 [info] [phantom] 當前滑塊位置:36.03125 [info] [phantom] 當前滑塊位置:37.03125 [info] [phantom] 當前滑塊位置:38.03125 [info] [phantom] 當前滑塊位置:39.03125 [info] [phantom] 當前滑塊位置:40.03125 [info] [phantom] 當前滑塊位置:41.03125 [info] [phantom] 當前滑塊位置:44.03125 [info] [phantom] 當前滑塊位置:46.03125 [info] [phantom] 當前滑塊位置:47.03125 [info] [phantom] 當前滑塊位置:48.03125 [info] [phantom] 當前滑塊位置:49.03125 [info] [phantom] 當前滑塊位置:50.03125 [info] [phantom] 當前滑塊位置:51.03125 [info] [phantom] 當前滑塊位置:53.03125 [info] [phantom] 當前滑塊位置:55.03125 [info] [phantom] 當前滑塊位置:56.03125 [info] [phantom] 當前滑塊位置:58.03125 [info] [phantom] 當前滑塊位置:60.03125 [info] [phantom] 當前滑塊位置:61.03125 [info] [phantom] 當前滑塊位置:64.03125 [info] [phantom] 當前滑塊位置:66.03125 [info] [phantom] 當前滑塊位置:67.03125 [info] [phantom] 當前滑塊位置:68.03125 [info] [phantom] 當前滑塊位置:69.03125 [info] [phantom] 當前滑塊位置:71.03125 [info] [phantom] 當前滑塊位置:73.03125 [info] [phantom] 當前滑塊位置:75.03125 [info] [phantom] 當前滑塊位置:76.03125 [info] [phantom] 當前滑塊位置:77.03125 [info] [phantom] 當前滑塊位置:78.03125 [info] [phantom] 當前滑塊位置:79.03125 [info] [phantom] 當前滑塊位置:81.03125 [info] [phantom] 當前滑塊位置:83.03125 [info] [phantom] 當前滑塊位置:85.03125 [info] [phantom] 當前滑塊位置:86.03125 [info] [phantom] 當前滑塊位置:87.03125 [info] [phantom] 當前滑塊位置:88.03125 [info] [phantom] 當前滑塊位置:89.03125 [info] [phantom] 當前滑塊位置:90.03125 [info] [phantom] 當前滑塊位置:91.03125 [info] [phantom] 當前滑塊位置:92.03125 [info] [phantom] 當前滑塊位置:94.03125 [info] [phantom] 當前滑塊位置:95.03125 [info] [phantom] 當前滑塊位置:96.03125 [info] [phantom] 當前滑塊位置:97.03125 [info] [phantom] 當前滑塊位置:98.03125 [info] [phantom] 當前滑塊位置:100.03125 [info] [phantom] 當前滑塊位置:103.03125 [info] [phantom] 當前滑塊位置:104.03125 [info] [phantom] 當前滑塊位置:105.03125 [info] [phantom] 當前滑塊位置:106.03125 [info] [phantom] 當前滑塊位置:108.03125 [info] [phantom] 當前滑塊位置:110.03125 [info] [phantom] 當前滑塊位置:114.03125 [info] [phantom] 當前滑塊位置:116.03125 [info] [phantom] 當前滑塊位置:118.03125 [info] 
[phantom] 當前滑塊位置:119.03125 [info] [phantom] 當前滑塊位置:121.03125 [info] [phantom] 當前滑塊位置:122.03125 [info] [phantom] 當前滑塊位置:124.03125 [info] [phantom] 當前滑塊位置:125.03125 [info] [phantom] 當前滑塊位置:127.03125 [info] [phantom] 當前滑塊位置:129.03125 [info] [phantom] 當前滑塊位置:130.03125 [info] [phantom] 當前滑塊位置:134.03125 [info] [phantom] 當前滑塊位置:135.03125 [info] [phantom] 當前滑塊位置:136.03125 [info] [phantom] 當前滑塊位置:137.03125 [info] [phantom] 當前滑塊位置:138.03125 [info] [phantom] 當前滑塊位置:139.03125 [info] [phantom] 當前滑塊位置:140.03125 [info] [phantom] 當前滑塊位置:142.03125 [info] [phantom] 最終滑塊位置:143.03125 [info] [phantom] waitFor() finished in 1913ms. [info] [phantom] Step anonymous 12/12 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 驗證結果:驗證通過: [debug] [phantom] Capturing page to D:/yayCrawler/demo/GeetestIdentification/target/classes/casperjs/js/驗證通過_1467992089127_1.png
驗證通過的截圖爲: