十三、學習分佈式爬蟲之字體反爬

字體反爬

字體反爬原理

  1. 網頁開發者自己創造一種字體,因爲在字體中每個文字都有其代號,那麼以後在網頁中不會直接顯示這個文字的最終的效果,而是顯示他的代號,因此即使獲取到了網頁中的文本內容,也只是獲取到文字的代號,而不是文字本身。
  2. 因爲創造字體費時費力,並且如果把中國3000多常用漢字都實現,那麼這個字體將達到幾十兆,也會影響網頁的加載。一般情況下爲了反爬蟲,僅會針對0-9以及少數漢字進行單獨創建,其他的還是使用用戶系統中自帶的字體。
    在這裏插入圖片描述
    在這裏插入圖片描述
    在這裏插入圖片描述
    58同城租房字體反爬實例
import io
import re
import base64
from fontTools.ttLib import TTFont
#pip install fontTools安裝模塊
import requests

#這裏的操作只是爲了將字體保存爲xml文件和ttf文件,用來做文字和形狀的映關係
# font_face = "AAEAAAALAIAAAwAwR1NVQiCLJXoAAAE4AAAAVE9TLzL4XQjtAAABjAAAAFZjbWFwq8R/YwAAAhAAAAIuZ2x5ZuWIN0cAAARYAAADdGhlYWQYboylAAAA4AAAADZoaGVhCtADIwAAALwAAAAkaG10eC7qAAAAAAHkAAAALGxvY2ED7gSyAAAEQAAAABhtYXhwARgANgAAARgAAAAgbmFtZTd6VP8AAAfMAAACanBvc3QFRAYqAAAKOAAAAEUAAQAABmb+ZgAABLEAAAAABGgAAQAAAAAAAAAAAAAAAAAAAAsAAQAAAAEAAOIAgERfDzz1AAsIAAAAAADac6DKAAAAANpzoMoAAP/mBGgGLgAAAAgAAgAAAAAAAAABAAAACwAqAAMAAAAAAAIAAAAKAAoAAAD/AAAAAAAAAAEAAAAKADAAPgACREZMVAAObGF0bgAaAAQAAAAAAAAAAQAAAAQAAAAAAAAAAQAAAAFsaWdhAAgAAAABAAAAAQAEAAQAAAABAAgAAQAGAAAAAQAAAAEERAGQAAUAAAUTBZkAAAEeBRMFmQAAA9cAZAIQAAACAAUDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFBmRWQAQJR2n6UGZv5mALgGZgGaAAAAAQAAAAAAAAAAAAAEsQAABLEAAASxAAAEsQAABLEAAASxAAAEsQAABLEAAASxAAAEsQAAAAAABQAAAAMAAAAsAAAABAAAAaYAAQAAAAAAoAADAAEAAAAsAAMACgAAAaYABAB0AAAAFAAQAAMABJR2lY+ZPJpLnjqeo59kn5Kfpf//AACUdpWPmTyaS546nqOfZJ+Sn6T//wAAAAAAAAAAAAAAAAAAAAAAAAABABQAFAAUABQAFAAUABQAFAAUAAAABwAFAAYABAAIAAMACgACAAEACQAAAQYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADAAAAAAAiAAAAAAAAAAKAACUdgAAlHYAAAAHAACVjwAAlY8AAAAFAACZPAAAmTwAAAAGAACaSwAAmksAAAAEAACeOgAAnjoAAAAIAACeowAAnqMAAAADAACfZAAAn2QAAAAKAACfkgAAn5IAAAACAACfpAAAn6QAAAABAACfpQAAn6UAAAAJAAAAAAAAACgAPgBmAJoAvgDoASQBOAF+AboAAgAA/+YEWQYnAAoAEgAAExAAISAREAAjIgATECEgERAhIFsBEAECAez+6/rs/v3IATkBNP7S/sEC6AGaAaX85v54/mEBigGB/ZcCcwKJAAABAAAAAAQ1Bi4ACQAAKQE1IREFNSURIQQ1/IgBW/6cAicBWqkEmGe0oPp7AAEAAAAABCYGJwAXAAApATUBPgE1NCYjIgc1NjMyFhUUAgcBFSEEGPxSAcK6fpSMz7y389Hym9j+nwLGqgHButl0hI2wx43iv5D+69b+pwQAAQAA/+YEGQYnACEAABMWMzI2NRAhIzUzIBE0ISIHNTYzMhYVEAUVHgEVFAAjIiePn8igu/5bgXsBdf7jo5CYy8bw/sqow/7T+tyHAQN7nYQBJqIBFP9uuVjPpf7QVwQSyZbR/wBSAAACAAAAAARoBg0ACgASAAABIxEjESE1ATMRMyERNDcjBgcBBGjGvv0uAq3jxv58BAQOLf4zAZL+bgGSfwP8/CACiUVaJlH9TwABAAD/5gQhBg0AGAAANxYzMjYQJiMiBxEhFSERNjMyBBUUACEiJ7GcqaDEx71bmgL6/bxXLPUBEv7a/v3Zbu5mswEppA4DE63+SgX42uH+6kAAAAACAAD/5gRbBicAFgAiAAABJiMiAgMzNjMyEhUUACMiABEQACEyFwEUFjMyNjU0JiMiBgP6eYTJ9AIFbvHJ8P7r1+z+8wFhASClXv1Qo4eAoJeLhKQFRj7+ov7R1f762eP+3AFxAVMBmgHjLfwBmdq8lKCytAAAAAABAAAAAARNBg0ABgAACQEjASE1IQRN/aLLAkD8+gPvBcn6NwVgrQAAAwAA/+YESgYnABUAHwApAAABJDU0JDMyFhUQBRUEERQEIyIkNRAlATQmIyIGFRQXNgEEFRQWMzI2NTQBtv7rAQTKufD+3wFT/un6zf7+AUwBnIJvaJLz+P78/uGoh4OkAy+B9avXyqD+/osEev7aweXitAEohwF7aHh9YcJlZ/7qdNhwkI9r4QAAAAACAAD/5gRGBicAFwAjAAA3FjMyEhEGJwYjIgA1NAAzMgAREAAhIicTFBYzMjY1NCYjIga5gJTQ5QICZvHD/wABGN/nAQT+sP7Xo3FxoI16pqWHfaTSSgFIAS4CAsIBDNbkASX+lf6l/lP+MjUEHJy3p3en274AAAAAABAAxgABAAAAAAABAA8AAAABAAAAAAACAAcADwABAAAAAAADAA8AFgABAAAAAAAEAA8AJQABAAAAAAAFAAsANAABAAAAAAAGAA8APwABAAAAAAAKACsATgABAAAAAAALABMAeQADAAEECQABAB4AjAADAAEECQACAA4AqgADAAEECQADAB4AuAADAAEECQAEAB4A1gADAAEECQAFABYA9AADAAEECQAGAB4BCgADAAEECQAKAFYBKAADAAEECQALACYBfmZhbmdjaGFuLXNlY3JldFJlZ3VsYXJmYW5nY2hhbi1zZWNyZXRmYW5nY2hhbi1zZWNyZXRWZXJzaW9uIDEuMGZhbmdjaGFuLXNlY3JldEdlbmVyYXRlZCBieSBzdmcydHRmIGZyb20gRm9udGVsbG8gcHJvamVjdC5odHRwOi8vZm9udGVsbG8uY29tAGYAYQBuAGcAYwBoAGEAbgAtAHMAZQBjAHIAZQB0AFIAZQBnAHUAbABhAHIAZgBhAG4AZwBjAGgAYQBuAC0AcwBlAGMAcgBlAHQAZgBhAG4AZwBjAGgAYQBuAC0AcwBlAGMAcgBlAHQAVgBlAHIAcwBpAG8AbgAgADEALgAwAGYAYQBuAGcAYwBoAGEAbgAtAHMAZQBjAHIAZQB0AEcAZQBuAGUAcgBhAHQAZQBkACAAYgB5ACAAcwB2AGcAMgB0AHQAZgAgAGYAcgBvAG0AIABGAG8AbgB0AGUAbABsAG8AIABwAHIAbwBqAGUAYwB0AC4AaAB0AHQAcAA6AC8ALwBmAG8AbgB0AGUAbABsAG8ALgBjAG8AbQAAAAIAAAAAAAAAFAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACwECAQMBBAEFAQYBBwEIAQkBCgELAQwAAAAAAAAAAAAAAAAAAAAA"
# b = base64.b64decode(font_face) #將字體進行base64解析
# with open('58sourse.ttf','wb') as f:
#     f.write(b) #保存字體
# base_font.saveXML('58sourse.xml')  #保存成xml文件

base_font = TTFont('58sourse.ttf') #讀取字體的映射關係
#獲取字體形狀
glyf = base_font['glyf']

#文字和形狀的映射關係
base_font_map = {
    0:glyf['glyph00001'],
    1:glyf['glyph00002'],
    2:glyf['glyph00003'],
    3:glyf['glyph00004'],
    4:glyf['glyph00005'],
    5:glyf['glyph00006'],
    6:glyf['glyph00007'],
    7:glyf['glyph00008'],
    8:glyf['glyph00009'],
    9:glyf['glyph00010'],
}

#從網絡上抓取源代碼,獲取code->name->文字形狀的映射關係
url = 'https://haikou.58.com/chuzu/'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
}
resp = requests.get(url,headers=headers)
text = resp.text  #源代碼

font_face = re.search(r"@font-face.+?base64,(.+?)'\)",text).group(1)
#將字體的url路徑font_face用base64解碼後存放於內存管道中,這樣程序運行更快,不必將它存放在文件中讀取,因爲進行io操作程序會運行很慢
font_bytes = io.BytesIO(base64.b64decode(font_face))
current_font = TTFont(font_bytes)

#code->name
code_name_map = current_font.getBestCmap() #獲取cmap返回一個字典
current_glyf = current_font['glyf'] #獲取glyf返回一個對象
# print(code_name_map)
# print(current_glyf)
for code,name in code_name_map.items():  #code->name
    # print({'code':hex(code),'name':name})  #將code轉化爲16進制
    current_shape = current_glyf[name]
    # print(current_shape)
    for number,shape in base_font_map.items(): #name->形狀
        if current_shape == shape:
            webcode = str(hex(code)).replace('0',"&#",1) + ';'
            # print({'code':hex(code),'number':number})
            text = re.sub(webcode,str(number),text) #替換

with open('58.html','w',encoding='utf-8') as f:
    f.write(text)

實習僧字體反爬實例

import base64
import io
from fontTools.ttLib import TTFont
import requests
import re

font_face = "d09GRgABAAAAACicAAsAAAAAO9QAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAABHU1VCAAABCAAAADMAAABCsP6z7U9TLzIAAAE8AAAARAAAAFZtBmSyY21hcAAAAYAAAAO9AAAJxPfegZVnbHlmAAAFQAAAHlgAACfUsfAWzGhlYWQAACOYAAAAMQAAADYbQ6XBaGhlYQAAI8wAAAAgAAAAJBCpBlFobXR4AAAj7AAAALQAAAGQUfP/MmxvY2EAACSgAAAAygAAAMr1ieq+bWF4cAAAJWwAAAAdAAAAIAF4AF9uYW1lAAAljAAAAVcAAAKFkAhoC3Bvc3QAACbkAAABuAAAA4PWD99UeJxjYGRgYOBikGPQYWB0cfMJYeBgYGGAAJAMY05meiJQDMoDyrGAaQ4gZoOIAgCKIwNPAHicY2Bk/cg4gYGVgYNVmD2FgYGxCkKzCjK0MO1kYGBiYGVmwAoC0lxTGBwYKn5wcZT/fcHwmaOcSQIozAiSAwC6kwuAeJzV1tlPVHcAxfEvRaBQ2tpOd7V0oSu2WDot1bbafQO6L9CF0hIT34w+AA/GVOMfYGJiNCZCTDS+GU3fjFErIbwZfFCfGrnMDDPDLMCduZgYtWc8/AMmJG3n5kOY+zQ/5pzDBWqAalkty/R2OVVUfm3Q3apb96tpuHV/WfXfev8Xf1DLewxd2TrZPXk9WBP0Bf3B9mBHsDs4GoxP1U8NTl1MxBObEjuTtcl48lCK1MpUW2pPan/q8nRs+lh6V3pfeiJ9KR1lqjNnMrnMjWxddnV2bfZk9sLM5plTuc7ctty53GT+YH4kfzg/lp8u1BSaC62FmWKs2FUcKZ6YPT9bnuuZG55LzK+aPzI/ERK2hx3hQLg3PB6OlnpLG0unS2dL2fKW8mD5WtQYtUTxaF00FB2IEguxhaaFgasNN2/qLLdzhhVLdobO4vASnuH//apSnm7nGl2ya4zxJbkqZ/iId1nDj3zNHSynicfo5Dna+Y27aOVVPmAlP/Eh6/iER3mJb7iPfj7mBTXubbr5lfd5mN/ZwMt8ztPEeIrv+Yov+I4WmrmTT+ngHrr4mS+5n2d4i295h+d5TV2s5zPqeJJHeIi7uZdV6uuLPMEb6vYP6u8v9NLHg8R5hcfp4U3W8iwreJ1GHqBNXa5mvY5R+2/G4D/yaqz8WPbn4rt+GVqkj3hlq+n7ZrLb9M0zed2UAYJFSgNBnykXBP2mhBBsN2WFYIcpNQS7TfkhOGpKEsG4KVNM1ZvSxdSgKWdMXTQljkTclD0Sm0wpJLHTlEeStaZkkoybMkrykCmtpDDlVvtnSrD23JRlUm2mVJPaY8o3qf2mpJO6bJW/53TMlH6mj5l6QHqXVf4HpfeZukF6wtQS0pdMfSEdmZpDptrUITJnTG0ikzP1iswNU8PI1pm6Rna1qXVk15r6R/akqYlkL5g6ycxmUzuZOWXqKblOU2PJbTN1l9w5U4vJTZr6TP6gqdnkR0wdJ3/Y1HbyY6bek582LQCFGtMWUGg2rQKFVtM+UJgxLQXFmGkzKHaa1oNil2lHKA6bFoXiiGlbKJ4wrQyz5017w2zZtDzM9Zg2iLlh0xoxlzDtEvOrTAvF/BHTVjE/YVotQqzyvBG2m5aMsMO0aYQDpnUj3GuVZ5jwuGnxCEet8kxT6jWtIKWNRuXeadMyUjpr2khKWdNaUt5i2k3Kg6YFpXzNtKVEjaZVJWox7StR3LS0ROtMm0s0ZFpfogNWWZ0oYVpkFmKmbWahybTSLAxY5XntaoOx/h8fUZ3FAAAAeJxtWgtcVNX2PmufxyAi8sYHKcPTDAmZB0RIREhEhFwkMiUzQyUzRSWfpESERCMhIhGSEQHhI0IlQi6iEiKa77yEpGZGiEpcU0ycmbP473NmQO+9f+d35swczuyz93p837fWlgGGGexl/BkHhjBMgMrRYYLDJIb+o98Gu7lXBW/GkrFjGHsbRulqa8O4u9GP0iel6xywAQK9eEvfi2IbPAVqPI9HSA8UQ4WxG9fgG5AD74ufkpXkI4YBaVBOI/gyIxjmcdAobXmNp62S0xizYQ6WEG+YvZMrWlRbql9vvteLPttZvhdCQKP2cndTaLQqfydHB6IARyU3xhgHzQ0bCnRnGr+/e+7O9g78grTugiMNF1MzN9cc23ztUB7easevOWk8upaxdDwlXYmTyl+rsVG6a1Sutmrl0KA2vOMEcHSgC/T+5Iu20n/inPfXw7v4+1d5RT+23sYTVT/gT/pLm4Bd/3kmeJWBw+DSA7POVODZVzi744XnBpk42WZ03snCKJPNwN3W3VapAZWtSmmrDnAXFMAltzaJvuSb31sxjeOsp6AK8nEp5G9kjUZX8tH02V7/EGeY56uk44yhPpHmayOt3lbpqLSlk+XoNJVu3ntLdpa2HDs4p5joxcZXfO6BHXaieHFp38L98Hi5NWv/DUbxA52/4c3QYX/OoTZwYtzo3OiAhFXR0ezkRTPUt0p/Z3CQrSMZW2D1azOP9xN+6V/Hb+Otn3vxLrwBrhVzxdivNqWXfvpxZjkfEYJleO5fqL/4O16C5TCT+v73KUb4rLOxoHRfrWQOky9fomsRGGYEuINSo+ReEst+JKFGPftv/rg+gJ/3DcOw8vyi6PwcmYnM4/SHHhq1h9kznK0D5+7mobHxBHlqjg7OWqBmME0W6vAWcCcO3MZfYSbe0J9ChGgoyViWnH93AI34W9WmD8rZJ9+/t/vnE6WIH3PrsPPIH9dqYdImWJ+85r03mhcswpupHSl5n7z928PYj5Djz93kATtbwVugZtKoGZW/bCw3b/tHjHUgveDsH4Qs/OuHQQbG/HodOKzBn79avrx087rKik3vlZ+PgkjwJaQZrNq7wBO3YQXORH8N51Ly/derv/6lachWii72Dcka9tTbroqWgWD2jRpGtg/eVPwilDIujKdkH08aWzS6pJhQ2jrQsPXyptPR0C/0DzTmbMDLm7GnH9zZvwqgWfybaxU7oe1szBJLhxR1Cp4spOlqWYjVEcEQTDCnoGAclw5eOTnYaUy7IkTHWM8lPfpufhzJM/qIqbgr3AvUJJIN1V8d9is/kvrVkWGU7vRhUny7jwAVODmrtAH0zI+8i5VWzjxxw5J+wqLXPfAfP54b4w6hf6ALed4h2OIFsYG1ZFmx58lntNOIvWha56+KfmELjX0vZrL0IGeFlzfHCwpvbYA2QF4xUXh6ebOMtEJQKRj6N4anl3lfcDufERqP1uJAfMLs+eHVZeJChY8x8GQzG524tBlT0NovhIRAUrsXzCaBwWqx0VjAxRpqIADxWmrU7AleFh6VHsG7ugoKENsmz13Ku+AkvBoZBV5wzR/9O2Jnw7i4AlOM4KAiVbBnFDKqULPTmbmDf4DC3ZV6gSgE3svbI0BrL/mHeqhBaKjSF1bx1nN1E7rxb7zGPhOG19SRYK3WsJjOL8Y0wX6gd80a1o8rv0PuiCf3Xa0PCStrzCAT9KX8PNEjmTHHwQPFJeFLZiQzlmYKA0pblYwM1O32Ts4cNZNGigtbLXUEtRwpJRF7SJRYt0dsqBcs8PDZeRHYBY3VOL9zvm7DHvYvUiPG5hmKuGTxcHJEW0F28Dp2TJ5RvZjvvbpkVeU+0zNFRZewnRlHsUPzyDNtHsadyRtKkCdi6y5FoCJAnoHZELZsD5lfBAQRgy9Z2TUlZuAdLPeNgF1gIZ4wlrFMHrrmQWCeb1hJZ1b+vgg86RcKJCqGuw/lmJhvOIF9JD7BuoANQjs8GRQMK4gTdBrGcfGGUrYBE1B3Mqiqf8nh1pt9oeGVdbugxOynfysKhK00kwOkmVMvgZKwRCnP3JU6yJ3IqaI0ndw1oA2QDUhx29HdVksXQaOPMfZzk4y9rG6M07hBZladtUMOuFg7OLS/Kt4KDITbrw4yKzHILxB2LO0THbZsxX6wKujvT1nBWeMlrOM/z8unYdlOc+60kBTltEsP9eLruDpCDa+QV3Ek/jMoCMbD7iLR7+lQsj8P+tEqT/xEpY0Py6w0YxLNt+9ovlkwNgxDze8qwxBrq3KlF5Rt0ADz4CecgnnXfgNveBH34C1hFC7Er7EIY/g5hgQII+HgMZy7Ek+NlHJXhgw6oK2KT24z3m5rY23aSIMYIYwSXydfDt3PPqD38xIfS/eyD4xn2shbwih9+RDv5Q3znhQBMkt509nJZAt5bVBvAEdsvHv8wPdNuIM8Jx4URt35rQv/sGDHiYUVxfCEmStS6DgKOcYk9lRyKfjscXyOm87P0Vfwc2g40qcweE/RTXPAnmaAhgliQpgwJoKJYmIkGqYoYUpG2XcKOgxFJqVG5WjOUQWNXIVSUBB3pRy7EqrYKym92mpZGVIpftJ3nh6gdHycaJTjQsJWsGMyw6OIVUmO3sjGQHUWVGeIbhkbuE5jswcps7RyuIPFVhPOFuRYW1iKMaH4dyfeycgAa7wjHVQW+eAF2Ce/y4foJp2PW1pyF7KyDIXWNoGzXDEUytRavrG+3njb13dF2o76dDRGJVitC1kKrofBCyvHQAv4ZoCveL6hoQH86utlk8n22EDtoaBRHs5EmqwnrZWV1skL3sMLFWRckpH0YZzLwkK21SNEIig8qSXYSZxPkcgVhXKFnLUd3sw7bW3dWLTL0oqIfSNeXIh9k0P0cdMVWCeqt27CAbAoAAH1S5IshCtt4Wss7FJDZuWkCTqRI0ay0piZo1NE3EHsEDguKcymOr9B3Hy4NCgqO3dxt7HFTQmXdZCJaTqMdHFZ6BssVl3i4qKtFxJbb48x6GXmncEPFC38PSl2A4YZ0FEZxC4RBaFKJOzSgVUTeW1pqf5EyRCXLxMWMKOZ8VK0a9wpjxONDePqTBOcNesJJ1ethlvWwq0dPD8A1MgLuZYfvk0vqf0GJtTWn4cnEOzgxRqy0zjlw/03jh/4+afPzXZ/oFgnzJSR+ElGRadnoxAUsn3HUhfQL8O0DNIsqT800ovGoukFNOpgjY9fUVDghkmuQtlAbrXFmObkXH0xcOwsMbSonBCcDZUlUImzCWeYyyaJHexy5Jp62qOCDzdXQRlr8aAFkSNC8EKrLD7fWCJ2pbMnTmTX1GSfyBZbwQr7h3IYVlA7sBJ6qGyrW4QFD7ZLqozy2DVhL+PK+DLBch5FS/Ej5cij4eD+yEtJg8cUOOaDJ6w2QHAWeLpyO3ueHgrC0iHsZQYgF3hrsSqATC1Sz7UeFx4RfDqrNatNB02+YhfbZ1yjA20eaHWGGK/kRa8Q7hrGTDheMd8NQ7LDwnK0C0m1GFe+blV8ZiS7N6NTTN5D4sKEENCLOpIqFpFk0YtN6AQuBJqxAVowGCKQCcKL4g0np5jQwFmFSlfyKozTTsBrOM8D8sjSDGiLzC+O1ZlxFVmunltCQfQxqqokKHQ06WKp2vFmHxF5jrCypYXL0F/4C/99/V+pfPUHJTXluVsaSsRFwoxDF/FnPfbi3jrw2XTg9oX9P57+0jz+nxSrvqejm3QCfUkhwtg7Cwpz7A7xt5RwXhSmW8TaTV9TaeCSWz7Zl1J/GCkRk9gxxp4ygx6uEi8q5D4jwZB09xSlnlDI9X3ztTws0WFsnthFXPKgZkh3v0z9bStjvI00fxnn6eJcbbmXfxjYWEVXs/EmvEvSIOaHAnGzsEAMOIvTzHjfz8VRfGVGEHdWFnesJO6owlAB24+58E7XT7YTOZj682+wBj/rOmM/noNRXJz4k/gLzHMJEmKxlEwkKsx108IJRmIOvKHoFd6iY3pTzH6aoVUJDSJHonCi+s3Dk5NUgqcXSyPPRvpoSwPM4WmgNnLTwJCucX+oa+i71p6c4MJ29AzCogg1bsf7eBvnqdWwGUb2vEV+RSus0YbAJNYn8ZKlgAONxibOEvuy5oUjFGbgQLFOl5/OCgs6LmwkFVT3VhytCpsduail/oOoiLcrr8LTrFUQ5tWGBRWB1XlMnh0XdtbYMDd+35LMsBWkLc+YncBnFJWkNmYayweby+ExGQ/uKnqE1ykOT6K5FPoQh8HOXoJayfEentTxssdN8og6hSi8eDPzaB4JCQky2BT2apEYW2Tc0boYeyBwQ6bNOIiH2T4fbwUnvBkTW7wmNTpvcZkr2UGCMQha0ZLLRQt6DiLB+h62R4wlNcILYlxcnJhF5dnHmfCU3ejwCJdYncsY3JeJA+FxtfML5hjfZJuwqjuhCyZlgU1WFt7Owo4sYT7WmjD0V0UTX0GxQ2GqTKTkPivMe1BaoMgY2MBX7DHkVsn3vU6xZDyNOVoVOCulNdvLS2LNC2KVk+B8KgbC1UixTOFguBPJpkD4QAR/WnRbxcaPZG2KoLuoCMcZZxWylcbbUizifZo/Sqo9aTkmjaY1WUmQ5mAnaX+WKjcX8ud64z72zREiEQ4b+4jAphu3HyLNcYFi6ukdonrycniGPFWUC4lFRefwepGY1T4/HBRkrngSv9OaeKWHNPPFjANFAcHddSy4a1S2AbIvJAwIIM2h7+DPTU0/gD3+GZUQNnWkO7xANpaBZQjWl4nl78z1NPNTN9cr17F0HLNK83d29HKXP9KBuN7dVAPvxxVwGF69sqXyGIoUuPz+7t4VOhu+hkRYBW3PnEukwu0XvIkHkk1j8lLtPva/xlSaC3eZyPk5PVRq1uACaIU3DdUNOChiPfgC7E3DGjJ2cSbshDdhEZyO7XgXK/EPvIyN4fDVXpkD7iouCPspBrrJPBZA81PqX9Cle5sYwGmapO29vKWYVVKUN4EXr7J1GOY4qdYE2ctUOEUXsUtOxOdb2a1LjhdbsItkhS3ckBb9JnGPXRxTlPgaWTuQKKwr6Vm8w9KuIDEdk6jT14ktJNh04F1ebShr4biYCEsSj1xe8rqEnCVhPhm5szYs1GUayv4mhItLsMlgabXKurRjefuQrnWi+tFG0qlSTWpGPLkcdcI+sWHCSL6tjbcdQ/zvi6vIYU9HKhpGid7jJ5BScexQ7S+4UVuPlLpPct9LtrGr9NlU+7u7MUlQCHPgOcjBJViNdbjiPEy8PgDj8dZff2In6YfV0IgxWI6bMIKqviV4Hb+B6eABPpSdqszPGaWIE6bSav4pOnVqV2pUCoZyxaTwlquo/yjrtQFenua72KG76DWYtWr1qjcist9cvXJ59sxZ63PXFApV3en7LO2qVqXrM/d88CGxSE16e2Vs9vKsDZuykpak6dbtyXyX2xxy8Oi5M/54907LqcgdpddPTcfXFR4PGtu50ETrpXyOwUk0/pkXXvbdH1fU+DuKHXeim5qvtMU8+Os8MxzngdRO7tRKE0DlqlH7grcvyF0bKdxlWeUsNdU4pZuXd/rzfW9DRMS6qhsFoO29vkTXnrs/7UxXHXbgg8R7geAeEtvzbHLszNRj6UeuBXa+sXrx3FUL3+nMPtmp8hrSLxaW1LfjTX0kc8PhkX6D6WxhiTE3sM2R4wQnPHodn7kMwY6cwLvAjKMQamPJ8WPATxj14C6XGB0/80VDmTDKsHRGbPCbXL5hQdDsZ1/hPh1eH7uH6gI5j6XWz1Aeywujeczu3rj/Nl6Cp8GiaMm7n1T9dGznJxv9o0F9D2zgJW1zwo3mQx1zTb4W73PL6FhjGFp/edrQ2lPjRFl4WItSi6k0KlY7LEa9uWXintNFB3bBVm6D/qweHLovv821tED1xtJ9e+CxOnhNLH7p4CKoevccTP6blo4v1hRiaHr9rVM1nWdLh/h/DuV/KzkXzJlAzcTNwXvoTcfi4IgRrYijeEtYYLgDBrF3uPe1l851JONkiv+hmDf1u6RWpLAA/EF4QMP9PA5gR9uRr6pqaqoqDpHHqEyehmfwLuqxCQKBP3TpZ9jacW3Inol03ImmjhqVVHK0aIeixRws1CLeywt2/5H0Wf3SB3Wb16Z9PnfVy/kHMh607kn/99uFoTOfitqyaOveqQdj4z6MCppe8E7Rt88OxYcQSDWLKT6GNItJsmgD7IfOQiDWnbxkzXGsQ18r1p68bMlbcKP7vuuxFEYQmw4uzlDNWvgHTg4x/s3FGU96hgZMZacaf/QI8wliNaZ1gKKFYqWtxO5mQKQcYSd1D2whkH0s8LX8+dEprFrvRbDedz548OvnZcZvWBNWhjaiDnU0REJIqGnOFHdPCgelHOLN/byHI/4/bb2MEnLCqGMrQmYWJ0Rmsm3GTNLet7TG0q4yKUNsLRXPT+MswaHovY2RGzLDirBXdDByYXMtM4nW0MBFDPWPfhNqhX9Q746X+0dS20Nrb6pXlMPPk6Sog3SNwJRBJjSqOcyXD8TuVQ2WDvWLy4zADhpt2NuwHTdgW0jQeXBrh3Wk9kEPdnCR86x2QHQ+pLeRdsyENFkD36UcvpY+XbIa5Ql7U9cOTF0jtryu7tIECCehIT5iLQ2qDrHSJ4ikgB973DBZkeUr3uyeHwdq6EZ/LIpMgNmEmNZCY7yYYsJoitUT5a6ItD8gKORePucOcvtBrRxqQ7TBsZP9WzbuOIi/XcV7dVsq8FLb7c9346fCqB++TG95nLP7saTtPj8Xx29+7xfxHbFry/tgOdT/6KbPGSErOrNaZ7vFXWSy2N7GTuDniPoacQ69Q+pjKxSdwm6qlCZKfh1WgGbdbyfrfnPRbRZ5EM7nlhr2lWI/Wyt2zXw9LO7S4sopU+FCOakVo9mogbm83rCPiyGPG68kJ3M6iPriQ18/cIVYvzVLIQhbC7CuAIsxaYgDD9CZ2A/1ZZ1k3S/Bo5fGVsUf+NN4T+kgWLQayN9aS86ylftO5a+uMByiQDgv3jmslHt8uN+kksdxkUfyhSeGOjvyZomz2bpe7ryqFwdfSn81rw06rgGDVaGvRYkbez/aXbINqt9/TUwVRnW2YNM8fnzSWjZXvFyasTb7YQ/5FVMfasQjWM6/Ih67Ip6/Al860uwcDzslvKbTmx48Y3okd+DhvsIxk09GSLJDI28tHDN+wL5lWMGuNp7hEvlz+qt7w3nnfaY67IHiivC1qacP/0/5RfHCi5evyy0jLxIGlhvKn3ya7RQnkWxxDSkT55Z8tbwwUMyFceXlL71SsmD5BHY9LIHDE4rzdFimwwIdWvCd/e3aNCgjwX69Z/TPcasPlkQP4Wo9xWMbyZ6ejuZS7H/qTNfhMvPsPfzzj47lpjITr/2zpIQWaL4XqSrEHvyuDp7IomXmt2dOfzFsD9ItvCfpV2l35zEwr0qjMoEs6Q4IrQcWxeaDB0/tiPfnmx7rqigzxrI1ZdX7frSXfj7oL96HLcJMxkJiPXu11t/JQXDzsrch7jJj+Y8FVVHE25EvLI745ddT0S/ERNxo4TvUz0ckRzyv/5Ci/kX3vX6wkgsf8k8CxXu7/6w/5eLayZFL+OHB+5V0ne/11TWRd+Hlw1vFj7i478/d+NWcbwwn9WzktTzyS2eWmZqwooD+MP70cx5snvu/qsWzXNxv/aOHnjmZcsB/xRM3GaNacV47WCsF3nIsjJdQnqL7zddD459nxw3bjz1If2vanzQ3itkDxsXkA/EAO1d8n6x/ll1WFmrMM+Po4GZFG39ZrolGPqyKpHO9oHuQmqbQDaS28x36SfzlPQauii3bZebXjxWNvEjjwEHqYNGKZqhEYs0lUjT/4lhD0QY8LVzS4xPcCy/BewN5fK7x5WfYE84wK5dNycvDTsNZHeeDkWatskxxSfhG5lZzIfmwx/AQa6RjElsqBpMW8faH30wOk+JYEwpH9hCjyNGLwWzyQDQPWAynbzeHRUEwtPnkZ0EVzipG/2IkhfKzFIouYRNjLddjKkFhB+YazJVN9q8ZOXGA7IkPFavFX8Q12jdggLvd3mwcgy2JcRDLvmxsw/eizDx0XXFb+IqykIoJpIuQ89GdalupHWDeNCCsDJm0zDD1Ks3L8BScOanVBEo7ezuurSTqRXZEw9SbB8EvXI29J47h0cmhENF0+B+PT54c3mNckr/oHdSytbhLwkqYLXBiOyY4RcXwZ9FpeuM/ha/Ue7fpxPNY9q/PoqO+utjVFBzV+CvYwy+hAepwcEXjInt77vvcXBRyxd6wWeFJcSVW1pwWXfBX8BnCyo/kvvVoKdqpKyXhYdYg/EeXd4jrt10mMe3bbo4cxY+07pEsKowyRpHMiS9FeIlpZn1EpN63ldRFfCThSErfqddDQub2txF/WkaccvlhChSzvwxj4AgTfj4OD73MjTAeYieY2ImsFUbtQ5u94n0zRnia7gcpo5S0xKXsRDy3bUPrbdvgDp9kmL5zJ/fPnUO9MQPl7e3UR94m5vv/GExWJo/iZgqpww/+MS8oqml2mfcUiC6FK+hGXMWrpWKUWAkOVeVRCTsWpk4g00H/885Af4iBEt9tH8JZ9NOhoBPDSSNwUETC/W6c1D/PLTu4I8G8N2ph0v9D2+kP6U1pznSlrfDsaH6cA8IyJHYuguUhGA3H1bYKtSOcA8KRjftSfAxvcbmT3lty2qignNK5LvwTFef24K6pnyvS9X4rY7Srac2O/ynDwOPhQuVil36noU/6uTuYpq+qtrIr2ZEjBos+oEXdopWVazJcWTfO2nDH2AwhzXk9WOXBHig03hkgsUk22aQD2xFOJuzbbrjMvpy9Mp5bXIj9f0PELCzB7mizD14RZgzv21N57MFy5oqUvvO09PIPgEdKU2HGCwvzcPB3PI7LaJU6D6bBx3hzkHm/LiUxVH/90ToVlzrrgGZuE8TBWmiQC9YfMXGabtF49uL/lKzmPkQ49cEoKUaVrmOlXT75xIeLe8SjbZAFa9vIanEZWcd+Y0zEJ6GZ3W+O0+Om342QRMRDZD6O5DiO/gNmgAdr9RjMpVkxRWwlGrZDjIP5kwPJt2buZNsplzhICGevVcndGal48tLYTAHKLQqwBtP/B5gGxys6IK+ZX3n1yyZYBpFXj5VDZPailMT0So5fhIli4O6j1bQGVfnC6Cw8bJc0e0ZSbfbi4dpHLfeCnSXfy7Z1FKih1YyrnSOtVygXKTi1Uc/XVzRsz8eJeBe8oba/BXQb393tBPf3Hln97SLwA2uEKLxhCCosr9aZ8AHyZF61epRXaQ59+vxbL1A6dTbx6FvTp+sTJXIy1zPXTfUS5U/1EA3+F5E6S91c4XpLSvqS4pa7KVspMa76fdku5fp78A5ZX7v9RI6YQz6Bpw9vEz/m4r49vi75GoaY8feWwih8R6M8gKGlgb1qeCNIUJgj3FtGXbsAqiGGOj7EU+WgsOMVHqxpu1TraYIDR9bbXkKHO/06sMO+r7+w5ArKdad+t+vpxFXLXrowGDix9Ek/+BL957BHSyw+h/0bsODzt9HHN5C4uQpo1VG4I+Bp1u5sSa24eGFKEHkABbgkE8/ZO0SH2jhgMNSOm1Do5AzTNsC4rsxanWt+biYas8/WiPGtOScIj09jT1gY+JDYNH/KW1cnl+iycIMbJF6t9Rru8y8b6vNL9bCgdCO2/1N70xDylnaCUhHPGcCh5+pCqezemV2yb/fHRXsBWfL9BepzQukhipbcwR8c6DndcP5cyXBPJJRqCNuhnSZzilBlZ6viQo9yy7pxdTOQ9B1Hq9ugnRSKm/Dbg3nkPRl7bil6hQrq7+eZF5kZJkaUyZA17WRKbpE39yXIpo7nH7mkkS4+BGdHpUymJn/Ke0wUqRXahS2H62BCyGSxYVdjaROxKO5yCVwcyl7Jx8KgpBZjErHI3+XvuzgM+8Au29XNy69lRb5/MKRgVFAYITfBDa+kcXFiJKmXDhKQmrPqcGx8eCfZIKZGJydGRKXb+eeExQnBBqN/Y25636youFQP35zQBGjMAY9sf9/IULCDWX7ZaTlLk3LwUg56RJNqSrBXSTNek+wn3udU5n0HGOLAZ4DChQktVGipG3xB/ZTmw7CXMeYiKMfwgh340oxR4A6bPU4lx7lY45E5E4NYqScgyJq/i2oMO1ljSB35iEf3h83bwxwPSq231Fk2G89B8WhrWfq/Aeyju8OecuN+XHR0BjuvsI6zbMnHQbzIMnDaAvBN4jI7vyTtbZiBPgoyUbz3GeRGBuLnW5Zidog/fPk8hnTgpa1bwQMvSQdJT12zOCN+r9Uo7pBOZ6hyiwvyxlfhI/U00pffUZAHTlFHthdc3FKAbj2RMTAKtuFMvB+jBgVcx90UyfeNhYPUuODRFpddNjuHYf4PbBCMynicY2BkYGAAYoml9bLx/DZfGbg5GEDgVokeB4z+b/T3OwcX214gl4OBCSQKAAo6ChoAAAB4nGNgZGDgKP/7guEzh8p/o/8POLgYgCIoIAUArBYHDHic42AAghQGBpaNxGEOBghm1USwkTGbBBAbANVKA/FTIJ6GKs8qB6GZFKF8GQjN8haI+bCbyfQbqC4KiD9D9YgA7WiF6psHpFWANDPEbJZoIC3GwMC8BagmENMsVlagGgeoWxuBfEsgzgSKfUO4h+kYECv/N2J5BzQH6CYWYyD9HrvbWBcC5bSBatKAeqIgYuwXoeYD3cj6Eyh3BYhzgLgL4m/2ekRYgOwA+8MGQgMALgwgWgAAAAAADAA0AEoAcgCmAMoA9AEwAUQBigHGAdQCHAJIApwC2AMSA3AD1AP6BBIEIgRGBFoE4gVYBXAFngX0BgAGgAawBuwHCgc0B6YIEggmCFAIhAioCNIJAglmCYgJvAocClQKjAqyCuwLCAs0C2QLmAu+C/gMMgxeDJIMqAzoDQwNPg1cDXQNtA3kDgYOLA5KDmYOhA6cDrgO4g8YDzwPog/ED94P9hAOEFQQhhDQERYRMhFSEYYRshHMEfwSdBKmEsQTRhNuE+oAAHicY2BkYGBIYQhm4GIAASYg5gKz/4P5DAAb1wHYAAAAeJxlkbtuwkAURMc88gApQomUJoq0TdIQzEOpUDokKCNR0BuzBiO/tF6QSJcPyHflE9Klyyekz2CuG8cr7547M3d9JQO4xjccnJ57vid2cMHqxDWc40G4Tv1JuEF+Fm6ijRfhM+oz4Ra6eBVu4wZvvMFpXLIa40PYQQefwjVc4Uu4Tv1HuEH+FW7i1mkKn6Hj3Am3sHC6wm08Ou8tpSZGe1av1PKggjSxPd8zJtSGTuinyVGa6/Uu8kxZludCmzxMEzV0B6U004k25W35fj2yNlCBSWM1paujKFWZSbfat+7G2mzc7weiu34aczzFNYGBhgfLfcV6iQP3ACkSaj349AxXSN9IT0j16JepOb01doiKbNWt1ovippz6sVYYwsXgX2rGVFIkq7Pl2PNrI6qW6eOshj0xaSq9mpNEZIWs8LZUfOouNkVXxp/d5woqebeYIf4D2J1ywQB4nG2RR3fUQBCE9RmMyTmbnDMKEySytJLIOWfWu/Z7XLjxHj8fVK0jOtTMVHdXt6qThcS+5eT/35wF1rGeRTawxEY2sZktbGUb29nBTnaxmz3sZR/7OcBBDnGYZY5wlGMc5wQnOcVpznCWc5znAhe5xGWucJVrXCclI6fA4QlESipucJNb3OYOd7lHTcOElo6e+zzgIY94zBOe8oznvOAlr3jNG97yjvd84COf+MwXvvKN7/xgygoz5qwm/Fn8/etnkQozYS4shE7ohUEYhaWwWvqHrkvT4fRdKtaXYvusFhvaZjjLps50tnkuPhb1kO3UwanSzVSvWbxTdmpqITcV33aaJSjqXVBdmA2vWHf98IrjNFHKlhFLm6HpLDNtbPJyYmdlelVeKdtmkgMuKpK7Vq8V4ZpUei+/vPK8fAm6B7srN6xJUXyM42T96JecCDE1x7rcHJH/LtgfOuNW1UMRrz34qbnsJ+qiSFAkTIWqiOKj+DgdtzVurVL36LPe3o32FXwhF53+zUnLyXM31vVSc3M5Wjr1H51QnpeHQUywbRljFX2wLSke/cBVzjRjNe7cJclfjO7Tgg=="
# b = base64.b64decode(font_face)
# with open('shixi.ttf','wb') as f:
#     f.write(b)

font_bytes = io.BytesIO(base64.b64decode(font_face))
base_font = TTFont(font_bytes)
# base_font.saveXML('shixi.xml')
#獲取字體形狀
glyf = base_font['glyf']

#字體和形狀映射
base_font_map = {
    0:glyf['uni30'],
    1:glyf['uni31'],
    2:glyf['uni32'],
    3:glyf['uni33'],
    4:glyf['uni34'],
    5:glyf['uni35'],
    6:glyf['uni36'],
    7:glyf['uni37'],
    8:glyf['uni38'],
    9:glyf['uni39'],
}

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36',
}
url = 'https://www.shixiseng.com/intern/inn_njckxepejl1q'

resp = requests.get(url,headers=headers)
text = resp.text
print(text)
current_font_face = re.search(r'font-family:myFont.+?base64,(.+?)"\)',text).group(1)
# print(current_font_face)
current_font_bytes = io.BytesIO(base64.b64decode(current_font_face))
current_font = TTFont(current_font_bytes)
#code->name
code_name_map = current_font.getBestCmap()
#name->字體
current_glyf = current_font['glyf']
for code,name in code_name_map.items():
    # print({'code':hex(code),'name':name})
    current_shape = current_glyf[name]
    for number,shape in base_font_map.items():
        if current_shape == shape:
            webcode = str(hex(code)).replace('0','&#',1)
            text = re.sub(webcode,str(number),text)

with open('shixi1.html','w',encoding='utf-8') as f:
    f.write(text)

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章