Python批量處理apk腳本

簡述

逆向分析應用時想快速瀏覽一批樣本的信息,人工逐個統計太費時費力,遂想着寫個python腳本批量處理,擺脫傻瓜式操作。

實現功能:
  • 批量獲取apk信息
  • 批量修改apk文件名
  • 批量下載apk
思路:
  1. androguard可以便捷地獲取apk的相關信息(文件md5、簽名md5、包名等),再用openpyxl寫入excel文件。
  2. 通過hashlib對apk文件進行摘要,獲取md5值後用os.rename()重命名文件。
  3. 將apk的下載鏈接放到excel表的第一列,用openpyxl獲取所有鏈接,再用requests獲取response,然後判斷狀態碼和文件大小,再以二進制寫入文件。
遇到的難點

1.一開始是想着用apktool加aapt解析apk的,搜索後發現androguard更簡單方便,沾沾自喜後發現androguard沒提供直接獲取簽名md5的方法 - -,去github項目裏發現一個androsign.py文件可以展示簽名信息,它是提取了META-INF下的證書信息,再使用hashlib.md5摘要獲取證書md5。最終整合了相關代碼,實現了獲取簽名md5。

2.用xlwt處理excel文件時不能生成xlsx格式,所以換了openpyxl庫。另外,自定義字體、填充顏色也挺方便,對應的類是Font、PatternFill。

3.在下載apk時要判斷文件格式是否爲zip,但請求頭不一定有Content-Type,所以只判斷狀態碼和文件大小。

完整代碼:
#!/usr/bin/python3

import getopt
import hashlib
import os
import re
import sys
import time
import requests
import openpyxl
import datetime

from androguard.core.bytecodes.apk import APK
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill

# Today's date as a string; it names the output directory and is appended to report file names.
now = str(datetime.date.today())
# Output directory (named after today's date, under the current working directory)
# where downloaded APKs and the generated .xlsx reports are written.
download_dir = os.path.join(os.getcwd(), now)


def main(argv):
    """
    Parse the command-line options and dispatch to the matching batch action.

    :param argv: full argument vector including the program name (e.g. ``sys.argv``)
    :return: None
    """
    try:
        opts, _args = getopt.getopt(argv[1:], 'hvd:r:i:', ['help', 'version', 'download=', 'rename=', 'info='])
    except getopt.GetoptError as err:
        print('讀取參數時發生錯誤!', err)
        # Nothing was parsed, so stop here instead of iterating an unassigned option list.
        return

    for name, value in opts:
        if name in ('-h', '--help'):
            usage()
            return
        elif name in ('-v', '--version'):
            version()
            return
        elif name in ('-d', '--download'):
            download(value)
        elif name in ('-r', '--rename'):
            rename2md5(value)
        elif name in ('-i', '--info'):
            app_info(value)


def usage():
    """Print the help text listing every supported command-line option."""
    help_lines = (
        '———————— 使用說明 ————————',
        '-h, --help:         幫助信息',
        '-v, --version:      版本號',
        '-d, --download:     批量爬蟲apk',
        '-r, --rename:       將文件以md5重命名',
        '-i, --info:         獲取apk信息',
    )
    for line in help_lines:
        print(line)


def version():
    """Print the version banner of the toolbox."""
    banner = '———————— 版本信息 ————————'
    release = 'apk工具箱 v1.0.1 build on 2018/10/12.'
    print(banner + '\n' + release)


def download(filename):
    """
    Download every APK listed in an Excel file and write a result report.

    For each link the outcome is printed, a successful download is renamed to
    its MD5 digest, and one row (url, result and, on success, the APK
    details) is written to the ``download_apk_<date>.xlsx`` report.

    :param filename: path of the Excel file whose first column holds the links
    :return: None
    """
    if not os.path.isfile(filename):
        print('輸入參數有誤,不是文件!')
        return

    if not os.path.exists(download_dir):
        os.mkdir(download_dir)

    print('Index', '\tResult', '\t\t Url')
    started_at = time.time()
    succeeded = 0
    failed = 0
    max_row, urls = get_all_links(filename)
    report_columns = ['url', 'result', 'filename', 'file_md5', 'app_name', 'pkg_name', 'cert_md5', 'app_version']
    report = WriteData(filename='download_apk_', navigation_bar=report_columns)
    report.write_data(is_init=True)

    for i in range(max_row):
        link = urls[i]
        # The printed index counts down from the total number of links.
        countdown = max_row - i

        if not download_apk(link, i):
            print(countdown, '\tFailure\t\t', link)
            failed += 1
            report.write_data(row=i, data=[link, 'Failure'])
            continue

        print(countdown, '\tSucceed\t\t', link)
        renamed_file = rename_apk(i)
        succeeded += 1
        row_values = [link, 'Succeed'] + get_apk_info(renamed_file)
        report.write_data(row=i, data=row_values)

    print('\nTotal info:\n共%d條鏈接,成功:%d,失敗:%d\t耗時%.1fs' % (max_row, succeeded, failed, time.time() - started_at))


def get_all_links(filename):
    """
    Read the download links from the first column of an Excel workbook.

    Empty cells are skipped, surrounding whitespace is stripped, and links
    without an ``http://`` or ``https://`` scheme get ``http://`` prepended.

    :param filename: path of the .xlsx file to read
    :return: tuple ``(count, links)`` where ``count == len(links)``
    """
    wb = openpyxl.load_workbook(filename)
    # Prefer the conventional 'Sheet1', but fall back to the active sheet so
    # workbooks with a renamed or localized sheet still work.
    ws = wb['Sheet1'] if 'Sheet1' in wb.sheetnames else wb.active

    links = []
    for row in range(1, ws.max_row + 1):
        value = ws.cell(row=row, column=1).value
        if value is None:
            # Blank cell: re.match() would raise TypeError on None.
            continue
        url = str(value).strip()
        if not url:
            continue
        if not re.match(r'^https?:/{2}\w.+$', url):
            url = "http://" + url
        links.append(url)
    # The count must match the list length because callers index links by range(count).
    return len(links), links


def download_apk(url, i, max_size_mb=100):
    """
    Download one APK into ``download_dir`` as ``<i>.apk``.

    The response body is streamed to disk, so the size limit is enforced
    without holding the whole file in memory, and responses that omit the
    ``Content-Length`` header are still accepted while they stay under the limit.

    :param url: link to fetch
    :param i: index used as the temporary file name
    :param max_size_mb: largest accepted download in MiB (default 100)
    :return: True if the file was saved, False otherwise
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.3; zh-cn; M032 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
        'Connection': 'keep-alive'}
    max_bytes = int(max_size_mb * 1048576)
    tmp_apk_path = os.path.join(download_dir, str(i) + '.apk')
    try:
        with requests.get(url, headers=headers, timeout=15, stream=True) as response:
            if response.status_code != 200:
                return False

            # Reject early when the server announces an oversized body.
            declared = response.headers.get('Content-Length')
            if declared is not None and int(declared) > max_bytes:
                return False

            received = 0
            with open(tmp_apk_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=65536):
                    received += len(chunk)
                    if received > max_bytes:
                        # The header was missing or wrong; stop as soon as the limit is crossed.
                        break
                    f.write(chunk)

        if received > max_bytes:
            os.remove(tmp_apk_path)
            return False
        return True
    except Exception as exp:
        # One bad link must not abort the whole batch, so report the reason and carry on.
        print('download_apk: %s -> %s' % (url, exp), file=sys.stderr)
        # Do not leave a truncated file behind.
        if os.path.isfile(tmp_apk_path):
            try:
                os.remove(tmp_apk_path)
            except OSError:
                pass
        return False


def rename_apk(i):
    """
    Rename the downloaded ``<i>.apk`` in ``download_dir`` to ``<md5>.apk``.

    If a file with the digest name already exists, the newly downloaded
    duplicate is deleted instead.

    :param i: index the file was downloaded under
    :return: the digest-based file name (without directory)
    """
    source_path = os.path.join(download_dir, '{}.apk'.format(i))
    digest_name = get_file_md5(source_path) + '.apk'
    target_path = os.path.join(download_dir, digest_name)

    if os.path.exists(target_path):
        os.remove(source_path)
    else:
        os.rename(source_path, target_path)

    return digest_name


def get_file_md5(file_path, chunk_size=1024 * 1024):
    """
    Return the MD5 digest of a file as a lowercase hex string.

    The file is hashed in fixed-size chunks so large files are never loaded
    into memory in one piece.

    :param file_path: path of the file to hash
    :param chunk_size: number of bytes read per iteration
    :return: 32-character lowercase hexadecimal digest
    """
    md5obj = hashlib.md5()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel keeps reading until read() returns b'' at end of file.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5obj.update(chunk)
    return md5obj.hexdigest()


def app_info(path):
    """
    Collect information about every APK below a directory into an Excel report.

    The directory tree is walked recursively; each file that yields APK
    information is printed and written as one row of ``apk_info_<date>.xlsx``.

    :param path: directory containing the APK files
    :return: None
    """
    start = time.time()
    if not os.path.isdir(path):
        print('參數輸入有誤,不是一個目錄...')
        return

    os.makedirs(download_dir, exist_ok=True)

    wd = WriteData(filename='apk_info_',
                   navigation_bar=['filename', 'file_md5', 'app_name', 'pkg_name', 'cert_md5', 'app_version'])
    wd.write_data(is_init=True)

    # A single running row counter across all directories, so rows from one
    # folder never overwrite those of another and skipped files leave no gaps.
    written = 0
    for root, _dirs, files in os.walk(path):
        for name in files:
            info = get_apk_info(name, root)
            if info:
                print(str(info))
                wd.write_data(data=info, row=written)
                written += 1

    # Report once, with the number of files whose information was actually collected.
    print('\nTotal info:\n已獲取%d個文件信息\t耗時%.2fs' % (written, time.time() - start))


def get_apk_info(f, root=None):
    """
    Gather the report fields for one APK file.

    The fields are, in order: file name, file MD5, app name, package name,
    certificate MD5 and version name. If an error occurs part-way through, it
    is printed and the fields gathered so far are returned.

    :param f: file name of the APK
    :param root: directory containing the file; ``download_dir`` is used when falsy
    :return: list of field values (empty if the file is not a valid APK)
    """
    base_dir = root if root else download_dir
    apk_path = os.path.join(base_dir, f)

    collected = []
    try:
        apk = APK(apk_path)
        if apk.is_valid_APK():
            # Each field is produced lazily and appended one at a time, so an
            # exception leaves the earlier fields in the result.
            field_getters = (
                lambda: f,
                lambda: get_file_md5(apk_path),
                lambda: apk.get_app_name(),
                lambda: apk.get_package(),
                lambda: get_cert_md5(apk),
                lambda: apk.get_androidversion_name(),
            )
            for getter in field_getters:
                collected.append(getter())
    except Exception as e:
        print(f + ' ->>', e)

    return collected


def get_cert_md5(a):
    """
    Return the MD5 digest(s) of the APK's signing certificates.

    Certificates from the v2 signature block and from the v1 signature files
    are de-duplicated. With a single certificate the result is its hex digest;
    with several, the digests are sorted and joined with commas so the result
    is deterministic and no certificate is silently dropped.

    :param a: APK object exposing ``get_certificates_der_v2``,
              ``get_signature_names`` and ``get_certificate_der``
    :return: comma-separated hex digests, or '' when there is no certificate
    """
    certs = set(list(a.get_certificates_der_v2()) + [a.get_certificate_der(x) for x in a.get_signature_names()])
    return ','.join(sorted(hashlib.md5(cert).hexdigest() for cert in certs))


class WriteData:
    """Write a styled header row and data rows to an .xlsx report in ``download_dir``."""

    def __init__(self, filename, navigation_bar):
        """
        Create an empty workbook for the report.

        :param filename: report name prefix; today's date is appended to it
        :param navigation_bar: list of column headings for the header row
        """
        workbook = Workbook()
        self.wb = workbook
        self.ws = workbook.active
        self.filename = filename + now
        self.font = Font(name='等線', size=11)
        self.fill = PatternFill("solid", fgColor="E0EEE0")
        self.title = navigation_bar

    def write_data(self, row=1, data=None, is_init=False):
        """
        Write either the header row or one data row, then save the workbook.

        :param row: header row number when ``is_init`` is true; otherwise a
                    zero-based record index (the record lands on sheet row ``row + 2``)
        :param data: sequence of cell values for a data row
        :param is_init: when true, write the column headings instead of data
        :return: None
        """
        if is_init:
            for column, heading in enumerate(self.title, start=1):
                cell = self.ws.cell(row=row, column=column, value=heading)
                cell.font = self.font
                cell.fill = self.fill
        else:
            for column, item in enumerate(data, start=1):
                cell = self.ws.cell(row=row + 2, column=column, value=item)
                cell.font = self.font

        # Saved after every write, so the file on disk is current after each row.
        target = os.path.join(download_dir, self.filename + '.xlsx')
        self.wb.save(target)


def rename2md5(path):
    """
    Rename every file below a directory to ``<md5>.apk``.

    The tree is walked recursively and each file is renamed inside its own
    directory. Files already carrying their digest name are left alone, and a
    file whose digest name is already taken is removed as a duplicate.

    :param path: directory containing the files to rename
    :return: None
    """
    start = time.time()
    if not os.path.isdir(path):
        print('輸入路徑不是目錄,請檢查...\n%s' % path)
        return

    handled = 0
    for root, _dirs, files in os.walk(path):
        for name in files:
            # Build paths from `root` (the directory being visited), not from
            # `path`, otherwise files in subdirectories cannot be found.
            old_name = os.path.join(os.getcwd(), root, name)
            md5 = get_file_md5(old_name)
            target = md5 + '.apk'
            new_name = os.path.join(os.getcwd(), root, target)
            handled += 1

            if name == target:
                # Already named by its digest; renaming would be a no-op and the
                # duplicate-removal branch below must never delete the only copy.
                continue

            print('%s ->> %s' % (name, target))
            try:
                os.rename(old_name, new_name)
            except FileExistsError as err:
                print(err)
                # A file with the same MD5 already exists, so drop this duplicate.
                os.remove(old_name)

    # Report once, covering all directories that were visited.
    print('\nTotal info:\n已對%d個文件進行重命名\t耗時%.2fs' % (handled, time.time() - start))


# Run the command-line entry point only when executed as a script, not when imported.
if __name__ == '__main__':
    main(sys.argv)

查看help:
在這裏插入圖片描述

下載信息:
在這裏插入圖片描述
在這裏插入圖片描述

批量獲取apk信息:
在這裏插入圖片描述
在這裏插入圖片描述

批量改名:
在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章