Python | 將HDFS文件上傳到阿里雲

# -*- coding: UTF-8 -*- 
#!/usr/local/bin/python2.7 
##/usr/bin/python 

#import codecs
import os
import shutil
import json 
import sys 
import paramiko
import hashlib
import requests
import socket
import struct
from flask import Flask, request
from flask_api import status
import logging



# Flask application object; the upload route below is registered on it.
opApp = Flask(__name__)

# Logging setup: records at INFO level and above go to http_op.log.
logging.basicConfig(level=logging.INFO, filename='http_op.log')


# SSH connection settings for the remote (Aliyun) host.
remoteAliHostIp = "10.10.10.10"
remoteAliHostPort = 22
remoteAliHostUser = "root"
rsakey = "/home/hadoop/.ssh/id_rsa"

# Base directory on the remote host under which uploaded files are stored.
remoteBasePath = "/data/file/user"


from hdfs.client import Client 

def hdfs(hdfsPath, client=None):
    """Download every entry of an HDFS directory to the same path locally.

    The local target directory has the same path as ``hdfsPath``. Entries
    named ``_SUCCESS`` are skipped. A local file that already exists under
    the entry's name is removed before the entry is downloaded again.

    Args:
        hdfsPath: HDFS directory to list; also used as the local directory.
        client: Optional object providing ``list(path, status=False)`` and
            ``download(hdfs_path, local_path)``. When omitted, a ``Client``
            for the hard-coded namenode URL is created, as before.

    Returns:
        True once all entries have been processed.
    """
    localPath = hdfsPath

    if client is None:
        client = Client("http://192.168.0.1:50070")
    dirList = client.list(localPath, status=False)

    # Create the local directory once, up front. Doing it here (rather than
    # per entry) also guarantees the directory exists when the listing is
    # empty or contains only the _SUCCESS marker.
    if not os.path.exists(localPath):
        os.makedirs(localPath)

    for name in dirList:
        if name == '_SUCCESS':
            continue

        hdfsLeafPath = hdfsPath + "/" + name
        localLeafPath = localPath + "/" + name

        fileFlag = os.path.exists(localLeafPath)
        logging.info("localLeafPath:" + localLeafPath)
        logging.info(fileFlag)
        if fileFlag:
            # Remove the stale copy so the download does not collide with it.
            os.remove(localLeafPath)
            logging.info('The File Is Exists , Remove OK ! ')

        client.download(hdfsLeafPath, localPath)

    logging.info( "#######################OP : " + hdfsPath + "<Migrate Nas OK>##########################")
    return True


# ssh
def sshAndtransport(localFolder, remoteFolder, localFile):
    """Copy one local file to the remote host and verify it by md5.

    Steps: hash the local file, open an SSH session, create the remote
    directory, start an ncat listener on the remote host (port 19898) that
    writes to the target file, stream the file with ``SendFile``, then run
    ``md5sum`` remotely and compare the digests.

    Args:
        localFolder: Local directory that contains ``localFile``.
        remoteFolder: Directory below ``remoteBasePath`` on the remote host.
        localFile: File name, used both locally and remotely.

    Returns:
        True when the remote md5 equals the local md5; False when the SSH
        connection, the transfer or the md5 comparison fails.

    Raises:
        IOError: If the local file cannot be opened for hashing.
    """
    # Function-scope import keeps the block self-contained on both Python 3
    # (shlex.quote) and Python 2.7 (pipes.quote).
    try:
        from shlex import quote as shellQuote
    except ImportError:
        from pipes import quote as shellQuote

    localFilePath = "{0}/{1}".format(localFolder, localFile)
    remoteDir = "{0}/{1}".format(remoteBasePath, remoteFolder)
    remoteFilePath = "{0}/{1}".format(remoteDir, localFile)

    logging.info( "local:   {0}/{1}".format(localFolder, localFile))
    logging.info( "remote:  {0}/{1}/{2}".format(remoteBasePath, remoteFolder, localFile))

    # Compute the local md5 in chunks so large files are not loaded whole
    # into memory.
    md5_obj = hashlib.md5()
    with open(localFilePath, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            md5_obj.update(chunk)
    localmd5 = md5_obj.hexdigest().lower()

    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy trusts any unknown host key; consider
    # loading known host keys instead if this runs over an untrusted network.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        try:
            ssh.connect(remoteAliHostIp, port=remoteAliHostPort, username=remoteAliHostUser, key_filename=rsakey, timeout=30)
        except (paramiko.SSHException, socket.error) as e:
            logging.error("ssh connect error: {0}".format(e))
            return False

        # Create the remote directory first. Path parts originate from the
        # HTTP request, so they are shell-quoted before being interpolated.
        stdin, stdout, stderr = ssh.exec_command("mkdir -p {0}".format(shellQuote(remoteDir)))
        # exec_command does not wait for the command; block until mkdir has
        # exited so the listener below never starts before the directory
        # exists.
        stdout.channel.recv_exit_status()

        # Start a remote ncat listener that writes what it receives to the
        # target file.
        # NOTE(review): nothing waits for ncat to be listening before the
        # transfer connects; a refused connection is reported as a failed
        # attempt so the caller can retry.
        ssh.exec_command("/usr/bin/ncat -n -4 -p 19898 -l -o {0} 1> /dev/null  &".format(shellQuote(remoteFilePath)))

        # Transfer the file.
        try:
            SendFile(remoteAliHostIp, 19898, localFilePath)
        except socket.error as e:
            logging.error("file transfer error: {0}".format(e))
            return False

        # Remote md5 of the received file.
        stdin, stdout, stderr = ssh.exec_command("md5sum {0}".format(shellQuote(remoteFilePath)))
        remoteLines = stdout.readlines()
        remoteFields = str(remoteLines[0]).split() if remoteLines else []
        if not remoteFields:
            # md5sum printed nothing (e.g. the remote file is missing).
            logging.error("remote md5sum returned no output for {0}".format(remoteFilePath))
            return False
        remoteMd5 = remoteFields[0]
        logging.info( "{0}----{1}".format(remoteMd5,localmd5))

        # Compare digests.
        if localmd5 == remoteMd5:
            logging.info( "md5 check ok!")
            return True
        logging.info( "md5 check failed!")
        return False
    finally:
        # Always release the SSH session.
        ssh.close()

# send file
def SendFile(ip, port, filename):
    """Stream the raw bytes of a local file to ``ip:port`` over TCP.

    No header is sent; the receiver gets exactly the file contents and the
    end of the file is signalled by closing the connection.

    Args:
        ip: Address of the listening receiver.
        port: TCP port of the listening receiver.
        filename: Path of the local file to send.

    Raises:
        socket.error: If connecting or sending fails.
        IOError: If the file cannot be opened or read.
    """
    BUFSIZE = 1024
    sendSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sendSock.connect((ip, port))
        logging.info( "socket connected")

        with open(filename, 'rb') as fp:
            while True:
                filedata = fp.read(BUFSIZE)
                if not filedata:
                    break
                # sendall() keeps writing until the whole chunk is sent;
                # plain send() may transmit only part of it, which would
                # silently corrupt the transferred file.
                sendSock.sendall(filedata)
        # Logged once per file rather than once per chunk.
        logging.info( "file upload access.")
    finally:
        # Close the socket even when the transfer fails part-way.
        sendSock.close()
    logging.info( "socket disconnected.")


@opApp.route("/upload", methods=['POST'])
def main():
    """Handle POST /upload: pull a directory from HDFS and push it remotely.

    Reads the form fields ``path`` and ``key``, downloads the HDFS directory
    ``path/key`` to the same local path, then uploads each local file with
    up to five attempts. After a verified upload the notification endpoint
    is called with the template key and the remote relative path.

    Returns:
        ``("ok", 200)`` after all files have been processed, or a 400
        response when ``path`` or ``key`` is missing from the form.
    """
    hdfsPath = request.form.get("path")
    key = request.form.get("key")
    if hdfsPath is None or key is None:
        # Previously this fell through to os.path.join(None, ...) and
        # surfaced as an unhandled TypeError.
        logging.error("upload request is missing form field 'path' or 'key'")
        return "missing form field: path and key are required", status.HTTP_400_BAD_REQUEST

    # NOTE(review): both values come straight from the request; an absolute
    # ``key`` replaces ``path`` entirely in os.path.join -- validate upstream.
    localFolder = hdfsPath = os.path.join(hdfsPath, key)

    # Download the files from HDFS.
    hdfs(hdfsPath)

    # Remote folder is the local folder without the fixed HDFS prefix,
    # e.g. 2019/05/17/type
    remoteFolder = localFolder.replace('/tables/r/bigdata/http_op_user/', '')
    logging.info(remoteFolder)

    # Upload to the remote host, retrying each file up to five times.
    for fileName in os.listdir(localFolder):
        for attempt in range(1, 6):
            uploadState = sshAndtransport(localFolder=localFolder, remoteFolder=remoteFolder, localFile=fileName)
            if uploadState:
                logging.info( "upload file ok, md5 check ok, script exit!")
                # Notify the downstream service.
                templatePath = os.path.join(remoteFolder, fileName)
                json_data = {"statue": 1,"templateKey": key, "templatePath": templatePath}
                logging.info(json_data)
                # A timeout keeps a stalled notification endpoint from
                # blocking the request forever.
                r = requests.post('http://10.10.10.10/dataSync/userTemplateNotice', json=json_data, timeout=30)
                logging.info(r)
                break
            # The loop itself performs the retry; the former extra upload
            # call in this branch discarded its result and doubled the work.
            logging.info( "upload file ok, md5 check failed, try {0}".format(attempt))
        else:
            # NOTE(review): the response stays "ok" to keep the existing
            # contract; consider reporting the failure to the caller.
            logging.error("giving up on {0} after 5 failed upload attempts".format(fileName))

    return "ok", status.HTTP_200_OK


if __name__ == '__main__':
    # Flask's debug mode enables the interactive Werkzeug debugger, which lets
    # anyone who can reach the port execute arbitrary code. It is therefore
    # off by default; set HTTP_OP_DEBUG=1 to enable it, in which case the
    # server binds to loopback only instead of all interfaces.
    debugMode = os.environ.get("HTTP_OP_DEBUG") == "1"
    opApp.run(host="127.0.0.1" if debugMode else "0.0.0.0", port=5000, debug=debugMode)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章