原创 spark-submit

# coding=utf-8 from __future__ import division # must first column. import datetime import numpy as np import pandas

原创 安裝pytorch (win10)

conda install -c anaconda mkl conda install -c peterjc123 pytorch-cpu ##後續會有Windows官方的pytorch。

原创 spark-submit 啓動參數設置

spark-submit --master yarn-client --num-executors 8 --driver-memory 4g --executor-memory 2g spark_demo.py

原创 shell_tips

# 取hive輸出table表含有數字的第12列 pack_unit_scale=$(echo $scale | grep '[[:digit:]]' | awk -F '|' '{print $12}') # 提取含有數字的倒數第一行

原创 sql 之 rank

SELECT t5.student_id , max(CASE WHEN t5.rank=1 THEN t5.rating END) AS near_comment_score_to_teacher , max(CASE WHEN t5.

原创 sql 標準表頭信息

-- ========================================================================= -- **創建人: xxx [email protected] -- **創建時間: 201

原创 python 判斷漢字

def is_chinese(uchar): """判斷一個unicode是否是漢字""" if uchar >= u'\u4e00' and uchar<=u'\u9fa5':

原创 hive 日期轉日期

select from_unixtime(unix_timestamp('20180801','yyyyMMdd'),'yyyy-MM-dd'); #2018-08-01 #昨日pt select from_unixtime(unix_t

原创 hadoop_殺任務,看錯誤日誌

yarn application -list yarn application -kill hadoop job -list hadoop job -kill yarn logs -applicationId application_1

原创 優化hive的性能配置

-- 優化hive性能:tez,spark,impala,mapreduce; 矢量化 -- set hive.execution.engine = spark; set hive.vectorized.execution.enabled

原创 shell中 '\r': command not found 解決辦法

sed -i 's/\r$//' <filename> or dos2unix <filename>

原创 hadoop job kill

$ hadoop job -list $ hadoop job -kill job_2018xxxxxxxxx_12345

原创 hive_sql__tips

-- 優化時間計算導致的資源消耗 -- -- 優化hive性能:tez,spark,impala,mapreduce; 矢量化 -- -- set hive.execution.engine = spark; -- set hive.v

原创 sql統計字段

SELECT activity_id, count(*) FROM activity_prize_lottery_record GROUP BY activity_id HAVING count(*) > 0 output: +-----

原创 mysql小技巧_1

show PROCESSLIST ; SELECT now(); SELECT DATE_SUB(CURDATE(), INTERVAL 30 DAY); CREATE DATABASE qx_test; CREATE TABLE I