原创 spark-submit
# coding=utf-8 from __future__ import division # must first column. import datetime import numpy as np import pandas
原创 安裝pytorch (win10)
conda install -c anaconda mkl conda install -c peterjc123 pytorch-cpu ##後續會有Windows官方的pytorch。
原创 spark-submit 啓動參數設置
spark-submit --master yarn-client --num-executors 8 --driver-memory 4g --executor-memory 2g spark_demo.py
原创 shell_tips
# 取hive輸出table表含有數字的第12列 pack_unit_scale=$(echo $scale | grep '[[:digit:]]' | awk -F '|' '{print $12}') # 提取含有數字的倒數第一行
原创 sql 之 rank
SELECT t5.student_id , max(CASE WHEN t5.rank=1 THEN t5.rating END) AS near_comment_score_to_teacher , max(CASE WHEN t5.
原创 sql 標準表頭信息
-- ========================================================================= -- **創建人: xxx [email protected] -- **創建時間: 201
原创 python 判斷漢字
def is_chinese(uchar): """判斷一個unicode是否是漢字""" if uchar >= u'\u4e00' and uchar<=u'\u9fa5':
原创 hive 日期轉日期
select from_unixtime(unix_timestamp('20180801','yyyyMMdd'),'yyyy-MM-dd'); #2018-08-01 #昨日pt select from_unixtime(unix_t
原创 hadoop_殺任務,看錯誤日誌
yarn application -list yarn application -kill hadoop job -list hadoop job -kill yarn logs -applicationId application_1
原创 優化hive的性能配置
-- 優化hive性能:tez,spark,impala,mapreduce; 矢量化-- set hive.execution.engine = spark; set hive.vectorized.execution.enabled
原创 shell中 '\r': command not found 解決辦法
sed -i 's/\r$//' <filename> or dos2unix <filename>
原创 hadoop job kill
$ hadoop job -list $ hadoop job -kill job_2018xxxxxxxxx_12345
原创 hive_sql__tips
-- 優化時間計算導致的資源消耗 -- -- 優化hive性能:tez,spark,impala,mapreduce; 矢量化-- -- set hive.execution.engine = spark; -- set hive.v
原创 sql統計字段
SELECT activity_id, count(*) FROM activity_prize_lottery_record GROUP BY activity_id HAVING count(*) > 0 output: +-----
原创 mysql小技巧_1
show PROCESSLIST ; SELECT now(); SELECT DATE_SUB(CURDATE(), INTERVAL 30 DAY); CREATE DATABASE qx_test; CREATE TABLE I