yum -y install lzo-devel zlib-devel gcc autoconf automake libtool
下載lzo
http://www.oberhumer.com/opensource/lzo/download/
lzo安裝
tar -zxvf lzo-2.10.tar.gz -C /opt/
cd /opt/lzo-2.10
export CFLAGS=-m64
./configure --enable-shared --prefix=/opt/lzo/
make && sudo make install
安裝Hadoop-LZO
wget https://github.com/twitter/hadoop-lzo/archive/master.zip
unzip master
cd /opt/hadoop-lzo-master
修改項目的pom.xml(因爲我們的hadoop環境是2.6.0)
vi pom.xml
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.current.version>2.6.0</hadoop.current.version>
<hadoop.old.version>1.0.4</hadoop.old.version>
</properties>
編譯該項目
export CFLAGS=-m64
export CXXFLAGS=-m64
export C_INCLUDE_PATH=/opt/lzo/include
export LIBRARY_PATH=/opt/lzo/lib
mvn clean package -Dmaven.test.skip=true
mvn clean install -Dmaven.test.skip=true   # 部署到本地倉庫(如果需要用這個編譯)
cd target/native/Linux-amd64-64
mkdir /opt/lzo/libgplcompression
tar -cBf - -C lib . | tar -xBvf - -C /opt/lzo/libgplcompression/
將生成的文件發送到hadoop應用(所有hadoop節點都需要)
cp /opt/lzo/libgplcompression/libgplcompression* $HADOOP_HOME/lib/native/
cp /opt/hadoop-lzo-master/target/hadoop-lzo-0.4.21-SNAPSHOT.jar $HADOOP_HOME/share/hadoop/common/
修改hadoop的配置文件(所有hadoop節點都要改)
1、在Hadoop中的$HADOOP_HOME/etc/hadoop/hadoop-env.sh加上下面配置:
export LD_LIBRARY_PATH=/opt/lzo/lib
2、在$HADOOP_HOME/etc/hadoop/core-site.xml加上如下配置:
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec,
org.apache.hadoop.io.compress.BZip2Codec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
3、在$HADOOP_HOME/etc/hadoop/mapred-site.xml加上如下配置
<property>
<name>mapred.compress.map.output</name>
<value>true</value>
</property>
<property>
<name>mapred.map.output.compression.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>mapred.child.env</name>
<value>LD_LIBRARY_PATH=/opt/lzo/lib</value>
</property>
壓縮文件
給lzo文件添加index
hadoop jar $HADOOP_HOME/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar com.hadoop.compression.lzo.DistributedLzoIndexer /user/hadoop/test/test.lzo
生成出來的索引文件後綴爲.index,並存放在lzo同一目錄下。
日誌文件的lzo壓縮與解壓(用lzop)
安裝lzop
wget http://www.lzop.org/download/lzop-1.04.tar.gz
tar zxvf lzop-1.04.tar.gz -C /opt/
cd /opt/lzop-1.04/
需先設置以下環境變量,否則configure會報錯:configure: error: LZO header files not found. Please check your installation or set the environment variable `CPPFLAGS'.
export C_INCLUDE_PATH=/opt/lzo/include
export LIBRARY_PATH=/opt/lzo/lib
./configure --enable-shared --prefix=/opt/lzop-1.04
make && make install
lzop使用
./bin/lzop -f /home/hadoop/trade-service_audit_2018032223_001.log   # 壓縮
./bin/lzop -dv /user/hadoop/test/trade-service_audit_2018032223_001.log.lzo   # 解壓