sqoop:
RDBMS和HDFS之間進行數據export/import的工具
Sqoop 安裝
1.下載
下載地址:https://mirrors.tuna.tsinghua.edu.cn/apache/sqoop/
2.解壓
tar -xzvf sqoop-1.99.7-bin-hadoop200.tar.gz -C /soft/
3.配置環境變量
export SQOOP_HOME=/soft/sqoop
export PATH=$PATH:$SQOOP_HOME/bin
配置Sqoopserver和client
server:所有的client的入口點
client:可以安裝在任何節點上
server安裝
1.確保hadoop可用
2.確保HADOOP_HOME環境變量可用
3.配置hadoop的core-site.xml文件
<property>
<name>hadoop.proxyuser.sqoop2.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.sqoop2.groups</name>
<value>*</value>
</property>
4. 在hadoop配置文件 [/soft/hadoop/etc/hadoop/container-executor.cfg] 添加:
allowed.system.users=sqoop2
5.第三方類庫(驅動程序)
複製mysql jar包到 [/soft/sqoop/tools/lib] 目錄下
6.配置sqoop server配置文件
[/soft/sqoop/conf/sqoop_bootstrap.properties]
[/soft/sqoop/conf/sqoop.properties]
將 @BASEDIR@ @LOGDIR@ 替換爲真實路徑
org.apache.sqoop.log4j.debug=false
org.apache.sqoop.log4j.rootLogger=INFO, file
org.apache.sqoop.log4j.category.org.apache.sqoop=INFO
org.apache.sqoop.log4j.appender.file=org.apache.log4j.RollingFileAppender
org.apache.sqoop.log4j.appender.file.File=/soft/sqoop/logs/sqoop.log
#org.apache.sqoop.log4j.appender.file.File=@LOGDIR@/sqoop.log
org.apache.sqoop.log4j.appender.file.MaxFileSize=25MB
org.apache.sqoop.log4j.appender.file.MaxBackupIndex=5
org.apache.sqoop.log4j.appender.file.layout=org.apache.log4j.PatternLayout
org.apache.sqoop.log4j.appender.file.layout.ConversionPattern=%d{ISO8601} %-5p [%l] %m%n
# Audit logger for default configuration of FileAuditLogger
org.apache.sqoop.log4j.logger.audit=INFO, audit
org.apache.sqoop.log4j.appender.audit=org.apache.log4j.RollingFileAppender
org.apache.sqoop.log4j.appender.audit.File=/soft/sqoop/logs/audit.log
#org.apache.sqoop.log4j.appender.audit.File=@LOGDIR@/audit.log
org.apache.sqoop.log4j.appender.audit.MaxFileSize=25MB
org.apache.sqoop.log4j.appender.audit.MaxBackupIndex=5
org.apache.sqoop.log4j.appender.audit.layout=org.apache.log4j.PatternLayout
org.apache.sqoop.log4j.appender.audit.layout.ConversionPattern=%d{ISO8601} %-5p [%l] %m%n
org.apache.sqoop.auditlogger.default.class=org.apache.sqoop.audit.FileAuditLogger
org.apache.sqoop.auditlogger.default.logger=audit
# Repository provider
org.apache.sqoop.repository.provider=org.apache.sqoop.repository.JdbcRepositoryProvider
# Repository upgrade
# If set to true, it will not upgrade the sqoop repository schema, by default it will initiate the upgrade on server start-up
org.apache.sqoop.repository.schema.immutable=false
# JDBC repository provider configuration
org.apache.sqoop.repository.jdbc.handler=org.apache.sqoop.repository.derby.DerbyRepositoryHandler
org.apache.sqoop.repository.jdbc.transaction.isolation=READ_COMMITTED
org.apache.sqoop.repository.jdbc.maximum.connections=10
org.apache.sqoop.repository.jdbc.url=jdbc:derby:/soft/sqoop/repository/db;create=true
#org.apache.sqoop.repository.jdbc.url=jdbc:derby:@BASEDIR@/repository/db;create=true
org.apache.sqoop.repository.jdbc.driver=org.apache.derby.jdbc.EmbeddedDriver
org.apache.sqoop.repository.jdbc.user=sa
org.apache.sqoop.repository.jdbc.password=
# System properties for embedded Derby configuration
org.apache.sqoop.repository.sysprop.derby.stream.error.file=/soft/sqoop/logs/derbyrepo.log
# org.apache.sqoop.repository.sysprop.derby.stream.error.file=@LOGDIR@/derbyrepo.log
#
# Sqoop Connector configuration
# If set to true will initiate Connectors config upgrade during server startup
#
org.apache.sqoop.connector.autoupgrade=false
#
# Sqoop Driver configuration
# If set to true will initiate the Driver config upgrade during server startup
#
org.apache.sqoop.driver.autoupgrade=false
# Sleeping period for reloading configuration file (once a minute)
org.apache.sqoop.core.configuration.provider.properties.sleep=60000
#
# Submission engine configuration
#
# Submission engine class
org.apache.sqoop.submission.engine=org.apache.sqoop.submission.mapreduce.MapreduceSubmissionEngine
# Hadoop configuration directory
org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/soft/hadoop/etc/hadoop
# org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/etc/hadoop/conf/
# Log level for Sqoop Mapper/Reducer
org.apache.sqoop.submission.engine.mapreduce.configuration.loglevel=INFO
#
# Execution engine configuration
#
org.apache.sqoop.execution.engine=org.apache.sqoop.execution.mapreduce.MapreduceExecutionEngine
# External connectors load path
# "/path/to/external/connectors/": Add all the connector JARs in the specified folder
#
org.apache.sqoop.connector.external.loadpath=
# Sqoop application classpath
# ":" separated list of jars to be included in sqoop.
#
org.apache.sqoop.classpath.extra=
# Sqoop extra classpath to be included with all jobs
# ":" separated list of jars to be included in map job classpath.
#
org.apache.sqoop.classpath.job=
#
# Jetty Server configuration
#
#org.apache.sqoop.jetty.thread.pool.worker.max=500
#org.apache.sqoop.jetty.thread.pool.worker.min=5
#org.apache.sqoop.jetty.thread.pool.worker.alive.time=60
#org.apache.sqoop.jetty.port=12000
# Blacklisted Connectors
# ":" separated list of connector names as specified in their
# sqoopconnector.properties file
org.apache.sqoop.connector.blacklist=
7.倉庫的初始化
$bin>sqoop2-tool upgrade
8.驗證是否初始化成功
sqoop2-tool verify
9.server的啓動和停止
sqoop2-server start #啓動
sqoop2-server stop #停止
client安裝
1.啓動客戶端
$bin>sqoop2-shell
2.連接到自己的服務器
sqoop:000> set server --host localhost --port 12000 --webapp sqoop
3.驗證連接
sqoop:000> show version --all
4.查看sqoop註冊的連接器
sqoop:000> show connector
5.創建mysql連接
sqoop:000> create link -connector generic-jdbc-connector
#創建連接步驟:
Creating link for connector with name generic-jdbc-connector
Please fill following values to create new link object
Name: mysql
Link configuration
JDBC Driver Class: com.mysql.jdbc.Driver
JDBC Connection String: jdbc:mysql://192.168.231.1:3306/database
Username: sqoop
Password: *****
JDBC Connection Properties:
There are currently 0 values in the map:
entry#protocol=tcp
New link was successfully created with validation status OK and name mysql
6.創建hdfs連接
sqoop:000> create link -connector hdfs-connector
#創建連接步驟
Creating link for connector with name hdfs-connector
Please fill following values to create new link object
Name: hdfs
HDFS cluster
URI: file:///
Conf directory: /soft/hadoop/etc/hadoop
Additional configs::
There are currently 0 values in the map:
entry#
New link was successfully created with validation status OK and name hdfs
7.創建job
sqoop:000> create job -f "mysql" -t "hdfs"
8.顯示作業:
sqoop:000> show job
9.啓動作業
sqoop:000> start job -name myjob