在HIVE執行MR的時候,報如下錯誤
java.io.IOException: Call to server/10.64.49.21:9001 failed on local exception: java.io.IOException: Too many open files
at org.apache.hadoop.ipc.Client.wrapException(Client.java:1065)
at org.apache.hadoop.ipc.Client.call(Client.java:1033)
at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:224)
at org.apache.hadoop.mapred.$Proxy10.getJobStatus(Unknown Source)
at org.apache.hadoop.mapred.JobClient.getJob(JobClient.java:1011)
at org.apache.hadoop.mapred.JobClient.getJob(JobClient.java:1023)
at org.apache.hadoop.hive.ql.exec.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:285)
at org.apache.hadoop.hive.ql.exec.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:685)
at org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:458)
at org.apache.hadoop.hive.ql.exec.MapRedTask.execute(MapRedTask.java:136)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:133)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1332)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1123)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:931)
at org.apache.hadoop.hive.service.HiveServer$HiveServerHandler.execute(HiveServer.java:191)
at org.apache.hadoop.hive.service.ThriftHive$Processor$execute.getResult(ThriftHive.java:629)
at org.apache.hadoop.hive.service.ThriftHive$Processor$execute.getResult(ThriftHive.java:617)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:32)
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:34)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:176)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
Caused by: java.io.IOException: Too many open files
at sun.nio.ch.IOUtil.initPipe(Native Method)
at sun.nio.ch.EPollSelectorImpl.<init>(EPollSelectorImpl.java:49)
at sun.nio.ch.EPollSelectorProvider.openSelector(EPollSelectorProvider.java:18)
at org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.get(SocketIOWithTimeout.java:407)
at org.apache.hadoop.net.SocketIOWithTimeout$SelectorPool.select(SocketIOWithTimeout.java:322)
at org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:157)
at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:155)
at org.apache.hadoop.net.SocketInputStream.read(SocketInputStream.java:128)
at java.io.FilterInputStream.read(FilterInputStream.java:116)
at org.apache.hadoop.ipc.Client$Connection$PingInputStream.read(Client.java:343)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read(BufferedInputStream.java:237)
at java.io.DataInputStream.readInt(DataInputStream.java:370)
at org.apache.hadoop.ipc.Client$Connection.receiveResponse(Client.java:767)
at org.apache.hadoop.ipc.Client$Connection.run(Client.java:712)
文件數打開過多,造成這種情況的原因一般來說是由於應用程序對資源使用不當造成,比如沒有及時關閉Socket或數據庫連接等。但也可能應用確實需要打開比較多的文件句柄,而系統本身的設置限制了這一數量。
1.ulimit -a 查看linux系統打開文件的最大句柄數,如下
core file size (blocks, -c) 0
data seg size (kbytes, -d) unlimited
scheduling priority (-e) 0
file size (blocks, -f) unlimited
pending signals (-i) 16308
max locked memory (kbytes, -l) 32
max memory size (kbytes, -m) unlimited
open files (-n) 1024
pipe size (512 bytes, -p) 8
POSIX message queues (bytes, -q) 819200
real-time priority (-r) 0
stack size (kbytes, -s) 8192
cpu time (seconds, -t) unlimited
max user processes (-u) 16308
virtual memory (kbytes, -v) unlimited
file locks (-x) unlimited
open files (-n) 1024 默認爲1024
2.ps -ef|grep hadoop 查看hadoop進程,記錄每個進程ID
3.查看每個進程打開的文件句柄數:lsof -p <進程ID> | wc -l(將 <進程ID> 替換爲上一步記錄的實際進程號)
修改默認的文件句柄數
修改/etc/security/limits.conf文件
在文件末尾加入以下內容
* soft nofile 65536
* hard nofile 65536
注意:limits.conf 的修改對已登錄的會話不生效,需要重新登錄(新開一個登錄會話)後才生效。
在Debian下,重啓root用戶的shell後看不到文件句柄數的改變,其實在其他用戶下此設置已經生效。所以要對root用戶生效還需加上以下兩句。
root soft nofile 65536
root hard nofile 65536
否則在root用戶下看不到參數生效。在centos系統下就不必加這兩句。
另外修改系統環境變量的時候一般爲了不重啓系統,自然的就會想source下這個配置文件。此時在Debian下source會提示如下錯誤
-bash: iptab.sh: command not found
-bash: iptab.sh: command not found
而且在不同目錄下,報錯信息中的腳本名稱部分(上面的 iptab.sh)都不一樣,而在centos下可以正常執行該命令。
至於出現該問題的原因應該與內核相關,不是很理解,暫時沒找到原因。有知道原因的請拍磚,謝謝。