CDH4 HA test
场景:
NN HA 设置成功,HA 切换时客户端出现异常。
错误分析
用户组映射执行 Shell 脚本失败:NameNode 主机上不存在用户 peter,`id peter` 命令报"无此用户"(见下方服务器端日志)。
日志:
客户端
2012-08-01 14:37:07,798 WARN ipc.Client (Client.java:run(787)) - Unexpected error reading responses on connection Thread[IPC Client (1333933549) connection to bigdata-3/172.16.206.206:9000 from peter,5,main]
java.lang.NullPointerException
    at org.apache.hadoop.ipc.Client$Connection.receiveResponse(Client.java:852)
    at org.apache.hadoop.ipc.Client$Connection.run(Client.java:781)
2012-08-01 14:37:07,807 WARN retry.RetryInvocationHandler (RetryInvocationHandler.java:invoke(118)) - Exception while invoking complete of class ClientNamenodeProtocolTranslatorPB. Trying to failover immediately.
2012-08-01 14:37:07,970 WARN retry.RetryInvocationHandler (RetryInvocationHandler.java:invoke(118)) - Exception while invoking complete of class ClientNamenodeProtocolTranslatorPB after 1 failover attempts. Trying to failover after sleeping for 713ms.
2012-08-01 14:37:08,686 WARN retry.RetryInvocationHandler (RetryInvocationHandler.java:invoke(118)) - Exception while invoking complete of class ClientNamenodeProtocolTranslatorPB after 2 failover attempts. Trying to failover after sleeping for 1596ms.
2012-08-01 14:37:10,286 WARN retry.RetryInvocationHandler (RetryInvocationHandler.java:invoke(118)) - Exception while invoking complete of class ClientNamenodeProtocolTranslatorPB after 3 failover attempts. Trying to failover after sleeping for 2974ms.
2012-08-01 14:37:13,262 WARN retry.RetryInvocationHandler (RetryInvocationHandler.java:invoke(118)) - Exception while invoking complete of class ClientNamenodeProtocolTranslatorPB after 4 failover attempts. Trying to failover after sleeping for 7861ms.
服务器端
2012-08-01 14:54:45,614 WARN org.apache.hadoop.security.UserGroupInformation: No groups available for user peter
2012-08-01 14:54:45,619 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* NameSystem.allocateBlock: /user/peter/FS/100wan/1413. BP-283690147-172.16.206.206-1343792626658 blk_-6816230619303558443_3866{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, replicas=[ReplicaUnderConstruction[172.16.206.209:50010|RBW], ReplicaUnderConstruction[172.16.206.206:50010|RBW]]}
2012-08-01 14:54:46,529 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addStoredBlock: blockMap updated: 172.16.206.206:50010 is added to blk_-6816230619303558443_3866{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, replicas=[ReplicaUnderConstruction[172.16.206.209:50010|RBW], ReplicaUnderConstruction[172.16.206.206:50010|RBW]]} size 0
2012-08-01 14:54:46,529 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addStoredBlock: blockMap updated: 172.16.206.209:50010 is added to blk_-6816230619303558443_3866{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, replicas=[ReplicaUnderConstruction[172.16.206.209:50010|RBW], ReplicaUnderConstruction[172.16.206.206:50010|RBW]]} size 0
2012-08-01 14:54:46,531 INFO org.apache.hadoop.hdfs.StateChange: DIR* NameSystem.completeFile: file /user/peter/FS/100wan/1413 is closed by DFSClient_NONMAPREDUCE_-1368488343_1
2012-08-01 14:54:46,540 WARN org.apache.hadoop.security.ShellBasedUnixGroupsMapping: got exception trying to get groups for user peter
org.apache.hadoop.util.Shell$ExitCodeException: id: peter: 无此用户
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:261)
    at org.apache.hadoop.util.Shell.run(Shell.java:188)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:381)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:467)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:450)
    at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:86)
    at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:55)
    at org.apache.hadoop.security.Groups.getGroups(Groups.java:88)
    at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1116)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.<init>(FSPermissionChecker.java:51)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:4259)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkAncestorAccess(FSNamesystem.java:4236)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:1579)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:1514)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:408)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:200)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:42590)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:427)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:916)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1692)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1688)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1686)