Hadoop学习二十八:Hadoop-hdfs FSNamesystem源码 成员变量

一. FSNamesystem概述

     FSNamesystem为DataNode做簿记工作,直白地说,所有到DataNode的请求都要先经过FSNamesystem的运算处理。FSNamesystem管理着几个主要的数据结构。

  1. 文件名 -> 数据块(存放在FSImage和日志中)
  2. 合法的数据块列表(上面关系的逆关系)
  3. 数据块 -> DataNode(只保存在内存中,根据DataNode发过来的信息动态建立)
  4. DataNode上保存的数据块(上面关系的逆关系)
  5. 最近发送过心跳信息的DataNode(LRU)

二. FSNamesystem成员变量

      依次见下面

三. FSDirectory

      一个文件系统对应一个FSNamesystem和一个FSDirectory。FSNamesystem初始化时会初始化FSDirectory。

public FSDirectory dir;

四. 权限相关

//本地文件的用户文件属主和文件组,可以通过hadoop.job.ugi设置,如果没有设置,那么将使用启动HDFS的用户(通过whoami获得)和该用户所在的组(通过groups获得)作为值。
  private UserGroupInformation fsOwner;
  //对应配置项dfs.permissions.supergroup(默认是supergroup,启动hadoop所使用的用户通常是superuser),应用在defaultPermission中,是系统的超级组。
  private String supergroup;
  //缺省权限,缺省用户为fsOwner;缺省用户组为supergroup;缺省权限为0777,可以通过dfs.upgrade.permission修改。
  private PermissionStatus defaultPermission; 

五. 系统内各种状态的数据块

//正在复制的数据块
  volatile long pendingReplicationBlocksCount = 0L;
  //损坏的数据块
  volatile long corruptReplicaBlocksCount = 0L;
  //需要复制的数据块
  volatile long underReplicatedBlocksCount = 0L;
  //当前正在处理的复制工作数目
  volatile long scheduledReplicationBlocksCount = 0L;
  //超过配额的数据块
  volatile long excessBlocksCount = 0L;
  //正在删除的数据块
  volatile long pendingDeletionBlocksCount = 0L;



  //保存需要进行复制的数据块
  private UnderReplicatedBlocks neededReplications = new UnderReplicatedBlocks();
  // We also store pending replication-orders.
  //保存正在复制的数据块的相关信息
  private PendingReplicationBlocks pendingReplications;
  //保存每个DataNode上无效但还存在的数据块, StorageID -> ArrayList<Block>的对应关系。
  private Map<String, Collection<Block>> recentInvalidateSets = 
    new TreeMap<String, Collection<Block>>();
  //保存每个DataNode上有效,但超过配额需要删除的数据, StorageID -> TreeSet<Block>的对应关系
  Map<String, Collection<Block>> excessReplicateMap = 
    new TreeMap<String, Collection<Block>>();
  //保存失效(如:校验没通过)的Block -> DataNode的对应关系
  public CorruptReplicasMap corruptReplicas = new CorruptReplicasMap();

六. 系统内DataNode的状态

//Block -> BlockInfo(INode, datanodes, previous BlockInfo, next BlockInfo)的对应
  final BlocksMap blocksMap = new BlocksMap(DEFAULT_INITIAL_MAP_CAPACITY, DEFAULT_MAP_LOAD_FACTOR);
  //保存系统内所有的Datanode, StorageID -> DatanodeDescriptor的对应关系
  NavigableMap<String, DatanodeDescriptor> datanodeMap = 
    new TreeMap<String, DatanodeDescriptor>();
  //保存所有目前活着的DataNode,线程HeartbeatMonitor会定期检查。
  ArrayList<DatanodeDescriptor> heartbeats = new ArrayList<DatanodeDescriptor>();
  private Host2NodesMap host2DataNodeMap = new Host2NodesMap();

七. 系统容量

//系统总容量/已使用容量/剩余容量
  private long capacityTotal = 0L, capacityUsed = 0L, capacityRemaining = 0L;
  //系统总连接数,根据DataNode心跳信息跟新。
  private int totalLoad = 0;

八. 租约管理器

//租约管理器
  public LeaseManager leaseManager = new LeaseManager(this);

九. 复制因子

//  The maximum number of replicates we should allow for a single block
  private int maxReplication;
  //  How many outgoing replication streams a given node should have at one time
  private int maxReplicationStreams;
  // MIN_REPLICATION is how many copies we need in place or else we disallow the write
  private int minReplication;
  // Default replication
  private int defaultReplication;

十. 心跳周期

// heartbeatRecheckInterval is how often namenode checks for expired datanodes
  private long heartbeatRecheckInterval;
  // heartbeatExpireInterval is how long namenode waits for datanode to report
  // heartbeat
  private long heartbeatExpireInterval;
  //replicationRecheckInterval is how often namenode checks for new replication work
  private long replicationRecheckInterval;

十一. 网络拓扑结构

// datanode networktoplogy
  NetworkTopology clusterMap = new NetworkTopology();
  private DNSToSwitchMapping dnsToSwitchMapping;
  
  // for block replicas placement
  ReplicationTargetChooser replicator;

十二. 线程

//HeartbeatMonitor thread
  Daemon hbthread = null;   
  //LeaseMonitor thread
  public Daemon lmthread = null;
  //SafeModeMonitor thread
  Daemon smmthread = null;  
  //Replication thread
  public Daemon replthread = null;  
  //Replication metrics
  private ReplicationMonitor replmon = null;

相关推荐