From cc06d61ae811eae634bb54f151e2c5ff3229ede8 Mon Sep 17 00:00:00 2001 From: Felix Nguyen Date: Thu, 2 Apr 2026 15:46:04 +0800 Subject: [PATCH] HDFS-17878. Reduce frequency of getDatanodeListForReport calls for metrics --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 + .../blockmanagement/DatanodeManager.java | 77 ++++++++++++++++--- .../hdfs/server/namenode/FSNamesystem.java | 49 ++++++------ .../src/main/resources/hdfs-default.xml | 10 +++ 4 files changed, 105 insertions(+), 35 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 96226f45f6a48..9de465aa27311 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -2116,4 +2116,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { */ public static final String SUPPORTED_PACKAGES_CONFIG_NAME = "dfs.nodeplan.steps.supported.packages"; + + public static final String DFS_NAMENODE_DATANODE_LIST_CACHE_EXPIRATION_MS_KEY = + "dfs.namenode.datanode.list.cache.expiration.ms"; + public static final long DFS_NAMENODE_DATANODE_LIST_CACHE_EXPIRATION_MS_DEFAULT = 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 01f1af9624d05..68f4ebdef23fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -24,6 +24,8 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.util.Preconditions; +import 
org.apache.hadoop.thirdparty.com.google.common.cache.Cache; +import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; import org.apache.hadoop.thirdparty.com.google.common.net.InetAddresses; @@ -70,6 +72,7 @@ import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.util.*; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; @@ -230,6 +233,9 @@ public class DatanodeManager { private final boolean randomNodeOrderEnabled; + /** Cached map of DatanodeReportType -> list of DatanodeDescriptor for metrics purposes. */ + private Cache> datanodeListSnapshots = null; + DatanodeManager(final BlockManager blockManager, final Namesystem namesystem, final Configuration conf) throws IOException { this.namesystem = namesystem; @@ -364,6 +370,17 @@ public class DatanodeManager { this.randomNodeOrderEnabled = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_RANDOM_NODE_ORDER_ENABLED, DFSConfigKeys.DFS_NAMENODE_RANDOM_NODE_ORDER_ENABLED_DEFAULT); + + long datanodeListCacheExpirationMs = + conf.getLong(DFSConfigKeys.DFS_NAMENODE_DATANODE_LIST_CACHE_EXPIRATION_MS_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_LIST_CACHE_EXPIRATION_MS_DEFAULT); + if (datanodeListCacheExpirationMs > 0) { + LOG.info("Using cached DN list for metrics, expiration time = {} ms.", + datanodeListCacheExpirationMs); + datanodeListSnapshots = CacheBuilder.newBuilder() + .expireAfterWrite(datanodeListCacheExpirationMs, TimeUnit.MILLISECONDS) + .build(); + } } /** @@ -963,6 +980,11 @@ private void wipeDatanode(final DatanodeID node) { synchronized (this) { host2DatanodeMap.remove(datanodeMap.remove(key)); } + Cache> tmpDatanodeListSnapshots = + datanodeListSnapshots; + if (tmpDatanodeListSnapshots != null) { + tmpDatanodeListSnapshots.invalidateAll(); + } if (LOG.isDebugEnabled()) { 
LOG.debug("{}.wipeDatanode({}): storage {} is removed from datanodeMap.", getClass().getSimpleName(), node, key); @@ -1438,7 +1460,7 @@ public int getNumLiveDataNodes() { /** @return the number of dead datanodes. */ public int getNumDeadDataNodes() { - return getDatanodeListForReport(DatanodeReportType.DEAD).size(); + return getDatanodeListForReportWithCache(DatanodeReportType.DEAD).size(); } /** @return the number of datanodes. */ @@ -1453,12 +1475,12 @@ public List getDecommissioningNodes() { // There is no need to take namesystem reader lock as // getDatanodeListForReport will synchronize on datanodeMap // A decommissioning DN may be "alive" or "dead". - return getDatanodeListForReport(DatanodeReportType.DECOMMISSIONING); + return getDatanodeListForReportWithCache(DatanodeReportType.DECOMMISSIONING); } /** @return list of datanodes that are entering maintenance. */ public List getEnteringMaintenanceNodes() { - return getDatanodeListForReport(DatanodeReportType.ENTERING_MAINTENANCE); + return getDatanodeListForReportWithCache(DatanodeReportType.ENTERING_MAINTENANCE); } /* Getter and Setter for stale DataNodes related attributes */ @@ -1532,17 +1554,35 @@ void setNumStaleStorages(int numStaleStorages) { this.numStaleStorages = numStaleStorages; } - /** Fetch live and dead datanodes. */ - public void fetchDatanodes(final List live, + public void fetchDatanodes(final List live, final List dead, final boolean removeDecommissionNode) { + fetchDatanodes(live, dead, removeDecommissionNode, false); + } + + /** + * Fetches live and dead datanodes via cache maps. Generates and caches results + * on cache miss. + */ + public void fetchDatanodesWithCache(final List live, + final List dead, final boolean removeDecommissionNode) { + fetchDatanodes(live, dead, removeDecommissionNode, true); + } + + /** Fetch live and dead datanodes. 
*/ + private void fetchDatanodes(final List live, + final List dead, final boolean removeDecommissionNode, boolean useCache) { if (live == null && dead == null) { throw new HadoopIllegalArgumentException("Both live and dead lists are null"); } - // There is no need to take namesystem reader lock as - // getDatanodeListForReport will synchronize on datanodeMap - final List results = - getDatanodeListForReport(DatanodeReportType.ALL); + List results; + if (useCache) { + results = getDatanodeListForReportWithCache(DatanodeReportType.ALL); + } else { + // There is no need to take namesystem reader lock as + // getDatanodeListForReport will synchronize on datanodeMap + results = getDatanodeListForReport(DatanodeReportType.ALL); + } for(DatanodeDescriptor node : results) { if (isDatanodeDead(node)) { if (dead != null) { @@ -1635,6 +1675,25 @@ private DatanodeID parseDNFromHostsEntry(String hostLine) { return dnId; } + /** + * Low impact version of {@link #getDatanodeListForReport} with possible stale + * data for low impact usage (metrics). 
+ */ + public List getDatanodeListForReportWithCache( + final DatanodeReportType type) { + Cache> tmpDatanodeListSnapshots = + datanodeListSnapshots; + if (tmpDatanodeListSnapshots == null) { + return getDatanodeListForReport(type); + } + try { + return tmpDatanodeListSnapshots.get(type, () -> getDatanodeListForReport(type)); + } catch (ExecutionException e) { + // Fallback if cache fails + return getDatanodeListForReport(type); + } + } + /** For generating datanode reports */ public List getDatanodeListForReport( final DatanodeReportType type) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 5938cd466846d..2da787d15c82e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -5789,8 +5789,8 @@ public int getNumDeadDataNodes() { @Metric({"NumDecomLiveDataNodes", "Number of datanodes which have been decommissioned and are now live"}) public int getNumDecomLiveDataNodes() { - final List live = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); + final List live = new ArrayList<>(); + getBlockManager().getDatanodeManager().fetchDatanodesWithCache(live, null, false); int liveDecommissioned = 0; for (DatanodeDescriptor node : live) { liveDecommissioned += node.isDecommissioned() ? 
1 : 0; @@ -5802,8 +5802,8 @@ public int getNumDecomLiveDataNodes() { @Metric({"NumDecomDeadDataNodes", "Number of datanodes which have been decommissioned and are now dead"}) public int getNumDecomDeadDataNodes() { - final List dead = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, false); + final List dead = new ArrayList<>(); + getBlockManager().getDatanodeManager().fetchDatanodesWithCache(null, dead, false); int deadDecommissioned = 0; for (DatanodeDescriptor node : dead) { deadDecommissioned += node.isDecommissioned() ? 1 : 0; @@ -5815,8 +5815,8 @@ public int getNumDecomDeadDataNodes() { @Metric({"NumInServiceLiveDataNodes", "Number of live datanodes which are currently in service"}) public int getNumInServiceLiveDataNodes() { - final List live = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); + final List live = new ArrayList<>(); + getBlockManager().getDatanodeManager().fetchDatanodesWithCache(live, null, true); int liveInService = live.size(); for (DatanodeDescriptor node : live) { liveInService -= node.isInMaintenance() ? 
1 : 0; @@ -5828,8 +5828,8 @@ public int getNumInServiceLiveDataNodes() { @Metric({"VolumeFailuresTotal", "Total number of volume failures across all Datanodes"}) public int getVolumeFailuresTotal() { - List live = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); + List live = new ArrayList<>(); + getBlockManager().getDatanodeManager().fetchDatanodesWithCache(live, null, false); int volumeFailuresTotal = 0; for (DatanodeDescriptor node: live) { volumeFailuresTotal += node.getVolumeFailures(); @@ -5841,8 +5841,8 @@ public int getVolumeFailuresTotal() { @Metric({"EstimatedCapacityLostTotal", "An estimate of the total capacity lost due to volume failures"}) public long getEstimatedCapacityLostTotal() { - List live = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); + List live = new ArrayList<>(); + getBlockManager().getDatanodeManager().fetchDatanodesWithCache(live, null, false); long estimatedCapacityLostTotal = 0; for (DatanodeDescriptor node: live) { VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); @@ -6731,10 +6731,9 @@ public int getThreads() { */ @Override // NameNodeMXBean public String getLiveNodes() { - final Map> info = - new HashMap>(); - final List live = new ArrayList(); - blockManager.getDatanodeManager().fetchDatanodes(live, null, false); + final Map> info = new HashMap<>(); + final List live = new ArrayList<>(); + blockManager.getDatanodeManager().fetchDatanodesWithCache(live, null, false); for (DatanodeDescriptor node : live) { ImmutableMap.Builder innerinfo = ImmutableMap.builder(); @@ -6786,10 +6785,9 @@ public String getLiveNodes() { */ @Override // NameNodeMXBean public String getDeadNodes() { - final Map> info = - new HashMap>(); - final List dead = new ArrayList(); - blockManager.getDatanodeManager().fetchDatanodes(null, dead, false); + final Map> info = new HashMap<>(); + final List dead = new ArrayList<>(); + 
blockManager.getDatanodeManager().fetchDatanodesWithCache(null, dead, false); for (DatanodeDescriptor node : dead) { Map innerinfo = ImmutableMap.builder() .put("lastContact", getLastContact(node)) @@ -6917,10 +6915,9 @@ public String getNodeUsage() { float min = 0; float dev = 0; - final Map> info = - new HashMap>(); - final List live = new ArrayList(); - blockManager.getDatanodeManager().fetchDatanodes(live, null, true); + final Map> info = new HashMap<>(); + final List live = new ArrayList<>(); + blockManager.getDatanodeManager().fetchDatanodesWithCache(live, null, true); for (Iterator it = live.iterator(); it.hasNext();) { DatanodeDescriptor node = it.next(); if (!node.isInService()) { @@ -9095,8 +9092,8 @@ public long getBytesInFuture() { @Metric({"NumInMaintenanceLiveDataNodes", "Number of live Datanodes which are in maintenance state"}) public int getNumInMaintenanceLiveDataNodes() { - final List live = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); + final List live = new ArrayList<>(); + getBlockManager().getDatanodeManager().fetchDatanodesWithCache(live, null, true); int liveInMaintenance = 0; for (DatanodeDescriptor node : live) { liveInMaintenance += node.isInMaintenance() ? 1 : 0; @@ -9108,8 +9105,8 @@ public int getNumInMaintenanceLiveDataNodes() { @Metric({"NumInMaintenanceDeadDataNodes", "Number of dead Datanodes which are in maintenance state"}) public int getNumInMaintenanceDeadDataNodes() { - final List dead = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true); + final List dead = new ArrayList<>(); + getBlockManager().getDatanodeManager().fetchDatanodesWithCache(null, dead, true); int deadInMaintenance = 0; for (DatanodeDescriptor node : dead) { deadInMaintenance += node.isInMaintenance() ? 
1 : 0;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 7d7e53773ff07..7b8480892fd38 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -6720,4 +6720,14 @@
     If you have more than one package, separate the packages using commas.
   </description>
 </property>
+<property>
+  <name>dfs.namenode.datanode.list.cache.expiration.ms</name>
+  <value>0</value>
+  <description>
+    Set to a positive number to cache values for DatanodeManager.getDatanodeListForReport for
+    performance purposes. Milliseconds for cache expiration from insertion. Use 0 or a
+    negative value to disable this cache.
+    Non-metrics usage will bypass this cache (fsck, datanodeReport, etc.).
+  </description>
+</property>
 </configuration>