001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import com.google.common.annotations.VisibleForTesting; 021import com.google.common.base.Joiner; 022import com.google.common.base.Preconditions; 023import com.google.common.collect.Lists; 024 025import org.apache.commons.logging.Log; 026import org.apache.commons.logging.LogFactory; 027import org.apache.hadoop.HadoopIllegalArgumentException; 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Trash; 032import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 033import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; 034import org.apache.hadoop.ha.HAServiceStatus; 035import org.apache.hadoop.ha.HealthCheckFailedException; 036import org.apache.hadoop.ha.ServiceFailedException; 037import org.apache.hadoop.hdfs.DFSConfigKeys; 038import org.apache.hadoop.hdfs.DFSUtil; 039import org.apache.hadoop.hdfs.HAUtil; 040import org.apache.hadoop.hdfs.HdfsConfiguration; 041import org.apache.hadoop.hdfs.protocol.ClientProtocol; 042import 
org.apache.hadoop.hdfs.protocol.HdfsConstants; 043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 045import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 046import org.apache.hadoop.hdfs.server.namenode.ha.*; 047import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 048import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 049import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; 050import org.apache.hadoop.hdfs.server.protocol.*; 051import org.apache.hadoop.ipc.Server; 052import org.apache.hadoop.ipc.StandbyException; 053import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 054import org.apache.hadoop.metrics2.util.MBeans; 055import org.apache.hadoop.net.NetUtils; 056import org.apache.hadoop.security.AccessControlException; 057import org.apache.hadoop.security.RefreshUserMappingsProtocol; 058import org.apache.hadoop.security.SecurityUtil; 059import org.apache.hadoop.security.UserGroupInformation; 060import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; 061import org.apache.hadoop.ipc.RefreshCallQueueProtocol; 062import org.apache.hadoop.tools.GetUserMappingsProtocol; 063import org.apache.hadoop.tracing.SpanReceiverHost; 064import org.apache.hadoop.tracing.TraceAdminProtocol; 065import org.apache.hadoop.util.ExitUtil.ExitException; 066import org.apache.hadoop.util.GenericOptionsParser; 067import org.apache.hadoop.util.JvmPauseMonitor; 068import org.apache.hadoop.util.ServicePlugin; 069import org.apache.hadoop.util.StringUtils; 070 071import javax.management.ObjectName; 072 073import java.io.IOException; 074import java.io.PrintStream; 075import java.net.InetSocketAddress; 076import java.net.URI; 077import java.security.PrivilegedExceptionAction; 078import java.util.ArrayList; 079import java.util.Arrays; 080import 
java.util.Collection; 081import java.util.List; 082import java.util.concurrent.atomic.AtomicBoolean; 083 084import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 085import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 086import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 087import static org.apache.hadoop.hdfs.DFSConfigKeys.*; 088import static org.apache.hadoop.util.ExitUtil.terminate; 089import static org.apache.hadoop.util.ToolRunner.confirmPrompt; 090 091/********************************************************** 092 * NameNode serves as both directory namespace manager and 093 * "inode table" for the Hadoop DFS. There is a single NameNode 094 * running in any DFS deployment. (Well, except when there 095 * is a second backup/failover NameNode, or when using federated NameNodes.) 096 * 097 * The NameNode controls two critical tables: 098 * 1) filename->blocksequence (namespace) 099 * 2) block->machinelist ("inodes") 100 * 101 * The first table is stored on disk and is very precious. 102 * The second table is rebuilt every time the NameNode comes up. 103 * 104 * 'NameNode' refers to both this class as well as the 'NameNode server'. 105 * The 'FSNamesystem' class actually performs most of the filesystem 106 * management. The majority of the 'NameNode' class itself is concerned 107 * with exposing the IPC interface and the HTTP server to the outside world, 108 * plus some configuration management. 109 * 110 * NameNode implements the 111 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which 112 * allows clients to ask for DFS services. 113 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for 114 * direct use by authors of DFS client code. End-users should instead use the 115 * {@link org.apache.hadoop.fs.FileSystem} class. 
116 * 117 * NameNode also implements the 118 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface, 119 * used by DataNodes that actually store DFS data blocks. These 120 * methods are invoked repeatedly and automatically by all the 121 * DataNodes in a DFS deployment. 122 * 123 * NameNode also implements the 124 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface, 125 * used by secondary namenodes or rebalancing processes to get partial 126 * NameNode state, for example partial blocksMap etc. 127 **********************************************************/ 128@InterfaceAudience.Private 129public class NameNode implements NameNodeStatusMXBean { 130 static{ 131 HdfsConfiguration.init(); 132 } 133 134 /** 135 * Categories of operations supported by the namenode. 136 */ 137 public static enum OperationCategory { 138 /** Operations that are state agnostic */ 139 UNCHECKED, 140 /** Read operation that does not change the namespace state */ 141 READ, 142 /** Write operation that changes the namespace state */ 143 WRITE, 144 /** Operations related to checkpointing */ 145 CHECKPOINT, 146 /** Operations related to {@link JournalProtocol} */ 147 JOURNAL 148 } 149 150 /** 151 * HDFS configuration can have three types of parameters: 152 * <ol> 153 * <li>Parameters that are common for all the name services in the cluster.</li> 154 * <li>Parameters that are specific to a name service. These keys are suffixed 155 * with nameserviceId in the configuration. For example, 156 * "dfs.namenode.rpc-address.nameservice1".</li> 157 * <li>Parameters that are specific to a single name node. These keys are suffixed 158 * with nameserviceId and namenodeId in the configuration. for example, 159 * "dfs.namenode.rpc-address.nameservice1.namenode1"</li> 160 * </ol> 161 * 162 * In the latter cases, operators may specify the configuration without 163 * any suffix, with a nameservice suffix, or with a nameservice and namenode 164 * suffix. 
The more specific suffix will take precedence. 165 * 166 * These keys are specific to a given namenode, and thus may be configured 167 * globally, for a nameservice, or for a specific namenode within a nameservice. 168 */ 169 public static final String[] NAMENODE_SPECIFIC_KEYS = { 170 DFS_NAMENODE_RPC_ADDRESS_KEY, 171 DFS_NAMENODE_RPC_BIND_HOST_KEY, 172 DFS_NAMENODE_NAME_DIR_KEY, 173 DFS_NAMENODE_EDITS_DIR_KEY, 174 DFS_NAMENODE_SHARED_EDITS_DIR_KEY, 175 DFS_NAMENODE_CHECKPOINT_DIR_KEY, 176 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY, 177 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, 178 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, 179 DFS_NAMENODE_HTTP_ADDRESS_KEY, 180 DFS_NAMENODE_HTTPS_ADDRESS_KEY, 181 DFS_NAMENODE_HTTP_BIND_HOST_KEY, 182 DFS_NAMENODE_HTTPS_BIND_HOST_KEY, 183 DFS_NAMENODE_KEYTAB_FILE_KEY, 184 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, 185 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY, 186 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, 187 DFS_NAMENODE_BACKUP_ADDRESS_KEY, 188 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, 189 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY, 190 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, 191 DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY, 192 DFS_HA_FENCE_METHODS_KEY, 193 DFS_HA_ZKFC_PORT_KEY, 194 DFS_HA_FENCE_METHODS_KEY 195 }; 196 197 /** 198 * @see #NAMENODE_SPECIFIC_KEYS 199 * These keys are specific to a nameservice, but may not be overridden 200 * for a specific namenode. 
201 */ 202 public static final String[] NAMESERVICE_SPECIFIC_KEYS = { 203 DFS_HA_AUTO_FAILOVER_ENABLED_KEY 204 }; 205 206 private static final String USAGE = "Usage: java NameNode [" 207 + StartupOption.BACKUP.getName() + "] | \n\t[" 208 + StartupOption.CHECKPOINT.getName() + "] | \n\t[" 209 + StartupOption.FORMAT.getName() + " [" 210 + StartupOption.CLUSTERID.getName() + " cid ] [" 211 + StartupOption.FORCE.getName() + "] [" 212 + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t[" 213 + StartupOption.UPGRADE.getName() + 214 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 215 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t[" 216 + StartupOption.UPGRADEONLY.getName() + 217 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 218 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t[" 219 + StartupOption.ROLLBACK.getName() + "] | \n\t[" 220 + StartupOption.ROLLINGUPGRADE.getName() + " " 221 + RollingUpgradeStartupOption.getAllOptionString() + " ] | \n\t[" 222 + StartupOption.FINALIZE.getName() + "] | \n\t[" 223 + StartupOption.IMPORT.getName() + "] | \n\t[" 224 + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t[" 225 + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t[" 226 + StartupOption.RECOVER.getName() + " [ " 227 + StartupOption.FORCE.getName() + "] ] | \n\t[" 228 + StartupOption.METADATAVERSION.getName() + " ] " 229 + " ]"; 230 231 232 public long getProtocolVersion(String protocol, 233 long clientVersion) throws IOException { 234 if (protocol.equals(ClientProtocol.class.getName())) { 235 return ClientProtocol.versionID; 236 } else if (protocol.equals(DatanodeProtocol.class.getName())){ 237 return DatanodeProtocol.versionID; 238 } else if (protocol.equals(NamenodeProtocol.class.getName())){ 239 return NamenodeProtocol.versionID; 240 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){ 241 return RefreshAuthorizationPolicyProtocol.versionID; 242 } else if 
(protocol.equals(RefreshUserMappingsProtocol.class.getName())){ 243 return RefreshUserMappingsProtocol.versionID; 244 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) { 245 return RefreshCallQueueProtocol.versionID; 246 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ 247 return GetUserMappingsProtocol.versionID; 248 } else if (protocol.equals(TraceAdminProtocol.class.getName())){ 249 return TraceAdminProtocol.versionID; 250 } else { 251 throw new IOException("Unknown protocol to name node: " + protocol); 252 } 253 } 254 255 public static final int DEFAULT_PORT = 8020; 256 public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); 257 public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"); 258 public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange"); 259 public static final HAState ACTIVE_STATE = new ActiveState(); 260 public static final HAState STANDBY_STATE = new StandbyState(); 261 262 protected FSNamesystem namesystem; 263 protected final Configuration conf; 264 protected final NamenodeRole role; 265 private volatile HAState state; 266 private final boolean haEnabled; 267 private final HAContext haContext; 268 protected final boolean allowStaleStandbyReads; 269 private AtomicBoolean started = new AtomicBoolean(false); 270 271 272 /** httpServer */ 273 protected NameNodeHttpServer httpServer; 274 private Thread emptier; 275 /** only used for testing purposes */ 276 protected boolean stopRequested = false; 277 /** Registration information of this name-node */ 278 protected NamenodeRegistration nodeRegistration; 279 /** Activated plug-ins. 
*/ 280 private List<ServicePlugin> plugins; 281 282 private NameNodeRpcServer rpcServer; 283 284 private JvmPauseMonitor pauseMonitor; 285 private ObjectName nameNodeStatusBeanName; 286 SpanReceiverHost spanReceiverHost; 287 /** 288 * The namenode address that clients will use to access this namenode 289 * or the name service. For HA configurations using logical URI, it 290 * will be the logical address. 291 */ 292 private String clientNamenodeAddress; 293 294 /** Format a new filesystem. Destroys any filesystem that may already 295 * exist at this location. **/ 296 public static void format(Configuration conf) throws IOException { 297 format(conf, true, true); 298 } 299 300 static NameNodeMetrics metrics; 301 private static final StartupProgress startupProgress = new StartupProgress(); 302 /** Return the {@link FSNamesystem} object. 303 * @return {@link FSNamesystem} object. 304 */ 305 public FSNamesystem getNamesystem() { 306 return namesystem; 307 } 308 309 public NamenodeProtocols getRpcServer() { 310 return rpcServer; 311 } 312 313 static void initMetrics(Configuration conf, NamenodeRole role) { 314 metrics = NameNodeMetrics.create(conf, role); 315 } 316 317 public static NameNodeMetrics getNameNodeMetrics() { 318 return metrics; 319 } 320 321 /** 322 * Returns object used for reporting namenode startup progress. 323 * 324 * @return StartupProgress for reporting namenode startup progress 325 */ 326 public static StartupProgress getStartupProgress() { 327 return startupProgress; 328 } 329 330 /** 331 * Return the service name of the issued delegation token. 332 * 333 * @return The name service id in HA-mode, or the rpc address in non-HA mode 334 */ 335 public String getTokenServiceName() { 336 return getClientNamenodeAddress(); 337 } 338 339 /** 340 * Set the namenode address that will be used by clients to access this 341 * namenode or name service. This needs to be called before the config 342 * is overriden. 
343 */ 344 public void setClientNamenodeAddress(Configuration conf) { 345 String nnAddr = conf.get(FS_DEFAULT_NAME_KEY); 346 if (nnAddr == null) { 347 // default fs is not set. 348 clientNamenodeAddress = null; 349 return; 350 } 351 352 LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr); 353 URI nnUri = URI.create(nnAddr); 354 355 String nnHost = nnUri.getHost(); 356 if (nnHost == null) { 357 clientNamenodeAddress = null; 358 return; 359 } 360 361 if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) { 362 // host name is logical 363 clientNamenodeAddress = nnHost; 364 } else if (nnUri.getPort() > 0) { 365 // physical address with a valid port 366 clientNamenodeAddress = nnUri.getAuthority(); 367 } else { 368 // the port is missing or 0. Figure out real bind address later. 369 clientNamenodeAddress = null; 370 return; 371 } 372 LOG.info("Clients are to use " + clientNamenodeAddress + " to access" 373 + " this namenode/service."); 374 } 375 376 /** 377 * Get the namenode address to be used by clients. 378 * @return nn address 379 */ 380 public String getClientNamenodeAddress() { 381 return clientNamenodeAddress; 382 } 383 384 public static InetSocketAddress getAddress(String address) { 385 return NetUtils.createSocketAddr(address, DEFAULT_PORT); 386 } 387 388 /** 389 * Set the configuration property for the service rpc address 390 * to address 391 */ 392 public static void setServiceAddress(Configuration conf, 393 String address) { 394 LOG.info("Setting ADDRESS " + address); 395 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address); 396 } 397 398 /** 399 * Fetches the address for services to use when connecting to namenode 400 * based on the value of fallback returns null if the special 401 * address is not specified or returns the default namenode address 402 * to be used by both clients and services. 
403 * Services here are datanodes, backup node, any non client connection 404 */ 405 public static InetSocketAddress getServiceAddress(Configuration conf, 406 boolean fallback) { 407 String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); 408 if (addr == null || addr.isEmpty()) { 409 return fallback ? getAddress(conf) : null; 410 } 411 return getAddress(addr); 412 } 413 414 public static InetSocketAddress getAddress(Configuration conf) { 415 URI filesystemURI = FileSystem.getDefaultUri(conf); 416 return getAddress(filesystemURI); 417 } 418 419 420 /** 421 * @return address of file system 422 */ 423 public static InetSocketAddress getAddress(URI filesystemURI) { 424 String authority = filesystemURI.getAuthority(); 425 if (authority == null) { 426 throw new IllegalArgumentException(String.format( 427 "Invalid URI for NameNode address (check %s): %s has no authority.", 428 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString())); 429 } 430 if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase( 431 filesystemURI.getScheme())) { 432 throw new IllegalArgumentException(String.format( 433 "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.", 434 FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(), 435 HdfsConstants.HDFS_URI_SCHEME)); 436 } 437 return getAddress(authority); 438 } 439 440 public static URI getUri(InetSocketAddress namenode) { 441 int port = namenode.getPort(); 442 String portString = port == DEFAULT_PORT ? "" : (":"+port); 443 return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 444 + namenode.getHostName()+portString); 445 } 446 447 // 448 // Common NameNode methods implementation for the active name-node role. 
  //
  /** @return the role (NAMENODE/BACKUP/CHECKPOINT) this node was started as. */
  public NamenodeRole getRole() {
    return role;
  }

  /** @return true iff this node is running in the given role. */
  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }

  /**
   * Given a configuration get the address of the service rpc server
   * If the service rpc is not configured returns null
   */
  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
    return NameNode.getServiceAddress(conf, false);
  }

  /** @return the client RPC address derived from the default filesystem URI. */
  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }

  /** Given a configuration get the bind host of the service rpc server
   *  If the bind host is not configured returns null.
   */
  protected String getServiceRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /** Given a configuration get the bind host of the client rpc server
   *  If the bind host is not configured returns null.
   */
  protected String getRpcServerBindHost(Configuration conf) {
    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
    if (addr == null || addr.isEmpty()) {
      return null;
    }
    return addr;
  }

  /**
   * Modifies the configuration passed to contain the service rpc address
   * setting.
   */
  protected void setRpcServiceServerAddress(Configuration conf,
      InetSocketAddress serviceRPCAddress) {
    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
  }

  /** Record the actual client RPC address back into the default fs URI. */
  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }

  /** @return the advertised HTTP address (may differ from the bind address). */
  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }

  /**
   * HTTP server address for binding the endpoint. This method is
   * for use by the NameNode and its derivatives. It may return
   * a different address than the one that should be used by clients to
   * connect to the NameNode. See
   * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY}
   *
   * @param conf configuration to read the bind-host override from
   * @return the address the HTTP server should bind to
   */
  protected InetSocketAddress getHttpServerBindAddress(Configuration conf) {
    InetSocketAddress bindAddress = getHttpServerAddress(conf);

    // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the
    // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY.
    final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY);
    if (bindHost != null && !bindHost.isEmpty()) {
      bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort());
    }

    return bindAddress;
  }

  /** @return the NameNode HTTP address. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }

  /** Load the namespace (fsimage + edits) from disk into memory. */
  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }

  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }

  /** Build and cache this node's registration from the live RPC/HTTP/image
   *  state; must be called after the RPC and HTTP servers are up. */
  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }

  /* optimize ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser which is synch'ed
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }


  /**
   * Login as the configured user for the NameNode.
   */
  void loginAsNameNodeUser(Configuration conf) throws IOException {
    // The RPC host name is used to substitute _HOST in the principal.
    InetSocketAddress socAddr = getRpcServerAddress(conf);
    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
        DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
  }

  /**
   * Initialize name-node.
   *
   * Ordering matters here: security login must precede metrics and server
   * startup, the HTTP server starts before the namespace is loaded (for the
   * NAMENODE role) so startup progress is visible, and common services start
   * last.
   *
   * @param conf the configuration
   * @throws IOException if the namespace cannot be loaded or servers fail
   */
  protected void initialize(Configuration conf) throws IOException {
    // Propagate the DFS percentiles setting to the UGI metrics, unless the
    // operator has set the UGI key explicitly.
    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
      if (intervals != null) {
        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
          intervals);
      }
    }

    UserGroupInformation.setConfiguration(conf);
    loginAsNameNodeUser(conf);

    NameNode.initMetrics(conf, this.getRole());
    StartupProgressMetrics.register(startupProgress);

    // For the regular NameNode the HTTP server starts early so that the
    // startup-progress page is reachable while the image loads.
    if (NamenodeRole.NAMENODE == role) {
      startHttpServer(conf);
    }

    this.spanReceiverHost = SpanReceiverHost.getInstance(conf);

    loadNamesystem(conf);

    rpcServer = createRpcServer(conf);
    if (clientNamenodeAddress == null) {
      // This is expected for MiniDFSCluster. Set it now using 
      // the RPC server's bind address.
      clientNamenodeAddress = 
          NetUtils.getHostPortString(rpcServer.getRpcAddress());
      LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
          + " this namenode/service.");
    }
    if (NamenodeRole.NAMENODE == role) {
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    
    pauseMonitor = new JvmPauseMonitor(conf);
    pauseMonitor.start();
    metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);

    startCommonServices(conf);
  }

  /**
   * Create the RPC server implementation. Used as an extension point for the
   * BackupNode.
   */
  protected NameNodeRpcServer createRpcServer(Configuration conf)
      throws IOException {
    return new NameNodeRpcServer(conf, this);
  }

  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    // Non-NAMENODE roles (e.g. BackupNode) start their HTTP server here
    // instead of early in initialize().
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        // A failing plugin must not abort namenode startup; log and continue.
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }

  /** Stop RPC, namesystem, pause monitor, plugins and HTTP, in that order. */
  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          // Best-effort shutdown: one bad plugin must not block the rest.
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }   
    stopHttpServer();
  }
  
  /**
   * Start the trash emptier daemon if fs.trash.interval is positive.
   * A zero interval disables trash; a negative one is a config error.
   */
  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }
    
    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
  
  /** Interrupt and drop the trash emptier thread, if running. */
  private void stopTrashEmptier() {
    if (this.emptier != null) {
      emptier.interrupt();
      emptier = null;
    }
  }
  
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }
  
  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      // Swallow on purpose: shutdown must proceed past HTTP failures.
      LOG.error("Exception while stopping httpserver", e);
    }
  }

  /**
   * Start NameNode.
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul> 
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
   * upgrade and create a snapshot of the current file system state</li>
   * <li>{@link StartupOption#UPGRADEONLY UPGRADEONLY} - upgrade the cluster
   * without restarting it afterwards</li>
   * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the 
   *            cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 
   *            previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via configuration field: 
   * <tt>dfs.namenode.startup</tt>
   * 
   * The conf will be modified to reflect the actual ports on which 
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
   * 
   * @param conf  configuration
   * @throws IOException
   */
  public NameNode(Configuration conf) throws IOException {
    this(conf, NamenodeRole.NAMENODE);
  }

  /**
   * Construct a NameNode in the given role. Resolves the nameservice and
   * namenode ids, decides the initial HA state, initializes servers, and
   * finally enters that HA state under the HA context write lock. On any
   * failure the partially-started node is stopped before rethrowing.
   */
  protected NameNode(Configuration conf, NamenodeRole role) 
      throws IOException { 
    this.conf = conf;
    this.role = role;
    // Must run before generic keys rewrite the config below.
    setClientNamenodeAddress(conf);
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    state = createHAState(getStartupOption(conf));
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      try {
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
    this.started.set(true);
  }

  /**
   * Choose the initial HA state. Upgrades always start active; otherwise a
   * non-HA node is active and an HA node starts standby.
   */
  protected HAState createHAState(StartupOption startOpt) {
    if (!haEnabled || startOpt == StartupOption.UPGRADE 
        || startOpt == StartupOption.UPGRADEONLY) {
      return ACTIVE_STATE;
    } else {
      return STANDBY_STATE;
    }
  }

  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }

  /**
   * Wait for service to finish.
   * (Normally, it runs forever.)
   */
  public void join() {
    try {
      rpcServer.join();
    } catch (InterruptedException ie) {
      LOG.info("Caught interrupted exception ", ie);
    }
  }

  /**
   * Stop all NameNode threads and wait for all to finish.
   * Idempotent: subsequent calls return immediately.
   */
  public void stop() {
    synchronized(this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      // Leave the current HA state first so standby/active services quiesce.
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      // Always tear down servers, metrics, namesystem and MXBean, even if
      // exiting the HA state failed.
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
      if (this.spanReceiverHost != null) {
        this.spanReceiverHost.closeReceivers();
      }
    }
  }

  synchronized boolean isStopRequested() {
    return stopRequested;
  }

  /**
   * Is the cluster currently in safe mode?
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }

  /** get FSImage */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.getFSImage();
  }

  /**
   * @return NameNode RPC address
   */
  public InetSocketAddress getNameNodeAddress() {
    return rpcServer.getRpcAddress();
  }

  /**
   * @return NameNode RPC address in "host:port" string form
   */
  public String getNameNodeAddressHostPortString() {
    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
  }

  /**
   * @return NameNode service RPC address if configured, the
   *    NameNode RPC address otherwise
   */
  public InetSocketAddress getServiceRpcAddress() {
    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
  }

  /**
   * @return NameNode HTTP address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpAddress() {
    return httpServer.getHttpAddress();
  }

  /**
   * @return NameNode HTTPS address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpsAddress() {
    return httpServer.getHttpsAddress();
  }

  /**
   * Verify that configured directories exist, then
   * Interactively confirm that formatting is desired 
   * for each existing directory and format them.
   * 
   * @param conf configuration to use
   * @param force if true, format regardless of whether dirs exist
   * @param isInteractive if true, prompt the user before formatting each
   *        existing directory
   * @return true if formatting was aborted, false otherwise
   * @throws IOException
   */
  private static boolean format(Configuration conf, boolean force,
      boolean isInteractive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    checkAllowFormat(conf);

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
    }
    
    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
    List<URI> dirsToPrompt = new ArrayList<URI>();
    dirsToPrompt.addAll(nameDirsToFormat);
    dirsToPrompt.addAll(sharedDirs);
    List<URI> editDirsToFormat = 
                 FSNamesystem.getNamespaceEditsDirs(conf);

    // if clusterID is not provided - see if you can find the current one
    String clusterId = StartupOption.FORMAT.getClusterId();
    if(clusterId == null || clusterId.equals("")) {
      //Generate a new cluster id
      clusterId = NNStorage.newClusterID();
    }
    System.out.println("Formatting using clusterid: " + clusterId);
    
    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
    try {
      FSNamesystem fsn = new FSNamesystem(conf, fsImage);
      fsImage.getEditLog().initJournalsForWrite();

      if (!fsImage.confirmFormat(force, isInteractive)) {
        return true; // aborted
      }

      fsImage.format(fsn, clusterId);
    } catch (IOException ioe) {
      // NOTE(review): fsImage is closed only on the failure path here; the
      // success path appears to rely on process exit — confirm intentional.
      LOG.warn("Encountered exception during format: ", ioe);
      fsImage.close();
      throw ioe;
    }
    return false;
  }

  /** Throw unless dfs.namenode.support.allow.format permits formatting. */
  public static void checkAllowFormat(Configuration conf) throws IOException {
    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 
        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
          + " is set to false for this filesystem, so it "
          + "cannot be formatted. You will need to set "
          + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
          + "to true in order to format this filesystem");
    }
  }
  
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }
  
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }

  /**
   * Clone the supplied configuration but remove the shared edits dirs.
   *
   * @param conf Supplies the original configuration.
   * @return Cloned configuration without the shared edit dirs.
   * @throws IOException on failure to generate the configuration.
982 */ 983 private static Configuration getConfigurationWithoutSharedEdits( 984 Configuration conf) 985 throws IOException { 986 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false); 987 String editsDirsString = Joiner.on(",").join(editsDirs); 988 989 Configuration confWithoutShared = new Configuration(conf); 990 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 991 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, 992 editsDirsString); 993 return confWithoutShared; 994 } 995 996 /** 997 * Format a new shared edits dir and copy in enough edit log segments so that 998 * the standby NN can start up. 999 * 1000 * @param conf configuration 1001 * @param force format regardless of whether or not the shared edits dir exists 1002 * @param interactive prompt the user when a dir exists 1003 * @return true if the command aborts, false otherwise 1004 */ 1005 private static boolean initializeSharedEdits(Configuration conf, 1006 boolean force, boolean interactive) throws IOException { 1007 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1008 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1009 initializeGenericKeys(conf, nsId, namenodeId); 1010 1011 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) { 1012 LOG.fatal("No shared edits directory configured for namespace " + 1013 nsId + " namenode " + namenodeId); 1014 return false; 1015 } 1016 1017 if (UserGroupInformation.isSecurityEnabled()) { 1018 InetSocketAddress socAddr = getAddress(conf); 1019 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 1020 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 1021 } 1022 1023 NNStorage existingStorage = null; 1024 FSImage sharedEditsImage = null; 1025 try { 1026 FSNamesystem fsns = 1027 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf)); 1028 1029 existingStorage = fsns.getFSImage().getStorage(); 1030 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo(); 
1031 1032 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); 1033 1034 sharedEditsImage = new FSImage(conf, 1035 Lists.<URI>newArrayList(), 1036 sharedEditsDirs); 1037 sharedEditsImage.getEditLog().initJournalsForWrite(); 1038 1039 if (!sharedEditsImage.confirmFormat(force, interactive)) { 1040 return true; // abort 1041 } 1042 1043 NNStorage newSharedStorage = sharedEditsImage.getStorage(); 1044 // Call Storage.format instead of FSImage.format here, since we don't 1045 // actually want to save a checkpoint - just prime the dirs with 1046 // the existing namespace info 1047 newSharedStorage.format(nsInfo); 1048 sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo); 1049 1050 // Need to make sure the edit log segments are in good shape to initialize 1051 // the shared edits dir. 1052 fsns.getFSImage().getEditLog().close(); 1053 fsns.getFSImage().getEditLog().initJournalsForWrite(); 1054 fsns.getFSImage().getEditLog().recoverUnclosedStreams(); 1055 1056 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage, 1057 conf); 1058 } catch (IOException ioe) { 1059 LOG.error("Could not initialize shared edits dir", ioe); 1060 return true; // aborted 1061 } finally { 1062 if (sharedEditsImage != null) { 1063 try { 1064 sharedEditsImage.close(); 1065 } catch (IOException ioe) { 1066 LOG.warn("Could not close sharedEditsImage", ioe); 1067 } 1068 } 1069 // Have to unlock storage explicitly for the case when we're running in a 1070 // unit test, which runs in the same JVM as NNs. 
1071 if (existingStorage != null) { 1072 try { 1073 existingStorage.unlockAll(); 1074 } catch (IOException ioe) { 1075 LOG.warn("Could not unlock storage directories", ioe); 1076 return true; // aborted 1077 } 1078 } 1079 } 1080 return false; // did not abort 1081 } 1082 1083 private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns, 1084 Collection<URI> sharedEditsDirs, NNStorage newSharedStorage, 1085 Configuration conf) throws IOException { 1086 Preconditions.checkArgument(!sharedEditsDirs.isEmpty(), 1087 "No shared edits specified"); 1088 // Copy edit log segments into the new shared edits dir. 1089 List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs); 1090 FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage, 1091 sharedEditsUris); 1092 newSharedEditLog.initJournalsForWrite(); 1093 newSharedEditLog.recoverUnclosedStreams(); 1094 1095 FSEditLog sourceEditLog = fsns.getFSImage().editLog; 1096 1097 long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId(); 1098 1099 Collection<EditLogInputStream> streams = null; 1100 try { 1101 streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0); 1102 1103 // Set the nextTxid to the CheckpointTxId+1 1104 newSharedEditLog.setNextTxId(fromTxId + 1); 1105 1106 // Copy all edits after last CheckpointTxId to shared edits dir 1107 for (EditLogInputStream stream : streams) { 1108 LOG.debug("Beginning to copy stream " + stream + " to shared edits"); 1109 FSEditLogOp op; 1110 boolean segmentOpen = false; 1111 while ((op = stream.readOp()) != null) { 1112 if (LOG.isTraceEnabled()) { 1113 LOG.trace("copying op: " + op); 1114 } 1115 if (!segmentOpen) { 1116 newSharedEditLog.startLogSegment(op.txid, false); 1117 segmentOpen = true; 1118 } 1119 1120 newSharedEditLog.logEdit(op); 1121 1122 if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) { 1123 newSharedEditLog.logSync(); 1124 newSharedEditLog.endCurrentLogSegment(false); 1125 LOG.debug("ending log segment because of END_LOG_SEGMENT op in " 
1126 + stream); 1127 segmentOpen = false; 1128 } 1129 } 1130 1131 if (segmentOpen) { 1132 LOG.debug("ending log segment because of end of stream in " + stream); 1133 newSharedEditLog.logSync(); 1134 newSharedEditLog.endCurrentLogSegment(false); 1135 segmentOpen = false; 1136 } 1137 } 1138 } finally { 1139 if (streams != null) { 1140 FSEditLog.closeAllStreams(streams); 1141 } 1142 } 1143 } 1144 1145 @VisibleForTesting 1146 public static boolean doRollback(Configuration conf, 1147 boolean isConfirmationNeeded) throws IOException { 1148 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1149 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1150 initializeGenericKeys(conf, nsId, namenodeId); 1151 1152 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf)); 1153 System.err.print( 1154 "\"rollBack\" will remove the current state of the file system,\n" 1155 + "returning you to the state prior to initiating your recent.\n" 1156 + "upgrade. This action is permanent and cannot be undone. If you\n" 1157 + "are performing a rollback in an HA environment, you should be\n" 1158 + "certain that no NameNode process is running on any host."); 1159 if (isConfirmationNeeded) { 1160 if (!confirmPrompt("Roll back file system state?")) { 1161 System.err.println("Rollback aborted."); 1162 return true; 1163 } 1164 } 1165 nsys.getFSImage().doRollback(nsys); 1166 return false; 1167 } 1168 1169 private static void printUsage(PrintStream out) { 1170 out.println(USAGE + "\n"); 1171 } 1172 1173 @VisibleForTesting 1174 static StartupOption parseArguments(String args[]) { 1175 int argsLen = (args == null) ? 
0 : args.length; 1176 StartupOption startOpt = StartupOption.REGULAR; 1177 for(int i=0; i < argsLen; i++) { 1178 String cmd = args[i]; 1179 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { 1180 startOpt = StartupOption.FORMAT; 1181 for (i = i + 1; i < argsLen; i++) { 1182 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1183 i++; 1184 if (i >= argsLen) { 1185 // if no cluster id specified, return null 1186 LOG.fatal("Must specify a valid cluster ID after the " 1187 + StartupOption.CLUSTERID.getName() + " flag"); 1188 return null; 1189 } 1190 String clusterId = args[i]; 1191 // Make sure an id is specified and not another flag 1192 if (clusterId.isEmpty() || 1193 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) || 1194 clusterId.equalsIgnoreCase( 1195 StartupOption.NONINTERACTIVE.getName())) { 1196 LOG.fatal("Must specify a valid cluster ID after the " 1197 + StartupOption.CLUSTERID.getName() + " flag"); 1198 return null; 1199 } 1200 startOpt.setClusterId(clusterId); 1201 } 1202 1203 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) { 1204 startOpt.setForceFormat(true); 1205 } 1206 1207 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) { 1208 startOpt.setInteractiveFormat(false); 1209 } 1210 } 1211 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) { 1212 startOpt = StartupOption.GENCLUSTERID; 1213 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) { 1214 startOpt = StartupOption.REGULAR; 1215 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) { 1216 startOpt = StartupOption.BACKUP; 1217 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) { 1218 startOpt = StartupOption.CHECKPOINT; 1219 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) 1220 || StartupOption.UPGRADEONLY.getName().equalsIgnoreCase(cmd)) { 1221 startOpt = StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) ? 
1222 StartupOption.UPGRADE : StartupOption.UPGRADEONLY; 1223 /* Can be followed by CLUSTERID with a required parameter or 1224 * RENAMERESERVED with an optional parameter 1225 */ 1226 while (i + 1 < argsLen) { 1227 String flag = args[i + 1]; 1228 if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1229 if (i + 2 < argsLen) { 1230 i += 2; 1231 startOpt.setClusterId(args[i]); 1232 } else { 1233 LOG.fatal("Must specify a valid cluster ID after the " 1234 + StartupOption.CLUSTERID.getName() + " flag"); 1235 return null; 1236 } 1237 } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED 1238 .getName())) { 1239 if (i + 2 < argsLen) { 1240 FSImageFormat.setRenameReservedPairs(args[i + 2]); 1241 i += 2; 1242 } else { 1243 FSImageFormat.useDefaultRenameReservedPairs(); 1244 i += 1; 1245 } 1246 } else { 1247 LOG.fatal("Unknown upgrade flag " + flag); 1248 return null; 1249 } 1250 } 1251 } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) { 1252 startOpt = StartupOption.ROLLINGUPGRADE; 1253 ++i; 1254 if (i >= argsLen) { 1255 LOG.fatal("Must specify a rolling upgrade startup option " 1256 + RollingUpgradeStartupOption.getAllOptionString()); 1257 return null; 1258 } 1259 startOpt.setRollingUpgradeStartupOption(args[i]); 1260 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) { 1261 startOpt = StartupOption.ROLLBACK; 1262 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) { 1263 startOpt = StartupOption.FINALIZE; 1264 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) { 1265 startOpt = StartupOption.IMPORT; 1266 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) { 1267 startOpt = StartupOption.BOOTSTRAPSTANDBY; 1268 return startOpt; 1269 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { 1270 startOpt = StartupOption.INITIALIZESHAREDEDITS; 1271 for (i = i + 1 ; i < argsLen; i++) { 1272 if 
(StartupOption.NONINTERACTIVE.getName().equals(args[i])) { 1273 startOpt.setInteractiveFormat(false); 1274 } else if (StartupOption.FORCE.getName().equals(args[i])) { 1275 startOpt.setForceFormat(true); 1276 } else { 1277 LOG.fatal("Invalid argument: " + args[i]); 1278 return null; 1279 } 1280 } 1281 return startOpt; 1282 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) { 1283 if (startOpt != StartupOption.REGULAR) { 1284 throw new RuntimeException("Can't combine -recover with " + 1285 "other startup options."); 1286 } 1287 startOpt = StartupOption.RECOVER; 1288 while (++i < argsLen) { 1289 if (args[i].equalsIgnoreCase( 1290 StartupOption.FORCE.getName())) { 1291 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE); 1292 } else { 1293 throw new RuntimeException("Error parsing recovery options: " + 1294 "can't understand option \"" + args[i] + "\""); 1295 } 1296 } 1297 } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) { 1298 startOpt = StartupOption.METADATAVERSION; 1299 } else { 1300 return null; 1301 } 1302 } 1303 return startOpt; 1304 } 1305 1306 private static void setStartupOption(Configuration conf, StartupOption opt) { 1307 conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name()); 1308 } 1309 1310 static StartupOption getStartupOption(Configuration conf) { 1311 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY, 1312 StartupOption.REGULAR.toString())); 1313 } 1314 1315 private static void doRecovery(StartupOption startOpt, Configuration conf) 1316 throws IOException { 1317 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1318 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1319 initializeGenericKeys(conf, nsId, namenodeId); 1320 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { 1321 if (!confirmPrompt("You have selected Metadata Recovery mode. " + 1322 "This mode is intended to recover lost metadata on a corrupt " + 1323 "filesystem. 
Metadata recovery mode often permanently deletes " + 1324 "data from your HDFS filesystem. Please back up your edit log " + 1325 "and fsimage before trying this!\n\n" + 1326 "Are you ready to proceed? (Y/N)\n")) { 1327 System.err.println("Recovery aborted at user request.\n"); 1328 return; 1329 } 1330 } 1331 MetaRecoveryContext.LOG.info("starting recovery..."); 1332 UserGroupInformation.setConfiguration(conf); 1333 NameNode.initMetrics(conf, startOpt.toNodeRole()); 1334 FSNamesystem fsn = null; 1335 try { 1336 fsn = FSNamesystem.loadFromDisk(conf); 1337 fsn.getFSImage().saveNamespace(fsn); 1338 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE"); 1339 } catch (IOException e) { 1340 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1341 throw e; 1342 } catch (RuntimeException e) { 1343 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1344 throw e; 1345 } finally { 1346 if (fsn != null) 1347 fsn.close(); 1348 } 1349 } 1350 1351 /** 1352 * Verify that configured directories exist, then print the metadata versions 1353 * of the software and the image. 1354 * 1355 * @param conf configuration to use 1356 * @throws IOException 1357 */ 1358 private static boolean printMetadataVersion(Configuration conf) 1359 throws IOException { 1360 final String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1361 final String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1362 NameNode.initializeGenericKeys(conf, nsId, namenodeId); 1363 final FSImage fsImage = new FSImage(conf); 1364 final FSNamesystem fs = new FSNamesystem(conf, fsImage, false); 1365 return fsImage.recoverTransitionRead( 1366 StartupOption.METADATAVERSION, fs, null); 1367 } 1368 1369 public static NameNode createNameNode(String argv[], Configuration conf) 1370 throws IOException { 1371 LOG.info("createNameNode " + Arrays.asList(argv)); 1372 if (conf == null) 1373 conf = new HdfsConfiguration(); 1374 // Parse out some generic args into Configuration. 
1375 GenericOptionsParser hParser = new GenericOptionsParser(conf, argv); 1376 argv = hParser.getRemainingArgs(); 1377 // Parse the rest, NN specific args. 1378 StartupOption startOpt = parseArguments(argv); 1379 if (startOpt == null) { 1380 printUsage(System.err); 1381 return null; 1382 } 1383 setStartupOption(conf, startOpt); 1384 1385 switch (startOpt) { 1386 case FORMAT: { 1387 boolean aborted = format(conf, startOpt.getForceFormat(), 1388 startOpt.getInteractiveFormat()); 1389 terminate(aborted ? 1 : 0); 1390 return null; // avoid javac warning 1391 } 1392 case GENCLUSTERID: { 1393 System.err.println("Generating new cluster id:"); 1394 System.out.println(NNStorage.newClusterID()); 1395 terminate(0); 1396 return null; 1397 } 1398 case FINALIZE: { 1399 System.err.println("Use of the argument '" + StartupOption.FINALIZE + 1400 "' is no longer supported. To finalize an upgrade, start the NN " + 1401 " and then run `hdfs dfsadmin -finalizeUpgrade'"); 1402 terminate(1); 1403 return null; // avoid javac warning 1404 } 1405 case ROLLBACK: { 1406 boolean aborted = doRollback(conf, true); 1407 terminate(aborted ? 1 : 0); 1408 return null; // avoid warning 1409 } 1410 case BOOTSTRAPSTANDBY: { 1411 String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length); 1412 int rc = BootstrapStandby.run(toolArgs, conf); 1413 terminate(rc); 1414 return null; // avoid warning 1415 } 1416 case INITIALIZESHAREDEDITS: { 1417 boolean aborted = initializeSharedEdits(conf, 1418 startOpt.getForceFormat(), 1419 startOpt.getInteractiveFormat()); 1420 terminate(aborted ? 
1 : 0); 1421 return null; // avoid warning 1422 } 1423 case BACKUP: 1424 case CHECKPOINT: { 1425 NamenodeRole role = startOpt.toNodeRole(); 1426 DefaultMetricsSystem.initialize(role.toString().replace(" ", "")); 1427 return new BackupNode(conf, role); 1428 } 1429 case RECOVER: { 1430 NameNode.doRecovery(startOpt, conf); 1431 return null; 1432 } 1433 case METADATAVERSION: { 1434 printMetadataVersion(conf); 1435 terminate(0); 1436 return null; // avoid javac warning 1437 } 1438 case UPGRADEONLY: { 1439 DefaultMetricsSystem.initialize("NameNode"); 1440 new NameNode(conf); 1441 terminate(0); 1442 return null; 1443 } 1444 default: { 1445 DefaultMetricsSystem.initialize("NameNode"); 1446 return new NameNode(conf); 1447 } 1448 } 1449 } 1450 1451 /** 1452 * In federation configuration is set for a set of 1453 * namenode and secondary namenode/backup/checkpointer, which are 1454 * grouped under a logical nameservice ID. The configuration keys specific 1455 * to them have suffix set to configured nameserviceId. 1456 * 1457 * This method copies the value from specific key of format key.nameserviceId 1458 * to key, to set up the generic configuration. Once this is done, only 1459 * generic version of the configuration is read in rest of the code, for 1460 * backward compatibility and simpler code changes. 1461 * 1462 * @param conf 1463 * Configuration object to lookup specific key and to set the value 1464 * to the key passed. Note the conf object is modified 1465 * @param nameserviceId name service Id (to distinguish federated NNs) 1466 * @param namenodeId the namenode ID (to distinguish HA NNs) 1467 * @see DFSUtil#setGenericConf(Configuration, String, String, String...) 
1468 */ 1469 public static void initializeGenericKeys(Configuration conf, 1470 String nameserviceId, String namenodeId) { 1471 if ((nameserviceId != null && !nameserviceId.isEmpty()) || 1472 (namenodeId != null && !namenodeId.isEmpty())) { 1473 if (nameserviceId != null) { 1474 conf.set(DFS_NAMESERVICE_ID, nameserviceId); 1475 } 1476 if (namenodeId != null) { 1477 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId); 1478 } 1479 1480 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId, 1481 NAMENODE_SPECIFIC_KEYS); 1482 DFSUtil.setGenericConf(conf, nameserviceId, null, 1483 NAMESERVICE_SPECIFIC_KEYS); 1484 } 1485 1486 // If the RPC address is set use it to (re-)configure the default FS 1487 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { 1488 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 1489 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); 1490 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); 1491 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString()); 1492 } 1493 } 1494 1495 /** 1496 * Get the name service Id for the node 1497 * @return name service Id or null if federation is not configured 1498 */ 1499 protected String getNameServiceId(Configuration conf) { 1500 return DFSUtil.getNamenodeNameServiceId(conf); 1501 } 1502 1503 /** 1504 */ 1505 public static void main(String argv[]) throws Exception { 1506 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) { 1507 System.exit(0); 1508 } 1509 1510 try { 1511 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG); 1512 NameNode namenode = createNameNode(argv, null); 1513 if (namenode != null) { 1514 namenode.join(); 1515 } 1516 } catch (Throwable e) { 1517 LOG.fatal("Failed to start namenode.", e); 1518 terminate(1, e); 1519 } 1520 } 1521 1522 synchronized void monitorHealth() 1523 throws HealthCheckFailedException, AccessControlException { 1524 namesystem.checkSuperuserPrivilege(); 1525 if (!haEnabled) { 1526 return; // no-op, if HA is not enabled 
1527 } 1528 getNamesystem().checkAvailableResources(); 1529 if (!getNamesystem().nameNodeHasResourcesAvailable()) { 1530 throw new HealthCheckFailedException( 1531 "The NameNode has no resources available"); 1532 } 1533 } 1534 1535 synchronized void transitionToActive() 1536 throws ServiceFailedException, AccessControlException { 1537 namesystem.checkSuperuserPrivilege(); 1538 if (!haEnabled) { 1539 throw new ServiceFailedException("HA for namenode is not enabled"); 1540 } 1541 state.setState(haContext, ACTIVE_STATE); 1542 } 1543 1544 synchronized void transitionToStandby() 1545 throws ServiceFailedException, AccessControlException { 1546 namesystem.checkSuperuserPrivilege(); 1547 if (!haEnabled) { 1548 throw new ServiceFailedException("HA for namenode is not enabled"); 1549 } 1550 state.setState(haContext, STANDBY_STATE); 1551 } 1552 1553 synchronized HAServiceStatus getServiceStatus() 1554 throws ServiceFailedException, AccessControlException { 1555 namesystem.checkSuperuserPrivilege(); 1556 if (!haEnabled) { 1557 throw new ServiceFailedException("HA for namenode is not enabled"); 1558 } 1559 if (state == null) { 1560 return new HAServiceStatus(HAServiceState.INITIALIZING); 1561 } 1562 HAServiceState retState = state.getServiceState(); 1563 HAServiceStatus ret = new HAServiceStatus(retState); 1564 if (retState == HAServiceState.STANDBY) { 1565 String safemodeTip = namesystem.getSafeModeTip(); 1566 if (!safemodeTip.isEmpty()) { 1567 ret.setNotReadyToBecomeActive( 1568 "The NameNode is in safemode. 
" + 1569 safemodeTip); 1570 } else { 1571 ret.setReadyToBecomeActive(); 1572 } 1573 } else if (retState == HAServiceState.ACTIVE) { 1574 ret.setReadyToBecomeActive(); 1575 } else { 1576 ret.setNotReadyToBecomeActive("State is " + state); 1577 } 1578 return ret; 1579 } 1580 1581 synchronized HAServiceState getServiceState() { 1582 if (state == null) { 1583 return HAServiceState.INITIALIZING; 1584 } 1585 return state.getServiceState(); 1586 } 1587 1588 /** 1589 * Register NameNodeStatusMXBean 1590 */ 1591 private void registerNNSMXBean() { 1592 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this); 1593 } 1594 1595 @Override // NameNodeStatusMXBean 1596 public String getNNRole() { 1597 String roleStr = ""; 1598 NamenodeRole role = getRole(); 1599 if (null != role) { 1600 roleStr = role.toString(); 1601 } 1602 return roleStr; 1603 } 1604 1605 @Override // NameNodeStatusMXBean 1606 public String getState() { 1607 String servStateStr = ""; 1608 HAServiceState servState = getServiceState(); 1609 if (null != servState) { 1610 servStateStr = servState.toString(); 1611 } 1612 return servStateStr; 1613 } 1614 1615 @Override // NameNodeStatusMXBean 1616 public String getHostAndPort() { 1617 return getNameNodeAddressHostPortString(); 1618 } 1619 1620 @Override // NameNodeStatusMXBean 1621 public boolean isSecurityEnabled() { 1622 return UserGroupInformation.isSecurityEnabled(); 1623 } 1624 1625 /** 1626 * Shutdown the NN immediately in an ungraceful way. Used when it would be 1627 * unsafe for the NN to continue operating, e.g. during a failed HA state 1628 * transition. 1629 * 1630 * @param t exception which warrants the shutdown. Printed to the NN log 1631 * before exit. 1632 * @throws ExitException thrown only for testing. 1633 */ 1634 protected synchronized void doImmediateShutdown(Throwable t) 1635 throws ExitException { 1636 String message = "Error encountered requiring NN shutdown. 
" + 1637 "Shutting down immediately."; 1638 try { 1639 LOG.fatal(message, t); 1640 } catch (Throwable ignored) { 1641 // This is unlikely to happen, but there's nothing we can do if it does. 1642 } 1643 terminate(1, t); 1644 } 1645 1646 /** 1647 * Class used to expose {@link NameNode} as context to {@link HAState} 1648 */ 1649 protected class NameNodeHAContext implements HAContext { 1650 @Override 1651 public void setState(HAState s) { 1652 state = s; 1653 } 1654 1655 @Override 1656 public HAState getState() { 1657 return state; 1658 } 1659 1660 @Override 1661 public void startActiveServices() throws IOException { 1662 try { 1663 namesystem.startActiveServices(); 1664 startTrashEmptier(conf); 1665 } catch (Throwable t) { 1666 doImmediateShutdown(t); 1667 } 1668 } 1669 1670 @Override 1671 public void stopActiveServices() throws IOException { 1672 try { 1673 if (namesystem != null) { 1674 namesystem.stopActiveServices(); 1675 } 1676 stopTrashEmptier(); 1677 } catch (Throwable t) { 1678 doImmediateShutdown(t); 1679 } 1680 } 1681 1682 @Override 1683 public void startStandbyServices() throws IOException { 1684 try { 1685 namesystem.startStandbyServices(conf); 1686 } catch (Throwable t) { 1687 doImmediateShutdown(t); 1688 } 1689 } 1690 1691 @Override 1692 public void prepareToStopStandbyServices() throws ServiceFailedException { 1693 try { 1694 namesystem.prepareToStopStandbyServices(); 1695 } catch (Throwable t) { 1696 doImmediateShutdown(t); 1697 } 1698 } 1699 1700 @Override 1701 public void stopStandbyServices() throws IOException { 1702 try { 1703 if (namesystem != null) { 1704 namesystem.stopStandbyServices(); 1705 } 1706 } catch (Throwable t) { 1707 doImmediateShutdown(t); 1708 } 1709 } 1710 1711 @Override 1712 public void writeLock() { 1713 namesystem.writeLock(); 1714 namesystem.lockRetryCache(); 1715 } 1716 1717 @Override 1718 public void writeUnlock() { 1719 namesystem.unlockRetryCache(); 1720 namesystem.writeUnlock(); 1721 } 1722 1723 /** Check if an operation 
of given category is allowed */ 1724 @Override 1725 public void checkOperation(final OperationCategory op) 1726 throws StandbyException { 1727 state.checkOperation(haContext, op); 1728 } 1729 1730 @Override 1731 public boolean allowStaleReads() { 1732 return allowStaleStandbyReads; 1733 } 1734 1735 } 1736 1737 public boolean isStandbyState() { 1738 return (state.equals(STANDBY_STATE)); 1739 } 1740 1741 public boolean isActiveState() { 1742 return (state.equals(ACTIVE_STATE)); 1743 } 1744 1745 /** 1746 * Returns whether the NameNode is completely started 1747 */ 1748 boolean isStarted() { 1749 return this.started.get(); 1750 } 1751 1752 /** 1753 * Check that a request to change this node's HA state is valid. 1754 * In particular, verifies that, if auto failover is enabled, non-forced 1755 * requests from the HAAdmin CLI are rejected, and vice versa. 1756 * 1757 * @param req the request to check 1758 * @throws AccessControlException if the request is disallowed 1759 */ 1760 void checkHaStateChange(StateChangeRequestInfo req) 1761 throws AccessControlException { 1762 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY, 1763 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT); 1764 switch (req.getSource()) { 1765 case REQUEST_BY_USER: 1766 if (autoHaEnabled) { 1767 throw new AccessControlException( 1768 "Manual HA control for this NameNode is disallowed, because " + 1769 "automatic HA is enabled."); 1770 } 1771 break; 1772 case REQUEST_BY_USER_FORCED: 1773 if (autoHaEnabled) { 1774 LOG.warn("Allowing manual HA control from " + 1775 Server.getRemoteAddress() + 1776 " even though automatic HA is enabled, because the user " + 1777 "specified the force flag"); 1778 } 1779 break; 1780 case REQUEST_BY_ZKFC: 1781 if (!autoHaEnabled) { 1782 throw new AccessControlException( 1783 "Request from ZK failover controller at " + 1784 Server.getRemoteAddress() + " denied since automatic HA " + 1785 "is not enabled"); 1786 } 1787 break; 1788 } 1789 } 1790}