001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.datanode.fsdataset;
019
020
021import java.io.EOFException;
022import java.io.File;
023import java.io.FileDescriptor;
024import java.io.FileNotFoundException;
025import java.io.IOException;
026import java.io.InputStream;
027import java.util.Collection;
028import java.util.List;
029import java.util.Map;
030
031import org.apache.hadoop.classification.InterfaceAudience;
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.hdfs.DFSConfigKeys;
034import org.apache.hadoop.hdfs.StorageType;
035import org.apache.hadoop.hdfs.protocol.Block;
036import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
037import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
038import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
039import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
040import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
041import org.apache.hadoop.hdfs.server.datanode.DataNode;
042import org.apache.hadoop.hdfs.server.datanode.DataStorage;
043import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
044import org.apache.hadoop.hdfs.server.datanode.Replica;
045import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
046import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
047import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
048import org.apache.hadoop.hdfs.server.datanode.UnexpectedReplicaStateException;
049import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
050import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
051import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
052import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
053import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
054import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
055import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
056import org.apache.hadoop.hdfs.server.protocol.StorageReport;
057import org.apache.hadoop.util.DiskChecker.DiskErrorException;
058import org.apache.hadoop.util.ReflectionUtils;
059
060/**
061 * This is a service provider interface for the underlying storage that
062 * stores replicas for a data node.
063 * The default implementation stores replicas on local drives. 
064 */
065@InterfaceAudience.Private
066public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
067  /**
068   * A factory for creating {@link FsDatasetSpi} objects.
069   */
070  public static abstract class Factory<D extends FsDatasetSpi<?>> {
071    /** @return the configured factory. */
072    public static Factory<?> getFactory(Configuration conf) {
073      @SuppressWarnings("rawtypes")
074      final Class<? extends Factory> clazz = conf.getClass(
075          DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
076          FsDatasetFactory.class,
077          Factory.class);
078      return ReflectionUtils.newInstance(clazz, conf);
079    }
080
081    /** Create a new object. */
082    public abstract D newInstance(DataNode datanode, DataStorage storage,
083        Configuration conf) throws IOException;
084
085    /** Does the factory create simulated objects? */
086    public boolean isSimulated() {
087      return false;
088    }
089  }
090
091  /**
092   * Create rolling logs.
093   *
094   * @param prefix the prefix of the log names.
095   * @return rolling logs
096   */
097  public RollingLogs createRollingLogs(String bpid, String prefix
098      ) throws IOException;
099
100  /** @return a list of volumes. */
101  public List<V> getVolumes();
102
103  /** Add an array of StorageLocation to FsDataset. */
104  public void addVolume(
105      final StorageLocation location,
106      final List<NamespaceInfo> nsInfos) throws IOException;
107
108  /** Removes a collection of volumes from FsDataset. */
109  public void removeVolumes(Collection<StorageLocation> volumes);
110
111  /** @return a storage with the given storage ID */
112  public DatanodeStorage getStorage(final String storageUuid);
113
114  /** @return one or more storage reports for attached volumes. */
115  public StorageReport[] getStorageReports(String bpid)
116      throws IOException;
117
118  /** @return the volume that contains a replica of the block. */
119  public V getVolume(ExtendedBlock b);
120
121  /** @return a volume information map (name => info). */
122  public Map<String, Object> getVolumeInfoMap();
123
124  /** @return a list of finalized blocks for the given block pool. */
125  public List<FinalizedReplica> getFinalizedBlocks(String bpid);
126
127  /** @return a list of finalized blocks for the given block pool. */
128  public List<FinalizedReplica> getFinalizedBlocksOnPersistentStorage(String bpid);
129
130  /**
131   * Check whether the in-memory block record matches the block on the disk,
132   * and, in case that they are not matched, update the record or mark it
133   * as corrupted.
134   */
135  public void checkAndUpdate(String bpid, long blockId, File diskFile,
136      File diskMetaFile, FsVolumeSpi vol) throws IOException;
137
138  /**
139   * @param b - the block
140   * @return a stream if the meta-data of the block exists;
141   *         otherwise, return null.
142   * @throws IOException
143   */
144  public LengthInputStream getMetaDataInputStream(ExtendedBlock b
145      ) throws IOException;
146
147  /**
148   * Returns the specified block's on-disk length (excluding metadata)
149   * @return   the specified block's on-disk length (excluding metadta)
150   * @throws IOException on error
151   */
152  public long getLength(ExtendedBlock b) throws IOException;
153
154  /**
155   * Get reference to the replica meta info in the replicasMap. 
156   * To be called from methods that are synchronized on {@link FSDataset}
157   * @return replica from the replicas map
158   */
159  @Deprecated
160  public Replica getReplica(String bpid, long blockId);
161
162  /**
163   * @return replica meta information
164   */
165  public String getReplicaString(String bpid, long blockId);
166
167  /**
168   * @return the generation stamp stored with the block.
169   */
170  public Block getStoredBlock(String bpid, long blkid) throws IOException;
171  
172  /**
173   * Returns an input stream at specified offset of the specified block
174   * @param b block
175   * @param seekOffset offset with in the block to seek to
176   * @return an input stream to read the contents of the specified block,
177   *  starting at the offset
178   * @throws IOException
179   */
180  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
181            throws IOException;
182
183  /**
184   * Returns an input stream at specified offset of the specified block
185   * The block is still in the tmp directory and is not finalized
186   * @return an input stream to read the contents of the specified block,
187   *  starting at the offset
188   * @throws IOException
189   */
190  public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
191      long ckoff) throws IOException;
192
193  /**
194   * Creates a temporary replica and returns the meta information of the replica
195   * 
196   * @param b block
197   * @return the meta info of the replica which is being written to
198   * @throws IOException if an error occurs
199   */
200  public ReplicaInPipelineInterface createTemporary(StorageType storageType,
201      ExtendedBlock b) throws IOException;
202
203  /**
204   * Creates a RBW replica and returns the meta info of the replica
205   * 
206   * @param b block
207   * @return the meta info of the replica which is being written to
208   * @throws IOException if an error occurs
209   */
210  public ReplicaInPipelineInterface createRbw(StorageType storageType,
211      ExtendedBlock b, boolean allowLazyPersist) throws IOException;
212
213  /**
214   * Recovers a RBW replica and returns the meta info of the replica
215   * 
216   * @param b block
217   * @param newGS the new generation stamp for the replica
218   * @param minBytesRcvd the minimum number of bytes that the replica could have
219   * @param maxBytesRcvd the maximum number of bytes that the replica could have
220   * @return the meta info of the replica which is being written to
221   * @throws IOException if an error occurs
222   */
223  public ReplicaInPipelineInterface recoverRbw(ExtendedBlock b, 
224      long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;
225
226  /**
227   * Covert a temporary replica to a RBW.
228   * @param temporary the temporary replica being converted
229   * @return the result RBW
230   */
231  public ReplicaInPipelineInterface convertTemporaryToRbw(
232      ExtendedBlock temporary) throws IOException;
233
234  /**
235   * Append to a finalized replica and returns the meta info of the replica
236   * 
237   * @param b block
238   * @param newGS the new generation stamp for the replica
239   * @param expectedBlockLen the number of bytes the replica is expected to have
240   * @return the meata info of the replica which is being written to
241   * @throws IOException
242   */
243  public ReplicaInPipelineInterface append(ExtendedBlock b, long newGS,
244      long expectedBlockLen) throws IOException;
245
246  /**
247   * Recover a failed append to a finalized replica
248   * and returns the meta info of the replica
249   * 
250   * @param b block
251   * @param newGS the new generation stamp for the replica
252   * @param expectedBlockLen the number of bytes the replica is expected to have
253   * @return the meta info of the replica which is being written to
254   * @throws IOException
255   */
256  public ReplicaInPipelineInterface recoverAppend(ExtendedBlock b, long newGS,
257      long expectedBlockLen) throws IOException;
258  
259  /**
260   * Recover a failed pipeline close
261   * It bumps the replica's generation stamp and finalize it if RBW replica
262   * 
263   * @param b block
264   * @param newGS the new generation stamp for the replica
265   * @param expectedBlockLen the number of bytes the replica is expected to have
266   * @return the storage uuid of the replica.
267   * @throws IOException
268   */
269  public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
270      ) throws IOException;
271  
272  /**
273   * Finalizes the block previously opened for writing using writeToBlock.
274   * The block size is what is in the parameter b and it must match the amount
275   *  of data written
276   * @throws IOException
277   */
278  public void finalizeBlock(ExtendedBlock b) throws IOException;
279
280  /**
281   * Unfinalizes the block previously opened for writing using writeToBlock.
282   * The temporary file associated with this block is deleted.
283   * @throws IOException
284   */
285  public void unfinalizeBlock(ExtendedBlock b) throws IOException;
286
287  /**
288   * Returns one block report per volume.
289   * @param bpid Block Pool Id
290   * @return - a map of DatanodeStorage to block report for the volume.
291   */
292  public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);
293
294  /**
295   * Returns the cache report - the full list of cached block IDs of a
296   * block pool.
297   * @param   bpid Block Pool Id
298   * @return  the cache report - the full list of cached block IDs.
299   */
300  public List<Long> getCacheReport(String bpid);
301
302  /** Does the dataset contain the block? */
303  public boolean contains(ExtendedBlock block);
304
305  /**
306   * Check if a block is valid.
307   *
308   * @param b           The block to check.
309   * @param minLength   The minimum length that the block must have.  May be 0.
310   * @param state       If this is null, it is ignored.  If it is non-null, we
311   *                        will check that the replica has this state.
312   *
313   * @throws ReplicaNotFoundException          If the replica is not found
314   *
315   * @throws UnexpectedReplicaStateException   If the replica is not in the 
316   *                                             expected state.
317   * @throws FileNotFoundException             If the block file is not found or there 
318   *                                              was an error locating it.
319   * @throws EOFException                      If the replica length is too short.
320   * 
321   * @throws IOException                       May be thrown from the methods called. 
322   */
323  public void checkBlock(ExtendedBlock b, long minLength, ReplicaState state)
324      throws ReplicaNotFoundException, UnexpectedReplicaStateException,
325      FileNotFoundException, EOFException, IOException;
326      
327  
328  /**
329   * Is the block valid?
330   * @return - true if the specified block is valid
331   */
332  public boolean isValidBlock(ExtendedBlock b);
333
334  /**
335   * Is the block a valid RBW?
336   * @return - true if the specified block is a valid RBW
337   */
338  public boolean isValidRbw(ExtendedBlock b);
339
340  /**
341   * Invalidates the specified blocks
342   * @param bpid Block pool Id
343   * @param invalidBlks - the blocks to be invalidated
344   * @throws IOException
345   */
346  public void invalidate(String bpid, Block invalidBlks[]) throws IOException;
347
348  /**
349   * Caches the specified blocks
350   * @param bpid Block pool id
351   * @param blockIds - block ids to cache
352   */
353  public void cache(String bpid, long[] blockIds);
354
355  /**
356   * Uncaches the specified blocks
357   * @param bpid Block pool id
358   * @param blockIds - blocks ids to uncache
359   */
360  public void uncache(String bpid, long[] blockIds);
361
362  /**
363   * Determine if the specified block is cached.
364   * @param bpid Block pool id
365   * @param blockIds - block id
366   * @return true if the block is cached
367   */
368  public boolean isCached(String bpid, long blockId);
369
370    /**
371     * Check if all the data directories are healthy
372     * @throws DiskErrorException
373     */
374  public void checkDataDir() throws DiskErrorException;
375
376  /**
377   * Shutdown the FSDataset
378   */
379  public void shutdown();
380
381  /**
382   * Sets the file pointer of the checksum stream so that the last checksum
383   * will be overwritten
384   * @param b block
385   * @param outs The streams for the data file and checksum file
386   * @param checksumSize number of bytes each checksum has
387   * @throws IOException
388   */
389  public void adjustCrcChannelPosition(ExtendedBlock b,
390      ReplicaOutputStreams outs, int checksumSize) throws IOException;
391
392  /**
393   * Checks how many valid storage volumes there are in the DataNode.
394   * @return true if more than the minimum number of valid volumes are left 
395   * in the FSDataSet.
396   */
397  public boolean hasEnoughResource();
398
399  /**
400   * Get visible length of the specified replica.
401   */
402  long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;
403
404  /**
405   * Initialize a replica recovery.
406   * @return actual state of the replica on this data-node or 
407   * null if data-node does not have the replica.
408   */
409  public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
410      ) throws IOException;
411
412  /**
413   * Update replica's generation stamp and length and finalize it.
414   * @return the ID of storage that stores the block
415   */
416  public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
417      long recoveryId, long newLength) throws IOException;
418
419  /**
420   * add new block pool ID
421   * @param bpid Block pool Id
422   * @param conf Configuration
423   */
424  public void addBlockPool(String bpid, Configuration conf) throws IOException;
425  
426  /**
427   * Shutdown and remove the block pool from underlying storage.
428   * @param bpid Block pool Id to be removed
429   */
430  public void shutdownBlockPool(String bpid) ;
431  
432  /**
433   * Deletes the block pool directories. If force is false, directories are 
434   * deleted only if no block files exist for the block pool. If force 
435   * is true entire directory for the blockpool is deleted along with its
436   * contents.
437   * @param bpid BlockPool Id to be deleted.
438   * @param force If force is false, directories are deleted only if no
439   *        block files exist for the block pool, otherwise entire 
440   *        directory for the blockpool is deleted along with its contents.
441   * @throws IOException
442   */
443  public void deleteBlockPool(String bpid, boolean force) throws IOException;
444  
445  /**
446   * Get {@link BlockLocalPathInfo} for the given block.
447   */
448  public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
449      ) throws IOException;
450
451  /**
452   * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in 
453   * <code>blocks</code>.
454   * 
455   * @param bpid pool to query
456   * @param blockIds List of block ids for which to return metadata
457   * @return metadata Metadata for the list of blocks
458   * @throws IOException
459   */
460  public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
461      long[] blockIds) throws IOException;
462
463  /**
464   * Enable 'trash' for the given dataset. When trash is enabled, files are
465   * moved to a separate trash directory instead of being deleted immediately.
466   * This can be useful for example during rolling upgrades.
467   */
468  public void enableTrash(String bpid);
469
470  /**
471   * Restore trash
472   */
473  public void restoreTrash(String bpid);
474
475  /**
476   * @return true when trash is enabled
477   */
478  public boolean trashEnabled(String bpid);
479
480  /**
481   * Create a marker file indicating that a rolling upgrade is in progress.
482   */
483  public void setRollingUpgradeMarker(String bpid) throws IOException;
484
485  /**
486   * Delete the rolling upgrade marker file if it exists.
487   * @param bpid
488   */
489  public void clearRollingUpgradeMarker(String bpid) throws IOException;
490
491  /**
492   * submit a sync_file_range request to AsyncDiskService
493   */
494  public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
495      final FileDescriptor fd, final long offset, final long nbytes,
496      final int flags);
497
498  /**
499   * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task end
500   */
501   public void onCompleteLazyPersist(String bpId, long blockId,
502      long creationTime, File[] savedFiles, FsVolumeImpl targetVolume);
503
504   /**
505    * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task fail
506    */
507   public void onFailLazyPersist(String bpId, long blockId);
508}