/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for region server storage layer.
 */
@InterfaceAudience.Private
public final class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  private StoreUtils() {
  }

  /**
   * Creates a deterministic hash code for a store file collection, suitable for use as a random
   * seed. The hash of the first file's name is used.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest timestamp from the candidate StoreFiles.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file; an empty Optional if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
      .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
      .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file       the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or an empty Optional if splitting is not possible or the reader
   *         is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
    throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} has a null reader; cannot get split point", file);
      return Optional.empty();
    }
    // Get first, last, and mid keys. The midkey is the key that starts the block in the middle of
    // the hfile; it has a column and timestamp, but we need to return just the row to split on.
    Optional<ExtendedCell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
    if (
      comparator.compareRows(midKey, firstKey) == 0 || comparator.compareRows(midKey, lastKey) == 0
    ) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid point of the largest of the given store files, for use as a split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
    CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent()
      ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
      : Optional.empty();
  }
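  // Illustrative sketch (not part of the upstream class): how a caller might derive a split row
  // for a store using the helpers above. "store", getStorefiles() and getComparator() are assumed
  // accessors for this example, not APIs defined here.
  //
  //   Optional<byte[]> splitRow =
  //     StoreUtils.getSplitPoint(store.getStorefiles(), store.getComparator());
  //   splitRow.ifPresent(row -> LOG.debug("split point for {}: {}", store,
  //     org.apache.hadoop.hbase.util.Bytes.toStringBinary(row)));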

  /**
   * Returns the configured checksum algorithm.
   * @param conf The configuration
   * @return The checksum algorithm that is set in the configuration
   */
  public static ChecksumType getChecksumType(Configuration conf) {
    return ChecksumType.nameToType(
      conf.get(HConstants.CHECKSUM_TYPE_NAME, ChecksumType.getDefaultChecksumType().getName()));
  }

  /**
   * Returns the configured bytesPerChecksum value.
   * @param conf The configuration
   * @return The bytesPerChecksum that is set in the configuration
   */
  public static int getBytesPerChecksum(Configuration conf) {
    return conf.getInt(HConstants.BYTES_PER_CHECKSUM, HFile.DEFAULT_BYTES_PER_CHECKSUM);
  }

  public static Configuration createStoreConfiguration(Configuration conf, TableDescriptor td,
    ColumnFamilyDescriptor cfd) {
    // CompoundConfiguration will look for keys in reverse order of addition, so we'd
    // add global config first, then table and cf overrides, then cf metadata.
    return new CompoundConfiguration().add(conf).addBytesMap(td.getValues())
      .addStringMap(cfd.getConfiguration()).addBytesMap(cfd.getValues());
  }
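  // Illustrative sketch (not part of the upstream class): the resulting lookup precedence of the
  // store configuration. Because CompoundConfiguration consults sources in reverse order of
  // addition, a key in cfd.getValues() wins over cfd.getConfiguration(), which wins over
  // td.getValues(), which wins over the global conf. The key name below is made up for the
  // example.
  //
  //   Configuration storeConf = StoreUtils.createStoreConfiguration(conf, tableDesc, familyDesc);
  //   int bytesPerChecksum = StoreUtils.getBytesPerChecksum(storeConf);
  //   long hypothetical = storeConf.getLong("hypothetical.store.key", 0L);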

  /** Returns the StoreFileInfo for each of the given store files. */
  public static List<StoreFileInfo> toStoreFileInfo(Collection<HStoreFile> storefiles) {
    return storefiles.stream().map(HStoreFile::getFileInfo).collect(Collectors.toList());
  }

  /** Returns the total uncompressed data size of the given files; files without a reader count as zero. */
  public static long getTotalUncompressedBytes(List<HStoreFile> files) {
    return files.stream()
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::getTotalUncompressedBytes))
      .sum();
  }

  /** Returns the total on-disk size of the files matching the predicate; files without a reader count as zero. */
  public static long getStorefilesSize(Collection<HStoreFile> files,
    Predicate<HStoreFile> predicate) {
    return files.stream().filter(predicate)
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::length)).sum();
  }

  /** Applies the given size accessor to the file's reader, returning 0 if the file or its reader is null. */
  public static long getStorefileFieldSize(HStoreFile file, ToLongFunction<StoreFileReader> f) {
    if (file == null) {
      return 0L;
    }
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      return 0L;
    }
    return f.applyAsLong(reader);
  }
}
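
// Illustrative sketch (not part of the upstream class): aggregating store file sizes with the
// size helpers, e.g. for metrics. The predicate excludes bulk-loaded files; the variable names
// and the metric use are assumptions for the example.
//
//   long onDiskBytes = StoreUtils.getStorefilesSize(storefiles, sf -> !sf.isBulkLoadResult());
//   long uncompressedBytes = StoreUtils.getTotalUncompressedBytes(new ArrayList<>(storefiles));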