/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mob;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.regionserver.DefaultStoreFlusher;
import org.apache.hadoop.hbase.regionserver.FlushLifeCycleTracker;
import org.apache.hadoop.hbase.regionserver.HMobStore;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.MemStoreSnapshot;
import org.apache.hadoop.hbase.regionserver.ScannerContext;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputControlUtil;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.StringUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSetMultimap;

/**
 * An implementation of the StoreFlusher for mob stores. It extends the DefaultStoreFlusher and can
 * only be created for an {@link HMobStore}; the constructor rejects any other store. The flusher
 * flushes the MemStore into two places. One is the store files of HBase, the other is the mob
 * files.
 * <ol>
 * <li>Cells that are not PUT type or have the delete mark will be directly flushed to HBase.</li>
 * <li>If the size of a cell value is larger than a threshold, it'll be flushed to a mob file,
 * another cell with the name of this file will be flushed to HBase.</li>
 * <li>If the size of a cell value is smaller than or equal to the threshold, it'll be flushed to
 * HBase directly.</li>
 * </ol>
 */
@InterfaceAudience.Private
public class DefaultMobStoreFlusher extends DefaultStoreFlusher {

  private static final Logger LOG = LoggerFactory.getLogger(DefaultMobStoreFlusher.class);
  // Guards writer creation, the mob flush and writer finalization in flushSnapshot.
  private final Object flushLock = new Object();
  // Values longer than this (family descriptor's mob threshold) are written to the mob file.
  private final long mobCellValueSizeThreshold;
  // Directory the finished mob file is committed to.
  private final Path targetPath;
  private final HMobStore mobStore;
  // MOB file reference set. performMobFlush clears and fills it, finalizeWriter reads it; both are
  // invoked from flushSnapshot, so the hand-over happens on the flushing thread.
  static ThreadLocal<Set<String>> mobRefSet = ThreadLocal.withInitial(HashSet::new);

  /**
   * Creates the flusher and makes sure the mob family directory exists.
   * @param conf  Configuration used by the flusher and to resolve the mob family path.
   * @param store The store to flush; must be an {@link HMobStore}.
   * @throws IllegalArgumentException if {@code store} is not an {@link HMobStore}.
   * @throws IOException              if checking for or creating the mob family directory fails.
   */
  public DefaultMobStoreFlusher(Configuration conf, HStore store) throws IOException {
    super(conf, store);
    if (!(store instanceof HMobStore)) {
      throw new IllegalArgumentException("The store " + store + " is not a HMobStore");
    }
    this.mobCellValueSizeThreshold = store.getColumnFamilyDescriptor().getMobThreshold();
    this.targetPath =
      MobUtils.getMobFamilyPath(conf, store.getTableName(), store.getColumnFamilyName());
    if (!this.store.getFileSystem().exists(targetPath)) {
      this.store.getFileSystem().mkdirs(targetPath);
    }
    this.mobStore = (HMobStore) store;
  }

  /**
   * Flushes the snapshot of the MemStore. The flusher flushes the MemStore into two places. One is
   * the store files of HBase, the other is the mob files.
   * <ol>
   * <li>Cells that are not PUT type or have the delete mark will be directly flushed to HBase.</li>
   * <li>If the size of a cell value is larger than a threshold, it'll be flushed to a mob file,
   * another cell with the name of this file will be flushed to HBase.</li>
   * <li>If the size of a cell value is smaller than or equal to the threshold, it'll be flushed to
   * HBase directly.</li>
   * </ol>
   * If flushing the cells fails, the store file writer is closed without being finalized and the
   * failure is rethrown.
   * @param snapshot              Memstore snapshot to flush.
   * @param cacheFlushId          Log cache flush sequence number.
   * @param status                Task that represents the flush operation and may be updated with
   *                              status.
   * @param throughputController  A controller to avoid flush too fast.
   * @param tracker               Passed to the scanner created over the snapshot.
   * @param writerCreationTracker Passed on to the writers created for this flush.
   * @return A list holding the path of the flushed store file, or an empty list if the snapshot
   *         has no cells.
   * @throws IOException if writing the store file or the mob file fails.
   */
  @Override
  public List<Path> flushSnapshot(MemStoreSnapshot snapshot, long cacheFlushId,
    MonitoredTask status, ThroughputController throughputController, FlushLifeCycleTracker tracker,
    Consumer<Path> writerCreationTracker) throws IOException {
    List<Path> result = new ArrayList<>();
    if (snapshot.getCellsCount() == 0) {
      return result; // don't flush if there are no entries
    }

    // Use a store scanner to find which rows to flush.
    InternalScanner scanner = createScanner(snapshot.getScanners(), tracker);
    StoreFileWriter writer;
    try {
      // TODO: We can fail in the below block before we complete adding this flush to
      // list of store files. Add cleanup of anything put on filesystem if we fail.
      synchronized (flushLock) {
        status.setStatus("Flushing " + store + ": creating writer");
        // Write the map out to the disk
        writer = createWriter(snapshot, true, writerCreationTracker);
        try {
          // It's a mob store, flush the cells in a mob way. This is the difference of flushing
          // between a normal and a mob store.
          performMobFlush(snapshot, cacheFlushId, scanner, writer, status, throughputController,
            writerCreationTracker);
        } catch (IOException | RuntimeException failure) {
          // Never finalize a partially written store file: just release the writer and let the
          // original failure propagate.
          closeAfterFailure(writer, failure);
          throw failure;
        }
        finalizeWriter(writer, cacheFlushId, status);
      }
    } finally {
      scanner.close();
    }
    LOG.info(
      "Mob store is flushed, sequenceid={}, memsize={}, hasBloomFilter={}, into tmp file {}",
      cacheFlushId, StringUtils.TraditionalBinaryPrefix.long2String(snapshot.getDataSize(), "", 1),
      writer.hasGeneralBloom(), writer.getPath());
    result.add(writer.getPath());
    return result;
  }

  /**
   * Flushes the cells in the mob store. In the mob store, the cells with PUT type might have or
   * have no mob tags.
   * <ol>
   * <li>If a cell does not have a mob tag, flushing the cell to different files depends on the
   * value length. If the length is larger than a threshold, it's flushed to a mob file and a
   * reference cell carrying the mob file name is flushed to a store file in HBase. Otherwise,
   * directly flush the cell to a store file in HBase.</li>
   * <li>If a cell has a mob tag, its value is a mob file name, directly flush it to a store file
   * in HBase.</li>
   * </ol>
   * If scanning or appending fails, the mob file writer is closed before the failure is rethrown.
   * @param snapshot              Memstore snapshot.
   * @param cacheFlushId          Log cache flush sequence number.
   * @param scanner               The scanner of memstore snapshot.
   * @param writer                The store file writer.
   * @param status                Task that represents the flush operation and may be updated with
   *                              status.
   * @param throughputController  A controller to avoid flush too fast.
   * @param writerCreationTracker Passed to the mob file writer when it is not created in the tmp
   *                              directory.
   * @throws IOException if reading the snapshot or writing either file fails, including an
   *                     {@link InterruptedIOException} when throughput control is interrupted.
   */
  protected void performMobFlush(MemStoreSnapshot snapshot, long cacheFlushId,
    InternalScanner scanner, StoreFileWriter writer, MonitoredTask status,
    ThroughputController throughputController, Consumer<Path> writerCreationTracker)
    throws IOException {
    int compactionKVMax =
      conf.getInt(HConstants.COMPACTION_KV_MAX, HConstants.COMPACTION_KV_MAX_DEFAULT);
    long mobCount = 0;
    long mobSize = 0;
    long time = snapshot.getTimeRangeTracker().getMax();
    StoreFileWriter mobFileWriter = mobStore.getStoreEngine().requireWritingToTmpDirFirst()
      ? mobStore.createWriterInTmp(new Date(time), snapshot.getCellsCount(),
        store.getColumnFamilyDescriptor().getCompressionType(), store.getRegionInfo().getStartKey(),
        false)
      : mobStore.createWriter(new Date(time), snapshot.getCellsCount(),
        store.getColumnFamilyDescriptor().getCompressionType(), store.getRegionInfo().getStartKey(),
        false, writerCreationTracker);
    // the target path is {tableName}/.mob/{cfName}/mobFiles
    // the relative path is mobFiles
    byte[] fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
    ScannerContext scannerContext =
      ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
    List<Cell> cells = new ArrayList<>();
    boolean hasMore;
    String flushName = ThroughputControlUtil.getNameForThrottling(store, "flush");
    boolean control =
      throughputController != null && !store.getRegionInfo().getTable().isSystemTable();
    if (control) {
      throughputController.start(flushName);
    }
    // Set when the scan loop fails, so the finally block knows the mob writer must be released.
    Throwable failure = null;
    // Clear all past MOB references
    mobRefSet.get().clear();
    try {
      do {
        hasMore = scanner.next(cells, scannerContext);
        if (!cells.isEmpty()) {
          for (Cell c : cells) {
            // Small values, cells that already are mob references and non-PUT cells go to the
            // store file unchanged; every other cell is moved to the mob file.
            if (
              c.getValueLength() <= mobCellValueSizeThreshold || MobUtils.isMobReferenceCell(c)
                || c.getTypeByte() != KeyValue.Type.Put.getCode()
            ) {
              writer.append(c);
            } else {
              // append the original keyValue in the mob file.
              mobFileWriter.append(c);
              mobSize += c.getValueLength();
              mobCount++;
              // append the tags to the KeyValue.
              // The key is same, the value is the filename of the mob file
              Cell reference =
                MobUtils.createMobRefCell(c, fileName, this.mobStore.getRefCellTags());
              writer.append(reference);
            }
            if (control) {
              throughputController.control(flushName, c.getSerializedSize());
            }
          }
          cells.clear();
        }
      } while (hasMore);
    } catch (InterruptedException e) {
      InterruptedIOException interrupted =
        new InterruptedIOException("Interrupted while control throughput of flushing " + flushName);
      // Keep the original interruption as the cause so it is not lost in diagnostics.
      interrupted.initCause(e);
      failure = interrupted;
      throw interrupted;
    } catch (IOException | RuntimeException e) {
      // Unchecked failures are tracked too, otherwise the mob writer would stay open.
      failure = e;
      throw e;
    } finally {
      if (control) {
        throughputController.finish(flushName);
      }
      if (failure != null) {
        closeAfterFailure(mobFileWriter, failure);
      }
    }

    if (mobCount > 0) {
      // commit the mob file from temp folder to target folder.
      // If the mob file is committed successfully but the store file is not,
      // the committed mob file will be handled by the sweep tool as an unused
      // file.
      status.setStatus("Flushing mob file " + store + ": appending metadata");
      mobFileWriter.appendMetadata(cacheFlushId, false, mobCount);
      status.setStatus("Flushing mob file " + store + ": closing flushed file");
      mobFileWriter.close();
      mobStore.commitFile(mobFileWriter.getPath(), targetPath);
      LOG.debug("Flush store file: {}, store: {}", writer.getPath(), getStoreInfo());
      mobStore.updateMobFlushCount();
      mobStore.updateMobFlushedCellsCount(mobCount);
      mobStore.updateMobFlushedCellsSize(mobSize);
      // Add mob reference to store file metadata
      mobRefSet.get().add(mobFileWriter.getPath().getName());
    } else {
      // Best effort: an empty mob file that cannot be removed is only logged, the flush of the
      // store file itself still succeeds.
      try {
        status.setStatus("Flushing mob file " + store + ": no mob cells, closing flushed file");
        mobFileWriter.close();
        // If the mob file is empty, delete it instead of committing.
        store.getFileSystem().delete(mobFileWriter.getPath(), true);
      } catch (IOException e) {
        LOG.error("Failed to delete the temp mob file", e);
      }
    }
  }

  /**
   * Appends the flush metadata and the mob file references collected for the current thread to the
   * store file, then closes the writer.
   * @param writer           The store file writer to finalize.
   * @param cacheFlushSeqNum Log cache flush sequence number written into the file metadata.
   * @param status           Task that represents the flush operation and may be updated with
   *                         status.
   * @throws IOException if appending the metadata or closing the writer fails.
   */
  @Override
  protected void finalizeWriter(StoreFileWriter writer, long cacheFlushSeqNum, MonitoredTask status)
    throws IOException {
    // Write out the log sequence number that corresponds to this output
    // hfile. Also write current time in metadata as minFlushTime.
    // The hfile is current up to and including cacheFlushSeqNum.
    status.setStatus("Flushing " + store + ": appending metadata");
    writer.appendMetadata(cacheFlushSeqNum, false);
    writer.appendMobMetadata(ImmutableSetMultimap.<TableName, String> builder()
      .putAll(store.getTableName(), mobRefSet.get()).build());
    status.setStatus("Flushing " + store + ": closing flushed file");
    writer.close();
  }

  /**
   * Closes a writer while another failure is already propagating. A failure of the close itself is
   * attached to {@code failure} as suppressed instead of replacing it.
   * @param writer  The writer to release.
   * @param failure The exception that is about to be rethrown by the caller.
   */
  private static void closeAfterFailure(StoreFileWriter writer, Throwable failure) {
    try {
      writer.close();
    } catch (IOException | RuntimeException closeFailure) {
      failure.addSuppressed(closeFailure);
    }
  }

  /** Returns the table, family and encoded region name of the store, formatted for logging. */
  private String getStoreInfo() {
    return String.format("[table=%s family=%s region=%s]", store.getTableName().getNameAsString(),
      store.getColumnFamilyName(), store.getRegionInfo().getEncodedName());
  }
}