/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR;
import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

/**
 * The store file writer does not do any compaction. Each cell is written to either the live or the
 * historical file. Regular (i.e., not raw) scans that read the latest put cells scan only live
 * files. To ensure the correctness of the store file writer, we need to verify that live files
 * include all live cells. This test verifies this indirectly as follows. The test creates two
 * tables, each with one region and one store. Dual file writing (live vs. historical) is enabled on
 * only one of the tables. The test generates the exact same set of mutations on both tables. These
 * mutations include all types of cells, and these cells are written to multiple files using
 * multiple memstore flushes. After writing all cells, the test first verifies that both tables
 * return the same set of cells for regular and raw scans. Then the same verification is done after
 * the tables are minor compacted and finally major compacted. The test also verifies that flushes
 * do not generate historical files and that historical files are generated only when historical
 * file generation is enabled (by the config hbase.enable.historical.compaction.files).
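 * <p>
 * A minimal sketch, mirroring {@link #createTable} below, of how this test enables the dual file
 * writer on one of the two tables:
 *
 * <pre>
 * TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
 *   .setColumnFamily(familyDescriptor)
 *   // write compaction output into separate live and historical files
 *   .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, "true");
 * </pre>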
 */
@Category({ MediumTests.class, RegionServerTests.class })
@RunWith(Parameterized.class)
public class TestStoreFileWriter {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestStoreFileWriter.class);
  private final int ROW_NUM = 100;
  private final Random RANDOM = new Random(11);
  private final HBaseTestingUtil testUtil = new HBaseTestingUtil();
  private HRegion[] regions = new HRegion[2];
  private final byte[][] qualifiers =
    { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") };
  // This keeps track of all cells. It is a list of rows, where each row is a list of columns and
  // each column is a list of CellInfo objects.
  private ArrayList<ArrayList<ArrayList<CellInfo>>> insertedCells;
  private TableName[] tableName = new TableName[2];
  private final Configuration conf = testUtil.getConfiguration();
  private int flushCount = 0;

  @Parameterized.Parameter(0)
  public KeepDeletedCells keepDeletedCells;
  @Parameterized.Parameter(1)
  public int maxVersions;
  @Parameterized.Parameter(2)
  public boolean newVersionBehavior;

  @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}, newVersionBehavior={2}")
  public static synchronized Collection<Object[]> data() {
    return Arrays.asList(
      new Object[][] { { KeepDeletedCells.FALSE, 1, true }, { KeepDeletedCells.FALSE, 2, false },
        { KeepDeletedCells.FALSE, 3, true }, { KeepDeletedCells.TRUE, 1, false },
        // { KeepDeletedCells.TRUE, 2, true }, see HBASE-28442
        { KeepDeletedCells.TRUE, 3, false } });
  }

  // In-memory representation of a cell. We only need the timestamp and type fields for our
  // testing. Note that the row and column of a cell are implicit in its position in insertedCells.
  private static class CellInfo {
    long timestamp;
    Cell.Type type;

    CellInfo(long timestamp, Cell.Type type) {
      this.timestamp = timestamp;
      this.type = type;
    }
  }

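  // Creates one of the two test tables. The dual (live vs. historical) file writer is enabled
  // only when enableDualFileWriter is true, via the ENABLE_HISTORICAL_COMPACTION_FILES setting
  // on the table descriptor.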
  private void createTable(int index, boolean enableDualFileWriter) throws IOException {
    tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index);
    ColumnFamilyDescriptor familyDescriptor =
      ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions)
        .setKeepDeletedCells(keepDeletedCells)
        .setValue(NEW_VERSION_BEHAVIOR, Boolean.toString(newVersionBehavior)).build();
    TableDescriptorBuilder builder =
      TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor)
        .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter));
    testUtil.createTable(builder.build(), null);
    regions[index] = testUtil.getMiniHBaseCluster().getRegions(tableName[index]).get(0);
  }

  @Before
  public void setUp() throws Exception {
    conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6);
    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
      String.valueOf(MemoryCompactionPolicy.NONE));
    testUtil.startMiniCluster();
    createTable(0, false);
    createTable(1, true);
    insertedCells = new ArrayList<>(ROW_NUM);
    for (int r = 0; r < ROW_NUM; r++) {
      insertedCells.add(new ArrayList<>(qualifiers.length));
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(r).add(new ArrayList<>(10));
      }
    }
  }

  @After
  public void tearDown() throws Exception {
    this.testUtil.shutdownMiniCluster();
    testUtil.cleanupTestDir();
  }

  @Test
  public void testCompactedFiles() throws Exception {
    // Generate multiple store files per store, each flush covering a mix of puts and deletes
    for (int i = 0; i < 10; i++) {
      insertRows(ROW_NUM * maxVersions);
      deleteRows(ROW_NUM / 8);
      deleteRowVersions(ROW_NUM / 8);
      deleteColumns(ROW_NUM / 8);
      deleteColumnVersions(ROW_NUM / 8);
      flushRegion();
    }

    verifyCells();

    HStore[] stores = new HStore[2];

    // Flushes do not generate historical files, so both stores have one file per flush
    stores[0] = regions[0].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[0].getStorefilesCount());

    stores[1] = regions[1].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[1].getStorefilesCount());

    // A minor compaction with the default writer replaces the compacted files with one new file
    regions[0].compact(false);
    assertEquals(flushCount - stores[0].getCompactedFiles().size() + 1,
      stores[0].getStorefilesCount());

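    // With the dual file writer, a minor compaction is expected to replace the compacted files
    // with two new files, one live and one historical, hence the +2 below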
    regions[1].compact(false);
    assertEquals(flushCount - stores[1].getCompactedFiles().size() + 2,
      stores[1].getStorefilesCount());

    verifyCells();

    regions[0].compact(true);
    assertEquals(1, stores[0].getStorefilesCount());

    // After a major compaction with the dual file writer, a historical file remains only if
    // deleted cells are retained, that is, only when KeepDeletedCells is enabled
    regions[1].compact(true);
    assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2,
      stores[1].getStorefilesCount());

    verifyCells();
  }

  private void verifyCells() throws Exception {
    scanAndCompare(false);
    scanAndCompare(true);
  }

  private void flushRegion() throws Exception {
    regions[0].flush(true);
    regions[1].flush(true);
    flushCount++;
  }

  // Returns the timestamp of the most recent put cell in the given row, or null if the row has
  // no live put cells
  private Long getRowTimestamp(int row) {
    Long maxTimestamp = null;
    for (int q = 0; q < qualifiers.length; q++) {
      int size = insertedCells.get(row).get(q).size();
      if (size > 0) {
        CellInfo mostRecentCellInfo = insertedCells.get(row).get(q).get(size - 1);
        if (mostRecentCellInfo.type == Cell.Type.Put) {
          if (maxTimestamp == null || maxTimestamp < mostRecentCellInfo.timestamp) {
            maxTimestamp = mostRecentCellInfo.timestamp;
          }
        }
      }
    }
    return maxTimestamp;
  }

  // Returns a timestamp strictly greater than the given one, sleeping briefly if necessary
  private long getNewTimestamp(long timestamp) throws Exception {
    long newTimestamp = System.currentTimeMillis();
    if (timestamp == newTimestamp) {
      Thread.sleep(1);
      newTimestamp = System.currentTimeMillis();
      assertTrue(timestamp < newTimestamp);
    }
    return newTimestamp;
  }

  private void insertRows(int rowCount) throws Exception {
    int row;
    long timestamp = System.currentTimeMillis();
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Put put = new Put(Bytes.toBytes(String.valueOf(row)), timestamp);
      for (int q = 0; q < qualifiers.length; q++) {
        put.addColumn(HBaseTestingUtil.fam1, qualifiers[q],
          Bytes.toBytes(String.valueOf(timestamp)));
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put));
      }
      regions[0].put(put);
      regions[1].put(put);
      timestamp = getNewTimestamp(timestamp);
    }
  }

  private void deleteRows(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
      regions[0].delete(delete);
      regions[1].delete(delete);
      // For simplicity, the family delete markers are inserted for all columns (instead of
      // allocating a separate column for them) in the in-memory representation of the data
      // stored in HBase
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamily));
      }
    }
  }

  private void deleteSingleRowVersion(int row, long timestamp) throws IOException {
    Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
    delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp);
    regions[0].delete(delete);
    regions[1].delete(delete);
    // For simplicity, the family delete version markers are inserted for all columns (instead of
    // allocating a separate column for them) in the in-memory representation of the data
    // stored in HBase
    for (int q = 0; q < qualifiers.length; q++) {
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion));
    }
  }

  private void deleteRowVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        deleteSingleRowVersion(row, timestamp);
      }
    }
    // Insert one more delete marker, which possibly does not delete any row version
    row = RANDOM.nextInt(ROW_NUM);
    deleteSingleRowVersion(row, System.currentTimeMillis());
  }

  private void deleteColumns(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      int q = RANDOM.nextInt(qualifiers.length);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)), timestamp);
      delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
      regions[0].delete(delete);
      regions[1].delete(delete);
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteColumn));
    }
  }

  private void deleteColumnVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
        int q = RANDOM.nextInt(qualifiers.length);
        delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
        regions[0].delete(delete);
        regions[1].delete(delete);
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete));
      }
    }
  }

  private Scan createScan(boolean raw) {
    Scan scan = new Scan();
    scan.readAllVersions();
    scan.setRaw(raw);
    return scan;
  }

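  // Scans both regions with the same (raw or regular) scan and asserts that they return exactly
  // the same cells: same row and column, same type, and same value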
  private void scanAndCompare(boolean raw) throws Exception {
    try (RegionScanner firstRS = regions[0].getScanner(createScan(raw))) {
      try (RegionScanner secondRS = regions[1].getScanner(createScan(raw))) {
        boolean firstHasMore;
        boolean secondHasMore;
        do {
          List<Cell> firstRowList = new ArrayList<>();
          List<Cell> secondRowList = new ArrayList<>();
          firstHasMore = firstRS.nextRaw(firstRowList);
          secondHasMore = secondRS.nextRaw(secondRowList);
          assertEquals(firstRowList.size(), secondRowList.size());
          int size = firstRowList.size();
          for (int i = 0; i < size; i++) {
            Cell firstCell = firstRowList.get(i);
            Cell secondCell = secondRowList.get(i);
            assertTrue(CellUtil.matchingRowColumn(firstCell, secondCell));
            assertEquals(firstCell.getType(), secondCell.getType());
            // Compare the values of the two cells (the original compared firstCell to itself)
            assertTrue(
              Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(secondCell)));
          }
        } while (firstHasMore && secondHasMore);
        assertEquals(firstHasMore, secondHasMore);
      }
    }
  }
}