001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR;
021import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES;
022import static org.junit.Assert.assertEquals;
023import static org.junit.Assert.assertTrue;
024
025import java.io.IOException;
026import java.util.ArrayList;
027import java.util.Arrays;
028import java.util.Collection;
029import java.util.List;
030import java.util.Random;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.hbase.Cell;
033import org.apache.hadoop.hbase.CellUtil;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.HBaseTestingUtil;
036import org.apache.hadoop.hbase.KeepDeletedCells;
037import org.apache.hadoop.hbase.MemoryCompactionPolicy;
038import org.apache.hadoop.hbase.TableName;
039import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
040import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
041import org.apache.hadoop.hbase.client.Delete;
042import org.apache.hadoop.hbase.client.Put;
043import org.apache.hadoop.hbase.client.Scan;
044import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
045import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
046import org.apache.hadoop.hbase.testclassification.MediumTests;
047import org.apache.hadoop.hbase.testclassification.RegionServerTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.junit.After;
050import org.junit.Before;
051import org.junit.ClassRule;
052import org.junit.Test;
053import org.junit.experimental.categories.Category;
054import org.junit.runner.RunWith;
055import org.junit.runners.Parameterized;
056
/**
 * Store file writer does not do any compaction. Each cell is written to either the live or the
 * historical file. Regular (i.e., not-raw) scans that read the latest put cells scan only live
 * files. To ensure the correctness of the store file writer, we need to verify that live files
 * include all live cells. This test verifies this indirectly as follows. The test creates two
 * tables, each with one region and one store. The dual file writing (live vs historical) is
 * configured on only one of the tables. The test generates the exact same set of mutations on
 * both tables. These mutations include all types of cells and these cells are written to multiple
 * files using multiple memstore flushes. After writing all cells, the test first verifies that
 * both tables return the same set of cells for regular and raw scans. Then the same verification
 * is done after the tables are minor compacted and finally after they are major compacted. The
 * test also verifies that flushes do not generate historical files and that the historical files
 * are generated only when historical file generation is enabled (by the config
 * hbase.enable.historical.compaction.files).
 */
071@Category({ MediumTests.class, RegionServerTests.class })
072@RunWith(Parameterized.class)
073public class TestStoreFileWriter {
074  @ClassRule
075  public static final HBaseClassTestRule CLASS_RULE =
076    HBaseClassTestRule.forClass(TestStoreFileWriter.class);
077  private final int ROW_NUM = 100;
078  private final Random RANDOM = new Random(11);
079  private final HBaseTestingUtil testUtil = new HBaseTestingUtil();
080  private HRegion[] regions = new HRegion[2];
081  private final byte[][] qualifiers =
082    { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") };
083  // This keeps track of all cells. It is a list of rows, each row is a list of columns, each
084  // column is a list of CellInfo object
085  private ArrayList<ArrayList<ArrayList<CellInfo>>> insertedCells;
086  private TableName[] tableName = new TableName[2];
087  private final Configuration conf = testUtil.getConfiguration();
088  private int flushCount = 0;
089
090  @Parameterized.Parameter(0)
091  public KeepDeletedCells keepDeletedCells;
092  @Parameterized.Parameter(1)
093  public int maxVersions;
094  @Parameterized.Parameter(2)
095  public boolean newVersionBehavior;
096
097  @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}, newVersionBehavior={2}")
098  public static synchronized Collection<Object[]> data() {
099    return Arrays.asList(
100      new Object[][] { { KeepDeletedCells.FALSE, 1, true }, { KeepDeletedCells.FALSE, 2, false },
101        { KeepDeletedCells.FALSE, 3, true }, { KeepDeletedCells.TRUE, 1, false },
102        // { KeepDeletedCells.TRUE, 2, true }, see HBASE-28442
103        { KeepDeletedCells.TRUE, 3, false } });
104  }
105
106  // In memory representation of a cell. We only need to know timestamp and type field for our
107  // testing for cell. Please note the row for the cell is implicit in insertedCells.
108  private static class CellInfo {
109    long timestamp;
110    Cell.Type type;
111
112    CellInfo(long timestamp, Cell.Type type) {
113      this.timestamp = timestamp;
114      this.type = type;
115    }
116  }
117
118  private void createTable(int index, boolean enableDualFileWriter) throws IOException {
119    tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index);
120    ColumnFamilyDescriptor familyDescriptor =
121      ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions)
122        .setKeepDeletedCells(keepDeletedCells)
123        .setValue(NEW_VERSION_BEHAVIOR, Boolean.toString(newVersionBehavior)).build();
124    TableDescriptorBuilder builder =
125      TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor)
126        .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter));
127    testUtil.createTable(builder.build(), null);
128    regions[index] = testUtil.getMiniHBaseCluster().getRegions(tableName[index]).get(0);
129  }
130
131  @Before
132  public void setUp() throws Exception {
133    conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6);
134    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
135      String.valueOf(MemoryCompactionPolicy.NONE));
136    testUtil.startMiniCluster();
137    createTable(0, false);
138    createTable(1, true);
139    insertedCells = new ArrayList<>(ROW_NUM);
140    for (int r = 0; r < ROW_NUM; r++) {
141      insertedCells.add(new ArrayList<>(qualifiers.length));
142      for (int q = 0; q < qualifiers.length; q++) {
143        insertedCells.get(r).add(new ArrayList<>(10));
144      }
145    }
146  }
147
148  @After
149  public void tearDown() throws Exception {
150    this.testUtil.shutdownMiniCluster();
151    testUtil.cleanupTestDir();
152  }
153
154  @Test
155  public void testCompactedFiles() throws Exception {
156    for (int i = 0; i < 10; i++) {
157      insertRows(ROW_NUM * maxVersions);
158      deleteRows(ROW_NUM / 8);
159      deleteRowVersions(ROW_NUM / 8);
160      deleteColumns(ROW_NUM / 8);
161      deleteColumnVersions(ROW_NUM / 8);
162      flushRegion();
163    }
164
165    verifyCells();
166
167    HStore[] stores = new HStore[2];
168
169    stores[0] = regions[0].getStore(HBaseTestingUtil.fam1);
170    assertEquals(flushCount, stores[0].getStorefilesCount());
171
172    stores[1] = regions[1].getStore(HBaseTestingUtil.fam1);
173    assertEquals(flushCount, stores[1].getStorefilesCount());
174
175    regions[0].compact(false);
176    assertEquals(flushCount - stores[0].getCompactedFiles().size() + 1,
177      stores[0].getStorefilesCount());
178
179    regions[1].compact(false);
180    assertEquals(flushCount - stores[1].getCompactedFiles().size() + 2,
181      stores[1].getStorefilesCount());
182
183    verifyCells();
184
185    regions[0].compact(true);
186    assertEquals(1, stores[0].getStorefilesCount());
187
188    regions[1].compact(true);
189    assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2,
190      stores[1].getStorefilesCount());
191
192    verifyCells();
193  }
194
195  private void verifyCells() throws Exception {
196    scanAndCompare(false);
197    scanAndCompare(true);
198  }
199
200  private void flushRegion() throws Exception {
201    regions[0].flush(true);
202    regions[1].flush(true);
203    flushCount++;
204  }
205
206  private Long getRowTimestamp(int row) {
207    Long maxTimestamp = null;
208    for (int q = 0; q < qualifiers.length; q++) {
209      int size = insertedCells.get(row).get(q).size();
210      if (size > 0) {
211        CellInfo mostRecentCellInfo = insertedCells.get(row).get(q).get(size - 1);
212        if (mostRecentCellInfo.type == Cell.Type.Put) {
213          if (maxTimestamp == null || maxTimestamp < mostRecentCellInfo.timestamp) {
214            maxTimestamp = mostRecentCellInfo.timestamp;
215          }
216        }
217      }
218    }
219    return maxTimestamp;
220  }
221
222  private long getNewTimestamp(long timestamp) throws Exception {
223    long newTimestamp = System.currentTimeMillis();
224    if (timestamp == newTimestamp) {
225      Thread.sleep(1);
226      newTimestamp = System.currentTimeMillis();
227      assertTrue(timestamp < newTimestamp);
228    }
229    return newTimestamp;
230  }
231
232  private void insertRows(int rowCount) throws Exception {
233    int row;
234    long timestamp = System.currentTimeMillis();
235    for (int r = 0; r < rowCount; r++) {
236      row = RANDOM.nextInt(ROW_NUM);
237      Put put = new Put(Bytes.toBytes(String.valueOf(row)), timestamp);
238      for (int q = 0; q < qualifiers.length; q++) {
239        put.addColumn(HBaseTestingUtil.fam1, qualifiers[q],
240          Bytes.toBytes(String.valueOf(timestamp)));
241        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put));
242      }
243      regions[0].put(put);
244      regions[1].put(put);
245      timestamp = getNewTimestamp(timestamp);
246    }
247  }
248
249  private void deleteRows(int rowCount) throws Exception {
250    int row;
251    for (int r = 0; r < rowCount; r++) {
252      long timestamp = System.currentTimeMillis();
253      row = RANDOM.nextInt(ROW_NUM);
254      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
255      regions[0].delete(delete);
256      regions[1].delete(delete);
257      // For simplicity, the family delete markers are inserted for all columns (instead of
258      // allocating a separate column for them) in the memory representation of the data stored
259      // to HBase
260      for (int q = 0; q < qualifiers.length; q++) {
261        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamily));
262      }
263    }
264  }
265
266  private void deleteSingleRowVersion(int row, long timestamp) throws IOException {
267    Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
268    delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp);
269    regions[0].delete(delete);
270    regions[1].delete(delete);
271    // For simplicity, the family delete version markers are inserted for all columns (instead of
272    // allocating a separate column for them) in the memory representation of the data stored
273    // to HBase
274    for (int q = 0; q < qualifiers.length; q++) {
275      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion));
276    }
277  }
278
279  private void deleteRowVersions(int rowCount) throws Exception {
280    int row;
281    for (int r = 0; r < rowCount; r++) {
282      row = RANDOM.nextInt(ROW_NUM);
283      Long timestamp = getRowTimestamp(row);
284      if (timestamp != null) {
285        deleteSingleRowVersion(row, timestamp);
286      }
287    }
288    // Just insert one more delete marker possibly does not delete any row version
289    row = RANDOM.nextInt(ROW_NUM);
290    deleteSingleRowVersion(row, System.currentTimeMillis());
291  }
292
293  private void deleteColumns(int rowCount) throws Exception {
294    int row;
295    for (int r = 0; r < rowCount; r++) {
296      long timestamp = System.currentTimeMillis();
297      row = RANDOM.nextInt(ROW_NUM);
298      int q = RANDOM.nextInt(qualifiers.length);
299      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)), timestamp);
300      delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
301      regions[0].delete(delete);
302      regions[1].delete(delete);
303      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteColumn));
304    }
305  }
306
307  private void deleteColumnVersions(int rowCount) throws Exception {
308    int row;
309    for (int r = 0; r < rowCount; r++) {
310      row = RANDOM.nextInt(ROW_NUM);
311      Long timestamp = getRowTimestamp(row);
312      if (timestamp != null) {
313        Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
314        int q = RANDOM.nextInt(qualifiers.length);
315        delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
316        regions[0].delete(delete);
317        regions[1].delete(delete);
318        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete));
319      }
320    }
321  }
322
323  private Scan createScan(boolean raw) {
324    Scan scan = new Scan();
325    scan.readAllVersions();
326    scan.setRaw(raw);
327    return scan;
328  }
329
330  private void scanAndCompare(boolean raw) throws Exception {
331    try (RegionScanner firstRS = regions[0].getScanner(createScan(raw))) {
332      try (RegionScanner secondRS = regions[1].getScanner(createScan(raw))) {
333        boolean firstHasMore;
334        boolean secondHasMore;
335        do {
336          List<Cell> firstRowList = new ArrayList<>();
337          List<Cell> secondRowList = new ArrayList<>();
338          firstHasMore = firstRS.nextRaw(firstRowList);
339          secondHasMore = secondRS.nextRaw(secondRowList);
340          assertEquals(firstRowList.size(), secondRowList.size());
341          int size = firstRowList.size();
342          for (int i = 0; i < size; i++) {
343            Cell firstCell = firstRowList.get(i);
344            Cell secondCell = secondRowList.get(i);
345            assertTrue(CellUtil.matchingRowColumn(firstCell, secondCell));
346            assertTrue(firstCell.getType() == secondCell.getType());
347            assertTrue(
348              Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(firstCell)));
349          }
350        } while (firstHasMore && secondHasMore);
351        assertEquals(firstHasMore, secondHasMore);
352      }
353    }
354  }
355}