001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.Iterator;
028import java.util.List;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.hbase.ArrayBackedTag;
034import org.apache.hadoop.hbase.ByteBufferKeyValue;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.ExtendedCell;
037import org.apache.hadoop.hbase.HBaseClassTestRule;
038import org.apache.hadoop.hbase.HBaseTestingUtil;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.KeyValue;
041import org.apache.hadoop.hbase.PrivateCellUtil;
042import org.apache.hadoop.hbase.Tag;
043import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
044import org.apache.hadoop.hbase.testclassification.IOTests;
045import org.apache.hadoop.hbase.testclassification.SmallTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.junit.Before;
048import org.junit.ClassRule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051import org.junit.runner.RunWith;
052import org.junit.runners.Parameterized;
053import org.junit.runners.Parameterized.Parameters;
054
055/**
056 * Test {@link HFileScanner#seekTo(Cell)} and its variants.
057 */
058@Category({ IOTests.class, SmallTests.class })
059@RunWith(Parameterized.class)
060public class TestSeekTo {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestSeekTo.class);
064
065  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
066  private final DataBlockEncoding encoding;
067
068  @Parameters
069  public static Collection<Object[]> parameters() {
070    List<Object[]> paramList = new ArrayList<>();
071    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
072      paramList.add(new Object[] { encoding });
073    }
074    return paramList;
075  }
076
077  static boolean switchKVs = false;
078
079  public TestSeekTo(DataBlockEncoding encoding) {
080    this.encoding = encoding;
081  }
082
083  @Before
084  public void setUp() {
085    // reset
086    switchKVs = false;
087  }
088
089  static KeyValue toKV(String row, TagUsage tagUsage) {
090    if (tagUsage == TagUsage.NO_TAG) {
091      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
092        Bytes.toBytes("value"));
093    } else if (tagUsage == TagUsage.ONLY_TAG) {
094      Tag t = new ArrayBackedTag((byte) 1, "myTag1");
095      Tag[] tags = new Tag[1];
096      tags[0] = t;
097      return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
098        HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
099    } else {
100      if (!switchKVs) {
101        switchKVs = true;
102        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
103          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"));
104      } else {
105        switchKVs = false;
106        Tag t = new ArrayBackedTag((byte) 1, "myTag1");
107        Tag[] tags = new Tag[1];
108        tags[0] = t;
109        return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"),
110          HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags);
111      }
112    }
113  }
114
115  static String toRowStr(Cell c) {
116    return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength());
117  }
118
119  Path makeNewFile(TagUsage tagUsage) throws IOException {
120    Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile");
121    FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile);
122    int blocksize = toKV("a", tagUsage).getLength() * 3;
123    HFileContext context = new HFileContextBuilder().withBlockSize(blocksize)
124      .withDataBlockEncoding(encoding).withIncludesTags(true).build();
125    Configuration conf = TEST_UTIL.getConfiguration();
126    HFile.Writer writer =
127      HFile.getWriterFactoryNoCache(conf).withOutputStream(fout).withFileContext(context).create();
128    // 4 bytes * 3 * 2 for each key/value +
129    // 3 for keys, 15 for values = 42 (woot)
130    writer.append(toKV("c", tagUsage));
131    writer.append(toKV("e", tagUsage));
132    writer.append(toKV("g", tagUsage));
133    // block transition
134    writer.append(toKV("i", tagUsage));
135    writer.append(toKV("k", tagUsage));
136    writer.close();
137    fout.close();
138    return ncTFile;
139  }
140
141  @Test
142  public void testSeekBefore() throws Exception {
143    testSeekBeforeInternals(TagUsage.NO_TAG);
144    testSeekBeforeInternals(TagUsage.ONLY_TAG);
145    testSeekBeforeInternals(TagUsage.PARTIAL_TAG);
146  }
147
148  protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException {
149    Path p = makeNewFile(tagUsage);
150    FileSystem fs = TEST_UTIL.getTestFileSystem();
151    Configuration conf = TEST_UTIL.getConfiguration();
152    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
153    HFileScanner scanner = reader.getScanner(conf, false, true);
154    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
155
156    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
157
158    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
159    assertEquals("c", toRowStr(scanner.getCell()));
160
161    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
162    assertEquals("c", toRowStr(scanner.getCell()));
163
164    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
165    assertEquals("e", toRowStr(scanner.getCell()));
166
167    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
168    assertEquals("e", toRowStr(scanner.getCell()));
169    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
170    assertEquals("g", toRowStr(scanner.getCell()));
171    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
172    assertEquals("g", toRowStr(scanner.getCell()));
173    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
174    assertEquals("i", toRowStr(scanner.getCell()));
175    ExtendedCell cell = scanner.getCell();
176    if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) {
177      Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell);
178      while (tagsIterator.hasNext()) {
179        Tag next = tagsIterator.next();
180        assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next)));
181      }
182    }
183    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
184    assertEquals("i", toRowStr(scanner.getCell()));
185    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
186    assertEquals("k", toRowStr(scanner.getCell()));
187
188    reader.close();
189    deleteTestDir(fs);
190  }
191
192  protected void deleteTestDir(FileSystem fs) throws IOException {
193    Path dataTestDir = TEST_UTIL.getDataTestDir();
194    if (fs.exists(dataTestDir)) {
195      fs.delete(dataTestDir, true);
196    }
197  }
198
199  @Test
200  public void testSeekBeforeWithReSeekTo() throws Exception {
201    testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG);
202    testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG);
203    testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG);
204  }
205
206  protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException {
207    Path p = makeNewFile(tagUsage);
208    FileSystem fs = TEST_UTIL.getTestFileSystem();
209    Configuration conf = TEST_UTIL.getConfiguration();
210    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
211    HFileScanner scanner = reader.getScanner(conf, false, true);
212    assertFalse(scanner.seekBefore(toKV("a", tagUsage)));
213    assertFalse(scanner.seekBefore(toKV("b", tagUsage)));
214    assertFalse(scanner.seekBefore(toKV("c", tagUsage)));
215
216    // seekBefore d, so the scanner points to c
217    assertTrue(scanner.seekBefore(toKV("d", tagUsage)));
218    assertFalse(scanner.getCell() instanceof ByteBufferKeyValue);
219    assertEquals("c", toRowStr(scanner.getCell()));
220    // reseekTo e and g
221    assertEquals(0, scanner.reseekTo(toKV("c", tagUsage)));
222    assertEquals("c", toRowStr(scanner.getCell()));
223    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
224    assertEquals("g", toRowStr(scanner.getCell()));
225
226    // seekBefore e, so the scanner points to c
227    assertTrue(scanner.seekBefore(toKV("e", tagUsage)));
228    assertEquals("c", toRowStr(scanner.getCell()));
229    // reseekTo e and g
230    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
231    assertEquals("e", toRowStr(scanner.getCell()));
232    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
233    assertEquals("g", toRowStr(scanner.getCell()));
234
235    // seekBefore f, so the scanner points to e
236    assertTrue(scanner.seekBefore(toKV("f", tagUsage)));
237    assertEquals("e", toRowStr(scanner.getCell()));
238    // reseekTo e and g
239    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
240    assertEquals("e", toRowStr(scanner.getCell()));
241    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
242    assertEquals("g", toRowStr(scanner.getCell()));
243
244    // seekBefore g, so the scanner points to e
245    assertTrue(scanner.seekBefore(toKV("g", tagUsage)));
246    assertEquals("e", toRowStr(scanner.getCell()));
247    // reseekTo e and g again
248    assertEquals(0, scanner.reseekTo(toKV("e", tagUsage)));
249    assertEquals("e", toRowStr(scanner.getCell()));
250    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
251    assertEquals("g", toRowStr(scanner.getCell()));
252
253    // seekBefore h, so the scanner points to g
254    assertTrue(scanner.seekBefore(toKV("h", tagUsage)));
255    assertEquals("g", toRowStr(scanner.getCell()));
256    // reseekTo g
257    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
258    assertEquals("g", toRowStr(scanner.getCell()));
259
260    // seekBefore i, so the scanner points to g
261    assertTrue(scanner.seekBefore(toKV("i", tagUsage)));
262    assertEquals("g", toRowStr(scanner.getCell()));
263    // reseekTo g
264    assertEquals(0, scanner.reseekTo(toKV("g", tagUsage)));
265    assertEquals("g", toRowStr(scanner.getCell()));
266
267    // seekBefore j, so the scanner points to i
268    assertTrue(scanner.seekBefore(toKV("j", tagUsage)));
269    assertEquals("i", toRowStr(scanner.getCell()));
270    // reseekTo i
271    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
272    assertEquals("i", toRowStr(scanner.getCell()));
273
274    // seekBefore k, so the scanner points to i
275    assertTrue(scanner.seekBefore(toKV("k", tagUsage)));
276    assertEquals("i", toRowStr(scanner.getCell()));
277    // reseekTo i and k
278    assertEquals(0, scanner.reseekTo(toKV("i", tagUsage)));
279    assertEquals("i", toRowStr(scanner.getCell()));
280    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
281    assertEquals("k", toRowStr(scanner.getCell()));
282
283    // seekBefore l, so the scanner points to k
284    assertTrue(scanner.seekBefore(toKV("l", tagUsage)));
285    assertEquals("k", toRowStr(scanner.getCell()));
286    // reseekTo k
287    assertEquals(0, scanner.reseekTo(toKV("k", tagUsage)));
288    assertEquals("k", toRowStr(scanner.getCell()));
289    deleteTestDir(fs);
290  }
291
292  @Test
293  public void testSeekTo() throws Exception {
294    testSeekToInternals(TagUsage.NO_TAG);
295    testSeekToInternals(TagUsage.ONLY_TAG);
296    testSeekToInternals(TagUsage.PARTIAL_TAG);
297  }
298
299  protected void testSeekToInternals(TagUsage tagUsage) throws IOException {
300    Path p = makeNewFile(tagUsage);
301    FileSystem fs = TEST_UTIL.getTestFileSystem();
302    Configuration conf = TEST_UTIL.getConfiguration();
303    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
304    assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount());
305    HFileScanner scanner = reader.getScanner(conf, false, true);
306    // lies before the start of the file.
307    assertEquals(-1, scanner.seekTo(toKV("a", tagUsage)));
308
309    assertEquals(1, scanner.seekTo(toKV("d", tagUsage)));
310    assertEquals("c", toRowStr(scanner.getCell()));
311
312    // Across a block boundary now.
313    // 'h' does not exist so we will get a '1' back for not found.
314    assertEquals(0, scanner.seekTo(toKV("i", tagUsage)));
315    assertEquals("i", toRowStr(scanner.getCell()));
316
317    assertEquals(1, scanner.seekTo(toKV("l", tagUsage)));
318    assertEquals("k", toRowStr(scanner.getCell()));
319
320    reader.close();
321    deleteTestDir(fs);
322  }
323
324  @Test
325  public void testBlockContainingKey() throws Exception {
326    testBlockContainingKeyInternals(TagUsage.NO_TAG);
327    testBlockContainingKeyInternals(TagUsage.ONLY_TAG);
328    testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG);
329  }
330
331  protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException {
332    Path p = makeNewFile(tagUsage);
333    FileSystem fs = TEST_UTIL.getTestFileSystem();
334    Configuration conf = TEST_UTIL.getConfiguration();
335    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
336    HFileBlockIndex.BlockIndexReader blockIndexReader = reader.getDataBlockIndexReader();
337    System.out.println(blockIndexReader.toString());
338    // falls before the start of the file.
339    assertEquals(-1, blockIndexReader.rootBlockContainingKey(toKV("a", tagUsage)));
340    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("c", tagUsage)));
341    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("d", tagUsage)));
342    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("e", tagUsage)));
343    assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("g", tagUsage)));
344    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage)));
345    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("i", tagUsage)));
346    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("j", tagUsage)));
347    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("k", tagUsage)));
348    assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("l", tagUsage)));
349    reader.close();
350    deleteTestDir(fs);
351  }
352}