001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.Iterator; 028import java.util.List; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataOutputStream; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.hbase.ArrayBackedTag; 034import org.apache.hadoop.hbase.ByteBufferKeyValue; 035import org.apache.hadoop.hbase.Cell; 036import org.apache.hadoop.hbase.ExtendedCell; 037import org.apache.hadoop.hbase.HBaseClassTestRule; 038import org.apache.hadoop.hbase.HBaseTestingUtil; 039import org.apache.hadoop.hbase.HConstants; 040import org.apache.hadoop.hbase.KeyValue; 041import org.apache.hadoop.hbase.PrivateCellUtil; 042import org.apache.hadoop.hbase.Tag; 043import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 044import org.apache.hadoop.hbase.testclassification.IOTests; 045import org.apache.hadoop.hbase.testclassification.SmallTests; 046import org.apache.hadoop.hbase.util.Bytes; 047import org.junit.Before; 048import org.junit.ClassRule; 049import org.junit.Test; 050import org.junit.experimental.categories.Category; 051import org.junit.runner.RunWith; 052import org.junit.runners.Parameterized; 053import org.junit.runners.Parameterized.Parameters; 054 055/** 056 * Test {@link HFileScanner#seekTo(Cell)} and its variants. 057 */ 058@Category({ IOTests.class, SmallTests.class }) 059@RunWith(Parameterized.class) 060public class TestSeekTo { 061 062 @ClassRule 063 public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestSeekTo.class); 064 065 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 066 private final DataBlockEncoding encoding; 067 068 @Parameters 069 public static Collection<Object[]> parameters() { 070 List<Object[]> paramList = new ArrayList<>(); 071 for (DataBlockEncoding encoding : DataBlockEncoding.values()) { 072 paramList.add(new Object[] { encoding }); 073 } 074 return paramList; 075 } 076 077 static boolean switchKVs = false; 078 079 public TestSeekTo(DataBlockEncoding encoding) { 080 this.encoding = encoding; 081 } 082 083 @Before 084 public void setUp() { 085 // reset 086 switchKVs = false; 087 } 088 089 static KeyValue toKV(String row, TagUsage tagUsage) { 090 if (tagUsage == TagUsage.NO_TAG) { 091 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 092 Bytes.toBytes("value")); 093 } else if (tagUsage == TagUsage.ONLY_TAG) { 094 Tag t = new ArrayBackedTag((byte) 1, "myTag1"); 095 Tag[] tags = new Tag[1]; 096 tags[0] = t; 097 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 098 HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); 099 } else { 100 if (!switchKVs) { 101 switchKVs = true; 102 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 103 HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value")); 104 } else { 105 switchKVs = false; 106 Tag t = new ArrayBackedTag((byte) 1, "myTag1"); 107 Tag[] tags = new Tag[1]; 108 tags[0] = t; 109 return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), 110 HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); 111 } 112 } 113 } 114 115 static String toRowStr(Cell c) { 116 return Bytes.toString(c.getRowArray(), c.getRowOffset(), c.getRowLength()); 117 } 118 119 Path makeNewFile(TagUsage tagUsage) throws IOException { 120 Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile"); 121 FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile); 122 int blocksize = toKV("a", tagUsage).getLength() * 3; 123 HFileContext context = new HFileContextBuilder().withBlockSize(blocksize) 124 .withDataBlockEncoding(encoding).withIncludesTags(true).build(); 125 Configuration conf = TEST_UTIL.getConfiguration(); 126 HFile.Writer writer = 127 HFile.getWriterFactoryNoCache(conf).withOutputStream(fout).withFileContext(context).create(); 128 // 4 bytes * 3 * 2 for each key/value + 129 // 3 for keys, 15 for values = 42 (woot) 130 writer.append(toKV("c", tagUsage)); 131 writer.append(toKV("e", tagUsage)); 132 writer.append(toKV("g", tagUsage)); 133 // block transition 134 writer.append(toKV("i", tagUsage)); 135 writer.append(toKV("k", tagUsage)); 136 writer.close(); 137 fout.close(); 138 return ncTFile; 139 } 140 141 @Test 142 public void testSeekBefore() throws Exception { 143 testSeekBeforeInternals(TagUsage.NO_TAG); 144 testSeekBeforeInternals(TagUsage.ONLY_TAG); 145 testSeekBeforeInternals(TagUsage.PARTIAL_TAG); 146 } 147 148 protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException { 149 Path p = makeNewFile(tagUsage); 150 FileSystem fs = TEST_UTIL.getTestFileSystem(); 151 Configuration conf = TEST_UTIL.getConfiguration(); 152 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 153 HFileScanner scanner = reader.getScanner(conf, false, true); 154 assertFalse(scanner.seekBefore(toKV("a", tagUsage))); 155 156 assertFalse(scanner.seekBefore(toKV("c", tagUsage))); 157 158 assertTrue(scanner.seekBefore(toKV("d", tagUsage))); 159 assertEquals("c", toRowStr(scanner.getCell())); 160 161 assertTrue(scanner.seekBefore(toKV("e", tagUsage))); 162 assertEquals("c", toRowStr(scanner.getCell())); 163 164 assertTrue(scanner.seekBefore(toKV("f", tagUsage))); 165 assertEquals("e", toRowStr(scanner.getCell())); 166 167 assertTrue(scanner.seekBefore(toKV("g", tagUsage))); 168 assertEquals("e", toRowStr(scanner.getCell())); 169 assertTrue(scanner.seekBefore(toKV("h", tagUsage))); 170 assertEquals("g", toRowStr(scanner.getCell())); 171 assertTrue(scanner.seekBefore(toKV("i", tagUsage))); 172 assertEquals("g", toRowStr(scanner.getCell())); 173 assertTrue(scanner.seekBefore(toKV("j", tagUsage))); 174 assertEquals("i", toRowStr(scanner.getCell())); 175 ExtendedCell cell = scanner.getCell(); 176 if (tagUsage != TagUsage.NO_TAG && cell.getTagsLength() > 0) { 177 Iterator<Tag> tagsIterator = PrivateCellUtil.tagsIterator(cell); 178 while (tagsIterator.hasNext()) { 179 Tag next = tagsIterator.next(); 180 assertEquals("myTag1", Bytes.toString(Tag.cloneValue(next))); 181 } 182 } 183 assertTrue(scanner.seekBefore(toKV("k", tagUsage))); 184 assertEquals("i", toRowStr(scanner.getCell())); 185 assertTrue(scanner.seekBefore(toKV("l", tagUsage))); 186 assertEquals("k", toRowStr(scanner.getCell())); 187 188 reader.close(); 189 deleteTestDir(fs); 190 } 191 192 protected void deleteTestDir(FileSystem fs) throws IOException { 193 Path dataTestDir = TEST_UTIL.getDataTestDir(); 194 if (fs.exists(dataTestDir)) { 195 fs.delete(dataTestDir, true); 196 } 197 } 198 199 @Test 200 public void testSeekBeforeWithReSeekTo() throws Exception { 201 testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG); 202 testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG); 203 testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG); 204 } 205 206 protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException { 207 Path p = makeNewFile(tagUsage); 208 FileSystem fs = TEST_UTIL.getTestFileSystem(); 209 Configuration conf = TEST_UTIL.getConfiguration(); 210 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 211 HFileScanner scanner = reader.getScanner(conf, false, true); 212 assertFalse(scanner.seekBefore(toKV("a", tagUsage))); 213 assertFalse(scanner.seekBefore(toKV("b", tagUsage))); 214 assertFalse(scanner.seekBefore(toKV("c", tagUsage))); 215 216 // seekBefore d, so the scanner points to c 217 assertTrue(scanner.seekBefore(toKV("d", tagUsage))); 218 assertFalse(scanner.getCell() instanceof ByteBufferKeyValue); 219 assertEquals("c", toRowStr(scanner.getCell())); 220 // reseekTo e and g 221 assertEquals(0, scanner.reseekTo(toKV("c", tagUsage))); 222 assertEquals("c", toRowStr(scanner.getCell())); 223 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 224 assertEquals("g", toRowStr(scanner.getCell())); 225 226 // seekBefore e, so the scanner points to c 227 assertTrue(scanner.seekBefore(toKV("e", tagUsage))); 228 assertEquals("c", toRowStr(scanner.getCell())); 229 // reseekTo e and g 230 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 231 assertEquals("e", toRowStr(scanner.getCell())); 232 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 233 assertEquals("g", toRowStr(scanner.getCell())); 234 235 // seekBefore f, so the scanner points to e 236 assertTrue(scanner.seekBefore(toKV("f", tagUsage))); 237 assertEquals("e", toRowStr(scanner.getCell())); 238 // reseekTo e and g 239 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 240 assertEquals("e", toRowStr(scanner.getCell())); 241 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 242 assertEquals("g", toRowStr(scanner.getCell())); 243 244 // seekBefore g, so the scanner points to e 245 assertTrue(scanner.seekBefore(toKV("g", tagUsage))); 246 assertEquals("e", toRowStr(scanner.getCell())); 247 // reseekTo e and g again 248 assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); 249 assertEquals("e", toRowStr(scanner.getCell())); 250 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 251 assertEquals("g", toRowStr(scanner.getCell())); 252 253 // seekBefore h, so the scanner points to g 254 assertTrue(scanner.seekBefore(toKV("h", tagUsage))); 255 assertEquals("g", toRowStr(scanner.getCell())); 256 // reseekTo g 257 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 258 assertEquals("g", toRowStr(scanner.getCell())); 259 260 // seekBefore i, so the scanner points to g 261 assertTrue(scanner.seekBefore(toKV("i", tagUsage))); 262 assertEquals("g", toRowStr(scanner.getCell())); 263 // reseekTo g 264 assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); 265 assertEquals("g", toRowStr(scanner.getCell())); 266 267 // seekBefore j, so the scanner points to i 268 assertTrue(scanner.seekBefore(toKV("j", tagUsage))); 269 assertEquals("i", toRowStr(scanner.getCell())); 270 // reseekTo i 271 assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); 272 assertEquals("i", toRowStr(scanner.getCell())); 273 274 // seekBefore k, so the scanner points to i 275 assertTrue(scanner.seekBefore(toKV("k", tagUsage))); 276 assertEquals("i", toRowStr(scanner.getCell())); 277 // reseekTo i and k 278 assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); 279 assertEquals("i", toRowStr(scanner.getCell())); 280 assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); 281 assertEquals("k", toRowStr(scanner.getCell())); 282 283 // seekBefore l, so the scanner points to k 284 assertTrue(scanner.seekBefore(toKV("l", tagUsage))); 285 assertEquals("k", toRowStr(scanner.getCell())); 286 // reseekTo k 287 assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); 288 assertEquals("k", toRowStr(scanner.getCell())); 289 deleteTestDir(fs); 290 } 291 292 @Test 293 public void testSeekTo() throws Exception { 294 testSeekToInternals(TagUsage.NO_TAG); 295 testSeekToInternals(TagUsage.ONLY_TAG); 296 testSeekToInternals(TagUsage.PARTIAL_TAG); 297 } 298 299 protected void testSeekToInternals(TagUsage tagUsage) throws IOException { 300 Path p = makeNewFile(tagUsage); 301 FileSystem fs = TEST_UTIL.getTestFileSystem(); 302 Configuration conf = TEST_UTIL.getConfiguration(); 303 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 304 assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount()); 305 HFileScanner scanner = reader.getScanner(conf, false, true); 306 // lies before the start of the file. 307 assertEquals(-1, scanner.seekTo(toKV("a", tagUsage))); 308 309 assertEquals(1, scanner.seekTo(toKV("d", tagUsage))); 310 assertEquals("c", toRowStr(scanner.getCell())); 311 312 // Across a block boundary now. 313 // 'h' does not exist so we will get a '1' back for not found. 314 assertEquals(0, scanner.seekTo(toKV("i", tagUsage))); 315 assertEquals("i", toRowStr(scanner.getCell())); 316 317 assertEquals(1, scanner.seekTo(toKV("l", tagUsage))); 318 assertEquals("k", toRowStr(scanner.getCell())); 319 320 reader.close(); 321 deleteTestDir(fs); 322 } 323 324 @Test 325 public void testBlockContainingKey() throws Exception { 326 testBlockContainingKeyInternals(TagUsage.NO_TAG); 327 testBlockContainingKeyInternals(TagUsage.ONLY_TAG); 328 testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG); 329 } 330 331 protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException { 332 Path p = makeNewFile(tagUsage); 333 FileSystem fs = TEST_UTIL.getTestFileSystem(); 334 Configuration conf = TEST_UTIL.getConfiguration(); 335 HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf); 336 HFileBlockIndex.BlockIndexReader blockIndexReader = reader.getDataBlockIndexReader(); 337 System.out.println(blockIndexReader.toString()); 338 // falls before the start of the file. 339 assertEquals(-1, blockIndexReader.rootBlockContainingKey(toKV("a", tagUsage))); 340 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("c", tagUsage))); 341 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("d", tagUsage))); 342 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("e", tagUsage))); 343 assertEquals(0, blockIndexReader.rootBlockContainingKey(toKV("g", tagUsage))); 344 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("h", tagUsage))); 345 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("i", tagUsage))); 346 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("j", tagUsage))); 347 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("k", tagUsage))); 348 assertEquals(1, blockIndexReader.rootBlockContainingKey(toKV("l", tagUsage))); 349 reader.close(); 350 deleteTestDir(fs); 351 } 352}