001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertNull; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.List; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileSystem; 029import org.apache.hadoop.fs.Path; 030import org.apache.hadoop.hbase.Cell; 031import org.apache.hadoop.hbase.CellComparatorImpl; 032import org.apache.hadoop.hbase.CellUtil; 033import org.apache.hadoop.hbase.HBaseClassTestRule; 034import org.apache.hadoop.hbase.HBaseTestingUtility; 035import org.apache.hadoop.hbase.KeyValue; 036import org.apache.hadoop.hbase.KeyValueUtil; 037import org.apache.hadoop.hbase.io.hfile.CacheConfig; 038import org.apache.hadoop.hbase.io.hfile.HFile; 039import org.apache.hadoop.hbase.io.hfile.HFileContext; 040import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; 041import org.apache.hadoop.hbase.io.hfile.HFileScanner; 042import org.apache.hadoop.hbase.io.hfile.ReaderContext; 043import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; 044import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 045import org.apache.hadoop.hbase.testclassification.IOTests; 046import org.apache.hadoop.hbase.testclassification.SmallTests; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.junit.AfterClass; 049import org.junit.BeforeClass; 050import org.junit.ClassRule; 051import org.junit.Test; 052import org.junit.experimental.categories.Category; 053 054@Category({ IOTests.class, SmallTests.class }) 055public class TestHalfStoreFileReader { 056 057 @ClassRule 058 public static final HBaseClassTestRule CLASS_RULE = 059 HBaseClassTestRule.forClass(TestHalfStoreFileReader.class); 060 061 private static HBaseTestingUtility TEST_UTIL; 062 063 @BeforeClass 064 public static void setupBeforeClass() throws Exception { 065 TEST_UTIL = new HBaseTestingUtility(); 066 } 067 068 @AfterClass 069 public static void tearDownAfterClass() throws Exception { 070 TEST_UTIL.cleanupTestDir(); 071 } 072 073 /** 074 * Test the scanner and reseek of a half hfile scanner. The scanner API demands that seekTo and 075 * reseekTo() only return < 0 if the key lies before the start of the file (with no position on 076 * the scanner). Returning 0 if perfect match (rare), and return > 1 if we got an imperfect match. 077 * The latter case being the most common, we should generally be returning 1, and if we do, there 078 * may or may not be a 'next' in the scanner/file. A bug in the half file scanner was returning -1 079 * at the end of the bottom half, and that was causing the infrastructure above to go null causing 080 * NPEs and other problems. This test reproduces that failure, and also tests both the bottom and 081 * top of the file while we are at it. 082 */ 083 @Test 084 public void testHalfScanAndReseek() throws IOException { 085 String root_dir = TEST_UTIL.getDataTestDir().toString(); 086 Path p = new Path(root_dir, "test"); 087 088 Configuration conf = TEST_UTIL.getConfiguration(); 089 FileSystem fs = FileSystem.get(conf); 090 CacheConfig cacheConf = new CacheConfig(conf); 091 HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build(); 092 HFile.Writer w = 093 HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create(); 094 095 // write some things. 096 List<KeyValue> items = genSomeKeys(); 097 for (KeyValue kv : items) { 098 w.append(kv); 099 } 100 w.close(); 101 102 HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf); 103 Cell midKV = r.midKey().get(); 104 byte[] midkey = CellUtil.cloneRow(midKV); 105 106 // System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey)); 107 108 Reference bottom = new Reference(midkey, Reference.Range.bottom); 109 doTestOfScanAndReseek(p, fs, bottom, cacheConf); 110 111 Reference top = new Reference(midkey, Reference.Range.top); 112 doTestOfScanAndReseek(p, fs, top, cacheConf); 113 114 r.close(); 115 } 116 117 private void doTestOfScanAndReseek(Path p, FileSystem fs, Reference bottom, CacheConfig cacheConf) 118 throws IOException { 119 ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build(); 120 StoreFileInfo storeFileInfo = 121 new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, fs.getFileStatus(p), bottom); 122 storeFileInfo.initHFileInfo(context); 123 final HalfStoreFileReader halfreader = 124 (HalfStoreFileReader) storeFileInfo.createReader(context, cacheConf); 125 storeFileInfo.getHFileInfo().initMetaAndIndex(halfreader.getHFileReader()); 126 halfreader.loadFileInfo(); 127 final HFileScanner scanner = halfreader.getScanner(false, false); 128 129 scanner.seekTo(); 130 Cell curr; 131 do { 132 curr = scanner.getCell(); 133 KeyValue reseekKv = getLastOnCol(curr); 134 int ret = scanner.reseekTo(reseekKv); 135 assertTrue("reseek to returned: " + ret, ret > 0); 136 // System.out.println(curr + ": " + ret); 137 } while (scanner.next()); 138 139 int ret = scanner.reseekTo(getLastOnCol(curr)); 140 // System.out.println("Last reseek: " + ret); 141 assertTrue(ret > 0); 142 143 halfreader.close(true); 144 } 145 146 // Tests the scanner on an HFile that is backed by HalfStoreFiles 147 @Test 148 public void testHalfScanner() throws IOException { 149 String root_dir = TEST_UTIL.getDataTestDir().toString(); 150 Path p = new Path(root_dir, "test"); 151 Configuration conf = TEST_UTIL.getConfiguration(); 152 FileSystem fs = FileSystem.get(conf); 153 CacheConfig cacheConf = new CacheConfig(conf); 154 HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build(); 155 HFile.Writer w = 156 HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create(); 157 158 // write some things. 159 List<KeyValue> items = genSomeKeys(); 160 for (KeyValue kv : items) { 161 w.append(kv); 162 } 163 w.close(); 164 165 HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf); 166 Cell midKV = r.midKey().get(); 167 byte[] midkey = CellUtil.cloneRow(midKV); 168 169 Reference bottom = new Reference(midkey, Reference.Range.bottom); 170 Reference top = new Reference(midkey, Reference.Range.top); 171 172 // Ugly code to get the item before the midkey 173 KeyValue beforeMidKey = null; 174 for (KeyValue item : items) { 175 if (CellComparatorImpl.COMPARATOR.compare(item, midKV) >= 0) { 176 break; 177 } 178 beforeMidKey = item; 179 } 180 System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey)); 181 System.out.println("beforeMidKey: " + beforeMidKey); 182 183 // Seek on the splitKey, should be in top, not in bottom 184 Cell foundKeyValue = doTestOfSeekBefore(p, fs, bottom, midKV, cacheConf); 185 assertEquals(beforeMidKey, foundKeyValue); 186 187 // Seek tot the last thing should be the penultimate on the top, the one before the midkey on 188 // the bottom. 189 foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(items.size() - 1), cacheConf); 190 assertEquals(items.get(items.size() - 2), foundKeyValue); 191 192 foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(items.size() - 1), cacheConf); 193 assertEquals(beforeMidKey, foundKeyValue); 194 195 // Try and seek before something that is in the bottom. 196 foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(0), cacheConf); 197 assertNull(foundKeyValue); 198 199 // Try and seek before the first thing. 200 foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(0), cacheConf); 201 assertNull(foundKeyValue); 202 203 // Try and seek before the second thing in the top and bottom. 204 foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(1), cacheConf); 205 assertNull(foundKeyValue); 206 207 foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(1), cacheConf); 208 assertEquals(items.get(0), foundKeyValue); 209 210 // Try to seek before the splitKey in the top file 211 foundKeyValue = doTestOfSeekBefore(p, fs, top, midKV, cacheConf); 212 assertNull(foundKeyValue); 213 } 214 215 private Cell doTestOfSeekBefore(Path p, FileSystem fs, Reference bottom, Cell seekBefore, 216 CacheConfig cacheConfig) throws IOException { 217 ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build(); 218 StoreFileInfo storeFileInfo = 219 new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, fs.getFileStatus(p), bottom); 220 storeFileInfo.initHFileInfo(context); 221 final HalfStoreFileReader halfreader = 222 (HalfStoreFileReader) storeFileInfo.createReader(context, cacheConfig); 223 storeFileInfo.getHFileInfo().initMetaAndIndex(halfreader.getHFileReader()); 224 halfreader.loadFileInfo(); 225 final HFileScanner scanner = halfreader.getScanner(false, false); 226 scanner.seekBefore(seekBefore); 227 return scanner.getCell(); 228 } 229 230 private KeyValue getLastOnCol(Cell curr) { 231 return KeyValueUtil.createLastOnRow(curr.getRowArray(), curr.getRowOffset(), 232 curr.getRowLength(), curr.getFamilyArray(), curr.getFamilyOffset(), curr.getFamilyLength(), 233 curr.getQualifierArray(), curr.getQualifierOffset(), curr.getQualifierLength()); 234 } 235 236 static final int SIZE = 1000; 237 238 static byte[] _b(String s) { 239 return Bytes.toBytes(s); 240 } 241 242 List<KeyValue> genSomeKeys() { 243 List<KeyValue> ret = new ArrayList<>(SIZE); 244 for (int i = 0; i < SIZE; i++) { 245 KeyValue kv = 246 new KeyValue(_b(String.format("row_%04d", i)), _b("family"), _b("qualifier"), 1000, // timestamp 247 _b("value")); 248 ret.add(kv); 249 } 250 return ret; 251 } 252}