001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import static org.junit.Assert.assertEquals; 021 022import java.util.HashMap; 023import java.util.Map; 024import org.apache.hadoop.fs.FileStatus; 025import org.apache.hadoop.fs.FileSystem; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.HBaseClassTestRule; 028import org.apache.hadoop.hbase.HBaseTestingUtil; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.client.Put; 031import org.apache.hadoop.hbase.client.Table; 032import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 033import org.apache.hadoop.hbase.testclassification.LargeTests; 034import org.apache.hadoop.hbase.util.Bytes; 035import org.apache.hadoop.io.MapFile; 036import org.junit.AfterClass; 037import org.junit.Assert; 038import org.junit.BeforeClass; 039import org.junit.ClassRule; 040import org.junit.Rule; 041import org.junit.Test; 042import org.junit.experimental.categories.Category; 043import org.junit.rules.TestName; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; 048import org.apache.hbase.thirdparty.com.google.common.collect.Maps; 049 050/** 051 * Basic test for the HashTable M/R tool 052 */ 053@Category(LargeTests.class) 054public class TestHashTable { 055 @ClassRule 056 public static final HBaseClassTestRule CLASS_RULE = 057 HBaseClassTestRule.forClass(TestHashTable.class); 058 059 private static final Logger LOG = LoggerFactory.getLogger(TestHashTable.class); 060 061 private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 062 063 @Rule 064 public TestName name = new TestName(); 065 066 @BeforeClass 067 public static void beforeClass() throws Exception { 068 TEST_UTIL.startMiniCluster(3); 069 } 070 071 @AfterClass 072 public static void afterClass() throws Exception { 073 TEST_UTIL.shutdownMiniCluster(); 074 } 075 076 @Test 077 public void testHashTable() throws Exception { 078 final TableName tableName = TableName.valueOf(name.getMethodName()); 079 final byte[] family = Bytes.toBytes("family"); 080 final byte[] column1 = Bytes.toBytes("c1"); 081 final byte[] column2 = Bytes.toBytes("c2"); 082 final byte[] column3 = Bytes.toBytes("c3"); 083 084 int numRows = 100; 085 int numRegions = 10; 086 int numHashFiles = 3; 087 088 byte[][] splitRows = new byte[numRegions - 1][]; 089 for (int i = 1; i < numRegions; i++) { 090 splitRows[i - 1] = Bytes.toBytes(numRows * i / numRegions); 091 } 092 093 long timestamp = 1430764183454L; 094 // put rows into the first table 095 Table t1 = TEST_UTIL.createTable(tableName, family, splitRows); 096 for (int i = 0; i < numRows; i++) { 097 Put p = new Put(Bytes.toBytes(i), timestamp); 098 p.addColumn(family, column1, column1); 099 p.addColumn(family, column2, column2); 100 p.addColumn(family, column3, column3); 101 t1.put(p); 102 } 103 t1.close(); 104 105 HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration()); 106 107 Path testDir = TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString()); 108 109 long batchSize = 300; 110 int code = 111 hashTable.run(new String[] { "--batchsize=" + batchSize, "--numhashfiles=" + numHashFiles, 112 "--scanbatch=2", tableName.getNameAsString(), testDir.toString() }); 113 assertEquals("test job failed", 0, code); 114 115 FileSystem fs = TEST_UTIL.getTestFileSystem(); 116 117 HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir); 118 assertEquals(tableName.getNameAsString(), tableHash.tableName); 119 assertEquals(batchSize, tableHash.batchSize); 120 assertEquals(numHashFiles, tableHash.numHashFiles); 121 assertEquals(numHashFiles - 1, tableHash.partitions.size()); 122 for (ImmutableBytesWritable bytes : tableHash.partitions) { 123 LOG.debug("partition: " + Bytes.toInt(bytes.get())); 124 } 125 126 ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes = 127 ImmutableMap.<Integer, ImmutableBytesWritable> builder() 128 .put(-1, new ImmutableBytesWritable(Bytes.fromHex("714cb10a9e3b5569852980edd8c6ca2f"))) 129 .put(5, new ImmutableBytesWritable(Bytes.fromHex("28d961d9252ce8f8d44a07b38d3e1d96"))) 130 .put(10, new ImmutableBytesWritable(Bytes.fromHex("f6bbc4a224d8fd929b783a92599eaffa"))) 131 .put(15, new ImmutableBytesWritable(Bytes.fromHex("522deb5d97f73a414ecc11457be46881"))) 132 .put(20, new ImmutableBytesWritable(Bytes.fromHex("b026f2611aaa46f7110116d807545352"))) 133 .put(25, new ImmutableBytesWritable(Bytes.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93"))) 134 .put(30, new ImmutableBytesWritable(Bytes.fromHex("f6b4d75727ce9a30ac29e4f08f601666"))) 135 .put(35, new ImmutableBytesWritable(Bytes.fromHex("422e2d2f1eb79a8f02171a705a42c090"))) 136 .put(40, new ImmutableBytesWritable(Bytes.fromHex("559ad61c900fffefea0a15abf8a97bc3"))) 137 .put(45, new ImmutableBytesWritable(Bytes.fromHex("23019084513eca41cee436b2a29611cb"))) 138 .put(50, new ImmutableBytesWritable(Bytes.fromHex("b40467d222ddb4949b142fe145ee9edc"))) 139 .put(55, new ImmutableBytesWritable(Bytes.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4"))) 140 .put(60, new ImmutableBytesWritable(Bytes.fromHex("69ae0585e6255de27dce974e332b8f8b"))) 141 .put(65, new ImmutableBytesWritable(Bytes.fromHex("8029610044297aad0abdbecd485d8e59"))) 142 .put(70, new ImmutableBytesWritable(Bytes.fromHex("de5f784f7f78987b6e57ecfd81c8646f"))) 143 .put(75, new ImmutableBytesWritable(Bytes.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56"))) 144 .put(80, new ImmutableBytesWritable(Bytes.fromHex("f9a53aacfeb6142b08066615e7038095"))) 145 .put(85, new ImmutableBytesWritable(Bytes.fromHex("89b872b7e639df32d3276b33928c0c91"))) 146 .put(90, new ImmutableBytesWritable(Bytes.fromHex("45eeac0646d46a474ea0484175faed38"))) 147 .put(95, new ImmutableBytesWritable(Bytes.fromHex("f57c447e32a08f4bf1abb2892839ac56"))) 148 .build(); 149 150 Map<Integer, ImmutableBytesWritable> actualHashes = new HashMap<>(); 151 Path dataDir = new Path(testDir, HashTable.HASH_DATA_DIR); 152 for (int i = 0; i < numHashFiles; i++) { 153 Path hashPath = new Path(dataDir, HashTable.TableHash.getDataFileName(i)); 154 155 MapFile.Reader reader = new MapFile.Reader(hashPath, fs.getConf()); 156 ImmutableBytesWritable key = new ImmutableBytesWritable(); 157 ImmutableBytesWritable hash = new ImmutableBytesWritable(); 158 while (reader.next(key, hash)) { 159 String keyString = Bytes.toHex(key.get(), key.getOffset(), key.getLength()); 160 LOG.debug("Key: " + (keyString.isEmpty() ? "-1" : Integer.parseInt(keyString, 16)) 161 + " Hash: " + Bytes.toHex(hash.get(), hash.getOffset(), hash.getLength())); 162 163 int intKey = -1; 164 if (key.getLength() > 0) { 165 intKey = Bytes.toInt(key.get(), key.getOffset(), key.getLength()); 166 } 167 if (actualHashes.containsKey(intKey)) { 168 Assert.fail("duplicate key in data files: " + intKey); 169 } 170 actualHashes.put(intKey, new ImmutableBytesWritable(hash.copyBytes())); 171 } 172 reader.close(); 173 } 174 175 FileStatus[] files = fs.listStatus(testDir); 176 for (FileStatus file : files) { 177 LOG.debug("Output file: " + file.getPath()); 178 } 179 180 files = fs.listStatus(dataDir); 181 for (FileStatus file : files) { 182 LOG.debug("Data file: " + file.getPath()); 183 } 184 185 if (!expectedHashes.equals(actualHashes)) { 186 LOG.error("Diff: " + Maps.difference(expectedHashes, actualHashes)); 187 } 188 Assert.assertEquals(expectedHashes, actualHashes); 189 190 TEST_UTIL.deleteTable(tableName); 191 TEST_UTIL.cleanupDataTestDirOnTestFS(); 192 } 193}