/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.StartTestingClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.testclassification.FlakeyTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ConcurrentMapUtils;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;

import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry;

/**
 * Test region replication when errors occur.
 * <p/>
 * We can not simply move the secondary replicas as we will trigger a flush for the primary replica
 * when secondary replica is online, which will always make the data of the two regions in sync. So
 * here we need to simulate request errors.
 */
@Category({ FlakeyTests.class, LargeTests.class })
public class TestRegionReplicaReplicationError {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRegionReplicaReplicationError.class);

  /**
   * RPC services which reject the first replication requests sent to replica
   * {@value #FAILING_REPLICA_ID} of every region, and delegate to the real implementation
   * afterwards.
   */
  public static final class ErrorReplayRSRpcServices extends RSRpcServices {

    /** Replica id whose incoming replication requests are rejected at first. */
    private static final int FAILING_REPLICA_ID = 1;

    /**
     * Requests to the failing replica are rejected while the running total of WAL entries offered
     * to it, the current request included, is still below this value.
     */
    private static final int ENTRIES_BEFORE_SUCCESS = 100;

    /** Per region running total of WAL entries received through {@link #replicateToReplica}. */
    private final ConcurrentHashMap<HRegion, AtomicInteger> regionToCounter =
      new ConcurrentHashMap<>();

    public ErrorReplayRSRpcServices(HRegionServer rs) throws IOException {
      super(rs);
    }

    /**
     * Counts the entries of the request against its target region and either injects an error or
     * forwards the request to the parent implementation.
     * @param controller passed through untouched to the parent implementation
     * @param request the replication request; the target region is taken from its first entry
     * @return the default response for a request without entries, otherwise whatever the parent
     *         implementation returns
     * @throws ServiceException if the target region is not served here, or with the message
     *           "Inject error!" while errors are still being injected for the target replica
     */
    @Override
    public ReplicateWALEntryResponse replicateToReplica(RpcController controller,
      ReplicateWALEntryRequest request) throws ServiceException {
      List<WALEntry> entries = request.getEntryList();
      if (CollectionUtils.isEmpty(entries)) {
        return ReplicateWALEntryResponse.getDefaultInstance();
      }
      ByteString regionName = entries.get(0).getKey().getEncodedRegionName();
      HRegion region;
      try {
        region = server.getRegionByEncodedName(regionName.toStringUtf8());
      } catch (NotServingRegionException e) {
        throw new ServiceException(e);
      }

      AtomicInteger counter =
        ConcurrentMapUtils.computeIfAbsent(regionToCounter, region, () -> new AtomicInteger(0));

      // fail the first several requests; the counter is only advanced for the failing replica
      // because of the short-circuit evaluation of the condition
      if (
        region.getRegionInfo().getReplicaId() == FAILING_REPLICA_ID
          && counter.addAndGet(entries.size()) < ENTRIES_BEFORE_SUCCESS
      ) {
        throw new ServiceException("Inject error!");
      }
      return super.replicateToReplica(controller, request);
    }
  }

  /**
   * Region server which installs {@link ErrorReplayRSRpcServices} as its RPC services.
   */
  public static final class RSForTest
    extends SingleProcessHBaseCluster.MiniHBaseClusterRegionServer {

    public RSForTest(Configuration conf) throws IOException, InterruptedException {
      super(conf);
    }

    @Override
    protected RSRpcServices createRpcServices() throws IOException {
      return new ErrorReplayRSRpcServices(this);
    }
  }

  private static final HBaseTestingUtil HTU = new HBaseTestingUtil();

  private static final String TN = "test";

  private static final byte[] CF = Bytes.toBytes("cf");

  private static final byte[] CQ = Bytes.toBytes("cq");

  /** Number of rows written to the table and verified on each secondary replica. */
  private static final int ROW_COUNT = 500;

  /** Timeout handed to {@code HTU.waitFor} for each replica; presumably milliseconds. */
  private static final long WAIT_TIMEOUT = 30000;

  @BeforeClass
  public static void setUp() throws Exception {
    HTU.getConfiguration().setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY,
      true);
    HTU.startMiniCluster(
      StartTestingClusterOption.builder().rsClass(RSForTest.class).numRegionServers(3).build());
  }

  @AfterClass
  public static void tearDown() throws Exception {
    HTU.shutdownMiniCluster();
  }

  /**
   * Checks whether the given replica returns the expected value for every written row.
   * <p/>
   * Returns as soon as one row is missing or wrong: this method is used as a polling predicate, so
   * reading the remaining rows once the answer is known would only add useless requests.
   * @param table the table to read from
   * @param replicaId the replica the reads are directed to
   * @return {@code true} if every row {@code i} in {@code [0, ROW_COUNT)} holds the int {@code i}
   *         in column {@code cf:cq} on that replica, {@code false} otherwise
   * @throws IOException if a read fails
   */
  private static boolean checkReplica(Table table, int replicaId) throws IOException {
    for (int i = 0; i < ROW_COUNT; i++) {
      Result result = table.get(new Get(Bytes.toBytes(i)).setReplicaId(replicaId));
      byte[] value = result.getValue(CF, CQ);
      if (value == null || value.length == 0 || Bytes.toInt(value) != i) {
        return false;
      }
    }
    return true;
  }

  @Test
  public void testDefaultDurability() throws IOException {
    doTest(false);
  }

  @Test
  public void testSkipWAL() throws IOException {
    doTest(true);
  }

  /**
   * Creates a table with three region replicas, writes {@code ROW_COUNT} rows to it and waits until
   * replica 2 and then replica 1 serve all of them.
   * @param skipWAL whether the table is created with {@link Durability#SKIP_WAL}; also selects a
   *          distinct table name so both tests can run against the same cluster
   * @throws IOException if creating, writing or reading the table fails
   */
  private void doTest(boolean skipWAL) throws IOException {
    TableName tableName = TableName.valueOf(TN + (skipWAL ? "_skipWAL" : ""));
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
      .setRegionReplication(3).setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF));
    if (skipWAL) {
      builder.setDurability(Durability.SKIP_WAL);
    }
    TableDescriptor td = builder.build();
    HTU.getAdmin().createTable(td);

    try (Table table = HTU.getConnection().getTable(tableName)) {
      for (int i = 0; i < ROW_COUNT; i++) {
        table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
      }
      // replica 2 never gets errors injected, replica 1 has to recover from the injected ones
      HTU.waitFor(WAIT_TIMEOUT, () -> checkReplica(table, 2));
      HTU.waitFor(WAIT_TIMEOUT, () -> checkReplica(table, 1));
    }
  }
}