以 com.dangdang.ddframe.rdb.sharding.example.jdbc.Main
剖析分庫分表配置與實現,其部分原始碼如下:
public final class Main {
public static void main(final String[] args) throws SQLException {
// step1: 配置sharding資料源
DataSource dataSource = getShardingDataSource();
// step2:建立表
createTable(dataSource);
// step3:插入資料
insertData(dataSource);
printSimpleSelect(dataSource);
printGroupBy(dataSource);
printHintSimpleSelect(dataSource);
dropTable(dataSource);
}
... ...
}
接下來分析第一步,即如何建立ShardingDataSource;
①ShardingDataSource
硬編碼建立ShardingDataSource的核心實現原始碼如下:
private static ShardingDataSource getShardingDataSource() throws SQLException {
// 構造DataSourceRule,即key與資料源的KV對;
DataSourceRule dataSourceRule = new DataSourceRule(createDataSourceMap());
// 建立邏輯表是t_order,實際表是t_order_0,t_order_1的TableRule
TableRule orderTableRule = TableRule.builder("t_order").actualTables(Arrays.asList("t_order_0", "t_order_1")).dataSourceRule(dataSourceRule).build();
// 建立邏輯表是t_order_item,實際表是t_order_item_0,t_order_item_1的TableRule
TableRule orderItemTableRule = TableRule.builder("t_order_item").actualTables(Arrays.asList("t_order_item_0", "t_order_item_1")).dataSourceRule(dataSourceRule).build();
ShardingRule shardingRule = ShardingRule.builder()
.dataSourceRule(dataSourceRule)
.tableRules(Arrays.asList(orderTableRule, orderItemTableRule))
// 增加系結表--系結表代表一組表,這組表的邏輯表與實際表之間的對映關係是相同的。比如t_order與t_order_item就是這樣一組系結表關係,它們的分庫與分表策略是完全相同的,那麼可以使用它們的表規則將它們配置成系結表,系結表所有路由計算將會只使用主表的策略;
.bindingTableRules(Collections.singletonList(new BindingTableRule(Arrays.asList(orderTableRule, orderItemTableRule))))
// 指定資料庫sharding策略--根據user_id欄位的值取模
.databaseShardingStrategy(new DatabaseShardingStrategy("user_id", new ModuloDatabaseShardingAlgorithm()))
// 指定表sharding策略--根據order_id欄位的值取模
.tableShardingStrategy(new TableShardingStrategy("order_id", new ModuloTableShardingAlgorithm())).build();
return new ShardingDataSource(shardingRule);
}
// 建立兩個資料源,一個是ds_jdbc_0,一個是ds_jdbc_1,並系結對映關係key
private static Map<String, DataSource> createDataSourceMap() {
Map<String, DataSource> result = new HashMap<>(2);
result.put("ds_jdbc_0", createDataSource("ds_jdbc_0"));
result.put("ds_jdbc_1", createDataSource("ds_jdbc_1"));
return result;
}
// 以dbcp元件建立一個資料源
private static DataSource createDataSource(final String dataSourceName) {
BasicDataSource result = new BasicDataSource();
result.setDriverClassName(com.mysql.jdbc.Driver.class.getName());
result.setUrl(String.format("jdbc:mysql://localhost:3306/%s", dataSourceName));
result.setUsername("root");
// sharding-jdbc預設以密碼為空的root使用者訪問,如果修改了root使用者的密碼,這裡修改為真實的密碼即可;
result.setPassword("");
return result;
}
備註:邏輯表(LogicTable)即資料分片的邏輯表,對於水平拆分的資料庫(表),同一類表的總稱。例:訂單資料根據訂單ID取模拆分為16張表,分別是torder0到torder15,他們的邏輯表名為torder;實際表(ActualTable)是指在分片的資料庫中真實存在的物理表。即這個示例中的torder0到torder_15。摘自sharding-jdbc核心概念
分表原則
根據上面的程式碼中 .tableShardingStrategy(newTableShardingStrategy("order_id",newModuloTableShardingAlgorithm()))
這段程式碼可知,分表策略透過ModuloTableShardingAlgorithm.java實現,且是透過ShardingStrategy.java中的doSharding()方法呼叫,核心原始碼如下:
private Collection<String> doSharding(final Collection<ShardingValue>> shardingValues, final Collection<String> availableTargetNames) {
// shardingAlgorithm即sharding演演算法分為三種:NoneKey,SingleKey和MultipleKeys
if (shardingAlgorithm instanceof NoneKeyShardingAlgorithm) {
return Collections.singletonList(((NoneKeyShardingAlgorithm) shardingAlgorithm).doSharding(availableTargetNames, shardingValues.iterator().next()));
}
if (shardingAlgorithm instanceof SingleKeyShardingAlgorithm) {
// 得到SingleKeyShardingAlgorithm的具體實現,在ShardingStrategy的構造方法中賦值
SingleKeyShardingAlgorithm> singleKeyShardingAlgorithm = (SingleKeyShardingAlgorithm>) shardingAlgorithm;
// ShardingValue就是sharding的列和該列的值,在這裡分別為order_id和1000
ShardingValue shardingValue = shardingValues.iterator().next();
// sharding列的型別分為三種:SINGLE,LIST和RANGE
switch (shardingValue.getType()) {
// 如果是where order_id=1000,那麼type就是SINGLE
case SINGLE:
// doEqualSharding只傳回一個值,為了doSharding()傳回值的統一,用Collections.singletonList()包裝成集合;
return Collections.singletonList(singleKeyShardingAlgorithm.doEqualSharding(availableTargetNames, shardingValue));
case LIST:
return singleKeyShardingAlgorithm.doInSharding(availableTargetNames, shardingValue);
case RANGE:
return singleKeyShardingAlgorithm.doBetweenSharding(availableTargetNames, shardingValue);
default:
throw new UnsupportedOperationException(shardingValue.getType().getClass().getName());
}
}
if (shardingAlgorithm instanceof MultipleKeysShardingAlgorithm) {
return ((MultipleKeysShardingAlgorithm) shardingAlgorithm).doSharding(availableTargetNames, shardingValues);
}
throw new UnsupportedOperationException(shardingAlgorithm.getClass().getName());
}
如果SQL中分表列orderid條件為where orderid=?,那麼shardingValue的type為SINGLE,分表邏輯走doEqualSharding();
如果SQL中分表列orderid條件為where orderid in(?, ?),那麼shardingValue的type為LIST,那麼分表邏輯走doInSharding();
如果SQL中分表列orderid條件為where orderid between in(?, ?),那麼shardingValue的type為RANGE,那麼分表邏輯走doBetweenSharding();
shardingValue的type的判斷依據如下程式碼:
public ShardingValueType getType() {
//
if (null != value) {
return ShardingValueType.SINGLE;
}
if (!values.isEmpty()) {
return ShardingValueType.LIST;
}
return ShardingValueType.RANGE;
}
表的取模核心實現原始碼如下:
public final class ModuloTableShardingAlgorithm implements SingleKeyTableShardingAlgorithm<Integer> {
// 分析前提,假設預期分到兩個表中[t_order_0,t_order_1],且執行的SQL為SELECT o.* FROM t_order o where o.order_id=1001 AND o.user_id=10,那麼分表列order_id的值為1001
@Override
public String doEqualSharding(final Collection<String> tableNames, final ShardingValue<Integer> shardingValue) {
// 遍歷表名[t_order_0,t_order_1]
for (String each : tableNames) {
// 直到表名是以分表列order_id的值1001對2取模的值即1結尾,那麼就是命中的表名,即t_order_1
if (each.endsWith(shardingValue.getValue() % tableNames.size() + "")) {
return each;
}
}
throw new UnsupportedOperationException();
}
@Override
public Collection<String> doInSharding(final Collection<String> tableNames, final ShardingValue<Integer> shardingValue) {
Collection<String> result = new LinkedHashSet<>(tableNames.size());
// 從這裡可知,doInSharding()和doEqualSharding()的區別就是doInSharding()時分表列有多個值(shardingValue.getValues()),例如order_id的值為[1001,1002],遍歷這些值,然後每個值按照doEqualSharding()的邏輯計算表名
for (Integer value : shardingValue.getValues()) {
for (String tableName : tableNames) {
if (tableName.endsWith(value % tableNames.size() + "")) {
result.add(tableName);
}
}
}
return result;
}
@Override
public Collection<String> doBetweenSharding(final Collection<String> tableNames, final ShardingValue<Integer> shardingValue) {
Collection<String> result = new LinkedHashSet<>(tableNames.size());
// 從這裡可知,doBetweenSharding()和doInSharding()的區別就是doBetweenSharding()時分表列的多個值透過shardingValue.getValueRange()得到;而doInSharding()是透過shardingValue.getValues()得到;
Range<Integer> range = shardingValue.getValueRange();
for (Integer i = range.lowerEndpoint(); i <= range.upperEndpoint(); i++) {
for (String each : tableNames) {
if (each.endsWith(i % tableNames.size() + "")) {
result.add(each);
}
}
}
return result;
}
}
如果SQL中分表列orderid條件為where orderid=?,那麼分表邏輯走doEqualSharding();
如果SQL中分表列orderid條件為where orderid in(?, ?),那麼分表邏輯走doInSharding();
如果SQL中分表列orderid條件為where orderid between in(?, ?),那麼分表邏輯走doBetweenSharding();
這些條件判斷依據程式碼如下,當SimpleRoutingEngine中呼叫routeTables()進行路由表判定時會呼叫下麵的方法,且透過這段程式碼可知,sharding列只支援=,in和between的操作:
public ShardingValue> getShardingValue(final List<Object> parameters) {
List<Comparable>> conditionValues = getValues(parameters);
switch (operator) {
case EQUAL:
return new ShardingValue<Comparable>>(column.getTableName(), column.getName(), conditionValues.get(0));
case IN:
return new ShardingValue<>(column.getTableName(), column.getName(), conditionValues);
case BETWEEN:
return new ShardingValue<>(column.getTableName(), column.getName(), Range.range(conditionValues.get(0), BoundType.CLOSED, conditionValues.get(1), BoundType.CLOSED));
default:
throw new UnsupportedOperationException(operator.getExpression());
}
}
分庫原則
根據上面的程式碼中 .databaseShardingStrategy(newDatabaseShardingStrategy("user_id",newModuloDatabaseShardingAlgorithm()))
這段程式碼可知,分庫策略透過ModuloDatabaseShardingAlgorithm.java實現; 透過比較ModuloDatabaseShardingAlgorithm.java和ModuloTableShardingAlgorithm.java,發現兩者的實現邏輯完全一致,小小的區別就是ModuloDatabaseShardingAlgorithm.java根據分庫的列例如 user_id
進行分庫;而ModuloTableShardingAlgorithm.java根據分表的列例如 order_id
進行分表;所以分庫在這裡就不分析了;
說明:由於模組
sharding-jdbc-example-jdbc
中的Main方法建立的資料庫和表數量都是2,所以ModuloDatabaseShardingAlgorithm.java和ModuloTableShardingAlgorithm.java的邏輯程式碼中寫死了對2取模(% 2);這樣的話,如果debug過程中,修改了資料庫和表的數量為3,或者4,改動程式碼如下所示,就會出現問題:
private static ShardingDataSource getShardingDataSource() throws SQLException {
DataSourceRule dataSourceRule = new DataSourceRule(createDataSourceMap());
TableRule orderTableRule = TableRule
.builder("t_order")
.actualTables(Arrays.asList("t_order_0", "t_order_1", "t_order_2"))
.dataSourceRule(dataSourceRule)
.build();
TableRule orderItemTableRule = TableRule
.builder("t_order_item")
.actualTables(Arrays.asList("t_order_item_0", "t_order_item_1", "t_order_item_2"))
.dataSourceRule(dataSourceRule)
.build();
... ...
}
private static Map<String, DataSource> createDataSourceMap() {
Map<String, DataSource> result = new HashMap<>(3);
result.put("ds_jdbc_0", createDataSource("ds_jdbc_0"));
result.put("ds_jdbc_1", createDataSource("ds_jdbc_1"));
result.put("ds_jdbc_2", createDataSource("ds_jdbc_2"));
return result;
}
想要糾正這個潛在的錯誤,只需要將原始碼中ModuloDatabaseShardingAlgorithm.java中的 %2
改為 %dataSourceNames.size()
,ModuloTableShardingAlgorithm.java中的 %2
改為 %tableNames.size()
即可;
END