canal.adapter | 問題
canal.adapter | 原理
- 使用 canal.adapter 時，會給它配置一個配置文件 shop.yml；
- shop.yml 中會定義當 canal.deployer 同步過來 MySQL 的 binlog 時 canal.adapter 要執行的 SQL，這裡定義的 SQL 長這樣：
select a.id,a.name,a.tags,concat(a.latitude,',',a.longitude) as location,a.remark_score,a.price_per_man,a.category_id,b.name as category_name,a.seller_id,c.remark_score as seller_remark_score,c.disabled_flag as seller_disabled_flag from shop a inner join category b on a.category_id = b.id inner join seller c on c.id = a.seller_id
canal.adapter | 問題
- 一整條 SQL 要查出來這麼多的字段，但是 canal.adapter 只查變更的字段；
- 並且，如果 SQL 中有 a.name, b.name 的話，canal.adapter 無法區分這是兩張表的兩個字段，會把 a.name 和 b.name 同時更新成新的值；
canal.adapter | 問題 | 驗證步驟
- 清空 ElasticSearch 中索引 shop 的全部文檔；
- 修改 MySQL 中 shop 表的一條記錄的 name 字段的值；
- canal.adapter 索引進 ElasticSearch 的文檔只有更新的 name 字段；
canal.adapter | 問題 | 解決方案
- 不用 canal.adapter 同步數據到 ElasticSearch 中了；
- 通過引入 canal 的依賴，在 Java 程序中，自定義地將 canal.deployer 同步過來的 binlog 索引進 ElasticSearch 中；
canal.adapter | 適用場景
- 如果是簡單地將 MySQL 中的一張表和 ElasticSearch 中的一個索引對應，用 canal.adapter 還是可以的；
SpringBoot 中引入 canal
- 目前 mvn repository 中最新的依賴版本是 1.1.4，剛好之前自己編譯的 canal 也是 1.1.4，那就正好，不用重啟 canal.deployer 的其他版本；
- 注意 canal.deployer 的版本要和引入依賴的版本一致；
canal | 依賴
<!-- Canal client artifact; it provides the CanalConnector / CanalConnectors classes imported by the
     Java code in this document. Keep the version equal to the running canal.deployer (1.1.4 here). -->
<dependency>
<groupId>com.alibaba.otter</groupId>
<artifactId>canal.client</artifactId>
<version>1.1.4</version>
</dependency>
<!-- Canal common module, declared at the same 1.1.4 version as the client. -->
<dependency>
<groupId>com.alibaba.otter</groupId>
<artifactId>canal.common</artifactId>
<version>1.1.4</version>
</dependency>
<!-- Canal protocol module; it provides the CanalEntry and Message classes used when reading batches. -->
<dependency>
<groupId>com.alibaba.otter</groupId>
<artifactId>canal.protocol</artifactId>
<version>1.1.4</version>
</dependency>
<!-- protobuf runtime; the scheduling bean catches com.google.protobuf.InvalidProtocolBufferException.
     NOTE(review): presumably 3.5.1 matches what canal 1.1.4 was built against; confirm before upgrading. -->
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>3.5.1</version>
</dependency>
Bean | 連接 canal.deployer
package tech.lixinlei.dianping.canal;
import com.alibaba.google.common.collect.Lists;
import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.context.annotation.Bean;
import org.springframework.stereotype.Component;
import java.net.InetSocketAddress;
/**
 * Opens the connection to canal.deployer, publishes it as a Spring bean and
 * disconnects it again when the container is destroyed.
 */
@Component
public class CanalClient implements DisposableBean {

    /** Connector created by {@link #getCanalConnector()}; stays {@code null} until that method runs. */
    private CanalConnector canalConnector;

    /**
     * Creates a cluster connector for the single address 127.0.0.1:11111 and the
     * destination "example" (user "canal", password "canal"), then connects,
     * subscribes and rolls back before handing the connector to Spring.
     *
     * @return the connected and subscribed connector
     */
    @Bean
    public CanalConnector getCanalConnector() {
        InetSocketAddress deployerAddress = new InetSocketAddress("127.0.0.1", 11111);
        CanalConnector connector = CanalConnectors.newClusterConnector(
                Lists.newArrayList(deployerAddress),
                "example",
                "canal",
                "canal");
        // Remember the connector before connecting so destroy() can see it.
        this.canalConnector = connector;

        connector.connect();
        // Per the original author's note: a filter has the form {database}.{table};
        // calling subscribe() without an argument subscribes to everything.
        connector.subscribe();
        // Per the original author's note: roll back to the position where
        // consumption was interrupted last time.
        connector.rollback();
        return connector;
    }

    /**
     * Called when the container is destroyed; disconnects the connector if one was created.
     *
     * @throws Exception declared by {@link DisposableBean#destroy()}
     */
    @Override
    public void destroy() throws Exception {
        if (canalConnector == null) {
            return;
        }
        canalConnector.disconnect();
    }
}
Bean | 定時從 canal.deployer 中讀取 binlog 並解析、索引進 ElasticSearch
package tech.lixinlei.dianping.canal;
import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.protocol.CanalEntry;
import com.alibaba.otter.canal.protocol.Message;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import tech.lixinlei.dianping.dal.ShopModelMapper;
import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Scheduled task that pulls binlog batches from canal.deployer. For every changed row of the
 * {@code seller}, {@code category} or {@code shop} table in the {@code dianping} database it
 * re-queries the affected shop documents from MySQL (all fields, through
 * {@link ShopModelMapper#buildESQuery}) and indexes them into the ElasticSearch index {@code shop}.
 *
 * <p>Known limitation: rows that carry no usable {@code id} in their after-image (for example
 * DELETE events) are skipped; the matching ElasticSearch documents are not removed here.
 */
@Component
public class CanalScheduling implements Runnable, ApplicationContextAware {

    /** Batch id that canal reports when a fetch returned no data. */
    private static final long NO_BATCH = -1L;

    /** Maximum number of binlog entries requested per fetch. */
    private static final int BATCH_SIZE = 1000;

    /** Only changes in this database are indexed. */
    private static final String TARGET_DATABASE = "dianping";

    /** Name of the ElasticSearch index that receives the shop documents. */
    private static final String ES_INDEX = "shop";

    /** Column (and result-map key) that holds the row id. */
    private static final String ID_COLUMN = "id";

    private ApplicationContext applicationContext;

    @Autowired
    private ShopModelMapper shopModelMapper;

    @Resource
    private CanalConnector canalConnector;

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Fetches one batch without acknowledging it, processes every ROWDATA entry and then
     * acknowledges the batch. If anything throws, the stack trace is printed and the batch is
     * rolled back so that it is delivered again on a later run.
     */
    @Override
    @Scheduled(fixedDelay = 100)
    public void run() {
        long batchId = NO_BATCH;
        try {
            Message message = canalConnector.getWithoutAck(BATCH_SIZE);
            batchId = message.getId();
            if (batchId == NO_BATCH) {
                // Nothing was fetched, so there is nothing to acknowledge.
                return;
            }
            for (CanalEntry.Entry entry : message.getEntries()) {
                if (entry.getEntryType() == CanalEntry.EntryType.ROWDATA) {
                    publishCanalEvent(entry);
                }
            }
            canalConnector.ack(batchId);
        } catch (Exception e) {
            e.printStackTrace();
            // Only roll back a batch that was actually handed out; when the fetch itself
            // failed batchId is still NO_BATCH and there is no batch to roll back.
            if (batchId != NO_BATCH) {
                canalConnector.rollback(batchId);
            }
        }
    }

    /**
     * Parses one binlog entry into its row change, converts the after-image of every affected
     * row from a column list into a map and passes it to {@link #indexES}.
     *
     * <p>An entry whose payload cannot be parsed is reported and dropped. An {@link IOException}
     * raised while indexing a row is reported and the remaining rows are still processed; the
     * caller then acknowledges the batch, so that row is not retried.
     *
     * @param entry one ROWDATA entry of the fetched batch
     */
    private void publishCanalEvent(CanalEntry.Entry entry) {
        String database = entry.getHeader().getSchemaName();
        String table = entry.getHeader().getTableName();

        CanalEntry.RowChange change;
        try {
            change = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
        } catch (InvalidProtocolBufferException e) {
            e.printStackTrace();
            return;
        }

        for (CanalEntry.RowData rowData : change.getRowDatasList()) {
            // The after-image is empty for DELETE events; indexES() skips such rows instead of
            // failing, because a failure here would make run() roll back and re-fetch the same
            // batch forever.
            Map<String, Object> dataMap = parseColumnsToMap(rowData.getAfterColumnsList());
            try {
                indexES(dataMap, database, table);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Converts a list of columns into a map from column name to column value.
     *
     * @param columns columns of one row image; {@code null} elements are ignored
     * @return a new mutable map with one entry per non-null column
     */
    Map<String, Object> parseColumnsToMap(List<CanalEntry.Column> columns) {
        Map<String, Object> jsonMap = new HashMap<>();
        for (CanalEntry.Column column : columns) {
            if (column != null) {
                jsonMap.put(column.getName(), column.getValue());
            }
        }
        return jsonMap;
    }

    /**
     * Re-queries the shop documents affected by a changed row and indexes each of them into
     * ElasticSearch, using the shop id as the document id.
     *
     * <p>Changes outside the {@code dianping} database, or in tables other than {@code seller},
     * {@code category} and {@code shop}, are ignored. Rows without a numeric {@code id} are
     * skipped with a message on stderr.
     *
     * @param dataMap  column name to value of the changed row (after-image)
     * @param database schema the change happened in
     * @param table    table the change happened in
     * @throws IOException if the ElasticSearch index request fails
     */
    private void indexES(Map<String, Object> dataMap, String database, String table) throws IOException {
        if (!StringUtils.equals(TARGET_DATABASE, database)) {
            return;
        }
        boolean sellerChanged = StringUtils.equals("seller", table);
        boolean categoryChanged = StringUtils.equals("category", table);
        boolean shopChanged = StringUtils.equals("shop", table);
        if (!sellerChanged && !categoryChanged && !shopChanged) {
            return;
        }

        Integer id = parseId(dataMap.get(ID_COLUMN));
        if (id == null) {
            System.err.println("CanalScheduling: skipping " + database + "." + table
                    + " change without a numeric id (for example a DELETE event)");
            return;
        }

        // The query returns complete documents (all fields), not only the changed columns.
        List<Map<String, Object>> result = shopModelMapper.buildESQuery(
                sellerChanged ? id : null,
                categoryChanged ? id : null,
                shopChanged ? id : null);

        for (Map<String, Object> document : result) {
            IndexRequest indexRequest = new IndexRequest(ES_INDEX);
            indexRequest.id(String.valueOf(document.get(ID_COLUMN)));
            indexRequest.source(document);
            restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
        }
    }

    /**
     * Converts a raw column value into an integer id.
     *
     * @param rawId value read from the row map; may be {@code null}
     * @return the parsed id, or {@code null} when the value is missing, empty or not an integer
     */
    private static Integer parseId(Object rawId) {
        if (rawId == null) {
            return null;
        }
        String text = rawId.toString();
        if (text.isEmpty()) {
            return null;
        }
        try {
            return Integer.valueOf(text);
        } catch (NumberFormatException notNumeric) {
            // Reported by the caller as a skipped row.
            return null;
        }
    }

    /**
     * Stores the application context handed in by Spring.
     *
     * @param applicationContext the context this bean runs in
     * @throws BeansException declared by {@link ApplicationContextAware}
     */
    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        this.applicationContext = applicationContext;
    }
}
在 SpringBoot 層面打開定時任務的開關
/**
 * Spring Boot entry point of the dianping application.
 *
 * <p>Scans {@code tech.lixinlei.dianping} for components and {@code tech.lixinlei.dianping.dal}
 * for MyBatis mappers, enables class-based AspectJ auto proxies, and enables scheduling so that
 * methods annotated with {@code @Scheduled} are executed.
 */
@SpringBootApplication(scanBasePackages = "tech.lixinlei.dianping")
@MapperScan("tech.lixinlei.dianping.dal")
@EnableAspectJAutoProxy(proxyTargetClass = true)
@EnableScheduling
public class DianpingApplication {

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(DianpingApplication.class, args);
    }
}
根據 binlog 中的內容，查出需要更新進 ElasticSearch 中的數據的 SQL
/**
 * MyBatis mapper that builds the shop documents to be indexed into ElasticSearch.
 */
public interface ShopModelMapper {

    /**
     * Selects shop rows joined with their category and seller, one map per shop.
     * Every non-null argument adds an equality filter; a {@code null} argument adds none.
     *
     * @param sellerId   restrict to shops of this seller, or {@code null}
     * @param categoryId restrict to shops of this category, or {@code null}
     * @param shopId     restrict to this shop, or {@code null}
     * @return the matching rows keyed by selected column name or alias
     */
    List<Map<String, Object>> buildESQuery(
            @Param("sellerId") Integer sellerId,
            @Param("categoryId") Integer categoryId,
            @Param("shopId") Integer shopId);
}
<!-- Selects every shop joined with its category and seller, shaped like the ElasticSearch document.
     Each non-null parameter adds one equality filter. The filters live in a <where> block (which
     drops the leading "and") rather than being appended to the last join's ON clause, so they keep
     filtering the result even if a join is later changed to an outer join. -->
<select id="buildESQuery" resultType="java.util.Map">
    select a.id,a.name,a.tags,concat(a.latitude,',',a.longitude) as location,
    a.remark_score,a.price_per_man,a.category_id,b.name as category_name,a.seller_id,
    c.remark_score as seller_remark_score,c.disabled_flag as seller_disabled_flag
    from shop a
    inner join category b on a.category_id = b.id
    inner join seller c on c.id = a.seller_id
    <where>
        <if test="sellerId != null">
            and c.id = #{sellerId}
        </if>
        <if test="categoryId != null">
            and b.id = #{categoryId}
        </if>
        <if test="shopId != null">
            and a.id = #{shopId}
        </if>
    </where>
</select>