MySQL 与 Elasticsearch 数据不对称问题解决办法

jdbc-input-plugin 只能实现数据库的追加,对于 elasticsearch 增量写入,但经常jdbc源一端的数据库可能会做数据库删除或者更新操作。这样一来数据库与搜索引擎的数据库就出现了不对称的情况。

当然你如果有开发团队可以写程序在删除或者更新的时候同步对搜索引擎操作。如果你没有这个能力,可以尝试下面的方法。

这里有一个数据表 article , mtime 字段定义了 ON UPDATE CURRENT_TIMESTAMP 所以每次更新mtime的时间都会变化

mysql> desc article;+-------------+--------------+------+-----+--------------------------------+-------+| Field    | Type     | Null | Key | Default            | Extra |+-------------+--------------+------+-----+--------------------------------+-------+| id     | int(11)   | NO  |   | 0               |    || title    | mediumtext  | NO  |   | NULL              |    || description | mediumtext  | YES |   | NULL              |    || author   | varchar(100) | YES |   | NULL              |    || source   | varchar(100) | YES |   | NULL              |    || content   | longtext   | YES |   | NULL              |    || status   | enum('Y','N')| NO  |   | 'N'              |    || ctime    | timestamp  | NO  |   | CURRENT_TIMESTAMP       |    || mtime    | timestamp  | YES |   | ON UPDATE CURRENT_TIMESTAMP  |    |+-------------+--------------+------+-----+--------------------------------+-------+7 rows in set (0.00 sec)
jdbc {  jdbc_driver_library => "/usr/share/java/mysql-connector-java.jar"  jdbc_driver_class => "com.mysql.jdbc.Driver"  jdbc_connection_string => "jdbc:mysql://localhost:3306/cms"  jdbc_user => "cms"  jdbc_password => "password"  schedule => "* * * * *" #定时cron的表达式,这里是每分钟执行一次  statement => "select * from article where mtime > :sql_last_value"  use_column_value => true  tracking_column => "mtime"  tracking_column_type => "timestamp"   record_last_run => true  last_run_metadata_path => "/var/tmp/article-mtime.last" }
CREATE TABLE `elasticsearch_trash` ( `id` int(11) NOT NULL, `ctime` timestamp NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8
CREATE DEFINER=`dba`@`%` TRIGGER `article_BEFORE_UPDATE` BEFORE UPDATE ON `article` FOR EACH ROWBEGIN -- 此处的逻辑是解决文章状态变为 N 的时候,需要将搜索引擎中对应的数据删除。 IF NEW.status = 'N' THEN insert into elasticsearch_trash(id) values(OLD.id); END IF; -- 此处逻辑是修改状态到 Y 的时候,方式elasticsearch_trash仍然存在该文章ID,导致误删除。所以需要删除回收站中得回收记录。  IF NEW.status = 'Y' THEN delete from elasticsearch_trash where id = OLD.id; END IF;ENDCREATE DEFINER=`dba`@`%` TRIGGER `article_BEFORE_DELETE` BEFORE DELETE ON `article` FOR EACH ROWBEGIN -- 此处逻辑是文章被删除同事将改文章放入搜索引擎回收站。 insert into elasticsearch_trash(id) values(OLD.id);END

你还可以开发相关的程序,这里提供一个 Spring boot 定时任务例子。

实体

package cn.netkiller.api.domain.elasticsearch;import java.util.Date;import javax.persistence.Column;import javax.persistence.Entity;import javax.persistence.Id;import javax.persistence.Table;@Entity@Tablepublic class ElasticsearchTrash { @Id private int id; @Column(columnDefinition = "TIMESTAMP DEFAULT CURRENT_TIMESTAMP") private Date ctime; public int getId() { return id; } public void setId(int id) { this.id = id; } public Date getCtime() { return ctime; } public void setCtime(Date ctime) { this.ctime = ctime; }}
package cn.netkiller.api.repository.elasticsearch;import org.springframework.data.repository.CrudRepository;import com.example.api.domain.elasticsearch.ElasticsearchTrash;public interface ElasticsearchTrashRepository extends CrudRepository<ElasticsearchTrash, Integer>{}
package cn.netkiller.api.schedule;import org.elasticsearch.action.delete.DeleteResponse;import org.elasticsearch.client.transport.TransportClient;import org.elasticsearch.rest.RestStatus;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.scheduling.annotation.Scheduled;import org.springframework.stereotype.Component;import com.example.api.domain.elasticsearch.ElasticsearchTrash;import com.example.api.repository.elasticsearch.ElasticsearchTrashRepository;@Componentpublic class ScheduledTasks { private static final Logger logger = LoggerFactory.getLogger(ScheduledTasks.class); @Autowired private TransportClient client; @Autowired private ElasticsearchTrashRepository alasticsearchTrashRepository; public ScheduledTasks() { } @Scheduled(fixedRate = 1000 * 60) // 60秒运行一次调度任务 public void cleanTrash() { for (ElasticsearchTrash elasticsearchTrash : alasticsearchTrashRepository.findAll()) {  DeleteResponse response = client.prepareDelete("information", "article", elasticsearchTrash.getId() + "").get();  RestStatus status = response.status();  logger.info("delete {} {}", elasticsearchTrash.getId(), status.toString());  if (status == RestStatus.OK || status == RestStatus.NOT_FOUND) {  alasticsearchTrashRepository.delete(elasticsearchTrash);  } } }}
package cn.netkiller.api;import org.springframework.boot.SpringApplication;import org.springframework.boot.autoconfigure.SpringBootApplication;import org.springframework.scheduling.annotation.EnableScheduling;@SpringBootApplication@EnableSchedulingpublic class Application { public static void main(String[] args) { SpringApplication.run(Application.class, args); }} 

更多相关文章

  1. Android,LIstView中的OnItemClick点击无效的解决办法
  2. 华为手机Android(安卓)Studio开发不显示Logcat解决办法
  3. 安卓9.0 http请求数据失败解决办法
  4. Android(安卓)Studio 导入包时报 Duplicate files copied in APK
  5. Android(安卓)启动时闪一下黑屏问题的解决办法
  6. Android(安卓)webview Not allowed to load local resource异常
  7. Android(安卓)SDK Manager更新版慢解决办法
  8. 关于新版SDK报错You need to use a Theme.AppCompat theme的两种
  9. Android小问题解决办法记录

随机推荐

  1. android 学习示例
  2. 集成Android免费语音合成功能(在线、离线
  3. Android上 PeerConnection 与 PeerConnec
  4. eclipse如何进行真机调试
  5. 【Android】TypedArray和obtainStyledAtt
  6. android应用程序跳转到系统的各个设置页
  7. Android实现调用系统相册和拍照的Demo示
  8. Android Input事件APP端流程分析
  9. Android Studio运行慢-提速到10s,问你要不
  10. 疯狂android讲义学习总结---各种按钮的制