[Infrastructure] [Flink] Flink / Flink CDC: Implementing Business Data Ingestion in Code
2022-07-22 00:05:00 【0xYGC】
Brief introduction
A comparison of the DataStream and FlinkSQL approaches:
- DataStream works in both Flink 1.12 and 1.13, while FlinkSQL is only usable from Flink 1.13.
- DataStream can monitor multiple databases and multiple tables at the same time, while FlinkSQL can only monitor a single table.
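For contrast with the DataStream code below, here is a minimal sketch of the FlinkSQL approach. This is an assumption-laden illustration, not the article's code: it presumes Flink 1.13 and the mysql-cdc SQL connector shipped in the same flink-connector-mysql-cdc dependency, and the user_info table with id/name columns is hypothetical, mirroring the example used later.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class FlinkSqlCDC {
    public static void main(String[] args) {
        // Pure Table API environment in streaming mode (requires Flink 1.13)
        TableEnvironment tableEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());

        // One DDL maps to exactly one physical table; hence the single-table limitation
        tableEnv.executeSql(
                "CREATE TABLE user_info (" +
                "  id STRING," +
                "  name STRING," +
                "  PRIMARY KEY (id) NOT ENFORCED" +
                ") WITH (" +
                "  'connector' = 'mysql-cdc'," +
                "  'hostname' = '192.168.1.220'," +
                "  'port' = '3308'," +
                "  'username' = 'root'," +
                "  'password' = 'useradmin'," +
                "  'database-name' = 'flinkcdc'," +
                "  'table-name' = 'user_info'" +
                ")");

        // Print the changelog of the single monitored table
        tableEnv.executeSql("SELECT * FROM user_info").print();
    }
}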
Method / Steps
One: Coding
1.1 Import the related dependencies
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>1.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.12</artifactId>
        <version>1.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.12</artifactId>
        <version>1.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.49</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_2.12</artifactId>
        <version>1.12.0</version>
    </dependency>
    <dependency>
        <groupId>com.ververica</groupId>
        <artifactId>flink-connector-mysql-cdc</artifactId>
        <version>2.0.0</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.75</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <!-- Bundles all dependencies into the jar -->
            <artifactId>maven-assembly-plugin</artifactId>
            <version>3.0.0</version>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
1.2 Business code
1.2.1 Entry class
import com.ververica.cdc.connectors.mysql.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.DebeziumSourceFunction;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Description:
 *
 * @author: YangGC
 */
public class FlinkCDC2 {
    public static void main(String[] args) throws Exception {
        // 1. Obtain the Flink execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // 1.1 Enable checkpointing: one exactly-once checkpoint every 5 s, 10 s timeout
        env.enableCheckpointing(5000);
        env.getCheckpointConfig().setCheckpointTimeout(10000);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);

        // Optionally persist checkpoints to HDFS
        // env.setStateBackend(new FsStateBackend("hdfs://hadoop102:8020/cdc-test/ck"));

        // 2. Build the SourceFunction via Flink CDC
        DebeziumSourceFunction<String> sourceFunction = MySqlSource.<String>builder()
                .hostname("192.168.1.220")
                .port(3308)
                .username("root")
                .password("useradmin")
                // Monitor all tables under the flinkcdc database
                .databaseList("flinkcdc.*")
                // .tableList("flinkcdc.user_info")
                // Use the custom deserializer
                .deserializer(new CustomerDeserializationSchema())
                // initial(): snapshot the existing data first, then read the binlog
                .startupOptions(StartupOptions.initial())
                .build();
        DataStreamSource<String> dataStreamSource = env.addSource(sourceFunction);

        // 3. Print the data
        dataStreamSource.print();

        // 4. Start the job
        env.execute("FlinkCDC2");
    }
}
1.2.2 Custom deserializer
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

import java.util.List;

/**
 * Custom deserializer.
 *
 * @author: YangGC
 */
public class CustomerDeserializationSchema implements DebeziumDeserializationSchema<String> {

    /**
     * Target output format:
     * {
     *   "db": "",
     *   "tableName": "",
     *   "before": {"id": "1001", "name": "" ...},
     *   "after": {"id": "1001", "name": "" ...},
     *   "op": ""
     * }
     */
    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
        // Create a JSON object to hold the result
        JSONObject result = new JSONObject();

        // Extract the database and table name from the record topic ("server.db.table")
        String topic = sourceRecord.topic();
        String[] fields = topic.split("\\.");
        result.put("db", fields[1]);
        result.put("tableName", fields[2]);

        // Extract the "before" data
        Struct value = (Struct) sourceRecord.value();
        Struct before = value.getStruct("before");
        JSONObject beforeJson = new JSONObject();
        if (before != null) {
            // Copy every column into the JSON object
            Schema schema = before.schema();
            List<Field> fieldList = schema.fields();
            for (Field field : fieldList) {
                beforeJson.put(field.name(), before.get(field));
            }
        }
        result.put("before", beforeJson);

        // Extract the "after" data
        Struct after = value.getStruct("after");
        JSONObject afterJson = new JSONObject();
        if (after != null) {
            // Copy every column into the JSON object
            Schema schema = after.schema();
            List<Field> fieldList = schema.fields();
            for (Field field : fieldList) {
                afterJson.put(field.name(), after.get(field));
            }
        }
        result.put("after", afterJson);

        // Extract the operation type
        Envelope.Operation operation = Envelope.operationFor(sourceRecord);
        result.put("op", operation);

        // Emit the record
        collector.collect(result.toJSONString());
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}
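As a hypothetical illustration (the table, column values, and server name are made up), an INSERT into a user_info table would make this deserializer emit a record along these lines; "op" carries the name of Debezium's Envelope.Operation, i.e. READ for snapshot rows and CREATE/UPDATE/DELETE for binlog changes:

{"db":"flinkcdc","tableName":"user_info","before":{},"after":{"id":"1001","name":"Tom"},"op":"CREATE"}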
1.3 Packaging the job
Run mvn clean package; when packaging completes, the assembly plugin produces cdc-connector-1.0-SNAPSHOT-jar-with-dependencies.jar under target/.
Two: Submitting the Flink job via the task panel
2.1 Submitting the job from the command line
Upload the cdc-connector-1.0-SNAPSHOT-jar-with-dependencies.jar package to the Flink home directory and run the following command:
# -c specifies the entry class, -m specifies the address of the Flink JobManager
bin/flink run -m 127.0.0.1:8081 -c com.yanggc.cdc.FlinkCDC2 ./cdc-connector-1.0-SNAPSHOT-jar-with-dependencies.jar
- Check in the job panel that the job is running.
- Verify that the monitored business changes are being output normally.
2.2 Submitting by uploading the jar package
- Upload the jar in the web UI and fill in the relevant parameters (entry class, etc.).
The effect is the same as the command-line submission: once the job starts up normally, monitoring is in place.