Adding the setting stopasgroup=true is enough. A working example configuration is shown below:
[program:flow-file-to-kafka]
command=python3 /data/collector/kafka_file_convert/file_to_kafka_flow.py -topic=test -filePath=/data/ -processNum=50
process_name=%(process_num)01d
numprocs=1
autostart=true
autorestart=true
startsecs=5
startretries=100
stopasgroup=true
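With stopasgroup=true set, supervisord sends the stop signal to the whole UNIX process group rather than only to the parent process, so the worker processes the script spawns via multiprocessing are terminated along with it instead of being orphaned. Per the supervisord documentation, stopasgroup=true also implies killasgroup=true, so an eventual SIGKILL escalation reaches the whole group as well.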
A multi-process code example follows:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import gzip
import logging
import os
import time
from multiprocessing import Process, Queue

import kafkatool

KAFKA_HOST_PRODUCER = ["192.168.1.1:9092"]

logging.basicConfig(format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
                    level=logging.INFO)


def handle_data(file_path, topic):
    """Single-file variant: stream one gzip file straight to Kafka."""
    producer = kafkatool.Kafka_producer(KAFKA_HOST_PRODUCER, topic)
    with gzip.open(file_path, 'rb') as f:
        for line in f:
            data = str(line, encoding="utf-8").rstrip('\n')
            producer.send(topic, data.encode())


def my_consumer(queue, topic):
    """Consumer process: take lines off the queue and publish them to Kafka."""
    producer = kafkatool.Kafka_producer(KAFKA_HOST_PRODUCER, topic)
    logging.info("kafka producer start: bootstrap_servers:{}".format(KAFKA_HOST_PRODUCER))
    logging.info("kafka producer start: topic:{}".format(topic))
    while True:
        data = queue.get()
        producer.send(topic, data)


def my_producer(queue, filepath):
    """Producer process: scan the directory, push every line of each finished
    gzip file onto the queue, then delete the file."""
    while True:
        files = os.listdir(filepath)
        count = 0
        for file in files:
            # Skip files that are still being written (marked with .tmp).
            if file.endswith('.tmp'):
                continue
            file_path = os.path.join(filepath, file)
            count += 1
            with gzip.open(file_path, 'rb') as f:
                for line in f:
                    data = str(line, encoding="utf-8").rstrip('\n')
                    queue.put(data.encode())
            os.remove(file_path)
        if count > 0:
            logging.info(f"One Loop Results: {count}")
            continue
        # No new files this pass: back off for a second before rescanning.
        time.sleep(1)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-topic", "--topic", default="logs", help="consumer topic")
    parser.add_argument("-filePath", "--filePath", default="/root/yy", help="file path")
    parser.add_argument("-processNum", "--processNum", default=5, type=int, help="process number")
    args = parser.parse_args()

    q = Queue()
    producer_proc = Process(target=my_producer, args=(q, args.filePath))
    producer_proc.start()

    consumers = []
    for i in range(args.processNum):
        c = Process(target=my_consumer, args=(q, args.topic))
        c.start()
        consumers.append(c)

    producer_proc.join()
    for c in consumers:
        c.join()
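Note that kafkatool is not a standard library; it is the author's own wrapper module, which is not shown here. For reference, a minimal sketch assuming it wraps kafka-python's KafkaProducer might look like the following (the class name and send signature mirror the calls in the script above; the internals are an assumption):

# Hypothetical kafkatool.py: a minimal wrapper over kafka-python's
# KafkaProducer, reconstructed from the calls used in the script above.
from kafka import KafkaProducer


class Kafka_producer:
    def __init__(self, bootstrap_servers, topic):
        self.topic = topic
        self.producer = KafkaProducer(bootstrap_servers=bootstrap_servers)

    def send(self, topic, data):
        # data arrives as bytes; kafka-python sends the value as-is.
        self.producer.send(topic, value=data)

A production wrapper would likely also expose flush()/close() so that buffered messages are not lost when supervisord stops the process group.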