KnowHow

技術的なメモを中心にまとめます。
検索にて調べることができます。

[Thread版:改良3]pythonとbashを用いたLinuxサーバのバックアッププログラム

登録日 :2025/06/23 04:32
カテゴリ :Python基礎

バックアッププログラムの改良版。ソースディレクトリやバックアップスクリプトのパスをsettings.pyで指定するようにしました(バックアップ先ディレクトリはbackup1.sh内のBACKUP_DIRで指定します)。
スクリプトパスについても、os, sysを使ってパス情報を自動で取得するようにして、初期設定やメンテナンスをしやすくしました。
バックアップするbashで用いるコマンドはrsyncですが、フルバックアップと差分バックアップを分ける必要がないように思ったので、コマンドをシンプルに見直しました。


Homeディレクトリに多数のユーザがいる場合、バックアップに時間がかかる。

バックアップサーバとメインサーバ間はインフィニバンドで接続してデータ転送速度が良いため、IOバウンドがボトルネックとなる。そのため、できる限り帯域を効率的に用いるには、Threadなどでバックアップ処理を並列化したほうが良い。

Home領域の増減もあるため、pythonを用いて自動的にhome領域のディレクトリを取得して、Threadでバックアップ処理を実施するプログラムとする。

フォルダ構成

[root@ManageServer backup_script]# ll

-rwxr-xr-x. 1 root root 6969  6 22 21:54 backup_home.py
drwxr-xr-x. 4 root root   87  6 22 21:55 config
drwxr-xr-x. 3 root root   45  6 22 20:02 log
drwxr-xr-x. 4 root root   55  6 22 21:48 script

バックアップbash

script/backup1.sh

#!/bin/bash
#
# Mirror one source directory into /backup_dir/<basename of source> with rsync.
#
# Exit status:
#   0      backup finished
#   1      bad usage, unusable source directory, or destination not creatable
#   other  exit status of rsync when the transfer failed
#
# The caller decides success from the exit status, so every failure path
# must leave with a non-zero status and a message on stderr.

# Print how to call this script.
usage() {
    echo "Usage: $0 <source_directory>"
    echo "Example: $0 /home/user"
}

if [[ $# -eq 0 ]]; then
    usage >&2
    exit 1
fi

SOURCE_DIR="$1"

# Refuse anything that is not an existing directory before touching the
# destination, so no empty backup folder is created for a bad source.
if [[ ! -d "$SOURCE_DIR" ]]; then
    echo "Error: source is not a directory: $SOURCE_DIR" >&2
    exit 1
fi

TARGET_NAME="$(basename -- "$SOURCE_DIR")"

# A name of ".", ".." or "/" would make the destination resolve to
# /backup_dir itself or its parent; with --delete that must never happen.
case "$TARGET_NAME" in
    .|..|/)
        echo "Error: refusing to back up '$SOURCE_DIR' (unsafe target name '$TARGET_NAME')" >&2
        exit 1
        ;;
esac

BACKUP_DIR="/backup_dir/$TARGET_NAME"

# Create the destination directory.
if ! mkdir -p "$BACKUP_DIR"; then
    echo "Error: cannot create backup directory: $BACKUP_DIR" >&2
    exit 1
fi

# Back up with rsync (the destination always mirrors the current source).
rsync -a --delete \
    --exclude='.cache' \
    --exclude='*.tmp' \
    --exclude='*.log' \
    "$SOURCE_DIR/" "$BACKUP_DIR/"
status=$?

if [[ $status -ne 0 ]]; then
    echo "Backup failed (rsync exit status $status): $SOURCE_DIR" >&2
    exit "$status"
fi

echo "Backup completed: $BACKUP_DIR"

Threadでbashを実行するpythonプログラム

backup_home.py

#!/usr/bin/python3

import datetime
import gc
import logging
import os
import queue
import shlex
import signal
import socket
import subprocess
import sys
import threading
import time
from abc import ABC, abstractmethod
from subprocess import PIPE

# Two dirname() calls on this file's absolute path give the parent of the
# directory that contains this script; that directory is appended to
# sys.path before the settings import below.
# NOTE(review): the folder listing shows config/ next to this script, so the
# import presumably resolves through the script's own directory — confirm
# whether this extra path entry is still needed.
dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(dir_path)

# Must stay after the sys.path change above.
from config import settings

# The string below comes after the imports, so it is a plain expression
# statement rather than the module docstring.
"""
created by Tagawa.
Backup home/*
Settings by config/settings.py
method Thread
version 2025.06.22
"""

# All records go to settings.LOG_FILE; each line carries the timestamp,
# logger name, level and thread name ahead of the message.
logging.basicConfig(
        filename=settings.LOG_FILE,
        level=logging.INFO,
        format='%(asctime)s:%(name)s:%(levelname)s:%(threadName)s:%(message)s')
logger = logging.getLogger(__name__)

# With the level set to INFO above, this debug record is filtered out.
logger.debug({'add path': dir_path})


class ShellCommand(object):
    """Run one shell command line and keep its outcome on the instance.

    The command goes through the shell (``shell=True``), so the caller is
    responsible for quoting any argument it splices into the string.

    Attributes:
        stdout, stderr: Decoded output of the last command that ran to
            completion.  They stay ``False`` until then (for example when
            the command timed out); the sentinel is kept for backward
            compatibility with existing callers.
        returncode: Exit status of the last completed command, ``False``
            before that.
        command: The command string most recently given to
            ``submit_command``.
    """

    def __init__(self, dt_now, timeout: int, command: str):
        """Store the command and its limits; nothing is executed yet.

        Args:
            dt_now: Run timestamp, only echoed into error log records.
            timeout: Seconds passed to ``subprocess.run`` as ``timeout``.
            command: Shell command line run by ``execute_command``.
        """
        self.stdout = False
        self.stderr = False
        self.returncode = False
        self.command = False
        self._timeout = timeout
        self._command = command
        self._dt_now = dt_now
        self._command_result = False
        self._errlog = False

    def submit_command(self, command):
        """Run ``command`` and record stdout, stderr and the exit status.

        Raises:
            subprocess.TimeoutExpired: The command exceeded the timeout.
            Exception: The command exited non-zero.  The message is its
                stderr, or a generic text naming the exit status when
                stderr is empty, so the message is never empty.
        """
        self.command = command
        result = subprocess.run(
            self.command,
            shell=True,
            stdout=PIPE,
            stderr=PIPE,
            timeout=self._timeout)
        # errors='replace': command output may contain bytes that are not
        # valid UTF-8 (e.g. file names); a successful command must not be
        # turned into a failure just because its output cannot be decoded.
        self.stdout = result.stdout.decode('utf-8', errors='replace')
        self.stderr = result.stderr.decode('utf-8', errors='replace')
        self.returncode = result.returncode

        if result.returncode != 0:
            # Callers test the stored error text for truthiness, so a silent
            # non-zero exit still needs a non-empty message.
            raise Exception(
                self.stderr
                or 'command exited with status {}'.format(result.returncode))

    def execute_command(self):
        """Run the stored command without letting a failure propagate.

        On success ``_command_result`` holds stdout and ``_errlog`` stays
        ``False``.  On any exception ``_command_result`` holds the current
        ``stderr`` value, ``_errlog`` holds the exception text, and an error
        record is logged.
        """
        try:
            self.submit_command(self._command)
            self._command_result = self.stdout
        except Exception as e:
            self._command_result = self.stderr
            self._errlog = str(e)
            logger.error({
                'time': self._dt_now,
                'status': 'failed',
                'action':'ExceuteShellComand',
                'error': self._errlog,
                'command': self._command})


class FetchHomeDir(object):
    """Collect the paths of the entries directly under ``home``.

    The listing comes from ``ls -a``, so hidden entries and plain files are
    included as well; only ``.`` and ``..`` are dropped.

    Attributes:
        shell: The ``ShellCommand`` that runs the listing.
        homedirs: Paths gathered so far; each successful ``run_command``
            appends to it.
    """

    def __init__(self, dt_now, timeout, home):
        """Prepare the listing command for ``home``; nothing runs yet.

        Args:
            dt_now: Run timestamp, echoed into error log records.
            timeout: Seconds allowed for the listing command.
            home: Directory whose entries are listed.
        """
        self._dt_now = dt_now
        self._timeout = timeout
        self._home = home
        self._status = None
        # Quoted so a path with spaces or shell metacharacters reaches ls as
        # one argument; a plain path such as /home is left unchanged.
        self._command = 'ls -a ' + shlex.quote(home)
        self.shell = ShellCommand(dt_now, timeout, self._command)
        self.homedirs = []

    @staticmethod
    def _entries_to_paths(home, listing):
        """Turn newline-separated ``ls -a`` output into paths under ``home``.

        ``.`` and ``..`` are skipped by name, not by position: ls sorts its
        output, and an entry whose name sorts before ``.`` would otherwise
        shift them out of the first two lines.
        """
        return [
            os.path.join(home, name)
            for name in listing.split('\n')
            if name not in ('', '.', '..')]

    def run_command(self):
        """Run the listing and append the resulting paths to ``homedirs``.

        Sets ``_status`` to ``'success'`` or ``'failed'``.  A failure is
        logged and leaves ``homedirs`` as it was.  When ``settings.DEBUG``
        is true the status and the collected paths are printed.
        """
        self.shell.execute_command()
        listing = self.shell._command_result
        logger.debug({'return command result': listing})
        if not self.shell._errlog and listing != "":
            self._status = 'success'
            for _path in self._entries_to_paths(self._home, listing):
                logger.debug(_path)
                self.homedirs.append(_path)
        else:
            self._status = 'failed'
            logger.error({
                'time': self._dt_now,
                'status': self._status,
                # Log the name, not the class object.
                'action': 'FetchHomeDir',
                'command': self._command,
                'home': self._home})

        if settings.DEBUG:
            print(f'{self._status}: {__file__} FetchHomeDir from {self._home}')
            for _home in self.homedirs:
                print(_home)


class IThreadWorker(ABC):
    """Base class that drains a queue with a fixed number of threads.

    Subclasses must implement ``worker``.  The body provided here is a
    reference loop that a subclass may reuse through ``super().worker()``.
    """

    def __init__(self, dt_now, queue, num_of_thread, timeout):
        """Keep the shared run parameters.

        Args:
            dt_now: Run timestamp made available to subclasses.
            queue: Queue already filled with the work items.
            num_of_thread: Number of worker threads started by ``run``.
            timeout: Per-item timeout made available to subclasses.
        """
        self.dt_now = dt_now
        self.queue = queue
        self.num_of_thread = num_of_thread
        self.timeout = timeout
        self.command = None

    def run(self):
        """Start the workers, signal the end of input and wait for them.

        One ``None`` sentinel per thread is queued behind the real items,
        so every worker sees exactly one stop signal after the work is
        consumed.
        """
        workers = []
        for _ in range(self.num_of_thread):
            t = threading.Thread(target=self.worker)
            t.start()
            workers.append(t)
        for _ in workers:
            self.queue.put(None)
        for t in workers:
            t.join()

    @abstractmethod
    def worker(self):
        """Thread body: take items until a ``None`` sentinel arrives.

        Each item is printed, ``some_process`` is called, and the item is
        marked done on the queue.
        """
        logger.debug('start')
        while True:
            item = self.queue.get()
            if item is None:
                break
            print({'thread': item})
            self.some_process()
            self.queue.task_done()
        logger.debug('end')

    def some_process(self):
        """Hook called once per item by the reference loop; does nothing here."""
        pass


class ThreadHomeBackup(IThreadWorker):
    """Run the backup script once for every directory taken from the queue."""

    def __init__(self, dt_now, queue, num_of_thread, timeout, backup_script):
        """Keep the run parameters and build the command prefix.

        Args:
            dt_now: Run timestamp, echoed into error log records.
            queue: Queue of source directory paths.
            num_of_thread: Number of worker threads.
            timeout: Seconds allowed for one backup command.
            backup_script: Path of the script that receives one source
                directory as its argument.
        """
        super().__init__(dt_now, queue, num_of_thread, timeout)
        self.command = backup_script + ' '
        self.result = []

    def worker(self):
        """Thread body: back up queued directories until ``None`` arrives."""
        logger.debug('start')
        while True:
            path_dir = self.queue.get()
            if path_dir is None:
                break
            self.check_home_dir(path_dir)
            self.queue.task_done()
        logger.debug('end')

    def check_home_dir(self, path_dir):
        """Back up ``path_dir`` and log whether the script succeeded.

        ``ShellCommand.execute_command`` records a failure on the object
        instead of raising, so the outcome is read back from it here; a
        failed or timed-out backup must not be reported as ``success``.
        """
        try:
            # Quoted so a directory name with spaces or shell
            # metacharacters is passed to the script as one argument.
            _command = self.command + shlex.quote(path_dir)
            _shell = ShellCommand(self.dt_now, self.timeout, _command)
            _shell.execute_command()

            # _errlog is set on any exception (including a timeout);
            # returncode also catches a non-zero exit whose error text
            # was empty.
            if _shell._errlog or _shell.returncode != 0:
                logger.error({
                    'time': self.dt_now,
                    'status': 'failed',
                    'action': 'ThreadHomeDirChecker',
                    'message': _shell._errlog
                    or 'exit status {}'.format(_shell.returncode),
                    'path': path_dir})
                return

            logger.info({
                'status': 'success',
                'source': path_dir,
                'result': _shell._command_result.split('\n')[0],
                })

        except Exception as e:
            print({'command Error': str(e)})
            logger.error({
                'time': self.dt_now,
                'status': 'failed',
                'action': 'ThreadHomeDirChecker',
                'message': str(e),
                'path': path_dir})


"""
test code
"""
def test_shell_command():
    """Manual smoke check: list the entries under /home and print them.

    Not called by the script's main section; invoke it by hand.
    """
    stamp = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')

    lister = FetchHomeDir(stamp, settings.TIMEOUT, '/home')
    lister.run_command()

    print({'result': lister.homedirs})


if __name__ == '__main__':

    # Read every option from config/settings.py up front.
    src_root, limit_sec, worker_count, script_path = (
        settings.HOME_DIR,
        settings.TIMEOUT,
        settings.THREADING_NUM,
        settings.BACKUP_SCRIPT,
    )

    # One timestamp shared by all records of this run.
    run_stamp = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    lister = FetchHomeDir(run_stamp, limit_sec, src_root)
    pending = queue.Queue()
    started_at = time.time()

    logger.info({'Start Backup': run_stamp})

    # Fill the queue with every entry found under the source root.
    lister.run_command()
    for entry in lister.homedirs:
        pending.put(entry)

    # Drain the queue with the worker threads; returns once all have ended.
    runner = ThreadHomeBackup(
        run_stamp, pending, worker_count, limit_sec, script_path)
    runner.run()

    elapsed = time.time() - started_at
    logger.info({
        'action': 'Backup by threads',
        'elapsed time': '{: 4f} sec'.format(elapsed)})

    print('thread time: {: 4f}\n'.format(elapsed))

    # Drop the run objects and force a collection before exiting.
    del lister, runner, pending
    gc.collect()

バックアップオプションを設定するconfigファイル

config/settings.py

"""
created by Tagawa
version 2024.9.17.1
Please Change Option
 number of threading -> integer
 Timeout             -> integer
 172800sec = 48hours (2days)
"""

# File that backup_home.py hands to logging.basicConfig().
LOG_FILE       = "/home/APPLI/TOOLS/backup_script/log/check_result.log"
# Script run once per source directory, with that directory as its argument.
BACKUP_SCRIPT  = "/home/APPLI/TOOLS/backup_script/script/backup1.sh"
# NOTE(review): not read by the backup_home.py shown with this file —
# presumably used by another variant; confirm before removing.
THREADING_ON   = True
# Number of worker threads started for the backup.
THREADING_NUM  = 4
# NOTE(review): not read by the backup_home.py shown with this file — confirm.
PROCESSES_NUM  = 4
# Seconds allowed for each shell command (passed to subprocess.run as timeout).
TIMEOUT        = 172800
# When true, the directory listing step prints its status and the paths found.
DEBUG          = True
# NOTE(review): not read by the backup_home.py shown with this file — confirm.
TEST_CODE      = False
# Directory whose entries are backed up one by one.
HOME_DIR       = '/home'

実行

プログラムを実行すると、configで設定したバックアップ元(/home)配下の各ディレクトリが、backup1.shによってバックアップ先(/backup_dir)に保存される。

[root@ManageServer backup_script]# ./backup_home.py
success: ./backup_home.py FetchHomeDir from /home
/home/APPLI
/home/N1001
/home/N1002
/home/N1003
/home/N1004
/home/N1005
/home/N1006
/home/N1007
/home/N1008
/home/N1009
/home/N1010
/home/download
/home/install
/home/settings
/home/user01
thread time:  0.122335

保存元

[root@ManageServer backup_script]# ls /home
APPLI  N1001  N1002  N1003  N1004  N1005  N1006  N1007  N1008  N1009  N1010  download  install  settings  user01

保存先

[root@ManageServer backup_script]# ls /backup_dir/
APPLI  N1001  N1002  N1003  N1004  N1005  N1006  N1007  N1008  N1009  N1010  download  install  settings  user01

実行ログ(log/check_result.log)

2025-06-23 04:40:08,333:__main__:INFO:MainThread:{'Start Backup': '2025/06/23 04:40:08'}
2025-06-23 04:40:08,370:__main__:INFO:Thread-4:{'status': 'success', 'source': '/home/N1003', 'result': 'Backup completed: /backup_dir/N1003'}
2025-06-23 04:40:08,383:__main__:INFO:Thread-2:{'status': 'success', 'source': '/home/N1001', 'result': 'Backup completed: /backup_dir/N1001'}
2025-06-23 04:40:08,385:__main__:INFO:Thread-3:{'status': 'success', 'source': '/home/N1002', 'result': 'Backup completed: /backup_dir/N1002'}
2025-06-23 04:40:08,395:__main__:INFO:Thread-4:{'status': 'success', 'source': '/home/N1004', 'result': 'Backup completed: /backup_dir/N1004'}
2025-06-23 04:40:08,397:__main__:INFO:Thread-2:{'status': 'success', 'source': '/home/N1005', 'result': 'Backup completed: /backup_dir/N1005'}
2025-06-23 04:40:08,400:__main__:INFO:Thread-1:{'status': 'success', 'source': '/home/APPLI', 'result': 'Backup completed: /backup_dir/APPLI'}
2025-06-23 04:40:08,402:__main__:INFO:Thread-3:{'status': 'success', 'source': '/home/N1006', 'result': 'Backup completed: /backup_dir/N1006'}
2025-06-23 04:40:08,421:__main__:INFO:Thread-2:{'status': 'success', 'source': '/home/N1008', 'result': 'Backup completed: /backup_dir/N1008'}
2025-06-23 04:40:08,423:__main__:INFO:Thread-1:{'status': 'success', 'source': '/home/N1009', 'result': 'Backup completed: /backup_dir/N1009'}
2025-06-23 04:40:08,424:__main__:INFO:Thread-4:{'status': 'success', 'source': '/home/N1007', 'result': 'Backup completed: /backup_dir/N1007'}
2025-06-23 04:40:08,438:__main__:INFO:Thread-3:{'status': 'success', 'source': '/home/N1010', 'result': 'Backup completed: /backup_dir/N1010'}
2025-06-23 04:40:08,452:__main__:INFO:Thread-3:{'status': 'success', 'source': '/home/user01', 'result': 'Backup completed: /backup_dir/user01'}
2025-06-23 04:40:08,454:__main__:INFO:Thread-2:{'status': 'success', 'source': '/home/download', 'result': 'Backup completed: /backup_dir/download'}
2025-06-23 04:40:08,455:__main__:INFO:Thread-4:{'status': 'success', 'source': '/home/settings', 'result': 'Backup completed: /backup_dir/settings'}
2025-06-23 04:40:08,455:__main__:INFO:Thread-1:{'status': 'success', 'source': '/home/install', 'result': 'Backup completed: /backup_dir/install'}
2025-06-23 04:40:08,455:__main__:INFO:MainThread:{'action': 'Backup by threads', 'elapsed time': ' 0.122335 sec'}