######################################################################
#
# File: b2sdk/sync/policy.py
#
# Copyright 2019 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
from abc import ABCMeta, abstractmethod
from enum import Enum, unique
import logging
from ..exception import DestFileNewer
from .encryption_provider import AbstractSyncEncryptionSettingsProvider, SERVER_DEFAULT_SYNC_ENCRYPTION_SETTINGS_PROVIDER
from .action import LocalDeleteAction, B2CopyAction, B2DeleteAction, B2DownloadAction, B2HideAction, B2UploadAction
from .exception import InvalidArgument
# Number of milliseconds in a single day (ms * s * min * h).
ONE_DAY_IN_MS = 1000 * 60 * 60 * 24

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@unique
class NewerFileSyncMode(Enum):
    """
    Mode of handling files newer on destination than on source.

    The member values are unique (enforced by ``@unique``).
    """
    SKIP = 101  #: skip syncing such file
    REPLACE = 102  #: replace the file on the destination with the (older) file on source
    RAISE_ERROR = 103  #: raise a non-transient error, failing the sync operation
@unique
class CompareVersionMode(Enum):
    """
    Mode of comparing versions of files to determine what should be synced
    and what shouldn't.

    The member values are unique (enforced by ``@unique``).
    """
    MODTIME = 201  #: use file modification time on source filesystem
    SIZE = 202  #: compare using file size
    NONE = 203  #: compare using file name only
class AbstractFileSyncPolicy(metaclass=ABCMeta):
    """
    Abstract policy class.

    A policy inspects one (source file, destination file) pair and yields the
    actions needed for it: at most one transfer action followed by whatever
    hide/delete actions the concrete policy produces.  Concrete subclasses
    must implement :meth:`_make_transfer_action`.
    """
    # Prefixes passed to DestFileNewer to describe both sides of the sync;
    # concrete policies are expected to override them.
    DESTINATION_PREFIX = NotImplemented
    SOURCE_PREFIX = NotImplemented

    def __init__(
        self,
        source_file,
        source_folder,
        dest_file,
        dest_folder,
        now_millis,
        keep_days,
        newer_file_mode,
        compare_threshold,
        compare_version_mode=CompareVersionMode.MODTIME,
        encryption_settings_provider:
        AbstractSyncEncryptionSettingsProvider = SERVER_DEFAULT_SYNC_ENCRYPTION_SETTINGS_PROVIDER,
    ):
        """
        :param b2sdk.v1.File source_file: source file object
        :param b2sdk.v1.AbstractFolder source_folder: source folder object
        :param b2sdk.v1.File dest_file: destination file object
        :param b2sdk.v1.AbstractFolder dest_folder: destination folder object
        :param int now_millis: current time in milliseconds
        :param int keep_days: days to keep before delete
        :param b2sdk.v1.NEWER_FILE_MODES newer_file_mode: setting which determines handling for destination files newer than on the source
        :param int compare_threshold: when comparing with size or time for sync
        :param b2sdk.v1.COMPARE_VERSION_MODES compare_version_mode: how to compare source and destination files
        :param b2sdk.v1.AbstractSyncEncryptionSettingsProvider encryption_settings_provider: encryption setting provider
        """
        self._source_file = source_file
        self._source_folder = source_folder
        self._dest_file = dest_file
        self._keep_days = keep_days
        self._newer_file_mode = newer_file_mode
        self._compare_version_mode = compare_version_mode
        self._compare_threshold = compare_threshold
        self._dest_folder = dest_folder
        self._now_millis = now_millis
        # Flipped to True by get_all_actions() once a transfer action was yielded.
        self._transferred = False
        self._encryption_settings_provider = encryption_settings_provider

    def _should_transfer(self):
        """
        Decide whether to transfer the file from the source to the destination.

        :return: True if a transfer action should be produced, False otherwise
        :rtype: bool
        """
        if self._source_file is None or self._source_file.latest_version().action == 'hide':
            # No source file. Nothing to transfer.
            return False
        elif self._dest_file is None:
            # Source file exists, but no destination file. Always transfer.
            return True
        else:
            # Both exist. Transfer only if the two are different.
            return self.files_are_different(
                self._source_file,
                self._dest_file,
                self._compare_threshold,
                self._compare_version_mode,
                self._newer_file_mode,
            )

    @classmethod
    def files_are_different(
        cls,
        source_file,
        dest_file,
        compare_threshold=None,
        compare_version_mode=CompareVersionMode.MODTIME,
        newer_file_mode=NewerFileSyncMode.RAISE_ERROR,
    ):
        """
        Compare two files and determine if the destination file
        should be replaced by the source file.

        :param b2sdk.v1.File source_file: source file object
        :param b2sdk.v1.File dest_file: destination file object
        :param int compare_threshold: compare threshold when comparing by time or size
        :param b2sdk.v1.CompareVersionMode compare_version_mode: source file version comparator method
        :param b2sdk.v1.NewerFileSyncMode newer_file_mode: newer destination handling method
        :return: True if the destination should be replaced, False otherwise
        :rtype: bool
        :raises DestFileNewer: in ``MODTIME`` mode, when the destination is newer
                               than the source by more than the threshold and
                               ``newer_file_mode`` is neither ``REPLACE`` nor ``SKIP``
        :raises InvalidArgument: if ``compare_version_mode`` is not a known mode
        """
        # Optionally set a compare threshold for fuzzy comparison
        compare_threshold = compare_threshold or 0

        # Compare using file name only
        if compare_version_mode == CompareVersionMode.NONE:
            return False

        # Compare using modification time
        elif compare_version_mode == CompareVersionMode.MODTIME:
            # Get the modification time of the latest versions
            source_mod_time = source_file.latest_version().mod_time
            dest_mod_time = dest_file.latest_version().mod_time
            diff_mod_time = abs(source_mod_time - dest_mod_time)
            compare_threshold_exceeded = diff_mod_time > compare_threshold

            logger.debug(
                'File %s: source time %s, dest time %s, diff %s, threshold %s, diff > threshold %s',
                source_file.name,
                source_mod_time,
                dest_mod_time,
                diff_mod_time,
                compare_threshold,
                compare_threshold_exceeded,
            )

            if compare_threshold_exceeded:
                # Source is newer
                if dest_mod_time < source_mod_time:
                    return True

                # Source is older
                elif source_mod_time < dest_mod_time:
                    if newer_file_mode == NewerFileSyncMode.REPLACE:
                        return True
                    elif newer_file_mode == NewerFileSyncMode.SKIP:
                        return False
                    else:
                        raise DestFileNewer(
                            dest_file, source_file, cls.DESTINATION_PREFIX, cls.SOURCE_PREFIX
                        )

            # Within the threshold (or identical timestamps): the files count
            # as the same.  Return an explicit bool instead of falling off the
            # end of the function with an implicit None.
            return False

        # Compare using file size
        elif compare_version_mode == CompareVersionMode.SIZE:
            # Get file size of the latest versions
            source_size = source_file.latest_version().size
            dest_size = dest_file.latest_version().size
            diff_size = abs(source_size - dest_size)
            compare_threshold_exceeded = diff_size > compare_threshold

            logger.debug(
                'File %s: source size %s, dest size %s, diff %s, threshold %s, diff > threshold %s',
                source_file.name,
                source_size,
                dest_size,
                diff_size,
                compare_threshold,
                compare_threshold_exceeded,
            )

            # Replace if size difference is over threshold
            return compare_threshold_exceeded
        else:
            raise InvalidArgument('compare_version_mode', 'is invalid option')

    def get_all_actions(self):
        """
        Yield file actions.

        The transfer action (if any) comes first, followed by the hide/delete
        actions returned by :meth:`_get_hide_delete_actions`.
        """
        if self._should_transfer():
            yield self._make_transfer_action()
            # Set only after the consumer resumes the generator, i.e. before
            # the hide/delete actions below are computed.
            self._transferred = True

        assert self._dest_file is not None or self._source_file is not None
        yield from self._get_hide_delete_actions()

    def _get_hide_delete_actions(self):
        """
        Subclass policy can override this to hide or delete files.

        :return: an iterable of actions; empty in this base implementation
        """
        return []

    def _get_source_mod_time(self):
        """
        Return the modification time of the latest version of the source file.
        """
        return self._source_file.latest_version().mod_time

    @abstractmethod
    def _make_transfer_action(self):
        """
        Return an action representing transfer of file according to the selected policy.
        """
class DownPolicy(AbstractFileSyncPolicy):
    """
    File is synced down (from the cloud to disk).
    """
    DESTINATION_PREFIX = 'local://'
    SOURCE_PREFIX = 'b2://'

    def _make_transfer_action(self):
        """
        Return a download action for the source file.

        Both the source and the destination full paths are built from the
        source file name.
        """
        return B2DownloadAction(
            self._source_file,
            self._source_folder.make_full_path(self._source_file.name),
            self._dest_folder.make_full_path(self._source_file.name),
            self._encryption_settings_provider,
        )
class UpPolicy(AbstractFileSyncPolicy):
    """
    File is synced up (from disk to the cloud).
    """
    DESTINATION_PREFIX = 'b2://'
    SOURCE_PREFIX = 'local://'

    def _make_transfer_action(self):
        """
        Return an upload action for the source file.

        The action receives the source full path, the file name, the
        destination full path, and the modification time and size of the
        latest source version.
        """
        return B2UploadAction(
            self._source_folder.make_full_path(self._source_file.name),
            self._source_file.name,
            self._dest_folder.make_full_path(self._source_file.name),
            self._get_source_mod_time(),
            self._source_file.latest_version().size,
            self._encryption_settings_provider,
        )
class UpAndDeletePolicy(UpPolicy):
    """
    File is synced up (from disk to the cloud) and the delete flag is SET.
    """

    def _get_hide_delete_actions(self):
        """
        Yield the parent's hide/delete actions, then the B2 delete actions
        for the destination file.
        """
        yield from super()._get_hide_delete_actions()
        yield from make_b2_delete_actions(
            self._source_file,
            self._dest_file,
            self._dest_folder,
            self._transferred,
        )
class UpAndKeepDaysPolicy(UpPolicy):
    """
    File is synced up (from disk to the cloud) and the keepDays flag is SET.
    """

    def _get_hide_delete_actions(self):
        """
        Yield the parent's hide/delete actions, then the keep-days
        hide/delete actions for the destination file.
        """
        yield from super()._get_hide_delete_actions()
        yield from make_b2_keep_days_actions(
            self._source_file,
            self._dest_file,
            self._dest_folder,
            self._transferred,
            self._keep_days,
            self._now_millis,
        )
class DownAndDeletePolicy(DownPolicy):
    """
    File is synced down (from the cloud to disk) and the delete flag is SET.
    """

    def _get_hide_delete_actions(self):
        """
        Yield the parent's hide/delete actions, then a local delete action
        when the destination file exists but the source file is missing or
        its latest version is a hide marker.
        """
        yield from super()._get_hide_delete_actions()
        if self._dest_file is not None and (
            self._source_file is None or self._source_file.latest_version().action == 'hide'
        ):
            # Local files have either 0 or 1 versions. If the file is there,
            # it must have exactly 1 version.
            yield LocalDeleteAction(self._dest_file.name, self._dest_file.versions[0].id_)
class DownAndKeepDaysPolicy(DownPolicy):
    """
    File is synced down (from the cloud to disk) and the keepDays flag is SET.

    Adds nothing on top of :class:`DownPolicy`: no extra hide or delete
    actions are produced.
    """
class CopyPolicy(AbstractFileSyncPolicy):
    """
    File is copied (server-side).
    """
    DESTINATION_PREFIX = 'b2://'
    SOURCE_PREFIX = 'b2://'

    def _make_transfer_action(self):
        """
        Return a copy action for the source file.

        The action receives the source full path, the source file, the
        destination full path (built from the source file name) and the
        ``bucket`` attribute of the source and destination folders.
        """
        return B2CopyAction(
            self._source_folder.make_full_path(self._source_file.name),
            self._source_file,
            self._dest_folder.make_full_path(self._source_file.name),
            self._source_folder.bucket,
            self._dest_folder.bucket,
            self._encryption_settings_provider,
        )
class CopyAndDeletePolicy(CopyPolicy):
    """
    File is copied (server-side) and the delete flag is SET.
    """

    def _get_hide_delete_actions(self):
        """
        Yield the parent's hide/delete actions, then the B2 delete actions
        for the destination file.
        """
        yield from super()._get_hide_delete_actions()
        yield from make_b2_delete_actions(
            self._source_file,
            self._dest_file,
            self._dest_folder,
            self._transferred,
        )
class CopyAndKeepDaysPolicy(CopyPolicy):
    """
    File is copied (server-side) and the keepDays flag is SET.
    """

    def _get_hide_delete_actions(self):
        """
        Yield the parent's hide/delete actions, then the keep-days
        hide/delete actions for the destination file.
        """
        yield from super()._get_hide_delete_actions()
        yield from make_b2_keep_days_actions(
            self._source_file,
            self._dest_file,
            self._dest_folder,
            self._transferred,
            self._keep_days,
            self._now_millis,
        )
def make_b2_delete_note(version, index, transferred):
    """
    Create a note message for delete action.

    :param b2sdk.v1.FileVersionInfo version: an object which contains file version info
    :param int index: file version index
    :param bool transferred: if True, file has been transferred, False otherwise
    :return: ``'(hide marker)'`` for a hide marker, ``'(old version)'`` for a
             version that is not the first one or that was superseded by a
             transfer, and an empty string otherwise
    :rtype: str
    """
    # The hide-marker check takes precedence over the "old version" check.
    if version.action == 'hide':
        return '(hide marker)'
    if transferred or 0 < index:
        return '(old version)'
    return ''
def make_b2_delete_actions(source_file, dest_file, dest_folder, transferred):
    """
    Create the actions to delete files stored on B2, which are not present locally.

    Every version of the destination file is deleted, except the first one
    when the source file exists and no transfer took place.

    :param b2sdk.v1.File source_file: source file object
    :param b2sdk.v1.File dest_file: destination file object
    :param b2sdk.v1.AbstractFolder dest_folder: destination folder
    :param bool transferred: if True, file has been transferred, False otherwise
    :return: a generator of delete actions (empty if ``dest_file`` is None)
    """
    if dest_file is None:
        # B2 does not really store folders, so there is no need to hide
        # them or delete them
        return

    for version_index, version in enumerate(dest_file.versions):
        # Only the first listed version can survive, and only while the
        # source still exists and was not just transferred over it.
        keep = (version_index == 0) and (source_file is not None) and not transferred
        if not keep:
            yield B2DeleteAction(
                dest_file.name,
                dest_folder.make_full_path(dest_file.name),
                version.id_,
                make_b2_delete_note(version, version_index, transferred),
            )
def make_b2_keep_days_actions(
    source_file, dest_file, dest_folder, transferred, keep_days, now_millis
):
    """
    Create the actions to hide or delete existing versions of a file
    stored in b2.

    When keepDays is set, all files that were visible any time from
    keepDays ago until now must be kept.  If versions were uploaded 5
    days ago, 15 days ago, and 25 days ago, and the keepDays is 10,
    only the 25-day old version can be deleted.  The 15 day-old version
    was visible 10 days ago.

    :param b2sdk.v1.File source_file: source file object
    :param b2sdk.v1.File dest_file: destination file object
    :param b2sdk.v1.AbstractFolder dest_folder: destination folder object
    :param bool transferred: if True, file has been transferred, False otherwise
    :param int keep_days: how many days to keep a file
    :param int now_millis: current time in milliseconds
    :return: a generator of hide and delete actions (empty if ``dest_file`` is None)
    """
    deleting = False

    if dest_file is None:
        # B2 does not really store folders, so there is no need to hide
        # them or delete them
        return

    for version_index, version in enumerate(dest_file.versions):
        # How old is this version?
        age_days = (now_millis - version.mod_time) / ONE_DAY_IN_MS

        # Mostly, the versions are ordered by time, newest first,
        # BUT NOT ALWAYS. The mod time we have is the src_last_modified_millis
        # from the file info (if present), or the upload start time
        # (if not present).  The user-specified src_last_modified_millis
        # may not be in order.  Because of that, we no longer
        # assert that age_days is non-decreasing.
        #
        # Note that if there is an out-of-order date that is old enough
        # to trigger deletions, all of the versions uploaded before that
        # (the ones after it in the list) will be deleted, even if they
        # aren't over the age threshold.

        # Do we need to hide this version?
        if version_index == 0 and source_file is None and version.action == 'upload':
            yield B2HideAction(dest_file.name, dest_folder.make_full_path(dest_file.name))

        # Can we start deleting?  Once we start deleting, all older
        # versions will also be deleted.  A hide marker past the age
        # threshold is itself deleted, so this check runs before the
        # delete step below.
        if version.action == 'hide':
            if keep_days < age_days:
                deleting = True

        # Delete this version
        if deleting:
            yield B2DeleteAction(
                dest_file.name,
                dest_folder.make_full_path(dest_file.name),
                version.id_,
                make_b2_delete_note(version, version_index, transferred),
            )

        # Can we start deleting with the next version, based on the
        # age of this one?
        if keep_days < age_days:
            deleting = True