Source code for b2sdk.transferer.transferer

# File: b2sdk/transferer/transferer.py
# Copyright 2019 Backblaze Inc. All Rights Reserved.
# License

import six

from ..download_dest import DownloadDestProgressWrapper
from ..exception import ChecksumMismatch, UnexpectedCloudBehaviour, TruncatedOutput, InvalidRange
from ..progress import DoNothingProgressListener
from ..raw_api import SRC_LAST_MODIFIED_MILLIS
from ..utils import B2TraceMetaAbstract
from .file_metadata import FileMetadata
from .parallel import ParallelDownloader
from .simple import SimpleDownloader

@six.add_metaclass(B2TraceMetaAbstract)
class Transferer(object):
    """
    Handle complex actions around downloads and uploads to free raw_api from that responsibility.
    """

    # how many chunks to break a downloaded file into
    DEFAULT_MAX_STREAMS = 8

    # minimum size of a download chunk
    DEFAULT_MIN_PART_SIZE = 100 * 1024 * 1024

    # block size used when downloading file. If it is set to a high value,
    # progress reporting will be jumpy, if it's too low, it impacts CPU
    MIN_CHUNK_SIZE = 8192  # ~1MB file will show ~1% progress increment
    MAX_CHUNK_SIZE = 1024**2

    def __init__(self, session, account_info):
        """
        Store the collaborators and build the ordered list of download strategies.

        The strategies are configured from the class-level constants
        (``DEFAULT_MAX_STREAMS``, ``DEFAULT_MIN_PART_SIZE``, ``MIN_CHUNK_SIZE``,
        ``MAX_CHUNK_SIZE``).  They are tried in list order by
        :meth:`download_file_from_url`, so the parallel strategy is preferred
        over the simple one.

        :param session: object whose ``download_file_from_url`` is used to open the download
        :param account_info: object whose ``get_download_url`` is passed to the session as ``url_factory``
        """
        self.session = session
        self.account_info = account_info

        self.strategies = [
            ParallelDownloader(
                max_streams=self.DEFAULT_MAX_STREAMS,
                min_part_size=self.DEFAULT_MIN_PART_SIZE,
                min_chunk_size=self.MIN_CHUNK_SIZE,
                max_chunk_size=self.MAX_CHUNK_SIZE,
            ),
            #IOTDownloader(),  # TODO: curl -s | tee /dev/stderr 2>ble | sha1sum | cut -c -40
            SimpleDownloader(
                min_chunk_size=self.MIN_CHUNK_SIZE,
                max_chunk_size=self.MAX_CHUNK_SIZE,
            ),
        ]

    def download_file_from_url(
        self,
        url,
        download_dest,
        progress_listener=None,
        range_=None,
    ):
        """
        Download a file (or a byte range of it) into ``download_dest`` and validate the result.

        :param url: url from which the file should be downloaded
        :param download_dest: where to put the file when it is downloaded
        :param progress_listener: where to notify about progress downloading;
                                  a ``DoNothingProgressListener`` is used when falsy
        :param range_: 2-element tuple containing data of http Range header
                       (both bounds are treated as inclusive)
        :return: ``metadata.as_info_dict()`` for the downloaded file
        :raises UnexpectedCloudBehaviour: a range was requested but the response
                                          has no ``Content-Range`` header
        :raises InvalidRange: the response length does not match the requested range
        :raises TruncatedOutput: fewer/more bytes were read than expected
        :raises ChecksumMismatch: the computed sha1 differs from the one in the metadata
        :raises AssertionError: none of the configured strategies is suitable
        """
        progress_listener = progress_listener or DoNothingProgressListener()
        download_dest = DownloadDestProgressWrapper(download_dest, progress_listener)

        with self.session.download_file_from_url(
            url,
            url_factory=self.account_info.get_download_url,
            range_=range_,
        ) as response:
            metadata = FileMetadata.from_response(response)

            if range_ is not None:
                if 'Content-Range' not in response.headers:
                    raise UnexpectedCloudBehaviour('Content-Range header was expected')
                # range bounds are inclusive, hence the +1
                if (range_[1] - range_[0] + 1) != metadata.content_length:
                    raise InvalidRange(metadata.content_length, range_)

            # prefer the modification time stored in the file info; fall back
            # to the upload timestamp reported in the response headers
            mod_time_millis = int(
                metadata.file_info.get(
                    SRC_LAST_MODIFIED_MILLIS,
                    response.headers['x-bz-upload-timestamp'],
                )
            )

            with download_dest.make_file_context(
                metadata.file_id,
                metadata.file_name,
                metadata.content_length,
                metadata.content_type,
                metadata.content_sha1,
                metadata.file_info,
                mod_time_millis,
                range_=range_,
            ) as file:

                # the first strategy that declares itself suitable wins
                for strategy in self.strategies:
                    if strategy.is_suitable(metadata, progress_listener):
                        # NOTE(review): the call target was lost in this copy of
                        # the file; restored as ``strategy.download`` - confirm
                        # against the downloader interface.
                        bytes_read, actual_sha1 = strategy.download(
                            file, response, metadata, self.session
                        )
                        break
                else:
                    # Explicit raise instead of ``assert False``: a bare assert
                    # is stripped under ``python -O``, which would let execution
                    # continue with ``bytes_read``/``actual_sha1`` unbound.
                    raise AssertionError('no strategy suitable for download was found!')

                # NOTE(review): validation is kept inside the destination file
                # context so that a failed check propagates through it - confirm
                # this nesting against the upstream file.
                self._validate_download(
                    range_, bytes_read, actual_sha1, metadata
                )  # raises exceptions
                return metadata.as_info_dict()

    def _validate_download(self, range_, bytes_read, actual_sha1, metadata):
        """
        Check that the amount (and, for whole files, the checksum) of downloaded data is correct.

        :param range_: 2-element tuple of the requested range, or ``None`` for a whole-file download
        :param bytes_read: number of bytes actually read
        :param actual_sha1: sha1 computed over the downloaded data
        :param metadata: file metadata providing ``content_length`` and ``content_sha1``
        :raises TruncatedOutput: ``bytes_read`` differs from the expected length
        :raises ChecksumMismatch: whole-file download whose sha1 differs from
                                  ``metadata.content_sha1`` (skipped when that is ``'none'``)
        """
        if range_ is None:
            if bytes_read != metadata.content_length:
                raise TruncatedOutput(bytes_read, metadata.content_length)

            if metadata.content_sha1 != 'none' and \
                actual_sha1 != metadata.content_sha1:  # no yapf
                raise ChecksumMismatch(
                    checksum_type='sha1',
                    # report the expected checksum, not the expected length
                    expected=metadata.content_sha1,
                    actual=actual_sha1,
                )
        else:
            # only the length can be verified for a partial download
            desired_length = range_[1] - range_[0] + 1
            if bytes_read != desired_length:
                raise TruncatedOutput(bytes_read, desired_length)