
Changed the JSON RollManager to write JSON payloads to a working directory until a certain directory size is met. Then the directory is tar/gzipped and moved into a destination directory. No CRC is available for the tarball. Change-Id: I119da646104eeaea86f7153a9bcf123d3143a0ba
224 lines
7.7 KiB
Python
224 lines
7.7 KiB
Python
# Copyright (c) 2014 Dark Secret Software Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import fnmatch
|
|
import hashlib
|
|
import os
|
|
import os.path
|
|
import tarfile
|
|
|
|
import notification_utils
|
|
|
|
import archive
|
|
import disk_storage
|
|
|
|
|
|
class NoMoreFiles(Exception):
|
|
pass
|
|
|
|
|
|
class NoValidFile(Exception):
|
|
pass
|
|
|
|
|
|
class BadWorkingDirectory(Exception):
|
|
pass
|
|
|
|
|
|
class RollManager(object):
|
|
def __init__(self, filename_template, directory=".",
|
|
archive_class=None, archive_callback=None):
|
|
self.filename_template = filename_template
|
|
self.directory = directory
|
|
self.active_archive = None
|
|
self.archive_class = archive_class
|
|
self.active_filename = None
|
|
self.archive_callback = archive_callback
|
|
|
|
def _roll_archive(self):
|
|
self.close()
|
|
self.get_active_archive()
|
|
|
|
def close(self):
|
|
if self.active_archive:
|
|
self.active_archive.close()
|
|
if self.archive_callback:
|
|
self.archive_callback.on_close(self.active_filename)
|
|
self.active_archive = None
|
|
self.active_filename = None
|
|
|
|
|
|
class ReadingRollManager(RollManager):
|
|
def __init__(self, filename_template, directory=".",
|
|
destination_directory=".",
|
|
archive_class = archive.ArchiveReader,
|
|
archive_callback=None, roll_size_mb=1000):
|
|
super(ReadingRollManager, self).__init__(
|
|
filename_template,
|
|
directory=directory,
|
|
archive_callback=archive_callback,
|
|
archive_class=archive_class)
|
|
self.files_to_read = self._get_matching_files(directory,
|
|
filename_template)
|
|
|
|
def _get_matching_files(self, directory, filename_template):
|
|
files = [os.path.join(directory, f)
|
|
for f in os.listdir(self.directory)
|
|
if os.path.isfile(os.path.join(directory, f))]
|
|
return sorted(fnmatch.filter(files, filename_template))
|
|
|
|
def read(self):
|
|
# (metadata, payload)
|
|
for x in range(3): # 3 attempts to file a valid file.
|
|
a = self.get_active_archive()
|
|
try:
|
|
return a.read()
|
|
except disk_storage.EndOfFile:
|
|
self._roll_archive()
|
|
raise NoValidFile("Unable to find a valid file after 3 attempts")
|
|
|
|
def get_active_archive(self):
|
|
if not self.active_archive:
|
|
if not self.files_to_read:
|
|
raise NoMoreFiles()
|
|
self.active_filename = self.files_to_read.pop(0)
|
|
self.active_archive = self.archive_class(self.active_filename)
|
|
if self.archive_callback:
|
|
self.archive_callback.on_open(self.active_filename)
|
|
return self.active_archive
|
|
|
|
|
|
class WritingRollManager(RollManager):
|
|
def __init__(self, filename_template, roll_checker, directory=".",
|
|
destination_directory=".",
|
|
archive_class=archive.ArchiveWriter,
|
|
archive_callback=None, roll_size_mb=1000):
|
|
super(WritingRollManager, self).__init__(
|
|
filename_template,
|
|
directory=directory,
|
|
archive_callback=archive_callback,
|
|
archive_class=archive_class)
|
|
self.roll_checker = roll_checker
|
|
|
|
def write(self, metadata, payload):
|
|
"""metadata is string:string dict.
|
|
payload must be encoded as string.
|
|
"""
|
|
a = self.get_active_archive()
|
|
a.write(metadata, payload)
|
|
if self._should_roll_archive():
|
|
self._roll_archive()
|
|
|
|
def _make_filename(self):
|
|
f = notification_utils.now().strftime(self.filename_template)
|
|
f = f.replace(" ", "_")
|
|
f = f.replace("/", "_")
|
|
f = f.replace(":", "_")
|
|
return os.path.join(self.directory, f)
|
|
|
|
def get_active_archive(self):
|
|
if not self.active_archive:
|
|
self.active_filename = self._make_filename()
|
|
self.active_archive = self.archive_class(self.active_filename)
|
|
if self.archive_callback:
|
|
self.archive_callback.on_open(self.active_filename)
|
|
self.roll_checker.start(self.active_archive)
|
|
return self.active_archive
|
|
|
|
def _should_roll_archive(self):
|
|
return self.roll_checker.check(self.active_archive)
|
|
|
|
|
|
class WritingJSONRollManager(object):
|
|
"""No archiver. No roll checker. Just write the file locally.
|
|
Once the working_directory gets big enough, the files are
|
|
.tar.gz'ed into the destination_directory. Then
|
|
the working_directory is erased.
|
|
Expects an external tool like rsync to move the file.
|
|
A SHA-256 of the payload may be included in the filename."""
|
|
def __init__(self, *args, **kwargs):
|
|
self.filename_template = args[0]
|
|
self.directory = kwargs.get('directory', '.')
|
|
self.destination_directory = kwargs.get('destination_directory', '.')
|
|
self.roll_size_mb = int(kwargs.get('roll_size_mb', 1000))
|
|
self.check_delay = 0 # Only check directory size every N events.
|
|
|
|
def _make_filename(self, crc, prefix):
|
|
now = notification_utils.now()
|
|
dt = str(notification_utils.dt_to_decimal(now))
|
|
f = now.strftime(self.filename_template)
|
|
f = f.replace(" ", "_")
|
|
f = f.replace("/", "_")
|
|
f = f.replace(":", "_")
|
|
f = f.replace("[[CRC]]", crc)
|
|
f = f.replace("[[TIMESTAMP]]", dt)
|
|
return os.path.join(prefix, f)
|
|
|
|
def _should_tar(self):
|
|
size = 0
|
|
for f in os.listdir(self.directory):
|
|
full = os.path.join(self.directory, f)
|
|
if os.path.isfile(full):
|
|
size += os.path.getsize(full)
|
|
|
|
return (size / 1048576) >= self.roll_size_mb
|
|
|
|
def _tar_directory(self):
|
|
# tar all the files in working directory into an archive
|
|
# in destination_directory.
|
|
crc = "archive"
|
|
filename = self._make_filename(crc, self.destination_directory) + ".tar.gz"
|
|
|
|
# No contextmgr for tarfile in 2.6 :(
|
|
tar = tarfile.open(filename, "w:gz")
|
|
for f in os.listdir(self.directory):
|
|
full = os.path.join(self.directory, f)
|
|
if os.path.isfile(full):
|
|
tar.add(full)
|
|
tar.close()
|
|
|
|
def _clean_working_directory(self):
|
|
for f in os.listdir(self.directory):
|
|
full = os.path.join(self.directory, f)
|
|
if os.path.isfile(full):
|
|
os.remove(full)
|
|
|
|
def _delay_check(self):
|
|
self.check_delay += 1
|
|
if self.check_delay > 250:
|
|
self.check_delay = 0
|
|
return False
|
|
return True
|
|
|
|
def write(self, metadata, json_payload):
|
|
# Metadata is ignored.
|
|
crc = hashlib.sha256(json_payload).hexdigest()
|
|
filename = self._make_filename(crc, self.directory)
|
|
with open(filename, "w") as f:
|
|
f.write(json_payload)
|
|
|
|
if self._delay_check():
|
|
return
|
|
|
|
if not self._should_tar():
|
|
return
|
|
|
|
self._tar_directory()
|
|
self._clean_working_directory()
|
|
|
|
|
|
def close(self):
|
|
pass
|