From a8b4bc6ff3eddbc1c38fdaa1e2bc5c061b256069 Mon Sep 17 00:00:00 2001
From: "James E. Blair" <jim@acmegating.com>
Date: Mon, 24 Jan 2022 13:45:25 -0800
Subject: [PATCH] Add upload-logs-ibm role

This role uploads logs to IBM Cloud Object Storage.

Change-Id: Ibe1131f863a64051b427fcb03b126b1577c4843a
---
 doc/source/log-roles.rst                      |   1 +
 .../library/test_zuul_ibm_upload.py           |  68 ++++
 .../library/zuul_ibm_upload.py                | 368 ++++++++++++++++++
 roles/upload-logs-ibm/README.rst              |  91 +++++
 roles/upload-logs-ibm/defaults/main.yaml      |   3 +
 roles/upload-logs-ibm/meta/main.yaml          |   2 +
 roles/upload-logs-ibm/tasks/main.yaml         |  42 ++
 test-requirements.txt                         |   3 +
 8 files changed, 578 insertions(+)
 create mode 100644 roles/upload-logs-base/library/test_zuul_ibm_upload.py
 create mode 100755 roles/upload-logs-base/library/zuul_ibm_upload.py
 create mode 100644 roles/upload-logs-ibm/README.rst
 create mode 100644 roles/upload-logs-ibm/defaults/main.yaml
 create mode 100644 roles/upload-logs-ibm/meta/main.yaml
 create mode 100644 roles/upload-logs-ibm/tasks/main.yaml

diff --git a/doc/source/log-roles.rst b/doc/source/log-roles.rst
index de7782b22..986e22405 100644
--- a/doc/source/log-roles.rst
+++ b/doc/source/log-roles.rst
@@ -15,5 +15,6 @@ Log Roles
 .. zuul:autorole:: upload-logs
 .. zuul:autorole:: upload-logs-azure
 .. zuul:autorole:: upload-logs-gcs
+.. zuul:autorole:: upload-logs-ibm
 .. zuul:autorole:: upload-logs-s3
 .. zuul:autorole:: upload-logs-swift
diff --git a/roles/upload-logs-base/library/test_zuul_ibm_upload.py b/roles/upload-logs-base/library/test_zuul_ibm_upload.py
new file mode 100644
index 000000000..24821eb22
--- /dev/null
+++ b/roles/upload-logs-base/library/test_zuul_ibm_upload.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2018-2019 Red Hat, Inc.
+# Copyright (C) 2021-2022 Acme Gating, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import os
+import testtools
+try:
+    from unittest import mock
+except ImportError:
+    import mock
+
+from .zuul_ibm_upload import Uploader
+from ..module_utils.zuul_jobs.upload_utils import FileDetail
+
+
+FIXTURE_DIR = os.path.join(os.path.dirname(__file__),
+                           'test-fixtures')
+
+
+class TestUpload(testtools.TestCase):
+
+    def test_upload_result(self):
+        client = mock.Mock()
+        uploader = Uploader(client=client, bucket="bucket",
+                            endpoint_url='http://example.com')
+
+        # Get some test files to upload
+        files = [
+            FileDetail(
+                os.path.join(FIXTURE_DIR, "logs/job-output.json"),
+                "job-output.json",
+            ),
+            FileDetail(
+                os.path.join(FIXTURE_DIR, "logs/zuul-info/inventory.yaml"),
+                "inventory.yaml",
+            ),
+        ]
+
+        uploader.upload(files)
+        client.put_bucket_cors.assert_called_with(
+            Bucket='bucket',
+            CORSConfiguration={
+                'CORSRules': [{
+                    'AllowedMethods': ['GET', 'HEAD'],
+                    'AllowedOrigins': ['*']}]
+            })
+
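+        # mock_calls entries are (name, args, kwargs) tuples; the
+        # third positional argument to upload_fileobj is the object
+        # name in the bucket.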
+        upload_calls = uploader.client.upload_fileobj.mock_calls
+        upload_call_filenames = [x[1][2] for x in upload_calls]
+        self.assertIn('job-output.json', upload_call_filenames)
+        self.assertIn('inventory.yaml', upload_call_filenames)
diff --git a/roles/upload-logs-base/library/zuul_ibm_upload.py b/roles/upload-logs-base/library/zuul_ibm_upload.py
new file mode 100755
index 000000000..07215fd30
--- /dev/null
+++ b/roles/upload-logs-base/library/zuul_ibm_upload.py
@@ -0,0 +1,368 @@
+#!/usr/bin/env python3
+#
+# Copyright 2014 Rackspace Australia
+# Copyright 2018-2019 Red Hat, Inc
+# Copyright 2021-2022 Acme Gating, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+
+"""
+Utility to upload files to IBM Cloud
+
+Run this from the CLI from the zuul-jobs/roles directory with:
+
+  python -m upload-logs-base.library.zuul_ibm_upload
+"""
+
+import argparse
+import logging
+import os
+try:
+    import queue as queuelib
+except ImportError:
+    import Queue as queuelib
+import sys
+import threading
+
+from ibm_botocore.client import Config
+import ibm_boto3
+import ibm_boto3.s3.transfer
+
+from ansible.module_utils.basic import AnsibleModule
+
+try:
+    # Ansible context
+    from ansible.module_utils.zuul_jobs.upload_utils import (
+        FileList,
+        GZIPCompressedStream,
+        Indexer,
+        retry_function,
+    )
+except ImportError:
+    # Test context
+    from ..module_utils.zuul_jobs.upload_utils import (
+        FileList,
+        GZIPCompressedStream,
+        Indexer,
+        retry_function,
+    )
+
+MAX_UPLOAD_THREADS = 24
+
+
+class Uploader():
+    def __init__(self, client, bucket, prefix=None, public=True,
+                 dry_run=False, endpoint_url=None,
+                 bucket_location=None):
+        self.dry_run = dry_run
+        self.public = public
+        if dry_run:
+            self.url = 'https://example.com/a/path/'
+            return
+
+        self.client = client
+        self.prefix = prefix or ''
+        self.bucket = bucket
+
+        self.url = os.path.join(endpoint_url,
+                                bucket, self.prefix)
+
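+        # Setting CORS doubles as an existence check: if the bucket
+        # does not exist yet, create it (a location is required) and
+        # then set CORS on the new bucket.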
+        try:
+            self._set_cors(bucket)
+        except self.client.exceptions.NoSuchBucket:
+            if not bucket_location:
+                raise Exception("Bucket location must be specified")
+            if public:
+                acl = 'public-read'
+            else:
+                acl = 'private'
+            self.client.create_bucket(
+                ACL=acl,
+                Bucket=bucket,
+                CreateBucketConfiguration={
+                    'LocationConstraint': bucket_location
+                }
+            )
+            self._set_cors(bucket)
+
+    def _set_cors(self, bucket):
+        self.client.put_bucket_cors(
+            Bucket=bucket,
+            CORSConfiguration={
+                'CORSRules': [{
+                    'AllowedMethods': [
+                        'GET',
+                        'HEAD',
+                    ],
+                    'AllowedOrigins': [
+                        '*',
+                    ],
+                }],
+            },
+        )
+
+    def upload(self, file_list):
+        """Spin up thread pool to upload to storage"""
+
+        if self.dry_run:
+            return
+
+        num_threads = min(len(file_list), MAX_UPLOAD_THREADS)
+        threads = []
+        queue = queuelib.Queue()
+        # add items to queue
+        for f in file_list:
+            queue.put(f)
+
+        for x in range(num_threads):
+            t = threading.Thread(target=self.post_thread, args=(queue,))
+            threads.append(t)
+            t.start()
+
+        for t in threads:
+            t.join()
+
+    def post_thread(self, queue):
+        while True:
+            try:
+                file_detail = queue.get_nowait()
+                logging.debug("%s: processing job %s",
+                              threading.current_thread(),
+                              file_detail)
+                retry_function(lambda: self._post_file(file_detail))
+            except IOError:
+                # Do our best to attempt to upload all the files
+                logging.exception("Error opening file")
+                continue
+            except queuelib.Empty:
+                # No more work to do
+                return
+
+    @staticmethod
+    def _is_text_type(mimetype):
+        # We want to compress all text types.
+        if mimetype.startswith('text/'):
+            return True
+
+        # Also compress types that typically contain text but are not
+        # a text subtype.
+        compress_types = [
+            'application/json',
+            'image/svg+xml',
+        ]
+        if mimetype in compress_types:
+            return True
+        return False
+
+    def _post_file(self, file_detail):
+        relative_path = os.path.join(self.prefix, file_detail.relative_path)
+        content_encoding = None
+
+        if file_detail.folder:
+            # We don't need to upload folders to IBM
+            return
+
+        if (file_detail.encoding is None and
+            self._is_text_type(file_detail.mimetype)):
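+            # Compress text files on the fly and record the encoding
+            # so storage serves them back correctly.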
+            content_encoding = 'gzip'
+            data = GZIPCompressedStream(open(file_detail.full_path, 'rb'))
+        else:
+            if (not file_detail.filename.endswith(".gz") and
+                file_detail.encoding):
+                # Don't apply gzip content encoding to files we
+                # receive already gzipped. If we did, storage would
+                # serve them back decompressed to clients whose
+                # accept-encoding does not include gzip, which breaks
+                # downloads that are meant to stay compressed, such as
+                # .tar.gz tarballs.
+                content_encoding = file_detail.encoding
+            data = open(file_detail.full_path, 'rb')
+
+        extra_args = dict(
+            ContentType=file_detail.mimetype,
+        )
+        if content_encoding:
+            extra_args['ContentEncoding'] = content_encoding
+
+        if self.public:
+            extra_args['ACL'] = 'public-read'
+
+        self.client.upload_fileobj(
+            data,
+            self.bucket,
+            relative_path,
+            ExtraArgs=extra_args
+        )
+
+
+def run(bucket, files,
+        indexes=True, parent_links=True, topdir_parent_link=False,
+        partition=False, footer='index_footer.html',
+        prefix=None, public=True, dry_run=False, api_key=None,
+        instance_id=None, endpoint_url=None, bucket_location=None):
+
+    client = ibm_boto3.client(
+        "s3",
+        ibm_api_key_id=api_key,
+        ibm_service_instance_id=instance_id,
+        config=Config(signature_version="oauth"),
+        endpoint_url=endpoint_url,
+    )
+
+    if prefix:
+        prefix = prefix.lstrip('/')
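+    # With partitioning, the first component of the prefix is folded
+    # into the bucket name: e.g. bucket "logs" and prefix "42/8a/..."
+    # become bucket "logs_42" with prefix "8a/...".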
+    if partition and prefix:
+        parts = prefix.split('/')
+        if len(parts) > 1:
+            bucket += '_' + parts[0]
+            prefix = '/'.join(parts[1:])
+
+    # Create the objects to make sure the arguments are sound.
+    with FileList() as file_list:
+        # Scan the files.
+        for file_path in files:
+            file_list.add(file_path)
+
+        indexer = Indexer(file_list)
+
+        # (Possibly) make indexes.
+        if indexes:
+            indexer.make_indexes(create_parent_links=parent_links,
+                                 create_topdir_parent_link=topdir_parent_link,
+                                 append_footer=footer)
+
+        logging.debug("List of files prepared to upload:")
+        for x in file_list:
+            logging.debug(x)
+
+        # Upload.
+        uploader = Uploader(client, bucket, prefix, public, dry_run,
+                            endpoint_url, bucket_location)
+        uploader.upload(file_list)
+        return uploader.url
+
+
+def ansible_main():
+    module = AnsibleModule(
+        argument_spec=dict(
+            bucket=dict(required=True, type='str'),
+            files=dict(required=True, type='list'),
+            partition=dict(type='bool', default=False),
+            indexes=dict(type='bool', default=True),
+            parent_links=dict(type='bool', default=True),
+            topdir_parent_link=dict(type='bool', default=False),
+            public=dict(type='bool', default=True),
+            footer=dict(type='str'),
+            prefix=dict(type='str'),
+            api_key=dict(type='str'),
+            instance_id=dict(type='str'),
+            endpoint_url=dict(type='str'),
+            bucket_location=dict(type='str'),
+        )
+    )
+
+    p = module.params
+    url = run(p.get('bucket'), p.get('files'),
+              indexes=p.get('indexes'),
+              parent_links=p.get('parent_links'),
+              topdir_parent_link=p.get('topdir_parent_link'),
+              partition=p.get('partition'),
+              footer=p.get('footer'),
+              prefix=p.get('prefix'),
+              public=p.get('public'),
+              api_key=p.get('api_key'),
+              instance_id=p.get('instance_id'),
+              endpoint_url=p.get('endpoint_url'),
+              bucket_location=p.get('bucket_location'))
+    module.exit_json(changed=True,
+                     url=url)
+
+
+def cli_main():
+    parser = argparse.ArgumentParser(
+        description="Upload files to IBM Cloud Storage"
+    )
+    parser.add_argument('--verbose', action='store_true',
+                        help='show debug information')
+    parser.add_argument('--no-indexes', action='store_true',
+                        help='do not generate any indexes at all')
+    parser.add_argument('--no-parent-links', action='store_true',
+                        help='do not include links back to a parent dir')
+    parser.add_argument('--create-topdir-parent-link', action='store_true',
+                        help='include a link in the root directory of the '
+                             'files to the parent directory which may be the '
+                             'index of all results')
+    parser.add_argument('--no-public', action='store_true',
+                        help='do not create the bucket as public')
+    parser.add_argument('--partition', action='store_true',
+                        help='partition the prefix into multiple buckets')
+    parser.add_argument('--append-footer', default='index_footer.html',
+                        help='when generating an index, if the given file is '
+                             'present in a directory, append it to the index '
+                             '(set to "none" to disable)')
+    parser.add_argument('--prefix',
+                        help='Prepend this path to the object names when '
+                             'uploading')
+    parser.add_argument('--dry-run', action='store_true',
+                        help='do not attempt to create buckets or upload, '
+                             'useful with --verbose for debugging')
+    parser.add_argument('--api-key',
+                        help='An IBM Cloud API key')
+    parser.add_argument('--instance-id',
+                        help='An IBM Cloud Object Storage instance ID')
+    parser.add_argument('--endpoint-url',
+                        help='An IBM Cloud Object Storage endpoint URL')
+    parser.add_argument('--bucket-location',
+                        help='The location constraint for the bucket')
+    parser.add_argument('bucket',
+                        help='Name of the bucket to use when uploading')
+    parser.add_argument('files', nargs='+',
+                        help='the file(s) to upload with recursive glob '
+                        'matching when supplied as a string')
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.captureWarnings(True)
+
+    append_footer = args.append_footer
+    if append_footer.lower() == 'none':
+        append_footer = None
+
+    url = run(args.bucket, args.files,
+              indexes=not args.no_indexes,
+              parent_links=not args.no_parent_links,
+              topdir_parent_link=args.create_topdir_parent_link,
+              partition=args.partition,
+              footer=append_footer,
+              prefix=args.prefix,
+              public=not args.no_public,
+              dry_run=args.dry_run,
+              api_key=args.api_key,
+              instance_id=args.instance_id,
+              endpoint_url=args.endpoint_url,
+              bucket_location=args.bucket_location)
+    print(url)
+
+
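+# Ansible supplies module arguments on stdin, so an interactive TTY
+# on stdin indicates a direct CLI invocation.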
+if __name__ == '__main__':
+    if sys.stdin.isatty():
+        cli_main()
+    else:
+        ansible_main()
diff --git a/roles/upload-logs-ibm/README.rst b/roles/upload-logs-ibm/README.rst
new file mode 100644
index 000000000..508625e10
--- /dev/null
+++ b/roles/upload-logs-ibm/README.rst
@@ -0,0 +1,91 @@
+Upload logs to IBM Cloud Object Storage
+
+Before using this role, create a cloud object storage `service instance`_
+and a `service credential`_.
+
+You may create a bucket within the instance, or allow this role to
+create the bucket (or buckets) for you.
+
+**Role Variables**
+
+.. zuul:rolevar:: zuul_site_upload_logs
+   :default: true
+
+   Controls when logs are uploaded.  true, the default, means always
+   upload logs; false means never upload logs; 'failure' means upload
+   logs only when the job has failed.
+
+   .. note:: Intended to be set by admins via site-variables.
+
+.. zuul:rolevar:: zuul_log_partition
+   :default: false
+
+   If set to true, then the first component of the log path will be
+   removed from the object name and added to the bucket name, so
+   that logs for different changes are distributed across a large
+   number of buckets.
+
+.. zuul:rolevar:: zuul_log_bucket
+
+   If partitioning is not enabled, this is the name of the bucket
+   which will be used.  If partitioning is enabled, this is the
+   prefix for the bucket name, separated from the partition name by
+   an underscore.  For example, "logs_42" would be the bucket name
+   for partition 42.
+
+.. zuul:rolevar:: zuul_log_bucket_public
+   :default: true
+
+   If the bucket is created, this indicates whether it should be
+   created with global read ACLs.  If the bucket already exists, it
+   will not be modified.
+
+.. zuul:rolevar:: zuul_log_bucket_location
+
+   If the bucket is created, this `storage location`_ will be used as
+   the location constraint.
+
+.. zuul:rolevar:: zuul_log_path
+   :default: Generated by the role `set-zuul-log-path-fact`
+
+   Prepend this path to the object names when uploading.
+
+.. zuul:rolevar:: zuul_log_create_indexes
+   :default: true
+
+   Whether to create `index.html` files with directory indexes.
+
+.. zuul:rolevar:: zuul_log_path_shard_build
+   :default: false
+
+   This variable is consumed by the set-zuul-log-path-fact role,
+   which upload-logs-ibm calls into. If set to true, log paths are
+   prefixed with the first three characters of the build UUID, which
+   improves log file sharding.
+
+   More details can be found at
+   :zuul:rolevar:`set-zuul-log-path-fact.zuul_log_path_shard_build`.
+
+.. zuul:rolevar:: zuul_log_api_key
+
+   The API key that was created as part of the `service credential`_.
+   This is required.
+
+.. zuul:rolevar:: zuul_log_instance_id
+
+   The instance id that appears in the `service credential`_.
+   This is required.
+
+.. zuul:rolevar:: zuul_log_endpoint_url
+
+   The cloud storage `endpoint`_.
+   This is required.
+
+
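+As a minimal sketch (the bucket name, location, endpoint, and secret
+variable names below are illustrative rather than defined by this
+role), a post-run playbook using this role might look like:
+
+.. code-block:: yaml
+
+   - hosts: localhost
+     roles:
+       - role: upload-logs-ibm
+         vars:
+           zuul_log_bucket: zuul-logs
+           zuul_log_bucket_location: us-smart
+           zuul_log_api_key: "{{ site_ibm_api_key }}"
+           zuul_log_instance_id: "{{ site_ibm_instance_id }}"
+           zuul_log_endpoint_url: "https://s3.us.cloud-object-storage.appdomain.cloud"
+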
+.. _service instance: https://cloud.ibm.com/docs/cloud-object-storage/iam/service-credentials.html
+
+.. _service credential: https://cloud.ibm.com/docs/cloud-object-storage/iam?topic=cloud-object-storage-service-credentials
+
+.. _endpoint: https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-endpoints#endpoints
+
+.. _storage location: https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-classes#classes
diff --git a/roles/upload-logs-ibm/defaults/main.yaml b/roles/upload-logs-ibm/defaults/main.yaml
new file mode 100644
index 000000000..35ca84b6f
--- /dev/null
+++ b/roles/upload-logs-ibm/defaults/main.yaml
@@ -0,0 +1,3 @@
+zuul_log_partition: false
+zuul_log_bucket_public: true
+zuul_log_create_indexes: true
diff --git a/roles/upload-logs-ibm/meta/main.yaml b/roles/upload-logs-ibm/meta/main.yaml
new file mode 100644
index 000000000..3a4055855
--- /dev/null
+++ b/roles/upload-logs-ibm/meta/main.yaml
@@ -0,0 +1,2 @@
+dependencies:
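+  # upload-logs-base provides the zuul_ibm_upload module and the
+  # shared upload utilities it imports.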
+  - role: upload-logs-base
diff --git a/roles/upload-logs-ibm/tasks/main.yaml b/roles/upload-logs-ibm/tasks/main.yaml
new file mode 100644
index 000000000..3b1b1ed20
--- /dev/null
+++ b/roles/upload-logs-ibm/tasks/main.yaml
@@ -0,0 +1,42 @@
+- name: Set zuul-log-path fact
+  include_role:
+    name: set-zuul-log-path-fact
+  when: zuul_log_path is not defined
+
+# Always upload (true), never upload (false) or only on failure ('failure')
+- when: zuul_site_upload_logs | default(true) | bool or
+        (zuul_site_upload_logs == 'failure' and not zuul_success | bool)
+  block:
+    # Use chmod rather than the file module because ansible 2.5 file
+    # with recurse and follow can't properly handle symlinks to '.'
+    - name: Ensure logs are readable before uploading
+      delegate_to: localhost
+      command: "chmod -R u=rwX,g=rX,o=rX {{ zuul.executor.log_root }}/"
+      # ANSIBLE0007 chmod used in place of argument mode to file
+      tags:
+        - skip_ansible_lint
+
+    - name: Upload logs to IBM Cloud
+      delegate_to: localhost
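+      # no_log keeps the API key and other credentials out of the
+      # job output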
+      no_log: true
+      zuul_ibm_upload:
+        partition: "{{ zuul_log_partition }}"
+        bucket: "{{ zuul_log_bucket }}"
+        bucket_location: "{{ zuul_log_bucket_location }}"
+        public: "{{ zuul_log_bucket_public }}"
+        prefix: "{{ zuul_log_path }}"
+        indexes: "{{ zuul_log_create_indexes }}"
+        api_key: "{{ zuul_log_api_key }}"
+        instance_id: "{{ zuul_log_instance_id }}"
+        endpoint_url: "{{ zuul_log_endpoint_url }}"
+        files:
+          - "{{ zuul.executor.log_root }}/"
+      register: upload_results
+
+- name: Return log URL to Zuul
+  delegate_to: localhost
+  zuul_return:
+    data:
+      zuul:
+        log_url: "{{ upload_results.url }}/"
+  when: upload_results is defined
diff --git a/test-requirements.txt b/test-requirements.txt
index 6b7cbda8c..50bcaa58d 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -42,5 +42,8 @@ boto3
 # For upload-logs-azure
 azure-storage-blob
 
+# For upload-logs-ibm
+ibm-cos-sdk
+
 # unittest.mock compatibility package for Python < 3.3
 mock;python_version<'3.3'