Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ci/conda_env_archery.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
click

# bot, crossbow
github3.py
jinja2
jira
pygit2
Comment thread
kszucs marked this conversation as resolved.
Expand Down
2 changes: 1 addition & 1 deletion ci/conda_env_crossbow.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

click
github3.py
pygithub

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we keep this list alphabetically-ordered?

jinja2
jira
pygit2
Expand Down
2 changes: 0 additions & 2 deletions dev/archery/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ to use the functionality of it:
To install: `pip install -e "arrow/dev/archery[release]"`
* crossbow – to trigger + interact with the crossbow build system
To install: `pip install -e "arrow/dev/archery[crossbow]"`
* crossbow-upload
To install: `pip install -e "arrow/dev/archery[crossbow-upload]"`

Additionally, if you would prefer to install everything at once,
`pip install -e "arrow/dev/archery[all]"` is an alias for all of
Expand Down
11 changes: 5 additions & 6 deletions dev/archery/archery/crossbow/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,13 +531,13 @@ def need_download():
return False

if need_download():
import github3
from github import GithubException
max_n_retries = 5
n_retries = 0
while True:
try:
asset.download(path)
except github3.exceptions.GitHubException as error:
asset.download_asset(str(path))
except GithubException as error:
n_retries += 1
if n_retries == max_n_retries:
raise
Expand Down Expand Up @@ -565,12 +565,11 @@ def need_download():
@click.argument('patterns', nargs=-1, required=True)
@click.option('--sha', required=True, help='Target committish')
@click.option('--tag', required=True, help='Target tag')
@click.option('--method', default='curl', help='Use cURL to upload')
@click.pass_obj
def upload_artifacts(obj, tag, sha, patterns, method):
def upload_artifacts(obj, tag, sha, patterns):
queue = obj['queue']
queue.github_overwrite_release_assets(
tag_name=tag, target_commitish=sha, method=method, patterns=patterns
tag_name=tag, target_commitish=sha, patterns=patterns
)


Expand Down
146 changes: 56 additions & 90 deletions dev/archery/archery/crossbow/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import time
import logging
import mimetypes
import subprocess
import textwrap
import uuid
from io import StringIO
Expand All @@ -34,11 +33,11 @@
from ruamel.yaml import YAML

try:
import github3
_have_github3 = True
from github import Github, GithubException
from github import Auth as GithubAuth
_have_github = True
except ImportError:
github3 = object
_have_github3 = False
_have_github = False

try:
import pygit2
Expand All @@ -52,7 +51,7 @@
from ..utils.source import ArrowSources


for pkg in ["requests", "urllib3", "github3"]:
for pkg in ["requests", "urllib3", "github"]:
logging.getLogger(pkg).setLevel(logging.WARNING)

logger = logging.getLogger("crossbow")
Expand Down Expand Up @@ -448,110 +447,85 @@ def file_contents(self, commit_id, file):
blob = self.repo[entry.id]
return blob.data

def _github_login(self, github_token):
"""Returns a logged in github3.GitHub instance"""
if not _have_github3:
raise ImportError('Must install github3.py')
def _github_login(self, github_token=None):
"""Returns a logged in Github instance using PyGithub"""
if not _have_github:
raise ImportError('Must install PyGithub')
github_token = github_token or self.github_token
session = github3.session.GitHubSession(
default_connect_timeout=10,
default_read_timeout=30
)
github = github3.GitHub(session=session)
github.login(token=github_token)
return github
return Github(auth=GithubAuth.Token(github_token), timeout=30)

def as_github_repo(self, github_token=None):
"""Converts it to a repository object which wraps the GitHub API"""
if self._github_repo is None:
github = self._github_login(github_token)
username, reponame = _parse_github_user_repo(self.remote_url)
self._github_repo = github.repository(username, reponame)
self._github_repo = github.get_repo(f"{username}/{reponame}")
return self._github_repo

def token_expiration_date(self, github_token=None):
"""Returns the expiration date for the github_token provided"""
github = self._github_login(github_token)
# github3 hides the headers from us. Use the _get method
# to access the response headers.
resp = github._get(github.session.base_url)
# Response in the form '2023-01-23 10:40:28 UTC'
date_string = resp.headers.get(
'github-authentication-token-expiration')
# PyGithub doesn't expose the token expiration header through a
# dedicated API, so request it via the public Requester escape hatch.
headers, _ = github.requester.requestJsonAndCheck("GET", "/user")
# Response header in the form '2023-01-23 10:40:28 UTC'
date_string = headers.get('github-authentication-token-expiration')
if date_string:
return date.fromisoformat(date_string.split()[0])
return None

def github_commit(self, sha):
repo = self.as_github_repo()
return repo.commit(sha)
return repo.get_commit(sha)

def github_release(self, tag):
repo = self.as_github_repo()
try:
return repo.release_from_tag(tag)
except github3.exceptions.NotFoundError:
return None

def github_upload_asset_requests(self, release, path, name, mime,
max_retries=None, retry_backoff=None):
return repo.get_release(tag)
except GithubException as e:
if e.status == 404:
return None
raise
Comment thread
fangchenli marked this conversation as resolved.
Comment thread
kszucs marked this conversation as resolved.
Comment on lines 481 to +488

def github_upload_asset(self, release, path, name, mime,
max_retries=None, retry_backoff=None):
if max_retries is None:
max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
if retry_backoff is None:
retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))

for i in range(max_retries):
try:
with open(path, 'rb') as fp:
result = release.upload_asset(name=name, asset=fp,
content_type=mime)
except github3.exceptions.ResponseError as e:
result = release.upload_asset(path, name=name,
content_type=mime)
logger.info(f"Attempt {i + 1} has finished.")
return result
Comment thread
fangchenli marked this conversation as resolved.
except GithubException as e:
logger.error(f"Attempt {i + 1} has failed with message: {e}.")
logger.error(f"Error message {e.msg}")
logger.error("List of errors provided by GitHub:")
for err in e.errors:
logger.error(f" - {err}")
if hasattr(e, 'data'):
logger.error(f"Error data: {e.data}")

if e.code == 422:
if e.status == 422:
# 422 Validation Failed, probably raised because
# ReleaseAsset already exists, so try to remove it before
# reattempting the asset upload
for asset in release.assets():
for asset in release.get_assets():
if asset.name == name:
logger.info(f"Release asset {name} already exists, "
"removing it...")
asset.delete()
asset.delete_asset()
logger.info(f"Asset {name} removed.")
break
except github3.exceptions.ConnectionError as e:
except IOError as e:
# Catch network and file I/O errors (includes requests exceptions)
logger.error(f"Attempt {i + 1} has failed with message: {e}.")
Comment on lines +519 to 521
Comment on lines +519 to 521
Comment on lines +519 to 521
else:
logger.info(f"Attempt {i + 1} has finished.")
return result

time.sleep(retry_backoff)

raise RuntimeError('GitHub asset uploading has failed!')

def github_upload_asset_curl(self, release, path, name, mime):
upload_url, _ = release.upload_url.split('{?')
upload_url += f"?name={name}"

command = [
'curl',
'--fail',
'-H', f"Authorization: token {self.github_token}",
'-H', f"Content-Type: {mime}",
'--data-binary', f'@{path}',
upload_url
]
return subprocess.run(command, shell=False, check=True)

def github_overwrite_release_assets(self, tag_name, target_commitish,
patterns, method='requests'):
# Since GitHub changed something, asset uploading via requests
# became unstable, so prefer the cURL alternative.
# Potential cause:
# sigmavirus24/github3.py/issues/779#issuecomment-379470626
patterns):
repo = self.as_github_repo()
if not tag_name:
raise CrossbowError('Empty tag name')
Expand All @@ -560,13 +534,14 @@ def github_overwrite_release_assets(self, tag_name, target_commitish,

# remove the whole release if it already exists
try:
release = repo.release_from_tag(tag_name)
except github3.exceptions.NotFoundError:
pass
else:
release.delete()

release = repo.create_release(tag_name, target_commitish)
release = repo.get_release(tag_name)
release.delete_release()
except GithubException as e:
if e.status != 404:
raise
Comment on lines 535 to +541
Comment on lines 528 to +541

release = repo.create_git_release(tag_name, tag_name, "",
target_commitish=target_commitish)
Comment on lines +537 to +544
for pattern in patterns:
for path in glob.glob(pattern, recursive=True):
name = os.path.basename(path)
Expand All @@ -578,16 +553,7 @@ def github_overwrite_release_assets(self, tag_name, target_commitish,
f"{size}..."
)

if method == 'requests':
self.github_upload_asset_requests(release, path, name=name,
mime=mime)
elif method == 'curl':
self.github_upload_asset_curl(release, path, name=name,
mime=mime)
else:
raise CrossbowError(
f"Unsupported upload method {method}"
)
self.github_upload_asset(release, path, name=name, mime=mime)

def github_pr(self, title, head=None, base=None, body=None,
github_token=None, create=False):
Expand All @@ -598,12 +564,11 @@ def github_pr(self, title, head=None, base=None, body=None,
repo = self.as_github_repo(github_token=github_token)
if create:
return repo.create_pull(title=title, base=base, head=head,
body=body)
body=body or "")
else:
# Retrieve open PR for base and head.
# There should be a single open one with that title.
for pull in repo.pull_requests(state="open", head=head,
base=base):
for pull in repo.get_pulls(state="open", head=head, base=base):
if title in pull.title:
return pull
raise CrossbowError(
Expand Down Expand Up @@ -1005,7 +970,7 @@ class TaskStatus:

Parameters
----------
commit : github3.Commit
commit : github.Commit.Commit
Commit to query the combined status for.

Returns
Expand All @@ -1019,8 +984,8 @@ class TaskStatus:
"""

def __init__(self, commit):
status = commit.status()
check_runs = list(commit.check_runs())
status = commit.get_combined_status()
check_runs = list(commit.get_check_runs())
states = [s.state for s in status.statuses]

for check in check_runs:
Expand Down Expand Up @@ -1068,7 +1033,7 @@ def __init__(self, github_release, artifact_patterns,
if github_release is None:
github_assets = {} # no assets have been uploaded for the task
else:
github_assets = {a.name: a for a in github_release.assets()}
github_assets = {a.name: a for a in github_release.get_assets()}

if not validate_patterns:
# shortcut to avoid pattern validation and just set all artifacts
Expand All @@ -1088,9 +1053,10 @@ def __init__(self, github_release, artifact_patterns,
elif num_matches == 1:
self[pattern] = github_assets[matches[0].group(0)]
else:
matched_names = [m.group(0) for m in matches]
raise CrossbowError(
f"Only a single asset should match pattern `{pattern}`, "
f"there are multiple ones: {', '.join(matches)}"
f"there are multiple ones: {', '.join(matched_names)}"
)

def missing_patterns(self):
Expand Down
Loading
Loading