Skip to content

Escape auto-linked GitHub usernames more precisely #230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion homu/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,35 @@

VARIABLES_RE = re.compile(r'\${([a-zA-Z_]+)}')

# Pattern for matching an auto-linked GitHub username.
#
# This is the behavior used by GitHub when detecting usernames, as deduced from
# the username constraints for new accounts and testing in their Markdown
# renderer:
# - Usernames are auto-linked with an @ prefix.
# - Usernames can only contain alphanumeric characters or hyphens and must be
# between 1 and 39 characters (inclusive): /@[A-Za-z0-9\-]{1,39}/
# - Usernames cannot start with hyphen: /@[A-Za-z0-9][A-Za-z0-9\-]{,38}/
# - A username preceded by an alphanumeric character or underscore is not
# auto-linked: /(?<![A-Za-z0-9_])/
# - A username followed by an underscore or slash is not auto-linked:
# /(?![A-Za-z0-9\-_\/])/
#
# Although usernames cannot contain consecutive hyphens or end with a hyphen,
# GitHub still auto-links such patterns.
#
# The logic for username auto-linking appears to not be open source.
# github-markup (https://github.com/github/markup), the markup renderer used by
# GitHub, does not contain this logic, nor do its dependencies. Their README
# says this is part of their "special sauce".
#
# This pattern does not handle Markdown code blocks.
GITHUB_USERNAME_RE = re.compile(
r'(?<![A-Za-z0-9_])'
r'(@[A-Za-z0-9][A-Za-z0-9\-]{,38})'
r'(?![A-Za-z0-9\-_/])'
)

IGNORE_BLOCK_START = '<!-- homu-ignore:start -->'
IGNORE_BLOCK_END = '<!-- homu-ignore:end -->'
IGNORE_BLOCK_RE = re.compile(
Expand All @@ -56,7 +85,7 @@
# Replace @mention with `@mention` to suppress pings in merge commits.
# Note: Don't replace non-mentions like "[email protected]".
def suppress_pings(text):
return re.sub(r'\B(@\S+)', r'`\g<1>`', text) # noqa
return GITHUB_USERNAME_RE.sub(r'`\g<1>`', text)


# Replace any text between IGNORE_BLOCK_START and IGNORE_BLOCK_END
Expand Down
50 changes: 46 additions & 4 deletions homu/tests/test_pr_body.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,58 @@


def test_suppress_pings_in_PR_body():
# This behavior can be verified by pasting the text into a Markdown editor
# on Github and checking which usernames are auto-linked.

body = (
"r? @matklad\n" # should escape
"@bors r+\n" # shouldn't
"[email protected]" # shouldn't
# Should escape:
"r? @matklad\n"
"@bors r+\n"
"@a\n" # Minimum length
"@abcdefghijklmnopqrstuvwxyzabcdefghijklm\n" # Maximum length
"@user. @user, @user; @user? @user!\n"
"@user@user\n" # Only the first is auto-linked
"@user/@user/@user\n" # Only the last is auto-linked
"@@user\n"
"/@user\n"
"-@user\n"
"@user--name\n" # Auto-linked, despite being an invalid username
"@user-\n" # Auto-linked, despite being an invalid username
"`@user`\n" # Code block handling is not implemented

# Shouldn't escape:
"[email protected]\n"
"@abcdefghijklmnopqrstuvwxyzabcdefghijklmo\n" # Over maximum length
"text@user\n"
"@-\n"
"@-user\n"
"@user/\n"
"@user_\n"
"_@user\n"
)

expect = (
"r? `@matklad`\n"
"`@bors` r+\n"
"[email protected]"
"`@a`\n"
"`@abcdefghijklmnopqrstuvwxyzabcdefghijklm`\n"
"`@user`. `@user`, `@user`; `@user`? `@user`!\n"
"`@user`@user\n"
"@user/@user/`@user`\n"
"@`@user`\n"
"/`@user`\n"
"-`@user`\n"
"`@user--name`\n"
"`@user-`\n"
"``@user``\n"
"[email protected]\n"
"@abcdefghijklmnopqrstuvwxyzabcdefghijklmo\n"
"text@user\n"
"@-\n"
"@-user\n"
"@user/\n"
"@user_\n"
"_@user\n"
)

assert suppress_pings(body) == expect
Expand Down