Skip to content

Commit 5af0bb3

Browse files
Add comprehensive project standardization and governance files
This commit brings mcp-semclone in line with other SEMCL.ONE projects by adding: Project Documentation: - CONTRIBUTING.md: Development guidelines and setup instructions - SECURITY.md: Security policy and vulnerability reporting procedures - SUPPORT.md: Support channels and troubleshooting guidance - CODE_OF_CONDUCT.md: Community standards and enforcement guidelines - AUTHORS.md: Project contributors and attribution - CHANGELOG.md: Version history and release notes GitHub Workflows: - test.yml: Comprehensive testing across Python versions and OS platforms - python-publish.yml: Automated PyPI publishing on releases - license-check.yml: OSLiLi-powered license compliance verification - pr-validation.yml: Code quality checks with Black, isort, flake8, mypy Git Security Hooks: - check-problematic-words.sh: Prevents AI references and security issues - pre-commit: Validates staged files for problematic content - commit-msg: Validates commit messages for compliance - Core hooks path configured to use .githooks directory Configuration Files: - .coveragerc: Test coverage reporting configuration - pytest.ini: Test discovery and execution settings - MANIFEST.in: Package distribution file inclusion rules - .github/allowed-licenses.txt: License whitelist for compliance checks Security Features: - Git hooks prevent contamination with AI-related references - Automated security scanning with Trivy - License compliance validation in CI/CD pipeline - Comprehensive error handling for sensitive data detection This standardization ensures consistency with other SEMCL.ONE projects and provides robust governance, security, and quality assurance mechanisms.
1 parent 922a5b9 commit 5af0bb3

17 files changed

+1115
-0
lines changed

.coveragerc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
[run]
source = mcp_semclone
omit =
    */tests/*
    */test_*.py
    */__pycache__/*
    */site-packages/*

[report]
exclude_lines =
    pragma: no cover
    def __repr__
    raise AssertionError
    raise NotImplementedError
    if __name__ == .__main__.:
    if TYPE_CHECKING:
    @abstractmethod
.githooks/check-problematic-words.sh

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
#!/bin/bash
# Git hook helper that checks commit messages and staged changes for
# problematic words.
#
# Usage:
#   <script> message <commit-msg-file>   check a commit message
#   <script> files                       check staged files

# Patterns to reject. Each entry is an extended regular expression that
# check_content hands to `grep -iE`, so matching is case-insensitive.
PROBLEMATIC_PATTERNS=(
  # AI/Assistant references
  "claude|Claude"
  "anthropic|Anthropic"
  "AI-generated|ai-generated"
  "AI generated|ai generated"
  "artificial intelligence"
  "machine learning model"
  "language model"
  "Co-Authored-By:.*Claude"
  "noreply@anthropic"
  "Generated with.*Claude"
  "assistant|Assistant"
  "chatbot|Chatbot"

  # Generic/problematic code patterns (optional)
  "TODO:.*fix.*later"
  "HACK:"
  "XXX:"
  "FIXME:.*urgent"

  # Security issues
  "password.*=.*['\"]"
  "api_key.*=.*['\"]"
  "secret.*=.*['\"]"
  "token.*=.*['\"]"

  # Profanity/inappropriate content
  "wtf|WTF"
  "damn|DAMN"

  # Company/personal info that shouldn't be committed
  "internal only"
  "confidential"
  "do not distribute"
)

# Color codes for output. They are stored as literal backslash escapes
# (single-quoted) and only become terminal sequences when printed.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color
#######################################
# Scan a piece of text against every entry in PROBLEMATIC_PATTERNS.
# Each entry is applied as a case-insensitive extended regex. For every
# pattern that matches, the pattern and up to three matching lines are
# printed; a banner naming the context precedes the first finding.
# Globals:   PROBLEMATIC_PATTERNS, RED, YELLOW, NC (all read)
# Arguments: $1 - text to scan
#            $2 - label shown in the banner (e.g. "commit message")
# Outputs:   findings on stdout
# Returns:   0 when nothing matched, 1 when at least one pattern matched
#######################################
check_content() {
  local content="$1"
  local context="$2"
  local found_issues=0
  local pattern # keep the loop variable out of the caller's scope

  for pattern in "${PROBLEMATIC_PATTERNS[@]}"; do
    # A here-string feeds the text verbatim (echo would swallow "-n"/"-e"),
    # and -e stops a pattern with a leading dash being read as an option.
    if grep -qiE -e "$pattern" <<<"$content"; then
      if [ "$found_issues" -eq 0 ]; then
        # %b expands only the color escapes; the label is printed as-is.
        printf '%b❌ Problematic content found in %s:%b\n' "$RED" "$context" "$NC"
        found_issues=1
      fi
      printf '%b Pattern: %s%b\n' "$YELLOW" "$pattern" "$NC"
      grep -iE --color=always -e "$pattern" <<<"$content" | head -3
      echo ""
    fi
  done

  return "$found_issues"
}
#######################################
# Print the lines a staged change adds to one file, without the leading "+".
# Only lines inside hunks are considered, so the "+++ b/<path>" header is
# skipped while added lines that themselves start with "+" are kept.
# Arguments: $1 - path of the staged file
# Outputs:   added, non-empty lines on stdout
#######################################
staged_added_lines() {
  git diff --cached --no-color --no-ext-diff -- "$1" |
    awk '/^@@/ { in_hunk = 1; next }
         in_hunk && /^\+./ { print substr($0, 2) }'
}

#######################################
# Run check_content over the added lines of every staged file.
# Covers files that are added, copied or modified in the index; skips the
# .githooks directory, a fixed list of binary extensions, content git
# reports as binary, and paths that are not regular files on disk.
# Outputs:   progress line plus whatever check_content prints
# Returns:   0 when every file is clean, 1 when any file had a finding
#######################################
check_staged_files() {
  local file content numstat
  local found_issues=0

  echo "Checking staged files for problematic content..."

  # NUL-delimited listing keeps paths with spaces or glob characters intact.
  while IFS= read -r -d '' file; do
    case "$file" in
      # The hook scripts contain the very patterns being searched for.
      .githooks/*) continue ;;
      *.jpg|*.png|*.gif|*.pdf|*.zip|*.tar|*.gz|*.pyc|*.so|*.dll) continue ;;
    esac

    # For binary content git prints "-<TAB>-" in place of the line counts.
    numstat=$(git diff --cached --numstat -- "$file")
    case "$numstat" in
      -$'\t'-$'\t'*) continue ;;
    esac

    [ -f "$file" ] || continue

    content=$(staged_added_lines "$file")
    if [ -n "$content" ] && ! check_content "$content" "file: $file"; then
      found_issues=1
    fi
  done < <(git diff --cached --name-only --diff-filter=ACM -z)

  return "$found_issues"
}

#######################################
# Print the command-line help.
# Outputs:   usage text on stdout
#######################################
print_usage() {
  echo "Usage:"
  echo " $0 message <commit-msg-file> - Check commit message"
  echo " $0 files - Check staged files"
  echo ""
  echo "Checks for problematic words including:"
  echo " - AI/Claude references"
  echo " - Security issues (hardcoded passwords/keys)"
  echo " - TODO/FIXME/HACK markers"
  echo " - Inappropriate language"
  echo " - Confidential markers"
}

# Mode dispatch. Each mode exits with the status of its check: 0 = clean,
# 1 = problematic content found.
case "${1:-}" in
  message)
    # A missing message file falls through to the usage text below.
    if [ -f "${2:-}" ]; then
      check_content "$(<"$2")" "commit message"
      exit $?
    fi
    ;;
  files)
    check_staged_files
    exit $?
    ;;
esac

# Usage instructions if called without (valid) arguments
print_usage

.githooks/commit-msg

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Git commit-msg hook to check for problematic words.
# Delegates to check-problematic-words.sh, located next to this hook, in
# "message" mode.
# Arguments: $1 - file holding the commit message, forwarded to the checker
# Exit code: that of the checker script.

HOOK_DIR="$(dirname -- "$0")"
# exec hands the process over, so the checker's exit status is the hook's.
exec "$HOOK_DIR/check-problematic-words.sh" message "$1"

.githooks/pre-commit

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Git pre-commit hook to check staged files for problematic words.
# Delegates to check-problematic-words.sh, located next to this hook, in
# "files" mode.
# Exit code: that of the checker script.

HOOK_DIR="$(dirname -- "$0")"
# exec hands the process over, so the checker's exit status is the hook's.
exec "$HOOK_DIR/check-problematic-words.sh" files

.github/allowed-licenses.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# Allowed licenses for mcp-semclone project
# This file is used by the license-check workflow

# Permissive licenses
Apache-2.0
MIT
BSD-2-Clause
BSD-3-Clause
ISC

# Other commercial-friendly licenses
0BSD
Unlicense

# Python-specific licenses
PSF-2.0
Python-2.0

# MCP and related frameworks
# Add any specific licenses for MCP dependencies here

# Note: GPL licenses are generally not allowed
# to maintain commercial compatibility

0 commit comments

Comments
 (0)