Skip to content

Optimizing CI builds #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,70 @@ jobs:
runs-on: ubuntu-latest

steps:
# Python toolchain used by the two analysis scripts (pandas, numpy, inotify).
- uses: actions/setup-python@v2
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pandas
pip install numpy
pip install inotify
- run: sudo apt update
# First checkout: makes .github/workflows/inotify_script.py available to the next step.
- uses: actions/checkout@v3
# Start the watcher in the background ("&"); it logs events under the workspace to /home/runner/inotify-logs.csv.
- run: python .github/workflows/inotify_script.py /home/runner/work/GradleTestCI/GradleTestCI /home/runner/inotify-logs.csv & echo 'optimizing-ci-builds'
# Each "touch starting_*" / "rm starting_*" pair leaves CREATE/DELETE marker events in the log;
# script.py searches for "starting_" events to delimit the step that follows the pair.
- run: touch starting_build_uses-checkout_10
- run: rm starting_build_uses-checkout_10
- uses: actions/checkout@v3
- run: touch starting_build_SetupJDK8_11
- run: rm starting_build_SetupJDK8_11
- name: Set up JDK 8
uses: actions/setup-java@v3
with:
java-version: '8'
distribution: 'adopt'
- run: touch starting_build_Buildmodules_16
- run: rm starting_build_Buildmodules_16
- name: Build modules
run: gradle clean build
# Closing marker; script.py skips the step named "finished" when computing statistics.
- run: touch starting_finished_finished_8979874
if: always()
- run: rm starting_finished_finished_8979874
if: always()
- name: rat check
if: always()
run: |
if [ -f /home/runner/work/GradleTestCI/GradleTestCI/target/rat.txt ]; then cat /home/runner/work/GradleTestCI/GradleTestCI/target/rat.txt; fi
# Runs the analysis script when present, then writes job.csv.
# NOTE(review): the "echo ... > job.csv" sits after the ";", outside the "||", so job.csv is
# rewritten on every run, including when script.py already produced it - confirm this is intended.
- name: Check script file exists and execute
if: always()
run: |
[ -f .github/workflows/script.py ] && python .github/workflows/script.py
[ -f /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv ] || mkdir -p /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/; echo "${GITHUB_RUN_ID},${GITHUB_JOB},GradleTestCI,${GITHUB_WORKFLOW}" > /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv
# Check out the results repository (branch 1691374686-548a464) into ./GradleTestCI.
- name: Checkout to destination CI-analyzes repo
uses: actions/checkout@v3
if: always()
with:
path: GradleTestCI
ref: '1691374686-548a464'
repository: 'UT-SE-Research/ci-analyzes'
token: '${{ secrets.API_TOKEN_GITHUB }}'
persist-credentials: true
# Copy the generated analysis directory into the results checkout.
- name: Copy files to push to another directory
if: always()
run: |
mkdir -p GradleTestCI/GradleTestCI/.github/workflows/ci/build
cp -rvT optimizing-ci-builds-ci-analysis GradleTestCI/GradleTestCI/.github/workflows/ci/build
- run: echo https://github.com/UT-SE-Research/ci-analyzes/tree/1691374686-548a464/GradleTestCI/.github/workflows/ci/build
# Commit and push the results, rebasing and retrying after a random 1-5 s pause when the push fails.
# NOTE(review): the retry loop has no attempt limit, and $commit_message is expanded unquoted.
- name: Pushes analysis to another repository
if: always()
working-directory: GradleTestCI
run: |
commit_message=$GITHUB_REPOSITORY@$GITHUB_WORKFLOW_SHA
git config --global user.name 'UT-SE-Research'
git config --global user.email '${{ secrets.EMAIL }}'
git add .
git commit -m $commit_message
while ! git push origin 1691374686-548a464; do
git pull --rebase origin 1691374686-548a464
sleep $((RANDOM % 5 + 1))
done
22 changes: 22 additions & 0 deletions .github/workflows/inotify_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import inotify.adapters
from datetime import datetime
import sys

# Positional command-line arguments: the directory to watch and the log file
# that receives one line per event.
target_dir, log_file = sys.argv[1], sys.argv[2]

# Create the log file, or empty an existing one, before any event is recorded.
with open(log_file, "w"):
    pass

def _main():
    """Append one ``time;path;filename;event-types`` line to ``log_file`` per event.

    Events come from an ``inotify.adapters.InotifyTree`` created on
    ``target_dir``. The generator is consumed until the process is stopped.
    """
    watcher = inotify.adapters.InotifyTree(target_dir)

    for _, type_names, path, filename in watcher.event_gen(yield_nones=False):
        stamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        record = f"{stamp};{path};{filename};{','.join(type_names)}"
        # The log is reopened and flushed for every event so that each line is
        # handed to the OS as soon as it is produced.
        with open(log_file, "a") as sink:
            print(record, file=sink, flush=True)


if __name__ == "__main__":
    _main()
223 changes: 223 additions & 0 deletions .github/workflows/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
import pandas as pd
import numpy as np
import os
import shutil

def show_directories(file_path):
    """Render the paths listed in a CSV file as a directory tree.

    Args:
        file_path: Path of a comma-separated file that has a ``file_name``
            column holding "/"-separated paths.

    Returns:
        list: The lines produced by ``TreeNode.print`` - the (empty) root
        name first, then one line per path component in depth-first order.
    """
    # pandas opens the path itself, so no separate file handle is needed.
    paths = pd.read_csv(file_path, sep=',')["file_name"].to_list()

    root = TreeNode("", None)
    for path in paths:
        # Skip whatever precedes the first "/" (the empty string for
        # absolute paths) and insert the remaining components.
        find_and_insert(root, path.split("/")[1:])

    stack = []
    root.print(True, stack)
    return stack


class TreeNode:
    """A named node in a tree of path components."""

    def __init__(self, name, parent):
        """Create a childless node called ``name`` below ``parent`` (``None`` for a root)."""
        self.parent = parent
        self.name = name
        self.number_of_children = 0
        self.children = []

    def add_child(self, node):
        """Attach ``node`` as the last child, bump the child counter and return ``node``."""
        self.children.append(node)
        self.number_of_children += 1
        return node

    def print(self, is_root, stack):
        """Append this node's line, then the lines of its whole subtree, to ``stack``.

        Args:
            is_root: When true only the bare name is emitted; otherwise the
                line is "<prefix> <name> <number of children>".
            stack: List that receives the rendered lines in depth-first order.
        """
        # NOTE(review): as written these segments have different widths, so
        # nested levels will not line up in columns - confirm the intended
        # spacing.
        blank = " "
        pipe = "\u2502 "
        tee = "\u251c\u2500\u2500 "
        elbow = "\u2514\u2500\u2500 "

        # Connector for this node: a tee when later siblings follow it, an
        # elbow when it is the last child (or has no parent).
        if self.parent and self is not self.parent.children[-1]:
            segments = [tee]
        else:
            segments = [elbow]

        # Walk up towards the root, collecting one segment per ancestor level:
        # a pipe while that ancestor still has later siblings, a blank
        # otherwise. Segments are gathered innermost-first.
        ancestor = self.parent
        while ancestor and ancestor.parent:
            if ancestor is not ancestor.parent.children[-1]:
                segments.append(pipe)
            else:
                segments.append(blank)
            ancestor = ancestor.parent
        prefix = "".join(reversed(segments))

        if is_root:
            stack.append(self.name)
        else:
            stack.append(f"{prefix} {self.name} {self.number_of_children}")

        for child in self.children:
            child.print(False, stack)


def find_and_insert(parent, edges):
    """Ensure the chain of nodes named by ``edges`` exists below ``parent``.

    For each name in turn, the first existing child with that name is reused;
    when there is none, a new ``TreeNode`` is added to the current node. The
    walk then continues from that child. Nothing is returned.
    """
    node = parent
    for label in edges:
        found = next((child for child in node.children if child.name == label), None)
        if found is None:
            found = node.add_child(TreeNode(label, node))
        node = found


# Load the raw event log: one "time;directory;file;events" line per event.
df = pd.read_csv('/home/runner/inotify-logs.csv', sep=';', names=['time', 'watched_filename', 'event_filename', 'event_name'])
df['event_filename'] = df['event_filename'].replace(np.nan, '')

# The comparisons in this script use bare event names ('CREATE', 'MODIFY',
# 'ACCESS', 'DELETE'), but inotify_script.py logs the type names of the Python
# inotify library, which carry an 'IN_' prefix (e.g. 'IN_CREATE') per the
# PyInotify documentation. Strip the prefix so both spellings are accepted.
df['event_name'] = df['event_name'].astype(str).str.replace('IN_', '', regex=False)

# inotify_script.py stamps events as '%Y-%m-%dT%H:%M:%S.%fZ', while the step
# timing code later in this script parses 'HH:MM:SS'. Keep only the clock
# part; plain 'HH:MM:SS' values pass through unchanged.
df['time'] = df['time'].astype(str).str.extract(r'(\d{1,2}:\d{2}:\d{2})', expand=False)

# Rows that belong to the "starting_*" marker files the workflow touches and
# removes before each step.
is_marker = df['event_filename'].str.contains('starting_')

steps = {}
starting_indexes = df[is_marker & (df['event_name'] == 'CREATE')].index.to_list() + [df.shape[0]]
ending_indexes = [0] + df[is_marker & (df['event_name'] == 'DELETE')].index.to_list()
starting_df = df[is_marker]
# Marker names in order of first appearance. value_counts() orders by
# frequency, which can pair a name with the wrong index window.
touch_file_names = ['setup'] + [
    name.replace('starting_', '')
    for name in starting_df['event_filename'].drop_duplicates().to_list()
]
for starting_index, ending_index, touch_file_name in zip(starting_indexes, ending_indexes, touch_file_names):
    if touch_file_name == 'setup':
        continue
    # A step's window runs from the deletion of its own marker to the creation
    # of the next marker (or to the end of the log for the last one).
    steps[touch_file_name] = (ending_index, starting_index)
touch_file_names.pop(0)

# Build the full path of each event. The watched directory is logged without
# a trailing slash by inotify_script.py, so normalise to exactly one "/"
# between directory and file name.
df['watched_filename'] = df['watched_filename'].str.rstrip('/') + '/' + df['event_filename']
df.drop('event_filename', axis=1, inplace=True)
df.rename(columns={'watched_filename': 'file_name'}, inplace=True)

# Every file that was created or modified at least once during the run.
modify_df = df[df['event_name'].isin(['MODIFY', 'CREATE'])]
file_names = modify_df['file_name'].value_counts().index.to_list()
info = []
useful = []

# Classify every created/modified file. For each one, record the row index of
# its last MODIFY, last ACCESS and first CREATE event (-1 when there is none)
# and the step whose index window contains each of those rows.
for file_name in file_names:
    file_events = df[df['file_name'] == file_name]
    modify_rows = file_events[file_events['event_name'] == 'MODIFY'].index.to_list()
    access_rows = file_events[file_events['event_name'] == 'ACCESS'].index.to_list()
    create_rows = file_events[file_events['event_name'] == 'CREATE'].index.to_list()

    last_modify_index, last_modify_step = (modify_rows[-1], '') if modify_rows else (-1, 'Not provided')
    last_access_index, last_access_step = (access_rows[-1], '') if access_rows else (-1, 'Not provided')
    creation_index, creation_step = (create_rows[0], '') if create_rows else (-1, 'Not provided')

    # Files whose last access and last modification coincide (in practice:
    # neither event exists) are not recorded in either list.
    if last_access_index == last_modify_index:
        continue

    # Window bounds are exclusive; the step name is the second "_"-separated
    # field of the marker name.
    for touch_file_name, (starting_index, ending_index) in steps.items():
        if starting_index < last_access_index < ending_index:
            last_access_step = touch_file_name.split('_')[1]
        if starting_index < last_modify_index < ending_index:
            last_modify_step = touch_file_name.split('_')[1]
        if starting_index < creation_index < ending_index:
            creation_step = touch_file_name.split('_')[1]

    # Git metadata is excluded from the report.
    if '/home/runner/work/GradleTestCI/GradleTestCI/.git/' in file_name:
        continue

    # Last touched by a write -> `info`; read again after the last write -> `useful`.
    bucket = info if last_access_index < last_modify_index else useful
    bucket.append({'file_name': file_name, 'last_access_index': last_access_index, 'last_modify_index': last_modify_index, 'creation_index': creation_index, 'last_access_step': last_access_step, 'last_modify_step': last_modify_step, 'creation_step': creation_step})

# Create the analysis directory and its step-details sub-directory.
# makedirs(..., exist_ok=True) keeps this from failing when the directories
# are already present.
os.makedirs('/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details', exist_ok=True)

# Add the job.csv file to the directory optimizing-ci-builds-ci-analysis.
# It holds a single "run id,job id,GradleTestCI,workflow name" record taken
# from the GitHub Actions environment; a missing variable raises KeyError.
current_run_id = os.environ['GITHUB_RUN_ID']
job_id = os.environ['GITHUB_JOB']
workflow = os.environ['GITHUB_WORKFLOW']
save_path = '/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv'
with open(save_path, 'w') as f:
    f.write(current_run_id + ',' + job_id + ',' + "GradleTestCI" + ',' + workflow)

def _seconds_of_day(stamp):
    """Return the whole seconds since midnight encoded in a log timestamp.

    Accepts plain ``HH:MM:SS`` values as well as the
    ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` form written by inotify_script.py; the
    date part and the fractional seconds are ignored.
    """
    clock = str(stamp).split('T')[-1].rstrip('Z')
    hours, minutes, seconds = clock.split(':')[:3]
    return int(hours) * 3600 + int(minutes) * 60 + int(float(seconds))


# info_df only exists when at least one file was classified into `info`;
# info_flag guards everything below that needs it.
info_flag = 0
if info:
    info_df = pd.DataFrame(info)
    info_flag = 1
useful_df = pd.DataFrame(useful)
step_statistics = []
print(info_flag)
if info_flag == 1:
    details_root = '/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details'
    for step, (starting_index, ending_index) in steps.items():
        step_name = step.split('_')[1]
        if step_name == 'finished':
            continue

        created_here = info_df['creation_step'] == step_name
        modified_here = info_df['last_modify_step'] == step_name
        accessed_here = info_df['last_access_step'] == step_name
        # Last accessed in some other step / never accessed at all.
        accessed_elsewhere = (info_df['last_access_step'] != step_name) & (info_df['last_access_index'] != -1)
        never_accessed = info_df['last_access_index'] == -1

        # The twelve combinations 'cma', 'cm_a', 'cm__a', 'c_ma', ..., '_c_m__a':
        # a leading "_" negates created (c) / modified (m) in this step, and
        # "a" / "_a" / "__a" mean accessed here / elsewhere / never.
        category_masks = {
            f'{c_key}{m_key}{a_key}': c_mask & m_mask & a_mask
            for c_key, c_mask in (('c', created_here), ('_c', ~created_here))
            for m_key, m_mask in (('m', modified_here), ('_m', ~modified_here))
            for a_key, a_mask in (('a', accessed_here), ('_a', accessed_elsewhere), ('__a', never_accessed))
        }
        category_counts = {key: info_df[mask].shape[0] for key, mask in category_masks.items()}
        created_file_count = info_df[created_here].shape[0]
        modified_file_count = info_df[modified_here].shape[0]

        # Step duration from the timestamps of the first and last row of its
        # window. The window of the last step ends one past the final row.
        last_row = ending_index - 1 if ending_index == len(df) else ending_index
        starting_time = _seconds_of_day(df.iloc[starting_index]['time'])
        ending_time = _seconds_of_day(df.iloc[last_row]['time'])
        # The modulo keeps the result non-negative when a step crosses midnight.
        total_seconds = (ending_time - starting_time) % 86400

        # NOTE(review): assumes the per-step CSV files are only written for
        # non-empty step names, while the statistics row is always recorded -
        # confirm against the original nesting.
        if step_name != '':
            step_dir = f'{details_root}/{step_name}'
            if not os.path.exists(step_dir):
                os.mkdir(step_dir)
            if created_file_count > 0:
                info_df[created_here]["file_name"].to_csv(f'{step_dir}/c.csv')
            if modified_file_count > 0:
                info_df[modified_here]["file_name"].to_csv(f'{step_dir}/m.csv')
            for key, mask in category_masks.items():
                if category_counts[key] > 0:
                    info_df[mask]["file_name"].to_csv(f'{step_dir}/{key}.csv')

        step_statistics.append({'step_name': step_name, '#c': created_file_count, '#m': modified_file_count, **category_counts, 'time': total_seconds})

    # NOTE(review): assumes all report files below are produced only when
    # info_df exists - confirm against the original nesting.
    step_df = pd.DataFrame(step_statistics)
    step_df.to_csv('/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/steps.csv')
    info_df.to_csv('/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/files.csv')
    useful_df.to_csv('/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/useful.csv')

    # Render the recorded file paths as a directory tree, one line per node.
    directories = show_directories('/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/files.csv')
    string_version = ''.join(line + '\n' for line in directories)
    with open("/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/directories.txt", "w+", encoding="utf-8") as f:
        f.write(string_version)
# Keep a copy of the raw event log next to the derived reports.
shutil.copy2("/home/runner/inotify-logs.csv", "/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/")

# Total size in bytes of everything under the analysis directory.
Folderpath = "/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis"
size = sum(
    os.path.getsize(os.path.join(path, f))
    for path, dirs, files in os.walk(Folderpath)
    for f in files
)

# Above 99,000,000 bytes the directory content is replaced by a single zip
# archive of itself.
# NOTE(review): presumably this keeps the push below a file-size limit of the
# destination repository - confirm.
if size > 99000000:
    # An absolute base name makes the archive land at Folderpath + ".zip"
    # regardless of the current working directory, which is where the move
    # below expects it.
    shutil.make_archive(Folderpath, "zip", Folderpath)
    shutil.rmtree(Folderpath)
    os.mkdir(Folderpath)
    shutil.move(Folderpath + ".zip", Folderpath)