optimizing-ci-builds · optimizing-ci-builds · Aug 7, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -7,11 +7,85 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
+      # Python and the packages used by the tracing and analysis scripts.
+      - uses: actions/setup-python@v2
+        with:
+          python-version: '3.10'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pandas
+          pip install numpy
+          pip install inotify
+      - run: sudo apt update
+      - uses: actions/checkout@v3
+      # Start the inotify logger in the background (&) so that the step returns at once.
+      - run: python .github/workflows/inotify_script.py /home/runner/work/GradleTestCI/GradleTestCI /home/runner/inotify-logs.csv & echo 'optimizing-ci-builds'
+      # Each touch/rm pair below leaves CREATE/DELETE events for a starting_* marker
+      # file in the log; script.py uses them to tell the steps apart.
+      - run: touch starting_build_uses-checkout_10
+      - run: rm starting_build_uses-checkout_10
       - uses: actions/checkout@v3
+      - run: touch starting_build_SetupJDK8_11
+      - run: rm starting_build_SetupJDK8_11
       - name: Set up JDK 8
         uses: actions/setup-java@v3
         with:
           java-version: '8'
           distribution: 'adopt'
+      - run: touch starting_build_Buildmodules_16
+      - run: rm starting_build_Buildmodules_16
       - name: Build modules
         run: gradle clean build
+      - run: touch starting_finished_finished_8979874
+        if: always()
+      - run: rm starting_finished_finished_8979874
+        if: always()
+      - name: rat check
+        if: always()
+        run: |
+           if [ -f /home/runner/work/GradleTestCI/GradleTestCI/target/rat.txt ]; then cat /home/runner/work/GradleTestCI/GradleTestCI/target/rat.txt; fi
+      - name: Check script file exists and execute
+        if: always()
+        run: |
+          [ -f .github/workflows/script.py ] && python .github/workflows/script.py
+          [ -f /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv ] || mkdir -p /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/; echo "${GITHUB_RUN_ID},${GITHUB_JOB},GradleTestCI,${GITHUB_WORKFLOW}" > /home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv
+      - name: Checkout to destination CI-analyzes repo
+        uses: actions/checkout@v3
+        if: always()
+        with:
+          path: GradleTestCI
+          ref: '1691374686-548a464'
+          repository: 'UT-SE-Research/ci-analyzes'
+          token: '${{ secrets.API_TOKEN_GITHUB }}'
+          persist-credentials: true
+      - name: Copy files to push to another directory
+        if: always()
+        run: |
+          mkdir -p GradleTestCI/GradleTestCI/.github/workflows/ci/build
+          cp -rvT optimizing-ci-builds-ci-analysis GradleTestCI/GradleTestCI/.github/workflows/ci/build
+      - run: echo https://github.com/UT-SE-Research/ci-analyzes/tree/1691374686-548a464/GradleTestCI/.github/workflows/ci/build
+      - name: Pushes analysis to another repository
+        if: always()
+        working-directory: GradleTestCI
+        run: |
+          commit_message=$GITHUB_REPOSITORY@$GITHUB_WORKFLOW_SHA
+          git config --global user.name 'UT-SE-Research'
+          git config --global user.email '${{ secrets.EMAIL }}'
+          git add .
+          git commit -m "$commit_message"
+          # Retry a bounded number of times so that a push which can never succeed
+          # fails this step instead of looping until the job times out.
+          pushed=false
+          for attempt in 1 2 3 4 5 6 7 8 9 10; do
+            if git push origin 1691374686-548a464; then
+              pushed=true
+              break
+            fi
+            git pull --rebase origin 1691374686-548a464
+            sleep $((RANDOM % 5 + 1))
+          done
+          if [ "$pushed" != true ]; then
+            echo 'Could not push the analysis results after 10 attempts' >&2
+            exit 1
+          fi
diff --git a/.github/workflows/inotify_script.py b/.github/workflows/inotify_script.py
@@ -0,0 +1,33 @@
+"""Log inotify events below a directory to a ';'-separated file.
+
+Usage: inotify_script.py <target_dir> <log_file>
+"""
+import inotify.adapters
+from datetime import datetime
+import sys
+
+target_dir = sys.argv[1]
+log_file = sys.argv[2]
+
+# Create or truncate the log so that every run starts from an empty file.
+with open(log_file, "w") as f:
+    pass
+
+def _main():
+    """Append one line per inotify event under target_dir to log_file.
+
+    Line format: <UTC timestamp>;<path>;<filename>;<comma-joined type names>
+    """
+    i = inotify.adapters.InotifyTree(target_dir)
+
+    # Keep the log open for the whole run instead of reopening it for every
+    # event; each line is still flushed at once so that readers can see it.
+    with open(log_file, "a") as f:
+        for event in i.event_gen(yield_nones=False):
+            (_, type_names, path, filename) = event
+            timestamp = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
+            f.write(f"{timestamp};{path};{filename};{','.join(type_names)}\n")
+            f.flush()
+
+if __name__ == '__main__':
+    _main()
diff --git a/.github/workflows/script.py b/.github/workflows/script.py
@@ -0,0 +1,223 @@
+import pandas as pd
+import numpy as np
+import os
+import shutil
+
+def show_directories(file_path):
+    """Return the file_name column of the CSV at file_path rendered as tree lines."""
+    # pandas opens the path itself, so no separate file handle is needed.
+    paths = pd.read_csv(file_path, sep=',')["file_name"].to_list()
+    root = TreeNode("", None)
+    # [1:] skips the first component, which is empty for an absolute path.
+    for path in paths:
+        find_and_insert(root, path.split("/")[1:])
+    # TreeNode.print appends one rendered line per node to the list it is given.
+    stack = []
+    root.print(True, stack)
+    return stack
+
+
+class TreeNode:
+    """Node of a path tree; keeps its parent, name, children and child count."""
+
+    def __init__(self, name, parent):
+        self.name = name
+        self.parent = parent
+        self.children = []
+        self.number_of_children = 0
+
+    def add_child(self, node):
+        """Attach node as the last child and return it."""
+        self.number_of_children += 1
+        self.children.append(node)
+        return node
+
+    def print(self, is_root, stack):
+        """Append this subtree to stack, one line per node, parents before children."""
+        blank, bar, tee, corner = "    ", "\u2502   ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 "
+
+        # Own connector: a tee unless this node is its parent's last child.
+        is_last = not (self.parent and self is not self.parent.children[-1])
+        prefix = corner if is_last else tee
+
+        # Prepend one column per ancestor that itself has a parent: a bar while
+        # that ancestor is not the last child of its parent, blanks otherwise.
+        node = self
+        while node.parent and node.parent.parent:
+            ancestor = node.parent
+            column = bar if ancestor is not ancestor.parent.children[-1] else blank
+            prefix = column + prefix
+            node = ancestor
+
+        line = self.name if is_root else f"{prefix} {self.name} {str(self.number_of_children)}"
+        stack.append(line)
+        for child in self.children:
+            child.print(False, stack)
+
+
+def find_and_insert(parent, edges):
+    """Ensure the chain of nodes named by edges exists below parent.
+
+    Walks edges in order, reusing the first child whose name matches and
+    adding a new TreeNode where there is none. Returns None.
+    """
+    current = parent
+    for name in (edges or ()):
+        existing = next((child for child in current.children if child.name == name), None)
+        if existing is None:
+            existing = current.add_child(TreeNode(name, current))
+        # Descend so that the next edge is looked up below this node.
+        current = existing
+
+
+# Load the inotify log: one ';'-separated event per line, columns as listed in names=.
+df = pd.read_csv('/home/runner/inotify-logs.csv', sep = ';', names=['time', 'watched_filename', 'event_filename', 'event_name'])
+df['event_filename'] = df['event_filename'].replace(np.nan, '')
+# PyInotify reports type names such as IN_CREATE, while the comparisons below
+# use bare names (CREATE, MODIFY, ...); strip the prefix so both forms work.
+df['event_name'] = df['event_name'].astype(str).str.replace('IN_', '', regex=False)
+# A step spans from the DELETE of its own starting_* marker file to the CREATE
+# of the next marker (or the end of the log), hence the one-entry offset below.
+is_marker = df['event_filename'].str.contains('starting_')
+steps = {}
+starting_indexes = df[is_marker & (df['event_name'] == 'CREATE')].index.to_list() + [df.shape[0]]
+ending_indexes = [0] + df[is_marker & (df['event_name'] == 'DELETE')].index.to_list()
+# Take marker names in order of first appearance so they line up with the
+# chronological index lists; value_counts() would order them by frequency.
+touch_file_names = ['setup'] + [x.replace('starting_', '') for x in df.loc[is_marker, 'event_filename'].unique()]
+for starting_index, ending_index, touch_file_name in zip(starting_indexes, ending_indexes, touch_file_names):
+    if touch_file_name == 'setup': continue
+    steps[touch_file_name] = (ending_index, starting_index)
+touch_file_names.pop(0)
+
+# Build the full path of each event. The directory may be logged without a
+# trailing '/', so insert the separator when it is missing.
+needs_separator = ~df['watched_filename'].astype(str).str.endswith('/') & (df['event_filename'] != '')
+df['watched_filename'] = df['watched_filename'] + needs_separator.map({True: '/', False: ''}) + df['event_filename']
+df.drop('event_filename', axis=1, inplace=True)
+df.rename(columns={'watched_filename':'file_name'}, inplace=True)
+modify_df = df[(df['event_name'] == 'MODIFY') | (df['event_name'] == 'CREATE')]
+file_names = modify_df['file_name'].value_counts().index.to_list()
+info = []
+useful = []
+
+def _event_index(file_name, event_name, first=False):
+    """Return the row index of the last (first=True: first) event_name event of file_name, or -1 if none."""
+    hits = df[(df['file_name'] == file_name) & (df['event_name'] == event_name)].index.to_list()
+    if not hits:
+        return -1
+    return hits[0] if first else hits[-1]
+
+def _step_of(index):
+    """Return the step whose index range strictly contains index ('Not provided' for -1, '' if none)."""
+    step_name = 'Not provided' if index == -1 else ''
+    for touch_file_name, (starting_index, ending_index) in steps.items():
+        if starting_index < index < ending_index:
+            step_name = touch_file_name.split('_')[1]
+    return step_name
+
+# Classify every created or modified file outside .git/: `info` gets files whose last access
+# precedes their last modification, `useful` the opposite; equal indexes are skipped.
+for file_name in file_names:
+    last_modify_index = _event_index(file_name, 'MODIFY')
+    last_access_index = _event_index(file_name, 'ACCESS')
+    creation_index = _event_index(file_name, 'CREATE', first=True)
+    if last_access_index == last_modify_index: continue
+    if f'/home/runner/work/GradleTestCI/GradleTestCI/.git/' in file_name: continue
+    record = {'file_name': file_name, 'last_access_index': last_access_index, 'last_modify_index': last_modify_index, 'creation_index': creation_index, 'last_access_step': _step_of(last_access_index), 'last_modify_step': _step_of(last_modify_index), 'creation_step': _step_of(creation_index)}
+    (info if last_access_index < last_modify_index else useful).append(record)
+
+# Output directories; exist_ok keeps the script from failing when they already exist.
+os.makedirs(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details', exist_ok=True)
+
+# Add the job.csv file to the directory optimizing-ci-builds-ci-analysis
+current_run_id = os.environ['GITHUB_RUN_ID']
+job_id = os.environ['GITHUB_JOB']
+workflow = os.environ['GITHUB_WORKFLOW']
+save_path = f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/job.csv'
+# write the current_run_id to the file job.csv
+with open(save_path, 'w') as f:
+    f.write(current_run_id  + ',' + job_id + ',' + f"GradleTestCI" + ',' + workflow)
+
+def _elapsed_seconds(start_stamp, end_stamp):
+    """Return the seconds between two values of the log's time column.
+
+    Handles ISO-8601 stamps containing 'T' and plain 'HH:MM:SS' clock times.
+    """
+    start_stamp, end_stamp = str(start_stamp), str(end_stamp)
+    if 'T' in start_stamp:
+        return (pd.Timestamp(end_stamp) - pd.Timestamp(start_stamp)).total_seconds()
+    start_h, start_m, start_s = map(int, start_stamp.split(':'))
+    end_h, end_m, end_s = map(int, end_stamp.split(':'))
+    return (end_h - start_h) * 3600 + (end_m - start_m) * 60 + (end_s - start_s)
+
+info_flag=0
+if ( len(info) > 0 ):
+    info_df = pd.DataFrame(info)
+    info_flag=1
+useful_df = pd.DataFrame(useful)
+step_statistics = []
+print(info_flag)
+if (info_flag == 1) :
+    details_dir = f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/step-details'
+    for step, (starting_index, ending_index) in steps.items():
+        step_name = step.split('_')[1]
+        if step_name == 'finished': continue
+        # c / m / a: the file was created / last modified / last accessed in this step.
+        c = info_df['creation_step'] == step_name
+        m = info_df['last_modify_step'] == step_name
+        a = info_df['last_access_step'] == step_name
+        # _a  is accessed in another step
+        # __a is never accessed
+        _a  = (info_df['last_access_step'] != step_name) & (info_df['last_access_index'] != -1)
+        __a = info_df['last_access_index'] == -1
+        # Every combination of (created here?, modified here?, access kind), keyed
+        # by the name used for both the CSV file and the statistics column.
+        categories = {
+            'cma': c & m & a, 'cm_a': c & m & _a, 'cm__a': c & m & __a,
+            'c_ma': c & ~m & a, 'c_m_a': c & ~m & _a, 'c_m__a': c & ~m & __a,
+            '_cma': ~c & m & a, '_cm_a': ~c & m & _a, '_cm__a': ~c & m & __a,
+            '_c_ma': ~c & ~m & a, '_c_m_a': ~c & ~m & _a, '_c_m__a': ~c & ~m & __a,
+        }
+        counts = {name: info_df[mask].shape[0] for name, mask in categories.items()}
+        created_file_count = info_df[c].shape[0]
+        modified_file_count = info_df[m].shape[0]
+        # Duration between the step's first and last row; the final step ends one
+        # past the last row of the log, hence the clamp.
+        last_row = min(ending_index, len(df) - 1)
+        total_seconds = _elapsed_seconds(df.iloc[starting_index]['time'], df.iloc[last_row]['time'])
+        if step_name != '':
+            step_dir = f'{details_dir}/{step_name}'
+            if not os.path.exists(step_dir):
+                os.mkdir(step_dir)
+            if created_file_count > 0: info_df[c]["file_name"].to_csv(f'{step_dir}/c.csv')
+            if modified_file_count > 0: info_df[m]["file_name"].to_csv(f'{step_dir}/m.csv')
+            for name, mask in categories.items():
+                if counts[name] > 0: info_df[mask]["file_name"].to_csv(f'{step_dir}/{name}.csv')
+            step_statistics.append({'step_name': step_name, '#c': created_file_count, '#m': modified_file_count, **counts, 'time': total_seconds})
+    step_df = pd.DataFrame(step_statistics)
+    step_df.to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/steps.csv')
+    info_df.to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/files.csv')
+    useful_df.to_csv(f'/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/useful.csv')
+    directories = show_directories('/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/files.csv')
+    string_version = ''
+    for line in directories:
+        string_version += line + '\n'
+    with open("/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/directories.txt", "w+", encoding="utf-8") as f:
+        f.write(string_version)
+shutil.copy2("/home/runner/inotify-logs.csv", "/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis/")
+
+# When the results exceed 99,000,000 bytes, replace the folder's contents with a zip of them.
+size = 0
+Folderpath = "/home/runner/work/GradleTestCI/GradleTestCI/optimizing-ci-builds-ci-analysis"
+for path, dirs, files in os.walk(Folderpath):
+    for f in files:
+        fp = os.path.join(path, f)
+        size += os.path.getsize(fp)
+if size > 99000000:
+    # An absolute base name puts the archive next to the folder no matter what
+    # the working directory is; make_archive returns the archive's path.
+    archive_path = shutil.make_archive(Folderpath, "zip", Folderpath)
+    shutil.rmtree(Folderpath)
+    os.mkdir(Folderpath)
+    shutil.move(archive_path, Folderpath)