docs-builder/.github/workflows/detect-duplicate-issues.yml at main · 0939783833/docs-builder · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
---
name: Detect Duplicate Issues

on:
  issues:
    types:
      - opened

permissions:
  contents: read
  issues: write
  models: read

jobs:
  detect-duplicates:
    runs-on: ubuntu-latest
    steps:
      - name: Detect potential duplicate issues
        uses: actions/github-script@v8
        with:
          script: |
            const { owner, repo } = context.repo;
            const issueNumber = context.issue.number;

            // Get the newly created issue
            const { data: newIssue } = await github.rest.issues.get({
              owner,
              repo,
              issue_number: issueNumber,
            });

            // Skip if the issue is a pull request
            if (newIssue.pull_request) {
              console.log('Skipping pull request');
              return;
            }

            console.log('Analyzing issue #' + issueNumber + ': "' + newIssue.title + '"');

            // Get existing open issues (excluding the current one)
            const { data: existingIssues } = await github.rest.issues.listForRepo({
              owner,
              repo,
              state: 'open',
              per_page: 100,
            });

            // Filter out pull requests and the current issue
            const openIssues = existingIssues.filter(issue =>
              !issue.pull_request && issue.number !== issueNumber
            );

            console.log('Found ' + openIssues.length + ' existing open issues to compare against');

            if (openIssues.length === 0) {
              console.log('No existing issues to compare against');
              return;
            }

            // Use GitHub Models to find potential duplicates
            const duplicates = [];

            if (openIssues.length === 0) {
              console.log('No existing issues to compare against');
              return;
            }

            console.log('Analyzing ' + openIssues.length + ' existing issues for potential duplicates');

            try {
              // Helper function to safely escape content for prompts
              function sanitizeContent(content) {
                if (!content) return 'No description provided';
                return content.replace(/[`'"\\]/g, ' ').slice(0, 500); // Limit length and escape problematic chars
              }

              // Helper function to retry AI calls with exponential backoff
              async function retryApiCall(apiCallFn, maxRetries = 2) {
                for (let attempt = 0; attempt <= maxRetries; attempt++) {
                  try {
                    const response = await apiCallFn();
                    if (response.ok) return response;

                    if (attempt < maxRetries) {
                      const delay = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s delays
                      console.log('API call failed, retrying in ' + delay + 'ms (attempt ' + (attempt + 1) + '/' + (maxRetries + 1) + ')');
                      await new Promise(resolve => setTimeout(resolve, delay));
                    } else {
                      return response; // Return the failed response on final attempt
                    }
                  } catch (error) {
                    if (attempt === maxRetries) throw error;
                    const delay = Math.pow(2, attempt) * 1000;
                    console.log('API call error, retrying in ' + delay + 'ms: ' + error.message);
                    await new Promise(resolve => setTimeout(resolve, delay));
                  }
                }
              }

              // Limit the number of issues to analyze to prevent token overflow
              const maxIssuesForAnalysis = Math.min(openIssues.length, 50); // Limit to 50 issues max
              const issuesToAnalyze = openIssues.slice(0, maxIssuesForAnalysis);

              if (issuesToAnalyze.length < openIssues.length) {
                console.log('Limiting analysis to ' + maxIssuesForAnalysis + ' most recent issues (out of ' + openIssues.length + ' total)');
              }

              // Step 1: Send issue titles and numbers to get top 5 candidates
              let titlePrompt = 'Analyze this NEW ISSUE against EXISTING ISSUES and identify the top 5 most similar ones:\n\n';
              titlePrompt += 'NEW ISSUE:\n';
              titlePrompt += 'Title: ' + sanitizeContent(newIssue.title) + '\n';
              titlePrompt += 'Body: ' + sanitizeContent(newIssue.body) + '\n\n';
              titlePrompt += 'EXISTING ISSUES:\n';

              issuesToAnalyze.forEach((issue, index) => {
                titlePrompt += (index + 1) + '. Issue #' + issue.number + ' - ' + sanitizeContent(issue.title) + '\n';
              });

              titlePrompt += '\nRespond with a JSON object containing the top 5 most similar issues. Format: {"similar_issues": [{"rank": 1, "issue_number": 123, "similarity": "high|medium"}, ...]}';

              const titleResponse = await retryApiCall(() =>
                fetch('https://models.inference.ai.azure.com/chat/completions', {
                  method: 'POST',
                  headers: {
                    'Authorization': 'Bearer ' + github.token,
                    'Content-Type': 'application/json',
                  },
                  body: JSON.stringify({
                    messages: [
                      {
                        role: 'system',
                        content: 'You are an expert at analyzing GitHub issues to detect duplicates. Compare issue titles and descriptions to identify the most similar ones. Respond only with valid JSON containing the top 5 most similar issues ranked by relevance. Use "high" for likely duplicates and "medium" for related issues.'
                      },
                      {
                        role: 'user',
                        content: titlePrompt
                      }
                    ],
                    model: 'gpt-4o-mini',
                    temperature: 0.1,
                    max_tokens: 200
                  })
                })
              );

              if (!titleResponse.ok) {
                const errorText = await titleResponse.text();
                console.log('First AI call failed after retries: ' + titleResponse.status + ' - ' + errorText);
                return;
              }

              const titleResult = await titleResponse.json();
              const titleAnalysis = titleResult.choices[0]?.message?.content?.trim();
              console.log('AI title analysis result: ' + titleAnalysis);

              // Parse JSON response to get top 5 candidates
              let candidateIssueNumbers = [];
              try {
                const jsonMatch = titleAnalysis.match(/\{.*\}/s);
                if (jsonMatch) {
                  const jsonData = JSON.parse(jsonMatch[0]);
                  candidateIssueNumbers = jsonData.similar_issues || [];
                }
              } catch (parseError) {
                console.log('Failed to parse JSON response, falling back to number extraction');
                // Fallback: extract issue numbers from response
                const numberMatches = titleAnalysis.match(/#(\d+)/g);
                if (numberMatches) {
                  candidateIssueNumbers = numberMatches.slice(0, 5).map(match => ({
                    issue_number: parseInt(match.replace('#', '')),
                    similarity: 'medium'
                  }));
                }
              }

              if (candidateIssueNumbers.length === 0) {
                console.log('No candidate issues identified in first pass');
                return;
              }

              console.log('Found ' + candidateIssueNumbers.length + ' candidate issues from title analysis');

              // Step 2: Get full details for top candidates and do detailed analysis
              const candidateIssues = [];
              for (const candidate of candidateIssueNumbers) {
                const issue = openIssues.find(i => i.number === candidate.issue_number);
                if (issue) {
                  candidateIssues.push({
                    issue,
                    initialSimilarity: candidate.similarity
                  });
                }
              }

              if (candidateIssues.length === 0) {
                console.log('No valid candidate issues found');
                return;
              }

              // Step 3: Detailed analysis with full issue bodies
              let detailPrompt = 'Perform detailed comparison of this NEW ISSUE against the TOP CANDIDATE ISSUES:\n\n';
              detailPrompt += 'NEW ISSUE:\n';
              detailPrompt += 'Title: ' + sanitizeContent(newIssue.title) + '\n';
              detailPrompt += 'Body: ' + sanitizeContent(newIssue.body) + '\n\n';
              detailPrompt += 'CANDIDATE ISSUES FOR DETAILED ANALYSIS:\n';

              candidateIssues.forEach((candidate, index) => {
                detailPrompt += (index + 1) + '. Issue #' + candidate.issue.number + '\n';
                detailPrompt += '   Title: ' + sanitizeContent(candidate.issue.title) + '\n';
                detailPrompt += '   Body: ' + sanitizeContent(candidate.issue.body) + '\n\n';
              });

              detailPrompt += 'Respond with JSON format: {"duplicates": [{"issue_number": 123, "classification": "DUPLICATE|SIMILAR|DIFFERENT", "reason": "brief explanation"}]}';

              const detailResponse = await retryApiCall(() =>
                fetch('https://models.inference.ai.azure.com/chat/completions', {
                  method: 'POST',
                  headers: {
                    'Authorization': 'Bearer ' + github.token,
                    'Content-Type': 'application/json',
                  },
                  body: JSON.stringify({
                    messages: [
                      {
                        role: 'system',
                        content: 'You are an expert at analyzing GitHub issues for duplicates. Compare the full content and determine: DUPLICATE (same core problem), SIMILAR (related but different aspects), or DIFFERENT (unrelated). Respond only with valid JSON.'
                      },
                      {
                        role: 'user',
                        content: detailPrompt
                      }
                    ],
                    model: 'gpt-4o-mini',
                    temperature: 0.1,
                    max_tokens: 300
                  })
                })
              );

              if (detailResponse.ok) {
                const detailResult = await detailResponse.json();
                const detailAnalysis = detailResult.choices[0]?.message?.content?.trim();
                console.log('AI detailed analysis result: ' + detailAnalysis);

                // Parse detailed analysis JSON
                try {
                  const jsonMatch = detailAnalysis.match(/\{.*\}/s);
                  if (jsonMatch) {
                    const jsonData = JSON.parse(jsonMatch[0]);
                    const results = jsonData.duplicates || [];

                    for (const result of results) {
                      if (result.classification === 'DUPLICATE' || result.classification === 'SIMILAR') {
                        const issue = candidateIssues.find(c => c.issue.number === result.issue_number)?.issue;
                        if (issue) {
                          duplicates.push({
                            issue,
                            similarity: result.classification === 'DUPLICATE' ? 'high' : 'medium'
                          });
                          console.log('Found ' + result.classification.toLowerCase() + ' issue: #' + issue.number + ' - ' + issue.title);
                        }
                      }
                    }
                  }
                } catch (parseError) {
                  console.log('Failed to parse detailed analysis JSON, using fallback');
                  // Fallback: look for DUPLICATE/SIMILAR mentions
                  candidateIssues.forEach(candidate => {
                    const issueRef = '#' + candidate.issue.number;
                    if (detailAnalysis.includes(issueRef) &&
                        (detailAnalysis.includes('DUPLICATE') || detailAnalysis.includes('SIMILAR'))) {
                      duplicates.push({
                        issue: candidate.issue,
                        similarity: detailAnalysis.includes('DUPLICATE') ? 'high' : 'medium'
                      });
                      console.log('Found similar issue (fallback): #' + candidate.issue.number + ' - ' + candidate.issue.title);
                    }
                  });
                }
              } else {
                const errorText = await detailResponse.text();
                console.log('Detailed analysis failed after retries: ' + detailResponse.status + ' - ' + errorText);
              }

            } catch (error) {
              console.log('Error in AI analysis: ' + error.message);
            }

            // Post comment if duplicates found
            if (duplicates.length > 0) {
              const highPriority = duplicates.filter(d => d.similarity === 'high');
              const mediumPriority = duplicates.filter(d => d.similarity === 'medium');

              let commentBody = '👋 **Potential duplicate issues detected**\n\n';
              commentBody += 'This issue appears to be similar to existing open issues:\n\n';

              if (highPriority.length > 0) {
                commentBody += '### 🚨 Likely Duplicates\n';
                for (const { issue } of highPriority) {
                  commentBody += '- #' + issue.number + ' - [' + issue.title + '](' + issue.html_url + ')\n';
                }
                commentBody += '\n';
              }

              if (mediumPriority.length > 0) {
                commentBody += '### 🔍 Similar Issues\n';
                for (const { issue } of mediumPriority) {
                  commentBody += '- #' + issue.number + ' - [' + issue.title + '](' + issue.html_url + ')\n';
                }
                commentBody += '\n';
              }

              commentBody += 'Please review these issues to see if your issue is already covered. ';
              commentBody += 'If this is indeed a duplicate, consider closing this issue and contributing to the existing discussion.\n\n';
              commentBody += '---\n';
              commentBody += '*This comment was automatically generated using AI to help identify potential duplicates.*';

              await github.rest.issues.createComment({
                owner,
                repo,
                issue_number: issueNumber,
                body: commentBody,
              });

              console.log('Posted comment with ' + duplicates.length + ' potential duplicate(s)');
            } else {
              console.log('No potential duplicates found');
            }