From 3ede6081fc51b11d73d7a956ead890f81b45b1ca Mon Sep 17 00:00:00 2001 From: Brent Salisbury Date: Wed, 26 Jun 2024 01:33:04 -0400 Subject: [PATCH] Deep Search PDF to MD file conversion Signed-off-by: Brent Salisbury --- .env.example | 6 + .gitignore | 1 - src/app/api/conversion/route.ts | 144 +++++++++++++++++ src/app/api/pr/knowledge/route.ts | 2 +- src/app/api/pr/skill/route.ts | 2 +- src/app/api/upload/route.ts | 16 +- .../edit-submission/knowledge/[id]/page.tsx | 4 +- .../Contribute/Knowledge/UploadFile.tsx | 134 +++++++++------- src/components/Contribute/Knowledge/index.tsx | 137 ++++++++++++---- .../Contribute/Knowledge/knowledge.css | 1 - src/lib/api/deepsearch/index.ts | 149 ++++++++++++++++++ src/utils/github.ts | 28 ++++ 12 files changed, 522 insertions(+), 102 deletions(-) create mode 100644 src/app/api/conversion/route.ts create mode 100644 src/lib/api/deepsearch/index.ts diff --git a/.env.example b/.env.example index 9abe7573..694dfbb6 100644 --- a/.env.example +++ b/.env.example @@ -14,3 +14,9 @@ IL_GRANITE_API= IL_GRANITE_MODEL_NAME= IL_MERLINITE_API= IL_MERLINITE_MODEL_NAME= + +DS_USERNAME= +DS_API_KEY= +DS_HOST= +DS_PROJ_KEY= +DS_PROJ_NAME= diff --git a/.gitignore b/.gitignore index d909b5d7..d00a474e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,6 @@ npm-debug.log .env *.env coverage -lib taxonomy config.yaml generated diff --git a/src/app/api/conversion/route.ts b/src/app/api/conversion/route.ts new file mode 100644 index 00000000..9006d0ff --- /dev/null +++ b/src/app/api/conversion/route.ts @@ -0,0 +1,144 @@ +'use server'; + +import { NextResponse, NextRequest } from 'next/server'; +import fetch from 'node-fetch'; + +interface AuthData { + access_token: string; +} + +interface ConvertData { + task_id: string; +} + +interface TaskStatus { + task_status: string; + result?: { + json_file_url: string; + md_file_url: string; + document_hash: string; + }; +} + +export async function POST(req: NextRequest) { + const { repoUrl, documentNames } = await req.json(); + const USERNAME = process.env.DS_USERNAME; + const API_KEY = process.env.DS_API_KEY; + const HOST = process.env.DS_HOST; + const PROJ_KEY = process.env.DS_PROJ_KEY; + const BRANCH = 'main'; + + if (!USERNAME || !API_KEY || !HOST || !PROJ_KEY) { + console.error('Missing environment variables'); + return NextResponse.json({ error: 'Missing environment variables' }, { status: 500 }); + } + + const pdfFileName = documentNames.find((name: string) => name.endsWith('.pdf')); + if (!pdfFileName) { + console.error('No PDF file found for conversion'); + return NextResponse.json({ error: 'No PDF file found for conversion' }, { status: 400 }); + } + + const [repoOwner, repoName] = repoUrl.replace('https://github.com/', '').split('/'); + const PDF_URL = `https://raw.githubusercontent.com/${repoOwner}/${repoName}/${BRANCH}/${pdfFileName}`; + console.log(`PDF URL for conversion: ${PDF_URL}`); + + try { + console.log('Starting authentication...'); + const authResponse = await fetch(`${HOST}/api/cps/user/v1/user/token`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Basic ${Buffer.from(`${USERNAME}:${API_KEY}`).toString('base64')}` + }, + body: JSON.stringify({}) + }); + + if (!authResponse.ok) { + const error = await authResponse.text(); + console.error('Error during authentication:', error); + return NextResponse.json({ error }, { status: authResponse.status }); + } + + const authData = (await authResponse.json()) as AuthData; + const token = authData.access_token; + console.log('Authentication successful. Token obtained.'); + + console.log('Starting PDF conversion...'); + const convertResponse = await fetch(`${HOST}/api/cps/public/v2/project/${PROJ_KEY}/convert`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: token + }, + body: JSON.stringify({ + http_source: { url: PDF_URL, headers: {} } + }) + }); + + if (!convertResponse.ok) { + const error = await convertResponse.text(); + console.error('Error during PDF conversion:', error); + return NextResponse.json({ error }, { status: convertResponse.status }); + } + + const convertData = (await convertResponse.json()) as ConvertData; + const taskId = convertData.task_id; + console.log(`PDF conversion started. Task ID: ${taskId}`); + + console.log('Checking conversion task status...'); + let taskStatus: TaskStatus = { task_status: '', result: undefined }; + let isTaskComplete = false; + while (!isTaskComplete) { + const taskResponse = await fetch(`${HOST}/api/cps/public/v2/project/${PROJ_KEY}/convert_tasks/${taskId}?wait=10`, { + method: 'GET', + headers: { + Authorization: token + } + }); + + if (!taskResponse.ok) { + const error = await taskResponse.text(); + console.error('Error during task status check:', error); + return NextResponse.json({ error }, { status: taskResponse.status }); + } + + const taskText = await taskResponse.text(); + try { + taskStatus = JSON.parse(taskText) as TaskStatus; + } catch (parseError) { + console.error('Error parsing task status response:', taskText); + return NextResponse.json({ error: 'Failed to parse task status response' }, { status: 500 }); + } + + console.log(`Task status: ${taskStatus.task_status}`); + + if (taskStatus.result && ['SUCCESS', 'FAILURE'].includes(taskStatus.task_status)) { + isTaskComplete = true; + } else { + await new Promise((resolve) => setTimeout(resolve, 10000)); // Wait for 10 seconds before polling again + } + } + + if (taskStatus.task_status === 'FAILURE') { + console.error('PDF Conversion Task failed.'); + return NextResponse.json({ error: 'PDF Conversion Task failed' }, { status: 500 }); + } + + const result = { + json_file_url: taskStatus.result!.json_file_url, + md_file_url: taskStatus.result!.md_file_url, + document_hash: taskStatus.result!.document_hash + }; + + console.log('Task completed successfully.'); + console.log(`JSON file URL: ${result.json_file_url}`); + console.log(`Markdown file URL: ${result.md_file_url}`); + console.log(`Document hash: ${result.document_hash}`); + + return NextResponse.json(result); + } catch (error: unknown) { + console.error('Unexpected error:', error); + return NextResponse.json({ error: (error as Error).message }, { status: 500 }); + } +} diff --git a/src/app/api/pr/knowledge/route.ts b/src/app/api/pr/knowledge/route.ts index 3bd0098a..49f05af4 100644 --- a/src/app/api/pr/knowledge/route.ts +++ b/src/app/api/pr/knowledge/route.ts @@ -12,7 +12,7 @@ const BASE_BRANCH = 'main'; export async function POST(req: NextRequest) { const token = await getToken({ req, secret: process.env.NEXTAUTH_SECRET! }); - console.log('GitHub Token:', token); + // console.log('GitHub Token:', token); if (!token || !token.accessToken) { console.error('Unauthorized: Missing or invalid access token'); diff --git a/src/app/api/pr/skill/route.ts b/src/app/api/pr/skill/route.ts index 20ab7278..c1296793 100644 --- a/src/app/api/pr/skill/route.ts +++ b/src/app/api/pr/skill/route.ts @@ -12,7 +12,7 @@ const BASE_BRANCH = 'main'; export async function POST(req: NextRequest) { const token = await getToken({ req, secret: process.env.NEXTAUTH_SECRET! }); - console.log('GitHub Token:', token); + // console.log('GitHub Token:', token); if (!token || !token.accessToken) { console.error('Unauthorized: Missing or invalid access token'); diff --git a/src/app/api/upload/route.ts b/src/app/api/upload/route.ts index b1e2dc78..25ff643e 100644 --- a/src/app/api/upload/route.ts +++ b/src/app/api/upload/route.ts @@ -4,12 +4,12 @@ import { getToken } from 'next-auth/jwt'; import { NextRequest } from 'next/server'; const GITHUB_API_URL = 'https://api.github.com'; -const TAXONOMY_DOCUMENTS_REPO = process.env.TAXONOMY_DOCUMENTS_REPO!; +const TAXONOMY_DOCUMENTS_REPO = process.env.NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO!; const BASE_BRANCH = 'main'; export async function POST(req: NextRequest) { const token = await getToken({ req, secret: process.env.NEXTAUTH_SECRET! }); - console.log('GitHub Token:', token); + // console.log('GitHub Token:', token); if (!token || !token.accessToken) { console.error('Unauthorized: Missing or invalid access token'); @@ -64,7 +64,8 @@ export async function POST(req: NextRequest) { const [name, extension] = file.fileName.split(/\.(?=[^.]+$)/); return { fileName: `${name}-${timestamp}.${extension}`, - fileContent: file.fileContent + fileContent: file.fileContent, + encoding: extension === 'pdf' ? 'base64' : 'utf-8' }; }); @@ -160,7 +161,7 @@ async function createFilesCommit( owner: string, repo: string, branchName: string, - files: { fileName: string; fileContent: string }[], + files: { fileName: string; fileContent: string; encoding: string }[], userEmail: string, baseSha: string ): Promise { @@ -173,7 +174,7 @@ async function createFilesCommit( headers, body: JSON.stringify({ content: file.fileContent, - encoding: 'utf-8' + encoding: file.encoding }) }).then((response) => response.json()) ) @@ -202,12 +203,9 @@ async function createFilesCommit( } const treeData = await createTreeResponse.json(); - console.log('Tree created:', treeData); + // console.log('Tree created:', treeData); // Create commit with DCO sign-off - // TODO: if the user's github does not have an associated github email, we need to specify one in the upload section - // or reuse the one from the form. If we use the email field from the form, it needs to be null checked when - // the user clicks the upload documents button. const createCommitResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/git/commits`, { method: 'POST', headers, diff --git a/src/app/edit-submission/knowledge/[id]/page.tsx b/src/app/edit-submission/knowledge/[id]/page.tsx index 3f7b5c61..bab83f0b 100644 --- a/src/app/edit-submission/knowledge/[id]/page.tsx +++ b/src/app/edit-submission/knowledge/[id]/page.tsx @@ -503,7 +503,7 @@ Creator names: ${updatedAttributionData.creator_names} className={useFileUpload ? 'button-active' : 'button-secondary'} onClick={() => setUseFileUpload(true)} > - Automatically Upload Documents + Upload Documents @@ -537,7 +537,7 @@ Creator names: ${updatedAttributionData.creator_names} ) : ( <> - + diff --git a/src/components/Contribute/Knowledge/UploadFile.tsx b/src/components/Contribute/Knowledge/UploadFile.tsx index 75eae440..b75d1cb5 100644 --- a/src/components/Contribute/Knowledge/UploadFile.tsx +++ b/src/components/Contribute/Knowledge/UploadFile.tsx @@ -1,5 +1,4 @@ // src/components/Contribute/Knowledge/UploadFile.tsx -'use client'; import React, { useState, useEffect } from 'react'; import { MultipleFileUploadStatusItem, @@ -10,24 +9,33 @@ import { import { Modal } from '@patternfly/react-core/dist/dynamic/next/components/Modal'; import UploadIcon from '@patternfly/react-icons/dist/esm/icons/upload-icon'; import { ExclamationTriangleIcon } from '@patternfly/react-icons/dist/dynamic/icons/exclamation-triangle-icon'; -import { FileRejection, DropEvent } from 'react-dropzone'; import { Button } from '@patternfly/react-core/dist/dynamic/components/Button'; import { HelperText, HelperTextItem } from '@patternfly/react-core/dist/dynamic/components/HelperText'; +import { Spinner } from '@patternfly/react-core/dist/dynamic/components/Spinner'; -interface readFile { +interface ReadFile { fileName: string; data?: string; loadResult?: 'danger' | 'success'; loadError?: DOMException; } -export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[]) => void }> = ({ onFilesChange }) => { - const [currentFiles, setCurrentFiles] = useState([]); - const [readFileData, setReadFileData] = useState([]); +interface UploadFileProps { + onFilesChange: (files: File[]) => void; + files: File[]; + isConverting: boolean; + conversionMessage: string; +} + +export const UploadFile: React.FunctionComponent = ({ onFilesChange, files, isConverting, conversionMessage }) => { + // State hooks for managing file upload state and modal state + const [currentFiles, setCurrentFiles] = useState(files || []); + const [readFileData, setReadFileData] = useState([]); const [showStatus, setShowStatus] = useState(false); const [statusIcon, setStatusIcon] = useState<'inProgress' | 'success' | 'danger'>('inProgress'); const [modalText, setModalText] = useState(''); + // Effect hook to show or hide the upload status based on current files useEffect(() => { if (currentFiles.length > 0) { setShowStatus(true); @@ -36,6 +44,7 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] } }, [currentFiles]); + // Effect hook to update the status icon based on the read file results useEffect(() => { if (readFileData.length < currentFiles.length) { setStatusIcon('inProgress'); @@ -46,6 +55,18 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] } }, [readFileData, currentFiles]); + // Effect hook to trigger the onFilesChange callback when current files are updated + useEffect(() => { + console.log('Current files updated:', currentFiles); + onFilesChange(currentFiles); + }, [currentFiles, onFilesChange]); + + // Effect hook to set current files from props + useEffect(() => { + setCurrentFiles(files); + }, [files]); + + // Function to remove files from the current file list const removeFiles = (namesOfFilesToRemove: string[]) => { const newCurrentFiles = currentFiles.filter((file) => !namesOfFilesToRemove.includes(file.name)); const newReadFiles = readFileData.filter((file) => !namesOfFilesToRemove.includes(file.fileName)); @@ -53,48 +74,7 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] setReadFileData(newReadFiles); }; - const handleFileDrop = (_event: DropEvent, droppedFiles: File[]) => { - const currentFileNames = currentFiles.map((file) => file.name); - const reUploads = droppedFiles.filter((file) => currentFileNames.includes(file.name)); - - const newFiles = [ - ...currentFiles.filter((file) => !reUploads.includes(file)), - ...droppedFiles.filter((file) => !currentFileNames.includes(file.name)) - ]; - setCurrentFiles(newFiles); - onFilesChange(newFiles); - }; - - const handleReadSuccess = (data: string, file: File) => { - setReadFileData((prevReadFiles) => { - const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); - if (existingFile) { - return prevReadFiles; - } - return [...prevReadFiles, { data, fileName: file.name, loadResult: 'success' }]; - }); - }; - - const handleReadFail = (error: DOMException, file: File) => { - setReadFileData((prevReadFiles) => { - const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); - if (existingFile) { - return prevReadFiles; - } - return [...prevReadFiles, { loadError: error, fileName: file.name, loadResult: 'danger' }]; - }); - }; - - const handleDropRejected = (fileRejections: FileRejection[]) => { - console.warn('Files rejected:', fileRejections); - if (fileRejections.length === 1) { - setModalText(`${fileRejections[0].file.name} is not an accepted file type`); - } else { - const rejectedMessages = fileRejections.reduce((acc, fileRejection) => (acc += `${fileRejection.file.name}, `), ''); - setModalText(`${rejectedMessages} are not accepted file types`); - } - }; - + // Function to create helper text for file upload status const createHelperText = (file: File) => { const fileResult = readFileData.find((readFile) => readFile.fileName === file.name); if (fileResult?.loadError) { @@ -113,20 +93,46 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] return ( <> { + const newFiles = droppedFiles.reduce( + (acc, file) => { + const index = acc.findIndex((f) => f.name === file.name); + if (index !== -1) { + acc[index] = file; // Overwrite existing file + } else { + acc.push(file); + } + return acc; + }, + [...currentFiles] + ); + + setCurrentFiles(newFiles); + console.log('Files after drop:', newFiles); + }} dropzoneProps={{ accept: { 'application/pdf': ['.pdf'], 'text/markdown': ['.md'] }, - onDropRejected: handleDropRejected + // Handle file rejection + onDropRejected: (fileRejections) => { + console.warn('Files rejected:', fileRejections); + if (fileRejections.length === 1) { + setModalText(`${fileRejections[0].file.name} is not an accepted file type`); + } else { + const rejectedMessages = fileRejections.reduce((acc, fileRejection) => (acc += `${fileRejection.file.name}, `), ''); + setModalText(`${rejectedMessages} are not accepted file types`); + } + } }} > } titleText="Drag and drop files here" titleTextSeparator="or" - infoText="Accepted file types: PDF, Markdown" + infoText="Accepted file types are PDF and Markdown. PDF files will be converted to Markdown via Deep Search. All documents will be automatically stored in a fork in the user's GitHub account. That only applies to PDFs needing conversion to Markdown." /> {showStatus && ( removeFiles([file.name])} - onReadSuccess={handleReadSuccess} - onReadFail={handleReadFail} + onReadSuccess={(data, file) => { + setReadFileData((prevReadFiles) => { + const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); + if (existingFile) { + return prevReadFiles; + } + return [...prevReadFiles, { data, fileName: file.name, loadResult: 'success' }]; + }); + }} + onReadFail={(error, file) => { + setReadFileData((prevReadFiles) => { + const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); + if (existingFile) { + return prevReadFiles; + } + return [...prevReadFiles, { loadError: error, fileName: file.name, loadResult: 'danger' }]; + }); + }} progressHelperText={createHelperText(file)} /> ))} @@ -161,6 +183,12 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] + {isConverting && ( +
+ + {conversionMessage} +
+ )} ); }; diff --git a/src/components/Contribute/Knowledge/index.tsx b/src/components/Contribute/Knowledge/index.tsx index f3a2edb5..63fdcf10 100644 --- a/src/components/Contribute/Knowledge/index.tsx +++ b/src/components/Contribute/Knowledge/index.tsx @@ -70,9 +70,10 @@ export const KnowledgeForm: React.FunctionComponent = () => { const [useFileUpload, setUseFileUpload] = useState(false); const [uploadedFiles, setUploadedFiles] = useState([]); - const [isModalOpen, setIsModalOpen] = useState(false); const [yamlContent, setYamlContent] = useState(''); + const [isConverting, setIsConverting] = useState(false); + const [conversionMessage, setConversionMessage] = useState(''); const handleInputChange = (index: number, type: string, value: string) => { switch (type) { @@ -127,15 +128,12 @@ export const KnowledgeForm: React.FunctionComponent = () => { const onCloseSuccessAlert = () => { setIsSuccessAlertVisible(false); + setIsConverting(false); }; const onCloseFailureAlert = () => { setIsFailureAlertVisible(false); - }; - - const handleFilesChange = (files: File[]) => { - setUploadedFiles(files); - setPatterns(files.map((file) => file.name).join(', ')); // Populate the patterns field + setIsConverting(false); }; const handleSubmit = async (event: React.FormEvent) => { @@ -244,7 +242,11 @@ export const KnowledgeForm: React.FunctionComponent = () => { const handleDocumentUpload = async () => { if (uploadedFiles.length > 0) { - const fileContents: { fileName: string; fileContent: string }[] = []; + setIsConverting(true); + setConversionMessage('Files are being processed...'); + + const markdownFiles: { fileName: string; fileContent: string }[] = []; + const pdfFiles: { fileName: string; fileContent: string }[] = []; await Promise.all( uploadedFiles.map( @@ -253,46 +255,110 @@ export const KnowledgeForm: React.FunctionComponent = () => { const reader = new FileReader(); reader.onload = (e) => { const fileContent = e.target!.result as string; - fileContents.push({ fileName: file.name, fileContent }); + const fileType = file.type; + + if (fileType === 'application/pdf') { + // For PDF files, extract base64 content + pdfFiles.push({ + fileName: file.name, + fileContent: fileContent.split(',')[1] + }); + } else { + // For Markdown and other text files, use the full content + markdownFiles.push({ + fileName: file.name, + fileContent + }); + } resolve(); }; reader.onerror = reject; - reader.readAsText(file); + + if (file.type === 'application/pdf') { + reader.readAsDataURL(file); // Read PDF files as base64 + } else { + reader.readAsText(file); // Read Markdown and other files as text + } }) ) ); - if (fileContents.length === uploadedFiles.length) { - try { - const response = await fetch('/api/upload', { + try { + // Upload PDF files for conversion + if (pdfFiles.length > 0) { + const pdfUploadResponse = await fetch('/api/upload', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ files: fileContents }) + body: JSON.stringify({ files: pdfFiles }) }); - const result = await response.json(); - if (response.ok) { - setRepo(result.repoUrl); - setCommit(result.commitSha); - setPatterns(result.documentNames.join(', ')); // Populate the patterns field - console.log('Files uploaded:', result.documentNames); - setSuccessAlertTitle('Document uploaded successfully!'); - setSuccessAlertMessage('Documents have been uploaded to your repo to be referenced in the knowledge submission.'); - setSuccessAlertLink(result.prUrl); - setIsSuccessAlertVisible(true); - setUseFileUpload(false); // Switch back to manual mode to display the newly created values in the knowledge submission - } else { - throw new Error(result.error || 'Failed to upload document'); + const pdfUploadResult = await pdfUploadResponse.json(); + if (!pdfUploadResponse.ok) { + throw new Error(pdfUploadResult.error || 'Failed to upload PDF files'); } - } catch (error: unknown) { - if (error instanceof Error) { - setFailureAlertTitle('Failed to upload document'); - setFailureAlertMessage(error.message); - setIsFailureAlertVisible(true); + + const conversionResponse = await fetch('/api/conversion', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ repoUrl: pdfUploadResult.repoUrl, documentNames: pdfUploadResult.documentNames }) + }); + + const conversionResult = await conversionResponse.json(); + if (!conversionResponse.ok) { + throw new Error(conversionResult.error || 'Failed to convert PDF'); } + + const mdFileUrl = conversionResult.md_file_url; + + // Download the converted Markdown file + const mdFileResponse = await fetch(mdFileUrl); + const mdFileContent = await mdFileResponse.text(); + + // Add converted Markdown to the list of Markdown files + markdownFiles.push({ + fileName: pdfFiles[0].fileName.replace('.pdf', '.md'), + fileContent: mdFileContent + }); } + + // Upload all Markdown files in a single commit + if (markdownFiles.length > 0) { + const markdownUploadResponse = await fetch('/api/upload', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ files: markdownFiles }) + }); + + const markdownUploadResult = await markdownUploadResponse.json(); + if (!markdownUploadResponse.ok) { + throw new Error(markdownUploadResult.error || 'Failed to upload Markdown files'); + } + + setRepo(markdownUploadResult.repoUrl); + setCommit(markdownUploadResult.commitSha); + setPatterns(markdownUploadResult.documentNames.join(', ')); // Populate the patterns field + console.log('Markdown files uploaded:', markdownUploadResult.documentNames); + + setSuccessAlertTitle('Document uploaded successfully!'); + setSuccessAlertMessage('Documents have been uploaded to your repo to be referenced in the knowledge submission.'); + setSuccessAlertLink(markdownUploadResult.prUrl); + setIsSuccessAlertVisible(true); + setUseFileUpload(false); // Switch back to manual mode to display the newly created values in the knowledge submission + } + } catch (error: unknown) { + if (error instanceof Error) { + setFailureAlertTitle('Failed to upload document'); + setFailureAlertMessage(error.message); + setIsFailureAlertVisible(true); + } + } finally { + setIsConverting(false); } } }; @@ -576,7 +642,10 @@ Creator names: ${creators} + } > @@ -593,7 +662,7 @@ Creator names: ${creators} className={useFileUpload ? 'button-active' : 'button-secondary'} onClick={() => setUseFileUpload(true)} > - Automatically Upload Documents + Upload Documents @@ -627,7 +696,7 @@ Creator names: ${creators} ) : ( <> - + diff --git a/src/components/Contribute/Knowledge/knowledge.css b/src/components/Contribute/Knowledge/knowledge.css index 083f1f18..c044730c 100644 --- a/src/components/Contribute/Knowledge/knowledge.css +++ b/src/components/Contribute/Knowledge/knowledge.css @@ -6,7 +6,6 @@ margin-bottom: 50px; } - .submit-k:hover, .download-k-yaml:hover, .download-k-attribution:hover, diff --git a/src/lib/api/deepsearch/index.ts b/src/lib/api/deepsearch/index.ts new file mode 100644 index 00000000..bf4b01cc --- /dev/null +++ b/src/lib/api/deepsearch/index.ts @@ -0,0 +1,149 @@ +// src/lib/api/index.ts +const taskPollWaitSeconds = 10; + +interface ApiResult { + status: number; + error?: string; + payload?: R; +} + +interface Task { + task_id: string; + task_status: 'SUCCESS' | 'FAILURE' | string; + result: { transaction_id: string }; +} + +interface DocumentArtifactsPageImage { + page_no: number; + url: string; +} + +interface DocumentArtifacts { + document_pdf: string; + document_md: string; + document_json: string; + page_images: DocumentArtifactsPageImage[]; +} + +export default class Client { + private host: string; + private token: string; + + constructor(host: string) { + this.host = host; + this.token = ''; + } + + async authenticate(userName: string, apiKey: string): Promise> { + const url = `${this.host}/api/cps/user/v1/user/token`; + + const response = await fetch(url, { + method: 'POST', + headers: { + Authorization: 'Basic ' + btoa(`${userName}:${apiKey}`), + 'Content-Type': 'application/json' + }, + body: JSON.stringify({}) + }); + + const result = await this.payloadOrError<{ access_token: string }>(response); + + const tokenResult = this.mapResult(result, (r) => r['access_token']); + + if (tokenResult.payload) { + this.token = tokenResult.payload ?? ''; + } + + return tokenResult; + } + + async launchConvert(projKey: string, indexKey: string, sourceURL: string): Promise> { + return await this.post<{ file_url: string[] }, Task>({ + path: `api/cps/public/v1/project/${projKey}/data_indices/${indexKey}/actions/ccs_convert_upload`, + payload: { file_url: [sourceURL] } + }); + } + + async waitForTask(projKey: string, taskId: string): Promise> { + // eslint-disable-next-line + while (true) { + const response = await this.get({ + path: `api/cps/public/v2/project/${projKey}/celery_tasks/${taskId}?wait=${taskPollWaitSeconds}` + }); + + if (response.payload) { + console.debug('Task status: ', response.payload.task_status); + + if (['SUCCESS', 'FAILURE'].includes(response.payload.task_status)) { + return response; + } + } else { + console.debug('Failed to retrieve task status: ', response.status, response.error); + } + } + } + + async getDocumentHashes(projKey: string, indexKey: string, transactionId: string): Promise> { + const response = await this.get<{ documents: { document_hash: string }[] }>({ + path: `api/cps/public/v2/project/${projKey}/data_indices/${indexKey}/documents/transactions/${transactionId}` + }); + + // eslint-disable-next-line + return this.mapResult(response, (r) => r.documents.map((d: any) => d.document_hash)); + } + + async getDocumentArtifacts(projKey: string, indexKey: string, documentHash: string): Promise> { + const response = await this.get<{ artifacts: DocumentArtifacts }>({ + path: `api/cps/public/v2/project/${projKey}/data_indices/${indexKey}/documents/${documentHash}/artifacts` + }); + + return this.mapResult(response, (r) => r.artifacts); + } + + private async post({ path, payload }: { path: string; payload: P }): Promise> { + try { + const response = await fetch(`${this.host}/${path}`, { + method: 'POST', + body: JSON.stringify(payload), + headers: { + Authorization: this.token, + 'Content-Type': 'application/json' + } + }); + + return this.payloadOrError(response); + } catch (ex) { + console.error(ex); + throw ex; + } + } + + private async get({ path }: { path: string }): Promise> { + const response = await fetch(`${this.host}/${path}`, { + method: 'GET', + headers: { + Authorization: this.token + } + }); + + return this.payloadOrError(response); + } + + /** + * Convert an API response into a usable payload or an error message. + */ + private async payloadOrError(response: Response): Promise> { + if (response.ok) { + return { status: response.status, payload: (await response.json()) as R }; + } else { + return { status: response.status, error: response.statusText }; + } + } + + /** + * Map API results payload into something else. + */ + private mapResult(result: ApiResult, f: (input: I) => O): ApiResult { + return result.payload ? { ...result, payload: f(result.payload) } : ({ ...result } as ApiResult); + } +} diff --git a/src/utils/github.ts b/src/utils/github.ts index 91c3967b..7d26b746 100644 --- a/src/utils/github.ts +++ b/src/utils/github.ts @@ -4,6 +4,7 @@ import { PullRequestUpdateData } from '@/types'; const UPSTREAM_REPO_OWNER = process.env.NEXT_PUBLIC_TAXONOMY_REPO_OWNER!; const UPSTREAM_REPO_NAME = process.env.NEXT_PUBLIC_TAXONOMY_REPO!; +const NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME = process.env.NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME!; export async function fetchPullRequests(token: string) { try { @@ -92,6 +93,33 @@ export const fetchFileContent = async (token: string, filePath: string, ref: str } }; +export const fetchKnowledgeFileContent = async (token: string, username: string, filePath: string): Promise => { + try { + console.log(`Fetching knowledge file content for path: ${filePath} from repo: ${NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME}`); + const response = await axios.get(`https://api.github.com/repos/${username}/${NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME}/contents/${filePath}`, { + headers: { + Authorization: `Bearer ${token}`, + Accept: 'application/vnd.github.v3.raw' + }, + responseType: 'blob' + }); + + if (response.status === 404) { + throw new Error(`File not found: ${filePath}`); + } + + console.log('Fetched knowledge file content:', response.data); + return response.data; // return the Blob content + } catch (error) { + if (axios.isAxiosError(error)) { + console.error('Error fetching knowledge file content:', error.response ? error.response.data : error.message); + } else { + console.error('Error fetching knowledge file content:', error); + } + throw error; + } +}; + export const updatePullRequest = async (token: string, prNumber: number, data: PullRequestUpdateData) => { try { console.log(`Updating PR Number: ${prNumber} with data:`, data);