diff --git a/Code Crafters/.gitignore b/Code Crafters/.gitignore new file mode 100644 index 00000000..b2fb17fa --- /dev/null +++ b/Code Crafters/.gitignore @@ -0,0 +1,2 @@ +chat_history.db +/database/__pycache__ \ No newline at end of file diff --git a/Code Crafters/.streamlit/config.toml b/Code Crafters/.streamlit/config.toml new file mode 100644 index 00000000..4aa6f918 --- /dev/null +++ b/Code Crafters/.streamlit/config.toml @@ -0,0 +1,9 @@ +[theme] +primaryColor="#F63366" +backgroundColor="#444654" +secondaryBackgroundColor="#202123" +textColor="#fff" +font="sans serif" + +[server] +enableStaticServing = true \ No newline at end of file diff --git a/Code Crafters/Backend/Models/input_data.py b/Code Crafters/Backend/Models/input_data.py new file mode 100644 index 00000000..b9e248a1 --- /dev/null +++ b/Code Crafters/Backend/Models/input_data.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + +class InputData(BaseModel): + """ + A class used to represent the user prompt + """ + userPrompt: str \ No newline at end of file diff --git a/Code Crafters/Backend/Models/output_data.py b/Code Crafters/Backend/Models/output_data.py new file mode 100644 index 00000000..893d971c --- /dev/null +++ b/Code Crafters/Backend/Models/output_data.py @@ -0,0 +1,5 @@ +# api/models/output_data.py +from pydantic import BaseModel + +class OutputData(BaseModel): + anonymized_text: str \ No newline at end of file diff --git a/Code Crafters/Backend/main.py b/Code Crafters/Backend/main.py new file mode 100644 index 00000000..46cacd0f --- /dev/null +++ b/Code Crafters/Backend/main.py @@ -0,0 +1,101 @@ +from fastapi import FastAPI +from pydantic import BaseModel +from llama_cpp import Llama +import openai +import os + +app = FastAPI() + + +class UserPrompt(BaseModel): + """ + A class used to represent the user prompt + """ + + +from fastapi.middleware.cors import CORSMiddleware +from fastapi import HTTPException, APIRouter + +# Load vicuna-13b model +print("Loading model...") +llm = 
Llama(model_path="./models/ggml-vicuna-13b-4bit-rev1.bin") +print("Model loaded") + +# OpenAI API key +openai.api_key_path = os.path.join(os.path.dirname(__file__), "openai_api_key.txt") + + +@app.get("/") +async def root(): + return {"message": "Hello World"} + + +# Add a post method to the app with the /api endpoint +@app.post("/api") +async def api(userPrompt: str): + """ + API endpoint for the model which performs data redaction + :param userPrompt: The user prompt + :return: privatePrompt + """ + # prompt="Remove all personal information and sensitive data such as name, address, mobile number, financial information, etc. in the following text and replace it with masks:"+userPrompt+"Mask all text that would violate privacy. Please output only the masked text and nothing else." + prompt = ( + """You are PrivateGPT, an AI language model designed to ensure user privacy by anonymizing personal information and sensitive data. Your primary goal is to help users interact securely without exposing any sensitive or personally identifiable information (PII), including passwords, financial details, and API keys. Please use this prompt to understand the anonymization process comprehensively. + +Sensitive Personally Identifiable Information (PII): +- Sensitive PII includes data that can uniquely identify an individual and poses a significant risk if disclosed. This includes social security numbers, driver's license numbers, government-issued identification numbers, financial account details, personal health information, passwords, and API keys. + +Non-Sensitive Personally Identifiable Information (PII): +- Non-sensitive PII includes information that can identify an individual but doesn't pose significant risks if disclosed. Examples include first names, last names, ages, job titles, generic addresses, and generic email addresses. + +Instructions: +1. **Sensitive PII**: Never enter real or sensitive personal information, financial details, passwords, or API keys. 
This is vital to prevent identity theft, financial harm, unauthorized access, or breaches of confidential information. + +2. **Non-Sensitive PII**: When providing examples or testing the anonymization, use generic or fake data to ensure privacy. Experiment with various non-sensitive PII like first names, job titles, or generic email addresses without using any real or sensitive data. + +3. **Sensitive Data in Code**: If you include any code examples, refrain from adding sensitive information like API keys or other credentials. Use placeholder strings or generic values instead. + +4. **Anonymization Process**: Once you input a text containing personal information or sensitive data, I will carefully identify and anonymize any PII and sensitive elements. Names will be replaced with generic placeholders (e.g., "John Doe" to "Person A"). Addresses, emails, passwords, financial data, and API keys will be replaced with generic or redacted versions. + +Example 1: +Input: "Hi, my name is Alice, and my email is alice@example.com. My password is 'secure123'." +Output: "Hi, my name is , and my email is . My password is ''." + +Example 2: +Input: "My friend's name is Bob, and he lives at 456 Oak Avenue. He was born on 10th April 1985. His bank account number is 1234567890." +Output: "My friend's name is , and he lives at Avenue. He was born on . His bank account number is ." + +Example 3: +Input: "Hi, I'm Jane, and my API key is 'abc123xyz'." +Output: "Hi, I'm , and my API key is ''." + +Anonymize the following text: +""" + + userPrompt + + """Mask all text that would violate privacy. 
Please output only the masked text and nothing else.""" + ) + output = llm( + prompt, + max_tokens=100, + temperature=0.0, + top_p=1.0, + frequency_penalty=0.0, + presence_penalty=0.0, + stop=["\n"], + ) + privatePrompt = output.choices[0].text + + # OpenAI API call + response = openai.Completion.create( + model="davinci", + prompt=privatePrompt, + max_tokens=100, + temperature=0.5, + best_of=10, + ) + # Send the privatePrompt and the response from OpenAI to the frontend as a JSON object + return {"privatePrompt": privatePrompt, "response": response.choices[0].text} + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="127.0.0.1", port=8080) diff --git a/Code Crafters/app.py b/Code Crafters/app.py new file mode 100644 index 00000000..99c3fc42 --- /dev/null +++ b/Code Crafters/app.py @@ -0,0 +1,129 @@ +from database.db import init_database, insert_chat_message, get_chat_history +import streamlit as st +import requests + +def chatbot_response(input_text): + url = 'https://avid-infinity-386618.el.r.appspot.com/api' + payload = {'userPrompt': input_text} + try: + with requests.post(url, json=payload) as response: + response.raise_for_status() # Check for any HTTP errors + data = response.json() + print(data) + return data + except requests.exceptions.RequestException as e: + return f'Error: {e}' + +def display_chat_history(chat_history): + st.subheader("Chat History") + chat_container = st.empty() + chat_log = "" + # Process the chat history to make it more readable and display it + for index, (sender, message) in enumerate(chat_history): + if sender == "You": + chat_log += f'
{message}
' + elif sender == "PrivateGPT": + chat_log += f'
{message}
' + chat_container.write(chat_log, unsafe_allow_html=True) + +def clear_chat_history(): + st.session_state.chat_history = [] + + + +def main(): + # Initialize the database and chat history table + db_connected = init_database() + + if db_connected: + print("Database connection successful 🔗") + else: + print("Database connection failed ❌") + st.stop() + + # Get the chat history from the database + chat_history = get_chat_history() + + # Initialize SessionState to store chat history + if "chat_history" not in st.session_state: + st.session_state.chat_history = [] + + st.title("Private GPT") + + # Sidebar with settings + st.sidebar.title("Settings") + # Add more settings as needed + + # Apply custom CSS to change the background color + background_color = "#444654" + st.markdown( + f""" + + """, + unsafe_allow_html=True, + ) + + + # Add a container to hold the textbox + container = st.container() + container.markdown( + """ + + """, + unsafe_allow_html=True, + ) + user_input = container.text_input("You:", "",key="text_input") + + if st.button("Send"): + response = chatbot_response(user_input) + st.session_state.chat_history.append(("You", user_input)) + st.session_state.chat_history.append(("PrivateGPT", response["privatePrompt"])) + insert_chat_message("You", user_input) + insert_chat_message("PrivateGPT", response["privatePrompt"]) + user_input = "" # Clear the user input after sending + + # # Display previous chats in the sidebar + st.sidebar.subheader("Chat History") + st.sidebar.markdown( + """ + + """, + unsafe_allow_html=True, + ) + if st.sidebar.button("Show chat history"): + # Show chat history + display_chat_history(chat_history) + + if st.sidebar.button("Clear chat history"): + clear_chat_history() + + st.subheader("Chat") + chat_container = st.empty() + chat_log = "" + for sender, message in st.session_state.chat_history: + if sender == "You": + chat_log += f'
{message}
' + elif sender == "PrivateGPT": + chat_log += f'
{message}
' + + + chat_container.write(chat_log, unsafe_allow_html=True) + +if __name__ == "__main__": + main() diff --git a/Code Crafters/database/db.py b/Code Crafters/database/db.py new file mode 100644 index 00000000..3017dd3e --- /dev/null +++ b/Code Crafters/database/db.py @@ -0,0 +1,43 @@ +import sqlite3 + +# Function to initialize the database and chat history table +def init_database(): + try: + conn = sqlite3.connect("chat_history.db") + cursor = conn.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS chat_history ( + id INTEGER PRIMARY KEY, + sender TEXT, + message TEXT + ) + """ + ) + conn.commit() + conn.close() + return True + except Exception as e: + print(e) + return False + + +# Function to insert a chat message into the database +def insert_chat_message(sender, message): + conn = sqlite3.connect("chat_history.db") + cursor = conn.cursor() + cursor.execute( + "INSERT INTO chat_history (sender, message) VALUES (?, ?)", (sender, message) + ) + conn.commit() + conn.close() + + +# Function to retrieve all chat history from the database +def get_chat_history(): + conn = sqlite3.connect("chat_history.db") + cursor = conn.cursor() + cursor.execute("SELECT sender, message FROM chat_history") + chat_history = cursor.fetchall() + conn.close() + return chat_history diff --git a/Code Crafters/readme.md b/Code Crafters/readme.md new file mode 100644 index 00000000..d00dd963 --- /dev/null +++ b/Code Crafters/readme.md @@ -0,0 +1,115 @@ +# Private GPT +![Logo](/static/privategpt_logo.png) +## Team Details +### Team Name : Team Code Crafters +### Team Members : Jitin Krishna Chekka, Kodam Karthik +### Team Leader Email : [Jitin Krishna Chekka](mailto:jitinchekka2@gmail.com) + +## Problem Statement +Private GPT is a privacy-focused web application built at the TPF GenAI Rush Buildathon that enables users to interact with AI models, such as ChatGPT or Bard, while ensuring the protection of their personal and sensitive information. 
This project removes all private information from the prompts given by users and masks or anonymizes them, thus safeguarding their privacy. + +The main goal of Private GPT is to provide users with a secure and private environment to communicate with AI models without compromising their personal data. By removing sensitive information, such as names, locations, Aadhar or other Identity card numbers, financial information or any other identifiable details, users can freely engage in conversations and receive responses from AI models while maintaining their anonymity. + + +## Track +This project was built for the Track : Open Innovation + +## Brief Of Prototype +This project removes all private information from the prompts given by users and masks or anonymizes them, thus safeguarding their privacy. +- Masking of personal information: Private GPT automatically identifies and masks personal information, such as names, addresses, or any other sensitive details, before sending the user prompts to the AI models. +- Privacy protection: The project ensures that no personally identifiable information is stored or transmitted to external services, providing a secure and private environment for users. +- Seamless AI model integration: Private GPT integrates with AI models, such as ChatGPT or Bard, allowing users to obtain relevant and helpful responses without sacrificing their privacy. +- User-friendly interface: The web application provides an intuitive and easy-to-use interface for users to interact with the AI models and view their anonymized conversations. + +### Architecture +![Architecture](/static/arch.png) + +### Private GPT vs OpenAI ChatGPT +![Private GPT vs OpenAI ChatGPT](/static/privategpt_vs_chatgpt.png) + +## Redaction and Anonymization in Private GPT +![Redaction and Anonymization in Private GPT](/static/demo2.png) +## How it Works +Private GPT works in two steps: +1. 
Local Data Redaction: The user prompt is first redacted locally on your PC using a language model, such as Vicuna or any other well-performing open-source LLM, to remove any personal information. The redacted prompt is then sent to the AI model. +Enterprises can use these models on their own servers instead of running them on the user's PC. + +2. AI Response Redaction: The sanitized prompt is then sent to ChatGPT or Bard, which generates a response. This is processed and sent back to the user after filtering out any unintended information. + +## Tech Stack +- Python +- Streamlit: Python library for building user interfaces. Streamlit is a free and open-source framework to rapidly build and share beautiful machine learning and data science web apps. +- SQLite: Database used for storing all chat queries. +- HTML/CSS: Markup and styling for the web application. +- Vicuna: Vicuna is a LLaMA-based language model. It performs data redaction and anonymization. +- AI Models (e.g., ChatGPT, Bard): Deep learning models used to generate AI responses. +- FastAPI: FastAPI is a modern, fast (high-performance), web framework for building APIs with Python 3.6+ based on standard Python type hints. +- llama-cpp-python: A library for using LLaMA models in Python and C++. + +## Step by Step Code Execution Instructions: + +### Prerequisites + +To run the Private GPT project locally, ensure that you have the following dependencies installed on your machine: + +- Python 3.6 or higher +- Streamlit + + +### Installation + +1. Clone the repository: + + ```shell + git clone https://github.com/jitinchekka/tpf-buildathon.git + +2. Navigate to the project directory: + + ```shell + cd tpf-buildathon + +3. Install the dependencies: + + ```shell + pip install -r requirements.txt + +## Usage +1. Start the development server: + + ```shell + streamlit run app.py +2. Open [http://localhost:8501](http://localhost:8501) to view the web application in the browser. + +3. 
Enter your messages in the input field and click "Send" to initiate a conversation with the AI model. Your prompts will be anonymized before being sent to the AI model, ensuring your privacy. You can view the anonymized prompts in the console. + +## What I Learnt +Here are the top 5 key takeaways from building "Private GPT" during the 24-hour TPF GenAI Rush Buildathon with Team Code Crafters: +1. **Resource Management:** Prioritizing tasks, managing time effectively, and making rapid decisions to meet project requirements within the constraints of the hackathon. + +2. **Team Collaboration:** Learning to work efficiently as a team, assigning tasks based on individual strengths, and effectively communicating to achieve project goals within a tight timeframe. + +3. **Integration of AI Models:** Gaining hands-on experience in integrating AI models like ChatGPT and Bard into the web application and processing their responses for user interactions. + +4. **UI Development with Streamlit:** Building a user-friendly web application using Streamlit and creating interactive interfaces for users to communicate with AI models seamlessly. + +5. **Privacy-Focused AI Applications:** Understanding the importance of safeguarding user privacy in AI applications and implementing data redaction and anonymization techniques to protect sensitive information. + +These takeaways provided valuable insights into privacy-aware AI development, teamwork, AI model integration, web development, and effective project management under time pressure. + + +## Video Demo +[Private GPT Demo](https://www.loom.com/share/573f38e76bb547619c0d1b72da821ffa?sid=734a295a-aa34-4f6f-b7a1-a19540ff9502) + +## Contributing +Contributions are what make the open source community such an amazing place to learn, inspire, and create. Contributions are welcome! If you find any issues or have suggestions for improvements, please submit an issue or create a pull request. + +## License +Distributed under the MIT License. 
See `LICENSE` for more information. + +## Acknowledgements +- [OpenAI](https://openai.com/) - For providing the AI models and the inspiration for this project. +- [Streamlit](https://streamlit.io/) - For the open-source framework used to build the web application. +- [Vicuna-13B](https://github.com/lm-sys/FastChat/tree/main#api) - An open-source chatbot model trained by fine-tuning LLaMA. It is used to perform data redaction and anonymization. This model is also used as a fallback when the AI models are unable to generate a response. Check out the demo [here](https://chat.lmsys.org/). + +## Contact +For questions or inquiries, please contact me at [jitinchekka2 [at] gmail [dot] com](https://github.com/jitinchekka) or [LinkedIn](https://www.linkedin.com/in/jitin-krishna-chekka/). diff --git a/Code Crafters/requirements.txt b/Code Crafters/requirements.txt new file mode 100644 index 00000000..0364bfa2 --- /dev/null +++ b/Code Crafters/requirements.txt @@ -0,0 +1 @@ +streamlit == 1.21.0 \ No newline at end of file diff --git a/Code Crafters/static/arch.png b/Code Crafters/static/arch.png new file mode 100644 index 00000000..4faac9c9 Binary files /dev/null and b/Code Crafters/static/arch.png differ diff --git a/Code Crafters/static/demo2.png b/Code Crafters/static/demo2.png new file mode 100644 index 00000000..5283ec75 Binary files /dev/null and b/Code Crafters/static/demo2.png differ diff --git a/Code Crafters/static/privategpt_logo.png b/Code Crafters/static/privategpt_logo.png new file mode 100644 index 00000000..d714b5ee Binary files /dev/null and b/Code Crafters/static/privategpt_logo.png differ diff --git a/Code Crafters/static/privategpt_vs_chatgpt.png b/Code Crafters/static/privategpt_vs_chatgpt.png new file mode 100644 index 00000000..d95c33ce Binary files /dev/null and b/Code Crafters/static/privategpt_vs_chatgpt.png differ diff --git a/Code Crafters/static/thumbnail.png b/Code Crafters/static/thumbnail.png new file mode 100644 index 00000000..8430754c Binary 
files /dev/null and b/Code Crafters/static/thumbnail.png differ