Skip to content

Commit

Permalink
Merge branch 'langgenius:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Dofine-dufei authored Jan 14, 2025
2 parents ff1cbe3 + 435eddd commit 8f97226
Show file tree
Hide file tree
Showing 1,302 changed files with 31,508 additions and 11,833 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/api-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ jobs:
- name: Run Tool
run: poetry run -C api bash dev/pytest/pytest_tools.sh

- name: Run mypy
run: |
pushd api
poetry run python -m mypy --install-types --non-interactive .
popd
- name: Set up dotenvs
run: |
cp docker/.env.example docker/.env
Expand Down
27 changes: 27 additions & 0 deletions .github/workflows/style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,33 @@ jobs:
if: steps.changed-files.outputs.any_changed == 'true'
run: yarn run lint

docker-compose-template:
name: Docker Compose Template
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Check changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
files: |
docker/generate_docker_compose
docker/.env.example
docker/docker-compose-template.yaml
docker/docker-compose.yaml
- name: Generate Docker Compose
if: steps.changed-files.outputs.any_changed == 'true'
run: |
cd docker
./generate_docker_compose
- name: Check for changes
if: steps.changed-files.outputs.any_changed == 'true'
run: git diff --exit-code

superlinter:
name: SuperLinter
Expand Down
10 changes: 7 additions & 3 deletions api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ FILES_ACCESS_TIMEOUT=300
# Access token expiration time in minutes
ACCESS_TOKEN_EXPIRE_MINUTES=60

# Refresh token expiration time in days
REFRESH_TOKEN_EXPIRE_DAYS=30

# celery configuration
CELERY_BROKER_URL=redis://:difyai123456@localhost:6379/1

Expand Down Expand Up @@ -65,7 +68,7 @@ OPENDAL_FS_ROOT=storage

# S3 Storage configuration
S3_USE_AWS_MANAGED_IAM=false
S3_ENDPOINT=https://your-bucket-name.storage.s3.clooudflare.com
S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
Expand All @@ -74,7 +77,7 @@ S3_REGION=your-region
# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=yout-container-name
AZURE_BLOB_CONTAINER_NAME=your-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net

# Aliyun oss Storage configuration
Expand All @@ -88,7 +91,7 @@ ALIYUN_OSS_REGION=your-region
ALIYUN_OSS_PATH=your-path

# Google Storage configuration
GOOGLE_STORAGE_BUCKET_NAME=yout-bucket-name
GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string

# Tencent COS Storage configuration
Expand Down Expand Up @@ -399,6 +402,7 @@ INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000
WORKFLOW_MAX_EXECUTION_STEPS=500
WORKFLOW_MAX_EXECUTION_TIME=1200
WORKFLOW_CALL_MAX_DEPTH=5
WORKFLOW_PARALLEL_DEPTH_LIMIT=3
MAX_VARIABLE_SIZE=204800

# App configuration
Expand Down
7 changes: 3 additions & 4 deletions api/.ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,9 @@ ignore = [
"SIM105", # suppressible-exception
"SIM107", # return-in-try-except-finally
"SIM108", # if-else-block-instead-of-if-exp
"SIM113", # eumerate-for-loop
"SIM113", # enumerate-for-loop
"SIM117", # multiple-with-statements
"SIM210", # if-expr-with-true-false
"SIM300", # yoda-conditions,
]

[lint.per-file-ignores]
Expand All @@ -86,11 +85,11 @@ ignore = [
]
"tests/*" = [
"F811", # redefined-while-unused
"F401", # unused-import
]

[lint.pyflakes]
extend-generics = [
allowed-unused-imports = [
"_pytest.monkeypatch",
"tests.integration_tests",
"tests.unit_tests",
]
2 changes: 1 addition & 1 deletion api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ RUN apt-get update \
&& echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
&& apt-get update \
# For Security
&& apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-8 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \
&& apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.19+dfsg-1 perl=5.40.0-8 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \
# install a chinese font to support the use of tools like matplotlib
&& apt-get install -y fonts-noto-cjk \
&& apt-get autoremove -y \
Expand Down
29 changes: 20 additions & 9 deletions api/app.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
from libs import version_utils

# preparation before creating app
version_utils.check_supported_python_version()
import os
import sys


def is_db_command():
    """Return True when the process was started as ``flask db ...``.

    The check passes only if ``sys.argv[0]`` ends with ``flask`` and the
    first command-line argument is exactly ``db``.

    Returns:
        bool: True for a Flask database (migration) command, False otherwise.
    """
    import sys

    argv = sys.argv
    # The length guard must come first so argv[1] is never indexed when the
    # program was started without arguments.
    return len(argv) > 1 and argv[0].endswith("flask") and argv[1] == "db"
Expand All @@ -18,10 +14,25 @@ def is_db_command():

app = create_migrations_app()
else:
from app_factory import create_app
from libs import threadings_utils
# It seems that JetBrains Python debugger does not work well with gevent,
# so we need to disable gevent in debug mode.
# If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent.
if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
from gevent import monkey # type: ignore

# gevent
monkey.patch_all()

from grpc.experimental import gevent as grpc_gevent # type: ignore

threadings_utils.apply_gevent_threading_patch()
# grpc gevent
grpc_gevent.init_gevent()

import psycogreen.gevent # type: ignore

psycogreen.gevent.patch_psycopg()

from app_factory import create_app

app = create_app()
celery = app.extensions["celery"]
Expand Down
27 changes: 18 additions & 9 deletions api/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,7 @@ def migrate_annotation_vector_database():
try:
# get apps info
apps = (
db.session.query(App)
.filter(App.status == "normal")
App.query.filter(App.status == "normal")
.order_by(App.created_at.desc())
.paginate(page=page, per_page=50)
)
Expand Down Expand Up @@ -285,8 +284,7 @@ def migrate_knowledge_vector_database():
while True:
try:
datasets = (
db.session.query(Dataset)
.filter(Dataset.indexing_technique == "high_quality")
Dataset.query.filter(Dataset.indexing_technique == "high_quality")
.order_by(Dataset.created_at.desc())
.paginate(page=page, per_page=50)
)
Expand Down Expand Up @@ -450,7 +448,8 @@ def convert_to_agent_apps():
if app_id not in proceeded_app_ids:
proceeded_app_ids.append(app_id)
app = db.session.query(App).filter(App.id == app_id).first()
apps.append(app)
if app is not None:
apps.append(app)

if len(apps) == 0:
break
Expand Down Expand Up @@ -555,14 +554,20 @@ def create_tenant(email: str, language: Optional[str] = None, name: Optional[str
if language not in languages:
language = "en-US"

name = name.strip()
# Validates name encoding for non-Latin characters.
name = name.strip().encode("utf-8").decode("utf-8") if name else None

# generate random password
new_password = secrets.token_urlsafe(16)

# register account
account = RegisterService.register(email=email, name=account_name, password=new_password, language=language)

account = RegisterService.register(
email=email,
name=account_name,
password=new_password,
language=language,
create_workspace_required=False,
)
TenantService.create_owner_tenant_if_not_exist(account, name)

click.echo(
Expand All @@ -582,7 +587,7 @@ def upgrade_db():
click.echo(click.style("Starting database migration.", fg="green"))

# run db migration
import flask_migrate
import flask_migrate # type: ignore

flask_migrate.upgrade()

Expand Down Expand Up @@ -620,6 +625,10 @@ def fix_app_site_missing():

try:
app = db.session.query(App).filter(App.id == app_id).first()
if not app:
print(f"App {app_id} not found")
continue

tenant = app.tenant
if tenant:
accounts = tenant.get_accounts()
Expand Down
39 changes: 30 additions & 9 deletions api/configs/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ class HttpConfig(BaseSettings):
)

@computed_field
@property
def CONSOLE_CORS_ALLOW_ORIGINS(self) -> list[str]:
return self.inner_CONSOLE_CORS_ALLOW_ORIGINS.split(",")

Expand All @@ -250,7 +249,6 @@ def CONSOLE_CORS_ALLOW_ORIGINS(self) -> list[str]:
)

@computed_field
@property
def WEB_API_CORS_ALLOW_ORIGINS(self) -> list[str]:
return self.inner_WEB_API_CORS_ALLOW_ORIGINS.split(",")

Expand Down Expand Up @@ -433,6 +431,11 @@ class WorkflowConfig(BaseSettings):
default=5,
)

WORKFLOW_PARALLEL_DEPTH_LIMIT: PositiveInt = Field(
description="Maximum allowed depth for nested parallel executions",
default=3,
)

MAX_VARIABLE_SIZE: PositiveInt = Field(
description="Maximum size in bytes for a single variable in workflows. Default to 200 KB.",
default=200 * 1024,
Expand Down Expand Up @@ -485,6 +488,11 @@ class AuthConfig(BaseSettings):
default=60,
)

REFRESH_TOKEN_EXPIRE_DAYS: PositiveFloat = Field(
description="Expiration time for refresh tokens in days",
default=30,
)

LOGIN_LOCKOUT_DURATION: PositiveInt = Field(
description="Time (in seconds) a user must wait before retrying login after exceeding the rate limit.",
default=86400,
Expand Down Expand Up @@ -598,7 +606,7 @@ class RagEtlConfig(BaseSettings):

UNSTRUCTURED_API_KEY: Optional[str] = Field(
description="API key for Unstructured.io service",
default=None,
default="",
)

SCARF_NO_ANALYTICS: Optional[str] = Field(
Expand Down Expand Up @@ -664,6 +672,11 @@ class IndexingConfig(BaseSettings):
default=4000,
)

CHILD_CHUNKS_PREVIEW_NUMBER: PositiveInt = Field(
description="Maximum number of child chunks to preview",
default=50,
)


class MultiModalTransferConfig(BaseSettings):
MULTIMODAL_SEND_FORMAT: Literal["base64", "url"] = Field(
Expand Down Expand Up @@ -710,27 +723,27 @@ class PositionConfig(BaseSettings):
default="",
)

@computed_field
@property
def POSITION_PROVIDER_PINS_LIST(self) -> list[str]:
return [item.strip() for item in self.POSITION_PROVIDER_PINS.split(",") if item.strip() != ""]

@computed_field
@property
def POSITION_PROVIDER_INCLUDES_SET(self) -> set[str]:
return {item.strip() for item in self.POSITION_PROVIDER_INCLUDES.split(",") if item.strip() != ""}

@computed_field
@property
def POSITION_PROVIDER_EXCLUDES_SET(self) -> set[str]:
return {item.strip() for item in self.POSITION_PROVIDER_EXCLUDES.split(",") if item.strip() != ""}

@computed_field
@property
def POSITION_TOOL_PINS_LIST(self) -> list[str]:
return [item.strip() for item in self.POSITION_TOOL_PINS.split(",") if item.strip() != ""]

@computed_field
@property
def POSITION_TOOL_INCLUDES_SET(self) -> set[str]:
return {item.strip() for item in self.POSITION_TOOL_INCLUDES.split(",") if item.strip() != ""}

@computed_field
@property
def POSITION_TOOL_EXCLUDES_SET(self) -> set[str]:
return {item.strip() for item in self.POSITION_TOOL_EXCLUDES.split(",") if item.strip() != ""}

Expand Down Expand Up @@ -762,6 +775,13 @@ class LoginConfig(BaseSettings):
)


class AccountConfig(BaseSettings):
    """Settings for account management.

    Currently holds only the validity window of account deletion tokens.
    """

    # Lifetime of an account deletion token, in minutes (defaults to 5).
    ACCOUNT_DELETION_TOKEN_EXPIRY_MINUTES: PositiveInt = Field(
        description="Duration in minutes for which an account deletion token remains valid",
        default=5,
    )


class FeatureConfig(
# place the configs in alphabet order
AppExecutionConfig,
Expand Down Expand Up @@ -789,6 +809,7 @@ class FeatureConfig(
WorkflowNodeExecutionConfig,
WorkspaceConfig,
LoginConfig,
AccountConfig,
# hosted services config
HostedServiceConfig,
CeleryBeatConfig,
Expand Down
4 changes: 0 additions & 4 deletions api/configs/middleware/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ class DatabaseConfig(BaseSettings):
)

@computed_field
@property
def SQLALCHEMY_DATABASE_URI(self) -> str:
db_extras = (
f"{self.DB_EXTRAS}&client_encoding={self.DB_CHARSET}" if self.DB_CHARSET else self.DB_EXTRAS
Expand Down Expand Up @@ -168,7 +167,6 @@ def SQLALCHEMY_DATABASE_URI(self) -> str:
)

@computed_field
@property
def SQLALCHEMY_ENGINE_OPTIONS(self) -> dict[str, Any]:
return {
"pool_size": self.SQLALCHEMY_POOL_SIZE,
Expand Down Expand Up @@ -206,15 +204,13 @@ class CeleryConfig(DatabaseConfig):
)

@computed_field
@property
def CELERY_RESULT_BACKEND(self) -> str | None:
return (
"db+{}".format(self.SQLALCHEMY_DATABASE_URI)
if self.CELERY_BACKEND == "database"
else self.CELERY_BROKER_URL
)

@computed_field
@property
def BROKER_USE_SSL(self) -> bool:
return self.CELERY_BROKER_URL.startswith("rediss://") if self.CELERY_BROKER_URL else False
Expand Down
Loading

0 comments on commit 8f97226

Please sign in to comment.