From 668502fd94c45ee4df5ac358af64741b823325c8 Mon Sep 17 00:00:00 2001
From: OliverBryant <2713999266@qq.com>
Date: Wed, 22 Oct 2025 15:45:53 +0800
Subject: [PATCH 05/25] FEAT: add model backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
xinference/api/restful_api.py | 46 ++++++
xinference/core/supervisor.py | 273 ++++++++++++++++++++++++++++++++++
2 files changed, 319 insertions(+)
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
index 84c7b18d80..8dea5ab6c8 100644
--- a/xinference/api/restful_api.py
+++ b/xinference/api/restful_api.py
@@ -198,6 +198,11 @@ class RegisterModelRequest(BaseModel):
persist: bool
+class AddModelRequest(BaseModel):
+ model_type: str
+ model_json: Dict[str, Any]
+
+
class BuildGradioInterfaceRequest(BaseModel):
model_type: str
model_name: str
@@ -900,6 +905,16 @@ async def internal_exception_handler(request: Request, exc: Exception):
else None
),
)
+ self._router.add_api_route(
+ "/v1/models/add",
+ self.add_model,
+ methods=["POST"],
+ dependencies=(
+ [Security(self._auth_service, scopes=["models:add"])]
+ if self.is_authenticated()
+ else None
+ ),
+ )
self._router.add_api_route(
"/v1/cache/models",
self.list_cached_models,
@@ -3123,6 +3138,37 @@ async def unregister_model(self, model_type: str, model_name: str) -> JSONRespon
raise HTTPException(status_code=500, detail=str(e))
return JSONResponse(content=None)
+ async def add_model(self, request: Request) -> JSONResponse:
+ try:
+ # Parse request
+ raw_json = await request.json()
+ logger.info(f"[DEBUG] add_model API received raw JSON: {json.dumps(raw_json, indent=2)}")
+
+ body = AddModelRequest.parse_obj(raw_json)
+ model_type = body.model_type
+ model_json = body.model_json
+
+ logger.info(f"[DEBUG] Parsed request - model_type: {model_type}")
+ logger.info(f"[DEBUG] Parsed request - model_json keys: {list(model_json.keys())}")
+ logger.info(f"[DEBUG] model_name from JSON: {model_json.get('model_name', 'NOT_FOUND')}")
+
+ # Call supervisor
+ supervisor_ref = await self._get_supervisor_ref()
+ logger.info(f"[DEBUG] Got supervisor ref: {supervisor_ref}")
+
+ await supervisor_ref.add_model(model_type, model_json)
+
+ logger.info(f"[DEBUG] Supervisor add_model completed successfully")
+
+ except ValueError as re:
+ logger.error(f"[DEBUG] ValueError in add_model API: {re}", exc_info=True)
+ raise HTTPException(status_code=400, detail=str(re))
+ except Exception as e:
+ logger.error(f"[DEBUG] Unexpected error in add_model API: {e}", exc_info=True)
+ raise HTTPException(status_code=500, detail=str(e))
+
+ return JSONResponse(content={"message": f"Model added successfully for type: {model_type}"})
+
async def list_model_registrations(
self, model_type: str, detailed: bool = Query(False)
) -> JSONResponse:
diff --git a/xinference/core/supervisor.py b/xinference/core/supervisor.py
index 1ed96cd703..ade4830035 100644
--- a/xinference/core/supervisor.py
+++ b/xinference/core/supervisor.py
@@ -14,6 +14,7 @@
import asyncio
import itertools
+import json
import os
import signal
import time
@@ -932,6 +933,278 @@ async def register_model(
else:
raise ValueError(f"Unsupported model type: {model_type}")
+ @log_async(logger=logger)
+ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
+ """
+ Add a new model by parsing the provided JSON and registering it.
+
+ Args:
+ model_type: Type of model (LLM, embedding, image, etc.)
+ model_json: JSON configuration for the model
+ """
+ logger.info(f"[DEBUG] Supervisor add_model called with model_type: {model_type}")
+ logger.info(f"[DEBUG] Supervisor add_model received JSON with keys: {list(model_json.keys())}")
+ logger.info(f"[DEBUG] JSON content: {json.dumps(model_json, indent=2)}")
+
+ # Validate model type
+ supported_types = list(self._custom_register_type_to_cls.keys())
+ logger.info(f"[DEBUG] Supported model types: {supported_types}")
+
+ if model_type not in self._custom_register_type_to_cls:
+ logger.error(f"[DEBUG] Unsupported model type: {model_type}")
+ raise ValueError(
+ f"Unsupported model type '{model_type}'. "
+ f"Supported types are: {', '.join(supported_types)}"
+ )
+
+ logger.info(f"[DEBUG] Model type validation passed for: {model_type}")
+
+ # Get the appropriate model class and register function
+ (
+ model_spec_cls,
+ register_fn,
+ unregister_fn,
+ generate_fn,
+ ) = self._custom_register_type_to_cls[model_type]
+
+ logger.info(f"[DEBUG] Got model spec class: {model_spec_cls}")
+ logger.info(f"[DEBUG] Got register function: {register_fn}")
+ logger.info(f"[DEBUG] Got unregister function: {unregister_fn}")
+ logger.info(f"[DEBUG] Got generate function: {generate_fn}")
+
+ # Validate required fields
+ required_fields = ["model_name", "model_specs"]
+ logger.info(f"[DEBUG] Checking required fields: {required_fields}")
+
+ for field in required_fields:
+ if field not in model_json:
+ logger.error(f"[DEBUG] Missing required field: {field}")
+ raise ValueError(f"Missing required field: {field}")
+ logger.info(f"[DEBUG] Field {field} found: {type(model_json[field])}")
+
+ # Validate model name format
+ from ..model.utils import is_valid_model_name
+ model_name = model_json["model_name"]
+ logger.info(f"[DEBUG] Validating model name: {model_name}")
+
+ if not is_valid_model_name(model_name):
+ logger.error(f"[DEBUG] Invalid model name format: {model_name}")
+ raise ValueError(f"Invalid model name format: {model_name}")
+
+ logger.info(f"[DEBUG] Model name validation passed")
+
+ # Convert model hub JSON format to Xinference expected format
+ try:
+ logger.info(f"[DEBUG] Converting model JSON format if needed...")
+ converted_model_json = self._convert_model_json_format(model_json)
+ logger.info(f"[DEBUG] JSON conversion completed successfully")
+ except Exception as e:
+ logger.error(f"[DEBUG] JSON conversion failed: {e}", exc_info=True)
+ raise ValueError(f"Failed to convert model JSON format: {str(e)}")
+
+ # Parse the JSON into the appropriate model spec
+ try:
+ logger.info(f"[DEBUG] Attempting to parse converted JSON with {model_spec_cls}")
+ model_spec = model_spec_cls.parse_obj(converted_model_json)
+ logger.info(f"[DEBUG] JSON parsing successful, model_spec created: {model_spec}")
+ except Exception as e:
+ logger.error(f"[DEBUG] JSON parsing failed: {e}", exc_info=True)
+ raise ValueError(f"Invalid model JSON format: {str(e)}")
+
+ # Check if model already exists
+ try:
+ logger.info(f"[DEBUG] Checking if model '{model_spec.model_name}' already exists")
+ existing_model = await self.get_model_registration(
+ model_type, model_spec.model_name
+ )
+ logger.info(f"[DEBUG] Existing model check result: {existing_model}")
+
+ if existing_model is not None:
+ logger.error(f"[DEBUG] Model already exists: {model_spec.model_name}")
+ raise ValueError(
+ f"Model '{model_spec.model_name}' already exists for type '{model_type}'. "
+ f"Please choose a different model name or remove the existing model first."
+ )
+
+ logger.info(f"[DEBUG] Model does not exist, can proceed with registration")
+
+ except ValueError as e:
+ if "not found" in str(e):
+ # Model doesn't exist, we can proceed
+ logger.info(f"[DEBUG] Model not found (expected): {e}")
+ pass
+ else:
+ # Re-raise validation errors
+ logger.error(f"[DEBUG] ValueError during model existence check: {e}")
+ raise e
+ except Exception as ex:
+ logger.error(f"[DEBUG] Unexpected error checking model registration for '{model_spec.model_name}': {ex}", exc_info=True)
+ raise ValueError(f"Failed to validate model registration: {str(ex)}")
+
+ # Register the model (persist=True for adding models)
+ try:
+ logger.info(f"[DEBUG] Starting model registration process")
+ logger.info(f"[DEBUG] Calling register_fn with persist=True")
+
+ register_fn(model_spec, persist=True)
+ logger.info(f"[DEBUG] register_fn completed successfully")
+
+ # Record model version
+ logger.info(f"[DEBUG] Generating version info")
+ version_info = generate_fn(model_spec)
+ logger.info(f"[DEBUG] Version info generated: {version_info}")
+
+ logger.info(f"[DEBUG] Recording model version to cache tracker")
+ await self._cache_tracker_ref.record_model_version(
+ version_info, self.address
+ )
+ logger.info(f"[DEBUG] Model version recorded successfully")
+
+ # Sync to workers if not local deployment
+ is_local = self.is_local_deployment()
+ logger.info(f"[DEBUG] Is local deployment: {is_local}")
+
+ if not is_local:
+ logger.info(f"[DEBUG] Syncing model to workers")
+ await self._sync_register_model(
+ model_type, converted_model_json, True, model_spec.model_name
+ )
+ logger.info(f"[DEBUG] Model synced to workers successfully")
+
+ logger.info(f"Successfully added model '{model_spec.model_name}' (type: {model_type})")
+ logger.info(f"[DEBUG] add_model process completed successfully")
+
+ except ValueError as e:
+ # Validation errors - don't need cleanup as model wasn't registered
+ logger.error(f"[DEBUG] Validation error during model registration: {e}")
+ raise e
+ except Exception as e:
+ # Unexpected errors - attempt cleanup
+ logger.error(f"[DEBUG] Unexpected error during model registration: {e}", exc_info=True)
+ try:
+ logger.info(f"[DEBUG] Attempting cleanup of failed registration")
+ unregister_fn(model_spec.model_name, raise_error=False)
+ logger.info(f"[DEBUG] Cleanup completed successfully")
+ except Exception as cleanup_error:
+ logger.warning(f"[DEBUG] Cleanup failed: {cleanup_error}")
+ raise ValueError(f"Failed to register model '{model_spec.model_name}': {str(e)}")
+
+ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Convert model hub JSON format to Xinference expected format.
+
+ The input format uses a nested 'model_src' structure, but Xinference expects
+ flattened fields at the spec level.
+ """
+ logger.info(f"[DEBUG] Starting JSON format conversion")
+
+ # Check if conversion is needed (detect model_src structure)
+ needs_conversion = False
+ for spec in model_json["model_specs"]:
+ if "model_src" in spec:
+ needs_conversion = True
+ break
+
+ if not needs_conversion:
+ logger.info(f"[DEBUG] No conversion needed, JSON is already in expected format")
+ return model_json
+
+ logger.info(f"[DEBUG] Converting model_src nested structure to flattened format")
+
+ converted = model_json.copy()
+ converted_specs = []
+
+ for spec in model_json["model_specs"]:
+ model_format = spec["model_format"]
+ model_size = spec["model_size_in_billions"]
+
+ logger.info(f"[DEBUG] Processing spec: {model_format} - {model_size}B")
+
+ if "model_src" not in spec:
+ # No model_src, keep spec as is but ensure required fields
+ converted_spec = spec.copy()
+ if "quantization" not in converted_spec:
+ converted_spec["quantization"] = "none" # Default
+ converted_specs.append(converted_spec)
+ continue
+
+ model_src = spec["model_src"]
+
+ # Handle different model sources
+ if "huggingface" in model_src:
+ hf_info = model_src["huggingface"]
+ quantizations = hf_info.get("quantizations", ["none"])
+
+ logger.info(f"[DEBUG] Found {len(quantizations)} quantizations for {model_format}")
+
+ # Create separate specs for each quantization
+ for quant in quantizations:
+ converted_spec = {
+ "model_format": model_format,
+ "model_size_in_billions": model_size,
+ "quantization": quant,
+ "model_hub": "huggingface",
+ }
+
+ # Add common fields
+ if "model_id" in hf_info:
+ converted_spec["model_id"] = hf_info["model_id"]
+ if "model_revision" in hf_info:
+ converted_spec["model_revision"] = hf_info["model_revision"]
+
+ # Format-specific fields
+ if model_format == "ggufv2":
+ if "model_id" in hf_info:
+ converted_spec["model_id"] = hf_info["model_id"]
+ if "model_file_name_template" in hf_info:
+ converted_spec["model_file_name_template"] = hf_info["model_file_name_template"]
+ else:
+ # Default template
+ model_name = model_json["model_name"]
+ converted_spec["model_file_name_template"] = f"{model_name}-{{quantization}}.gguf"
+ elif model_format in ["pytorch", "mlx"]:
+ if "model_id" in hf_info:
+ converted_spec["model_id"] = hf_info["model_id"]
+ if "model_revision" in hf_info:
+ converted_spec["model_revision"] = hf_info["model_revision"]
+
+ converted_specs.append(converted_spec)
+ logger.debug(f"[DEBUG] Created spec: {model_format} - {quant}")
+
+ elif "modelscope" in model_src:
+ # Handle ModelScope similarly
+ ms_info = model_src["modelscope"]
+ quantizations = ms_info.get("quantizations", ["none"])
+
+ for quant in quantizations:
+ converted_spec = {
+ "model_format": model_format,
+ "model_size_in_billions": model_size,
+ "quantization": quant,
+ "model_hub": "modelscope",
+ }
+
+ if "model_id" in ms_info:
+ converted_spec["model_id"] = ms_info["model_id"]
+ if "model_revision" in ms_info:
+ converted_spec["model_revision"] = ms_info["model_revision"]
+
+ converted_specs.append(converted_spec)
+
+ else:
+ # Unknown model source, skip or handle as error
+ logger.warning(f"[DEBUG] Unknown model source in spec: {list(model_src.keys())}")
+ # Keep original spec but add required fields
+ converted_spec = spec.copy()
+ if "quantization" not in converted_spec:
+ converted_spec["quantization"] = "none"
+ converted_specs.append(converted_spec)
+
+ converted["model_specs"] = converted_specs
+ logger.info(f"[DEBUG] Conversion completed: {len(model_json['model_specs'])} -> {len(converted_specs)} specs")
+
+ return converted
+
async def _sync_register_model(
self, model_type: str, model: str, persist: bool, model_name: str
):
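For reference, a minimal sketch of how a client could call the /v1/models/add
endpoint introduced above. The address (localhost:9997, Xinference's default)
and the model JSON are illustrative assumptions, not part of the patch:

    import requests  # any HTTP client works; requests is used here for brevity

    # Hypothetical model JSON in the model-hub format that add_model accepts;
    # model_name and model_specs are the fields the supervisor validates.
    model_json = {
        "model_name": "my-llm",
        "model_specs": [
            {
                "model_format": "ggufv2",
                "model_size_in_billions": 7,
                "model_src": {
                    "huggingface": {
                        "model_id": "org/my-llm-GGUF",
                        "quantizations": ["Q4_K_M", "Q8_0"],
                    }
                },
            }
        ],
    }

    resp = requests.post(
        "http://localhost:9997/v1/models/add",
        json={"model_type": "LLM", "model_json": model_json},
    )
    resp.raise_for_status()
    print(resp.json())  # {'message': 'Model added successfully for type: LLM'}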
From 3cc4aa1a67b59fe5c84406d99fd54cb15a0872aa Mon Sep 17 00:00:00 2001
From: OliverBryant <2713999266@qq.com>
Date: Wed, 22 Oct 2025 15:52:13 +0800
Subject: [PATCH 06/25] FEAT: add model backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
xinference/api/restful_api.py | 20 +++++++---
xinference/core/supervisor.py | 71 ++++++++++++++++++++++++++---------
2 files changed, 69 insertions(+), 22 deletions(-)
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
index 8dea5ab6c8..57a623ed5d 100644
--- a/xinference/api/restful_api.py
+++ b/xinference/api/restful_api.py
@@ -3142,15 +3142,21 @@ async def add_model(self, request: Request) -> JSONResponse:
try:
# Parse request
raw_json = await request.json()
- logger.info(f"[DEBUG] add_model API received raw JSON: {json.dumps(raw_json, indent=2)}")
+ logger.info(
+ f"[DEBUG] add_model API received raw JSON: {json.dumps(raw_json, indent=2)}"
+ )
body = AddModelRequest.parse_obj(raw_json)
model_type = body.model_type
model_json = body.model_json
logger.info(f"[DEBUG] Parsed request - model_type: {model_type}")
- logger.info(f"[DEBUG] Parsed request - model_json keys: {list(model_json.keys())}")
- logger.info(f"[DEBUG] model_name from JSON: {model_json.get('model_name', 'NOT_FOUND')}")
+ logger.info(
+ f"[DEBUG] Parsed request - model_json keys: {list(model_json.keys())}"
+ )
+ logger.info(
+ f"[DEBUG] model_name from JSON: {model_json.get('model_name', 'NOT_FOUND')}"
+ )
# Call supervisor
supervisor_ref = await self._get_supervisor_ref()
@@ -3164,10 +3170,14 @@ async def add_model(self, request: Request) -> JSONResponse:
logger.error(f"[DEBUG] ValueError in add_model API: {re}", exc_info=True)
raise HTTPException(status_code=400, detail=str(re))
except Exception as e:
- logger.error(f"[DEBUG] Unexpected error in add_model API: {e}", exc_info=True)
+ logger.error(
+ f"[DEBUG] Unexpected error in add_model API: {e}", exc_info=True
+ )
raise HTTPException(status_code=500, detail=str(e))
- return JSONResponse(content={"message": f"Model added successfully for type: {model_type}"})
+ return JSONResponse(
+ content={"message": f"Model added successfully for type: {model_type}"}
+ )
async def list_model_registrations(
self, model_type: str, detailed: bool = Query(False)
diff --git a/xinference/core/supervisor.py b/xinference/core/supervisor.py
index ade4830035..33b728653c 100644
--- a/xinference/core/supervisor.py
+++ b/xinference/core/supervisor.py
@@ -942,8 +942,12 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
model_type: Type of model (LLM, embedding, image, etc.)
model_json: JSON configuration for the model
"""
- logger.info(f"[DEBUG] Supervisor add_model called with model_type: {model_type}")
- logger.info(f"[DEBUG] Supervisor add_model received JSON with keys: {list(model_json.keys())}")
+ logger.info(
+ f"[DEBUG] Supervisor add_model called with model_type: {model_type}"
+ )
+ logger.info(
+ f"[DEBUG] Supervisor add_model received JSON with keys: {list(model_json.keys())}"
+ )
logger.info(f"[DEBUG] JSON content: {json.dumps(model_json, indent=2)}")
# Validate model type
@@ -984,6 +988,7 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
# Validate model name format
from ..model.utils import is_valid_model_name
+
model_name = model_json["model_name"]
logger.info(f"[DEBUG] Validating model name: {model_name}")
@@ -1004,16 +1009,22 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
# Parse the JSON into the appropriate model spec
try:
- logger.info(f"[DEBUG] Attempting to parse converted JSON with {model_spec_cls}")
+ logger.info(
+ f"[DEBUG] Attempting to parse converted JSON with {model_spec_cls}"
+ )
model_spec = model_spec_cls.parse_obj(converted_model_json)
- logger.info(f"[DEBUG] JSON parsing successful, model_spec created: {model_spec}")
+ logger.info(
+ f"[DEBUG] JSON parsing successful, model_spec created: {model_spec}"
+ )
except Exception as e:
logger.error(f"[DEBUG] JSON parsing failed: {e}", exc_info=True)
raise ValueError(f"Invalid model JSON format: {str(e)}")
# Check if model already exists
try:
- logger.info(f"[DEBUG] Checking if model '{model_spec.model_name}' already exists")
+ logger.info(
+ f"[DEBUG] Checking if model '{model_spec.model_name}' already exists"
+ )
existing_model = await self.get_model_registration(
model_type, model_spec.model_name
)
@@ -1038,7 +1049,10 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
logger.error(f"[DEBUG] ValueError during model existence check: {e}")
raise e
except Exception as ex:
- logger.error(f"[DEBUG] Unexpected error checking model registration for '{model_spec.model_name}': {ex}", exc_info=True)
+ logger.error(
+ f"[DEBUG] Unexpected error checking model registration for '{model_spec.model_name}': {ex}",
+ exc_info=True,
+ )
raise ValueError(f"Failed to validate model registration: {str(ex)}")
# Register the model (persist=True for adding models)
@@ -1066,12 +1080,16 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
if not is_local:
logger.info(f"[DEBUG] Syncing model to workers")
+ # Convert back to JSON string for sync compatibility
+ model_json_str = json.dumps(converted_model_json)
await self._sync_register_model(
- model_type, converted_model_json, True, model_spec.model_name
+ model_type, model_json_str, True, model_spec.model_name
)
logger.info(f"[DEBUG] Model synced to workers successfully")
- logger.info(f"Successfully added model '{model_spec.model_name}' (type: {model_type})")
+ logger.info(
+ f"Successfully added model '{model_spec.model_name}' (type: {model_type})"
+ )
logger.info(f"[DEBUG] add_model process completed successfully")
except ValueError as e:
@@ -1080,14 +1098,19 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
raise e
except Exception as e:
# Unexpected errors - attempt cleanup
- logger.error(f"[DEBUG] Unexpected error during model registration: {e}", exc_info=True)
+ logger.error(
+ f"[DEBUG] Unexpected error during model registration: {e}",
+ exc_info=True,
+ )
try:
logger.info(f"[DEBUG] Attempting cleanup of failed registration")
unregister_fn(model_spec.model_name, raise_error=False)
logger.info(f"[DEBUG] Cleanup completed successfully")
except Exception as cleanup_error:
logger.warning(f"[DEBUG] Cleanup failed: {cleanup_error}")
- raise ValueError(f"Failed to register model '{model_spec.model_name}': {str(e)}")
+ raise ValueError(
+ f"Failed to register model '{model_spec.model_name}': {str(e)}"
+ )
def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -1106,10 +1129,14 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
break
if not needs_conversion:
- logger.info(f"[DEBUG] No conversion needed, JSON is already in expected format")
+ logger.info(
+ f"[DEBUG] No conversion needed, JSON is already in expected format"
+ )
return model_json
- logger.info(f"[DEBUG] Converting model_src nested structure to flattened format")
+ logger.info(
+ f"[DEBUG] Converting model_src nested structure to flattened format"
+ )
converted = model_json.copy()
converted_specs = []
@@ -1135,7 +1162,9 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
hf_info = model_src["huggingface"]
quantizations = hf_info.get("quantizations", ["none"])
- logger.info(f"[DEBUG] Found {len(quantizations)} quantizations for {model_format}")
+ logger.info(
+ f"[DEBUG] Found {len(quantizations)} quantizations for {model_format}"
+ )
# Create separate specs for each quantization
for quant in quantizations:
@@ -1157,11 +1186,15 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
if "model_id" in hf_info:
converted_spec["model_id"] = hf_info["model_id"]
if "model_file_name_template" in hf_info:
- converted_spec["model_file_name_template"] = hf_info["model_file_name_template"]
+ converted_spec["model_file_name_template"] = hf_info[
+ "model_file_name_template"
+ ]
else:
# Default template
model_name = model_json["model_name"]
- converted_spec["model_file_name_template"] = f"{model_name}-{{quantization}}.gguf"
+ converted_spec["model_file_name_template"] = (
+ f"{model_name}-{{quantization}}.gguf"
+ )
elif model_format in ["pytorch", "mlx"]:
if "model_id" in hf_info:
converted_spec["model_id"] = hf_info["model_id"]
@@ -1193,7 +1226,9 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
else:
# Unknown model source, skip or handle as error
- logger.warning(f"[DEBUG] Unknown model source in spec: {list(model_src.keys())}")
+ logger.warning(
+ f"[DEBUG] Unknown model source in spec: {list(model_src.keys())}"
+ )
# Keep original spec but add required fields
converted_spec = spec.copy()
if "quantization" not in converted_spec:
@@ -1201,7 +1236,9 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
converted_specs.append(converted_spec)
converted["model_specs"] = converted_specs
- logger.info(f"[DEBUG] Conversion completed: {len(model_json['model_specs'])} -> {len(converted_specs)} specs")
+ logger.info(
+ f"[DEBUG] Conversion completed: {len(model_json['model_specs'])} -> {len(converted_specs)} specs"
+ )
return converted
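To make the conversion concrete: one spec carrying a nested model_src with two
quantizations expands into one flattened spec per quantization, and a GGUF spec
without model_file_name_template falls back to a template derived from the
model name. A sketch with illustrative values, assuming model_name "my-llm":

    # Input spec (model-hub format, nested model_src):
    spec_in = {
        "model_format": "ggufv2",
        "model_size_in_billions": 7,
        "model_src": {
            "huggingface": {
                "model_id": "org/my-llm-GGUF",
                "quantizations": ["Q4_K_M", "Q8_0"],
            }
        },
    }

    # _convert_model_json_format emits one flattened spec per quantization:
    specs_out = [
        {
            "model_format": "ggufv2",
            "model_size_in_billions": 7,
            "quantization": "Q4_K_M",
            "model_hub": "huggingface",
            "model_id": "org/my-llm-GGUF",
            "model_file_name_template": "my-llm-{quantization}.gguf",
        },
        {
            "model_format": "ggufv2",
            "model_size_in_billions": 7,
            "quantization": "Q8_0",
            "model_hub": "huggingface",
            "model_id": "org/my-llm-GGUF",
            "model_file_name_template": "my-llm-{quantization}.gguf",
        },
    ]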
From 79ad0d02e54136e9aac898f22056878607296d12 Mon Sep 17 00:00:00 2001
From: OliverBryant <2713999266@qq.com>
Date: Wed, 22 Oct 2025 18:23:06 +0800
Subject: [PATCH 07/25] remove model_specs verification
---
xinference/api/restful_api.py | 21 +------
xinference/core/supervisor.py | 111 ++++++----------------------------
2 files changed, 21 insertions(+), 111 deletions(-)
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
index 57a623ed5d..97534aaaa9 100644
--- a/xinference/api/restful_api.py
+++ b/xinference/api/restful_api.py
@@ -3142,37 +3142,20 @@ async def add_model(self, request: Request) -> JSONResponse:
try:
# Parse request
raw_json = await request.json()
- logger.info(
- f"[DEBUG] add_model API received raw JSON: {json.dumps(raw_json, indent=2)}"
- )
body = AddModelRequest.parse_obj(raw_json)
model_type = body.model_type
model_json = body.model_json
- logger.info(f"[DEBUG] Parsed request - model_type: {model_type}")
- logger.info(
- f"[DEBUG] Parsed request - model_json keys: {list(model_json.keys())}"
- )
- logger.info(
- f"[DEBUG] model_name from JSON: {model_json.get('model_name', 'NOT_FOUND')}"
- )
-
# Call supervisor
supervisor_ref = await self._get_supervisor_ref()
- logger.info(f"[DEBUG] Got supervisor ref: {supervisor_ref}")
-
await supervisor_ref.add_model(model_type, model_json)
- logger.info(f"[DEBUG] Supervisor add_model completed successfully")
-
except ValueError as re:
- logger.error(f"[DEBUG] ValueError in add_model API: {re}", exc_info=True)
+ logger.error(f"ValueError in add_model API: {re}", exc_info=True)
raise HTTPException(status_code=400, detail=str(re))
except Exception as e:
- logger.error(
- f"[DEBUG] Unexpected error in add_model API: {e}", exc_info=True
- )
+ logger.error(f"Unexpected error in add_model API: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
return JSONResponse(
diff --git a/xinference/core/supervisor.py b/xinference/core/supervisor.py
index 33b728653c..098c29a385 100644
--- a/xinference/core/supervisor.py
+++ b/xinference/core/supervisor.py
@@ -942,27 +942,15 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
model_type: Type of model (LLM, embedding, image, etc.)
model_json: JSON configuration for the model
"""
- logger.info(
- f"[DEBUG] Supervisor add_model called with model_type: {model_type}"
- )
- logger.info(
- f"[DEBUG] Supervisor add_model received JSON with keys: {list(model_json.keys())}"
- )
- logger.info(f"[DEBUG] JSON content: {json.dumps(model_json, indent=2)}")
-
# Validate model type
supported_types = list(self._custom_register_type_to_cls.keys())
- logger.info(f"[DEBUG] Supported model types: {supported_types}")
if model_type not in self._custom_register_type_to_cls:
- logger.error(f"[DEBUG] Unsupported model type: {model_type}")
raise ValueError(
f"Unsupported model type '{model_type}'. "
f"Supported types are: {', '.join(supported_types)}"
)
- logger.info(f"[DEBUG] Model type validation passed for: {model_type}")
-
# Get the appropriate model class and register function
(
model_spec_cls,
@@ -971,143 +959,85 @@ async def add_model(self, model_type: str, model_json: Dict[str, Any]):
generate_fn,
) = self._custom_register_type_to_cls[model_type]
- logger.info(f"[DEBUG] Got model spec class: {model_spec_cls}")
- logger.info(f"[DEBUG] Got register function: {register_fn}")
- logger.info(f"[DEBUG] Got unregister function: {unregister_fn}")
- logger.info(f"[DEBUG] Got generate function: {generate_fn}")
-
- # Validate required fields
- required_fields = ["model_name", "model_specs"]
- logger.info(f"[DEBUG] Checking required fields: {required_fields}")
-
+ # Validate required fields (only model_name is required)
+ required_fields = ["model_name"]
for field in required_fields:
if field not in model_json:
- logger.error(f"[DEBUG] Missing required field: {field}")
raise ValueError(f"Missing required field: {field}")
- logger.info(f"[DEBUG] Field {field} found: {type(model_json[field])}")
-
# Validate model name format
from ..model.utils import is_valid_model_name
model_name = model_json["model_name"]
- logger.info(f"[DEBUG] Validating model name: {model_name}")
if not is_valid_model_name(model_name):
- logger.error(f"[DEBUG] Invalid model name format: {model_name}")
raise ValueError(f"Invalid model name format: {model_name}")
- logger.info(f"[DEBUG] Model name validation passed")
-
# Convert model hub JSON format to Xinference expected format
try:
- logger.info(f"[DEBUG] Converting model JSON format if needed...")
converted_model_json = self._convert_model_json_format(model_json)
- logger.info(f"[DEBUG] JSON conversion completed successfully")
except Exception as e:
- logger.error(f"[DEBUG] JSON conversion failed: {e}", exc_info=True)
raise ValueError(f"Failed to convert model JSON format: {str(e)}")
# Parse the JSON into the appropriate model spec
try:
- logger.info(
- f"[DEBUG] Attempting to parse converted JSON with {model_spec_cls}"
- )
model_spec = model_spec_cls.parse_obj(converted_model_json)
- logger.info(
- f"[DEBUG] JSON parsing successful, model_spec created: {model_spec}"
- )
except Exception as e:
- logger.error(f"[DEBUG] JSON parsing failed: {e}", exc_info=True)
raise ValueError(f"Invalid model JSON format: {str(e)}")
# Check if model already exists
try:
- logger.info(
- f"[DEBUG] Checking if model '{model_spec.model_name}' already exists"
- )
existing_model = await self.get_model_registration(
model_type, model_spec.model_name
)
- logger.info(f"[DEBUG] Existing model check result: {existing_model}")
if existing_model is not None:
- logger.error(f"[DEBUG] Model already exists: {model_spec.model_name}")
raise ValueError(
f"Model '{model_spec.model_name}' already exists for type '{model_type}'. "
f"Please choose a different model name or remove the existing model first."
)
- logger.info(f"[DEBUG] Model does not exist, can proceed with registration")
-
except ValueError as e:
if "not found" in str(e):
# Model doesn't exist, we can proceed
- logger.info(f"[DEBUG] Model not found (expected): {e}")
pass
else:
# Re-raise validation errors
- logger.error(f"[DEBUG] ValueError during model existence check: {e}")
raise e
except Exception as ex:
- logger.error(
- f"[DEBUG] Unexpected error checking model registration for '{model_spec.model_name}': {ex}",
- exc_info=True,
- )
raise ValueError(f"Failed to validate model registration: {str(ex)}")
# Register the model (persist=True for adding models)
try:
- logger.info(f"[DEBUG] Starting model registration process")
- logger.info(f"[DEBUG] Calling register_fn with persist=True")
-
register_fn(model_spec, persist=True)
- logger.info(f"[DEBUG] register_fn completed successfully")
# Record model version
- logger.info(f"[DEBUG] Generating version info")
version_info = generate_fn(model_spec)
- logger.info(f"[DEBUG] Version info generated: {version_info}")
-
- logger.info(f"[DEBUG] Recording model version to cache tracker")
await self._cache_tracker_ref.record_model_version(
version_info, self.address
)
- logger.info(f"[DEBUG] Model version recorded successfully")
# Sync to workers if not local deployment
is_local = self.is_local_deployment()
- logger.info(f"[DEBUG] Is local deployment: {is_local}")
-
if not is_local:
- logger.info(f"[DEBUG] Syncing model to workers")
# Convert back to JSON string for sync compatibility
model_json_str = json.dumps(converted_model_json)
await self._sync_register_model(
model_type, model_json_str, True, model_spec.model_name
)
- logger.info(f"[DEBUG] Model synced to workers successfully")
logger.info(
f"Successfully added model '{model_spec.model_name}' (type: {model_type})"
)
- logger.info(f"[DEBUG] add_model process completed successfully")
except ValueError as e:
# Validation errors - don't need cleanup as model wasn't registered
- logger.error(f"[DEBUG] Validation error during model registration: {e}")
raise e
except Exception as e:
# Unexpected errors - attempt cleanup
- logger.error(
- f"[DEBUG] Unexpected error during model registration: {e}",
- exc_info=True,
- )
try:
- logger.info(f"[DEBUG] Attempting cleanup of failed registration")
unregister_fn(model_spec.model_name, raise_error=False)
- logger.info(f"[DEBUG] Cleanup completed successfully")
except Exception as cleanup_error:
- logger.warning(f"[DEBUG] Cleanup failed: {cleanup_error}")
+ logger.warning(f"Cleanup failed: {cleanup_error}")
raise ValueError(
f"Failed to register model '{model_spec.model_name}': {str(e)}"
)
@@ -1118,8 +1048,22 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
The input format uses a nested 'model_src' structure, but Xinference expects
flattened fields at the spec level.
+
+ Also handles cases where the model_specs field is missing by providing a default.
"""
- logger.info(f"[DEBUG] Starting JSON format conversion")
+ # If model_specs is missing, provide a default minimal spec
+ if "model_specs" not in model_json or not model_json["model_specs"]:
+ # Create a minimal default spec
+ return {
+ **model_json,
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": None,
+ "quantization": "none",
+ }
+ ],
+ }
# Check if conversion is needed (detect model_src structure)
needs_conversion = False
@@ -1129,15 +1073,8 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
break
if not needs_conversion:
- logger.info(
- f"[DEBUG] No conversion needed, JSON is already in expected format"
- )
return model_json
- logger.info(
- f"[DEBUG] Converting model_src nested structure to flattened format"
- )
-
converted = model_json.copy()
converted_specs = []
@@ -1145,8 +1082,6 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
model_format = spec["model_format"]
model_size = spec["model_size_in_billions"]
- logger.info(f"[DEBUG] Processing spec: {model_format} - {model_size}B")
-
if "model_src" not in spec:
# No model_src, keep spec as is but ensure required fields
converted_spec = spec.copy()
@@ -1162,10 +1097,6 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
hf_info = model_src["huggingface"]
quantizations = hf_info.get("quantizations", ["none"])
- logger.info(
- f"[DEBUG] Found {len(quantizations)} quantizations for {model_format}"
- )
-
# Create separate specs for each quantization
for quant in quantizations:
converted_spec = {
@@ -1202,7 +1133,6 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
converted_spec["model_revision"] = hf_info["model_revision"]
converted_specs.append(converted_spec)
- logger.debug(f"[DEBUG] Created spec: {model_format} - {quant}")
elif "modelscope" in model_src:
# Handle ModelScope similarly
@@ -1227,7 +1157,7 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
else:
# Unknown model source, skip or handle as error
logger.warning(
- f"[DEBUG] Unknown model source in spec: {list(model_src.keys())}"
+ f"Unknown model source in spec: {list(model_src.keys())}"
)
# Keep original spec but add required fields
converted_spec = spec.copy()
@@ -1236,9 +1166,6 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
converted_specs.append(converted_spec)
converted["model_specs"] = converted_specs
- logger.info(
- f"[DEBUG] Conversion completed: {len(model_json['model_specs'])} -> {len(converted_specs)} specs"
- )
return converted
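A sketch of the default introduced here: a model JSON that omits model_specs
entirely is now accepted, and the converter injects a minimal pytorch spec
(the model name is illustrative):

    # _convert_model_json_format({"model_name": "qwen3"}) now returns:
    {
        "model_name": "qwen3",
        "model_specs": [
            {
                "model_format": "pytorch",
                "model_size_in_billions": None,
                "quantization": "none",
            }
        ],
    }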
From 424ec5ef85aff4ae889728d793157613f63adfca Mon Sep 17 00:00:00 2001
From: OliverBryant <2713999266@qq.com>
Date: Thu, 23 Oct 2025 09:47:40 +0800
Subject: [PATCH 08/25] make model_format and model_size_in_billions optional in model specs
---
xinference/core/supervisor.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/xinference/core/supervisor.py b/xinference/core/supervisor.py
index 098c29a385..6e5a10368f 100644
--- a/xinference/core/supervisor.py
+++ b/xinference/core/supervisor.py
@@ -1079,14 +1079,16 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
converted_specs = []
for spec in model_json["model_specs"]:
- model_format = spec["model_format"]
- model_size = spec["model_size_in_billions"]
+ model_format = spec.get("model_format", "pytorch")
+ model_size = spec.get("model_size_in_billions")
if "model_src" not in spec:
# No model_src, keep spec as is but ensure required fields
converted_spec = spec.copy()
if "quantization" not in converted_spec:
- converted_spec["quantization"] = "none" # Default
+ converted_spec["quantization"] = "none"
+ if "model_format" not in converted_spec:
+ converted_spec["model_format"] = "pytorch"
converted_specs.append(converted_spec)
continue
@@ -1163,6 +1165,8 @@ def _convert_model_json_format(self, model_json: Dict[str, Any]) -> Dict[str, An
converted_spec = spec.copy()
if "quantization" not in converted_spec:
converted_spec["quantization"] = "none"
+ if "model_format" not in converted_spec:
+ converted_spec["model_format"] = "pytorch"
converted_specs.append(converted_spec)
converted["model_specs"] = converted_specs
From 05a7b06a5f91e0d09f2bbcea5eade74c9a94418a Mon Sep 17 00:00:00 2001
From: yiboyasss <3359595624@qq.com>
Date: Thu, 23 Oct 2025 14:52:05 +0800
Subject: [PATCH 09/25] fix: frontend add-model dialog
---
xinference/ui/web/ui/src/locales/en.json | 27 +-
xinference/ui/web/ui/src/locales/ja.json | 29 +-
xinference/ui/web/ui/src/locales/ko.json | 25 +-
xinference/ui/web/ui/src/locales/zh.json | 25 +-
.../launch_model/components/addModelDialog.js | 300 +++++++-----------
.../web/ui/src/scenes/launch_model/index.js | 63 +++-
6 files changed, 199 insertions(+), 270 deletions(-)
diff --git a/xinference/ui/web/ui/src/locales/en.json b/xinference/ui/web/ui/src/locales/en.json
index b39f1e2939..437fb45a5c 100644
--- a/xinference/ui/web/ui/src/locales/en.json
+++ b/xinference/ui/web/ui/src/locales/en.json
@@ -127,26 +127,19 @@
"mustBeUnique": "{{key}} must be unique",
"addModel": "Add Model",
"addModelDialog": {
- "introPrefix": "To add a model, please use",
- "platformLinkText": "Model Management Platform",
- "introSuffix": " and paste the model's URL",
- "example": "Example: The URL for {{modelName}} on the platform is {{modelUrl}}",
- "urlLabel": "URL"
- },
- "loginDialog": {
- "title": "No permission to download this model. Please log in and try again.",
- "usernameOrEmail": "Username or Email",
- "password": "Password",
- "login": "Login"
+ "introPrefix": "To add a model, please go to the",
+ "platformLinkText": "Xinference Model Hub",
+ "introSuffix": "and fill in the corresponding model name.",
+ "modelName": "Model Name",
+ "modelName.tip": "Please enter the model name",
+ "placeholder": "e.g. qwen3 (case-sensitive)"
},
+ "update": "Update",
"error": {
- "cannotExtractModelId": "Unable to extract model_id from URL. Please check your input.",
- "downloadFailed": "Download failed: {{status}} {{text}}",
+ "name_not_matched": "No exact model name match found (case-sensitive)",
+ "downloadFailed": "Download failed",
"requestFailed": "Request failed",
- "loginFailedText": "Login failed: {{status}} {{text}}",
- "noTokenAfterLogin": "Login succeeded but no token was returned",
- "modelPrivate": "This model is private and requires download permission.",
- "noPermissionAfterLogin": "The logged-in account does not have permission to download this model. Please contact the administrator or use a different account."
+ "json_parse_error": "Failed to parse JSON"
}
},
diff --git a/xinference/ui/web/ui/src/locales/ja.json b/xinference/ui/web/ui/src/locales/ja.json
index 2dd70bc1ab..e4075f9e1d 100644
--- a/xinference/ui/web/ui/src/locales/ja.json
+++ b/xinference/ui/web/ui/src/locales/ja.json
@@ -127,26 +127,19 @@
"mustBeUnique": "{{key}} は一意でなければなりません",
"addModel": "モデルを追加",
"addModelDialog": {
- "introPrefix": "モデルを追加するには",
- "platformLinkText": "モデル管理プラットフォーム",
- "introSuffix": "に基づき、対応するURLを入力してください",
- "example": "例:{{modelName}} のモデル管理プラットフォーム上のURLは {{modelUrl}} です",
- "urlLabel": "URL"
- },
- "loginDialog": {
- "title": "このモデルをダウンロードする権限がありません。ログイン後に再度お試しください",
- "usernameOrEmail": "ユーザー名またはメールアドレス",
- "password": "パスワード",
- "login": "ログイン"
+ "introPrefix": "モデルを追加するには、",
+ "platformLinkText": "Xinference モデルセンター",
+ "introSuffix": "で対応するモデル名を入力してください。",
+ "modelName": "モデル名",
+ "modelName.tip": "モデル名を入力してください",
+ "placeholder": "例:qwen3(大文字と小文字を区別します)"
},
+ "update": "更新",
"error": {
- "cannotExtractModelId": "URLから model_id を抽出できません。入力内容を確認してください",
- "downloadFailed": "ダウンロード失敗: {{status}} {{text}}",
- "requestFailed": "リクエスト失敗",
- "loginFailedText": "ログイン失敗: {{status}} {{text}}",
- "noTokenAfterLogin": "ログインは成功しましたが、トークンを取得できませんでした",
- "modelPrivate": "このモデルは非公開であり、ダウンロード権限が必要です。",
- "noPermissionAfterLogin": "このアカウントにはモデルをダウンロードする権限がありません。管理者に連絡するか、別のアカウントを使用してください。"
+ "name_not_matched": "完全に一致するモデル名が見つかりません(大文字と小文字を区別します)",
+ "downloadFailed": "ダウンロードに失敗しました",
+ "requestFailed": "リクエストに失敗しました",
+ "json_parse_error": "JSON の解析に失敗しました"
}
},
diff --git a/xinference/ui/web/ui/src/locales/ko.json b/xinference/ui/web/ui/src/locales/ko.json
index f6eeb9b51d..36fd0cd0c2 100644
--- a/xinference/ui/web/ui/src/locales/ko.json
+++ b/xinference/ui/web/ui/src/locales/ko.json
@@ -128,25 +128,18 @@
"addModel": "모델 추가",
"addModelDialog": {
"introPrefix": "모델을 추가하려면",
- "platformLinkText": "모델 관리 플랫폼",
- "introSuffix": "을(를) 기반으로 해당 URL을 입력하세요",
- "example": "예: {{modelName}}의 모델 관리 플랫폼 URL은 {{modelUrl}} 입니다",
- "urlLabel": "URL"
- },
- "loginDialog": {
- "title": "이 모델을 다운로드할 권한이 없습니다. 로그인 후 다시 시도하세요",
- "usernameOrEmail": "사용자 이름 또는 이메일",
- "password": "비밀번호",
- "login": "로그인"
+ "platformLinkText": "Xinference 모델 센터",
+ "introSuffix": "에서 해당 모델 이름을 입력하세요.",
+ "modelName": "모델 이름",
+ "modelName.tip": "모델 이름을 입력하세요",
+ "placeholder": "예: qwen3 (대소문자를 구분합니다)"
},
+ "update": "업데이트",
"error": {
- "cannotExtractModelId": "URL에서 model_id를 추출할 수 없습니다. 입력을 확인하세요",
- "downloadFailed": "다운로드 실패: {{status}} {{text}}",
+ "name_not_matched": "완전히 일치하는 모델 이름을 찾을 수 없습니다(대소문자 구분)",
+ "downloadFailed": "다운로드 실패",
"requestFailed": "요청 실패",
- "loginFailedText": "로그인 실패: {{status}} {{text}}",
- "noTokenAfterLogin": "로그인은 성공했지만 토큰을 가져오지 못했습니다",
- "modelPrivate": "이 모델은 비공개이며 다운로드 권한이 필요합니다.",
- "noPermissionAfterLogin": "이 계정에는 해당 모델을 다운로드할 권한이 없습니다. 관리자에게 문의하거나 다른 계정을 사용하세요."
+ "json_parse_error": "JSON 구문 분석에 실패했습니다"
}
},
diff --git a/xinference/ui/web/ui/src/locales/zh.json b/xinference/ui/web/ui/src/locales/zh.json
index 066781855a..3a0a1d7a19 100644
--- a/xinference/ui/web/ui/src/locales/zh.json
+++ b/xinference/ui/web/ui/src/locales/zh.json
@@ -128,25 +128,18 @@
"addModel": "添加模型",
"addModelDialog": {
"introPrefix": "添加模型需基于",
- "platformLinkText": "模型管理平台",
- "introSuffix": ",填写模型对应的 URL",
- "example": "例:{{modelName}}在模型管理平台上对应的 URL 如下 {{modelUrl}}",
- "urlLabel": "URL"
- },
- "loginDialog": {
- "title": "暂无权限下载该模型,登录后重新尝试下载",
- "usernameOrEmail": "用户名或邮箱",
- "password": "密码",
- "login": "登录"
+ "platformLinkText": "Xinference 模型中心",
+ "introSuffix": ",填写模型对应的名称",
+ "modelName": "模型名称",
+ "modelName.tip": "请输入模型名称",
+ "placeholder": "例如:qwen3(需大小写完全匹配)"
},
+ "update": "更新",
"error": {
- "cannotExtractModelId": "无法从 URL 中提取 model_id,请检查输入",
- "downloadFailed": "下载失败: {{status}} {{text}}",
+ "name_not_matched": "未找到完全匹配的模型名称(需大小写一致)",
+ "downloadFailed": "下载失败",
"requestFailed": "请求失败",
- "loginFailedText": "登录失败: {{status}} {{text}}",
- "noTokenAfterLogin": "登录成功但未获取到 token",
- "modelPrivate": "该模型为私有,需要具有下载权限。",
- "noPermissionAfterLogin": "该登录账户暂无权限下载该模型,请联系管理员或更换账户。"
+ "json_parse_error": "JSON 解析失败"
}
},
diff --git a/xinference/ui/web/ui/src/scenes/launch_model/components/addModelDialog.js b/xinference/ui/web/ui/src/scenes/launch_model/components/addModelDialog.js
index 5791d7e364..af38bcffba 100644
--- a/xinference/ui/web/ui/src/scenes/launch_model/components/addModelDialog.js
+++ b/xinference/ui/web/ui/src/scenes/launch_model/components/addModelDialog.js
@@ -6,181 +6,134 @@ import {
DialogTitle,
TextField,
} from '@mui/material'
-import React, { useEffect, useRef, useState } from 'react'
+import React, { useContext, useState } from 'react'
import { useTranslation } from 'react-i18next'
+import { ApiContext } from '../../../components/apiContext'
+
const API_BASE_URL = 'https://model.xinference.io'
-const AddModelDialog = ({ open, onClose }) => {
+function AddModelDialog({ open, onClose }) {
const { t } = useTranslation()
- const [url, setUrl] = useState('')
- const [loginOpen, setLoginOpen] = useState(false)
- const [pendingModelId, setPendingModelId] = useState(null)
+ const [modelName, setModelName] = useState('')
const [loading, setLoading] = useState(false)
- const [errorMsg, setErrorMsg] = useState('')
- const loginIframeRef = useRef(null)
-
- const handleClose = (type) => {
- setErrorMsg('')
+ const { endPoint, setErrorMsg } = useContext(ApiContext)
- const actions = {
- add: onClose,
- login: () => setLoginOpen(false),
+ const searchModelByName = async (name) => {
+ try {
+ const url = `${API_BASE_URL}/api/models?order=featured&query=${encodeURIComponent(
+ name
+ )}&page=1&pageSize=5`
+ const res = await fetch(url, { method: 'GET' })
+ const rawText = await res.text().catch(() => '')
+ if (!res.ok) {
+ setErrorMsg(rawText || `HTTP ${res.status}`)
+ return null
+ }
+ try {
+ const data = JSON.parse(rawText)
+ const items = data?.data || []
+ const exact = items.find((it) => it?.model_name === name)
+ if (!exact) {
+ setErrorMsg(t('launchModel.error.name_not_matched'))
+ return null
+ }
+ const id = exact?.id
+ const modelType = exact?.model_type
+ if (!id || !modelType) {
+ setErrorMsg(t('launchModel.error.downloadFailed'))
+ return null
+ }
+ return { id, modelType }
+ } catch {
+ setErrorMsg(rawText || t('launchModel.error.json_parse_error'))
+ return null
+ }
+ } catch (err) {
+ console.error(err)
+ setErrorMsg(err.message || t('launchModel.error.requestFailed'))
+ return null
}
-
- actions[type]?.()
}
- const extractModelId = (input) => {
+ const fetchModelJson = async (modelId) => {
try {
- const u = new URL(input)
- const m1 = u.pathname.match(/\/(\d+)(?:\/?$)/)
- if (m1 && m1[1]) return m1[1]
- const qp = u.searchParams.get('model_id')
- if (qp) return qp
- } catch (e) {
- const m2 = String(input).match(/(\d+)(?:\/?$)/)
- if (m2 && m2[1]) return m2[1]
+ const res = await fetch(
+ `${API_BASE_URL}/api/models/download?model_id=${encodeURIComponent(
+ modelId
+ )}`,
+ { method: 'GET' }
+ )
+ const rawText = await res.text().catch(() => '')
+ if (!res.ok) {
+ setErrorMsg(rawText || `HTTP ${res.status}`)
+ return null
+ }
+ try {
+ const data = JSON.parse(rawText)
+ return data
+ } catch {
+ setErrorMsg(rawText || t('launchModel.error.json_parse_error'))
+ return null
+ }
+ } catch (err) {
+ console.error(err)
+ setErrorMsg(err.message || t('launchModel.error.requestFailed'))
+ return null
}
- return null
}
- // Note: download reads the token from sessionStorage by default (an explicitly passed token takes precedence)
- // performDownload: once a token is available, call the endpoint directly and fetch the JSON
- const performDownload = async (
- modelId,
- tokenFromParam,
- fromLogin = false
- ) => {
- const endpoint = `${API_BASE_URL}/api/models/download?model_id=${encodeURIComponent(
- modelId
- )}`
- const effectiveToken =
- tokenFromParam ||
- sessionStorage.getItem('model_hub_token') ||
- localStorage.getItem('io_login_success')
- const headers = effectiveToken
- ? { Authorization: `Bearer ${effectiveToken}` }
- : {}
- setLoading(true)
- setErrorMsg('')
+ const addToLocal = async (modelType, modelJson) => {
try {
- const res = await fetch(endpoint, {
- method: 'GET',
- headers,
+ const res = await fetch(endPoint + '/v1/models/add', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ model_type: modelType, model_json: modelJson }),
})
-
- if (res.status === 401) {
- const refreshToken = sessionStorage.getItem('model_hub_refresh_token')
- if (!refreshToken) {
- sessionStorage.removeItem('model_hub_token')
- setPendingModelId(modelId)
- setLoginOpen(true)
- return
- }
- try {
- const refreshRes = await fetch(`${API_BASE_URL}/api/users/refresh`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ token: refreshToken }),
- })
- if (!refreshRes.ok) {
- throw new Error(`refresh failed: ${refreshRes.status}`)
- }
- const refreshData = await refreshRes.json().catch(() => ({}))
- const newToken = refreshData?.data?.accessToken
- if (newToken) {
- sessionStorage.setItem('model_hub_token', newToken)
- await performDownload(modelId, newToken, false)
- return
- } else {
- sessionStorage.removeItem('model_hub_token')
- setPendingModelId(modelId)
- setLoginOpen(true)
- return
- }
- } catch (e) {
- sessionStorage.removeItem('model_hub_token')
- setPendingModelId(modelId)
- setLoginOpen(true)
- return
- }
+ const rawText = await res.text().catch(() => '')
+ if (!res.ok) {
+ setErrorMsg(rawText || `HTTP ${res.status}`)
+ return
}
-
- if (res.status === 403) {
- let detailMsg = ''
- try {
- const body = await res.json()
- if (body?.error_code === 'MODEL_PRIVATE') {
- detailMsg = t('launchModel.error.modelPrivate')
- } else if (body?.message) {
- detailMsg = body.message
- }
- } catch {
- console.log('')
- }
- if (fromLogin) {
- setErrorMsg(
- detailMsg || t('launchModel.error.noPermissionAfterLogin')
- )
- return
- } else {
- setPendingModelId(modelId)
- setLoginOpen(true)
- return
- }
+ try {
+ const data = JSON.parse(rawText)
+ console.log('Local /v1/models/add response:', data)
+ } catch {
+ console.log('Local /v1/models/add raw response:', rawText)
}
-
- if (!res.ok) {
- const text = await res.text().catch(() => '')
- throw new Error(
- t('launchModel.error.downloadFailed', { status: res.status, text })
- )
+ } catch (error) {
+ console.error('Error:', error)
+ if (error?.response?.status !== 403) {
+ setErrorMsg(error.message)
}
- const data = await res.json()
- console.log('models/download response:', data)
- handleClose('add')
- } catch (err) {
- console.error(err)
- setErrorMsg(err.message || t('launchModel.error.requestFailed'))
- } finally {
- setLoading(false)
}
}
const handleFormSubmit = async (e) => {
e.preventDefault()
- const modelId = extractModelId(url?.trim())
- if (!modelId) {
- setErrorMsg(t('launchModel.error.cannotExtractModelId'))
+ const name = modelName?.trim()
+ if (!name) {
+ setErrorMsg(t('launchModel.addModelDialog.modelName.tip'))
return
}
- await performDownload(modelId)
- }
-
- useEffect(() => {
- const listener = (event) => {
- if (event.origin !== API_BASE_URL) return
- const { type, token, refresh_token } = event.data || {}
+ setLoading(true)
+ setErrorMsg('')
+ try {
+ const found = await searchModelByName(name)
+ if (!found) return
+ const { id, modelType } = found
- if (type === 'io_login_success' && token && refresh_token) {
- handleClose('login')
- sessionStorage.setItem('model_hub_token', token)
- sessionStorage.setItem('model_hub_refresh_token', refresh_token)
- if (pendingModelId) {
- void performDownload(pendingModelId, token, true)
- }
- }
- }
+ const modelJson = await fetchModelJson(id)
+ if (!modelJson) return
- window.addEventListener('message', listener)
- return () => {
- window.removeEventListener('message', listener)
+ await addToLocal(modelType, modelJson)
+ } finally {
+ setLoading(false)
}
- }, [pendingModelId])
+ }
return (
-