-
Notifications
You must be signed in to change notification settings - Fork 5
/
generate.js
139 lines (133 loc) · 7.02 KB
/
generate.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
const fs = require('fs')
const path = require('path')
function removeHtmlTag (content) {
content = content.replace(/(?:<\/?[a-z][a-z1-6]{0,9}>|<[a-z][a-z1-6]{0,9} .+?>)/gi, '')
return content.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>').replace(/"/g, '"').replace(/ /g, ' ')
}
function writeHtmlFile (filePath, htmlContent, pythonVersion) {
const startIndex = htmlContent.indexOf('<div class="body" role="main">')
if (startIndex === -1) throw new Error('😱 未匹配到内容开头 --- ' + filePath)
const endIndex = htmlContent.search(/\s*<\/div>\s*<\/div>\s*<div class="sphinxsidebar"/)
if (endIndex === -1) throw new Error('😱 未匹配到内容结尾 --- ' + filePath)
let docContent = htmlContent.substring(startIndex, endIndex)
docContent = docContent.replace(/<a[^>\n]+?>¶<\/a>/g, '')
docContent = docContent.replace(/<h(\d)>\s*\d{1,2}\.(?:\d{1,2}\.)?(?:\d{1,2}\.)?\s*?(.*?)<\/h\1>/g, '<h$1>$2</h$1>')
docContent = docContent.replace(/href="(\.\.)(\/(?!library\/)[^"\n]+?)"/g, 'href="https://docs.python.org/zh-cn/' + pythonVersion + '$2"')
fs.writeFileSync(filePath, `
<!DOCTYPE html><html lang="zh_CN">
<head>
<meta charset="UTF-8">
<title></title>
<link rel="stylesheet" href="../pydoc.css" type="text/css" />
</head>
<body>${docContent}</body>
</html>`)
}
// 语言参考索引
function getPythonReferenceIndexes (pythoneVersionDir, indexes) {
const pythonVersion = pythoneVersionDir.match(/python-(\d\.\d)/)[1]
const pubPath = path.join(__dirname, 'public', 'python-' + pythonVersion)
const indexHtmlContent = fs.readFileSync(path.join(__dirname, pythoneVersionDir, 'reference', 'index.html'), { encoding: 'utf-8' })
const matchs = indexHtmlContent.match(/<li class="toctree-l\d">\s*<a class="reference internal" href="[^"\n]+?">[\s\S]+?<\/a>/g)
if (!matchs) throw new Error('😱 语言参考未匹配')
let parentString = ''
matchs.forEach(x => {
const rowMatches = x.match(/<li class="toctree-l(\d)">\s*<a class="reference internal" href="([^"\n]+?)">([\s\S]+?)<\/a>/)
if (rowMatches[1] === '1') {
parentString = rowMatches[3].replace(/\d{1,2}\.(?:\d{1,2}\.)?/, '').trim()
parentString = removeHtmlTag(parentString)
const htmlContent = fs.readFileSync(path.join(__dirname, pythoneVersionDir, 'reference', rowMatches[2].trim()), { encoding: 'utf-8' })
writeHtmlFile(path.join(pubPath, 'reference', rowMatches[2].trim()), htmlContent, pythonVersion)
return
}
let key = rowMatches[3].replace(/\d{1,2}\.(?:\d{1,2}\.)?/, '').trim()
key = removeHtmlTag(key)
indexes.push({ t: key, p: 'reference/' + rowMatches[2].trim(), d: parentString })
})
}
// Python 标准库索引
function getPythonLibraryIndexes (pythoneVersionDir, indexes) {
const pythonVersion = pythoneVersionDir.match(/python-(\d\.\d)/)[1]
const pubPath = path.join(__dirname, 'public', 'python-' + pythonVersion)
const libraryDir = path.join(__dirname, pythoneVersionDir, 'library')
const files = fs.readdirSync(libraryDir)
files.forEach(f => {
if (!f.endsWith('.html')) return
let isUse = false
const htmlContent = fs.readFileSync(path.join(libraryDir, f), { encoding: 'utf-8' })
if (/<h1>(?:\d{1,2}\.\d{1,2}\. )?<a[^>\n]+?><code[^>\n]+?><span class="pre">(.+?)<\/span><\/code><\/a>\s*?(?:---|—)(.+?)<a class="headerlink".+?<\/h1>/.test(htmlContent)) {
const t = removeHtmlTag(RegExp.$1.trim())
const d = removeHtmlTag(RegExp.$2.trim())
indexes.push({ t, p: 'library/' + f, d })
isUse = true
}
const dlMatchs = htmlContent.match(/<dl class="(?:class|function|method|data|attribute)">\s*?<dt id="[^"\n]+?">[\s\S]+?<dd><p>[\s\S]+?<\/p>/g)
if (dlMatchs) {
dlMatchs.forEach(dl => {
const maches = dl.match(/<dl class="(?:class|function|method|data|attribute)">\s*?<dt id="([^"\n]+?)">([\s\S]+?)<dd><p>([\s\S]+?)<\/p>/)
const id = maches[1].trim()
const checkNextContent = maches[2]
const d = removeHtmlTag(maches[3]).replace(/\s+/g, ' ').trim()
if (checkNextContent.includes('<dt id="')) {
checkNextContent.match(/<dt id="[^"\n]+?">/g).forEach(nx => {
const nid = nx.match(/<dt id="([^"\n]+?)">/)[1]
indexes.push({ t: nid, p: 'library/' + f + '#' + nid, d })
})
}
indexes.push({ t: id, p: 'library/' + f + '#' + id, d })
})
isUse = true
}
// 存在可能重复的ID python文档的方式是用 target 处理
const targetMatchs = htmlContent.match(/<span class="target" id="[^"\n]+?"><\/span>\s*?<dl class="(?:class|function|method|data|attribute)">\s*?<dt>[\s\S]+?<dd><p>[\s\S]+?<\/p>/g)
if (targetMatchs) {
targetMatchs.forEach(tt => {
const maches = tt.match(/<span class="target" id="([^"\n]+?)"><\/span>\s*?<dl class="(?:class|function|method|data|attribute)">\s*?<dt>[\s\S]+?<dd><p>([\s\S]+?)<\/p>/)
let id = maches[1].trim()
let kid = id
if (id.includes('-')) {
const lcheck = id.substr(0, id.indexOf('-'))
if (lcheck !== 'func') return
kid = id.substr(id.indexOf('-') + 1)
}
const d = removeHtmlTag(maches[2]).replace(/\s+/g, ' ').trim()
indexes.push({ t: kid, p: 'library/' + f + '#' + id, d })
})
isUse = true
}
if (isUse) {
writeHtmlFile(path.join(pubPath, 'library', f), htmlContent, pythonVersion)
}
})
fs.writeFileSync(path.join(pubPath, 'indexes.json'), JSON.stringify(indexes))
}
function main () {
var args = process.argv.slice(2)
const indexes = []
const pythoneVersionDir = args[0]
if (!/python-(\d\.\d)/.test(pythoneVersionDir)) throw new Error('文件夹错误')
const pythonVersion = RegExp.$1
if (!fs.existsSync(path.join(__dirname, pythoneVersionDir))) throw new Error(pythoneVersionDir + '文件夹不存在')
const pubPath = path.join(__dirname, 'public', 'python-' + pythonVersion)
if (!fs.existsSync(pubPath)) {
fs.mkdirSync(pubPath)
fs.mkdirSync(path.join(pubPath, 'reference'))
fs.mkdirSync(path.join(pubPath, 'library'))
fs.mkdirSync(path.join(pubPath, '_images'))
fs.copyFileSync(path.join(__dirname, 'pydoc.css'), path.join(pubPath, 'pydoc.css'))
fs.copyFileSync(path.join(__dirname, 'README.md'), path.join(pubPath, 'README.md'))
fs.copyFileSync(path.join(__dirname, 'logo.png'), path.join(pubPath, 'logo.png'))
fs.copyFileSync(path.join(__dirname, 'preload.js'), path.join(pubPath, 'preload.js'))
const _imagesDir = path.join(__dirname, pythoneVersionDir, '_images')
const images = fs.readdirSync(_imagesDir)
images.forEach(fimg => {
fs.copyFileSync(path.join(_imagesDir, fimg), path.join(pubPath, '_images', fimg))
})
let pluginJsonContent = fs.readFileSync(path.join(__dirname, 'plugin.json'), { encoding: 'utf-8' })
pluginJsonContent = pluginJsonContent.replace(/<version>/g, pythonVersion)
fs.writeFileSync(path.join(pubPath, 'plugin.json'), pluginJsonContent)
}
getPythonReferenceIndexes(pythoneVersionDir, indexes)
getPythonLibraryIndexes(pythoneVersionDir, indexes)
}
main()