Skip to content

Commit 28d2fdf

Browse files
author
Yusaku Sato
committed
feat(dom-scanner): add HTML and Pug file scanner CLI tool
Add a new CLI tool to scan HTML and Pug files in a directory and find elements matching a CSS selector. The tool supports both HTML and Pug file processing with configurable extensions and processors.
1 parent 0f0bd49 commit 28d2fdf

File tree

9 files changed

+928
-15
lines changed

9 files changed

+928
-15
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# `@d-zero/dom-scanner`
2+
3+
指定ディレクトリ内のHTMLファイルとPugファイルから特定のCSSセレクタにマッチする要素を検索して報告するCLIツールです。
4+
5+
## 使い方
6+
7+
### 基本的な使い方
8+
9+
```sh
10+
npx @d-zero/dom-scanner <selector> [options]
11+
```
12+
13+
**引数**
14+
15+
- `selector` - CSSセレクタ(必須)
16+
17+
**オプション**
18+
19+
- `-d, --dir, --directory <directory>` - 検索対象のディレクトリパス(デフォルト: 現在のディレクトリ)
20+
- `--ext, --extension <extensions>` - 検索対象の拡張子(カンマ区切り、デフォルト: `html`)
21+
- 例: `--ext html,pug` でHTMLとPugファイルの両方を検索
22+
- `-p, --processor <processor>` - 使用するプロセッサーを明示的に指定(`html` または `pug`)
23+
- 例: `--processor pug` で全てのファイルをPugプロセッサーで処理
24+
- 拡張子ごとのデフォルトプロセッサー: `html` → `html`, `pug` → `pug`
25+
- `-x, --exclude-dirs <dirs>` - 除外するディレクトリ名(カンマ区切り)
26+
- 例: `--exclude-dirs node_modules,dist` で特定のディレクトリを除外
27+
- `--verbose` - 詳細なログを表示
28+
- `--ignore <pattern>` - 無視するファイルパターン(複数指定可能)
29+
30+
### 使用例
31+
32+
```sh
33+
# 現在のディレクトリでHTMLファイルのみを検索(デフォルト)
34+
npx @d-zero/dom-scanner "button"
35+
36+
# 指定ディレクトリで検索
37+
npx @d-zero/dom-scanner "button" --dir ./src
38+
39+
# HTMLとPugファイルの両方を検索
40+
npx @d-zero/dom-scanner "button" --dir ./src --ext html,pug
41+
42+
# HTMLファイルをPugプロセッサーで処理(極端な例)
43+
npx @d-zero/dom-scanner "button" --dir ./src --ext html --processor pug
44+
45+
# 除外ディレクトリをカスタマイズ
46+
npx @d-zero/dom-scanner "button" --exclude-dirs node_modules,dist
47+
```
48+
49+
## API
50+
51+
このパッケージはAPIとしても使用できます。
52+
53+
### 基本的な使い方
54+
55+
```typescript
56+
import { scanDirectory } from '@d-zero/dom-scanner';
57+
58+
const summary = await scanDirectory('./src', 'button', {
59+
extensions: ['html', 'pug'],
60+
});
61+
62+
for (const result of summary.results) {
63+
console.log(`${result.filePath}: ${result.count}件`);
64+
}
65+
```
66+
67+
## 動作環境
68+
69+
- Node.js 20.11以降
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"name": "@d-zero/dom-scanner",
3+
"version": "1.0.0",
4+
"description": "Scan HTML and Pug files in a directory to find elements matching a CSS selector",
5+
"author": "D-ZERO",
6+
"license": "MIT",
7+
"publishConfig": {
8+
"access": "public"
9+
},
10+
"type": "module",
11+
"main": "./dist/index.js",
12+
"types": "./dist/index.d.ts",
13+
"exports": {
14+
".": {
15+
"import": "./dist/index.js",
16+
"types": "./dist/index.d.ts"
17+
}
18+
},
19+
"bin": "./dist/cli.js",
20+
"files": [
21+
"dist"
22+
],
23+
"scripts": {
24+
"build": "tsc",
25+
"watch": "tsc --watch",
26+
"clean": "tsc --build --clean"
27+
},
28+
"dependencies": {
29+
"@d-zero/cli-core": "1.2.5",
30+
"@d-zero/shared": "0.16.0",
31+
"cheerio": "^1.0.0",
32+
"pug": "^3.0.3"
33+
},
34+
"devDependencies": {
35+
"@types/node": "24.10.1"
36+
}
37+
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#!/usr/bin/env node
2+
3+
import type { ProcessorType } from './types.js';
4+
import type { BaseCLIOptions } from '@d-zero/cli-core';
5+
6+
import { cwd } from 'node:process';
7+
8+
import { createCLI, parseCommonOptions, parseList } from '@d-zero/cli-core';
9+
10+
import { scanDirectory } from './scanner.js';
11+
12+
/**
 * Raw command-line options accepted by the dom-scanner CLI, on top of the
 * common options from `BaseCLIOptions`.
 *
 * Several options exist under two spellings; the `parseArgs` callback merges
 * each pair into a single value.
 */
interface DomScannerCLIOptions extends BaseCLIOptions {
	/** Directory to scan (`-d, --dir`). */
	dir?: string;
	/** Long-form alias for `dir` (`-D, --directory`). */
	directory?: string;

	/** Comma-separated list of file extensions to search (`-e, --ext`). */
	ext?: string;
	/** Long-form alias for `ext` (`-E, --extension`). */
	extension?: string;

	/** Processor name requested by the user (`-p, --processor`); validated later. */
	processor?: string;

	/** Ignore pattern(s) (`-i, --ignore`); an array when the flag is repeated. */
	ignore?: string | string[];

	/** Comma-separated directory names to exclude (`-x, --exclude-dirs`). */
	'exclude-dirs'?: string;
	/** Camel-case spelling of `exclude-dirs`, used as a fallback. */
	excludeDirs?: string;
}
22+
23+
// Help text printed by the CLI, one entry per output line.
const usageLines = [
	'Usage: dom-scanner <selector> [options]',
	'',
	'Arguments:',
	'\t<selector> CSS selector (required)',
	'',
	'Options:',
	'\t-d, --dir <directory> Directory to scan (default: current directory)',
	'\t-D, --directory Alias for --dir',
	'\t-e, --ext <extensions> File extensions to search (comma-separated, default: html)',
	'\t-E, --extension Alias for --ext',
	'\t-p, --processor <proc> Processor to use: html or pug (default: auto-detect by extension)',
	'\t-i, --ignore <pattern> Ignore file patterns (can be specified multiple times)',
	'\t-x, --exclude-dirs <dirs> Exclude directories (comma-separated)',
	'\t-v, --verbose Enable verbose logging',
	'',
	'Examples:',
	'\tdom-scanner "button"',
	'\tdom-scanner "button" --dir ./src',
	'\tdom-scanner "button" --ext html,pug',
	'\tdom-scanner "button" --dir ./src --ext html --processor pug',
	'\tdom-scanner "button" --exclude-dirs node_modules,dist',
];

// Parse the command line: short flags map to long names, duplicate spellings
// are merged, and at least one positional argument (the selector) is required.
const { options: cliOptions, args } = createCLI<DomScannerCLIOptions>({
	aliases: {
		d: 'dir',
		D: 'directory',
		e: 'ext',
		E: 'extension',
		p: 'processor',
		i: 'ignore',
		x: 'exclude-dirs',
		v: 'verbose',
	},
	usage: usageLines,
	parseArgs: (cli) => {
		// Prefer the short spelling of each option and fall back to its alias.
		const dir = cli.dir ?? cli.directory;
		const ext = cli.ext ?? cli.extension;
		const excludeDirs = cli['exclude-dirs'] ?? cli.excludeDirs;

		return {
			...parseCommonOptions(cli),
			dir,
			ext,
			processor: cli.processor,
			ignore: cli.ignore,
			'exclude-dirs': excludeDirs,
		};
	},
	validateArgs: (_options, cli) => cli._.length > 0,
});
69+
70+
/**
 * Runtime check that a `--processor` value names a supported processor.
 *
 * @param value - Raw option value from the command line.
 * @returns `true` when the value is `'html'` or `'pug'`.
 */
function isProcessorType(value: string): value is ProcessorType {
	return value === 'html' || value === 'pug';
}

// The first positional argument is the CSS selector to search for.
const [selector] = args;

if (!selector) {
	process.stderr.write('Error: selector is required\n');
	process.exit(1);
}

// Scan the current working directory unless a directory was given.
const directory = cliOptions.dir ?? cwd();

// Normalise the extension list; `undefined` leaves the choice to scanDirectory.
const extensions = cliOptions.ext
	? parseList(cliOptions.ext).map((ext) => ext.toLowerCase().trim())
	: undefined;

// Validate the processor before treating it as a ProcessorType, instead of
// casting first. An empty value is handled like an omitted option.
const requestedProcessor = cliOptions.processor;
let processor: ProcessorType | undefined;
if (requestedProcessor) {
	if (isProcessorType(requestedProcessor)) {
		processor = requestedProcessor;
	} else {
		process.stderr.write(
			`Error: processor must be 'html' or 'pug', got '${requestedProcessor}'\n`,
		);
		process.exit(1);
	}
}

// `--ignore` is a single string when given once and an array when repeated.
const ignorePatterns = cliOptions.ignore
	? Array.isArray(cliOptions.ignore)
		? cliOptions.ignore
		: [cliOptions.ignore]
	: undefined;

const excludeDirs = cliOptions['exclude-dirs']
	? parseList(cliOptions['exclude-dirs']).map((dir) => dir.trim())
	: undefined;

try {
	const summary = await scanDirectory(directory, selector, {
		extensions,
		processor,
		verbose: cliOptions.verbose,
		ignore: ignorePatterns,
		excludeDirs,
	});

	// Print the results.
	if (summary.results.length === 0) {
		process.stdout.write('検索結果: 見つかりませんでした\n');
	} else {
		process.stdout.write('検索結果:\n');
		for (const result of summary.results) {
			process.stdout.write(`  ${result.filePath}: ${result.count}件\n`);
		}
		process.stdout.write('\n');
		process.stdout.write(
			`合計: ${summary.totalFiles}ファイル, ${summary.totalMatches}件の要素が見つかりました\n`,
		);
	}
} catch (error) {
	// Report failures as a short message rather than an unhandled rejection;
	// the stack trace is only shown when verbose logging was requested.
	const message = error instanceof Error ? error.message : String(error);
	process.stderr.write(`Error: ${message}\n`);
	if (cliOptions.verbose && error instanceof Error && error.stack) {
		process.stderr.write(`${error.stack}\n`);
	}
	process.exit(1);
}

process.exit(0);
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
export { scanDirectory } from './scanner.js';
2+
export { parseFile, getDefaultProcessor } from './parser.js';
3+
export type { ScanOptions, ScanResult, ScanSummary, ProcessorType } from './types.js';
4+
export { DEFAULT_PROCESSOR_MAP, DEFAULT_EXTENSIONS } from './types.js';
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import type { ProcessorType } from './types.js';
2+
3+
import { readFile } from 'node:fs/promises';
4+
import path from 'node:path';
5+
6+
import * as cheerio from 'cheerio';
7+
// @ts-expect-error - pug doesn't have type definitions
8+
import pug from 'pug';
9+
10+
import { DEFAULT_PROCESSOR_MAP } from './types.js';
11+
12+
/**
 * Picks the default processor for a file from its extension.
 *
 * The extension is taken without its leading dot and lower-cased, then looked
 * up in `DEFAULT_PROCESSOR_MAP`. Files whose extension has no entry (including
 * files without an extension) fall back to `'html'`.
 *
 * @param filePath - Path of the file; only its extension is inspected.
 * @returns The processor registered for the extension, or `'html'`.
 */
export function getDefaultProcessor(filePath: string): ProcessorType {
	const ext = path.extname(filePath).slice(1).toLowerCase();

	// Only the map's own keys count. A plain property lookup would also find
	// inherited members such as `constructor` and return a non-processor value.
	if (!Object.prototype.hasOwnProperty.call(DEFAULT_PROCESSOR_MAP, ext)) {
		return 'html';
	}

	return DEFAULT_PROCESSOR_MAP[ext] ?? 'html';
}
20+
21+
/**
 * Counts the elements in an HTML string that match a CSS selector.
 *
 * @param html - HTML markup to load with cheerio.
 * @param selector - CSS selector to query.
 * @returns Number of matched elements.
 */
function parseHTML(html: string, selector: string): number {
	const matched = cheerio.load(html)(selector);
	return matched.length;
}
30+
31+
/**
 * Compiles Pug source to HTML and counts the elements matching a selector.
 *
 * @param pugContent - Pug template source.
 * @param selector - CSS selector to query against the rendered HTML.
 * @param filename - Path of the template file. Pug requires it to resolve
 *   relative `include`/`extends`; without it only absolute paths (resolved
 *   against `basedir`) can be used.
 * @returns Number of matched elements in the rendered output.
 * @throws Error prefixed with `Pugコンパイルエラー: ` when compiling, rendering
 *   or querying the template fails.
 */
function parsePug(pugContent: string, selector: string, filename?: string): number {
	try {
		const compileFunction = pug.compile(pugContent, {
			basedir: process.cwd(),
			// Only set `filename` when known so the options are unchanged otherwise.
			...(filename === undefined ? {} : { filename }),
		});
		const html = compileFunction();
		return parseHTML(html, selector);
	} catch (error) {
		throw new Error(
			`Pugコンパイルエラー: ${error instanceof Error ? error.message : String(error)}`,
		);
	}
}

/**
 * Reads a file and counts the elements in it that match a CSS selector.
 *
 * @param filePath - Path of the file to read (as UTF-8).
 * @param selector - CSS selector to query.
 * @param processor - Processor to use; when omitted it is chosen from the
 *   file extension via `getDefaultProcessor`.
 * @returns Number of matched elements.
 * @throws Error when the processor is neither `'html'` nor `'pug'`.
 */
export async function parseFile(
	filePath: string,
	selector: string,
	processor?: ProcessorType,
): Promise<number> {
	const content = await readFile(filePath, 'utf8');
	const actualProcessor = processor ?? getDefaultProcessor(filePath);

	switch (actualProcessor) {
		case 'html': {
			return parseHTML(content, selector);
		}
		case 'pug': {
			// Pass the path so relative includes/extends resolve from the file.
			return parsePug(content, selector, filePath);
		}
		default: {
			throw new Error(`Unknown processor: ${actualProcessor}`);
		}
	}
}

0 commit comments

Comments
 (0)