-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyproject.toml
More file actions
102 lines (93 loc) · 2.55 KB
/
pyproject.toml
File metadata and controls
102 lines (93 loc) · 2.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Build backend configuration (PEP 517/518).
# setuptools >= 77.0.3 is required because [project] declares `license` as an
# SPDX expression string (PEP 639); older setuptools releases only accept the
# table form of `license` and reject this metadata at build time.
[build-system]
requires = ["setuptools>=77.0.3", "setuptools-scm>=8", "wheel"]
build-backend = "setuptools.build_meta"
# Core package metadata (PEP 621). The version is not written here: it is
# declared dynamic and filled in by the build backend.
[project]
name = "websweep"
description = "A user-friendly and high-speed web scraping library."
readme = "README.md"
license = "MIT"
requires-python = ">=3.10"
dynamic = ["version"]
authors = [{ name = "ODISSEI Social Data Science" }]
keywords = [
    "web scraping",
    "crawler",
    "data extraction",
    "research",
    "duckdb",
]
# Trove classifiers: maturity, audience, supported Python versions, platform
# and topics.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Operating System :: OS Independent",
    "Topic :: Internet :: WWW/HTTP",
    "Topic :: Scientific/Engineering :: Information Analysis",
]
# Runtime requirements as PEP 508 specifiers, one per line, in alphabetical
# order. Only lower bounds are pinned.
dependencies = [
    "aiohttp>=3.8.1",
    "beautifulsoup4>=4.12.0",
    "duckdb>=1.1.3",
    "ftfy>=6.1.1",
    # No environment marker here: requires-python already limits installs to
    # Python >= 3.10, so a `python_version >= '3.10'` marker is always true.
    "google-re2>=1.1",
    "html2text>=2020.1.16",
    "jellyfish>=0.9.0",
    "lxml>=4.9.2",
    "moment>=0.12.1",
    "multiprocess>=0.70.13",
    "numpy>=1.24.2",
    "pandas>=1.4.2",
    "peewee>=3.14.10",
    "protego>=0.2.1",
    "regex>=2023.12.25",
    "tldextract>=3.3.0",
    "tqdm>=4.64.1",
    "typer>=0.4.1",
]
# Project links published in the package metadata (PEP 621 `urls` table).
# Homepage and Repository both point at the GitHub repository.
[project.urls]
Homepage = "https://github.com/sodascience/websweep"
Documentation = "https://websweep.readthedocs.io/en/latest/"
Repository = "https://github.com/sodascience/websweep"
Issues = "https://github.com/sodascience/websweep/issues"
Releases = "https://github.com/sodascience/websweep/releases"
# Console entry point: installs a `websweep` command that invokes the `app`
# object from the `websweep.main` module.
# NOTE(review): `app` is presumably a Typer application (typer is a runtime
# dependency) - confirm against src/websweep/main.py.
[project.scripts]
websweep = "websweep.main:app"
# Optional extras, installable as `websweep[docs]` / `websweep[test]`.
# The same package lists also appear under [dependency-groups].
[project.optional-dependencies]
# Documentation build toolchain.
docs = [
    "nbsphinx>=0.9.5",
    "sphinx>=7.4.7",
    "sphinx-rtd-theme>=2.0.0",
    "sphinxcontrib-napoleon>=0.7",
]
# Test runner.
test = [
    "pytest>=8.0.0",
]
# setuptools packaging layout: the root package namespace maps to src/.
[tool.setuptools]
include-package-data = true
package-dir = { "" = "src" }

# Automatic package discovery, searching only the src/ directory.
[tool.setuptools.packages.find]
where = ["src"]

# Non-Python data files bundled with the `websweep` package.
[tool.setuptools.package-data]
websweep = ["utils/public_suffix_list.dat"]
# Version derivation from source-control tags via setuptools-scm.
[tool.setuptools_scm]
# Version reported when none can be derived from SCM metadata.
fallback_version = "0.0.0"
version_scheme = "no-guess-dev"
# Leave out the local version segment (the "+..." part) of generated versions.
local_scheme = "no-local-version"
# Accepted tag shapes: optional leading "v", then MAJOR.MINOR with an optional
# .PATCH and an optional suffix, e.g. "v1.2", "1.2.3" or "v1.2.3rc1".
# Written as a literal string so the regex needs no double escaping.
tag_regex = '^(?:v)?(?P<version>\d+\.\d+(?:\.\d+)?(?:[a-zA-Z0-9\.\-+]*)?)$'
# Development dependency groups (PEP 735). `test` and `docs` carry the same
# package lists as the extras of the same names under
# [project.optional-dependencies].
[dependency-groups]
# Test runner.
test = [
    "pytest>=8.0.0",
]
# Documentation build toolchain.
docs = [
    "nbsphinx>=0.9.5",
    "sphinx>=7.4.7",
    "sphinx-rtd-theme>=2.0.0",
    "sphinxcontrib-napoleon>=0.7",
]
# Build, type-checking, linting and release-upload tooling.
dev = [
    "build>=1.2.2",
    "mypy>=1.8.0",
    "ruff>=0.7.0",
    "twine>=5.1.1",
]