-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrequirements.txt
140 lines (140 loc) · 3.95 KB
/
requirements.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# Basic requirements for main scripts
#
# By default only the cases used in multiple scripts are installed.
#
# Note:
# - Usage examples:
# 1. Usual
# pip install -r requirements.txt
# 2. Optional (i.e., including most optional):
# perl -pe 's/^#opt#\s*//; s/\s*#.*//;' ~/Mezcla/requirements.txt | grep -E -v '^(#|$)' > _opt-requirements.list
# pip install --verbose --upgrade --requirement _opt-requirements.list
# 3. Full (i.e., including all optional), done one-by-one to avoid pip quirk:
# perl -pe 's/^#(opt|full)#\s*//; s/\s*#.*//;' ~/Mezcla/requirements.txt | grep -E -v '^(#|$)' > _all-requirements.list
# cat _all-requirements.list | xargs -I '{}' pip install --verbose --upgrade '{}'
# - use `pip freeze` to get current list of package specifications (n.b., >= better than == unless specific version needed)
# - installing textract from PyPI fails https://github.com/deanmalmgren/textract/issues/461
#
# Installation:
# - python -m nltk.downloader punkt averaged_perceptron_tagger stopwords
#
# TODO:
# - check absl module ($ grep -r "absl" .)
# - add support for this to setup.py
#...............................................................................
# Regular requirements
#
HTMLParser
absl_py
beautifulsoup4
bs4
cachetools
cherrypy
clip_interrogator
datasets # Hugging Face (HF) data
#opt# diffusers # Stable Diffusion; TEMP
diskcache
extcolors>=1.0.0
flair
flask
git+https://github.com/tehabstract/textract.git
gradio # UI support (e.g., HF-based apps)
importlib_metadata
langchain
langchain_community
#opt# kenlm # language model support (NOTE: problem with wheel)
## OLD: librosa>=0.10.0
lxml
mako
matplotlib
more_itertools
## bad: nltk
nltk==3.8.1
numpy>=1.18.5
pandas>=1.3.0
pyaml
pyctcdecode # CTC beam search decoder for speech recognition
pyenchant
pysbd
pytest
requests
scikit-learn
scipy
#opt# sentencepiece # BERT tokenization; TEMP
six
stop_words
#opt# tensorrt # NVIDIA GPU support
transformers # HF models
torch
#opt# torchaudio
webcolors
wheel
## NOTE: temporarily disabled due to stupid docker disk space constraints
## TODO: xgboost
#
#...............................................................................
# Optional requirements
#
# TODO:
# - download Spacy model(s):
# python -m spacy download en_core_web_lg
# - install bash kernel
# python -m bash_kernel.install
# - add '#opt#deprecated#' (e.g., useful for deprecated functions/modules)
# NOTE:
# - tensorflow and related not installed by default due to size of repo and C compilation overhead.
# - ipython, pylint, etc. used for setting up development environment
#
#opt# SpeechRecognition
#opt# accelerate
#opt# astor
#opt# bash_kernel
#opt# bashlex
#opt# colout
#opt# coverage
#opt# flit
#opt# gensim
#opt# ibm-watson
#opt# ibm_cloud_sdk_core
#opt# ipython
#opt# jupyter
#opt# librosa
#opt# numba
#opt# pocketsphinx
#opt# pydantic
#opt# pylint
#opt# pyyaml
#opt# scispacy
#opt# selenium
#opt# Sphinx
#
## OLD: #opt# spacy>=3.0.0
## TODO:
## note: See https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.7.0
spacy==3.7.0
en_core_web_md@https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.0/en_core_web_md-3.7.0.tar.gz
#
## See https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.2.0
## OLD
## spacy==3.2.6
## en_core_web_md@https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0.tar.gz
## typing_extensions==4.7.0
typing_extensions>=4.7.0
#
#opt# textract
#opt# youtube_transcript_api
#opt# vaderSentiment
#...............................................................................
# "Fully optional" requirements
# Note: these generally involve large or long installations.
#
#full# bert==2.2.0
#full# albert==1.3.1
#full# bert-tensorflow
#full# keras
#full# sacremoses
#full# tensorflow
#full# tensorflow_hub
#full# tensorrt
types-six
astor