Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
173 commits
Select commit Hold shift + click to select a range
2b29858
Update gitignore
D0ugins Jul 28, 2022
69eeeda
Fix closing of tags at the end of cards
D0ugins Feb 20, 2022
acb920b
Switch to 64 bit file ids
D0ugins Feb 20, 2022
94c21c9
Improve error messages
D0ugins Jun 1, 2022
522891a
Containerize app
D0ugins Jun 26, 2022
7445516
Add database migrations to git
D0ugins Jun 26, 2022
f5076d7
Update migrations
D0ugins Jul 28, 2022
7aba9ec
Sync to elasticsearch with pgsync
D0ugins Jul 3, 2022
9c03bae
Add evidence set id to evidence index
D0ugins Jul 28, 2022
37612cc
Clean up dependencies
D0ugins Aug 6, 2022
2465389
Add typescript client for caselist api
D0ugins Aug 10, 2022
f2b720a
Switch to generating api client from local copy of schema
D0ugins Aug 13, 2022
5e61bc9
Update school model in caselist api
D0ugins Aug 13, 2022
0a0e394
Fix incorrect route name and description
D0ugins Aug 13, 2022
3f44e07
Ratelimit requests to caselist api
D0ugins Aug 13, 2022
edaf50c
Fix round api schema
D0ugins Aug 14, 2022
52a7357
Fix team api schema
D0ugins Aug 14, 2022
f609be1
Fix caselist api schema
D0ugins Aug 14, 2022
66ab502
Add caselist data to prisma schema
D0ugins Aug 14, 2022
ad4c980
Add required properties to caselist api schema
D0ugins Aug 14, 2022
87e7665
Fix archived field
D0ugins Aug 15, 2022
09df0bb
Add downloading of round data from caselist
D0ugins Aug 15, 2022
650ec64
Add opensource file to round schema
D0ugins Aug 16, 2022
154be3a
Fix download api schema
D0ugins Aug 17, 2022
75f43c2
Add downloading file from caselist
D0ugins Aug 17, 2022
88f8aae
Fix openev file api schema
D0ugins Aug 17, 2022
939954d
Add openev downloading
D0ugins Aug 17, 2022
3d41ae8
Fix tags prisma schema
D0ugins Aug 17, 2022
7f48345
Add tags to file downloaded from openev
D0ugins Aug 17, 2022
f6f306c
Pass entire parent object in school and team loaded events
D0ugins Aug 17, 2022
618b538
Add tags to opensource files
D0ugins Aug 17, 2022
fbeb1bc
Remove evidenceSet entity
D0ugins Aug 17, 2022
8623950
Prevent concurrent addFile actions
D0ugins Aug 17, 2022
8d9f0b3
Improve tags for openev files
D0ugins Aug 17, 2022
3955f25
Skip empty cites
D0ugins Aug 17, 2022
c4f7c6a
Fix occasional rate limit fail for downloading files
D0ugins Aug 17, 2022
9f1b450
Remove evidenceSet from pgsync schema
D0ugins Aug 18, 2022
e978a71
Fix trimming of text while parsing
D0ugins Jul 16, 2022
f98845c
Update @types/node
D0ugins Jul 17, 2022
184b28f
API Setup
D0ugins Jul 15, 2022
807010b
Basic evidence query
D0ugins Jul 16, 2022
dac1954
Add file and evidenceSet to evidence resolver
D0ugins Jul 16, 2022
d2ad165
Add file query to api
D0ugins Aug 18, 2022
e6dc07c
Factor out base resolver logic
D0ugins Jul 16, 2022
7307030
Add resolvers for caselist data
D0ugins Jul 16, 2022
f8faa6a
Add evidenceBucket resolver
D0ugins Aug 18, 2022
0d3b4c3
Add basic search to api
D0ugins Jul 17, 2022
ffc77ac
Fix search results being returned in the wrong order
D0ugins Aug 18, 2022
9f1712f
Fix bug in pgsync
D0ugins Aug 19, 2022
16a4c59
Add evidence bucket count and tags to elastic index
D0ugins Aug 19, 2022
e257bfb
Boost scores for commonly read cards
D0ugins Jul 24, 2022
e37171e
Add customization of fields being searched
D0ugins Aug 19, 2022
be74eec
Fix name of tag table
D0ugins Aug 19, 2022
9d9cb30
Add evidence resolver
D0ugins Aug 19, 2022
4b12b4a
Add filtering search by tags
D0ugins Aug 19, 2022
e02668d
Add customization of weighing of relevance and number of duplicates
D0ugins Aug 19, 2022
eea0a55
Add specifying number of results wanted from search
D0ugins Aug 19, 2022
39c1bee
Improve organization of query code
D0ugins Aug 19, 2022
9bcf9bf
Squash migrations
D0ugins Aug 19, 2022
8cb6bc2
Fix error handling in addFile action
D0ugins Aug 19, 2022
3da9c99
Fix file parsing erorr handling
D0ugins Aug 20, 2022
d86c77d
Fix issue searching documents with no evidence bucket
D0ugins Aug 20, 2022
3ebbbb9
Add unique constraints to caselist data
D0ugins Sep 25, 2022
113fe4a
Add querying of caselist data by name
D0ugins Sep 25, 2022
c960dd0
Add querying of files by tag
D0ugins Sep 25, 2022
2c942c1
Remove unnecessary redis imports
D0ugins Sep 28, 2022
9e4315c
Clean up resolver code
D0ugins Sep 28, 2022
c7ba60a
Add round field to file in api
D0ugins Sep 28, 2022
46fea61
Switch to using fluent api for relation queries
D0ugins Sep 28, 2022
254d140
Add basic pagination
D0ugins Sep 28, 2022
c76dbd1
Add skip parameter to search
D0ugins Sep 29, 2022
68cb153
Add pagination to tagFiles query
D0ugins Sep 29, 2022
e061197
Make pagination optional by default
D0ugins Sep 29, 2022
4dfec0b
Fix typo in compose file
D0ugins Oct 1, 2022
2bd6359
Add basic authorization
D0ugins Oct 2, 2022
3afdc12
Add indexes to one to many relations
D0ugins Oct 2, 2022
2b7eddc
Add query complexity calculation
D0ugins Oct 2, 2022
276f80d
Move api constants to constants folder
D0ugins Oct 3, 2022
665bdda
Move query complexity calculation to seperate file
D0ugins Oct 3, 2022
a4f6a03
Add complexity based rate limiting
D0ugins Oct 3, 2022
459072e
Add user info query
D0ugins Oct 3, 2022
a1f22db
Fix formatting inconsistency in models
D0ugins Oct 3, 2022
d002432
Fix name of rateLimit.ts
D0ugins Oct 3, 2022
b199c6c
Add basic admin authentication
D0ugins Oct 5, 2022
15e7a77
Change folder structure of models
D0ugins Oct 5, 2022
10ee842
Seperate relation fields in caselist models
D0ugins Oct 6, 2022
fd1370e
Add name to action queues
D0ugins Oct 7, 2022
93c1d36
Add models for task queues
D0ugins Oct 10, 2022
c59a8cf
Add ipc capabilites to task runner
D0ugins Oct 10, 2022
11bd816
Add query for task info
D0ugins Oct 10, 2022
61fc989
Switch to explicit type imports
D0ugins Oct 12, 2022
fe48a2a
Add types to queue ipc requests
D0ugins Oct 12, 2022
96f6e72
Switch to sending object in ipc requests
D0ugins Oct 12, 2022
bbb1659
Switch to field resolvers for queue api
D0ugins Oct 13, 2022
4952c5b
Add pagination to queue api
D0ugins Oct 13, 2022
1a3350f
Add mutation to load tasks into action queues
D0ugins Oct 16, 2022
0ef10fe
Fix types in load mutation
D0ugins Oct 17, 2022
926b17d
Improve error handling in ipc requests
D0ugins Oct 18, 2022
8c3861f
Add more options to queue loaders
D0ugins Oct 18, 2022
5492c02
Load more recent years first
D0ugins Oct 18, 2022
3d836ea
Update which fields are optional on rounds
D0ugins Oct 19, 2022
a2865bc
Add debugging
D0ugins Oct 23, 2022
5266719
Switch to pipeline for file parsing
D0ugins Oct 23, 2022
ca9bb73
Fix call stack error
D0ugins Oct 23, 2022
5491d13
Move adding opensource to seperate action
D0ugins Oct 23, 2022
ffeeca9
Add queue for open source additions
D0ugins Oct 23, 2022
6d385a1
Fix name of deduplication action
D0ugins Oct 26, 2022
1be9b79
Update redis client version
D0ugins Oct 31, 2022
bc5f2c6
Add take parameter to dedup queue load
D0ugins Oct 31, 2022
5ad234e
Switch to native redis locks for deduplication
D0ugins Oct 31, 2022
2545442
Add clear queue command to admin api
D0ugins Nov 2, 2022
9399177
Improve dedup queue performance
D0ugins Nov 2, 2022
2757398
Fix evidence match filtering
D0ugins Nov 20, 2022
95a1591
Clean up
D0ugins Dec 1, 2022
1a1b226
Update docker setup
D0ugins Dec 22, 2022
f05629a
Fix error message
D0ugins Dec 22, 2022
ff3ad4f
Change deduplication system
D0ugins Dec 22, 2022
db27a41
Reorder methods
D0ugins Dec 23, 2022
2bd92a8
Fix deletion of empty buckets
D0ugins Dec 23, 2022
7b2e75a
Improve error handling
D0ugins Dec 23, 2022
ef2703c
Fix typo
D0ugins Dec 24, 2022
79395d8
Fix reprocessing of cards
D0ugins Dec 24, 2022
fa06f2b
Watch when creating redis key
D0ugins Dec 24, 2022
6b913d1
Fix bucket set deletions
D0ugins Dec 24, 2022
3adf571
Change BucketSet match criteria
D0ugins Dec 27, 2022
1e63b44
Fix corner case in BucketSet resolution
D0ugins Dec 27, 2022
d008a3d
Improve error handling
D0ugins Dec 27, 2022
a50e3b2
Fix issue with redis locking
D0ugins Dec 27, 2022
c1a8754
Config updates
D0ugins Dec 27, 2022
b43ad9d
Fix eslint
D0ugins Dec 27, 2022
ae36c1e
Actually fix BucketSet resolution corner case
D0ugins Dec 27, 2022
88816d4
Fix infinite loop caused by outdated reads
D0ugins Dec 28, 2022
4d69223
Fix crash when erroring at wrong time
D0ugins Dec 28, 2022
e10e3fe
Handle cards being reprocessed because of failed database writes
D0ugins Dec 30, 2022
e9a4af3
Clean up error handling
D0ugins Dec 30, 2022
6138438
Fix renaming of SubBuckets
D0ugins Dec 30, 2022
ec8630e
Fix redis connection pool
D0ugins Dec 31, 2022
0823bae
Improve peformance of reprocessing cards
D0ugins Dec 31, 2022
4a20f2a
Fix performance issue with shouldMerge function
D0ugins Dec 31, 2022
1bbedc4
Fix double calculation of sentences
D0ugins Jan 2, 2023
639adcf
Ignore text in quotes for matching
D0ugins Jan 2, 2023
f0a4a31
Improve BucketSet matching algorithm
D0ugins Jan 3, 2023
0fa818c
Fix quote regex
D0ugins Jan 3, 2023
775cd7a
Improve sentence splitting
D0ugins Jan 9, 2023
b0b5c61
Improve SHOULD_MERGE function for large buckets
D0ugins Jan 14, 2023
c40c2f6
Logging
D0ugins Jan 15, 2023
a1cfed8
Improve card match filtering
D0ugins Jan 15, 2023
0ec17ef
Performance improvements
D0ugins Jan 15, 2023
f2177bf
Add minimum number of matching sentences
D0ugins Jan 15, 2023
c483e9c
Improve sentence splitting
D0ugins Jan 15, 2023
0ef9736
Performance improvment to resolving bucketSets
D0ugins Jan 15, 2023
1dae7fa
More performance improvment for resolving bucketSets
D0ugins Jan 15, 2023
08f3663
Refactor redis entities
D0ugins Jan 18, 2023
a54ffa4
Add maximum depth for card reprocessing
D0ugins Jan 18, 2023
c687399
Fix depth check in card reprocessing
D0ugins Jan 20, 2023
55ddb3d
Update prisma
D0ugins Jan 20, 2023
6f97bbe
Move adding round to seperate action
D0ugins Jan 19, 2023
eb42714
Implment requests to caselist api with different priority
D0ugins Jan 19, 2023
2ccf3d2
Set up update processing queue
D0ugins Jan 20, 2023
d907a39
Fix caselist caselist api schema for getRecent
D0ugins Jan 20, 2023
e2bc8a3
Fix debugging for api
D0ugins Jan 20, 2023
9225dc9
Update caselist api schema with date fields
D0ugins Jan 20, 2023
065bb36
Add caselistUpdatedAt field to Round schema
D0ugins Jan 20, 2023
ce0be9c
Skip adding rounds that already have the most updated version
D0ugins Jan 20, 2023
dc21487
Load recently changed rounds
D0ugins Jan 20, 2023
57f59b1
Update pgsync
D0ugins Jan 21, 2023
04b54bb
Fix pgsync checkpoints
D0ugins Jan 22, 2023
98b55f9
Move file by tags input to seperate file
D0ugins Jan 22, 2023
781fc74
Improve order in schema.json
D0ugins Jan 22, 2023
f0005f2
Add cites to elastic index
D0ugins Jan 22, 2023
823c70b
Implement cite search
D0ugins Jan 22, 2023
2145b95
Implement round report search
D0ugins Jan 22, 2023
1530c37
Remove complexity limit for admins
D0ugins Jan 22, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
API_PREFIX=/v1

SOLR_PORT=<MY_SOLR_PORT>
SOLR_CORE=<MY_SOLR_CORE>
SOLR_HOST=<MY_SOLR_HOST>

MONGO_DEV_CONN_URL=mongodb://<MY_MONGO_INSTANCE>
MONGO_LOCAL_CONN_URL=mongodb://<MY_MONGO_INSTANCE>
MONGO_DB_NAME=<MY_DB_NAME>
# Sample environment file: copy to .env and replace the placeholder values.

# PostgreSQL connection string for the application database.
DATABASE_URL=postgresql://username:password@localhost:5432/debate-cards
# Host directory that compose.yaml mounts at /app/documents in the tasks service.
DOCUMENT_PATH=./documents
# compose.yaml forwards POSTGRES_USER and POSTGRES_PASSWORD to pgsync as
# PG_USER / PG_PASSWORD, and loads this whole file into the postgres service.
# NOTE(review): POSTGRES_DB differs from the database name in DATABASE_URL
# above - confirm whether the two are meant to match.
POSTGRES_USER=username
POSTGRES_PASSWORD=password
POSTGRES_DB=dbname
# Required by compose.yaml: redis is started with --requirepass ${REDIS_PASSWORD}
# and pgsync receives the same value as REDIS_AUTH.
REDIS_PASSWORD=password
ELASTIC_PASSWORD=password
CASELIST_TOKEN=send a POST request to https://api.opencaselist.com/v1/login with tabroom username and password in body
JWT_SECRET=secret
ADMIN_PASSWORD=password
15 changes: 2 additions & 13 deletions .eslintrc
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,7 @@
}
},
"rules": {
// "import/extensions": [
// "error",
// "ignorePackages",
// {
// "js": "never",
// "mjs": "never",
// "jsx": "never",
// "ts": "never",
// "tsx": "never"
// }
// ],
// "no-console": 0,
"@typescript-eslint/no-explicit-any": "off"
}
},
"ignorePatterns": ["src/constants/caselist/**/*"]
}
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ yarn-error.log
.env
.DS_Store
prisma/dev.*
prisma/migrations
.yarn/*
!.yarn/cache
!.yarn/patches
Expand All @@ -15,4 +14,5 @@ prisma/migrations
!.yarn/sdks
!.yarn/versions
*.docx
*.html
*.html
tmp
14 changes: 14 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
{
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "attach",
"name": "Debug Tasks",
"remoteRoot": "/app",
"port": 9229
},
{
"type": "node",
"request": "attach",
"name": "Debug API",
"remoteRoot": "/app",
"port": 9230
},
{
"type": "node",
"request": "launch",
Expand Down
16 changes: 16 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Development image: installs dependencies, generates the Prisma client and
# compiles the TypeScript sources under /app.
# For production, copy the built files out of this image instead of shipping it.
FROM node:19.3.0

WORKDIR /app

# Copy only the package manifests first so the dependency layer stays cached
# until they change. The glob can match more than one file, so the destination
# must be a directory path ending in "/".
# NOTE(review): no yarn.lock is copied, so `yarn` resolves versions from the
# ranges in package.json - consider copying the lockfile if the repo tracks one.
COPY package*.json ./
RUN yarn

COPY prisma prisma
# NOTE(review): this bakes the local .env (credentials included) into an image
# layer. compose.yaml already supplies it at runtime through env_file, so check
# whether `prisma generate` really needs it at build time; if it does, a
# BuildKit secret mount would keep it out of the image.
COPY .env ./
RUN npx prisma generate

# Sources are copied last because they change most often.
COPY tsconfig.json ./
COPY src src

RUN yarn run build
106 changes: 106 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Local stack: elasticsearch, postgres and redis back the app; pgsync copies
# postgres data into elasticsearch; migrate, tasks and api are built from this repo.
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.2.3
    env_file: .env
    environment:
      - xpack.security.enabled=false # Don't need for now, complicates things a lot
      - discovery.type=single-node
    healthcheck:
      test: curl -s http://elasticsearch:9200 > /dev/null || exit 1
    ports:
      - 9200:9200
    volumes:
      - elastic:/usr/share/elasticsearch/data
  postgres:
    image: postgres:14
    command: postgres -c wal_level=logical -c max_replication_slots=3 # Options for pgsync
    env_file: .env
    expose:
      - 5432
    healthcheck:
      # Probe with the configured role and database rather than the default
      # "postgres" role, which is not created when POSTGRES_USER is set.
      test: pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}
    ports:
      - 5432:5432
    volumes:
      - db:/var/lib/postgresql/data
  redis:
    image: redis:7
    command: redis-server --requirepass ${REDIS_PASSWORD}
    healthcheck:
      # The server requires a password, so authenticate and insist on an
      # actual PONG instead of accepting any reply.
      test: redis-cli -a ${REDIS_PASSWORD} --no-auth-warning ping | grep -q PONG
    ports:
      - 6379:6379
    expose:
      - 6379
    volumes:
      - redis:/data
  pgsync:
    build:
      context: .
      dockerfile: ./pgsync/Dockerfile
    depends_on:
      elasticsearch:
        condition: service_healthy
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      migrate:
        condition: service_completed_successfully
    environment:
      - PG_USER=${POSTGRES_USER}
      - PG_HOST=postgres
      - PG_PASSWORD=${POSTGRES_PASSWORD}
      - ELASTICSEARCH_PORT=9200
      - ELASTICSEARCH_SCHEME=http
      - ELASTICSEARCH_HOST=elasticsearch
      - REDIS_HOST=redis
      - REDIS_AUTH=${REDIS_PASSWORD}
      - CHECKPOINT_PATH=/checkpoint/
      - ELASTICSEARCH_TIMEOUT=20
      - ELASTICSEARCH_CHUNK_SIZE=250
    volumes:
      - pgsyncCheckpoint:/checkpoint
  tasks:
    build: .
    command: npm run start:tasks
    depends_on:
      - postgres
      - redis
    env_file: .env
    ports:
      - 9229:9229 # Debugging
    profiles:
      - tasks # Don't start by default
    volumes:
      - ${DOCUMENT_PATH:-./documents}:/app/documents
  api:
    build: .
    command: npm run dev:api
    depends_on:
      - postgres
      - elasticsearch
    env_file: .env
    ports:
      - 4000:4000
      - 5555:5555 # Prisma studio port
      - 9230:9230 # Debugging
    expose:
      - 4000
    volumes:
      - ./src:/app/src
  migrate:
    build:
      context: .
      dockerfile: ./prisma/Dockerfile
    depends_on:
      # Wait until postgres accepts connections, not merely until its container
      # has started: pgsync is gated on this service completing successfully.
      postgres:
        condition: service_healthy
    volumes:
      - ./prisma/migrations:/app/prisma/migrations # Migrations
      - ./node_modules/.prisma:/app/node_modules/.prisma # Client code

volumes:
  db: {}
  elastic: {}
  redis: {}
  pgsyncCheckpoint: {}
15 changes: 15 additions & 0 deletions openapitools.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"$schema": "./node_modules/@openapitools/openapi-generator-cli/config.schema.json",
"spaces": 2,
"generator-cli": {
"version": "6.0.1",
"generators": {
"caselist": {
"generatorName": "typescript-node",
"inputSpec": "./src/constants/caselistSchema.json",
"output": "#{cwd}/src/constants/caselist",
"skipValidateSpec": true
}
}
}
}
53 changes: 31 additions & 22 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,62 @@
"version": "0.0.0",
"private": true,
"scripts": {
"prestart": "npm run build",
"start": "node ./build",
"dev": "ts-node-dev --respawn -r tsconfig-paths/register ./src/",
"build": "tsc --project tsconfig.json && tscpaths -p tsconfig.json -s ./src -o ./build",
"test": "jest"
"prestart": "npx prisma migrate deploy",
"generate": "npx openapi-generator-cli generate",
"start": "npm run start:tasks & node ./build/modules/api",
"start:tasks": "node --inspect=0.0.0.0:9229 ./build/tasks",
"start:api": "node --inspect=0.0.0.0:9230 ./build/modules/api",
"dev": "ts-node-dev --poll --respawn -r tsconfig-paths/register ./src/",
"dev:api": "ts-node-dev --transpile-only --poll --respawn --inspect=0.0.0.0:9230 -r tsconfig-paths/register ./src/modules/api"
},
"dependencies": {
"@prisma/client": "^3.8.1",
"@elastic/elasticsearch": "^8.2.1",
"@openapitools/openapi-generator-cli": "^2.5.1",
"@prisma/client": "^4.9.0",
"apollo-server-express": "^3.10.2",
"cheerio": "1.0.0-rc.3",
"class-validator": "^0.13.2",
"docx": "^6.0.3",
"dotenv": "^6.2.0",
"express": "^4.18.1",
"graphql": "^15.0.0",
"graphql-fields": "^2.0.3",
"graphql-query-complexity": "^0.12.0",
"htmlparser2": "^7.2.0",
"jsonwebtoken": "^8.5.1",
"limiter": "^2.1.0",
"lodash": "^4.17.15",
"mammoth": "^1.4.19",
"node-pandoc-promise": "^0.0.6",
"p-ratelimit": "^1.0.1",
"redis": "^4.0.4",
"sqlite3": "^5.0.0",
"tmp-promise": "^3.0.2",
"pm2-axon": "^4.0.1",
"prisma": "^4.9.0",
"rate-limiter-flexible": "^2.3.11",
"redis": "^4.3.1",
"reflect-metadata": "^0.1.13",
"request": "^2.88.2",
"type-graphql": "^1.1.1",
"typescript-collections": "^1.3.3",
"unzipper": "^0.10.11"
},
"devDependencies": {
"@types/cheerio": "^0.22.21",
"@types/graphql-fields": "^1.3.4",
"@types/jest": "^26.0.20",
"@types/jsonwebtoken": "^8.5.9",
"@types/lodash": "^4.14.157",
"@types/mongoose": "^5.7.30",
"@types/node": "^14.0.19",
"@types/node": "^18.0.5",
"@types/unzipper": "^0.10.4",
"@typescript-eslint/eslint-plugin": "^3.6.0",
"@typescript-eslint/parser": "^3.6.0",
"babel-node": "^0.0.1-security",
"babel-plugin-module-resolver": "^4.1.0",
"babel-preset-es2015": "^6.24.1",
"babel-register": "^6.26.0",
"eslint": "^7.4.0",
"eslint-config-airbnb": "18.2.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-config-airbnb-typescript": "^15.0.0",
"eslint-config-prettier": "^6.11.0",
"eslint-import-resolver-alias": "^1.1.2",
"eslint-plugin-import": "^2.22.0",
"eslint-plugin-prettier": "^3.1.4",
"eslint-plugin-type-graphql": "^1.0.0",
"jest": "^26.6.3",
"prettier": "^2.0.5",
"prisma": "^3.8.1",
"ts-node-dev": "^1.0.0-pre.50",
"ts-node-dev": "^2.0.0",
"tsconfig-paths": "^3.12.0",
"tscpaths": "^0.0.9",
"typescript": "^4.5.2"
}
Expand Down
10 changes: 10 additions & 0 deletions pgsync/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Image for the pgsync service. compose.yaml builds it with the repository root
# as context, which is why the COPY sources below start with ./pgsync/.
FROM python:3.11

WORKDIR /app
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir pgsync==2.4.0
# Workaround: the pinned pgsync release has a bug that causes an error in
# transform.py, so the installed module is overwritten with a patched copy.
# The fix is already on pgsync's GitHub; remove this once a release includes it.
# The site-packages path must match the Python version of the base image.
COPY ./pgsync/patches/transform.py /usr/local/lib/python3.11/site-packages/pgsync/

COPY ./pgsync/start.sh ./
COPY ./pgsync/schema.json ./
CMD ["./start.sh"]
Loading