Skip to content

Commit

Permalink
Feature/db (#2)
Browse files Browse the repository at this point in the history
* add db logic

* fix: migration script db connection

* code cleanup

* change relationship type

* fix saving to db

* code cleanup

* code cleanup

* add check duplicate logic

* fix: update listing price after change

* lock python version to 3.12.7

* change black action

* add black config
  • Loading branch information
mevljas authored Oct 12, 2024
1 parent 22dc142 commit f25b974
Show file tree
Hide file tree
Showing 15 changed files with 599 additions and 238 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.12"]
python-version: ["3.12.7"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -23,7 +23,8 @@ jobs:
run: |
poetry run pylint $(git ls-files '*.py')
- name: Check files using the black formatter
uses: rickstaa/action-black@v1
uses: psf/black@stable
id: action_black
with:
black_args: ". --check"
options: "--check"
use_pyproject: true
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,4 @@ fabric.properties

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
*.sqlite
2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/nepremicnine-discord-bot.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,16 @@ Alternatively you can set it up using Pycharm.
poetry install
```

### Install the [Pytest plugin](https://pypi.org/project/pytest-playwright/):

```bash
pip install pytest-playwright
```

### Install Playwright browsers (chromium, firefox, webkit)

```bash
playwright install
```

### Run database migrations
### Setup the database

```bash
python migrate.py
python setup_db.py
```

## Run the local browser
Expand Down
Empty file added database/__init__.py
Empty file.
166 changes: 166 additions & 0 deletions database/database_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""
Database manager module.
"""

import threading
from asyncio import current_task
from datetime import datetime

from sqlalchemy import exc, select, Result
from sqlalchemy.ext.asyncio import (
async_sessionmaker,
create_async_engine,
AsyncEngine,
AsyncSession,
async_scoped_session,
)

from database.models import meta, Listing, Price
from logger.logger import logger


class DatabaseManager:
"""
Class for interacting with the database.
"""

def __init__(self, url: str):
self.db_connections = threading.local()
self.url = url

def async_engine(self) -> AsyncEngine:
"""
Returns the async engine.
"""
if not hasattr(self.db_connections, "engine"):
logger.debug("Getting async engine.")
self.db_connections.engine = create_async_engine(self.url)
logger.debug("Creating database engine finished.")
return self.db_connections.engine

def async_session_factory(self) -> async_sessionmaker:
"""
Returns the async session factory.
:return:
"""
logger.debug("Getting async session factory.")
if not hasattr(self.db_connections, "session_factory"):
engine = self.async_engine()
self.db_connections.session_factory = async_sessionmaker(bind=engine)
return self.db_connections.session_factory

def async_scoped_session(self) -> async_scoped_session[AsyncSession]:
"""
Returns the async scoped session.
:return:
"""
logger.debug("Getting async scoped session.")
if not hasattr(self.db_connections, "scoped_session"):
session_factory = self.async_session_factory()
self.db_connections.scoped_session = async_scoped_session(
session_factory, scopefunc=current_task
)
return self.db_connections.scoped_session

async def cleanup(self):
"""
Cleans up the database engine.
:return:
"""
logger.debug("Cleaning database engine.")

await self.db_connections.engine.dispose()
logger.debug("Cleaning database finished.")

async def create_models(self):
"""
Creates all required database tables from the declared models.
"""
logger.debug("Creating ORM modules.")
async with self.async_engine().begin() as conn:
await conn.run_sync(meta.create_all)
logger.debug("Finished creating ORM modules.")

async def add_new_price(
self,
listing_id: int,
current_price: float,
):
"""
Adds a new price to the listing.
"""
logger.debug("Saving new price in the database.")
async with self.async_session_factory()() as session:

listing = session.get(Listing, listing_id)

price = Price(accessed_time=datetime.now(), price=current_price)
session.add(price)
listing.prices.append(price)

await session.flush()
await session.commit()

logger.debug("Price saved.")

async def save_listing(
self,
item_id: str,
data: tuple[str, str | None, str, float, float, int, str | None, str | None],
):
"""
Saved a crawled listing to the db.
"""
logger.debug("Saving new listing %s to the database.", item_id)

_, _, _, current_price, _, _, _, url = data

async with self.async_session_factory()() as session:
try:

listing = Listing(
url=url,
accessed_time=datetime.now(),
nepremicnine_id=item_id,
)

session.add(listing)

price = Price(accessed_time=datetime.now(), price=current_price)
session.add(price)
listing.prices.append(price)

await session.flush()
await session.commit()
logger.debug("New listing saved to the database.")
except exc.SQLAlchemyError as e:
await session.rollback()
logger.warning("Error saving listing to the database with error: %s", e)
# listing: Listing = (
# (
# await session.execute(
# select(Listing).where(Listing.url == listing.url).limit(1)
# )
# )
# .scalars()
# .first()
# )

async def get_listings(self):
"""
Returns all listings.
"""
logger.debug("Getting all listings from the database.")
async with self.async_session_factory()() as session:
# Select all listing and join them with their last price (by date).
stmt = (
select(Listing.nepremicnine_id, Listing.id, Price.price)
.join(Price)
.order_by(Price.accessed_time.desc())
.distinct(Listing.id)
)
result: Result = await session.execute(stmt)
logger.debug("Getting all listings finished.")

# return a dictionary of listings. Use id as the key and price as the value.
return {item[0]: (item[1], item[2]) for item in result.all()}
68 changes: 68 additions & 0 deletions database/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# pylint: disable=too-few-public-methods
"""
This module contains the SQLAlchemy models for the database.
"""

import enum
from typing import List

from sqlalchemy import (
Column,
Integer,
String,
DateTime,
MetaData,
ForeignKey,
)
from sqlalchemy.orm import declarative_base, Mapped, relationship

meta = MetaData()
Base = declarative_base(metadata=meta)


class ListingType(enum.Enum):
"""
Enum for listing type.
Currently not used.
"""

SELLING = 1
RENTING = 2


class PropertyType(enum.Enum):
"""
Enum for property type.
Currently not used.
"""

APARTMENT = 1
HOUSE = 2


class Listing(Base):
"""
A search results table. It stores found apartments and houses.
"""

__tablename__ = "listing"

id: Mapped[int] = Column(Integer, primary_key=True, autoincrement=True)
nepremicnine_id: Mapped[str] = Column(String(50), unique=True)
url: Mapped[str] = Column(String(150), unique=True)
accessed_time = Column(DateTime)
prices: Mapped[List["Price"]] = relationship(back_populates="listing")


class Price(Base):
"""
A table that stores the current and previous prices.
"""

__tablename__ = "history"

id: Mapped[int] = Column(Integer, primary_key=True, autoincrement=True)
price: Mapped[float] = Column(Integer, unique=False)
accessed_time = Column(DateTime)
listing_id: Mapped[int] = Column(ForeignKey("listing.id"))
listing: Mapped["Listing"] = relationship(back_populates="prices")
54 changes: 54 additions & 0 deletions db_setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Module for setting up the database.
"""

import asyncio
import os

from dotenv import load_dotenv

from database.database_manager import DatabaseManager
from logger.logger import logger


def load_env() -> (str, str, str):
"""
Load ENV variables.
:return: postgres_user, postgres_password, postgres_db
"""
load_dotenv()
postgres_user = os.getenv("POSTGRES_USER")
postgres_password = os.getenv("POSTGRES_PASSWORD")
postgres_db = os.getenv("POSTGRES_DB")
return postgres_user, postgres_password, postgres_db


async def main():
"""
Main function.
:return:
"""
logger.info("DB setup started.")

# Load env variables.
# postgres_user, postgres_password, postgres_db = load_env()

# Delete existing database if it exists.
if os.path.exists("nepremicnine_database.sqlite"):
os.remove("nepremicnine_database.sqlite")

# Setup database manager.
database_manager = DatabaseManager(
url="sqlite+aiosqlite:///nepremicnine_database.sqlite"
)

# Create database tables.
await database_manager.create_models()

# Clean database manager.
await database_manager.cleanup()
logger.info("DB setup finished.")


if __name__ == "__main__":
asyncio.run(main())
6 changes: 3 additions & 3 deletions logger/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ def __init__(self, fmt: str):
self.level_to_formatter = {}

def add_color_format(level: int):
color = ColorizedArgsFormatter.level_to_color[level]
color = ColorizedArgsFormatter.level_to_color[level].value
_format = fmt
for fld in ColorizedArgsFormatter.level_fields:
search = r"(%\(" + fld + r"\).*?s)"
_format = re.sub(search, f"{color}\\1{ColorCodes.RESET}", _format)
_format = re.sub(search, f"{color}\\1{ColorCodes.RESET.value}", _format)
formatter = logging.Formatter(_format)
self.level_to_formatter[level] = formatter

Expand Down Expand Up @@ -81,7 +81,7 @@ def rewrite_record(record: logging.LogRecord):
color_index = placeholder_count % len(ColorizedArgsFormatter.arg_colors)
color = ColorizedArgsFormatter.arg_colors[color_index]
msg = msg.replace("_{{", color + "{", 1)
msg = msg.replace("_}}", "}" + ColorCodes.RESET, 1)
msg = msg.replace("_}}", "}" + ColorCodes.RESET.value, 1)
placeholder_count += 1

record.msg = msg.format(*record.args)
Expand Down
Loading

0 comments on commit f25b974

Please sign in to comment.