From e5e48a747931608aeec175e17bbe5d5563422a62 Mon Sep 17 00:00:00 2001 From: "s.kazemi" Date: Sat, 23 Aug 2025 12:00:52 +0330 Subject: [PATCH] starrocks implementation --- modules/starrocks/example_basic.py | 90 ++++++++++++++ .../testcontainers/starrocks/__init__.py | 111 ++++++++++++++++++ modules/starrocks/tests/test_starrocks.py | 29 +++++ poetry.lock | 13 +- pyproject.toml | 2 + 5 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 modules/starrocks/example_basic.py create mode 100644 modules/starrocks/testcontainers/starrocks/__init__.py create mode 100644 modules/starrocks/tests/test_starrocks.py diff --git a/modules/starrocks/example_basic.py b/modules/starrocks/example_basic.py new file mode 100644 index 00000000..da0ed377 --- /dev/null +++ b/modules/starrocks/example_basic.py @@ -0,0 +1,90 @@ +import pandas as pd +import sqlalchemy +from sqlalchemy import text + +from testcontainers.starrocks import StarRocksContainer + +from datetime import datetime + +def basic_example(): + with StarRocksContainer() as starrocks: + # Get connection URL + connection_url = starrocks.get_connection_url() + + # Create SQLAlchemy engine + engine = sqlalchemy.create_engine(connection_url) + print("Connected to StarRocks") + + # Create a test table + create_table_sql = """ + CREATE TABLE IF NOT EXISTS weatherdata ( + DATE DATETIME, + NAME STRING, + Temperature STRING + ) + ENGINE=olap + DUPLICATE KEY(DATE) + DISTRIBUTED BY HASH(DATE) BUCKETS 10; + """ + + with engine.begin() as connection: + connection.execute(text(create_table_sql)) + print("Created test table") + + # Insert test data + test_data = test_data = [ + {"date": datetime(2025, 1, 1, 10, 0), "name": "New York", "temperature": "25°C"}, + {"date": datetime(2025, 1, 2, 10, 0), "name": "London", "temperature": "15°C"}, + {"date": datetime(2025, 1, 3, 10, 0), "name": "Tokyo", "temperature": "20°C"}, + {"date": datetime(2025, 1, 4, 10, 0), "name": "Sydney", "temperature": "30°C"}, + {"date": datetime(2025, 1, 5, 10, 0), "name": "Paris", "temperature": "18°C"}, + {"date": datetime(2025, 1, 6, 10, 0), "name": "Berlin", "temperature": "16°C"}, + {"date": datetime(2025, 1, 7, 10, 0), "name": "Moscow", "temperature": "5°C"}, + {"date": datetime(2025, 1, 8, 10, 0), "name": "Dubai", "temperature": "35°C"}, + {"date": datetime(2025, 1, 9, 10, 0), "name": "Singapore", "temperature": "28°C"}, + {"date": datetime(2025, 1, 10, 10, 0), "name": "Toronto", "temperature": "10°C"}, + ] + + with engine.begin() as connection: + for data in test_data: + connection.execute(text("INSERT INTO weatherdata (DATE, NAME, Temperature) VALUES (:date, :name, :temperature)"), data) + print("Inserted test data") + + # Query data + with engine.connect() as connection: + try: + result = connection.execute(text("SELECT * FROM weatherdata ORDER BY DATE")) + rows = result.fetchall() + + print("\nQuery results:") + for row in rows: + print(f"Date: {row[0]}, City: {row[1]}, Temperature: {row[2]}") + except sqlalchemy.exc.ProgrammingError as e: + print(f"Error querying data: {e}") + + # Execute a more complex query + with engine.connect() as connection: + try: + result = connection.execute( + text(""" + SELECT + NAME, + AVG(CAST(REGEXP_REPLACE(Temperature, '[^0-9.]', '') AS FLOAT)) as avg_temperature, + COUNT(*) as count, + MIN(DATE) as first_date, + MAX(DATE) as last_date + FROM weatherdata + GROUP BY NAME + ORDER BY avg_temperature DESC + """) + ) + + print("\nAggregation results:") + for row in result: + print(f"City: {row[0]}, Avg Temperature: {row[1]:.2f}°C, Count: {row[2]}, First: {row[3]}, Last: {row[4]}") + except sqlalchemy.exc.ProgrammingError as e: + print(f"Error executing query: {e}") + + +if __name__ == "__main__": + basic_example() diff --git a/modules/starrocks/testcontainers/starrocks/__init__.py b/modules/starrocks/testcontainers/starrocks/__init__.py new file mode 100644 index 00000000..7004022f --- /dev/null +++ b/modules/starrocks/testcontainers/starrocks/__init__.py @@ -0,0 +1,111 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import re +import string +from typing import Optional + +from testcontainers.core.generic import DbContainer +from testcontainers.core.utils import raise_for_deprecated_parameter +from testcontainers.core.waiting_utils import wait_container_is_ready, wait_for_logs + +_UNSET = object() + + +class StarRocksContainer(DbContainer): + """ + StarRocks database container. + + To get a URL without a driver, pass in :code:`driver=None`. + + Example: + + The example spins up a StarRocks database and connects to it using the :code:`pymysql` + driver. + + .. doctest:: + + >>> from testcontainers.starrocks import StarRocksContainer + >>> import sqlalchemy + + >>> with StarRocksContainer("starrocks/allin1-ubuntu:3.5.3") as starrocks: + ... engine = sqlalchemy.create_engine(starrocks.get_connection_url()) + ... with engine.begin() as connection: + ... result = connection.execute(sqlalchemy.text("select version()")) + ... version, = result.fetchone() + >>> version + 'StarRocks...' + """ + + def __init__( + self, + image: str = "repo.smartech.ir/starrocks/allin1-ubuntu:3.5.3", + dbname: string = "test", + port: int = 9030, + http_port: int = 8030, + rpc_port: int = 9020, + edit_log_port:int = 9010, + heartbeat_port:int = 9050, + username: Optional[str] = None, + password: Optional[str] = None, + driver: Optional[str] = "pymysql", + **kwargs, + ) -> None: + raise_for_deprecated_parameter(kwargs, "user", "username") + super().__init__(image=image, **kwargs) + self.dbname: str = dbname + self.username: str = username or os.environ.get("STARROCKS_USER", "root") + self.password: str = password or os.environ.get("STARROCKS_PASSWORD", "") + self.port = port + self.http_port = http_port + self.rpc_port = rpc_port + self.edit_log_port = edit_log_port + self.heartbeat_port = heartbeat_port + self.driver = f"+{driver}" if driver else "" + + self.with_exposed_ports(self.port, self.http_port, self.rpc_port, self.edit_log_port, self.heartbeat_port) + + def _configure(self) -> None: + self.with_env("STARROCKS_NODE_ROLE", "fe") + self.with_env("FE_QUERY_PORT", str(self.port)) + self.with_env("FE_HTTP_PORT", str(self.http_port)) + self.with_env("FE_RPC_PORT", str(self.rpc_port)) + self.with_env("FE_EDIT_LOG_PORT", str(self.edit_log_port)) + self.with_env("STARROCKS_ROOT_PASSWORD", self.password) + + @wait_container_is_ready() + def _connect(self) -> None: + wait_for_logs(self, + re.compile(".*Enjoy the journey to StarRocks blazing-fast lake-house engine!.*", flags=re.DOTALL | re.MULTILINE).search,) + + # Create The Default Database If It Does Not Exist + command = f'mysql -P 9030 -h 127.0.0.1 -u root -e "CREATE DATABASE IF NOT EXISTS {self.dbname}"' + self.exec(command) + + def get_connection_url(self, host: Optional[str] = None, driver: Optional[str] = _UNSET) -> str: + """Get a DB connection URL to connect to the StarRocks DB. + + If a driver is set in the constructor (defaults to pymysql), the URL will contain the + driver. The optional driver argument to :code:`get_connection_url` overwrites the constructor + set value. Pass :code:`driver=None` to get URLs without a driver. + """ + driver_str = "" if driver is None else self.driver if driver is _UNSET else f"+{driver}" + + return super()._create_connection_url( + dialect=f"mysql{driver_str}", + username=self.username, + password=self.password, + dbname=self.dbname, + host=host, + port=self.port, + ) diff --git a/modules/starrocks/tests/test_starrocks.py b/modules/starrocks/tests/test_starrocks.py new file mode 100644 index 00000000..77d95974 --- /dev/null +++ b/modules/starrocks/tests/test_starrocks.py @@ -0,0 +1,29 @@ +from pathlib import Path + +import requests + +import pytest +import sqlalchemy + +from testcontainers.starrocks import StarRocksContainer + + +def test_docker_run_starrocks(): + starrocks_container = StarRocksContainer() + with starrocks_container as starrocks: + host = starrocks.get_container_host_ip() + port = starrocks.get_exposed_port(8030) + + response = requests.get(f'http://{host}:{port}/api/health') + + assert response.status_code == 200 + +@pytest.mark.inside_docker_check +def test_docker_run_starrocks_with_sqlalchemy(): + starrocks_container = StarRocksContainer() + with starrocks_container as starrocks: + engine = sqlalchemy.create_engine(starrocks.get_connection_url()) + with engine.begin() as connection: + result = connection.execute(sqlalchemy.text("select version()")) + for row in result: + assert row[0].lower().startswith("8.0.33") diff --git a/poetry.lock b/poetry.lock index 67c4abe3..e66ee336 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -756,7 +756,7 @@ files = [ {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, ] -markers = {main = "((extra == \"azurite\" or extra == \"keycloak\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"oracle-free\" or extra == \"weaviate\" or extra == \"mailpit\" or extra == \"sftp\") and platform_python_implementation != \"PyPy\" or extra == \"minio\" or os_name == \"nt\" and implementation_name != \"pypy\" and extra == \"selenium\")"} +markers = {main = "((extra == \"azurite\" or extra == \"keycloak\" or extra == \"mysql\" or extra == \"starrocks\" or extra == \"oracle\" or extra == \"oracle-free\" or extra == \"weaviate\" or extra == \"mailpit\" or extra == \"sftp\") and platform_python_implementation != \"PyPy\" or extra == \"minio\" or os_name == \"nt\" and implementation_name != \"pypy\" and extra == \"selenium\")"} [package.dependencies] pycparser = "*" @@ -1202,7 +1202,7 @@ files = [ {file = "cryptography-42.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd"}, {file = "cryptography-42.0.5.tar.gz", hash = "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1"}, ] -markers = {main = "extra == \"azurite\" or extra == \"keycloak\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"oracle-free\" or extra == \"weaviate\" or extra == \"mailpit\" or extra == \"sftp\""} +markers = {main = "extra == \"azurite\" or extra == \"keycloak\" or extra == \"mysql\" or extra == \"starrocks\" or extra == \"oracle\" or extra == \"oracle-free\" or extra == \"weaviate\" or extra == \"mailpit\" or extra == \"sftp\""} [package.dependencies] cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} @@ -4321,7 +4321,7 @@ files = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] -markers = {main = "((extra == \"azurite\" or extra == \"keycloak\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"oracle-free\" or extra == \"weaviate\" or extra == \"mailpit\" or extra == \"sftp\") and platform_python_implementation != \"PyPy\" or extra == \"minio\" or os_name == \"nt\" and implementation_name != \"pypy\" and extra == \"selenium\")"} +markers = {main = "((extra == \"azurite\" or extra == \"keycloak\" or extra == \"mysql\" or extra == \"starrocks\" or extra == \"oracle\" or extra == \"oracle-free\" or extra == \"weaviate\" or extra == \"mailpit\" or extra == \"sftp\") and platform_python_implementation != \"PyPy\" or extra == \"minio\" or os_name == \"nt\" and implementation_name != \"pypy\" and extra == \"selenium\")"} [package.source] type = "legacy" @@ -4777,7 +4777,7 @@ description = "Pure Python MySQL Driver" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"mysql\"" +markers = "extra == \"mysql\" or extra == \"starrocks\"" files = [ {file = "PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7"}, {file = "PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96"}, @@ -6812,6 +6812,7 @@ registry = ["bcrypt"] scylla = ["cassandra-driver"] selenium = ["selenium"] sftp = ["cryptography"] +starrocks = ["pymysql", "sqlalchemy"] test-module-import = ["httpx"] trino = ["trino"] vault = [] @@ -6820,4 +6821,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<4.0" -content-hash = "495578a8d383aa0bf5496c6ec2db38e81bb36b30c5cd4b5fdd2d186b4a74b3f1" +content-hash = "e135d5a2ab8a00381056be7259209cc9095a18253881de972698592c2c8d694a" diff --git a/pyproject.toml b/pyproject.toml index 5cc32197..4378b0f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ packages = [ { include = "testcontainers", from = "modules/trino" }, { include = "testcontainers", from = "modules/vault" }, { include = "testcontainers", from = "modules/weaviate" }, + { include = "testcontainers", from = "modules/starrocks" }, ] [tool.poetry.urls] @@ -171,6 +172,7 @@ vault = [] weaviate = ["weaviate-client"] chroma = ["chromadb-client"] trino = ["trino"] +starrocks = ["sqlalchemy", "pymysql"] [tool.poetry.group.dev.dependencies] mypy = "1.11.2"