diff --git a/Dockerfile b/Dockerfile index a639f90..81467cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,8 @@ RUN make FROM ubuntu:22.04 as base +RUN apt-get update && apt-get install -y mysql-client && rm -rf /var/lib/apt/lists/* + WORKDIR /metrics-loader COPY configs /configs COPY --from=builder /metrics-loader/bin/* /usr/local/bin/ diff --git a/build-docker-image.sh b/build-docker-image.sh new file mode 100644 index 0000000..2765a00 --- /dev/null +++ b/build-docker-image.sh @@ -0,0 +1,200 @@ +#!/bin/bash + +# Function to handle tagging and pushing an image +perform_push_operations() { + local image_to_push="$1" + local current_container_engine="$2" + + DEFAULT_REGISTRY_HOST="greptime-registry.cn-hangzhou.cr.aliyuncs.com/tools" + read -p "Please enter the image registry address (default is $DEFAULT_REGISTRY_HOST): " REGISTRY_HOST + if [ -z "$REGISTRY_HOST" ]; then + REGISTRY_HOST="$DEFAULT_REGISTRY_HOST" + echo "Using default image registry address: $REGISTRY_HOST" + fi + + REMOTE_IMAGE_NAME="$REGISTRY_HOST/$image_to_push" + + echo "Tagging image $image_to_push as $REMOTE_IMAGE_NAME ..." + if ! "$current_container_engine" tag "$image_to_push" "$REMOTE_IMAGE_NAME"; then + echo "Error: Image tagging failed." + return 1 + fi + echo "Image tagged successfully." + + echo "Pushing image $REMOTE_IMAGE_NAME ..." + if ! "$current_container_engine" push "$REMOTE_IMAGE_NAME"; then + echo "Error: Image push failed." + echo "Please ensure you are logged in to $REGISTRY_HOST ($current_container_engine login $REGISTRY_HOST)" + return 1 + fi + echo "Image $REMOTE_IMAGE_NAME pushed successfully." + return 0 +} + +# Determine script mode +SCRIPT_MODE="default" +if [[ "$1" == "only-push" ]]; then + SCRIPT_MODE="only-push" +fi + +# Check if podman is available +if command -v podman &> /dev/null; then + CONTAINER_ENGINE="podman" +# Otherwise, check if docker is available +elif command -v docker &> /dev/null; then + CONTAINER_ENGINE="docker" +else + echo "Error: Neither podman nor docker found. Please install one of them." + exit 1 +fi +echo "Will use $CONTAINER_ENGINE." + +if [[ "$SCRIPT_MODE" == "only-push" ]]; then + echo "--- Push-Only Mode ---" + echo "Available local images:" + "$CONTAINER_ENGINE" images + + read -p "Please enter the name of the local image to push (e.g., repository:tag): " LOCAL_IMAGE_TO_PUSH + if [ -z "$LOCAL_IMAGE_TO_PUSH" ]; then + echo "Error: Image name cannot be empty." + exit 1 + fi + + # Check if the image exists + if ! "$CONTAINER_ENGINE" image inspect "$LOCAL_IMAGE_TO_PUSH" &> /dev/null; then + echo "Error: Local image $LOCAL_IMAGE_TO_PUSH not found." + exit 1 + fi + + echo "Preparing to push image: $LOCAL_IMAGE_TO_PUSH" + if perform_push_operations "$LOCAL_IMAGE_TO_PUSH" "$CONTAINER_ENGINE"; then + echo "Image push process completed." + else + echo "Image push process failed." + exit 1 + fi +else # Default mode (build and then optionally push) + echo "--- Default Build and Push Mode ---" + + # Interactively ask the user for the image tag + read -p "Please enter the image tag (e.g., latest, 1.0): " IMAGE_TAG + + # Check if the tag is empty + if [ -z "$IMAGE_TAG" ]; then + echo "Error: Image tag cannot be empty." + exit 1 + fi + + BUILT_IMAGE_BASENAME="ingester" # As implied by original script + LOCAL_BUILT_IMAGE_NAME="${BUILT_IMAGE_BASENAME}:${IMAGE_TAG}" + DOCKERFILE_PATH="Dockerfile" # Assume Dockerfile is in the current directory + + # Check if Dockerfile exists + if [ ! -f "$DOCKERFILE_PATH" ]; then + echo "Error: $DOCKERFILE_PATH not found in the current directory." + echo "Please ensure Dockerfile exists in the directory where the script is executed." + exit 1 + fi + + echo "Building image $LOCAL_BUILT_IMAGE_NAME ..." + + # Build the image + if "$CONTAINER_ENGINE" build -t "$LOCAL_BUILT_IMAGE_NAME" -f "$DOCKERFILE_PATH" .; then + echo "Image $LOCAL_BUILT_IMAGE_NAME built successfully." + else + echo "Error: Image build failed." + exit 1 + fi + + # Ask whether to push to the image registry + read -p "Do you want to push the image $LOCAL_BUILT_IMAGE_NAME to the image registry? (y/n): " PUSH_CONFIRMATION + + if [[ "$PUSH_CONFIRMATION" == "y" || "$PUSH_CONFIRMATION" == "Y" ]]; then + if perform_push_operations "$LOCAL_BUILT_IMAGE_NAME" "$CONTAINER_ENGINE"; then + echo "Image push process completed." + else + echo "Image push process failed." + exit 1 + fi + else + echo "Image $LOCAL_BUILT_IMAGE_NAME was not pushed to the registry." + fi +fi + +echo "Script execution finished." +exit 0 + +# Check if podman is available +if command -v podman &> /dev/null; then + CONTAINER_ENGINE="podman" +# Otherwise, check if docker is available +elif command -v docker &> /dev/null; then + CONTAINER_ENGINE="docker" +else + echo "Error: Neither podman nor docker found. Please install one of them." + exit 1 +fi + +echo "Will use $CONTAINER_ENGINE to build the image." + +# Interactively ask the user for the image tag +read -p "Please enter the image tag (e.g., latest, 1.0): " IMAGE_TAG + +# Check if the tag is empty +if [ -z "$IMAGE_TAG" ]; then + echo "Error: Image tag cannot be empty." + exit 1 +fi + +LOCAL_IMAGE_NAME="ingester:$IMAGE_TAG" +DOCKERFILE_PATH="Dockerfile" # Assume Dockerfile is in the current directory + +# Check if Dockerfile exists +if [ ! -f "$DOCKERFILE_PATH" ]; then + echo "Error: $DOCKERFILE_PATH not found in the current directory." + echo "Please ensure Dockerfile exists in the directory where the script is executed." + exit 1 +fi + +echo "Building image $LOCAL_IMAGE_NAME ..." + +# Build the image +if "$CONTAINER_ENGINE" build -t "$LOCAL_IMAGE_NAME" -f "$DOCKERFILE_PATH" .; then + echo "Image $LOCAL_IMAGE_NAME built successfully." +else + echo "Error: Image build failed." + exit 1 +fi + +# Ask whether to push to the image registry +read -p "Do you want to push the image to the image registry? (y/n): " PUSH_TO_REGISTRY + +if [[ "$PUSH_TO_REGISTRY" == "y" || "$PUSH_TO_REGISTRY" == "Y" ]]; then + DEFAULT_REGISTRY_HOST="greptime-registry.cn-hangzhou.cr.aliyuncs.com/tools" + read -p "Please enter the image registry address (default is $DEFAULT_REGISTRY_HOST): " REGISTRY_HOST + if [ -z "$REGISTRY_HOST" ]; then + REGISTRY_HOST="$DEFAULT_REGISTRY_HOST" + echo "Using default image registry address: $REGISTRY_HOST" + fi + + REMOTE_IMAGE_NAME="$REGISTRY_HOST/$LOCAL_IMAGE_NAME" + + echo "Tagging image $LOCAL_IMAGE_NAME as $REMOTE_IMAGE_NAME ..." + if "$CONTAINER_ENGINE" tag "$LOCAL_IMAGE_NAME" "$REMOTE_IMAGE_NAME"; then + echo "Image tagged successfully." + else + echo "Error: Image tagging failed." + exit 1 + fi + + echo "Pushing image $REMOTE_IMAGE_NAME ..." + if "$CONTAINER_ENGINE" push "$REMOTE_IMAGE_NAME"; then + echo "Image $REMOTE_IMAGE_NAME pushed successfully." + else + echo "Error: Image push failed." + echo "Please ensure you are logged in to $REGISTRY_HOST ($CONTAINER_ENGINE login $REGISTRY_HOST)" + exit 1 + fi +else + echo "Image was not pushed to the registry." +fi \ No newline at end of file diff --git a/go.mod b/go.mod index 2d34e05..fe4a0b5 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( ) require ( + filippo.io/edwards25519 v1.1.0 // indirect github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 // indirect github.com/apache/thrift v0.14.2 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -24,6 +25,7 @@ require ( require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/go-sql-driver/mysql v1.9.2 github.com/gogo/protobuf v1.3.2 // indirect github.com/google/gopacket v1.1.19 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect diff --git a/go.sum b/go.sum index e7e6879..0712d6d 100644 --- a/go.sum +++ b/go.sum @@ -20,6 +20,8 @@ cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiy cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= @@ -46,6 +48,8 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-sql-driver/mysql v1.9.2 h1:4cNKDYQ1I84SXslGddlsrMhc8k4LeDVj6Ad6WRjiHuU= +github.com/go-sql-driver/mysql v1.9.2/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= diff --git a/pkg/cmd/schema_generator/schema_generator.go b/pkg/cmd/schema_generator/schema_generator.go index 3fb1dc1..dab7077 100644 --- a/pkg/cmd/schema_generator/schema_generator.go +++ b/pkg/cmd/schema_generator/schema_generator.go @@ -1,6 +1,7 @@ package schema_generator import ( + "database/sql" "encoding/json" "fmt" "log" @@ -8,9 +9,12 @@ import ( "os" "strconv" "strings" + "sync" "time" + _ "github.com/go-sql-driver/mysql" "github.com/spf13/cobra" + "golang.org/x/exp/maps" "gopkg.in/yaml.v3" ) @@ -26,55 +30,15 @@ func generateRandomArrays(n int, mRange [2]int, count int) [][]int { return arrays } -// Generate SQL for creating a metrics table -func generateMetricsSQL(columnListCount, sampleCount int, columnCountRange [2]int, physicalTables []string) ([]map[string]interface{}, string) { - sqlTemplate := "CREATE TABLE IF NOT EXISTS `%s` (\n`greptime_timestamp` TIMESTAMP(3) NOT NULL,\n`greptime_value` DOUBLE NULL,\n`region` STRING NULL,\n%s,\nTIME INDEX (greptime_timestamp),\nPRIMARY KEY (`region`,%s),\n) ENGINE = metric WITH (\non_physical_table = '%s'\n);" - - var tableInfoList []map[string]any - var finalSQLList []string - currentTableID := 0 - - for _, physicalTable := range physicalTables { - selectedColumns := generateRandomArrays(columnListCount, columnCountRange, sampleCount) - for _, columns := range selectedColumns { - tableName := fmt.Sprintf("metrics_table_%d", currentTableID) - var columnsStr, primaryKeys []string - for _, col := range columns { - columnsStr = append(columnsStr, fmt.Sprintf("`column%d` STRING NULL", col)) - primaryKeys = append(primaryKeys, fmt.Sprintf("`column%d`", col)) - } - tableInfo := map[string]any{ - "table_name": tableName, - "columns": columns, - "physical_table": physicalTable, - } - tableInfoList = append(tableInfoList, tableInfo) - sqlStr := fmt.Sprintf(sqlTemplate, tableName, strings.Join(columnsStr, ",\n"), strings.Join(primaryKeys, ","), physicalTable) - finalSQLList = append(finalSQLList, sqlStr) - currentTableID++ - } - } - return tableInfoList, strings.Join(finalSQLList, "\n") -} - -// Generate SQL for creating a physical table -func generatePhysicalTableSQL(count int) ([]string, string) { - sqlTemplate := "CREATE TABLE IF NOT EXISTS `%s` (\n `greptime_timestamp` TIMESTAMP(3) NOT NULL,\n `greptime_value` DOUBLE NULL,\n `region` STRING NULL,\n `column1` STRING NULL,\n `column2` STRING NULL,\n `column3` STRING NULL,\n `column4` STRING NULL,\n `column5` STRING NULL,\n `column6` STRING NULL,\n `column7` STRING NULL,\n `column8` STRING NULL,\n `column9` STRING NULL,\n `column10` STRING NULL,\n `column11` STRING NULL,\n `column12` STRING NULL,\n `column13` STRING NULL,\n `column14` STRING NULL,\n `column15` STRING NULL,\n `column16` STRING NULL,\n `column17` STRING NULL,\n `column18` STRING NULL,\n `column19` STRING NULL,\n `column20` STRING NULL,\n `column21` STRING NULL,\n `column22` STRING NULL,\n `column23` STRING NULL,\n `column24` STRING NULL,\n `column25` STRING NULL,\n `column26` STRING NULL,\n `column27` STRING NULL,\n `column28` STRING NULL,\n `column29` STRING NULL,\n `column30` STRING NULL,\n `column31` STRING NULL,\n `column32` STRING NULL,\n `column33` STRING NULL,\n `column34` STRING NULL,\n `column35` STRING NULL,\n `column36` STRING NULL,\n `column37` STRING NULL,\n `column38` STRING NULL,\n `column39` STRING NULL,\n `column40` STRING NULL,\n `column41` STRING NULL,\n `column42` STRING NULL,\n `column43` STRING NULL,\n `column44` STRING NULL,\n `column45` STRING NULL,\n `column46` STRING NULL,\n `column47` STRING NULL,\n `column48` STRING NULL,\n `column49` STRING NULL,\n `column50` STRING NULL,\n `column51` STRING NULL,\n `column52` STRING NULL,\n `column53` STRING NULL,\n `column54` STRING NULL,\n `column55` STRING NULL,\n `column56` STRING NULL,\n `column57` STRING NULL,\n `column58` STRING NULL,\n `column59` STRING NULL,\n `column60` STRING NULL,\n `column61` STRING NULL,\n `column62` STRING NULL,\n `column63` STRING NULL,\n `column64` STRING NULL,\n `column65` STRING NULL,\n `column66` STRING NULL,\n `column67` STRING NULL,\n `column68` STRING NULL,\n `column69` STRING NULL,\n `column70` STRING NULL,\n `column71` STRING NULL,\n `column72` STRING NULL,\n `column73` STRING NULL,\n `column74` STRING NULL,\n `column75` STRING NULL,\n `column76` STRING NULL,\n `column77` STRING NULL,\n `column78` STRING NULL,\n `column79` STRING NULL,\n `column80` STRING NULL,\n `column81` STRING NULL,\n `column82` STRING NULL,\n `column83` STRING NULL,\n `column84` STRING NULL,\n `column85` STRING NULL,\n `column86` STRING NULL,\n `column87` STRING NULL,\n `column88` STRING NULL,\n `column89` STRING NULL,\n `column90` STRING NULL,\n `column91` STRING NULL,\n `column92` STRING NULL,\n `column93` STRING NULL,\n `column94` STRING NULL,\n `column95` STRING NULL,\n `column96` STRING NULL,\n `column97` STRING NULL,\n `column98` STRING NULL,\n `column99` STRING NULL,\n TIME INDEX (`greptime_timestamp`),\n PRIMARY KEY (`region`,`column1`, `column2`, `column3`, `column4`, `column5`, `column6`, `column7`, `column8`, `column9`, `column10`,\n `column11`, `column12`, `column13`, `column14`, `column15`, `column16`, `column17`, `column18`, `column19`, `column20`,\n `column21`, `column22`, `column23`, `column24`, `column25`, `column26`, `column27`, `column28`, `column29`, `column30`,\n `column31`, `column32`, `column33`, `column34`, `column35`, `column36`, `column37`, `column38`, `column39`, `column40`,\n `column41`, `column42`, `column43`, `column44`, `column45`, `column46`, `column47`, `column48`, `column49`, `column50`,\n `column51`, `column52`, `column53`, `column54`, `column55`, `column56`, `column57`, `column58`, `column59`, `column60`,\n `column61`, `column62`, `column63`, `column64`, `column65`, `column66`, `column67`, `column68`, `column69`, `column70`,\n `column71`, `column72`, `column73`, `column74`, `column75`, `column76`, `column77`, `column78`, `column79`, `column80`,\n `column81`, `column82`, `column83`, `column84`, `column85`, `column86`, `column87`, `column88`, `column89`, `column90`,\n `column91`, `column92`, `column93`, `column94`, `column95`, `column96`, `column97`, `column98`, `column99`)\n)\nPARTITION ON COLUMNS (region) (\n region = 'region-0',\n region = 'region-1',\n region = 'region-2',\n region = 'region-3',\n region = 'region-4',\n region = 'region-5',\n region = 'region-6',\n region = 'region-7',\n region = 'region-8',\n region = 'region-9',\n region = 'region-10',\n region = 'region-11',\n region = 'region-12',\n region = 'region-13',\n region = 'region-14',\n region = 'region-15',\n region = 'region-16',\n region = 'region-17',\n region = 'region-18',\n region = 'region-19',\n region = 'region-20',\n region = 'region-21',\n region = 'region-22',\n region = 'region-23',\n region = 'region-24',\n region = 'region-25',\n region = 'region-26',\n region = 'region-27',\n region = 'region-28',\n region = 'region-29',\n region = 'region-30',\n region = 'region-31',\n region = 'region-32',\n region = 'region-33',\n region = 'region-34',\n region = 'region-35',\n region = 'region-36',\n region = 'region-37',\n region = 'region-38',\n region = 'region-39',\n region = 'region-40',\n region = 'region-41',\n region = 'region-42',\n region = 'region-43',\n region = 'region-44',\n region = 'region-45',\n region = 'region-46',\n region = 'region-47',\n region = 'region-48',\n region = 'region-49',\n region = 'region-50',\n)\nENGINE=metric\nWITH(\n physical_metric_table = 'true'\n);" - - var sqlList []string - var tableNameList []string - - for i := range count { - tableName := fmt.Sprintf("table_%d", i) - sqlStr := fmt.Sprintf(sqlTemplate, tableName) - sqlList = append(sqlList, sqlStr) - tableNameList = append(tableNameList, tableName) - } - return tableNameList, strings.Join(sqlList, "\n") -} - // Write content to a file func writeToFile(fileName, content string) error { + pathList := strings.Split(fileName, "/") + if len(pathList) > 1 { + err := os.MkdirAll(strings.Join(pathList[:len(pathList)-1], "/"), os.ModePerm) + if err != nil { + return err + } + } file, err := os.Create(fileName) if err != nil { return err @@ -84,7 +48,7 @@ func writeToFile(fileName, content string) error { return err } -func generateSampleLoaderYaml(tableInfos *[]map[string]any, targetFilePath string) error { +func generateSampleLoaderYaml(replica int, tableInfos *[]map[string]any, targetFilePath string) error { // create a path to store yaml files err := os.MkdirAll(targetFilePath, os.ModePerm) if err != nil { @@ -110,12 +74,12 @@ func generateSampleLoaderYaml(tableInfos *[]map[string]any, targetFilePath strin "type": "string", "dist": map[string]any{ "type": "replica_string", - "replica": 50, + "replica": replica, "replica_prefix": "region-", }, } - yamlData := map[string]interface{}{ + yamlData := map[string]any{ "start": "2023-01-01T00:00:00Z", "end": "2023-01-02T00:00:00Z", "interval": 30, @@ -145,31 +109,104 @@ func generateSampleLoaderYaml(tableInfos *[]map[string]any, targetFilePath strin return nil } -func run(columnListCount, sampleCount int, columnCountRange [2]int, targetPath string) error { +type SchemaGenerator struct { + mysqlHost string + mysqlPort string + physicalTableCount int + columnListCount int + regionCount int + sampleCount int + columnCountRange [2]int + targetPath string + execSQL bool + doExecSQLJobCount uint +} + +func (schema_generator *SchemaGenerator) generatePhysicalTableSQL() map[string]string { + //sqlTemplate := "CREATE TABLE IF NOT EXISTS `%s` (\n `greptime_timestamp` TIMESTAMP(3) NOT NULL,\n `greptime_value` DOUBLE NULL,\n `region` STRING NULL,\n `column1` STRING NULL,\n `column2` STRING NULL,\n `column3` STRING NULL,\n `column4` STRING NULL,\n `column5` STRING NULL,\n `column6` STRING NULL,\n `column7` STRING NULL,\n `column8` STRING NULL,\n `column9` STRING NULL,\n `column10` STRING NULL,\n `column11` STRING NULL,\n `column12` STRING NULL,\n `column13` STRING NULL,\n `column14` STRING NULL,\n `column15` STRING NULL,\n `column16` STRING NULL,\n `column17` STRING NULL,\n `column18` STRING NULL,\n `column19` STRING NULL,\n `column20` STRING NULL,\n `column21` STRING NULL,\n `column22` STRING NULL,\n `column23` STRING NULL,\n `column24` STRING NULL,\n `column25` STRING NULL,\n `column26` STRING NULL,\n `column27` STRING NULL,\n `column28` STRING NULL,\n `column29` STRING NULL,\n `column30` STRING NULL,\n `column31` STRING NULL,\n `column32` STRING NULL,\n `column33` STRING NULL,\n `column34` STRING NULL,\n `column35` STRING NULL,\n `column36` STRING NULL,\n `column37` STRING NULL,\n `column38` STRING NULL,\n `column39` STRING NULL,\n `column40` STRING NULL,\n `column41` STRING NULL,\n `column42` STRING NULL,\n `column43` STRING NULL,\n `column44` STRING NULL,\n `column45` STRING NULL,\n `column46` STRING NULL,\n `column47` STRING NULL,\n `column48` STRING NULL,\n `column49` STRING NULL,\n `column50` STRING NULL,\n `column51` STRING NULL,\n `column52` STRING NULL,\n `column53` STRING NULL,\n `column54` STRING NULL,\n `column55` STRING NULL,\n `column56` STRING NULL,\n `column57` STRING NULL,\n `column58` STRING NULL,\n `column59` STRING NULL,\n `column60` STRING NULL,\n `column61` STRING NULL,\n `column62` STRING NULL,\n `column63` STRING NULL,\n `column64` STRING NULL,\n `column65` STRING NULL,\n `column66` STRING NULL,\n `column67` STRING NULL,\n `column68` STRING NULL,\n `column69` STRING NULL,\n `column70` STRING NULL,\n `column71` STRING NULL,\n `column72` STRING NULL,\n `column73` STRING NULL,\n `column74` STRING NULL,\n `column75` STRING NULL,\n `column76` STRING NULL,\n `column77` STRING NULL,\n `column78` STRING NULL,\n `column79` STRING NULL,\n `column80` STRING NULL,\n `column81` STRING NULL,\n `column82` STRING NULL,\n `column83` STRING NULL,\n `column84` STRING NULL,\n `column85` STRING NULL,\n `column86` STRING NULL,\n `column87` STRING NULL,\n `column88` STRING NULL,\n `column89` STRING NULL,\n `column90` STRING NULL,\n `column91` STRING NULL,\n `column92` STRING NULL,\n `column93` STRING NULL,\n `column94` STRING NULL,\n `column95` STRING NULL,\n `column96` STRING NULL,\n `column97` STRING NULL,\n `column98` STRING NULL,\n `column99` STRING NULL,\n TIME INDEX (`greptime_timestamp`),\n PRIMARY KEY (`region`,`column1`, `column2`, `column3`, `column4`, `column5`, `column6`, `column7`, `column8`, `column9`, `column10`,\n `column11`, `column12`, `column13`, `column14`, `column15`, `column16`, `column17`, `column18`, `column19`, `column20`,\n `column21`, `column22`, `column23`, `column24`, `column25`, `column26`, `column27`, `column28`, `column29`, `column30`,\n `column31`, `column32`, `column33`, `column34`, `column35`, `column36`, `column37`, `column38`, `column39`, `column40`,\n `column41`, `column42`, `column43`, `column44`, `column45`, `column46`, `column47`, `column48`, `column49`, `column50`,\n `column51`, `column52`, `column53`, `column54`, `column55`, `column56`, `column57`, `column58`, `column59`, `column60`,\n `column61`, `column62`, `column63`, `column64`, `column65`, `column66`, `column67`, `column68`, `column69`, `column70`,\n `column71`, `column72`, `column73`, `column74`, `column75`, `column76`, `column77`, `column78`, `column79`, `column80`,\n `column81`, `column82`, `column83`, `column84`, `column85`, `column86`, `column87`, `column88`, `column89`, `column90`,\n `column91`, `column92`, `column93`, `column94`, `column95`, `column96`, `column97`, `column98`, `column99`)\n)\nPARTITION ON COLUMNS (region) (\n region = 'region-0',\n region = 'region-1',\n region = 'region-2',\n region = 'region-3',\n region = 'region-4',\n region = 'region-5',\n region = 'region-6',\n region = 'region-7',\n region = 'region-8',\n region = 'region-9',\n region = 'region-10',\n region = 'region-11',\n region = 'region-12',\n region = 'region-13',\n region = 'region-14',\n region = 'region-15',\n region = 'region-16',\n region = 'region-17',\n region = 'region-18',\n region = 'region-19',\n region = 'region-20',\n region = 'region-21',\n region = 'region-22',\n region = 'region-23',\n region = 'region-24',\n region = 'region-25',\n region = 'region-26',\n region = 'region-27',\n region = 'region-28',\n region = 'region-29',\n region = 'region-30',\n region = 'region-31',\n region = 'region-32',\n region = 'region-33',\n region = 'region-34',\n region = 'region-35',\n region = 'region-36',\n region = 'region-37',\n region = 'region-38',\n region = 'region-39',\n region = 'region-40',\n region = 'region-41',\n region = 'region-42',\n region = 'region-43',\n region = 'region-44',\n region = 'region-45',\n region = 'region-46',\n region = 'region-47',\n region = 'region-48',\n region = 'region-49',\n region = 'region-50',\n)\nENGINE=metric\nWITH(\n physical_metric_table = 'true'\n);" + sqlTemplate := "CREATE TABLE IF NOT EXISTS `%s` (\n `greptime_timestamp` TIMESTAMP(3) NOT NULL,\n `greptime_value` DOUBLE NULL,\n `region` STRING NULL,\n %s,\n TIME INDEX (`greptime_timestamp`),\n PRIMARY KEY (`region`,%s)\n)\nPARTITION ON COLUMNS (region) (\n %s\n)\nENGINE=metric\nWITH(\n physical_metric_table = 'true'\n);" + var sqlMap = make(map[string]string) + + columnDefList := make([]string, schema_generator.columnListCount) + columnNameList := make([]string, schema_generator.columnListCount) + regionList := make([]string, schema_generator.regionCount) + + for i := range schema_generator.columnListCount { + columnDefList[i] = fmt.Sprintf("`column%d` STRING NULL", i) + columnNameList[i] = fmt.Sprintf("`column%d`", i) + } + + for i := range schema_generator.regionCount { + regionList[i] = fmt.Sprintf("region = 'region-%d'", i) + } + + for i := range schema_generator.physicalTableCount { + tableName := fmt.Sprintf("table_%d", i) + sqlStr := fmt.Sprintf(sqlTemplate, tableName, strings.Join(columnDefList, ",\n"), strings.Join(columnNameList, ","), strings.Join(regionList, ",\n")) + sqlMap[tableName] = sqlStr + } + return sqlMap +} + +func (schema_generator *SchemaGenerator) generateMetricsSQL(physicalTables []string) ([]map[string]any, map[string]string) { + sqlTemplate := "CREATE TABLE IF NOT EXISTS `%s` (\n`greptime_timestamp` TIMESTAMP(3) NOT NULL,\n`greptime_value` DOUBLE NULL,\n`region` STRING NULL,\n%s,\nTIME INDEX (greptime_timestamp),\nPRIMARY KEY (`region`,%s),\n) ENGINE = metric WITH (\non_physical_table = '%s'\n);" + + var tableInfoList []map[string]any = make([]map[string]any, 0) + var finalSqlMap = make(map[string]string) + currentTableID := 0 + + for _, physicalTable := range physicalTables { + selectedColumns := generateRandomArrays(schema_generator.columnListCount, schema_generator.columnCountRange, schema_generator.sampleCount) + for _, columns := range selectedColumns { + tableName := fmt.Sprintf("metrics_table_%d", currentTableID) + var columnsStr, primaryKeys []string + for _, col := range columns { + columnsStr = append(columnsStr, fmt.Sprintf("`column%d` STRING NULL", col)) + primaryKeys = append(primaryKeys, fmt.Sprintf("`column%d`", col)) + } + tableInfo := map[string]any{ + "table_name": tableName, + "columns": columns, + "physical_table": physicalTable, + } + tableInfoList = append(tableInfoList, tableInfo) + sqlStr := fmt.Sprintf(sqlTemplate, tableName, strings.Join(columnsStr, ",\n"), strings.Join(primaryKeys, ","), physicalTable) + finalSqlMap[tableName] = sqlStr + currentTableID++ + } + } + return tableInfoList, finalSqlMap +} + +func (schema_generator *SchemaGenerator) run() error { - physicalTableSQLFileName := fmt.Sprintf("%s/physical_table.sql", targetPath) - metricsTableSQLFileName := fmt.Sprintf("%s/metrics_table.sql", targetPath) + physicalTableSQLFileName := fmt.Sprintf("%s/physical_table.sql", schema_generator.targetPath) + metricsTableSQLFileName := fmt.Sprintf("%s/metrics_table.sql", schema_generator.targetPath) // Generate SQL for creating a physical table - physicalTableNameList, physicalTableSQL := generatePhysicalTableSQL(400) - err := writeToFile(physicalTableSQLFileName, physicalTableSQL) + physicalTableSqlMap := schema_generator.generatePhysicalTableSQL() + physicalTableNameList := maps.Keys(physicalTableSqlMap) + // physicalTableNameList, physicalTableSQL := schema_generator.generatePhysicalTableSQL() + var physicalTableSql = strings.Join(maps.Values(physicalTableSqlMap), "\n") + err := writeToFile(physicalTableSQLFileName, physicalTableSql) // Write only the first table SQL for now if err != nil { - fmt.Printf("Error writing physical table SQL: %v\n", err) + log.Printf("Error writing physical table SQL: %v\n", err) return err } // Generate SQL for creating a metrics table - tableInfos, metricsTableSQL := generateMetricsSQL(columnListCount, sampleCount, columnCountRange, physicalTableNameList) - err = writeToFile(metricsTableSQLFileName, metricsTableSQL) + tableInfos, metricsTableSqlMap := schema_generator.generateMetricsSQL(physicalTableNameList) + var metricsTableSql = strings.Join(maps.Values(metricsTableSqlMap), "\n") + err = writeToFile(metricsTableSQLFileName, metricsTableSql) if err != nil { - fmt.Printf("Error writing metrics table SQL: %v\n", err) + log.Printf("Error writing metrics table SQL: %v\n", err) return err } // Save table info to a file - tableInfoFile, err := os.Create(fmt.Sprintf("%s/metrics_table_info.json", targetPath)) + tableInfoFile, err := os.Create(fmt.Sprintf("%s/metrics_table_info.json", schema_generator.targetPath)) if err != nil { - fmt.Printf("Error creating table info file: %v\n", err) + log.Printf("Error creating table info file: %v\n", err) return err } defer tableInfoFile.Close() @@ -177,23 +214,98 @@ func run(columnListCount, sampleCount int, columnCountRange [2]int, targetPath s jsonEncoder.SetIndent("", " ") err = jsonEncoder.Encode(tableInfos) if err != nil { - fmt.Printf("Error writing table info: %v\n", err) + log.Printf("Error writing table info: %v\n", err) return err } - err = generateSampleLoaderYaml(&tableInfos, targetPath) + err = generateSampleLoaderYaml(schema_generator.regionCount, &tableInfos, schema_generator.targetPath) if err != nil { - fmt.Printf("Error generating sample loader YAML: %v\n", err) + log.Printf("Error generating sample loader YAML: %v\n", err) return err } + schema_generator.doExecSQLConcurrently(maps.Values(physicalTableSqlMap), schema_generator.doExecSQLJobCount) + schema_generator.doExecSQLConcurrently(maps.Values(metricsTableSqlMap), schema_generator.doExecSQLJobCount) return nil } +// 并发执行传入的 sql +// jobs 的数量表示并发执行的任务数和 mysql 连接数 +// 使用 mysql driver 来执行 sql +func (schema_generator *SchemaGenerator) doExecSQLConcurrently(sqls []string, jobs uint) { + if !schema_generator.execSQL { + log.Println("Skipping execution of SQLs as exec-sql is false") + return + } + + if jobs == 0 { + log.Println("Number of jobs is 0, skipping SQL execution.") + return + } + + dsn := fmt.Sprintf("root:@tcp(%s:%s)/", schema_generator.mysqlHost, schema_generator.mysqlPort) + + sqlChan := make(chan string, len(sqls)) + var wg sync.WaitGroup + + for i := uint(0); i < jobs; i++ { + wg.Add(1) + go func(workerID int) { + defer wg.Done() + db, err := sql.Open("mysql", dsn) + if err != nil { + log.Printf("Worker %d: Error connecting to MySQL: %v\n", workerID, err) + return + } + defer db.Close() + + err = db.Ping() + if err != nil { + log.Printf("Worker %d: Error pinging MySQL: %v\n", workerID, err) + return + } + log.Printf("Worker %d: Connected to MySQL at %s:%s\n", workerID, schema_generator.mysqlHost, schema_generator.mysqlPort) + + for sqlQuery := range sqlChan { + log.Printf("Worker %d: Executing SQL: %s...\n", workerID, strings.Split(sqlQuery, "\n")[0]) // Log first line for brevity + _, err := db.Exec(sqlQuery) + if err != nil { + log.Printf("Worker %d: Error executing SQL: %v\nSQL: %s\n", workerID, err, sqlQuery) + } else { + log.Printf("Worker %d: Successfully executed SQL: %s...\n", workerID, strings.Split(sqlQuery, "\n")[0]) + } + } + log.Printf("Worker %d: Finished processing tasks.\n", workerID) + }(int(i)) + } + + for _, sqlQuery := range sqls { + sqlChan <- sqlQuery + } + close(sqlChan) + + wg.Wait() + log.Println("All SQL execution tasks completed.") +} + func NewCommand() *cobra.Command { var rootCmd = &cobra.Command{ Use: "schema_generator", Short: "SchemaGenerator is a tool to generate SQL schema", Run: func(cmd *cobra.Command, args []string) { - columnListCount, err := cmd.Flags().GetInt("column-list-count") + var schemaGenerator SchemaGenerator + var err error + schemaGenerator.mysqlHost, err = cmd.Flags().GetString("mysql-host") + if err != nil { + log.Fatalf("Error getting mysql-host: %v", err) + } + schemaGenerator.mysqlPort, err = cmd.Flags().GetString("mysql-port") + if err != nil { + log.Fatalf("Error getting mysql-port: %v", err) + } + schemaGenerator.physicalTableCount, err = cmd.Flags().GetInt("physical-table-count") + if err != nil { + log.Fatalf("Error getting physical-table-count: %v", err) + } + schemaGenerator.columnListCount, err = cmd.Flags().GetInt("column-list-count") if err != nil { log.Fatalf("Error getting column-list-count: %v", err) } @@ -205,36 +317,57 @@ func NewCommand() *cobra.Command { if len(selectedColumnCountRange) != 2 { log.Fatalf("Invalid selected-column-count-range range: %s", selectedColumnCount) } - columnCountRange := [2]int{} - columnCountRange[0], err = strconv.Atoi(selectedColumnCountRange[0]) + schemaGenerator.regionCount, err = cmd.Flags().GetInt("region-count") + if err != nil { + log.Fatalf("Error getting region-count: %v", err) + } + schemaGenerator.columnCountRange[0], err = strconv.Atoi(selectedColumnCountRange[0]) if err != nil { log.Fatalf("Error converting selected-column-count-range range: %v", err) } - columnCountRange[1], err = strconv.Atoi(selectedColumnCountRange[1]) + schemaGenerator.columnCountRange[1], err = strconv.Atoi(selectedColumnCountRange[1]) if err != nil { log.Fatalf("Error converting selected-column-count-range range: %v", err) } - if columnCountRange[0] > columnCountRange[1] { + + if schemaGenerator.columnCountRange[0] > schemaGenerator.columnCountRange[1] { log.Fatalf("Invalid selected-column-count-range range: %s", selectedColumnCount) } - sampleCount, err := cmd.Flags().GetInt("sample-count") + schemaGenerator.sampleCount, err = cmd.Flags().GetInt("sample-count") if err != nil { log.Fatalf("Error getting sample-count: %v", err) } - targetPath, err := cmd.Flags().GetString("target-path") + schemaGenerator.targetPath, err = cmd.Flags().GetString("target-path") if err != nil { log.Fatalf("Error getting target-path: %v", err) } - log.Printf("Generating schema with column list count: %d, sample count: %d, column count range: %v, target path: %s", columnListCount, sampleCount, columnCountRange, targetPath) - err = run(columnListCount, sampleCount, columnCountRange, targetPath) + schemaGenerator.execSQL, err = cmd.Flags().GetBool("exec-sql") + if err != nil { + log.Fatalf("Error getting exec-sql: %v", err) + } + schemaGenerator.doExecSQLJobCount, err = cmd.Flags().GetUint("do-exec-sql-job-count") + if err != nil { + log.Fatalf("Error getting do-exec-sql-job-count: %v", err) + } + log.Printf("Generating schema with column list count: %d, sample count: %d, column count range: %v, target path: %s", schemaGenerator.columnListCount, schemaGenerator.sampleCount, schemaGenerator.columnCountRange, schemaGenerator.targetPath) + + err = schemaGenerator.run() if err != nil { log.Fatalf("Error running sample_loader: %v", err) } }, } + + rootCmd.Flags().StringP("mysql-host", "H", "127.0.0.1", "The MySQL host") + rootCmd.Flags().StringP("mysql-port", "P", "4002", "The MySQL port") + rootCmd.Flags().IntP("region-count", "r", 50, "The number of regions to generate") + rootCmd.Flags().IntP("physical-table-count", "p", 100, "The number of physical tables to generate") rootCmd.Flags().IntP("column-list-count", "c", 100, "The number of columns in the column list") rootCmd.Flags().StringP("selected-column-count-range", "s", "5,10", "The range of selected column count") rootCmd.Flags().IntP("sample-count", "n", 10, "The number of samples to generate") rootCmd.Flags().StringP("target-path", "t", "yaml_config", "The target path to save the generated files") + rootCmd.Flags().BoolP("exec-sql", "d", false, "executing SQL files after generation") + rootCmd.Flags().UintP("do-exec-sql-job-count", "j", 4, "The number of jobs to execute SQL concurrently") + return rootCmd }