From 67561b71a108f1d7a5ac7d895f108292297470ec Mon Sep 17 00:00:00 2001 From: daidai Date: Fri, 24 Oct 2025 10:47:29 +0800 Subject: [PATCH] [feature](maxcompute)support maxcompte catalog read project-schema-table. (#57012) ### What problem does this PR solve? Problem Summary: In the previous MaxCompute architecture, and its mapping in Doris, the hierarchy was: `project / database -> table / table`. When creating a catalog, users needed to specify the property `mc.default.project`, which indicated the default project to access. In this structure, executing `SHOW DATABASES` would list other projects. After MaxCompute introduced the concept of schemas, the hierarchy changed to: `project / catalog -> schema / database -> table / table`. Here, the project is at a higher level, and `SHOW DATABASES` should now list all schemas under the current project. As a result, users need to create a separate catalog for each project, specifying a different `mc.default.project` property. doc: https://help.aliyun.com/zh/maxcompute/user-guide/schema-related-operations?spm=a2c4g.11186623.help-menu-search-27797.d_0#9d8326491bagj To maintain compatibility with the old version, a catalog property `mc.project.schema.table` is introduced: - When the property is true, the new architecture is used. - When the property is false, the old architecture is used. --- .../maxcompute/MaxComputeExternalCatalog.java | 104 +++---- .../maxcompute/MaxComputeExternalTable.java | 19 +- .../MaxComputeSchemaCacheValue.java | 8 +- .../maxcompute/McStructureHelper.java | 221 +++++++++++++++ .../maxcompute/source/MaxComputeScanNode.java | 4 +- .../property/constants/MCProperties.java | 20 ++ .../maxcompute/test_max_compute_schema.out | 258 ++++++++++++++++++ .../maxcompute/test_max_compute_schema.groovy | 239 ++++++++++++++++ 8 files changed, 802 insertions(+), 71 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/McStructureHelper.java create mode 100644 regression-test/data/external_table_p2/maxcompute/test_max_compute_schema.out create mode 100644 regression-test/suites/external_table_p2/maxcompute/test_max_compute_schema.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java index ff46f3ad65a994..65af7f967fe849 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java @@ -27,21 +27,16 @@ import org.apache.doris.datasource.property.constants.MCProperties; import com.aliyun.odps.Odps; -import com.aliyun.odps.OdpsException; import com.aliyun.odps.Partition; -import com.aliyun.odps.Project; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AccountFormat; import com.aliyun.odps.account.AliyunAccount; -import com.aliyun.odps.security.SecurityManager; +import com.aliyun.odps.table.TableIdentifier; import com.aliyun.odps.table.configuration.RestOptions; import com.aliyun.odps.table.configuration.SplitOptions; import com.aliyun.odps.table.enviroment.Credentials; import com.aliyun.odps.table.enviroment.EnvironmentSettings; -import com.aliyun.odps.utils.StringUtils; import com.google.common.collect.ImmutableList; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; import org.apache.log4j.Logger; import java.time.ZoneId; @@ -66,7 +61,6 @@ public class MaxComputeExternalCatalog extends ExternalCatalog { private String defaultProject; private String quota; private EnvironmentSettings settings; - private String catalogOwner; private String splitStrategy; private SplitOptions splitOptions; @@ -81,6 +75,8 @@ public class MaxComputeExternalCatalog extends ExternalCatalog { AccountFormat accountFormat = AccountFormat.DISPLAYNAME; + private McStructureHelper mcStructureHelper = null; + private static final Map REGION_ZONE_MAP; private static final List REQUIRED_PROPERTIES = ImmutableList.of( MCProperties.PROJECT, @@ -231,6 +227,10 @@ protected void initLocalObjectsImpl() { .withQuotaName(quota) .withRestOptions(restOptions) .build(); + + boolean enableNamespaceSchema = Boolean.parseBoolean( + props.getOrDefault(MCProperties.ENABLE_NAMESPACE_SCHEMA, MCProperties.DEFAULT_ENABLE_NAMESPACE_SCHEMA)); + mcStructureHelper = McStructureHelper.getHelper(enableNamespaceSchema, defaultProject); } public Odps getClient() { @@ -239,39 +239,15 @@ public Odps getClient() { } protected List listDatabaseNames() { - List result = new ArrayList<>(); - result.add(defaultProject); - - try { - result.add(defaultProject); - if (StringUtils.isNullOrEmpty(catalogOwner)) { - SecurityManager sm = odps.projects().get().getSecurityManager(); - String whoami = sm.runQuery("whoami", false); - - JsonObject js = JsonParser.parseString(whoami).getAsJsonObject(); - catalogOwner = js.get("DisplayName").getAsString(); - } - Iterator iterator = odps.projects().iterator(catalogOwner); - while (iterator.hasNext()) { - Project project = iterator.next(); - if (!project.getName().equals(defaultProject)) { - result.add(project.getName()); - } - } - } catch (OdpsException e) { - throw new RuntimeException(e); - } - return result; + makeSureInitialized(); + return mcStructureHelper.listDatabaseNames(getClient(), getDefaultProject()); } @Override public boolean tableExist(SessionContext ctx, String dbName, String tblName) { makeSureInitialized(); - try { - return getClient().tables().exists(dbName, tblName); - } catch (OdpsException e) { - throw new RuntimeException(e); - } + return mcStructureHelper.tableExist(getClient(), dbName, tblName); + } public List listPartitionNames(String dbName, String tbl) { @@ -279,43 +255,37 @@ public List listPartitionNames(String dbName, String tbl) { } public List listPartitionNames(String dbName, String tbl, long skip, long limit) { - try { - if (getClient().projects().exists(dbName)) { - List parts; - if (limit < 0) { - parts = getClient().tables().get(dbName, tbl).getPartitions(); - } else { - skip = skip < 0 ? 0 : skip; - parts = new ArrayList<>(); - Iterator it = getClient().tables().get(dbName, tbl).getPartitionIterator(); - int count = 0; - while (it.hasNext()) { - if (count < skip) { - count++; - it.next(); - } else if (parts.size() >= limit) { - break; - } else { - parts.add(it.next()); - } + if (mcStructureHelper.databaseExist(getClient(), dbName)) { + List parts; + if (limit < 0) { + parts = mcStructureHelper.getPartitions(getClient(), dbName, tbl); + } else { + skip = skip < 0 ? 0 : skip; + parts = new ArrayList<>(); + Iterator it = mcStructureHelper.getPartitionIterator(getClient(), dbName, tbl); + int count = 0; + while (it.hasNext()) { + if (count < skip) { + count++; + it.next(); + } else if (parts.size() >= limit) { + break; + } else { + parts.add(it.next()); } } - return parts.stream().map(p -> p.getPartitionSpec().toString(false, true)) - .collect(Collectors.toList()); - } else { - throw new OdpsException("Max compute project: " + dbName + " not exists."); } - } catch (OdpsException e) { - throw new RuntimeException(e); + return parts.stream().map(p -> p.getPartitionSpec().toString(false, true)) + .collect(Collectors.toList()); + } else { + throw new RuntimeException("MaxCompute schema/project: " + dbName + " not exists."); } } @Override public List listTableNames(SessionContext ctx, String dbName) { makeSureInitialized(); - List result = new ArrayList<>(); - getClient().tables().iterable(dbName).forEach(e -> result.add(e.getName())); - return result; + return mcStructureHelper.listTableNames(getClient(), dbName); } public String getAccessKey() { @@ -402,6 +372,14 @@ public long getSplitByteSize() { return splitByteSize; } + public com.aliyun.odps.Table getOdpsTable(String dbName, String tableName) { + return mcStructureHelper.getOdpsTable(getClient(), dbName, tableName); + } + + public TableIdentifier getOdpsTableIdentifier(String dbName, String tableName) { + return mcStructureHelper.getTableIdentifier(dbName, tableName); + } + @Override public void checkProperties() throws DdlException { super.checkProperties(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java index a598c68703fa39..7135ef96fdce9f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java @@ -36,6 +36,7 @@ import com.aliyun.odps.OdpsType; import com.aliyun.odps.Table; +import com.aliyun.odps.table.TableIdentifier; import com.aliyun.odps.type.ArrayTypeInfo; import com.aliyun.odps.type.CharTypeInfo; import com.aliyun.odps.type.DecimalTypeInfo; @@ -172,9 +173,12 @@ public Map getColumnNameToOdpsColumn() { public Optional initSchema() { // this method will be called at semantic parsing. makeSureInitialized(); - Table odpsTable = ((MaxComputeExternalCatalog) catalog).getClient().tables().get(dbName, name); - List columns = odpsTable.getSchema().getColumns(); + MaxComputeExternalCatalog mcCatalog = (MaxComputeExternalCatalog) catalog; + + Table odpsTable = mcCatalog.getOdpsTable(dbName, name); + TableIdentifier tableIdentifier = mcCatalog.getOdpsTableIdentifier(dbName, name); + List columns = odpsTable.getSchema().getColumns(); for (com.aliyun.odps.Column column : columns) { columnNameToOdpsColumn.put(column.getName(), column); @@ -213,8 +217,8 @@ public Optional initSchema() { partitionSpecs = ImmutableList.of(); } - return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable, partitionColumnNames, - partitionSpecs, partitionDorisColumns, partitionTypes)); + return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable, tableIdentifier, + partitionColumnNames, partitionSpecs, partitionDorisColumns, partitionTypes)); } private Type mcTypeToDorisType(TypeInfo typeInfo) { @@ -303,6 +307,13 @@ private Type mcTypeToDorisType(TypeInfo typeInfo) { } } + public TableIdentifier getTableIdentifier() { + makeSureInitialized(); + Optional schemaCacheValue = getSchemaCacheValue(); + return schemaCacheValue.map(value -> ((MaxComputeSchemaCacheValue) value).getTableIdentifier()) + .orElse(null); + } + @Override public TTableDescriptor toThrift() { // ak sk endpoint project quota diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java index 0d0fb69a3e4e0f..cf87725c680618 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java @@ -22,6 +22,7 @@ import org.apache.doris.datasource.SchemaCacheValue; import com.aliyun.odps.Table; +import com.aliyun.odps.table.TableIdentifier; import lombok.Getter; import lombok.Setter; @@ -31,15 +32,18 @@ @Setter public class MaxComputeSchemaCacheValue extends SchemaCacheValue { private Table odpsTable; + private TableIdentifier tableIdentifier; private List partitionColumnNames; private List partitionSpecs; private List partitionColumns; private List partitionTypes; - public MaxComputeSchemaCacheValue(List schema, Table odpsTable, List partitionColumnNames, - List partitionSpecs, List partitionColumns, List partitionTypes) { + public MaxComputeSchemaCacheValue(List schema, Table odpsTable, TableIdentifier tableIdentifier, + List partitionColumnNames, List partitionSpecs, List partitionColumns, + List partitionTypes) { super(schema); this.odpsTable = odpsTable; + this.tableIdentifier = tableIdentifier; this.partitionSpecs = partitionSpecs; this.partitionColumnNames = partitionColumnNames; this.partitionColumns = partitionColumns; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/McStructureHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/McStructureHelper.java new file mode 100644 index 00000000000000..7232b716426a13 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/McStructureHelper.java @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.maxcompute; + + +import com.aliyun.odps.Odps; +import com.aliyun.odps.OdpsException; +import com.aliyun.odps.Partition; +import com.aliyun.odps.Project; +import com.aliyun.odps.Schema; +import com.aliyun.odps.Table; +import com.aliyun.odps.security.SecurityManager; +import com.aliyun.odps.table.TableIdentifier; +import com.aliyun.odps.utils.StringUtils; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + + +/** + * Due to the introduction of the `mc.enable.namespace.schema` property, most interfaces using the + * ODPS client have changed, and the mapping structure between Doris and MaxCompute has also changed. + * Different property values correspond to different implementation class. + * It's important to note that when external functions are called through the interface, the structure + * mapped by Doris (database/table) is used, and the MaxCompute concept does not need to be considered. + */ +public interface McStructureHelper { + List listTableNames(Odps mcClient, String dbName); + + List listDatabaseNames(Odps mcClient, String defaultProject); + + boolean tableExist(Odps mcClient, String dbName, String tableName) throws RuntimeException; + + boolean databaseExist(Odps mcClient, String dbName); + + TableIdentifier getTableIdentifier(String dbName, String tableName); + + List getPartitions(Odps mcClient, String dbName, String tableName); + + Iterator getPartitionIterator(Odps mcClient, String dbName, String tableName); + + Table getOdpsTable(Odps mcClient, String dbName, String tableName); + + /** + * `mc.enable.namespace.schema` = true. + * mapping structure between Doris and MaxCompute: + * Doris : catalog, dbName, tableName + * MaxCompute: project, schema, table + */ + class ProjectSchemaTableHelper implements McStructureHelper { + private String defaultProjectName = null; + + public ProjectSchemaTableHelper(String defaultProjectName) { + this.defaultProjectName = defaultProjectName; + } + + @Override + public List listTableNames(Odps mcClient, String dbName) { + List result = new ArrayList<>(); + mcClient.tables().iterable(defaultProjectName, dbName, null, false) + .forEach(e -> result.add(e.getName())); + return result; + } + + @Override + public List listDatabaseNames(Odps mcClient, String defaultProject) { + List result = new ArrayList<>(); + Iterator iterator = mcClient.schemas().iterator(defaultProjectName); + while (iterator.hasNext()) { + Schema schema = iterator.next(); + result.add(schema.getName()); + } + return result; + } + + @Override + public List getPartitions(Odps mcClient, String dbName, String tableName) { + return mcClient.tables().get(defaultProjectName, dbName, tableName).getPartitions(); + } + + @Override + public Iterator getPartitionIterator(Odps mcClient, String dbName, String tableName) { + return mcClient.tables().get(defaultProjectName, dbName, tableName).getPartitions().iterator(); + } + + @Override + public boolean tableExist(Odps mcClient, String dbName, String tableName) throws RuntimeException { + try { + return mcClient.tables().exists(defaultProjectName, dbName, tableName); + } catch (OdpsException e) { + throw new RuntimeException(e); + } + } + + @Override + public boolean databaseExist(Odps mcClient, String dbName) throws RuntimeException { + try { + return mcClient.schemas().exists(dbName); + } catch (OdpsException e) { + throw new RuntimeException(e); + } + } + + @Override + public TableIdentifier getTableIdentifier(String dbName, String tableName) { + return TableIdentifier.of(defaultProjectName, dbName, tableName); + } + + @Override + public Table getOdpsTable(Odps mcClient, String dbName, String tableName) { + return mcClient.tables().get(defaultProjectName, dbName, tableName); + } + } + + /** + * `mc.enable.namespace.schema` = false. + * mapping structure between Doris and MaxCompute: + * Doris : dbName, tableName + * MaxCompute: project, table + */ + class ProjectTableHelper implements McStructureHelper { + private String catalogOwner = null; + + @Override + public boolean tableExist(Odps mcClient, String dbName, String tableName) throws RuntimeException { + try { + return mcClient.tables().exists(dbName, tableName); + } catch (OdpsException e) { + throw new RuntimeException(e); + } + } + + + @Override + public List listTableNames(Odps mcClient, String dbName) { + List result = new ArrayList<>(); + mcClient.tables().iterable(dbName).forEach(e -> result.add(e.getName())); + return result; + } + + @Override + public List listDatabaseNames(Odps mcClient, String defaultProject) { + List result = new ArrayList<>(); + result.add(defaultProject); + try { + result.add(defaultProject); + if (StringUtils.isNullOrEmpty(catalogOwner)) { + SecurityManager sm = mcClient.projects().get().getSecurityManager(); + String whoami = sm.runQuery("whoami", false); + + JsonObject js = JsonParser.parseString(whoami).getAsJsonObject(); + catalogOwner = js.get("DisplayName").getAsString(); + } + Iterator iterator = mcClient.projects().iterator(catalogOwner); + while (iterator.hasNext()) { + Project project = iterator.next(); + if (!project.getName().equals(defaultProject)) { + result.add(project.getName()); + } + } + } catch (OdpsException e) { + throw new RuntimeException(e); + } + return result; + } + + @Override + public List getPartitions(Odps mcClient, String dbName, String tableName) { + return mcClient.tables().get(dbName, tableName).getPartitions(); + } + + @Override + public Iterator getPartitionIterator(Odps mcClient, String dbName, String tableName) { + return mcClient.tables().get(dbName, tableName).getPartitions().iterator(); + } + + @Override + public boolean databaseExist(Odps mcClient, String dbName) throws RuntimeException { + try { + return mcClient.projects().exists(dbName); + } catch (OdpsException e) { + throw new RuntimeException(e); + } + } + + @Override + public TableIdentifier getTableIdentifier(String dbName, String tableName) { + return TableIdentifier.of(dbName, tableName); + } + + + @Override + public Table getOdpsTable(Odps mcClient, String dbName, String tableName) { + return mcClient.tables().get(dbName, tableName); + } + } + + static McStructureHelper getHelper(boolean isEnableNamespaceSchema, String defaultProjectName) { + return isEnableNamespaceSchema + ? new ProjectSchemaTableHelper(defaultProjectName) + : new ProjectTableHelper(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java index c8f24db0da9464..1ed292a8774861 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java @@ -53,7 +53,6 @@ import com.aliyun.odps.OdpsType; import com.aliyun.odps.PartitionSpec; -import com.aliyun.odps.table.TableIdentifier; import com.aliyun.odps.table.configuration.ArrowOptions; import com.aliyun.odps.table.configuration.ArrowOptions.TimestampUnit; import com.aliyun.odps.table.optimizer.predicate.Predicate; @@ -190,7 +189,8 @@ TableBatchReadSession createTableBatchReadSession(List requiredPa retryTimes = mcCatalog.getRetryTimes(); TableReadSessionBuilder scanBuilder = new TableReadSessionBuilder(); - return scanBuilder.identifier(TableIdentifier.of(table.getDbName(), table.getName())) + + return scanBuilder.identifier(table.getTableIdentifier()) .withSettings(mcCatalog.getSettings()) .withSplitOptions(mcCatalog.getSplitOption()) .requiredPartitionColumns(requiredPartitionColumns) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java index 744a36aed37258..bdfd9793c535e0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java @@ -81,6 +81,26 @@ public class MCProperties extends BaseProperties { public static final String ACCOUNT_FORMAT_ID = "id"; public static final String DEFAULT_ACCOUNT_FORMAT = ACCOUNT_FORMAT_NAME; + // In the previous MaxCompute architecture, and its mapping in Doris, + // the hierarchy was: project / database -> table / table. + // When creating a catalog, users needed to specify the property `mc.default.project`, + // which indicated the default project to access. + // In this structure, executing `SHOW DATABASES` would list other projects. + // + // After MaxCompute introduced the concept of schemas, the hierarchy changed to: + // project / catalog -> schema / database -> table / table. + // Here, the project is at a higher level, and `SHOW DATABASES` should now list + // all schemas under the current project. + // As a result, users need to create a separate catalog for each project, + // specifying a different `mc.default.project` property. + // + // To maintain compatibility with the old version, + // a variable is introduced: + // - When the property is true, the new architecture is used. + // - When the property is false, the old architecture is used. + public static final String ENABLE_NAMESPACE_SCHEMA = "mc.enable.namespace.schema"; + public static final String DEFAULT_ENABLE_NAMESPACE_SCHEMA = "false"; + public static CloudCredential getCredential(Map props) { return getCloudCredential(props, ACCESS_KEY, SECRET_KEY, SESSION_TOKEN); } diff --git a/regression-test/data/external_table_p2/maxcompute/test_max_compute_schema.out b/regression-test/data/external_table_p2/maxcompute/test_max_compute_schema.out new file mode 100644 index 00000000000000..eaaa45dd88b64e --- /dev/null +++ b/regression-test/data/external_table_p2/maxcompute/test_max_compute_schema.out @@ -0,0 +1,258 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !show_db_1 -- +analytics +default +information_schema +iot +mysql + +-- !show_tb_1 -- +order_detail +user_info + +-- !show_tb_2 -- +product_sales +web_log + +-- !show_tb_3 -- +employee_salary + +-- !show_par -- +ds1=202510 +ds1=202511 +ds1=202512 + +-- !show_par2 -- +ds2=20251015/hour=10 +ds2=20251016/hour=09 +ds2=20251017/hour=08 + +-- !desc -- +id int Yes true \N +emp_name text Yes true \N +department text Yes true \N +salary decimal(10,2) Yes true \N +hire_date date Yes true \N + +-- !show_db_2 -- +information_schema +mc_datalake_schema +mysql + +-- !show_tb_4 -- +order_detail +user_info + +-- !mc_old_q1 -- +1 Alice 23 Beijing +2 Bob 30 Shanghai +3 Carol 27 XiAn +4 David 22 Guangzhou + +-- !mc_old_q2 -- +1 ORD001 500 Alice 202510 +2 ORD002 899.9 Bob 202510 +3 ORD003 120.5 Carol 202511 +4 ORD004 350 David 202511 +5 ORD005 780 Alice 202511 +6 ORD006 640 Bob 202512 +7 ORD007 220 Carol 202512 + +-- !show_db_1 -- +analytics +default +information_schema +iot +mysql + +-- !show_tb_1 -- +order_detail +user_info + +-- !show_tb_2 -- +product_sales +web_log + +-- !show_tb_3 -- +employee_salary + +-- !show_par -- +ds1=202510 +ds1=202511 +ds1=202512 + +-- !show_par2 -- +ds2=20251015/hour=10 +ds2=20251016/hour=09 +ds2=20251017/hour=08 + +-- !desc -- +id int Yes true \N +emp_name text Yes true \N +department text Yes true \N +salary decimal(10,2) Yes true \N +hire_date date Yes true \N + +-- !mc_q1 -- +1 Alice 23 Beijing +2 Bob 30 Shanghai +3 Carol 27 XiAn +4 David 22 Guangzhou + +-- !mc_q2 -- +2 Bob 30 +3 Carol 27 + +-- !mc_q3 -- +Alice Beijing +Bob Shanghai + +-- !mc_q4 -- +1 Alice 23 Beijing + +-- !mc_q5 -- +1 ORD001 500 Alice 202510 +2 ORD002 899.9 Bob 202510 +5 ORD005 780 Alice 202511 +6 ORD006 640 Bob 202512 + +-- !mc_q6 -- +ORD002 Bob 899.9 +ORD006 Bob 640 + +-- !mc_q7 -- +3 + +-- !mc_q8 -- +1 Keyboard 199.9 3 2025-10-15T10:00 +2 Mouse 99.5 5 2025-10-15T11:00 +3 Monitor 899 2 2025-10-15T12:00 +4 Laptop 5699 1 2025-10-15T13:00 +5 USB Cable 25 10 2025-10-15T14:00 + +-- !mc_q9 -- +1 Keyboard 199.9 +3 Monitor 899 + +-- !mc_q10 -- +Keyboard 3 +Mouse 5 +USB Cable 10 + +-- !mc_q11 -- +2 Mouse 99.5 5 2025-10-15T11:00 +3 Monitor 899 2 2025-10-15T12:00 +4 Laptop 5699 1 2025-10-15T13:00 +5 USB Cable 25 10 2025-10-15T14:00 + +-- !mc_q12 -- +1 1001 click https://site.com/page1 2025-10-15T10:00 20251015 10 +6 1006 click https://site.com/page6 2025-10-16T09:45 20251016 09 + +-- !mc_q13 -- +4 1004 https://site.com/page4 +5 1005 https://site.com/page5 +6 1006 https://site.com/page6 + +-- !mc_q14 -- +4 1004 logout https://site.com/page4 2025-10-16T09:15 20251016 09 +5 1005 login https://site.com/page5 2025-10-16T09:30 20251016 09 +6 1006 click https://site.com/page6 2025-10-16T09:45 20251016 09 + +-- !mc_q15 -- +2 + +-- !mc_q16 -- +1 Tom Finance 12000.00 2023-03-01 +2 Jerry IT 15000.50 2022-09-15 +3 Mike HR 9800.00 2024-06-20 +4 Lucy IT 13500.00 2022-11-05 + +-- !mc_q17 -- +Tom Finance +Jerry IT +Lucy IT + +-- !mc_q18 -- +1 Tom Finance 12000.00 2023-03-01 +3 Mike HR 9800.00 2024-06-20 + +-- !mc_q19 -- +Jerry 15000.50 +Lucy 13500.00 + +-- !mc_q20 -- +1 Tom Finance 12000.00 2023-03-01 +2 Jerry IT 15000.50 2022-09-15 +3 Mike HR 9800.00 2024-06-20 +4 Lucy IT 13500.00 2022-11-05 + +-- !mc_join_q21 -- +1 Alice ORD001 500 +2 Bob ORD002 899.9 + +-- !mc_join_q22 -- +1 Alice Keyboard 199.9 +2 Bob Mouse 99.5 +3 Carol Monitor 899 +4 David Laptop 5699 + +-- !mc_join_q23 -- +Alice click https://site.com/page1 +Bob view https://site.com/page2 +Carol buy https://site.com/page3 + +-- !mc_join_q24 -- +Tom Keyboard 2025-10-15T10:00 +Jerry Mouse 2025-10-15T11:00 +Mike Monitor 2025-10-15T12:00 +Lucy Laptop 2025-10-15T13:00 + +-- !mc_join_q25 -- +Alice Finance 12000.00 +Bob IT 15000.50 +Carol HR 9800.00 +David IT 13500.00 + +-- !mc_join_q26 -- +ORD003 120.5 Monitor 899 +ORD004 350 Laptop 5699 +ORD005 780 USB Cable 25 + +-- !mc_join_q27 -- +ORD006 https://site.com/page6 click + +-- !mc_join_q28 -- +ORD003 Mike HR +ORD004 Lucy IT + +-- !mc_join_q29 -- + +-- !mc_join_q30 -- +Alice Keyboard 12000.00 +Bob Mouse 15000.50 +Carol Monitor 9800.00 +David Laptop 13500.00 + +-- !mc_join_q31 -- +Carol ORD003 https://site.com/page3 + +-- !mc_join_q32 -- +Lucy Laptop logout + +-- !mc_join_q33 -- +Alice Tom Finance +Bob Jerry IT +Carol Mike HR +David Lucy IT + +-- !mc_join_q34 -- +Keyboard Finance 12000.00 +Mouse IT 15000.50 +Monitor HR 9800.00 +Laptop IT 13500.00 + +-- !mc_join_q35 -- +ORD001 Keyboard Tom +ORD002 Mouse Jerry + diff --git a/regression-test/suites/external_table_p2/maxcompute/test_max_compute_schema.groovy b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_schema.groovy new file mode 100644 index 00000000000000..b89c92fabdd278 --- /dev/null +++ b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_schema.groovy @@ -0,0 +1,239 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +/* +// Test DDL and Data: + + +use mc_datalake_schema; +set odps.namespace.schema = true; + +USE schema `default`; +CREATE TABLE user_info (id INT, name STRING, age INT, city STRING); +INSERT INTO user_info VALUES +(1, 'Alice', 23, 'Beijing'), +(2, 'Bob', 30, 'Shanghai'), +(3, 'Carol', 27, 'XiAn'), +(4, 'David', 22, 'Guangzhou'); + +CREATE TABLE order_detail (id INT, order_id STRING, amount DOUBLE, buyer STRING) PARTITIONED BY (ds1 STRING); +INSERT INTO order_detail PARTITION (ds1='202510') VALUES +(1, 'ORD001', 500.0, 'Alice'), +(2, 'ORD002', 899.9, 'Bob'); +INSERT INTO order_detail PARTITION (ds1='202511') VALUES +(3, 'ORD003', 120.5, 'Carol'), +(4, 'ORD004', 350.0, 'David'), +(5, 'ORD005', 780.0, 'Alice'); +INSERT INTO order_detail PARTITION (ds1='202512') VALUES +(6, 'ORD006', 640.0, 'Bob'), +(7, 'ORD007', 220.0, 'Carol'); + + + +create schema analytics; +USE schema analytics; +CREATE TABLE product_sales (id INT, product_name STRING, price DOUBLE, quantity INT, sale_time DATETIME); +INSERT INTO analytics.product_sales VALUES +(1, 'Keyboard', 199.9, 3, CAST('2025-10-15 10:00:00' AS DATETIME)), +(2, 'Mouse', 99.5, 5, CAST('2025-10-15 11:00:00' AS DATETIME)), +(3, 'Monitor', 899.0, 2, CAST('2025-10-15 12:00:00' AS DATETIME)), +(4, 'Laptop', 5699.0, 1, CAST('2025-10-15 13:00:00' AS DATETIME)), +(5, 'USB Cable', 25.0, 10, CAST('2025-10-15 14:00:00' AS DATETIME)); +CREATE TABLE web_log ( + id INT, + user_id INT, + action STRING, + url STRING, + log_time DATETIME +) PARTITIONED BY (ds2 STRING, hour STRING); +INSERT INTO web_log PARTITION (ds2='20251015', hour='10') VALUES +(1, 1001, 'click', 'https://site.com/page1', CAST('2025-10-15 10:00:00' AS DATETIME)), +(2, 1002, 'view', 'https://site.com/page2', CAST('2025-10-15 10:05:00' AS DATETIME)), +(3, 1003, 'buy', 'https://site.com/page3', CAST('2025-10-15 10:10:00' AS DATETIME)); +INSERT INTO web_log PARTITION (ds2='20251016', hour='09') VALUES +(4, 1004, 'logout', 'https://site.com/page4', CAST('2025-10-16 09:15:00' AS DATETIME)), +(5, 1005, 'login', 'https://site.com/page5', CAST('2025-10-16 09:30:00' AS DATETIME)), +(6, 1006, 'click', 'https://site.com/page6', CAST('2025-10-16 09:45:00' AS DATETIME)); +INSERT INTO web_log PARTITION (ds2='20251017', hour='08') VALUES +(7, 1007, 'view', 'https://site.com/page7', CAST('2025-10-17 08:00:00' AS DATETIME)), +(8, 1008, 'buy', 'https://site.com/page8', CAST('2025-10-17 08:10:00' AS DATETIME)); + + +create schema iot; +USE schema iot; +CREATE TABLE employee_salary ( + id INT, + emp_name STRING, + department STRING, + salary DECIMAL(10,2), + hire_date DATE +); +INSERT INTO employee_salary VALUES +(1, 'Tom', 'Finance', 12000.00, CAST('2023-03-01' AS DATE)), +(2, 'Jerry', 'IT', 15000.50, CAST('2022-09-15' AS DATE)), +(3, 'Mike', 'HR', 9800.00, CAST('2024-06-20' AS DATE)), +(4, 'Lucy', 'IT', 13500.00, CAST('2022-11-05' AS DATE)); + */ +suite("test_max_compute_schema", "p2,external,maxcompute,external_remote,external_remote_maxcompute") { + String enabled = context.config.otherConfigs.get("enableMaxComputeTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String ak = context.config.otherConfigs.get("ak") + String sk = context.config.otherConfigs.get("sk"); + String mc_project = "mc_datalake_schema" + String mc_catalog_name = "test_max_compute_schema" + + + + + sql """drop catalog if exists ${mc_catalog_name};""" + sql """ + create catalog if not exists ${mc_catalog_name} properties ( + "type" = "max_compute", + "mc.default.project" = "${mc_project}", + "mc.access_key" = "${ak}", + "mc.secret_key" = "${sk}", + "mc.endpoint" = "http://service.cn-beijing-vpc.maxcompute.aliyun-inc.com/api", + "mc.enable.namespace.schema" = "true" + ); + """ + + sql """ switch ${mc_catalog_name};""" + order_qt_show_db_1 """ show databases; """ + + sql """ use `default`; """ + order_qt_show_tb_1 """ show tables; """ + + + sql """ use `analytics`; """ + order_qt_show_tb_2 """ show tables; """ + + + sql """ use `iot`; """ + order_qt_show_tb_3 """ show tables; """ + + order_qt_show_par """ show partitions from `default`.order_detail; """ + order_qt_show_par2 """ show partitions from analytics.web_log; """ + qt_desc """ desc iot.employee_salary; """ + + sql """ alter catalog ${mc_catalog_name} set PROPERTIES("mc.enable.namespace.schema" = "false"); """ + qt_show_db_2 """ show databases;""" + sql """ use ${mc_project}; """ + order_qt_show_tb_4 """ show tables; """ + + qt_mc_old_q1 """ SELECT * FROM user_info ORDER BY id;""" + qt_mc_old_q2 """ SELECT * FROM order_detail ORDER BY id;""" + + sql """ alter catalog ${mc_catalog_name} set PROPERTIES("mc.enable.namespace.schema" = "true"); """ + + + + order_qt_show_db_1 """ show databases; """ + + sql """ use `default`; """ + order_qt_show_tb_1 """ show tables; """ + + + sql """ use `analytics`; """ + order_qt_show_tb_2 """ show tables; """ + + + sql """ use `iot`; """ + order_qt_show_tb_3 """ show tables; """ + + order_qt_show_par """ show partitions from `default`.order_detail; """ + order_qt_show_par2 """ show partitions from analytics.web_log; """ + qt_desc """ desc iot.employee_salary; """ + + + explain { + sql("""select * from `default`.order_detail """) + contains "partition=3/3" + } + + explain { + sql("""select * from `default`.order_detail where ds1 = "11" """) + contains "partition=0/3" + } + + + explain { + sql("""select * from `default`.order_detail where ds1 = "202511" or ds1 = "202512" """) + contains "partition=2/3" + } + + + explain { + sql("""select * from analytics.web_log; """) + contains "partition=3/3" + } + + + explain { + sql("""select * from analytics.web_log where hour="09" ; """) + contains "partition=1/3" + } + + explain { + sql("""select * from analytics.web_log where ds2="20251016" and hour="09" """) + contains "partition=1/3" + } + + + explain { + sql("""select * from analytics.web_log where ds2="20251016" and hour="99" """) + contains "partition=0/3" + } + + + + qt_mc_q1 """SELECT * FROM `default`.user_info ORDER BY id;""" + qt_mc_q2 """SELECT id, name, age FROM `default`.user_info WHERE age > 25 ORDER BY id;""" + qt_mc_q3 """SELECT name, city FROM `default`.user_info WHERE city IN ('Beijing','Shanghai') ORDER BY id;""" + qt_mc_q4 """SELECT * FROM `default`.user_info WHERE name LIKE 'A%' ORDER BY id;""" + qt_mc_q5 """SELECT * FROM `default`.order_detail WHERE amount > 400 ORDER BY id;""" + qt_mc_q6 """SELECT order_id, buyer, amount FROM `default`.order_detail WHERE buyer='Bob' ORDER BY id;""" + qt_mc_q7 """SELECT COUNT(*) AS cnt FROM `default`.order_detail WHERE ds1='202511';""" + qt_mc_q8 """SELECT * FROM analytics.product_sales ORDER BY id;""" + qt_mc_q9 """SELECT id, product_name, price FROM analytics.product_sales WHERE price BETWEEN 100 AND 900 ORDER BY id;""" + qt_mc_q10 """SELECT product_name, quantity FROM analytics.product_sales WHERE quantity >= 3 ORDER BY id;""" + qt_mc_q11 """SELECT * FROM analytics.product_sales WHERE sale_time >= '2025-10-15 11:00:00' ORDER BY id;""" + qt_mc_q12 """SELECT * FROM analytics.web_log WHERE action='click' ORDER BY id;""" + qt_mc_q13 """SELECT id, user_id, url FROM analytics.web_log WHERE ds2='20251016' ORDER BY id;""" + qt_mc_q14 """SELECT * FROM analytics.web_log WHERE hour='09' ORDER BY id;""" + qt_mc_q15 """SELECT COUNT(*) AS clicks FROM analytics.web_log WHERE action='buy';""" + qt_mc_q16 """SELECT * FROM iot.employee_salary ORDER BY id;""" + qt_mc_q17 """SELECT emp_name, department FROM iot.employee_salary WHERE salary > 10000 ORDER BY id;""" + qt_mc_q18 """SELECT * FROM iot.employee_salary WHERE hire_date > '2023-01-01' ORDER BY id;""" + qt_mc_q19 """SELECT emp_name, salary FROM iot.employee_salary WHERE department='IT' ORDER BY id;""" + qt_mc_q20 """SELECT * FROM iot.employee_salary WHERE salary BETWEEN 9000 AND 16000 ORDER BY id;""" + + qt_mc_join_q21 """SELECT u.id, u.name, o.order_id, o.amount FROM `default`.user_info u JOIN `default`.order_detail o ON u.name=o.buyer WHERE o.ds1='202510' ORDER BY u.id;""" + qt_mc_join_q22 """SELECT u.id, u.name, p.product_name, p.price FROM `default`.user_info u JOIN analytics.product_sales p ON u.id=p.id ORDER BY u.id;""" + qt_mc_join_q23 """SELECT u.name, w.action, w.url FROM `default`.user_info u JOIN analytics.web_log w ON u.id=w.user_id-1000 WHERE w.ds2='20251015' AND w.hour='10' ORDER BY u.id;""" + qt_mc_join_q24 """SELECT e.emp_name, p.product_name, p.sale_time FROM iot.employee_salary e JOIN analytics.product_sales p ON e.id=p.id ORDER BY e.id;""" + qt_mc_join_q25 """SELECT u.name, e.department, e.salary FROM `default`.user_info u JOIN iot.employee_salary e ON u.id=e.id ORDER BY u.id;""" + qt_mc_join_q26 """SELECT o.order_id, o.amount, p.product_name, p.price FROM `default`.order_detail o JOIN analytics.product_sales p ON o.id=p.id WHERE o.ds1='202511' ORDER BY o.id;""" + qt_mc_join_q27 """SELECT o.order_id, w.url, w.action FROM `default`.order_detail o JOIN analytics.web_log w ON o.id=w.id WHERE o.ds1='202512' AND w.ds2='20251016' AND w.hour='09' ORDER BY o.id;""" + qt_mc_join_q28 """SELECT o.order_id, e.emp_name, e.department FROM `default`.order_detail o JOIN iot.employee_salary e ON o.id=e.id WHERE o.ds1='202511' ORDER BY o.id;""" + qt_mc_join_q29 """SELECT p.product_name, w.url, w.action FROM analytics.product_sales p JOIN analytics.web_log w ON p.id=w.id WHERE w.ds2='20251017' AND w.hour='08' ORDER BY p.id;""" + qt_mc_join_q30 """SELECT u.name, p.product_name, e.salary FROM `default`.user_info u JOIN analytics.product_sales p ON u.id=p.id JOIN iot.employee_salary e ON u.id=e.id ORDER BY u.id;""" + qt_mc_join_q31 """SELECT u.name, o.order_id, w.url FROM `default`.user_info u JOIN `default`.order_detail o ON u.id=o.id JOIN analytics.web_log w ON u.id=w.id WHERE o.ds1='202511' AND w.ds2='20251015' AND w.hour='10' ORDER BY u.id;""" + qt_mc_join_q32 """SELECT e.emp_name, p.product_name, w.action FROM iot.employee_salary e JOIN analytics.product_sales p ON e.id=p.id JOIN analytics.web_log w ON e.id=w.id WHERE w.ds2='20251016' AND w.hour='09' ORDER BY e.id;""" + qt_mc_join_q33 """SELECT u.name, e.emp_name, e.department FROM `default`.user_info u JOIN iot.employee_salary e ON u.id=e.id ORDER BY u.id;""" + qt_mc_join_q34 """SELECT p.product_name, e.department, e.salary FROM analytics.product_sales p JOIN iot.employee_salary e ON p.id=e.id ORDER BY p.id;""" + qt_mc_join_q35 """SELECT o.order_id, p.product_name, e.emp_name FROM `default`.order_detail o JOIN analytics.product_sales p ON o.id=p.id JOIN iot.employee_salary e ON o.id=e.id WHERE o.ds1='202510' ORDER BY o.id;""" + } +} \ No newline at end of file