Add Column.shardingKeyIdx for column definition for BanyanDB (apache#8705)

* Add `Column.shardingKeyIdx` for column definition for BanyanDB (a minimal usage sketch follows this list).

```
Sharding key is used to group time series data per metric of one entity in one place (the same shard, or the
same column for a column-oriented database).
For example, ServiceA's traffic gauge (service calls per minute) includes the following timestamped values, so it
should be sharded by service ID:
[ServiceA(encoded ID): 01-28 18:30 values-1, 01-28 18:31 values-2, 01-28 18:32 values-3, 01-28 18:32 values-4]

BanyanDB is the first storage implementation supporting this. It allows continuous time series metrics to be stored close together and compressed better.

NOTICE, this sharding concept is NOT just about splitting data into different database instances or physical files.
```

* Add `JD_PRESERVE_LINE_FEEDS=true` to the official code style file, so our comments no longer need many blank lines to keep their format.
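
To make the new `shardingKeyIdx` attribute concrete, here is a minimal usage sketch; the class and column names are hypothetical, while the real usages added by this commit appear in the file diffs below.

```
import org.apache.skywalking.oap.server.core.storage.annotation.Column;

// Hypothetical metric: every row of one service's traffic gauge shares the same
// sharding key (the service ID), so consecutive minutes are stored close together.
public class ServiceTrafficGauge {
    // The first (and only) sharding key of this entity.
    @Column(columnName = "service_id", shardingKeyIdx = 0)
    private String serviceId;

    // A regular value column; shardingKeyIdx keeps its default of -1 (not a sharding key).
    @Column(columnName = "call_per_minute")
    private long callPerMinute;

    @Column(columnName = "time_bucket")
    private long timeBucket;
}
```

An entity may declare more than one sharding key by using consecutive indexes (0, 1, 2, ...), as `SegmentRecord` does below with `SERVICE_ID`, `SERVICE_INSTANCE_ID`, and `IS_ERROR`.
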
wu-sheng authored Mar 18, 2022
1 parent 6486b13 commit f956484
Showing 24 changed files with 164 additions and 46 deletions.
17 changes: 16 additions & 1 deletion CHANGES.md
@@ -31,6 +31,7 @@ Release Notes.
* Update frontend-maven-plugin to 1.12 and npm to 16.14.0 for booster UI build.
* Improve CI with the GHA new feature "run failed jobs".
* Fix `./mvnw compile` not working if `./mvnw install` is not executed at least once.
* Add `JD_PRESERVE_LINE_FEEDS=true` to the official code style file.

#### OAP Server

@@ -112,14 +113,28 @@ Release Notes.
, `SW_CORE_REST_JETTY_DELTA`).
* [Breaking Change] Remove configuration `graphql/path` (env var: `SW_QUERY_GRAPHQL_PATH`).
* Add storage column attribute `indexOnly` to support ElasticSearch indexing some fields without storing them.
* Add `indexOnly=true` to `SegmentRecord.tags`, `AlarmRecord.tags`, `AbstractLogRecord.tags`, to reduce unnecessary storage.
* Add `indexOnly=true` to `SegmentRecord.tags`, `AlarmRecord.tags`, `AbstractLogRecord.tags`, to reduce unnecessary
storage.
* [Breaking Change] Remove configuration `restMinThreads` (env var: `SW_CORE_REST_JETTY_MIN_THREADS`
, `SW_RECEIVER_SHARING_JETTY_MIN_THREADS`).
* Refactor the core Builder mechanism, so new storage plugins could implement their own converters and get rid of the hard
requirement of using HashMap to communicate between data objects and database-native structures.
* [Breaking Change] Break all existing 3rd-party storage extensions.
* Remove hard requirement of BASE64 encoding for binary field.
* Add complexity limitation for GraphQL query to avoid malicious query.
* Add `Column.shardingKeyIdx` for column definition for BanyanDB.

```
Sharding key is used to group time series data per metric of one entity in one place (the same shard, or the
same column for a column-oriented database).
For example, ServiceA's traffic gauge (service calls per minute) includes the following timestamped values, so it
should be sharded by service ID:
[ServiceA(encoded ID): 01-28 18:30 values-1, 01-28 18:31 values-2, 01-28 18:32 values-3, 01-28 18:32 values-4]
BanyanDB is the first storage implementation supporting this. It allows continuous time series metrics to be stored close together and compressed better.
NOTICE, this sharding concept is NOT just about splitting data into different database instances or physical files.
```

#### UI

1 change: 1 addition & 0 deletions codeStyle.xml
@@ -21,6 +21,7 @@
</value>
</option>
<option name="JD_P_AT_EMPTY_LINES" value="false" />
<option name="JD_PRESERVE_LINE_FEEDS" value="true" />
</JavaCodeStyleSettings>
<ADDITIONAL_INDENT_OPTIONS fileType="haml">
<option name="INDENT_SIZE" value="2" />
@@ -51,7 +51,6 @@
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.JavaVersion;
import org.apache.commons.lang3.SystemUtils;
import org.apache.skywalking.oap.server.library.util.StringUtil;
import org.apache.skywalking.oal.rt.output.AllDispatcherContext;
import org.apache.skywalking.oal.rt.output.DispatcherContext;
import org.apache.skywalking.oal.rt.parser.AnalysisResult;
@@ -75,6 +74,7 @@
import org.apache.skywalking.oap.server.core.storage.annotation.Column;
import org.apache.skywalking.oap.server.library.module.ModuleStartException;
import org.apache.skywalking.oap.server.library.util.ResourceUtils;
import org.apache.skywalking.oap.server.library.util.StringUtil;

/**
* OAL Runtime is the class generation engine, which loads the generated classes from OAL script definitions. This runtime
@@ -267,6 +267,10 @@ private Class generateMetricsClass(AnalysisResult metricsStmt) throws OALCompile
if (field.getType().equals(String.class)) {
columnAnnotation.addMemberValue("length", new IntegerMemberValue(constPool, field.getLength()));
}
if (field.isID()) {
// Add shardingKeyIdx = 0 to column annotation.
columnAnnotation.addMemberValue("shardingKeyIdx", new IntegerMemberValue(constPool, 0));
}
annotationsAttribute.addAnnotation(columnAnnotation);

newField.getFieldInfo().addAttribute(annotationsAttribute);
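
For readers new to the generated-class path above, the following is a self-contained sketch of the Javassist pattern the OAL runtime uses to attach `shardingKeyIdx` to a generated field's `@Column` annotation. The generated class name and field are hypothetical; only the annotation-building calls mirror the diff above.

```
import javassist.ClassPool;
import javassist.CtClass;
import javassist.CtField;
import javassist.bytecode.AnnotationsAttribute;
import javassist.bytecode.ConstPool;
import javassist.bytecode.annotation.Annotation;
import javassist.bytecode.annotation.IntegerMemberValue;
import javassist.bytecode.annotation.StringMemberValue;

public class ColumnAnnotationSketch {
    public static void main(String[] args) throws Exception {
        ClassPool pool = ClassPool.getDefault();
        // Hypothetical generated metrics class and entity-ID field.
        CtClass generated = pool.makeClass("demo.GeneratedMetrics");
        ConstPool constPool = generated.getClassFile().getConstPool();
        CtField entityId = CtField.make("public String entityId;", generated);

        // Build the @Column annotation and, because this field is the entity ID,
        // add shardingKeyIdx = 0 -- the same branch as in the OAL runtime above.
        Annotation column = new Annotation(
            "org.apache.skywalking.oap.server.core.storage.annotation.Column", constPool);
        column.addMemberValue("columnName", new StringMemberValue("entity_id", constPool));
        column.addMemberValue("shardingKeyIdx", new IntegerMemberValue(constPool, 0));

        AnnotationsAttribute annotationsAttribute =
            new AnnotationsAttribute(constPool, AnnotationsAttribute.visibleTag);
        annotationsAttribute.addAnnotation(column);
        entityId.getFieldInfo().addAttribute(annotationsAttribute);
        generated.addField(entityId);

        // Read the attribute back from the bytecode to confirm the member value.
        AnnotationsAttribute readBack = (AnnotationsAttribute) entityId.getFieldInfo()
            .getAttribute(AnnotationsAttribute.visibleTag);
        System.out.println(readBack.getAnnotation(
            "org.apache.skywalking.oap.server.core.storage.annotation.Column")
            .getMemberValue("shardingKeyIdx"));
    }
}
```

Running it prints the stored member value (0), confirming the annotation is carried in the generated bytecode.
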
@@ -47,11 +47,11 @@ public abstract class AbstractLogRecord extends Record {

@Setter
@Getter
@Column(columnName = SERVICE_ID)
@Column(columnName = SERVICE_ID, shardingKeyIdx = 0)
private String serviceId;
@Setter
@Getter
@Column(columnName = SERVICE_INSTANCE_ID)
@Column(columnName = SERVICE_INSTANCE_ID, shardingKeyIdx = 1)
private String serviceInstanceId;
@Setter
@Getter
@@ -51,7 +51,7 @@ public class NetworkAddressAlias extends Metrics {

@Setter
@Getter
@Column(columnName = ADDRESS)
@Column(columnName = ADDRESS, shardingKeyIdx = 0)
private String address;
@Setter
@Getter
@@ -58,7 +58,7 @@ public class EndpointRelationServerSideMetrics extends Metrics {
private int componentId;
@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;

@Override
@@ -68,7 +68,7 @@ public class ServiceInstanceRelationServerSideMetrics extends Metrics {
private int componentId;
@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;

@Override
@@ -58,7 +58,7 @@ public class ServiceRelationServerSideMetrics extends Metrics {
private int componentId;
@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;

@Override
@@ -59,11 +59,11 @@ public class SegmentRecord extends Record {
private String traceId;
@Setter
@Getter
@Column(columnName = SERVICE_ID)
@Column(columnName = SERVICE_ID, shardingKeyIdx = 0)
private String serviceId;
@Setter
@Getter
@Column(columnName = SERVICE_INSTANCE_ID)
@Column(columnName = SERVICE_INSTANCE_ID, shardingKeyIdx = 1)
private String serviceInstanceId;
@Setter
@Getter
@@ -79,7 +79,7 @@ public class SegmentRecord extends Record {
private int latency;
@Setter
@Getter
@Column(columnName = IS_ERROR)
@Column(columnName = IS_ERROR, shardingKeyIdx = 2)
private int isError;
@Setter
@Getter
@@ -48,7 +48,7 @@ public abstract class HistogramFunction extends Meter implements AcceptableValue

@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;
@Getter
@Setter
@@ -55,7 +55,7 @@ public abstract class PercentileFunction extends Meter implements AcceptableValu

@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;
@Getter
@Setter
@@ -50,7 +50,7 @@ public abstract class AvgFunction extends Meter implements AcceptableValue<Long>

@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;

/**
@@ -59,7 +59,7 @@ public abstract class AvgHistogramFunction extends Meter implements AcceptableVa

@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;
@Getter
@Setter
@@ -74,7 +74,7 @@ public abstract class AvgHistogramPercentileFunction extends Meter implements Ac

@Setter
@Getter
@Column(columnName = ENTITY_ID)
@Column(columnName = ENTITY_ID, shardingKeyIdx = 0)
private String entityId;
@Getter
@Setter
@@ -48,7 +48,7 @@ public abstract class AvgLabeledFunction extends Meter implements AcceptableValu

@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;

/**
@@ -47,7 +47,7 @@ public abstract class LatestFunction extends Meter implements AcceptableValue<Lo

@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;

/**
@@ -47,7 +47,7 @@ public abstract class SumFunction extends Meter implements AcceptableValue<Long>

@Setter
@Getter
@Column(columnName = ENTITY_ID, length = 512)
@Column(columnName = ENTITY_ID, length = 512, shardingKeyIdx = 0)
private String entityId;

@Setter
@@ -54,7 +54,7 @@ public String id() {

@Setter
@Getter
@Column(columnName = SERVICE_ID)
@Column(columnName = SERVICE_ID, shardingKeyIdx = 0)
private String serviceId;

@Setter
@@ -71,6 +71,14 @@ public ScopeDefaultColumn(String fieldName, String columnName, Class<?> type, bo

Class type();

/**
* Declare that this virtual column represents the entity ID of this source and its generated metrics.
* Typically, metric ID = timestamp + entity ID.
*
* This takes the return value of {@link ISource#getEntityId()} as the column value.
*
* @return TRUE if this is an ID column.
*/
boolean isID() default false;

/**
@@ -59,8 +59,8 @@
boolean storageOnly() default false;

/**
* The column(field) is just indexed, never stored. Note: this feature only supported by elasticsearch
* and don't support mappings update due to ElasticSearch server's limitation.
* The column(field) is just indexed, never stored. Note: this feature is only supported by ElasticSearch, and
* mappings updates are not supported due to the ElasticSearch server's limitation.
*
* NOTICE, metrics should not use this, as the OAP core merges indices of metrics automatically.
*/
@@ -70,17 +70,16 @@
* @return the length of this column, this is only for {@link String} column. The usage of this depends on the
* storage implementation.
*
* Notice, different lengths may cause different types.
* Such as, over 16383 would make the type in MySQL to be MEDIUMTEXT, due to database varchar max=16383
*
* Notice, different lengths may cause different types. For example, a length over 16383 would make the MySQL type
* MEDIUMTEXT, due to the database varchar max of 16383.
* @since 7.1.0
*/
int length() default 200;

/**
* The name of a system environment variable which could provide an override value for the length limitation.
* @return the variable name of system environment.
*
* @return the variable name of system environment.
* @since 8.2.0
*/
String lengthEnvVariable() default "";
@@ -102,6 +101,29 @@
*/
AnalyzerType analyzer() default AnalyzerType.OAP_ANALYZER;

/**
* Sharding key is used to group time series data per metric of one entity in one place (the same shard, or the
* same column for a column-oriented database).
* For example, ServiceA's traffic gauge (service calls per minute) includes the following timestamped values, so it
* should be sharded by service ID:
* [ServiceA(encoded ID): 01-28 18:30 values-1, 01-28 18:31 values-2, 01-28 18:32 values-3, 01-28 18:32 values-4]
*
* BanyanDB is the first storage implementation supporting this. It allows continuous time series metrics to be
* stored close together and compressed better.
*
* 1. One entity could have multiple sharding keys.
* 2. If no column is appointed for this, {@link org.apache.skywalking.oap.server.core.storage.StorageData#id}
* would be used by the storage implementation accordingly.
*
* NOTICE, this sharding concept is NOT just about splitting data into different database instances or physical
* files.
*
* @return a non-negative value if this column is used for sharding; -1 means it is not a sharding key
* @since 9.0.0
*/
int shardingKeyIdx() default -1;

/**
* The analyzer declares the text analysis mode.
*/
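
To illustrate how a storage implementation could consume the new attribute, here is a small reflection-based sketch (not the actual OAP model-building code) that collects a model's sharding key columns ordered by `shardingKeyIdx`; the scanner class is hypothetical.

```
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

import org.apache.skywalking.oap.server.core.storage.annotation.Column;

public class ShardingKeyScanner {
    /**
     * Collect the column names whose shardingKeyIdx is non-negative, ordered by the
     * index, so a storage plugin could build the series key as (key0, key1, ...).
     * Inherited fields are ignored in this sketch for brevity.
     */
    public static List<String> shardingKeys(Class<?> model) {
        List<Field> keyFields = new ArrayList<>();
        for (Field field : model.getDeclaredFields()) {
            Column column = field.getAnnotation(Column.class);
            if (column != null && column.shardingKeyIdx() >= 0) {
                keyFields.add(field);
            }
        }
        keyFields.sort(Comparator.comparingInt(field -> field.getAnnotation(Column.class).shardingKeyIdx()));

        List<String> columnNames = new ArrayList<>();
        for (Field field : keyFields) {
            columnNames.add(field.getAnnotation(Column.class).columnName());
        }
        return columnNames;
    }
}
```

For the `SegmentRecord` changes above, this yields the `SERVICE_ID`, `SERVICE_INSTANCE_ID`, and `IS_ERROR` columns in that order; when the result is empty, the Javadoc above says the storage implementation would fall back to `StorageData#id`.
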
@@ -20,19 +20,41 @@

import java.lang.reflect.Type;
import lombok.Getter;
import lombok.ToString;
import org.apache.skywalking.oap.server.core.analysis.metrics.DataTable;
import org.apache.skywalking.oap.server.core.storage.annotation.Column;

@Getter
@ToString
public class ModelColumn {
private final ColumnName columnName;
private final Class<?> type;
private final Type genericType;
private final boolean matchQuery;
/**
* Store this column for query results, but it can't be used as a query condition. Conflicts with {@link #indexOnly}.
*/
private final boolean storageOnly;
/**
* Index this column for query conditions only. Conflicts with {@link #storageOnly}.
*
* @since 9.0.0
*/
private final boolean indexOnly;
/**
* The max length of the column value, for length-sensitive databases.
*/
private final int length;
/**
* The analyzer policy appointed for fuzzy queries, especially for ElasticSearch.
*/
private final Column.AnalyzerType analyzer;
/**
* Sharding key is used to group time series data per metric of one entity. See {@link Column#shardingKeyIdx()}.
*
* @since 9.0.0
*/
private int shardingKeyIdx;

public ModelColumn(ColumnName columnName,
Class<?> type,
@@ -42,7 +64,8 @@ public ModelColumn(ColumnName columnName,
boolean indexOnly,
boolean isValue,
int length,
Column.AnalyzerType analyzer) {
Column.AnalyzerType analyzer,
int shardingKeyIdx) {
this.columnName = columnName;
this.type = type;
this.genericType = genericType;
@@ -67,5 +90,6 @@ public ModelColumn(ColumnName columnName,
"The column " + columnName + " can't be defined as both indexOnly and storageOnly.");
}
this.indexOnly = indexOnly;
this.shardingKeyIdx = shardingKeyIdx;
}
}
