Skip to content

Commit

Permalink
Merge branch 'dev' into spark-rapids-tools-distributed-base
Browse files Browse the repository at this point in the history
  • Loading branch information
parthosa authored Jan 28, 2025
2 parents 0ec5061 + de411e3 commit 2d43a58
Show file tree
Hide file tree
Showing 235 changed files with 7,844 additions and 3,593 deletions.
14 changes: 6 additions & 8 deletions .github/workflows/add-to-project.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -23,13 +23,11 @@ on:
- opened

jobs:
add-to-project:
if: github.repository == 'NVIDIA/spark-rapids-tools'
name: Add new issues and pull requests to project
Add-to-project:
if: github.repository_owner == 'NVIDIA' # avoid adding issues from forks
runs-on: ubuntu-latest
steps:
- uses: actions/[email protected]
- name: add-to-project
uses: NVIDIA/spark-rapids-common/add-to-project@main
with:
project-url: https://github.com/orgs/NVIDIA/projects/4
github-token: ${{ secrets.PROJECT_TOKEN }}

token: ${{ secrets.PROJECT_TOKEN }}
50 changes: 50 additions & 0 deletions .github/workflows/license-header-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# A workflow to check copyright/license header.
#
# Runs on pull requests and delegates the actual header validation to the
# shared `license-header-check` action from NVIDIA/spark-rapids-common.
name: license header check

on:
  pull_request:
    types: [opened, synchronize, reopened]

jobs:
  license-header-check:
    runs-on: ubuntu-latest
    # Skip pull requests whose title contains the '[bot]' marker.
    if: "!contains(github.event.pull_request.title, '[bot]')"
    steps:
      # Compute a fetch depth of (number of commits in the PR + 10) and export
      # it through GITHUB_ENV so the checkout step below can read it.
      # NOTE(review): the extra 10 commits are presumably headroom so the
      # PR's base history is available to the checker -- confirm.
      - name: Get checkout depth
        run: |
          echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> "$GITHUB_ENV"

      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: ${{ env.PR_FETCH_DEPTH }}

      # File-name patterns passed to the shared action, one per line and
      # comma-separated.
      - name: license-header-check
        uses: NVIDIA/spark-rapids-common/license-header-check@main
        with:
          included_file_patterns: |
            *.py,
            *.toml,
            *.ini,
            *.yml,
            *.yaml,
            *.sh,
            *.properties,
            *.xml,
            *.feature,
            *.scala
4 changes: 2 additions & 2 deletions .github/workflows/mvn-verify-check.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -25,7 +25,7 @@ jobs:
strategy:
matrix:
java-version: [8, 11]
spark-version: ['313', '324', '334', '350']
spark-version: ['324', '334', '350']
steps:
- uses: actions/checkout@v4

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/python-unit-test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
python-version: ['3.9', '3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v4
Expand Down
64 changes: 2 additions & 62 deletions core/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2021-2024, NVIDIA CORPORATION.
Copyright (c) 2021-2025, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -23,7 +23,7 @@
<artifactId>rapids-4-spark-tools_2.12</artifactId>
<name>RAPIDS Accelerator for Apache Spark tools</name>
<description>RAPIDS Accelerator for Apache Spark tools</description>
<version>24.10.2-SNAPSHOT</version>
<version>24.12.2-SNAPSHOT</version>
<packaging>jar</packaging>
<url>http://github.com/NVIDIA/spark-rapids-tools</url>

Expand Down Expand Up @@ -70,66 +70,6 @@
</developer>
</developers>
<profiles>
<profile>
<id>release311</id>
<activation>
<property>
<name>buildver</name>
<value>311</value>
</property>
</activation>
<properties>
<buildver>311</buildver>
<spark.version>${spark311.version}</spark.version>
<delta.core.version>${delta10x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
</profile>
<profile>
<id>release312</id>
<activation>
<property>
<name>buildver</name>
<value>312</value>
</property>
</activation>
<properties>
<buildver>312</buildver>
<spark.version>${spark312.version}</spark.version>
<delta.core.version>${delta10x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
</profile>
<profile>
<id>release313</id>
<activation>
<property>
<name>buildver</name>
<value>313</value>
</property>
</activation>
<properties>
<buildver>313</buildver>
<spark.version>${spark313.version}</spark.version>
<delta.core.version>${delta10x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
</profile>
<profile>
<id>release314</id>
<activation>
<property>
<name>buildver</name>
<value>314</value>
</property>
</activation>
<properties>
<buildver>314</buildver>
<spark.version>${spark314.version}</spark.version>
<delta.core.version>${delta10x.version}</delta.core.version>
<hadoop.version>3.3.6</hadoop.version>
</properties>
</profile>
<profile>
<id>release320</id>
<activation>
Expand Down
95 changes: 94 additions & 1 deletion core/scalastyle-config.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<!--
Copyright (c) 2023-2024, NVIDIA CORPORATION. All Rights Reserved.
Copyright (c) 2023-2025, NVIDIA CORPORATION. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,6 +40,12 @@ You can also disable only one rule, by specifying its rule id, as specified in:

<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"/>

<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>

<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>

<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>

<check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
<parameters>
<parameter name="maxLineLength"><![CDATA[100]]></parameter>
Expand All @@ -58,6 +64,24 @@ You can also disable only one rule, by specifying its rule id, as specified in:
</parameters>
</check>

<check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
<parameters>
<parameter name="tokens">COMMA</parameter>
</parameters>
</check>

<check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">\)\{</parameter></parameters>
<customMessage><![CDATA[
Single Space between ')' and `{`.
]]></customMessage>
</check>

<check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
<customMessage>Omit braces in case clauses.</customMessage>
</check>

<check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
Expand Down Expand Up @@ -92,10 +116,35 @@ You can also disable only one rule, by specifying its rule id, as specified in:
</parameters>
</check>

<check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>

<check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>

<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
<parameters>
<parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
</parameters>
</check>

<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
<parameters>
<parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
</parameters>
</check>

<!-- ??? usually shouldn't be checked into the code base. -->
<check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage"
enabled="true"/>

<!-- Similar to Spark, all printlns need to be wrapped in '// scalastyle:off/on println' -->
<check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
<parameters><parameter name="regex">^println$</parameter></parameters>
<customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
// scalastyle:off println
println(...)
// scalastyle:on println]]></customMessage>
</check>

<check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker"
enabled="true">
<parameters>
Expand All @@ -112,11 +161,55 @@ You can also disable only one rule, by specifying its rule id, as specified in:
<customMessage>Use UTF8Source.from instead of Source.from</customMessage>
</check>

<!-- ================================================================================ -->
<!-- rules for enforcing cross-build between Scala 2.12 and 2.13 -->
<!-- ================================================================================ -->

<check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true">
<parameters><parameter name="illegalImports"><![CDATA[scala.collection.Seq,scala.collection.IndexedSeq]]></parameter></parameters>
<customMessage><![CDATA[
Don't import scala.collection.Seq and scala.collection.IndexedSeq as it may bring some problems with cross-build between Scala 2.12 and 2.13.
Please refer below page to see the details of changes around Seq / IndexedSeq.
https://docs.scala-lang.org/overviews/core/collections-migration-213.html
If you really need to use scala.collection.Seq or scala.collection.IndexedSeq, please use the fully-qualified name instead.
]]></customMessage>
</check>

<check level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters>
<parameter name="regex">def this\((.*)\) \{</parameter>
<parameter name="line">false</parameter>
</parameters>
<customMessage>procedure syntax is deprecated for constructors in Scala 2.13: add `=`, as in method definition</customMessage>
</check>

<check level="error" class="org.scalastyle.scalariform.ProcedureDeclarationChecker" enabled="true">
<customMessage>procedure syntax is deprecated in Scala 2.13: add return type `: Unit` and `=`</customMessage>
</check>

<check level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters>
<parameter name="regex">ArrayBuilder.make\[(.+)\]\(\)</parameter>
<parameter name="line">false</parameter>
</parameters>
<customMessage>ArrayBuilder.make does not accept parens anymore in Scala 2.13</customMessage>
</check>

<!-- ================================================================================ -->
<!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
<!-- ================================================================================ -->

<!-- This project uses Javadoc rather than Scaladoc so scaladoc checks are disabled -->
<check enabled="false" class="org.scalastyle.scalariform.ScalaDocChecker" level="warning"/>

<check customId="argcount" level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="false">
<parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
</check>

<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="false"></check>

<!-- Unit tests use non-ASCII characters, so we need to clean that up first -->
<check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="false"></check>
</scalastyle>
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
4 changes: 4 additions & 0 deletions core/src/main/resources/operatorsScore-databricks-aws-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
4 changes: 4 additions & 0 deletions core/src/main/resources/operatorsScore-dataproc-gke-l4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
4 changes: 4 additions & 0 deletions core/src/main/resources/operatorsScore-dataproc-gke-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
4 changes: 4 additions & 0 deletions core/src/main/resources/operatorsScore-dataproc-l4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
4 changes: 4 additions & 0 deletions core/src/main/resources/operatorsScore-dataproc-t4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
4 changes: 4 additions & 0 deletions core/src/main/resources/operatorsScore-emr-a10.csv
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,7 @@ DecimalSum,1.5
MaxBy,1.5
MinBy,1.5
ArrayJoin,1.5
RunningWindowFunctionExec,1.5
MonthsBetween,1.5
TruncDate,1.5
TruncTimestamp,1.5
Loading

0 comments on commit 2d43a58

Please sign in to comment.