-
Notifications
You must be signed in to change notification settings - Fork 185
Standardize Stage Naming: Enforce snake_case with centralized conversion function #1038
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 8 commits
1880f08
1dd4b3e
6645b7e
bf192b8
eee822f
14ca3d9
bc8ea0c
f54706e
52563a5
28305e9
0274d6d
41ccb36
25cb9d0
d3e4dbd
0cd716b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,9 +12,17 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import re | ||
| from abc import ABC, abstractmethod | ||
|
|
||
|
|
||
| def _camel_to_snake(name: str) -> str: | ||
| """Convert CamelCase to snake_case.""" | ||
| # Insert an underscore before any uppercase letter that follows a lowercase letter or digit | ||
| s1 = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name) | ||
| return s1.lower() | ||
|
||
|
|
||
|
|
||
VibhuJawa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| class DocumentFilter(ABC): | ||
| """ | ||
| An abstract base class for text-based document filters. | ||
|
|
@@ -26,7 +34,7 @@ class DocumentFilter(ABC): | |
|
|
||
| def __init__(self): | ||
| super().__init__() | ||
| self._name = self.__class__.__name__ | ||
| self._name = _camel_to_snake(self.__class__.__name__) | ||
| self._sentences = None | ||
| self._paragraphs = None | ||
| self._ngrams = None | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,9 +12,17 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import re | ||
| from abc import ABC, abstractmethod | ||
|
|
||
|
|
||
| def _camel_to_snake(name: str) -> str: | ||
| """Convert CamelCase to snake_case.""" | ||
| # Insert an underscore before any uppercase letter that follows a lowercase letter or digit | ||
| s1 = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name) | ||
| return s1.lower() | ||
|
||
|
|
||
|
|
||
| class DocumentModifier(ABC): | ||
| """ | ||
| Abstract base class for text-based document modifiers. | ||
|
|
@@ -29,7 +37,7 @@ class DocumentModifier(ABC): | |
|
|
||
| def __init__(self) -> None: | ||
| super().__init__() | ||
| self._name = self.__class__.__name__ | ||
| self._name = _camel_to_snake(self.__class__.__name__) | ||
| self._sentences = None | ||
| self._paragraphs = None | ||
| self._ngrams = None | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -245,11 +245,11 @@ def test_fuzzy_dedup_no_duplicates( | |
|
|
||
| workflow.run(initial_tasks=tasks) | ||
|
|
||
| assert not (cache_path / "ConnectedComponentsStage").exists() | ||
| assert not (cache_path / "BucketsToEdgesStage").exists() | ||
| assert not (cache_path / "connected_components_stage").exists() | ||
| assert not (cache_path / "buckets_to_edges_stage").exists() | ||
|
Comment on lines
+248
to
+249
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ayushdg , Can you check this test please |
||
| assert not (output_path / DUPLICATE_IDS_SUBDIR).exists() | ||
|
|
||
| lsh_df = cudf.read_parquet(cache_path / "LSHStage") | ||
| lsh_df = cudf.read_parquet(cache_path / "lsh_stage") | ||
| assert len(lsh_df) == 0 | ||
|
|
||
| def test_bad_inputs(self, tmp_path: Path) -> None: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot , please address
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in commit 0274d6d. Changed the minhash stage name from "min_hash_stage" to "minhash_stage" as suggested.