Fix/hash pattern scope #718
base: main
Changes from all commits: 991b746, c8e52d1, cecc0f1
@@ -0,0 +1,3 @@
secrets.toml
.preswald_deploy
.env.structured
Large diffs are not rendered by default.
@@ -0,0 +1,11 @@
item,quantity,value
Item A,10,100
Item B,5,60
Item C,8,80
Item D,12,150
Item E,7,90
Item F,9,110
Item G,4,50
Item H,11,130
Item I,6,70
Item J,3,40
@@ -0,0 +1,129 @@
import pandas as pd
import plotly.express as px
from preswald import text, connect, slider, selectbox, plotly
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    # Initialize preswald
    connect()

    # Title
    text("# 📈 Superstore Sales & Profit Dashboard")

    # Load and preprocess
    df = pd.read_csv("data/my_sample_superstore.csv")
    print("Initial DataFrame shape:", df.shape)
    print("Columns:", df.columns.tolist())

    df["Order Date"] = pd.to_datetime(df["Order Date"], format="%m/%d/%Y")
    df["Profit Margin"] = df["Profit"] / df["Sales"]

    # State name to abbreviation mapping
    state_abbrev = {
        'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
        'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA',
        'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
        'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
        'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
        'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
        'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
        'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
        'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
        'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
    }

    # Convert state names to abbreviations
    print("\nUnique states before mapping:", df['State'].unique())
    df['State_Code'] = df['State'].map(state_abbrev)
    print("Unique state codes after mapping:", df['State_Code'].unique())

    # Sidebar widgets
    min_sales = slider(
        "Minimum Sales Filter",
        min_val=0,
        max_val=int(df["Sales"].max()),
        default=0
    )
    segment_sel = selectbox(
        "Choose Segment",
        options=sorted(df["Segment"].unique().tolist()),
        default="Consumer"
    )

    # Apply filters
    df = df[(df["Sales"] >= min_sales) & (df["Segment"] == segment_sel)]
    print("\nDataFrame shape after filtering:", df.shape)

    # 1. Sales vs. Profit by Category
    text("## 1. Sales & Profit by Category")
    cat_stats = df.groupby("Category", as_index=False).agg(
        Total_Sales=("Sales", "sum"),
        Total_Profit=("Profit", "sum")
    )
    fig1 = px.bar(
        cat_stats,
        x="Category",
        y=["Total_Sales", "Total_Profit"],
        barmode="group",
        title="Sales and Profit by Category",
        labels={"value": "USD", "variable": "Measure"},
        color_discrete_sequence=px.colors.qualitative.Set2
    )
    fig1.update_layout(template="plotly_white")
    plotly(fig1)

    # 2. Average Profit Margin by Region
    text("## 2. Average Profit Margin by Region")
    region_stats = df.groupby("Region", as_index=False)["Profit Margin"]\
        .mean().rename(columns={"Profit Margin": "Avg_Profit_Margin"})
    fig2 = px.bar(
        region_stats,
        x="Region",
        y="Avg_Profit_Margin",
        title="Average Profit Margin by Region",
        labels={"Avg_Profit_Margin": "Profit Margin"},
        color="Avg_Profit_Margin",
        color_continuous_scale="Viridis"
    )
    fig2.update_layout(template="plotly_white")
    plotly(fig2)

    # 3. Segment-Specific Profit Margin by Category
    text(f"## 3. Profit Margin by Category: {segment_sel}")
    seg_cat = df.groupby("Category", as_index=False)["Profit Margin"]\
        .mean().rename(columns={"Profit Margin": "Profit_Margin"})
    fig3 = px.bar(
        seg_cat,
        x="Category",
        y="Profit_Margin",
        title=f"{segment_sel} Segment: Profit Margin by Category",
        labels={"Profit_Margin": "Profit Margin"},
        color_discrete_sequence=px.colors.sequential.Viridis
    )
    fig3.update_layout(template="plotly_white")
    plotly(fig3)

    # 4. Total Sales by State (USA Map)
    text("## 4. Total Sales by State (USA)")
    state_sales = df.groupby("State_Code", as_index=False)["Sales"].sum()
    print("\nState sales data:")
    print(state_sales)
    fig4 = px.choropleth(
        state_sales,
        locations="State_Code",
        locationmode="USA-states",
        color="Sales",
        scope="usa",
        title="Total Sales by State",
        labels={"Sales": "Total Sales"},
        color_continuous_scale="Viridis"
    )
    fig4.update_layout(template="plotly_white")
    plotly(fig4)

except Exception as e:
    logger.error(f"Error in main function: {e}")
@@ -0,0 +1,20 @@
[project]
title = "Superstore Analysis Dashboard"
version = "0.1.0"
port = 8501
slug = "my-first-preswald-app-540052"
entrypoint = "hello.py"

[branding]
name = "Superstore Analysis Dashboard"
logo = "images/logo.png"
favicon = "images/favicon.ico"
primaryColor = "#F89613"

[data.my_sample_superstore]
type = "csv"
path = "data/my_sample_superstore.csv"

[logging]
level = "INFO"
format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -0,0 +1,23 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "preswald-app"
version = "0.1.0"
description = "A Preswald application"
requires-python = ">=3.8"
dependencies = [
    "preswald"
]

[tool.hatch.build.targets.wheel]
packages = ["."]

[tool.black]
line-length = 88
target-version = ['py38']

[tool.isort]
profile = "black"
multi_line_output = 3
@@ -0,0 +1,3 @@
pandas>=2.0.0
plotly>=5.0.0
preswald
@@ -4,14 +4,17 @@
import re
import zlib
from datetime import date, datetime
-from typing import Any
+from typing import Any, Dict, List, Optional, Set, Tuple, Union

import msgpack
import numpy as np


logger = logging.getLogger(__name__)

+# Compiled regex pattern for matching SHA-256 hashes
+HASH_PATTERN = re.compile(r"^[0-9a-f]{64}$")


class PreswaldJSONEncoder(json.JSONEncoder):
    """Custom JSON encoder for Preswald data types."""

Review comment on the new module-level HASH_PATTERN: We're not calling HASH_PATTERN outside RenderBuffer, so we don't need to expose it here.
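If the suggestion is to keep the pattern scoped to RenderBuffer rather than exporting it at module level, a minimal sketch of that alternative could look like the following. The _HASH_PATTERN name and the JSON-based fallback hashing are illustrative assumptions, not the PR's actual code (the real _ensure_hash cleans NaN values and serializes with the module's own helpers before hashing):

```python
import hashlib
import json
import re
from typing import Any


class RenderBuffer:
    # Class-scoped, underscore-prefixed pattern: nothing outside RenderBuffer
    # calls it, so it is not exposed as a module-level constant.
    _HASH_PATTERN = re.compile(r"^[0-9a-f]{64}$")

    def _ensure_hash(self, value: Any) -> str:
        """Return value unchanged if it is already a SHA-256 hex digest, else hash it."""
        if isinstance(value, str) and self._HASH_PATTERN.match(value):
            return value
        # Illustrative fallback only; the real implementation uses the module's
        # own serialization path rather than plain JSON.
        payload = json.dumps(value, sort_keys=True, default=str).encode()
        return hashlib.sha256(payload).hexdigest()
```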
@@ -196,24 +199,23 @@ class RenderBuffer:
    Used by services to avoid redundant component reruns and frontend updates.
    """

-    HASH_PATTERN = re.compile(r"^[0-9a-f]{64}$")
-
    def __init__(self):
-        self._state_cache: dict[str, str] = {}
+        self._cache: Dict[str, str] = {}
+        self._dirty: Set[str] = set()

    def has_changed(self, component_id: str, new_value: Any) -> bool:
        """Check if the new hash differs from the cached one."""
        new_clean = clean_nan_values(new_value)

-        if component_id not in self._state_cache:
+        if component_id not in self._cache:
            return True  # always render the first time

-        old_clean = clean_nan_values(self._state_cache[component_id])
+        old_clean = clean_nan_values(self._cache[component_id])
        return new_clean != old_clean

    def update(self, component_id: str, new_value: Any):
        """Update the cached hash value."""
-        self._state_cache[component_id] = self._ensure_hash(new_value)
+        self._cache[component_id] = self._ensure_hash(new_value)

    def should_render(self, component_id: str, new_value: Any) -> bool:
        """

Review comment on the __init__ changes: any reason to rename _state_cache and add a dirty set?
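For context on that question: a dirty set in this kind of buffer is usually there so a caller can batch updates, that is, components whose hash changed get marked, and a later flush returns and clears the marked set. Below is a hypothetical sketch of that pattern; the class name, the flush_dirty method, and the simplified should_render signature are all made up for illustration, since the PR does not show how _dirty is consumed:

```python
class DirtyTrackingBuffer:
    """Hypothetical sketch: cache per-component hashes and track which ones changed."""

    def __init__(self):
        self._cache: dict[str, str] = {}  # component_id -> last rendered hash
        self._dirty: set[str] = set()     # components changed since the last flush

    def should_render(self, component_id: str, new_hash: str) -> bool:
        # Skip re-rendering when the hash is unchanged.
        if self._cache.get(component_id) == new_hash:
            return False
        self._cache[component_id] = new_hash
        self._dirty.add(component_id)     # remember it for the next batch update
        return True

    def flush_dirty(self) -> set[str]:
        """Return the components that changed and reset the tracking set."""
        changed, self._dirty = self._dirty, set()
        return changed
```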
@@ -224,7 +226,7 @@ def should_render(self, component_id: str, new_value: Any) -> bool:

    def _ensure_hash(self, value: Any) -> str:
        """Convert value to SHA256 hash. Accepts either a hash string or a hashable object."""
-        if isinstance(value, str) and self.HASH_PATTERN.match(value):
+        if isinstance(value, str) and HASH_PATTERN.match(value):
            return value  # already a hash
        try:
            cleaned = clean_nan_values(value)
Review comment on the typing import: With the latest version of ruff operating on Python 3.10, it seems the preferred way is to use things like dict instead of Dict and | instead of Optional, so we don't need these typing imports.
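For reference, the style the reviewer is pointing at (built-in generics from PEP 585 and | unions from PEP 604, which ruff's pyupgrade-style rules rewrite to on Python 3.9/3.10 and later) removes the need for most typing imports. The lookup_old/lookup_new names below are purely illustrative:

```python
# Older style: requires the extra typing imports.
from typing import Dict, List, Optional, Tuple

def lookup_old(cache: Dict[str, str], keys: List[str]) -> Optional[Tuple[str, ...]]:
    found = tuple(cache[k] for k in keys if k in cache)
    return found or None

# Python 3.9+/3.10+ style: built-in generics and | unions, no typing imports needed.
def lookup_new(cache: dict[str, str], keys: list[str]) -> tuple[str, ...] | None:
    found = tuple(cache[k] for k in keys if k in cache)
    return found or None
```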