Skip to content

Fix/hash pattern scope #718

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions my_first_preswald_app/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
secrets.toml
.preswald_deploy
.env.structured
9,995 changes: 9,995 additions & 0 deletions my_first_preswald_app/data/my_sample_superstore.csv

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions my_first_preswald_app/data/sample.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
item,quantity,value
Item A,10,100
Item B,5,60
Item C,8,80
Item D,12,150
Item E,7,90
Item F,9,110
Item G,4,50
Item H,11,130
Item I,6,70
Item J,3,40
129 changes: 129 additions & 0 deletions my_first_preswald_app/hello.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import pandas as pd
import plotly.express as px
from preswald import text, connect, slider, selectbox, plotly
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Superstore dashboard script: loads the sample CSV, applies the sidebar
# filters (minimum sales, customer segment) and renders four Plotly charts.
# Everything runs inside one try block so that a failure is logged (with a
# traceback) instead of propagating out of the script.
try:
    # Initialize preswald
    connect()

    # Title
    text("# 📈 Superstore Sales & Profit Dashboard")

    # Load and preprocess
    df = pd.read_csv("data/my_sample_superstore.csv")
    logger.info("Initial DataFrame shape: %s", df.shape)
    logger.info("Columns: %s", df.columns.tolist())

    df["Order Date"] = pd.to_datetime(df["Order Date"], format="%m/%d/%Y")
    # Mask zero-sales rows before dividing: Profit / 0 would produce +/-inf,
    # and a single inf turns a whole group's mean margin into inf. Masked rows
    # become NaN, which mean() skips.
    df["Profit Margin"] = df["Profit"] / df["Sales"].where(df["Sales"] != 0)

    # State name to abbreviation mapping
    state_abbrev = {
        'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
        'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA',
        'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
        'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
        'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO',
        'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
        'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
        'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
        'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
        'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
        # Not a state, but it would otherwise map to NaN and vanish from the map.
        'District of Columbia': 'DC',
    }

    # Convert state names to abbreviations
    logger.info("Unique states before mapping: %s", df["State"].unique())
    df["State_Code"] = df["State"].map(state_abbrev)
    logger.info("Unique state codes after mapping: %s", df["State_Code"].unique())

    # Rows whose state has no abbreviation get a NaN code, and groupby() drops
    # NaN keys; report them so the omission from the map is not silent.
    unmapped_states = sorted(
        df.loc[df["State_Code"].isna(), "State"].dropna().unique().tolist()
    )
    if unmapped_states:
        logger.warning(
            "States without a known abbreviation are omitted from the map: %s",
            unmapped_states,
        )

    # Sidebar widgets
    min_sales = slider(
        "Minimum Sales Filter",
        min_val=0,
        max_val=int(df["Sales"].max()),
        default=0,
    )
    segment_options = sorted(df["Segment"].unique().tolist())
    # Prefer "Consumer", but fall back to the first available segment so the
    # default is always one of the offered options.
    default_segment = "Consumer"
    if segment_options and default_segment not in segment_options:
        default_segment = segment_options[0]
    segment_sel = selectbox(
        "Choose Segment",
        options=segment_options,
        default=default_segment,
    )

    # Apply filters
    df = df[(df["Sales"] >= min_sales) & (df["Segment"] == segment_sel)]
    logger.info("DataFrame shape after filtering: %s", df.shape)

    # 1. Sales vs. Profit by Category
    text("## 1. Sales & Profit by Category")
    cat_stats = df.groupby("Category", as_index=False).agg(
        Total_Sales=("Sales", "sum"),
        Total_Profit=("Profit", "sum"),
    )
    fig1 = px.bar(
        cat_stats,
        x="Category",
        y=["Total_Sales", "Total_Profit"],
        barmode="group",
        title="Sales and Profit by Category",
        labels={"value": "USD", "variable": "Measure"},
        color_discrete_sequence=px.colors.qualitative.Set2,
    )
    fig1.update_layout(template="plotly_white")
    plotly(fig1)

    # 2. Average Profit Margin by Region
    text("## 2. Average Profit Margin by Region")
    region_stats = (
        df.groupby("Region", as_index=False)["Profit Margin"]
        .mean()
        .rename(columns={"Profit Margin": "Avg_Profit_Margin"})
    )
    fig2 = px.bar(
        region_stats,
        x="Region",
        y="Avg_Profit_Margin",
        title="Average Profit Margin by Region",
        labels={"Avg_Profit_Margin": "Profit Margin"},
        color="Avg_Profit_Margin",
        color_continuous_scale="Viridis",
    )
    fig2.update_layout(template="plotly_white")
    plotly(fig2)

    # 3. Segment-Specific Profit Margin by Category
    text(f"## 3. Profit Margin by Category: {segment_sel}")
    seg_cat = (
        df.groupby("Category", as_index=False)["Profit Margin"]
        .mean()
        .rename(columns={"Profit Margin": "Profit_Margin"})
    )
    fig3 = px.bar(
        seg_cat,
        x="Category",
        y="Profit_Margin",
        title=f"{segment_sel} Segment: Profit Margin by Category",
        labels={"Profit_Margin": "Profit Margin"},
        color_discrete_sequence=px.colors.sequential.Viridis,
    )
    fig3.update_layout(template="plotly_white")
    plotly(fig3)

    # 4. Total Sales by State (USA Map)
    text("## 4. Total Sales by State (USA)")
    state_sales = df.groupby("State_Code", as_index=False)["Sales"].sum()
    logger.info("State sales data:\n%s", state_sales)
    fig4 = px.choropleth(
        state_sales,
        locations="State_Code",
        locationmode="USA-states",
        color="Sales",
        scope="usa",
        title="Total Sales by State",
        labels={"Sales": "Total Sales"},
        color_continuous_scale="Viridis",
    )
    fig4.update_layout(template="plotly_white")
    plotly(fig4)

except Exception as e:
    # Top-level boundary: keep the script from raising, but record the full
    # traceback so the failure can actually be diagnosed.
    logger.exception("Error in main function: %s", e)
Binary file added my_first_preswald_app/images/favicon.ico
Binary file not shown.
Binary file added my_first_preswald_app/images/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 20 additions & 0 deletions my_first_preswald_app/preswald.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[project]
title = "Superstore Analysis Dashboard"
version = "0.1.0"
port = 8501
slug = "my-first-preswald-app-540052"
entrypoint = "hello.py"

[branding]
name = "Superstore Analysis Dashboard"
logo = "images/logo.png"
favicon = "images/favicon.ico"
primaryColor = "#F89613"

[data.my_sample_superstore]
type = "csv"
path = "data/my_sample_superstore.csv"

[logging]
level = "INFO"
format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
23 changes: 23 additions & 0 deletions my_first_preswald_app/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "preswald-app"
version = "0.1.0"
description = "A Preswald application"
requires-python = ">=3.8"
dependencies = [
"preswald"
]

[tool.hatch.build.targets.wheel]
packages = ["."]

[tool.black]
line-length = 88
target-version = ['py38']

[tool.isort]
profile = "black"
multi_line_output = 3
3 changes: 3 additions & 0 deletions my_first_preswald_app/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pandas>=2.0.0
plotly>=5.0.0
preswald
18 changes: 10 additions & 8 deletions preswald/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
import re
import zlib
from datetime import date, datetime
from typing import Any
from typing import Any, Dict, List, Optional, Set, Tuple, Union
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the latest version of ruff targeting Python 3.10, the preferred style is to use built-in generics such as `dict` instead of `Dict`, and `X | None` instead of `Optional[X]`, so we don't need these extra `typing` imports.


import msgpack
import numpy as np


logger = logging.getLogger(__name__)

# Compiled regex pattern for matching SHA-256 hashes
HASH_PATTERN = re.compile(r"^[0-9a-f]{64}$")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're not calling HASH_PATTERN outside RenderBuffer, so we don't need to expose it here.



class PreswaldJSONEncoder(json.JSONEncoder):
"""Custom JSON encoder for Preswald data types."""
Expand Down Expand Up @@ -196,24 +199,23 @@ class RenderBuffer:
Used by services to avoid redundant component reruns and frontend updates.
"""

HASH_PATTERN = re.compile(r"^[0-9a-f]{64}$")

def __init__(self):
self._state_cache: dict[str, str] = {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason to rename `_state_cache` and add a dirty set?

self._cache: Dict[str, str] = {}
self._dirty: Set[str] = set()

def has_changed(self, component_id: str, new_value: Any) -> bool:
"""Check if the new hash differs from the cached one."""
new_clean = clean_nan_values(new_value)

if component_id not in self._state_cache:
if component_id not in self._cache:
return True # always render the first time

old_clean = clean_nan_values(self._state_cache[component_id])
old_clean = clean_nan_values(self._cache[component_id])
return new_clean != old_clean

def update(self, component_id: str, new_value: Any):
"""Update the cached hash value."""
self._state_cache[component_id] = self._ensure_hash(new_value)
self._cache[component_id] = self._ensure_hash(new_value)

def should_render(self, component_id: str, new_value: Any) -> bool:
"""
Expand All @@ -224,7 +226,7 @@ def should_render(self, component_id: str, new_value: Any) -> bool:

def _ensure_hash(self, value: Any) -> str:
"""Convert value to SHA256 hash. Accepts either a hash string or a hashable object."""
if isinstance(value, str) and self.HASH_PATTERN.match(value):
if isinstance(value, str) and HASH_PATTERN.match(value):
return value # already a hash
try:
cleaned = clean_nan_values(value)
Expand Down