# Jul 20, 2024
import streamlit as st
import plotly.express as px
import pandas as pd
import os
# Page-wide settings for the dashboard (title, icon, wide layout).
st.set_page_config(page_title='Superstore EDA', page_icon='📊', layout='wide')

# Let the user supply their own dataset; fall back to the bundled sample
# when nothing has been uploaded.
uploaded_file = st.file_uploader('Upload a file', type=['csv', 'xlsx'])
if uploaded_file is None:
    df = pd.read_csv('sample_superstore.csv')
elif uploaded_file.name.lower().endswith('.xlsx'):
    # The uploader accepts .xlsx, which read_csv cannot parse, so dispatch
    # on the uploaded file's extension. read_excel needs an Excel engine
    # such as openpyxl to be installed.
    df = pd.read_excel(uploaded_file)
else:
    df = pd.read_csv(uploaded_file)
# Parse the order dates up front: read_csv leaves them as strings, and both
# the min/max defaults and the range comparison below need real timestamps.
df['Order Date'] = pd.to_datetime(df['Order Date'])

# Two side-by-side date pickers, defaulting to the full span of the data.
col1, col2 = st.columns(2)
start_date = col1.date_input('Start Date', df['Order Date'].min())
end_date = col2.date_input('End Date', df['Order Date'].max())

# date_input returns datetime.date objects, which cannot be compared with a
# datetime64 column directly; convert them to Timestamps. Comparing on the
# normalized (midnight) order day keeps the end date inclusive even when an
# order carries a time-of-day component.
order_day = df['Order Date'].dt.normalize()
in_range = (order_day >= pd.Timestamp(start_date)) & (order_day <= pd.Timestamp(end_date))
df = df[in_range]
# Cascading sidebar filters: every selection narrows the frame, so the next
# widget only offers values that survive the previous filter. An empty
# selection leaves the frame untouched.
region_options = df['Region'].unique()
regions = st.sidebar.multiselect('Select Region', region_options)
df = df[df['Region'].isin(regions)] if regions else df

state_options = df['State'].unique()
states = st.sidebar.multiselect('Select State', state_options)
df = df[df['State'].isin(states)] if states else df

city_options = df['City'].unique()
cities = st.sidebar.multiselect('Select City', city_options)
df = df[df['City'].isin(cities)] if cities else df
# Aggregate total sales along the two dimensions charted below.
sales_per_category = df.groupby('Category')['Sales'].sum()
category_sales = sales_per_category.reset_index()
sales_per_region = df.groupby('Region')['Sales'].sum()
region_sales = sales_per_region.reset_index()

# Bar chart: total sales for each product category.
fig1 = px.bar(category_sales, x='Category', y='Sales')
st.plotly_chart(fig1, use_container_width=True)

# Donut chart (pie with a 50% hole): share of sales for each region.
fig2 = px.pie(region_sales, names='Region', values='Sales', hole=0.5)
st.plotly_chart(fig2, use_container_width=True)
# Bucket every order into its calendar month. to_datetime is a no-op on a
# column that is already datetime64 and guarantees the .dt accessor works;
# assign() builds a new frame rather than writing into a filtered slice,
# which avoids pandas' SettingWithCopyWarning.
df = df.assign(**{'Year-Month': pd.to_datetime(df['Order Date']).dt.to_period('M')})

# Grouping on Period values keeps the months in chronological order.
time_series_sales = df.groupby('Year-Month')['Sales'].sum().reset_index()

# Plotly cannot JSON-serialize pandas Period objects, so plot their
# 'YYYY-MM' string form instead.
time_series_sales['Year-Month'] = time_series_sales['Year-Month'].astype(str)

fig3 = px.line(time_series_sales, x='Year-Month', y='Sales')
st.plotly_chart(fig3, use_container_width=True)
# Hierarchical view of sales: region -> category -> sub-category, with each
# tile sized by its summed 'Sales' value.
treemap_levels = ['Region', 'Category', 'Sub-Category']
treemap_fig = px.treemap(df, path=treemap_levels, values='Sales')
st.plotly_chart(treemap_fig, use_container_width=True)
# st.expander returns a container that must be entered with `with`; testing
# it in an `if` is always truthy and renders the content outside the
# expander. Everything below now lives inside the collapsible section.
with st.expander('View Data'):
    # Per-category totals, shaded so larger sales values stand out.
    st.write(category_sales.style.background_gradient(cmap='Blues'))

    # Offer the same table as a UTF-8 encoded CSV download.
    csv = category_sales.to_csv(index=False).encode('utf-8')
    st.download_button('Download CSV', csv, 'category_sales.csv', 'text/csv')