Jul 15, 2024
source tutorial/bin/activate
pip3 install -r requirements.txt
import pandas as pd
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]], columns=['A','B','C'])
df.head() # Display first 5 rows
df.head() # First 5 rows
df.tail() # Last 5 rows
df.columns # Show columns
df.index # Show index
df.info() # Basic information about the DataFrame
df.describe() # Descriptive statistics
coffee = pd.read_csv('data/warmup/coffee.csv')
coffee.head()
results = pd.read_parquet('data/results.parquet')
olympics = pd.read_excel('data/olympics.xlsx', sheet_name='results')
df.loc[rows, columns]
df.iloc[rows, columns]
df.loc[0] # Access first row
df.iloc[0, 2] # First row, third column
df[df['A'] > 5]
df[(df['A'] > 5) & (df['B'] == 8)]
df['New_Col'] = value
df['Conditional_Col'] = np.where(df['A'] > 10, 'High', 'Low') # requires: import numpy as np
df.drop(columns=['A'], inplace=True)
merged_df = pd.merge(df1, df2, left_on='key1', right_on='key2', how='inner')
concatenated_df = pd.concat([df1, df2], axis=0)
df.isna().sum()
df.fillna(value, inplace=True)
df.interpolate(inplace=True)
df.dropna(subset=['column_name'], inplace=True)
df.groupby('column').sum()
pd.pivot_table(df, values='sales', index=['month'], columns=['product'])
df['Prev_Day'] = df['Sales'].shift(1)
df['Change'] = df['Sales'] - df['Prev_Day']
df['Rank'] = df['Values'].rank(ascending=False)
Use pyarrow for better performance.
pd.options.compute.use_bottleneck = True
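# NOTE(review): `compute.chunk_size` does not appear among pandas' documented options and may raise OptionError; confirm before use. Chunked reading is normally done with pd.read_csv(..., chunksize=100000).
pd.options.compute.chunk_size = 100000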
# Example ChatGPT query:
filter_df = df[(df['BornRegion'] == 'New Hampshire') | (df['BornCity'] == 'San Francisco')]
Happy coding with Pandas!