You are on page 1 of 7

untitled18

April 14, 2024

[23]: import pandas as pd

[24]: df= pd.read_csv("ANP.csv")

[25]: df.head

[25]: <bound method NDFrame.head of id date destination distance_km


departure_time arrival_time \
0 1 2024-01-01 Mysore 150 08:00:00 11:00:00
1 2 2024-01-02 Coorg 250 07:30:00 13:00:00
2 3 2024-03-31 Hampi 350 09:00:00 15:30:00
3 4 2024-03-26 Mysore 118 04:04:49 24:03:54
4 5 2024-02-14 Mysore 395 13:10:17 24:21:58
… … … … … … …
1396 1998 2024-01-04 Mysore 243 12:02:18 23:02:08
1397 1999 2024-02-15 Hubli 135 06:47:58 26:46:48
1398 2000 2024-03-16 Bangalore 341 18:49:43 27:34:38
1399 2002 2024-01-20 Hampi 168 20:10:43 24:13:10
1400 2003 2024-02-13 Gokarna 365 12:11:01 24:20:14

fare_inr vehicle_type seats_available


0 5250.0 Bus 30
1 3625.0 SUV 4
2 8750.0 Minivan 8
3 2442.0 SUV 42
4 5307.0 Bus 21
… … … …
1396 8730.0 SUV 17
1397 3969.0 Bus 10
1398 18315.0 Bus 42
1399 2227.5 Bus 27
1400 17871.0 Bus 14

[1401 rows x 9 columns]>

[26]: total_distance_traveled = df['distance_km'].sum()


average_fare = df['fare_inr'].mean()

1
# Five most frequent destinations; value_counts orders them by count, highest first.
popular_destinations = df['destination'].value_counts().iloc[:5]

# Total fare collected for each vehicle type.
revenue_by_vehicle_type = (
    df.groupby('vehicle_type')['fare_inr']
    .sum()
)

# Mean of the seats_available column.
average_seats_available = df['seats_available'].mean()

[27]: print("\nTotal Distance Traveled:", total_distance_traveled, "km")


print("Average Fare:", average_fare, "INR")
print("Most Popular Destinations:")
print(popular_destinations)
print("Revenue by Vehicle Type:")
print(revenue_by_vehicle_type)
print("Average Seats Available:", average_seats_available)

Total Distance Traveled: 376569 km


Average Fare: 6030.90863668808 INR
Most Popular Destinations:
Gokarna 158
Hubli 157
Mangalore 151
Mysore 147
Bangalore 136
Name: destination, dtype: int64
Revenue by Vehicle Type:
vehicle_type
Bus 4178448.5
Minivan 8750.0
SUV 4262104.5
Name: fare_inr, dtype: float64
Average Seats Available: 26.911491791577443

[28]: import matplotlib.pyplot as plt

# Bar chart of the destination counts held in popular_destinations.
fig, ax = plt.subplots(figsize=(10, 6))
popular_destinations.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Most Popular Destinations')
ax.set_xlabel('Destination')
ax.set_ylabel('Trip Count')
# Tilt the category labels so longer destination names do not collide.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

2
[29]: # Histogram of distance_km
# 20-bin histogram of the distance_km column, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['distance_km'], bins=20, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Distance Traveled')
ax.set_xlabel('Distance (km)')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()

3
[30]: # Correlation matrix
import seaborn as sns

# Correlate only the numeric columns. The frame also holds string columns
# (date, destination, departure/arrival times, vehicle_type); calling
# df.corr() on the whole frame raises on pandas >= 2.0, where non-numeric
# columns are no longer dropped silently. Selecting by dtype behaves the
# same on older and newer pandas.
correlation_matrix = df.select_dtypes(include='number').corr()
print("Correlation Matrix:")
print(correlation_matrix)

# Heatmap of the correlation matrix, each cell annotated to two decimals.
plt.figure(figsize=(10, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()

Correlation Matrix:
id distance_km fare_inr seats_available
id 1.000000 0.029220 0.011439 -0.007157
distance_km 0.029220 1.000000 0.052072 0.022267
fare_inr 0.011439 0.052072 1.000000 0.025553
seats_available -0.007157 0.022267 0.025553 1.000000

4
[31]: # Convert date column to datetime
# Parse the date strings in place so grouping and plotting use real timestamps.
df['date'] = pd.to_datetime(df['date'])

# Total fare collected on each calendar date.
revenue_by_date = df.groupby('date')['fare_inr'].sum()

# Line chart of daily revenue on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
revenue_by_date.plot(ax=ax, color='green')
ax.set_title('Revenue Trend Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Revenue (INR)')
ax.grid(True)
plt.show()

5
[22]: # Distribution of seats available
# 20-bin histogram of the seats_available column, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['seats_available'], bins=20, color='orange', edgecolor='black')
ax.set(
    title='Distribution of Seats Available',
    xlabel='Seats Available',
    ylabel='Frequency',
)
ax.grid(True)
plt.show()

6
[34]: # Recompute and print the five most frequent destinations
# Top five destinations by number of rows, highest count first.
popular_destinations = df['destination'].value_counts().iloc[:5]

# Heading and Series on separate lines, exactly as two consecutive prints would emit them.
print("Most Popular Destinations:", popular_destinations, sep="\n")

Most Popular Destinations:


Gokarna 158
Hubli 157
Mangalore 151
Mysore 147
Bangalore 136
Name: destination, dtype: int64

You might also like