Professional Documents
Culture Documents
[25]: df.head
1
# --- Summary aggregates -------------------------------------------------
# Trip count per destination (value_counts sorts descending), top five kept.
destination_counts = df["destination"].value_counts()
popular_destinations = destination_counts.head(5)

# Total fare collected per vehicle type.
revenue_by_vehicle_type = df.groupby("vehicle_type")["fare_inr"].sum()

# Mean of the seats_available column across all rows.
average_seats_available = df["seats_available"].mean()

# --- Bar chart of the five most frequent destinations -------------------
fig, ax = plt.subplots(figsize=(10, 6))
popular_destinations.plot(kind="bar", color="skyblue", ax=ax)
ax.set_title("Most Popular Destinations")
ax.set_xlabel("Destination")
ax.set_ylabel("Trip Count")
# Tilt the destination labels so they stay readable.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()
2
[29]: # Histogram of distance_km
# Draw the distance_km column as a 20-bin histogram on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df["distance_km"], bins=20, color="skyblue", edgecolor="black")
ax.set(
    title="Distribution of Distance Traveled",
    xlabel="Distance (km)",
    ylabel="Frequency",
)
ax.grid(True)
plt.show()
3
[30]: # Correlation matrix
# Pairwise Pearson correlation between the numeric columns only.
# Non-numeric columns must be excluded explicitly: since pandas 2.0,
# DataFrame.corr() no longer drops them silently and raises on string
# data instead. select_dtypes works on every pandas version.
numeric_columns = df.select_dtypes(include="number")
correlation_matrix = numeric_columns.corr()
print("Correlation Matrix:")
print(correlation_matrix)
Correlation Matrix:
id distance_km fare_inr seats_available
id 1.000000 0.029220 0.011439 -0.007157
distance_km 0.029220 1.000000 0.052072 0.022267
fare_inr 0.011439 0.052072 1.000000 0.025553
seats_available -0.007157 0.022267 0.025553 1.000000
4
[31]: # Convert date column to datetime
# Parse the date column, then store the parsed values back under the same name.
parsed_dates = pd.to_datetime(df["date"])
df["date"] = parsed_dates
5
[22]: # Distribution of seats available
# Draw the seats_available column as a 20-bin histogram on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df["seats_available"], bins=20, color="orange", edgecolor="black")
ax.set(
    title="Distribution of Seats Available",
    xlabel="Seats Available",
    ylabel="Frequency",
)
ax.grid(True)
plt.show()
6
[34]: # Five most frequent destinations by trip count
# Count trips per destination (sorted descending) and keep the first five.
trips_per_destination = df["destination"].value_counts()
popular_destinations = trips_per_destination.head(5)

print("Most Popular Destinations:")
print(popular_destinations)