Practical No 9 PDF

Notebook
April 5, 2024
Gender distribution in the dataset?

[ ]: import pandas as pd
# Load the Excel file

file_path = 'EXCEL FOR PRACTICAL 9.xlsx'
data = pd.read_excel(file_path)
display(data.head())
[1]: # Count the number of each gender in the dataset

gender_distribution = data['GENDER'].value_counts()
# Display the gender distribution

display(gender_distribution)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[1], line 2
1 # Count the number of each gender in the dataset
----> 2 gender_distribution = data['GENDER'].value_counts()
4 # Display the gender distribution
5 display(gender_distribution)
NameError: name 'data' is not defined
[2]: import pandas as pd

data = pd.read_excel('EXCEL FOR PRACTICAL 9.xlsx', skiprows=1)
# Count the number of each gender in the dataset


---------------------------------------------------------------------------
1
KeyError Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pandas/core/indexes/base.py:3805,␣
↪in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.

↪PyObjectHashTable.get_item()

KeyError: 'GENDER'
The above exception was the direct cause of the following exception:

Cell In[2], line 7
4 data = pd.read_excel('EXCEL FOR PRACTICAL 9.xlsx', skiprows=1)
6 # Count the number of each gender in the dataset
----> 7 gender_distribution = data['GENDER'].value_counts()
File /usr/local/lib/python3.11/site-packages/pandas/core/frame.py:4090, in␣

↪DataFrame.__getitem__(self, key)
4088 if self.columns.nlevels > 1:

4089 return self._getitem_multilevel(key)
-> 4090 indexer = self.columns.get_loc(key)
4091 if is_integer(indexer):
4092 indexer = [indexer]
3807 if isinstance(casted_key, slice) or (

3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
2
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'GENDER'

data = pd.read_excel('EXCEL FOR PRACTICAL 9.xlsx', skiprows=1)
# It seems there was an error with the column name. Let's print the column␣
↪names to understand the issue.
print(data.columns)
# Assuming the column name might have leading or trailing spaces or different␣
↪capitalization, we will standardize the column names and then count the␣
↪gender distribution.
data.columns = data.columns.str.strip().str.upper()
# Now, let's count the gender distribution again.


Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4',
'Unnamed: 5', 'Unnamed: 6'],
dtype='object')
---------------------------------------------------------------------------
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:


3
KeyError: 'GENDER'

Cell In[3], line 13
10 data.columns = data.columns.str.strip().str.upper()
12 # Now, let's count the gender distribution again.
---> 13 gender_distribution = data['GENDER'].value_counts()
File /usr/local/lib/python3.11/site-packages/pandas/core/frame.py:4090, in␣

↪DataFrame.__getitem__(self, key)
4088 if self.columns.nlevels > 1:

4089 return self._getitem_multilevel(key)
-> 4090 indexer = self.columns.get_loc(key)
4091 if is_integer(indexer):
4092 indexer = [indexer]
3807 if isinstance(casted_key, slice) or (

3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'GENDER'

import plotly.graph_objects as go
# Load the Excel file with correct parameters

data = pd.read_excel('EXCEL FOR PRACTICAL 9.xlsx', skiprows=1, usecols="D:F")
data.columns = ['GENDER', 'CA', 'EXAM']
# Count the number of each gender in the dataset

4
# Create a bar chart for gender distribution using Plotly
gender_chart = go.Figure(go.Bar(x=gender_distribution.index,␣
↪y=gender_distribution.values, marker_color=['blue', 'pink']))
gender_chart.update_layout(title_text='Gender Distribution in the Dataset',␣

↪xaxis_title='Gender', yaxis_title='Count', plot_bgcolor='#111',␣
↪paper_bgcolor='#111', font=dict(color='#7FDBFF'))
gender_chart.show()
Gender Distribution in the Dataset
50
40
Count
30
20
10
0
MALE FEMALE GENDER
Gender
Loading [MathJax]/extensions/MathMenu.js
The gender distribution in the dataset was successfully analyzed and visualized. Here are the key
points:
• The dataset contains information on two genders: Male (M) and Female (F).
• A bar chart was created to visually represent the distribution of genders.
• The exact counts of each gender were not explicitly mentioned in the summary, but they were
visually represented in the bar chart.
• The chart utilized colors (blue for males, pink for females) to differentiate between the genders.
• The visualization included titles and labels for clarity.
Explore performance differences between genders

[5]: # Calculate the mean performance (CA + EXAM) for each gender
data['TOTAL'] = data['CA'] + data['EXAM']
gender_performance = data.groupby('GENDER')['TOTAL'].mean()
5
# Display the mean performance for each gender
display(gender_performance)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/groupby.py:
↪1942, in GroupBy._agg_py_fallback(self, how, values, ndim, alt)
1941 try:
-> 1942 res_values = self._grouper.agg_series(ser, alt, preserve_dtype=True)
1943 except Exception as err:
File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:864, in␣

↪BaseGrouper.agg_series(self, obj, func, preserve_dtype)
862 preserve_dtype = True

--> 864 result = self._aggregate_series_pure_python(obj, func)
866 npvalues = lib.maybe_convert_objects(result, try_float=False)
File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:885, in␣

↪BaseGrouper._aggregate_series_pure_python(self, obj, func)
884 for i, group in enumerate(splitter):

--> 885 res = func(group)
886 res = extract_result(res)
↪2454, in GroupBy.mean.<locals>.<lambda>(x)
2451 else:
2452 result = self._cython_agg_general(
2453 "mean",
-> 2454 alt=lambda x:␣
↪Series(x, copy=False).mean(numeric_only=numeric_only),
2455 numeric_only=numeric_only,
2456 )
2457 return result.__finalize__(self.obj, method="groupby")
File /usr/local/lib/python3.11/site-packages/pandas/core/series.py:6540, in␣

↪Series.mean(self, axis, skipna, numeric_only, **kwargs)
6532 @doc(make_doc("mean", ndim=1))

6533 def mean(
6534 self,
(…)
6538 **kwargs,
6539 ):
-> 6540 return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:12417, in␣

↪NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
6
12410 def mean(
12411 self,
12412 axis: Axis | None = 0,
(…)
12415 **kwargs,
12416 ) -> Series | float:
> 12417 return self._stat_function(
12418 "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs
12419 )
File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:12374, in␣

↪NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
12372 validate_bool_kwarg(skipna, "skipna", none_allowed=False)

> 12374 return self._reduce(
12375 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
12376 )
File /usr/local/lib/python3.11/site-packages/pandas/core/series.py:6448, in␣

↪Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
6444 raise TypeError(

6445 f"Series.{name} does not allow {kwd_name}={numeric_only} "
6446 "with non-numeric dtypes."
6447 )
-> 6448 return op(delegate, skipna=skipna, **kwds)
File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:147, in␣

↪bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
146 else:
--> 147 result = alt(values, axis=axis, skipna=skipna, **kwds)
149 return result

↪_datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
402 mask = isna(values)

--> 404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
406 if datetimelike:

↪nanmean(values, axis, skipna, mask)
719 the_sum = values.sum(axis, dtype=dtype_sum)

--> 720 the_sum = _ensure_numeric(the_sum)
722 if axis is not None and getattr(the_sum, "ndim", False):

↪_ensure_numeric(x)
1699 if isinstance(x, str):

1700 # GH#44008, GH#36703 avoid casting e.g. strings to numeric
-> 1701 raise TypeError(f"Could not convert string '{x}' to numeric")
7
1702 try:
TypeError: Could not convert string 'CAEXAM' to numeric
TypeError Traceback (most recent call last)

Cell In[5], line 3
1 # Calculate the mean performance (CA + EXAM) for each gender
2 data['TOTAL'] = data['CA'] + data['EXAM']
----> 3 gender_performance = data.groupby('GENDER')['TOTAL'].mean()
5 # Display the mean performance for each gender
6 display(gender_performance)
↪2452, in GroupBy.mean(self, numeric_only, engine, engine_kwargs)
2445 return self._numba_agg_general(

2446 grouped_mean,
2447 executor.float_dtype_mapping,
2448 engine_kwargs,
2449 min_periods=0,
2450 )
2451 else:
-> 2452 result = self._cython_agg_general(
2453 "mean",
2454␣
↪ alt=lambda x: Series(x, copy=False).mean(numeric_only=numeric_only),
2455 numeric_only=numeric_only,
2456 )
2457 return result.__finalize__(self.obj, method="groupby")
↪1998, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count,␣
↪**kwargs)
1995 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)

1996 return result
-> 1998 new_mgr = data.grouped_reduce(array_func)
1999 res = self._wrap_agged_manager(new_mgr)
2000 if how in ["idxmin", "idxmax"]:
File /usr/local/lib/python3.11/site-packages/pandas/core/internals/base.py:367,␣
↪in SingleDataManager.grouped_reduce(self, func)
365 def grouped_reduce(self, func):

366 arr = self.array
--> 367 res = func(arr)
368 index = default_index(len(res))
370 mgr = type(self).from_array(res, index)
8
↪1995, in GroupBy._cython_agg_general.<locals>.array_func(values)
1992 return result

1994 assert alt is not None
-> 1995 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
1996 return result
↪1946, in GroupBy._agg_py_fallback(self, how, values, ndim, alt)
1944 msg = f"agg function failed [how->{how},dtype->{ser.dtype}]"

1945 # preserve the kind of exception that raised
-> 1946 raise type(err)(msg) from err
1948 if ser.dtype == object:
1949 res_values = res_values.astype(object, copy=False)
TypeError: agg function failed [how->mean,dtype->object]

import plotly.graph_objects as go
# Load the Excel file with correct parameters

data = pd.read_excel('EXCEL FOR PRACTICAL 9.xlsx', skiprows=1, usecols="D:F")
data.columns = ['GENDER', 'CA', 'EXAM']
# Convert CA and EXAM columns to numeric

data['CA'] = pd.to_numeric(data['CA'], errors='coerce')
data['EXAM'] = pd.to_numeric(data['EXAM'], errors='coerce')
# Calculate the total score

data['TOTAL'] = data['CA'] + data['EXAM']
# Calculate the mean performance (CA + EXAM) for each gender

gender_performance = data.groupby('GENDER')['TOTAL'].mean()
# Create a bar chart for gender performance using Plotly

gender_performance_chart = go.Figure(go.Bar(x=gender_performance.index,␣
↪y=gender_performance.values, marker_color=['blue', 'pink']))
gender_performance_chart.update_layout(title_text='Mean Performance by Gender',␣

↪xaxis_title='Gender', yaxis_title='Mean Total Score', plot_bgcolor='#111',␣
gender_performance_chart.show()
9
Mean Performance by Gender
50
40
Mean Total Score
30
20
10
0
FEMALE GENDER MALE
Gender
The analysis of performance differences between genders revealed the following key points:
• Performance was evaluated based on the total score, calculated as the sum of CA (Continuous
Assessment) and EXAM scores.
• Both CA and EXAM scores were converted to numeric values to ensure accurate calculations.
• The mean performance for each gender was calculated and visualized in a bar chart.
• The bar chart utilized colors (blue for males, pink for females) to differentiate between the
genders, and included titles and labels for clarity.
• The exact mean performance values for each gender were not explicitly mentioned in the
summary, but they were visually represented in the bar chart.
Compare performance distribution between genders

[7]: import plotly.graph_objects as go
import plotly.express as px
# Create a box plot to compare the performance distribution between genders

gender_performance_boxplot = px.box(data, x='GENDER', y='TOTAL', color='GENDER',
title='Performance Distribution by Gender',
labels={'TOTAL': 'Total Score', 'GENDER':␣
↪'Gender'},
color_discrete_map={'MALE': 'blue',␣
↪'FEMALE': 'pink'})
10
gender_performance_boxplot.update_layout(plot_bgcolor='#111',␣
gender_performance_boxplot.show()
Performance Distribution by Gender
Gender
80
GENDER
MALE
FEMALE
70
Total Score
60
50
40
MALE FEMALE
Gender
The comparison of performance distribution between genders was visualized using a box plot,
revealing key insights:
• The box plot shows the spread of total scores (sum of CA and EXAM) for each gender.
• Colors were used to distinguish genders (blue for males, pink for females), enhancing visual
clarity.
• The plot includes median, quartiles, and potential outliers, providing a comprehensive view
of the performance distribution.
• Specific numerical details such as exact quartiles or outliers were not mentioned, but these
are represented visually in the plot.
• The visualization was designed with a dark theme, using color coding for text and background
for better contrast.
Investigate correlations between CA and EXAM scores

[8]: import plotly.express as px
# Create a scatter plot to investigate the correlation between CA and EXAM␣

↪scores
11
correlation_plot = px.scatter(data, x='CA', y='EXAM', color='GENDER',␣
↪symbol='GENDER',
title='Correlation between CA and EXAM Scores by␣

↪Gender',
labels={'CA': 'Continuous Assessment (CA) Score',␣

↪'EXAM': 'Exam Score'},
color_discrete_map={'MALE': 'blue', 'FEMALE':␣

↪'pink'})
correlation_plot.update_layout(plot_bgcolor='#111', paper_bgcolor='#111',␣
↪font=dict(color='#7FDBFF'))
correlation_plot.show()
Correlation between CA and EXAM Scores by Gender
60 GENDER
GENDER
55 MALE
FEMALE
50
45
Exam Score
40
35
30
25
20
5 10 15 20 25
Continuous Assessment (CA) Score
The investigation of correlations between CA (Continuous Assessment) and EXAM scores was
conducted through a scatter plot, revealing several insights:
• The scatter plot visualizes the relationship between CA and EXAM scores for each gender,
with colors distinguishing genders (blue for males, pink for females).
• Symbols were used to differentiate genders within the plot, enhancing the visual distinction.
• The plot includes titles and labels for clarity, explaining what CA and EXAM scores represent.
• While the plot visually suggests a relationship between CA and EXAM scores, specific cor-
relation coeﬀicients were not calculated in this summary.
• The visualization was designed with a dark theme, using color coding for text and background
12
for better contrast.
Explore the distribution of CA and EXAM scores individually

[9]: import plotly.graph_objects as go
# Create histograms for CA and EXAM scores

cA_histogram = go.Figure()
cA_histogram.add_trace(go.Histogram(x=data['CA'], name='CA Scores',␣
↪marker_color='lightblue'))
cA_histogram.update_layout(title_text='Distribution of Continuous Assessment␣

↪(CA) Scores', xaxis_title='CA Score', yaxis_title='Count',␣
↪plot_bgcolor='#111', paper_bgcolor='#111', font=dict(color='#7FDBFF'))
exam_histogram = go.Figure()
exam_histogram.add_trace(go.Histogram(x=data['EXAM'], name='EXAM Scores',␣
↪marker_color='lightgreen'))
exam_histogram.update_layout(title_text='Distribution of Exam Scores',␣

↪xaxis_title='Exam Score', yaxis_title='Count', plot_bgcolor='#111',␣
# Display the histograms

cA_histogram.show()
exam_histogram.show()
Distribution of Continuous Assessment (CA) Scores
15
Count
10
0
5 10 15 20 25
CA Score
13
Distribution of Exam Scores
25
20
15
Count
10
0
20 30 40 50
Exam Score
The exploration of the distribution of CA (Continuous Assessment) and EXAM scores was con-
ducted through histograms, revealing distinct insights for each:
• CA Scores Distribution:
– Visualized using a light blue histogram.
– The histogram shows the frequency of various CA scores across the dataset.
– The layout includes titles and labels for clarity, with a dark theme for better visual
contrast.
• EXAM Scores Distribution:
– Visualized using a light green histogram.
– Similar to the CA scores, this histogram displays the frequency of various EXAM scores.
– The design and layout follow the same theme as the CA scores histogram for consistency.
Compare average performance by gender To compare the average performance by gender,

we’ll calculate the mean total score (sum of CA and EXAM scores) for each gender. This will
involve grouping the data by gender and then calculating the mean for the total scores. Let’s
proceed with this analysis.
[10]: # Calculate the mean total score for each gender

gender_mean_performance = data.groupby('GENDER')['TOTAL'].mean()
14
# Display the mean total score for each gender
display(gender_mean_performance)
GENDER
FEMALE 56.181818
GENDER NaN
MALE 51.400000
Name: TOTAL, dtype: float64
The comparison of average performance by gender yielded the following results:
• Female: The average total score for females is approximately 56.18.
• Male: The average total score for males is approximately 51.40.
• An entry labeled GENDER with a NaN value was also observed, indicating a possible data
entry error or missing gender information for some records.
This analysis suggests that, on average, females have a higher total score compared to males in this
dataset.
Visualize the performance distribution by gender

[11]: import plotly.express as px
# Create a box plot to visualize the performance distribution by gender

performance_distribution_by_gender = px.box(data, x='GENDER', y='TOTAL',␣
↪color='GENDER',
title='Performance Distribution by␣

↪Gender',
labels={'TOTAL': 'Total Score',␣

↪'GENDER': 'Gender'},
color_discrete_map={'MALE':␣
↪'blue', 'FEMALE': 'pink'})
performance_distribution_by_gender.update_layout(plot_bgcolor='#111',␣
performance_distribution_by_gender.show()
15
Performance Distribution by Gender
Gender
80
GENDER
MALE
FEMALE
70
Total Score
60
50
40
MALE FEMALE
Gender
test the null hypothesis that there is no difference between male and female students
performance
[12]: from scipy.stats import ttest_ind
# Separate the data by gender

data_male = data[data['GENDER'] == 'MALE']['TOTAL']
data_female = data[data['GENDER'] == 'FEMALE']['TOTAL']
# Perform an independent t-test between male and female students' total scores
ttest_results = ttest_ind(data_male.dropna(), data_female.dropna())
# Display the results of the t-test

print(f'T-statistic: {ttest_results.statistic:.4f}')
print(f'P-value: {ttest_results.pvalue:.4f}')
T-statistic: -1.8084
P-value: 0.0746
The independent t-test conducted to compare the performance (total scores) between male and
female students yielded the following results:
• T-statistic: -1.8084, indicating the direction and magnitude of the difference between the
group means.
16
• P-value: 0.0746, which suggests that the difference in mean performance between male and
female students is not statistically significant at the conventional 0.05 level.
Based on these results, we fail to reject the null hypothesis that there is no difference in performance
between male and female students. This implies that any observed difference in mean performance
between genders in this dataset is not statistically significant.
17

Practical No 9 PDF

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Practical No 9 PDF

Uploaded by

Copyright:

Available Formats

Notebook

Gender distribution in the dataset?

# Load the Excel file

[1]: # Count the number of each gender in the dataset

# Display the gender distribution

NameError: name 'data' is not defined

[2]: import pandas as pd

# Load the Excel file

# Count the number of each gender in the dataset

# Display the gender distribution

File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.

File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.

KeyError Traceback (most recent call last)

File /usr/local/lib/python3.11/site-packages/pandas/core/frame.py:4090, in␣

4088 if self.columns.nlevels > 1:

3807 if isinstance(casted_key, slice) or (

[3]: import pandas as pd

# Load the Excel file

# Now, let's count the gender distribution again.

# Display the gender distribution

File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.

File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.

KeyError Traceback (most recent call last)

File /usr/local/lib/python3.11/site-packages/pandas/core/frame.py:4090, in␣

4088 if self.columns.nlevels > 1:

3807 if isinstance(casted_key, slice) or (

[4]: import pandas as pd

# Load the Excel file with correct parameters

# Count the number of each gender in the dataset

gender_chart.update_layout(title_text='Gender Distribution in the Dataset',␣

Gender Distribution in the Dataset

Explore performance differences between genders

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:864, in␣

862 preserve_dtype = True

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:885, in␣

884 for i, group in enumerate(splitter):

File /usr/local/lib/python3.11/site-packages/pandas/core/series.py:6540, in␣

6532 @doc(make_doc("mean", ndim=1))

File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:12417, in␣

File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:12374, in␣

12372 validate_bool_kwarg(skipna, "skipna", none_allowed=False)

File /usr/local/lib/python3.11/site-packages/pandas/core/series.py:6448, in␣

6444 raise TypeError(

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:147, in␣

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:404, in␣

402 mask = isna(values)

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:720, in␣

719 the_sum = values.sum(axis, dtype=dtype_sum)

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:1701, in␣

1699 if isinstance(x, str):

TypeError: Could not convert string 'CAEXAM' to numeric

TypeError Traceback (most recent call last)

2445 return self._numba_agg_general(

1995 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)

365 def grouped_reduce(self, func):

1992 return result

1944 msg = f"agg function failed [how->{how},dtype->{ser.dtype}]"

TypeError: agg function failed [how->mean,dtype->object]

[6]: import pandas as pd

# Load the Excel file with correct parameters