Ali Hamza

Frequency distribution :
A frequency distribution is a statistical representation of how often different values or
categories occur in a dataset. It lists the values or categories along with the number of
times each value or category appears in the dataset. This information helps in
summarizing and understanding the distribution or pattern of the data.
Formula :
Frequency (f) = Number of data points in a specific category or with a particular value
In mathematical notation, for a specific category or value 'c', the formula is:
f(c) = Number of data points in the dataset that equal 'c'
Example in c++ :
#include <iostream>
#include <map>
using namespace std;
// Frequency distribution example.
// Reads a count n followed by n integers from standard input and prints each
// distinct value with the number of times it occurred. std::map keeps its
// keys ordered, so the values are printed in ascending order.
// Returns 1 if the count is negative or any input cannot be parsed.
int main() {
    std::map<int, int> frequencyMap;
    int n = 0;  // Number of data points

    std::cout << "Enter the number of data points: ";
    if (!(std::cin >> n) || n < 0) {
        std::cout << "Please enter a valid number of data points." << std::endl;
        return 1;
    }

    // Input data points
    std::cout << "Enter the data points, separated by spaces:\n";
    for (int i = 0; i < n; ++i) {
        int dataPoint = 0;
        if (!(std::cin >> dataPoint)) {
            std::cout << "Invalid data point entered." << std::endl;
            return 1;
        }
        // operator[] value-initialises a missing key to 0 before the increment.
        ++frequencyMap[dataPoint];
    }

    // Display frequency distribution
    std::cout << "Frequency Distribution:\n";
    for (const auto& entry : frequencyMap) {
        std::cout << "Value: " << entry.first << " - Frequency: " << entry.second << std::endl;
    }
    return 0;
}
Arithmetic Mean :
The arithmetic mean, often referred to as the average, is a measure of central tendency in statistics. It is
calculated by adding up all the values in a dataset and then dividing the sum by the number of values.
The formula for the arithmetic mean (mean or average) is as follows:
Arithmetic Mean (μ) = (Sum of all values) / (Number of values)
Example in c++ :
#include <iostream>
// Arithmetic mean example.
// Reads a count n followed by n numbers from standard input and prints their
// sum divided by n.
// Returns 1 if n is not a positive integer or a value cannot be parsed.
int main() {
    int n = 0;
    double sum = 0.0;

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    std::cout << "Enter the values, separated by spaces:\n";
    for (int i = 0; i < n; ++i) {
        double value = 0.0;
        if (!(std::cin >> value)) {
            std::cout << "Invalid value entered." << std::endl;
            return 1;
        }
        sum += value;
    }

    // n > 0 is guaranteed by the check above, so the division is safe.
    const double mean = sum / n;
    std::cout << "Arithmetic Mean (Average) is: " << mean << std::endl;
    return 0;
}
Harmonic Mean :
The harmonic mean is a mathematical average defined as the reciprocal of the arithmetic mean
of the reciprocals of a set of positive values. It is often used in situations where rates or ratios are involved. The formula for the
harmonic mean of n values, denoted as H, is:
Formula:
H = n / [(1/x₁) + (1/x₂) + (1/x₃) + ... + (1/xₙ)]
Where:
H is the harmonic mean.
n is the number of values in the set.
x₁, x₂, x₃, ..., xₙ are the individual values for which you want to calculate the harmonic mean.
Example in c++ :
#include <iostream>
// Harmonic mean example.
// Reads a count n followed by n values and prints n divided by the sum of the
// reciprocals of the values.
// Returns 1 if n is not a positive integer, a value cannot be parsed, or a
// value is not strictly positive (a zero would divide by zero, and the
// harmonic mean is only meaningful for positive values).
int main() {
    int n = 0;  // Number of values
    double sumReciprocals = 0.0;

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    for (int i = 1; i <= n; i++) {
        double x = 0.0;
        std::cout << "Enter value " << i << ": ";
        if (!(std::cin >> x) || x <= 0.0) {
            std::cout << "The harmonic mean requires positive numeric values." << std::endl;
            return 1;
        }
        sumReciprocals += 1.0 / x;
    }

    // Every term is positive, so sumReciprocals is strictly greater than zero.
    const double harmonicMean = n / sumReciprocals;
    std::cout << "The harmonic mean is: " << harmonicMean << std::endl;
    return 0;
}
Geometric Mean :
The geometric mean is a measure of central tendency used to find the average of a set of values by
multiplying them together and then taking the nth root, where "n" is the number of values. It is often
used when dealing with quantities that have a multiplicative relationship, such as growth rates or
investment returns. For n positive values x₁, x₂, ..., xₙ the formula is:
Geometric Mean (G) = (x₁ × x₂ × ... × xₙ)^(1/n)
Example in c++ :
#include <iostream>
#include <cmath>
// Geometric mean example.
// Reads a count n followed by n non-negative values and prints the n-th root
// of their product.
// The product is accumulated as a sum of logarithms so that long inputs do
// not overflow or underflow a double; exp(mean of logs) equals the n-th root
// of the product.
// Returns 1 if n is not a positive integer, a value cannot be parsed, or a
// value is negative (the real n-th root would not be defined in general).
int main() {
    int n = 0;  // Number of values
    double logSum = 0.0;
    bool sawZero = false;

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    for (int i = 1; i <= n; i++) {
        double x = 0.0;
        std::cout << "Enter value " << i << ": ";
        if (!(std::cin >> x) || x < 0.0) {
            std::cout << "The geometric mean requires non-negative numeric values." << std::endl;
            return 1;
        }
        if (x == 0.0) {
            // A single zero makes the whole product, and so the mean, zero.
            sawZero = true;
        } else {
            logSum += std::log(x);
        }
    }

    const double geometricMean = sawZero ? 0.0 : std::exp(logSum / n);
    std::cout << "The geometric mean is: " << geometricMean << std::endl;
    return 0;
}
Mode :
The mode is a statistical measure that represents the value(s) in a data set that occur most frequently. In
other words, it is the value(s) that appear with the highest frequency within a dataset. A dataset can
have one mode (unimodal) or multiple modes (multimodal) if there are multiple values that occur with
the same highest frequency.
Example in c++ :
#include <iostream>
#include <map>
#include <vector>
// Mode example.
// Reads a count n followed by n integers, counts how often each value occurs,
// and prints the value or values with the highest count together with that
// count.
// Returns 1 if n is not a positive integer or a value cannot be parsed.
int main() {
    int n = 0;                              // Number of values
    std::map<int, int> valueFrequency;      // Map to store value-frequency pairs
    std::vector<int> modes;                 // Vector to store modes

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    for (int i = 1; i <= n; i++) {
        int x = 0;
        std::cout << "Enter value " << i << ": ";
        if (!(std::cin >> x)) {
            std::cout << "Invalid value entered." << std::endl;
            return 1;
        }
        ++valueFrequency[x];
    }

    // Single pass: a strictly higher count restarts the list of modes, an
    // equal count extends it.
    int maxFrequency = 0;
    for (const auto& entry : valueFrequency) {
        if (entry.second > maxFrequency) {
            modes.clear();
            modes.push_back(entry.first);
            maxFrequency = entry.second;
        } else if (entry.second == maxFrequency) {
            modes.push_back(entry.first);
        }
    }

    if (modes.size() == 1) {
        std::cout << "The mode is: " << modes[0] << " with a frequency of " << maxFrequency << std::endl;
    } else {
        std::cout << "The dataset is multimodal. The modes are: ";
        // Emit the separator before every element except the first; this
        // avoids the unsigned "size() - 1" comparison against a signed index.
        for (std::vector<int>::size_type i = 0; i < modes.size(); ++i) {
            if (i != 0) {
                std::cout << ", ";
            }
            std::cout << modes[i];
        }
        std::cout << " with a frequency of " << maxFrequency << std::endl;
    }
    return 0;
}
Median :
The median is a measure of central tendency in statistics. It is the middle value of a dataset when the
data is arranged in ascending or descending order. In other words, it is the value that separates the
higher half from the lower half of the data. If the dataset has an even number of values, the median is
the average of the two middle values.
Example in c++ :
#include <iostream>
#include <algorithm>
#include <vector>
// Median example.
// Reads a count n followed by n values, sorts them in ascending order, and
// prints the middle value (odd n) or the average of the two middle values
// (even n).
// Returns 1 if n is not a positive integer or a value cannot be parsed.
int main() {
    int n = 0;  // Number of values
    std::vector<double> values;

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    values.reserve(static_cast<std::vector<double>::size_type>(n));
    for (int i = 1; i <= n; i++) {
        double x = 0.0;
        std::cout << "Enter value " << i << ": ";
        if (!(std::cin >> x)) {
            std::cout << "Invalid value entered." << std::endl;
            return 1;
        }
        values.push_back(x);
    }

    std::sort(values.begin(), values.end());

    const std::vector<double>::size_type middle = values.size() / 2;
    double median = 0.0;
    if (values.size() % 2 == 0) {
        // If the number of values is even, take the average of the two middle values
        median = (values[middle - 1] + values[middle]) / 2.0;
    } else {
        // If the number of values is odd, the median is the middle value
        median = values[middle];
    }

    std::cout << "The median is: " << median << std::endl;
    return 0;
}
Range :
In statistics, the range of a dataset is a measure of the spread or dispersion of the data. It is defined as
the difference between the maximum (largest) value and the minimum (smallest) value in the dataset.
The range provides a simple way to understand the extent to which the data values vary.
Mathematically, the range (R) can be calculated as follows:
Range = Maximum value − Minimum value
Example in c++ :
#include <iostream>
#include <vector>
#include <algorithm>
// Range example.
// Reads a count n followed by n values and prints the difference between the
// largest and the smallest value.
// The minimum and maximum are tracked while reading, so the data does not
// need to be stored or sorted.
// Returns 1 if n is not a positive integer or a value cannot be parsed.
int main() {
    int n = 0;  // Number of values

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    double smallest = 0.0;
    double largest = 0.0;
    for (int i = 1; i <= n; i++) {
        double x = 0.0;
        std::cout << "Enter value " << i << ": ";
        if (!(std::cin >> x)) {
            std::cout << "Invalid value entered." << std::endl;
            return 1;
        }
        if (i == 1) {
            // The first value seeds both extremes.
            smallest = x;
            largest = x;
        } else {
            smallest = std::min(smallest, x);
            largest = std::max(largest, x);
        }
    }

    const double range = largest - smallest;
    std::cout << "The range is: " << range << std::endl;
    return 0;
}
Variance :
Variance is a statistical measure that quantifies the extent to which data points in a dataset deviate from
the mean (average). It provides a measure of the spread or dispersion of the data. A higher variance
indicates that the data points are more spread out from the mean, while a lower variance suggests that
they are closer to the mean.
Example in c++ :
#include <iostream>
#include <vector>
// Variance example.
// Reads a count n followed by n values and prints the sample variance: the
// sum of squared deviations from the mean divided by (n - 1). With a single
// value the variance is reported as 0.
// The values are stored because the squared deviations need the mean, which
// is only known after all input has been read.
// Returns 1 if n is not a positive integer or a value cannot be parsed.
int main() {
    int n = 0;  // Number of values
    std::vector<double> values;
    double mean = 0.0;
    double variance = 0.0;

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    values.reserve(static_cast<std::vector<double>::size_type>(n));
    for (int i = 1; i <= n; i++) {
        double x = 0.0;
        std::cout << "Enter value " << i << ": ";
        if (!(std::cin >> x)) {
            std::cout << "Invalid value entered." << std::endl;
            return 1;
        }
        values.push_back(x);
        mean += x;  // Accumulates the sum; divided by n below.
    }

    if (n > 1) {
        mean /= n;
        for (const double value : values) {
            const double deviation = value - mean;
            variance += deviation * deviation;
        }
        variance /= n - 1;  // Use (n - 1) for sample variance, n for population variance
    } else {
        variance = 0.0;
    }

    std::cout << "The variance is: " << variance << std::endl;
    return 0;
}
Standard deviation :
The standard deviation is a measure of the amount of variation or dispersion in a set of values. It
quantifies how spread out the values are from the mean (average) of the dataset. A higher standard
deviation indicates that the values are more spread out, while a lower standard deviation indicates that
the values are closer to the mean.
Example in c++ :
#include <iostream>
#include <vector>
#include <cmath>
// Standard deviation example.
// Reads a count n followed by n values and prints the sample standard
// deviation: the square root of the sum of squared deviations from the mean
// divided by (n - 1). With a single value the result is reported as 0.
// The values are stored because the squared deviations need the mean, which
// is only known after all input has been read.
// Returns 1 if n is not a positive integer or a value cannot be parsed.
int main() {
    int n = 0;  // Number of values
    std::vector<double> values;
    double mean = 0.0;
    double variance = 0.0;
    double standardDeviation = 0.0;

    std::cout << "Enter the number of values: ";
    if (!(std::cin >> n) || n <= 0) {
        std::cout << "Please enter a valid number of values." << std::endl;
        return 1;
    }

    values.reserve(static_cast<std::vector<double>::size_type>(n));
    for (int i = 1; i <= n; i++) {
        double x = 0.0;
        std::cout << "Enter value " << i << ": ";
        if (!(std::cin >> x)) {
            std::cout << "Invalid value entered." << std::endl;
            return 1;
        }
        values.push_back(x);
        mean += x;  // Accumulates the sum; divided by n below.
    }

    if (n > 1) {
        mean /= n;
        for (const double value : values) {
            const double deviation = value - mean;
            variance += deviation * deviation;
        }
        variance /= n - 1;                        // Sample variance
        standardDeviation = std::sqrt(variance);  // Calculate standard deviation
    } else {
        standardDeviation = 0.0;
    }

    std::cout << "The standard deviation is: " << standardDeviation << std::endl;
    return 0;
}
Regression :
Regression is a statistical technique used to model and analyze the relationship between a dependent
variable and one or more independent variables. It is commonly used in various fields, including
economics, social sciences, natural sciences, and machine learning. The primary goal of regression
analysis is to understand and quantify the relationships between variables, make predictions, and
identify patterns in the data.
Example in c++ :
#include <iostream>
#include <Eigen/Dense>
using namespace Eigen;
int main() {
cin >> n;
if (n <= 1) {
cout << "Regression analysis requires at least two data points." << endl;
return 1;
VectorXd x(n); // Independent variable (predictor)
VectorXd y(n); // Dependent variable (response)

cout << "Enter the data points (x, y):" << endl;
for (int i = 0; i < n; i++) {
cin >> x(i) >> y(i);
// Performing linear regression
Vector2d coefficients = x.colwise().homogeneous().augmented(VectorXd::Ones(n)).jacobiSvd
double slope = coefficients(0);
double intercept = coefficients(1);
cout << "Linear Regression Equation: y = " << slope << "x + " << intercept << endl;
return 0;
Skewness :
Skewness is a statistical measure that describes the asymmetry of the probability distribution of a
real-valued random variable about its mean. In simpler terms, it quantifies the degree to which the data in a
dataset is skewed or biased to one side of the mean.
Example in c++ :
#include <iostream>
#include <vector>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>
#include <boost/accumulators/statistics/skewness.hpp>
using namespace boost::accumulators;
int main() {
vector<double> data;
cin >> n;
if (n <= 2) {
cout << "Skewness analysis requires at least three data points." << endl;
return 1;
cout << "Enter the data points:" << endl;
for (int i = 0; i < n; i++) {
double x;
cin >> x;
data.push_back(x);
accumulator_set<double, stats<tag::skewness>> acc;
for (int i = 0; i < n; i++) {
acc(data[i]);
double skew = skewness(acc);
cout << "Skewness of the data: " << skew << endl;
return 0;

Ali Hamza

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Ali Hamza

Uploaded by

Copyright:

Available Formats

Frequency distribution :

A frequency distribution is a statistical representation of how often different values or

f(c) = Number of data points in the dataset that equal 'c'

using namespace std;

map<int, int> frequencyMap;

int n; // Number of data points

cout << "Enter the number of data points: ";

// Input data points

cout << "Enter the data points, separated by spaces:\n";

for (int i = 0; i < n; ++i) {

cin >> dataPoint;

// Display frequency distribution

cout << "Frequency Distribution:\n";

for (const auto& pair : frequencyMap) {

Arithmetic Mean (μ) = (Sum of all values) / (Number of values)

using namespace std;

double sum = 0.0;

cout << "Enter the number of values: ";

cout << "Please enter a valid number of values." << endl;

cout << "Enter the values, separated by spaces:\n";

for (int i = 0; i < n; ++i) {

cin >> value;

double mean = sum / n;

H is the harmonic mean.

n is the number of values in the set.

using namespace std;

int n; // Number of values

double sumReciprocals = 0.0;

cout << "Enter the number of values: ";

cout << "Please enter a valid number of values." << endl;

for (int i = 1; i <= n; i++) {

cout << "Enter value " << i << ": ";

double harmonicMean = n / sumReciprocals;

using namespace std;

int n; // Number of values

double product = 1.0;

cout << "Enter the number of values: ";

cout << "Please enter a valid number of values." << endl;

for (int i = 1; i <= n; i++) {

cout << "Enter value " << i << ": ";

double geometricMean = pow(product, 1.0 / n);

using namespace std;

int n; // Number of values

map<int, int> valueFrequency; // Map to store value-frequency pairs

vector<int> modes; // Vector to store modes

cout << "Enter the number of values: ";

cout << "Please enter a valid number of values." << endl;

for (int i = 1; i <= n; i++) {

cout << "Enter value " << i << ": ";

for (const auto& pair : valueFrequency) {

if (pair.second > maxFrequency) {

} else if (pair.second == maxFrequency) {

cout << "The dataset is multimodal. The modes are: ";

for (int i = 0; i < modes.size(); i++) {

cout << modes[i];

cout << ", ";

using namespace std;

int n; // Number of values

cout << "Enter the number of values: ";

cout << "Please enter a valid number of values." << endl;

for (int i = 1; i <= n; i++) {

cout << "Enter value " << i << ": ";

median = (values[middle - 1] + values[middle]) / 2.0;