ML Merge
ML Merge
import numpy as np
# Tour of commonly used NumPy helpers. Each section prints its result
# followed by a separator line of dashes.

# numpy.linspace(): 5 evenly spaced values from 1 to 10, both ends included.
a_linspace = np.linspace(1, 10, 5)
print("Arrays linspace:", a_linspace)
print("-" * 74)

# numpy.repeat(): the scalar 3 repeated 4 times.
a_repeat = np.repeat(3, 4)
print("Array repeat:", a_repeat)
print("-" * 74)

# numpy.random.rand(): a 3x2 array of random samples (different on each run).
a_random = np.random.rand(3, 2)
print("Array Random:", a_random)
print("-" * 74)

# numpy.nan is a float constant (not a function) marking a missing value.
a_nan = np.nan
print("Array NaN:", a_nan)
print("-" * 74)

# numpy.min() and numpy.max()
a = np.array([4, 2, 8, 6, 5])
min_val = np.min(a)
max_val = np.max(a)
print("Minimum Value:", min_val)
print("-" * 74)
print("Maximum Value:", max_val)
print("-" * 74)

# numpy.shape(): print the 3x2 array itself, then its (rows, columns) tuple.
a_shape = np.array([[1, 2], [3, 4], [5, 6]])
shape = np.shape(a_shape)
print(a_shape)
print("-" * 74)
print("Array Shape:", shape)
print("-" * 74)

# numpy.argmax(): index of the largest element; indexing with it gives
# the largest value itself.
a_argmax = np.array([10, 30, 20, 40, 50])
argmax_idx = np.argmax(a_argmax)
print("Maximum argument:", a_argmax[argmax_idx])
print("-" * 74)
print("argument index:", argmax_idx)
print("-" * 74)

# numpy.reshape(): the integers 0..11 laid out as 3 rows by 4 columns.
a_reshape = np.arange(12).reshape(3, 4)
print("Array reshape:", a_reshape)
print("-" * 74)

# numpy.histogram(): count the values of a_argmax falling into the bins
# [0, 20), [20, 40) and [40, 60].
hist_values, bin_edges = np.histogram(a_argmax, bins=[0, 20, 40, 60])
print("Histogram values:", hist_values)
print("Histogram bin edges:", bin_edges)
print("-" * 74)

# numpy.mean()
mean_val = np.mean(a_argmax)
print("Array mean value:", mean_val)
print("-" * 74)

# numpy.sort(): returns a sorted copy; a_sort itself is left unchanged.
a_sort = np.array([5, 3, 1, 4, 2])
sorted_a = np.sort(a_sort)
print(a_sort)
print("-" * 74)
print("Sorted Array:", sorted_a)
Shape: (4, 2)
Dimensions: 2
# 3. Create a 5x2 integer array with values ranging from 100 to 200 in
# steps of 10. np.arange excludes the stop value, so the ten values are
# 100..190, reshaped into 5 rows of 2.
arr_range = np.arange(100, 200, 10).reshape(5, 2)
print(arr_range)
[[100 110]
[120 130]
[140 150]
[160 170]
[180 190]]
# 4. Return the third column from all rows of the provided array:
a = np.array([[11, 22, 33], [44, 55, 66], [77, 88, 99]])
# ':' selects every row; index 2 is the third column (0-based).
third_column = a[:, 2]
print(a)
print("-" * 74)
print("third_column:", third_column)
[[11 22 33]
[44 55 66]
[77 88 99]]
third_column: [33 66 99]
[[ 3 6 9 12]
[15 18 21 24]
[27 30 33 36]
[39 42 45 48]
[51 54 57 60]]
odd_rows_even_columns: [[ 6 12]
[30 36]
[54 60]]
print("result_array:",result_array )
# 9. Print max from axis 0 and min from axis 1 of a 2-D array:
a = np.array([[35, 46, 77], [83, 23, 13], [55, 96, 67]])
# axis=0 reduces down the rows, giving one maximum per column.
max_axis0 = np.max(a, axis=0)
# axis=1 reduces across the columns, giving one minimum per row.
min_axis1 = np.min(a, axis=1)
print("max along axis 0:", max_axis0)
print("min along axis 1:", min_axis1)
#1. Write a Python program to plot two or more lines on the same plot with
# suitable legends for each line.
<matplotlib.legend.Legend at 0x2d814877a20>
#2. Write a Python program to plot two or more lines with legends,
# different widths and colors.
<matplotlib.legend.Legend at 0x2d81493c5f8>
# Enable minor ticks, then style the two grids separately:
# major grid as solid red lines, minor grid as dotted black lines.
# Line widths are passed as numbers rather than numeric strings.
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth=0.5, color='red')
plt.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
plt.show()
#3. Write a Python program to draw a scatter plot comparing two
# subject marks of Mathematics and Science. Use marks of 10 students.
math_marks = [88, 92, 80, 89, 100, 80, 60, 100, 80, 34]
science_marks = [35, 79, 79, 48, 100, 88, 32, 45, 20, 30]
marks_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
# Draw both subjects against the shared marks_range x-axis so the two
# series can be compared; without these calls plt.show() has nothing to draw.
plt.scatter(marks_range, math_marks, label='Math marks', color='r')
plt.scatter(marks_range, science_marks, label='Science marks', color='g')
plt.title('Scatter Plot')
plt.xlabel('Marks Range')
plt.ylabel('Marks Scored')
plt.legend()
plt.show()
#4. Write a Python program to create a pie chart of the popularity of
# programming languages.
# Data
programming_languages = ['Java', 'Python', 'PHP', 'JavaScript', 'C#', 'C++']
Popularity = [22.2, 17.6, 8.8, 8, 7.7, 6.7]
# One offset per slice, in the same order as programming_languages:
# only the first slice ('Java') is pulled out of the pie.
explode = (0.1, 0, 0, 0, 0, 0)
# Draw the chart; without this call plt.show() has nothing to display.
plt.pie(Popularity, explode=explode, labels=programming_languages,
        autopct='%1.1f%%')
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()
Basics Level
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Build a Series holding the even numbers 2 through 10 and display it.
ds = pd.Series(data=list(range(2, 12, 2)))
print(ds)
0 2
1 4
2 6
3 8
4 10
dtype: int64
# Show a Series with its type, then the same data converted to a plain
# Python list with that list's type.
ds = pd.Series(data=[2, 4, 6, 8, 10])
print("Pandas Series and type", ds, type(ds), sep="\n")
as_list = ds.tolist()
print("Convert Pandas Series to Python list", as_list, type(as_list), sep="\n")
Original dictionary:
{'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
Converted series:
a 100
b 200
c 300
d 400
e 800
dtype: int64
NumPy array:
[10 20 30 40 50]
Converted Pandas series:
0 10
1 20
2 30
3 40
4 50
dtype: int32
#5. Write a Pandas program to change the data type of a given column
# or Series.
#6. Write a Pandas program to convert the first column of a DataFrame
# to a Series.
# Sample data; the values match the "Original DataFrame" table recorded
# for this cell.
d = {'col1': [1, 2, 3, 4, 7, 11],
     'col2': [4, 5, 6, 9, 5, 0],
     'col3': [7, 5, 8, 12, 1, 11]}
df = pd.DataFrame(data=d)
print("Original DataFrame")
print(df)
# DataFrame.ix no longer exists (it raises AttributeError); iloc is the
# positional indexer: every row, column position 0.
s1 = df.iloc[:, 0]
print("\n1st column as a Series:")
print(s1)
print(type(s1))
Original DataFrame
col1 col2 col3
0 1 4 7
1 2 5 5
2 3 6 8
3 4 9 12
4 7 5 1
5 11 0 11
----------------------------------------------------------------------
-----
AttributeError Traceback (most recent call
last)
Input In [11], in <cell line: 10>()
8 print("Original DataFrame")
9 print(df)
---> 10 s1 = df.ix[:,0]
11 print("\n1st column as a Series:")
13 print(s1)
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5575, in
NDFrame.__getattr__(self, name)
5568 if (
5569 name not in self._internal_names_set
5570 and name not in self._metadata
5571 and name not in self._accessors
5572 and
self._info_axis._can_hold_identifiers_and_holds_name(name)
5573 ):
5574 return self[name]
-> 5575 return object.__getattribute__(self, name)
#7. Write a Pandas program to join the two given data frames along
# rows and assign all data.
student_data1 = pd.DataFrame({
    'student_id': ['S1', 'S2', 'S3', 'S4', 'S5'],
    'name': ['Danniella Fenton', 'Ryder Storey',
             'Bryce Jensen', 'Ed Bernal', 'Kwame Morin'],
    'marks': [200, 210, 190, 222, 199]})
student_data2 = pd.DataFrame({
    'student_id': ['S4', 'S5', 'S6', 'S7', 'S8'],
    'name': ['Scarlette Fisher', 'Carla Williamson',
             'Dante Morse', 'Kaiser William', 'Madeeha Preston'],
    'marks': [201, 200, 198, 219, 201]})
print("Original DataFrames:")
print(student_data1)
print("-------------------------------------")
print(student_data2)
print("\nJoin the said two dataframes along rows:")
# concat stacks the second frame under the first. Each frame keeps its own
# row labels, so the labels 0-4 appear twice in the result.
result_data = pd.concat([student_data1, student_data2])
print(result_data)
Original DataFrames:
student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199
-------------------------------------
student_id name marks
0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201
col1
0 9
1 11
2 13
3 15
4 17
5 19
#9. Create a custom data frame and apply logical, query and filter
# methods on the same.
# NOTE(review): only the DataFrame construction is present in this cell;
# the logical/query/filter steps are not shown here.
df = pd.DataFrame({
    'name': ['Jane', 'John', 'Ashley', 'Mike', 'Emily', 'Jack', 'Catlin'],
    'ctg': ['A', 'A', 'C', 'B', 'B', 'C', 'B'],
    # seven random floats rounded to two decimals (different on each run)
    'val': np.random.random(7).round(2),
    # seven random integers from 1 to 9 (the upper bound 10 is excluded)
    'val2': np.random.randint(1, 10, size=7),
})
print(df)
#11. Calculate the mean, median and mode for the following data frame.
dataMatrix = {"D1": [135, 137, 136, 138, 138],
              "D2": [43, 42, 42, 42, 42],
              "D3": [72, 73, 72, 72, 73],
              "D4": [100, 102, 100, 103, 104]}
dataFrame = pd.DataFrame(data=dataMatrix)
# Show the frame itself under its heading (the heading used to be printed
# with nothing following it).
print("DataFrame:")
print(dataFrame)

# Each statistic is computed twice: per column (the default axis) and per
# row (axis=1). The row-wise result is what remains bound to the name.
print("Mean:Computed column-wise:")
meanData = dataFrame.mean()
print(meanData)
print("Mean:Computed row-wise:")
meanData = dataFrame.mean(axis=1)
print(meanData)

print("Median:Computed column-wise:")
medianData = dataFrame.median()
print(medianData)
print("Median:Computed row-wise:")
medianData = dataFrame.median(axis=1)
print(medianData)

# mode() returns a DataFrame because a column or row can have several
# equally frequent values.
print("Mode:Computed column-wise:")
modeData = dataFrame.mode()
print(modeData)
print("Mode:Computed row-wise:")
modeData = dataFrame.mode(axis=1)
print(modeData)
DataFrame:
Mean:Computed column-wise:
D1 136.8
D2 42.2
D3 72.4
D4 101.8
dtype: float64
Mean:Computed row-wise:
0 87.50
1 88.50
2 87.50
3 88.75
4 89.25
dtype: float64
Median:Computed column-wise:
D1 137.0
D2 42.0
D3 72.0
D4 102.0
dtype: float64
Median:Computed row-wise:
0 86.0
1 87.5
2 86.0
3 87.5
4 88.5
dtype: float64
Mode:Computed column-wise:
D1 D2 D3 D4
0 138 42 72 100
Mode:Computed row-wise:
0 1 2 3
0 43 72 100 135
1 42 73 102 137
2 42 72 100 136
3 42 72 103 138
4 42 73 104 138
#Intermediate Level:
# 1. Read the CSV from the given location with and without header.
# This cell is the "without header" variant: header=None tells pandas not
# to use any line of the file as column names.
url = r"C:\Users\quite\Downloads\Iris.csv"
df1 = pd.read_csv(filepath_or_buffer=url, header=None)
print(df1)
0 1 2 3 4 \
0 Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
1 1 5.1 3.5 1.4 0.2
2 2 4.9 3.0 1.4 0.2
3 3 4.7 3.2 1.3 0.2
4 4 4.6 3.1 1.5 0.2
.. ... ... ... ... ...
146 146 6.7 3.0 5.2 2.3
147 147 6.3 2.5 5.0 1.9
148 148 6.5 3.0 5.2 2.0
149 149 6.2 3.4 5.4 2.3
150 150 5.9 3.0 5.1 1.8
5
0 Species
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
150 Iris-virginica
[151 rows x 6 columns]
import pandas as pd
# 1. Read the CSV from the given location with and without header.
# This cell is the "with header" variant: header="infer" is the pandas
# default and takes the column names from the first line of the file.
url = r"C:\Users\quite\Downloads\Iris.csv"
df = pd.read_csv(filepath_or_buffer=url, header="infer")
print(df)
Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
# 3. Read data from the Iris CSV file and measure mean, mode, and standard
# deviation.
data_from_file = pd.read_csv(r"C:\Users\quite\Downloads\Iris.csv")
# The frame also holds a text column ('Species'). Restrict mean and std to
# numeric columns explicitly: relying on pandas to drop the text column is
# deprecated and is announced to raise TypeError in a future version.
mean = data_from_file.mean(numeric_only=True)
# mode() can return several rows when values tie; keep only the first row.
mode = data_from_file.mode().iloc[0]
std_dev = data_from_file.std(numeric_only=True)
print("mean:", mean)
print("-" * 74)
print("mode:", mode)
print("-" * 74)
print("std_dev:", std_dev)
mean: Id 75.500000
SepalLengthCm 5.843333
SepalWidthCm 3.054000
PetalLengthCm 3.758667
PetalWidthCm 1.198667
dtype: float64
----------------------------------------------------------------------
----
mode: Id 1
SepalLengthCm 5.0
SepalWidthCm 3.0
PetalLengthCm 1.5
PetalWidthCm 0.2
Species Iris-setosa
Name: 0, dtype: object
----------------------------------------------------------------------
----
std_dev: Id 43.445368
SepalLengthCm 0.828066
SepalWidthCm 0.433594
PetalLengthCm 1.764420
PetalWidthCm 0.763161
dtype: float64
C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:3:
FutureWarning: Dropping of nuisance columns in DataFrame reductions
(with 'numeric_only=None') is deprecated; in a future version this
will raise TypeError. Select only valid columns before calling the
reduction.
mean = data_from_file.mean()
C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:5:
FutureWarning: Dropping of nuisance columns in DataFrame reductions
(with 'numeric_only=None') is deprecated; in a future version this
will raise TypeError. Select only valid columns before calling the
reduction.
std_dev = data_from_file.std()
# Report how many columns the DataFrame has and what they are called.
column_names = df.columns.tolist()
num_columns = df.shape[1]  # second entry of shape is the column count
print("number of columns:", num_columns)
print("column names:", column_names)
number of columns: 6
column names: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm',
'PetalWidthCm', 'Species']
print(columns_n_to_m)
SepalLengthCm PetalLengthCm
10 5.4 1.5
11 4.8 1.6
12 4.8 1.4
13 4.3 1.1
14 5.8 1.2
15 5.7 1.5
# 10. Count unique values in a column.
# NOTE(review): the task text says "first column", but the column examined
# here is 'SepalLengthCm' - confirm which column is intended.
unique_values = df['SepalLengthCm'].unique()
# nunique() gives the number of distinct values. Previously the array of
# values itself was stored under the "count" name.
unique_values_count = df['SepalLengthCm'].nunique()
print(unique_values)
print("unique value count:", unique_values_count)
[5.1 4.9 4.7 4.6 5. 5.4 4.4 4.8 4.3 5.8 5.7 5.2 5.5 4.5 5.3 7. 6.4
6.9
6.5 6.3 6.6 5.9 6. 6.1 5.6 6.7 6.2 6.8 7.1 7.6 7.3 7.2 7.7 7.4 7.9]
# 11. Calculate mean, median, and mode for a specific column (here
# 'SepalLengthCm').
column_name = 'SepalLengthCm'
selected = df[column_name]
mean = selected.mean()
median = selected.median()
# mode() returns a Series because several values can tie; keep the first.
mode = selected.mode().iloc[0]
print("mean:", mean)
print("median:", median)
print("mode:", mode)
mean: 5.843333333333335
median: 5.8
mode: 5.0