0% found this document useful (0 votes)
5 views

ML Merge

Uploaded by

Kiki Nhabinde
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views

ML Merge

Uploaded by

Kiki Nhabinde
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 24

import numpy as np

import numpy as np

# 1. Demonstrate the working of specified functions:

# Divider printed between demonstrations. The original 74-dash literal had
# been split across two source lines, which is a syntax error.
separator = "-" * 74

# numpy.linspace(): 5 evenly spaced samples from 1 to 10 inclusive.
a_linspace = np.linspace(1, 10, 5)
print("Arrays linspace:", a_linspace)
print(separator)

# numpy.repeat(): the value 3 repeated 4 times.
a_repeat = np.repeat(3, 4)
print("Array repeat:", a_repeat)
print(separator)

# numpy.random.rand(): a 3x2 array of random samples.
a_random = np.random.rand(3, 2)
print("Array Random:", a_random)
print(separator)

# numpy.nan: a constant (not a callable) representing "not a number".
a_nan = np.nan
print("Array NaN:", a_nan)
print(separator)

# numpy.min() and numpy.max()
a = np.array([4, 2, 8, 6, 5])
min_val = np.min(a)
max_val = np.max(a)
print("Minimam Value:", min_val)
print(separator)
print("Maximam Value:", max_val)
print(separator)

# numpy.shape()
a_shape = np.array([[1, 2], [3, 4], [5, 6]])
shape = np.shape(a_shape)
# Show the array being measured (the original printed the unrelated `a`),
# then its shape tuple (the original printed the array under this label).
print(a_shape)
print(separator)
print("Array Shape:", shape)
print(separator)

# numpy.argmax(): index of the largest element.
a_argmax = np.array([10, 30, 20, 40, 50])
argmax_idx = np.argmax(a_argmax)
print("Maximum argument:", a_argmax)
print(separator)
print("argument index:", argmax_idx)
print(separator)

# numpy.reshape(): 12 consecutive integers laid out as 3 rows x 4 columns.
a_reshape = np.arange(12).reshape(3, 4)
print("Array reshape:", a_reshape)
print(separator)

# numpy.histogram(): count the values falling between the given bin edges.
hist_values, bin_edges = np.histogram(a_argmax, bins=[0, 20, 40, 60])
print("Computation of histogram:", hist_values, bin_edges)
print(separator)

# numpy.mean()
mean_val = np.mean(a_argmax)
print("Array mean value:", mean_val)
print(separator)

# numpy.sort(): returns a sorted copy; a_sort itself is left unchanged.
a_sort = np.array([5, 3, 1, 4, 2])
sorted_a = np.sort(a_sort)
print(a_sort)
print(separator)
print("Sorted Array:", sorted_a)

Arrays linspace: [ 1. 3.25 5.5 7.75 10. ]


----------------------------------------------------------------------
----
Array repeat: [3 3 3 3]
----------------------------------------------------------------------
----
Array Random: [[0.25458965 0.46699484]
[0.3527139 0.41972761]
[0.58007813 0.59765947]]
----------------------------------------------------------------------
----
Array NaN: nan
----------------------------------------------------------------------
----
Minimam Value: 2
----------------------------------------------------------------------
----
Maximam Value: 8
----------------------------------------------------------------------
----
[4 2 8 6 5]
----------------------------------------------------------------------
----
Array Shape: [[1 2]
[3 4]
[5 6]]
----------------------------------------------------------------------
----
Maximum argument: [10 30 20 40 50]
----------------------------------------------------------------------
----
argument index: 4
----------------------------------------------------------------------
----
Array reshape: [[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
----------------------------------------------------------------------
----
Computation of histogram: [1 2 2] [ 0 20 40 60]
----------------------------------------------------------------------
----
Array mean value: 30.0
----------------------------------------------------------------------
----
[5 3 1 4 2]
----------------------------------------------------------------------
----
Sorted Array: [1 2 3 4 5]

# 2. Build a 4x2 integer array and report its shape and number of dimensions:
arr_4x2 = np.arange(1, 9).reshape(4, 2)
for label, value in (("Shape:", arr_4x2.shape), ("Dimensions:", arr_4x2.ndim)):
    print(label, value)

Shape: (4, 2)
Dimensions: 2

# 3. Create a 5x2 integer array with values ranging from 100 to 200 in
# steps of 10:
# np.arange excludes the stop value, so this yields 100, 110, ..., 190 --
# exactly the ten values needed to fill a 5x2 array.
arr_range = np.arange(100, 200, 10).reshape(5, 2)
print(arr_range)

[[100 110]
[120 130]
[140 150]
[160 170]
[180 190]]

# 4. Return the third column from all rows of the provided array:
a = np.array([[11, 22, 33], [44, 55, 66], [77, 88, 99]])
# ":" keeps every row; index 2 is the third column (0-based).
third_column = a[:, 2]
print(a)
# Divider rebuilt as one expression; the literal had been split across lines.
print("-" * 74)
print("third_column:", third_column)

[[11 22 33]
[44 55 66]
[77 88 99]]
third_column: [33 66 99]

# 5. Return odd rows and even columns from a given array:
a = np.array([
    [3, 6, 9, 12],
    [15, 18, 21, 24],
    [27, 30, 33, 36],
    [39, 42, 45, 48],
    [51, 54, 57, 60],
])
# Counting from 1: rows 1, 3, 5 are indices 0, 2, 4 (::2) and columns 2, 4
# are indices 1, 3 (1::2).
odd_rows_even_columns = a[::2, 1::2]
print(a)
# Divider rebuilt as one expression; the literal had been split across lines.
print("-" * 74)
print("odd_rows_even_columns:", odd_rows_even_columns)

[[ 3 6 9 12]
[15 18 21 24]
[27 30 33 36]
[39 42 45 48]
[51 54 57 60]]
odd_rows_even_columns: [[ 6 12]
[30 36]
[54 60]]

# 6. Element-wise sum of two arrays, followed by the square of that sum:
a1 = np.array([[8, 9, 10], [23, 28, 29]])
a2 = np.array([[17, 36, 28], [6, 7, 1]])
result_array = np.square(np.add(a1, a2))
print("result_array:", result_array)

result_array: [[ 625 2025 1444]


[ 841 1225 900]]

# 7. Split an array into four equal-sized sub-arrays:
arr_split = np.arange(10, 34).reshape(8, 3)
# np.split divides along axis 0 by default: 8 rows -> 4 pieces of 2 rows.
sub_arrays = np.split(arr_split, 4)
print("arr_split", arr_split)
# Divider rebuilt as one expression; the literal had been split across lines.
print("-" * 74)
print("sub_arrays", sub_arrays)
arr_split [[10 11 12]
[13 14 15]
[16 17 18]
[19 20 21]
[22 23 24]
[25 26 27]
[28 29 30]
[31 32 33]]
sub_arrays [array([[10, 11, 12],
[13, 14, 15]]), array([[16, 17, 18],
[19, 20, 21]]), array([[22, 23, 24],
[25, 26, 27]]), array([[28, 29, 30],
[31, 32, 33]])]

# 8. Sort a numpy array by the second row and second column:
sampleArray = np.array([[36, 46, 77], [87, 28, 19], [55, 97, 68]])
# Reorder the columns so that the second row (index 1) is ascending.
sorted_by_second_row = sampleArray[:, sampleArray[1].argsort()]
# Reorder the rows so that the second column (index 1) is ascending.
sorted_by_second_column = sampleArray[sampleArray[:, 1].argsort()]
print("sorted by the second row:", sorted_by_second_row)
# Divider rebuilt as one expression; the literal had been split across lines.
print("-" * 74)
print("sorted by the second column:", sorted_by_second_column)

sorted by the second row: [[77 46 36]


[19 28 87]
[68 97 55]]
----------------------------------------------------------------------
----
sorted by the second column: [[87 28 19]
[36 46 77]
[55 97 68]]

# 9. Print max from axis 0 and min from axis 1 of a 2-D array:
a = np.array([[35, 46, 77], [83, 23, 13], [55, 96, 67]])
max_axis0 = np.max(a, axis=0)  # largest value in each column
min_axis1 = np.min(a, axis=1)  # smallest value in each row

print("max axis 0:", max_axis0)
# Divider rebuilt as one expression; the literal had been split across lines.
print("-" * 74)
# Label corrected from "mix" to "min": the value printed is the minimum.
print("min axis 1:", min_axis1)

max axis 0: [83 96 77]


----------------------------------------------------------------------
----
mix axis 1: [35 13 55]

# 10. Replace the second column: remove it, then insert a new column at the
# same position.
a = np.array([[44, 45, 76], [84, 24, 15], [56, 97, 68]])
newColumn = np.array([[20, 20, 20]])
without_second = np.delete(a, obj=1, axis=1)
a = np.insert(without_second, obj=1, values=newColumn, axis=1)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 1. Write a Python program to plot two or more lines on same plot with
# suitable legends of each line.
x1 = np.array([10, 20, 30])
x2 = np.array([10, 20, 30])
y1 = np.array([20, 40, 10])
y2 = np.array([40, 10, 30])
plt.xlabel("x-axis")
plt.ylabel("y-axis")

# Attach each legend entry to its own line so the text cannot drift out of
# step with the order of the plot() calls.
plt.plot(x1, y1, color="purple", label="Line 1")
plt.plot(x2, y2, color="green", label="Line 2")
plt.legend(loc="upper right")
# Render explicitly, as the later plotting cells in this file do.
plt.show()

<matplotlib.legend.Legend at 0x2d814877a20>

# 2. Write a Python program to plot two or more lines with legends,
# different widths and colors.
x1 = np.array([10, 20, 30])
x2 = np.array([10, 20, 30])
y1 = np.array([20, 40, 10])
y2 = np.array([40, 10, 30])
plt.xlabel("x-axis")
plt.ylabel("y-axis")

# Attach each legend entry to its own line so text and width stay paired.
plt.plot(x1, y1, color="blue", linewidth=3, label="Line 1 width =3")
plt.plot(x2, y2, color="red", linewidth=5, label="Line 2 width = 5")
# "upper right" had been split across two source lines, leaving an
# unterminated string literal.
plt.legend(loc="upper right")
# Render explicitly, as the later plotting cells in this file do.
plt.show()

<matplotlib.legend.Legend at 0x2d81493c5f8>

# Write a Python programming to display a bar chart of the popularity
# of programming Languages.
# 'PHP' previously carried a stray leading space, which showed up in the
# tick label and disagreed with the pie-chart cell's label list.
x = ['Java', 'Python', 'PHP', 'JavaScript', 'C#', 'C++']
y = [22.2, 17.6, 8.8, 8, 7.7, 6.7]
plt.bar(x, y, color="blue")
plt.xlabel(" Programming Languages")
plt.ylabel("Popularity")
# The title literal had been split across two source lines; it is rejoined
# here with a single space.
plt.title(
    "Popularity of programming languages worldwide, Oct 2017 "
    "compared to a year ago"
)

plt.minorticks_on()
# Solid red major grid, dotted black minor grid; widths given as numbers
# rather than numeric strings.
plt.grid(which='major', linestyle='-', linewidth=0.5, color='red')
plt.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
plt.show()
# 3. Write a Python program to draw a scatter plot comparing two
# subject marks of Mathematics and Science. Use marks of 10 students.

math_marks = [88, 92, 80, 89, 100, 80, 60, 100, 80, 34]
science_marks = [35, 79, 79, 48, 100, 88, 32, 45, 20, 30]
marks_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# x is the marks range and y the marks scored, matching the axis labels
# below (the original passed the two arguments the other way round).
plt.scatter(marks_range, math_marks, c="red", label="math_marks")
plt.scatter(marks_range, science_marks, c="green", label="science_marks")
plt.xlabel(" Marks Range")
plt.ylabel("Marks Scored")
plt.title("Scatter Plot")
plt.legend(loc="upper right")

plt.show()
# 4. Write a Python programming to create a pie chart of the popularity
# of programming Languages.

import matplotlib.pyplot as plt

# Data
programming_languages = ['Java', 'Python', 'PHP', 'JavaScript', 'C#',
                         'C++']
Popularity = [22.2, 17.6, 8.8, 8, 7.7, 6.7]
# Only the first entry is non-zero, so the first slice (Java) is the one
# pulled out of the pie.
explode = (0.1, 0, 0, 0, 0, 0)

# Create a pie chart with labels and a one-decimal percentage on each slice.
plt.pie(Popularity, explode=explode, labels=programming_languages,
        autopct='%1.1f%%',
        shadow=True, startangle=90)

plt.show()
# Basics Level:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1. Write a Pandas program to create and display a one-dimensional
# array-like object containing an array of data.

ds = pd.Series([2, 4, 6, 8, 10])
print(ds)

0 2
1 4
2 6
3 8
4 10
dtype: int64

# 2. Write a Pandas program to convert a Pandas Series to a Python
# list and show its type.

ds = pd.Series([2, 4, 6, 8, 10])
print("Pandas Series and type")
print(ds)
print(type(ds))
print("Convert Pandas Series to Python list")
print(ds.tolist())
print(type(ds.tolist()))

Pandas Series and type


0 2
1 4
2 6
3 8
4 10
dtype: int64
<class 'pandas.core.series.Series'>
Convert Pandas Series to Python list
[2, 4, 6, 8, 10]
<class 'list'>

# 3. Write a Pandas program to convert a dictionary to a Pandas series.
# Sample dictionary: d1 = {'a': 100, 'b': 200, 'c':300, 'd':400, 'e':800}

d1 = {'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}

print("Original dictionary:")
print(d1)
# The dictionary keys become the Series index.
new_series = pd.Series(d1)
print("Converted series:")
print(new_series)

Original dictionary:
{'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
Converted series:
a 100
b 200
c 300
d 400
e 800
dtype: int64

# 4. Write a Pandas program to convert a NumPy array to a Pandas
# series.

np_array = np.array([10, 20, 30, 40, 50])

print("NumPy array:")
print(np_array)
new_series = pd.Series(np_array)
print("Converted Pandas series:")
print(new_series)

NumPy array:
[10 20 30 40 50]
Converted Pandas series:
0 10
1 20
2 30
3 40
4 50
dtype: int32

# 5. Write a Pandas program to change the data type of given a column
# or a Series

s1 = pd.Series(['100', '200', 'python', '300.12', '400'])
print("Original Data Series:")
print(s1)
print("Change the said data type to numeric:")
# errors='coerce' turns unparseable entries (here 'python') into NaN
# instead of raising.
s2 = pd.to_numeric(s1, errors='coerce')
print(s2)

Original Data Series:


0 100
1 200
2 python
3 300.12
4 400
dtype: object
Change the said data type to numeric:
0 100.00
1 200.00
2 NaN
3 300.12
4 400.00
dtype: float64

# 6. Write a Pandas program to convert the first column of a Data frame
# as a Series.

d = {'col1': [1, 2, 3, 4, 7, 11],
     'col2': [4, 5, 6, 9, 5, 0],
     'col3': [7, 5, 8, 12, 1, 11]}

df = pd.DataFrame(data=d)
print("Original DataFrame")
print(df)
# DataFrame.ix no longer exists in pandas (it raised AttributeError when this
# cell was run); iloc is the positional indexer. All rows, column 0.
s1 = df.iloc[:, 0]
print("\n1st column as a Series:")

print(s1)
print(type(s1))

Original DataFrame
col1 col2 col3
0 1 4 7
1 2 5 5
2 3 6 8
3 4 9 12
4 7 5 1
5 11 0 11

----------------------------------------------------------------------
-----
AttributeError Traceback (most recent call
last)
Input In [11], in <cell line: 10>()
8 print("Original DataFrame")
9 print(df)
---> 10 s1 = df.ix[:,0]
11 print("\n1st column as a Series:")
13 print(s1)

File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5575, in
NDFrame.__getattr__(self, name)
5568 if (
5569 name not in self._internal_names_set
5570 and name not in self._metadata
5571 and name not in self._accessors
5572 and
self._info_axis._can_hold_identifiers_and_holds_name(name)
5573 ):
5574 return self[name]
-> 5575 return object.__getattribute__(self, name)

AttributeError: 'DataFrame' object has no attribute 'ix'

# 7. Write a Pandas program to join the two given data frames along
# rows and assign all data.

student_data1 = pd.DataFrame({
    'student_id': ['S1', 'S2', 'S3', 'S4', 'S5'],
    'name': ['Danniella Fenton', 'Ryder Storey',
             'Bryce Jensen', 'Ed Bernal', 'Kwame Morin'],
    'marks': [200, 210, 190, 222, 199]})

student_data2 = pd.DataFrame({
    'student_id': ['S4', 'S5', 'S6', 'S7', 'S8'],
    'name': ['Scarlette Fisher', 'Carla Williamson',
             'Dante Morse', 'Kaiser William', 'Madeeha Preston'],
    'marks': [201, 200, 198, 219, 201]})

print("Original DataFrames:")
print(student_data1)
print("-------------------------------------")

print(student_data2)
print("\nJoin the said two dataframes along rows:")
# concat stacks the frames vertically and keeps each frame's own index, so
# the labels 0-4 appear twice in the result.
result_data = pd.concat([student_data1, student_data2])
print(result_data)

Original DataFrames:
student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199
-------------------------------------
student_id name marks
0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201

Join the said two dataframes along rows:


student_id name marks
0 S1 Danniella Fenton 200
1 S2 Ryder Storey 210
2 S3 Bryce Jensen 190
3 S4 Ed Bernal 222
4 S5 Kwame Morin 199
0 S4 Scarlette Fisher 201
1 S5 Carla Williamson 200
2 S6 Dante Morse 198
3 S7 Kaiser William 219
4 S8 Madeeha Preston 201

# 8. Filter a specific rows/columns from the data frame

x = pd.DataFrame({"col1": np.arange(1, 20, 2)},
                 index=[9, 8, 7, 6, 0, 1, 2, 3, 4, 5])
# Outside a notebook a bare expression displays nothing, so both selections
# are printed explicitly.
# iloc slices by position: the first five rows (stop excluded).
print(x.iloc[0:5])
# loc slices by index label: from label 0 through label 5 (stop included).
print(x.loc[0:5])

col1
0 9
1 11
2 13
3 15
4 17
5 19

# 9. Create a custom data frame and apply logical, query and filter
# methods on the same
# NOTE(review): only the frame construction is present in this cell; the
# logical/query/filter steps named above are not shown here.

df = pd.DataFrame({
    'name': ['Jane', 'John', 'Ashley', 'Mike', 'Emily', 'Jack', 'Catlin'],
    'ctg': ['A', 'A', 'C', 'B', 'B', 'C', 'B'],
    # Random floats rounded to two decimals; values differ on every run.
    'val': np.random.random(7).round(2),
    # Random integers from 1 to 9 (the upper bound 10 is excluded).
    'val2': np.random.randint(1, 10, size=7)
})

print(df)

name ctg val val2


0 Jane A 0.61 7
1 John A 0.03 7
2 Ashley C 0.43 8
3 Mike B 0.62 8
4 Emily B 0.30 3
5 Jack C 0.63 2
6 Catlin B 0.82 8

# 11. Calculate the mean, median and mode for the following data frame.
dataMatrix = {"D1": [135, 137, 136, 138, 138],
              "D2": [43, 42, 42, 42, 42],
              "D3": [72, 73, 72, 72, 73],
              "D4": [100, 102, 100, 103, 104]}
dataFrame = pd.DataFrame(data=dataMatrix)
print("DataFrame:")
# The original printed the label above but never the frame itself.
print(dataFrame)

# Each statistic is computed per column (the default) and then per row
# (axis=1); the result names are reused, so they end up holding the
# row-wise values.
print("Mean:Computed column-wise:")
meanData = dataFrame.mean()
print(meanData)

print("Mean:Computed row-wise:")
meanData = dataFrame.mean(axis=1)
print(meanData)

print("Median:Computed column-wise:")
medianData = dataFrame.median()
print(medianData)

print("Median:Computed row-wise:")
medianData = dataFrame.median(axis=1)
print(medianData)

print("Mode:Computed column-wise:")
modeData = dataFrame.mode()
print(modeData)

# Every row holds four distinct values, so each value ties as a mode and the
# row-wise result lists all four in ascending order.
print("Mode:Computed row-wise:")
modeData = dataFrame.mode(axis=1)
print(modeData)

DataFrame:
Mean:Computed column-wise:
D1 136.8
D2 42.2
D3 72.4
D4 101.8
dtype: float64
Mean:Computed row-wise:
0 87.50
1 88.50
2 87.50
3 88.75
4 89.25
dtype: float64
Median:Computed column-wise:
D1 137.0
D2 42.0
D3 72.0
D4 102.0
dtype: float64
Median:Computed row-wise:
0 86.0
1 87.5
2 86.0
3 87.5
4 88.5
dtype: float64
Mode:Computed column-wise:
D1 D2 D3 D4
0 138 42 72 100
Mode:Computed row-wise:
0 1 2 3
0 43 72 100 135
1 42 73 102 137
2 42 72 100 136
3 42 72 103 138
4 42 73 104 138

#Intermediate Level:

# 1. Read the CSV with and without header (this cell: without).
# Despite its name, `url` holds a local Windows file path, not a web URL.
url =r"C:\Users\quite\Downloads\Iris.csv"
# header=None tells pandas that no row holds column names, so the columns
# are numbered 0..N-1 and every line of the file is returned as data.
df1 = pd.read_csv(url, header=None)
print(df1)

0 1 2 3 4 \
0 Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
1 1 5.1 3.5 1.4 0.2
2 2 4.9 3.0 1.4 0.2
3 3 4.7 3.2 1.3 0.2
4 4 4.6 3.1 1.5 0.2
.. ... ... ... ... ...
146 146 6.7 3.0 5.2 2.3
147 147 6.3 2.5 5.0 1.9
148 148 6.5 3.0 5.2 2.0
149 149 6.2 3.4 5.4 2.3
150 150 5.9 3.0 5.1 1.8

5
0 Species
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
150 Iris-virginica
[151 rows x 6 columns]

import pandas as pd

# 1. Read the CSV with and without header (this cell: with).
# Despite its name, `url` holds a local Windows file path, not a web URL.
url = r"C:\Users\quite\Downloads\Iris.csv"
# Default header handling: the first row of the file supplies the column
# names.
df = pd.read_csv(url)
print(df)

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \


0 1 5.1 3.5 1.4 0.2
1 2 4.9 3.0 1.4 0.2
2 3 4.7 3.2 1.3 0.2
3 4 4.6 3.1 1.5 0.2
4 5 5.0 3.6 1.4 0.2
.. ... ... ... ... ...
145 146 6.7 3.0 5.2 2.3
146 147 6.3 2.5 5.0 1.9
147 148 6.5 3.0 5.2 2.0
148 149 6.2 3.4 5.4 2.3
149 150 5.9 3.0 5.1 1.8

Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica

[150 rows x 6 columns]

# 2. Change the order of the columns (example, changing the order of
# the first two columns)
# Selecting with a list of names returns the columns in the listed order.
# 'Id' is not listed, so it is absent from the reordered frame.
df_reorder = df[['SepalWidthCm', 'SepalLengthCm', 'PetalLengthCm',
                 'PetalWidthCm', 'Species']]
print(df_reorder)

SepalWidthCm SepalLengthCm PetalLengthCm PetalWidthCm


Species
0 3.5 5.1 1.4 0.2
Iris-setosa
1 3.0 4.9 1.4 0.2
Iris-setosa
2 3.2 4.7 1.3 0.2
Iris-setosa
3 3.1 4.6 1.5 0.2
Iris-setosa
4 3.6 5.0 1.4 0.2
Iris-setosa
.. ... ... ... ...
...
145 3.0 6.7 5.2 2.3 Iris-
virginica
146 2.5 6.3 5.0 1.9 Iris-
virginica
147 3.0 6.5 5.2 2.0 Iris-
virginica
148 3.4 6.2 5.4 2.3 Iris-
virginica
149 3.0 5.9 5.1 1.8 Iris-
virginica

[150 rows x 5 columns]

# 3. Read data from "1.csv" file and measure mean, mode, and standard
# deviation
# NOTE(review): the task names "1.csv" but the path below points at
# Iris.csv -- confirm which file is intended.
data_from_file = pd.read_csv(r"C:\Users\quite\Downloads\Iris.csv")
# numeric_only=True restricts the reduction to numeric columns. Without it
# pandas emitted a FutureWarning for the text column when this cell was run,
# and warned that a future version would raise TypeError.
mean = data_from_file.mean(numeric_only=True)
# Mode can have multiple values, taking the first
mode = data_from_file.mode().iloc[0]
std_dev = data_from_file.std(numeric_only=True)
print("mean:", mean)
# Dividers rebuilt as one expression; the literals had been split across
# lines.
print("-" * 74)
print("mode:", mode)
print("-" * 74)
print("std_dev:", std_dev)

mean: Id 75.500000
SepalLengthCm 5.843333
SepalWidthCm 3.054000
PetalLengthCm 3.758667
PetalWidthCm 1.198667
dtype: float64
----------------------------------------------------------------------
----
mode: Id 1
SepalLengthCm 5.0
SepalWidthCm 3.0
PetalLengthCm 1.5
PetalWidthCm 0.2
Species Iris-setosa
Name: 0, dtype: object
----------------------------------------------------------------------
----
std_dev: Id 43.445368
SepalLengthCm 0.828066
SepalWidthCm 0.433594
PetalLengthCm 1.764420
PetalWidthCm 0.763161
dtype: float64

C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:3:
FutureWarning: Dropping of nuisance columns in DataFrame reductions
(with 'numeric_only=None') is deprecated; in a future version this
will raise TypeError. Select only valid columns before calling the
reduction.
mean = data_from_file.mean()
C:\Users\quite\AppData\Local\Temp\ipykernel_7512\1955642370.py:5:
FutureWarning: Dropping of nuisance columns in DataFrame reductions
(with 'numeric_only=None') is deprecated; in a future version this
will raise TypeError. Select only valid columns before calling the
reduction.
std_dev = data_from_file.std()

# 4. Read and display the first three rows
# Printed explicitly: outside a notebook a bare `df.head(3)` expression
# displays nothing.
print(df.head(3))

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm


Species
0 1 5.1 3.5 1.4 0.2 Iris-
setosa
1 2 4.9 3.0 1.4 0.2 Iris-
setosa
2 3 4.7 3.2 1.3 0.2 Iris-
setosa

# 5. Show the first n rows of the frame
n = 5
first_n_samples = df.iloc[:n]
print(first_n_samples)

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm


Species
0 1 5.1 3.5 1.4 0.2 Iris-
setosa
1 2 4.9 3.0 1.4 0.2 Iris-
setosa
2 3 4.7 3.2 1.3 0.2 Iris-
setosa
3 4 4.6 3.1 1.5 0.2 Iris-
setosa
4 5 5.0 3.6 1.4 0.2 Iris-
setosa

# 6. Report how many columns the frame has and what they are called
column_names = list(df.columns)
num_columns = df.shape[1]
print("number of columns:", num_columns)
print("column names:", column_names)

number of columns: 6
column names: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm',
'PetalWidthCm', 'Species']

# 7. Show the columns at positions n through m (both ends included)
n = 2  # first column position
m = 4  # last column position (inclusive)
# A slice stops before its end value, hence m + 1.
column_window = slice(n, m + 1)
columns_n_to_m = df.iloc[:, column_window]

print(columns_n_to_m)

SepalWidthCm PetalLengthCm PetalWidthCm


0 3.5 1.4 0.2
1 3.0 1.4 0.2
2 3.2 1.3 0.2
3 3.1 1.5 0.2
4 3.6 1.4 0.2
.. ... ... ...
145 3.0 5.2 2.3
146 2.5 5.0 1.9
147 3.0 5.2 2.0
148 3.4 5.4 2.3
149 3.0 5.1 1.8

[150 rows x 3 columns]

# 8. Show the 14th through 34th rows
# Positions are 0-based and the stop is excluded, so 13..33 covers them.
row_window = slice(13, 34)
rows = df.iloc[row_window]
print(rows)

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm


Species
13 14 4.3 3.0 1.1 0.1
Iris-setosa
14 15 5.8 4.0 1.2 0.2
Iris-setosa
15 16 5.7 4.4 1.5 0.4
Iris-setosa
16 17 5.4 3.9 1.3 0.4
Iris-setosa
17 18 5.1 3.5 1.4 0.3
Iris-setosa
18 19 5.7 3.8 1.7 0.3
Iris-setosa
19 20 5.1 3.8 1.5 0.3
Iris-setosa
20 21 5.4 3.4 1.7 0.2
Iris-setosa
21 22 5.1 3.7 1.5 0.4
Iris-setosa
22 23 4.6 3.6 1.0 0.2
Iris-setosa
23 24 5.1 3.3 1.7 0.5
Iris-setosa
24 25 4.8 3.4 1.9 0.2
Iris-setosa
25 26 5.0 3.0 1.6 0.2
Iris-setosa
26 27 5.0 3.4 1.6 0.4
Iris-setosa
27 28 5.2 3.5 1.5 0.2
Iris-setosa
28 29 5.2 3.4 1.4 0.2
Iris-setosa
29 30 4.7 3.2 1.6 0.2
Iris-setosa
30 31 4.8 3.1 1.6 0.2
Iris-setosa
31 32 5.4 3.4 1.5 0.4
Iris-setosa
32 33 5.2 4.1 1.5 0.1
Iris-setosa
33 34 5.5 4.2 1.4 0.2
Iris-setosa

# 9. Display specific rows and columns using "iloc" and "loc"
# functions
# Only the label-based `loc` form is shown in this cell. Label slices
# include both ends, so this selects rows labelled 10 through 15.
specific_rows_columns = df.loc[10:15, ['SepalLengthCm',
                                       'PetalLengthCm']]
print(specific_rows_columns)

SepalLengthCm PetalLengthCm
10 5.4 1.5
11 4.8 1.6
12 4.8 1.4
13 4.3 1.1
14 5.8 1.2
15 5.7 1.5
# 10. Count unique values in the 'SepalLengthCm' column
# nunique() returns how many distinct values there are; the original used
# unique(), which returns the distinct values themselves rather than a count.
unique_values_count = df['SepalLengthCm'].nunique()
print(unique_values_count)

[5.1 4.9 4.7 4.6 5. 5.4 4.4 4.8 4.3 5.8 5.7 5.2 5.5 4.5 5.3 7. 6.4
6.9
6.5 6.3 6.6 5.9 6. 6.1 5.6 6.7 6.2 6.8 7.1 7.6 7.3 7.2 7.7 7.4 7.9]

# 11. Calculate mean, median, and mode for a specific column (e.g.,
# 'SepalLengthCm')
column_name = 'SepalLengthCm'
mean = df[column_name].mean()
median = df[column_name].median()
# mode() returns a Series because several values can tie; keep the first.
mode = df[column_name].mode().iloc[0]
print("mean:", mean)
print("median:", median)
print("mode:", mode)

mean: 5.843333333333335
median: 5.8
mode: 5.0

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy