Applied Data Science with Python - Part 1 - Python Refresher
Applied Data Science with Python
https://www.coursera.org/learn/python-data-analysis
The course covers various data analysis techniques, such as cleaning and manipulating data, running statistical analyses, and using functions like groupby, merge, and pivot tables. These skills are valuable in many industries and can help you make data-driven decisions.
Python Refresher
Datatypes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
print(type("hello")) # <class 'str'>
print(type(None)) # <class 'NoneType'>
print(type(1)) # <class 'int'>
print(type(1.0)) # <class 'float'>
# Python functions with default arguments
def add(x, y, z=1, kind='subtract'):
    """Combine three numbers by addition or subtraction.

    Demonstrates default arguments: ``z`` and ``kind`` may be omitted.

    Args:
        x: First operand.
        y: Second operand.
        z: Third operand; defaults to 1.
        kind: ``'add'`` returns ``x + y + z``; any other value, including
            the default ``'subtract'``, returns ``x - y - z``.

    Returns:
        The sum or the difference of the three operands.
    """
    if kind == 'add':
        return x + y + z
    # Every value other than 'add' falls through to subtraction.
    return x - y - z
print(type(add)) # <class 'function'>
# dictionary
x = {'a': 1, 'b': 2}
print(type(x))  # <class 'dict'>
# iterate over dictionary
for key, value in x.items():
    print(key, value)
print(x['a'])  # get value of key 'a'
print(x.get('c', 3))  # get value of key 'c' or return 3 if key 'c' does not exist
x['c'] = 3  # add key 'c' with value 3
print(x)  # {'a': 1, 'b': 2, 'c': 3}
# list
y = [1, 2, 3]
print(type(y)) # <class 'list'>
y.append(4)
print(y) # [1, 2, 3, 4]
# tuple
z = (1, 2, 3)
print(type(z)) # <class 'tuple'>
print(z) # (1, 2, 3)
# unpacking tuple into variables
a, b, c = z
print(a, b, c)
List and String
1
2
3
4
5
6
7
8
9
10
print([1, 2] + [3, 4]) # concatenation
print([1, 2] * 3) # replication
print(1 in [1, 2, 3]) # membership
print([1, 2, 3][0]) # indexing
print([1, 2, 3][1:]) # slicing
print([1, 2, 3][::-1]) # slicing
x = 'This is a string'
print(x[0]) # indexing
print(x[1:]) # slicing
print(x[::-1]) # slicing to reverse the string
Output:
1
2
3
4
5
6
7
8
9
[1, 2, 3, 4]
[1, 2, 1, 2, 1, 2]
True
1
[2, 3]
[3, 2, 1]
T
his is a string
gnirts a si sihT
Looping
1
2
3
4
5
6
7
8
9
10
for item in y:  # iterate through list
    print(item)
for i, item in enumerate(y):  # enumerate returns index and item
    print(i, item)
i = 0
while i < len(y):  # while loop
    print(y[i])
    i += 1  # advance the index, otherwise the loop never terminates
String
1
2
3
4
5
6
7
8
9
# string
firstname = 'Watsh'
lastname = 'Rajneesh'
print(firstname + ' ' + lastname) # concatenation
print(firstname * 3) # replication
print ('Watsh' in firstname) # membership
fullname = 'Watsh Ranjit Singh Rajneesh'
print(fullname.split(' ')[0]) # split string into list of words
print(fullname.split(' ')[-1])
Output:
1
2
3
4
5
Watsh Rajneesh
WatshWatshWatsh
True
Watsh
Rajneesh
Date and Time
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import datetime as dt
import time as tm
print(tm.time()) # current time in seconds since epoch
dtnow = dt.datetime.fromtimestamp(tm.time())
print(dtnow)
print(dtnow.year, dtnow.month, dtnow.day, dtnow.hour, dtnow.minute, dtnow.second)
# timedelta - used to add or subtract time from a date
delta = dt.timedelta(days = 100)
print(delta)
today = dt.date.today()
print(today)
print(today - delta)
print(today > today - delta)
Output:
1
2
3
4
5
6
7
1713679451.155692
2024-04-21 01:04:11.155776
2024 4 21 1 4 11
100 days, 0:00:00
2024-04-21
2024-01-12
True
Object
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Python Objects and map()
class Person:
    """A person with a name and a location, tied to a shared department.

    ``name`` and ``location`` are instance attributes that exist only
    after the corresponding setter has been called.
    """

    # Class attribute: looked up on the class, so every instance sees it.
    department = 'School of Information'

    def set_name(self, new_name):
        """Store ``new_name`` on this instance as ``name``."""
        self.name = new_name

    def set_location(self, new_location):
        """Store ``new_location`` on this instance as ``location``."""
        self.location = new_location
person = Person()
person.set_name('Watsh')
person.set_location('Austin')
print('{} lives in {} and works in the department {}'.format(person.name, person.location, person.department))
Output:
1
Watsh lives in Austin and works in the department School of Information
map()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
store1 = [10.00, 11.00, 12.34, 2.34]
store2 = [9.00, 11.10, 12.34, 2.01]
# map() walks both lists in lockstep and applies min to each pair of prices
cheapest = map(min, store1, store2)  # returns iterator
print(cheapest)
for item in cheapest:  # iterate through iterator (this consumes it)
    print(item)
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']
def split_title_and_name(person):
    """Return the title and last name of ``person``, separated by a space.

    The string is split on single spaces; the first piece is taken as the
    title and the last piece as the surname.

    Args:
        person: A full name such as ``'Dr. Christopher Brooks'``.

    Returns:
        The first and last space-separated pieces joined by one space,
        e.g. ``'Dr. Brooks'``.
    """
    # Split once and reuse the pieces instead of splitting the string twice.
    words = person.split(' ')
    return words[0] + ' ' + words[-1]
list(map(split_title_and_name, people))
print(list(map(lambda person: person.split(' ')[0] + ' ' + person.split(' ')[-1], people)))
Output:
1
2
3
4
5
6
<map object at 0x103a79580>
9.0
11.0
12.34
2.01
['Dr. Brooks', 'Dr. Collins-Thompson', 'Dr. Vydiswaran', 'Dr. Romero']
Lambda functions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
my_function = lambda a, b, c : a + b + c
print(my_function(1, 2, 3))
# list iteration
my_list = []
for number in range(0, 1000):
    if number % 2 == 0:  # keep even numbers only
        my_list.append(number)
print(my_list)  # printed once, after the loop has finished
# list comprehension - more concise way to write the above code
my_list = [number for number in range(0, 1000) if number % 2 == 0]
my_list
def times_tables():
    """Return the products ``i * j`` for all ``i`` and ``j`` in 0..9.

    Returns:
        A flat list of 100 integers, ordered with ``i`` as the outer loop
        and ``j`` as the inner loop.
    """
    lst = []
    for i in range(10):
        for j in range(10):
            lst.append(i * j)
    # Return only after both loops have completed.
    return lst
# alternative way to write the above function using list comprehension
times_tables() == [i*j for i in range(10) for j in range(10)]
lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'
answer = [a+b+c+d for a in lowercase for b in lowercase for c in digits for d in digits]
print(answer[:50])
Output:
1
2
3
6
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, ..., 848, 850, 852, 854, 856, 858, 860, 862, 864, 866, 868, 870, 872, 874, 876, 878, 880, 882, 884, 886, 888, 890, 892, 894, 896, 898, 900, 902, 904, 906, 908, 910, 912, 914, 916, 918, 920, 922, 924, 926, 928, 930, 932, 934, 936, 938, 940, 942, 944, 946, 948, 950, 952, 954, 956, 958, 960, 962, 964, 966, 968, 970, 972, 974, 976, 978, 980, 982, 984, 986, 988, 990, 992, 994, 996, 998]
['aa00', 'aa01', 'aa02', 'aa03', 'aa04', 'aa05', 'aa06', 'aa07', 'aa08', 'aa09', 'aa10', 'aa11', 'aa12', 'aa13', 'aa14', 'aa15', 'aa16', 'aa17', 'aa18', 'aa19', 'aa20', 'aa21', 'aa22', 'aa23', 'aa24', 'aa25', 'aa26', 'aa27', 'aa28', 'aa29', 'aa30', 'aa31', 'aa32', 'aa33', 'aa34', 'aa35', 'aa36', 'aa37', 'aa38', 'aa39', 'aa40', 'aa41', 'aa42', 'aa43', 'aa44', 'aa45', 'aa46', 'aa47', 'aa48', 'aa49']
This post is licensed under CC BY 4.0 by the author.