Must Know | Classes | Functions |
---|---|---|
Collections | Itertools | Functools |
---|---|---|
String | Int | Set | Tuple |
---|---|---|---|
Conditional | For-Loop | Try-Except | Design | Ipython |
---|---|---|---|---|
Built-ins |
---|
Numpy | Pandas | Matplotlib (Pyplot) |
---|---|---|
Seaborn |
---|
# Nested comprehension: the leftmost `for` is the outer loop.
[(row, col) for row in range(3) for col in range(3) if col < row]
# [(1, 0), (2, 0), (2, 1)]

# map() over a single iterable.
li = [1, 2, 3]
li = list(map(lambda item: item * 10, li))
# li = [10, 20, 30]

# map() over several iterables feeds the function one element from each.
num1 = [100, 1, 20]
num2 = [19, 4, 94]
num3 = [40, 6, 30]
list(map(max, num1, num2, num3))
# [100, 6, 94]

# filter() keeps the elements for which the predicate is true.
names = ['Liam', 'Olivia', 'Noah', 'Emma', 'Oliver', 'Ava']
choice = filter(lambda name: name.startswith('O'), names)
print(*choice, sep=', ')  # Olivia, Oliver

# zip() pairs elements up; zip(*pairs) splits them apart again.
a = [1, 2, 3]
b = [4, 5, 6]
c = list(zip(a, b))  # [(1, 4), (2, 5), (3, 6)]
a, b = zip(*c)  # a=(1, 2, 3), b=(4, 5, 6)
def example(a, *arg, b=0, **kwarg):
    """Print every argument to show how a call is split between parameters.

    a: the first positional argument.
    *arg: the remaining positional arguments, collected into a tuple.
    b: keyword-only (it follows ``*arg``); defaults to 0.
    **kwarg: every other keyword argument, collected into a dict.
    """
    print(a)  # 1
    print(arg)  # (2, 3)
    print(b)  # 1
    print(kwarg)  # {'x': 'a', 'y': [1, 2, 3]}


example(1, 2, 3, b=1, x='a', y=[1, 2, 3])
def func(greet, time, name):
    """Print the three values separated by single spaces."""
    print(greet, time, name)


# A starred list fills parameters by position; a double-starred dict fills
# them by name.
func(*["Good", "Morning"], **{"name": "Jay"})
# Good Morning Jay
# A starred target collects whatever the other targets leave over.
a, b, *_ = [1, 2, 3, 4, 5]
# a=1, b=2, _=[3, 4, 5]

# The star may sit in the middle; it works on any iterable, not only lists.
first, *amid, last = map(lambda x: x**2, range(1, 10000))
first  # 1
last  # 99980001

# Unpacking in a loop header: keep the first field, ignore the rest.
sales = [("Pencil", 0.22, 1500), ("Notebook", 1.30, 550)]
for product, *_ in sales:
    print(product)
# Pencil, Notebook


def compute(i):
    """Return ``i`` followed by its 2nd, 3rd, 4th and 5th powers."""
    return i, i ** 2, i ** 3, i ** 4, i ** 5


num, power, cube, *_ = compute(3)
power  # 9
cube  # 27

# ** unpacks mappings; on duplicate keys the rightmost mapping wins.
number = {"one": 1, "two": 2}
letter = {"a": "A", "b": "B"}
combine = {**number, **letter}
combine  # {'one': 1, 'two': 2, 'a': 'A', 'b': 'B'}
from itertools import takewhile


def square_it(value):
    """Lazily yield the squares of 0, 1, ..., value - 1."""
    for i in range(value):
        yield i**2


li = square_it(10_000_000)
# The squares only grow, so stop at the first one that reaches 50 instead of
# generating all ten million values just to discard nearly all of them.
list(takewhile(lambda sq: sq < 50, li))  # [0, 1, 4, 9, 16, 25, 36, 49]
import functools


def count_decorator(count):
    """Build a decorator that runs the decorated function ``count`` times.

    Each call of the decorated function prints its name and arguments, then
    invokes the original ``count`` times and returns the result of the last
    invocation (``None`` when ``count`` is 0).
    """
    def decorator(orig_func):
        @functools.wraps(orig_func)  # keep the original __name__ and __doc__
        def wrapper(*args, **kwargs):
            print(f"func name: {orig_func.__name__}")
            print(f"func args: {args}, {kwargs}")
            result = None
            for _ in range(count):  # use the decorator argument
                result = orig_func(*args, **kwargs)
            return result
        return wrapper
    return decorator  # the real decorator, already bound to `count`


@count_decorator(2)
def greet(msg):
    print(msg)


greet("hello")
# func name: greet
# func args: ('hello',), {}
# hello
# hello
import os
from contextlib import contextmanager


@contextmanager
def enterFolder(folderName):
    """Run the ``with`` body inside ``folderName``, then change back.

    The previous working directory is restored even when the body raises.
    ``os.chdir`` raises ``OSError`` if ``folderName`` cannot be entered.
    """
    home = os.getcwd()
    os.chdir(folderName)
    try:
        yield
    finally:
        # Without try/finally an exception in the body would skip this line
        # and leave the process in the wrong directory.
        os.chdir(home)
# The managers are entered left to right, so the file is opened after the
# directory change and is therefore created inside 'folder1'.
with enterFolder('folder1'), open('example1.txt', 'w') as f:
    f.write('file1')
class BinaryInt(str):
    """A string made of the binary digits of an integer."""

    def __new__(cls, val):
        # Plain "b" presentation type. The previous spec "{val: b}" contained
        # a space flag, which prepends a blank to non-negative numbers.
        return str.__new__(cls, f"{val:b}")

    def __add__(self, val):
        """Add the integer ``val`` and return the sum as binary digits."""
        total = val + int(self, 2)
        # Returning the same type (still a str) lets additions be chained.
        return type(self)(total)


a = BinaryInt(2)
print(a)  # 10
print(a + 4)  # 110
class Meta(type):
    """Metaclass that rejects subclasses lacking a ``must_to_do`` attribute."""

    def __new__(mtcls, name, bases, attrs):
        # Runs when a class statement using this metaclass is executed.
        # The class literally named "Base" is exempt from the check.
        if name != "Base" and "must_to_do" not in attrs:
            raise TypeError("Bad Class: must_to_do() is needed")
        return super().__new__(mtcls, name, bases, attrs)


class Base(metaclass=Meta):
    def server_func(self):
        return self.must_to_do()


# The check fires at class-definition time, before any instance exists.
# The error is caught here so that the rest of the script keeps running.
try:
    class Derived(Base):
        ...
except TypeError as err:
    print(err)
# Bad Class: must_to_do() is needed
import concurrent.futures

# NOTE(review): `load_url` and `URLS` are not defined in this snippet; they
# are presumably a callable taking (url, timeout) and a list of URLs.

# Thread pool: submit tasks individually, then handle each as it finishes.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(load_url, url, 60) for url in URLS]
    for future in concurrent.futures.as_completed(futures):
        result = future.result()
        print(len(result))

# Process pool: map() hands out the argument sequences in lockstep and yields
# the results in input order.
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = executor.map(load_url, URLS, [60] * len(URLS), chunksize=4)
    for result in results:
        print(len(result))
class Person:
    def __init__(self, name):
        self.name = name

    def say(self):
        """Return a short self-introduction."""
        return f"I'm {self.name}"


p = Person("Jay")
# Calling through the instance passes it as `self` automatically, so both
# spellings are the same call.
p.say() == Person.say(p)  # True
class Employee:
    num_emp = 0  # Class variable: one value shared through the class

    def __init__(self, pay):
        self.pay = pay  # Instance variable: one value per object
        Employee.num_emp += 1


e1 = Employee(100)
e2 = Employee(200)
e1.num_emp  # 2  (lookup falls back from the instance to the class)
Employee.num_emp  # 2
e1.pay  # 100
# Instance variables are not visible on the class. The error is caught so
# that the rest of the script keeps running.
try:
    Employee.pay
except AttributeError as err:
    print(err)  # type object 'Employee' has no attribute 'pay'
class Person:
    def __init__(self, name, age):
        self.name = name
        self.age = age

    @staticmethod
    def splitPersonString(string, split_sign="-"):
        """Split ``string`` on ``split_sign``; uses neither class nor instance."""
        return string.split(split_sign)

    @classmethod
    def fromString(cls, cls_str):
        """Alternate constructor: build an instance from ``"name, age"``."""
        return cls(*cls.splitPersonString(cls_str, ", "))


p1 = Person.fromString("Jay, 99")
p1.name  # Jay
p1.age  # 99  (still a str: split() only produces strings)
class Dog:
    _weight = 5  # "private" by convention only; access is not restricted

    def __bark(self):  # name-mangled function: stored as _Dog__bark
        print("bark")


dog = Dog()
dog._weight  # 5
# Outside the class body the name is not mangled, so the lookup fails. The
# error is caught so that the rest of the script keeps running.
try:
    dog.__bark()
except AttributeError as err:
    print(err)  # 'Dog' object has no attribute '__bark'
dog._Dog__bark()  # bark
class User:
    """User whose password can be set but never read back."""

    def __init__(self, first_name, last_name, password):
        self.first_name = first_name
        self.last_name = last_name
        self.password = password  # routed through the setter below

    @property
    def fullname(self):
        """Read-only attribute computed on every access."""
        return f"{self.first_name} {self.last_name}"

    @property
    def password(self):
        raise AttributeError("password is not readable.")

    @password.setter
    def password(self, password):
        from hashlib import md5
        # Hash the value actually passed in. The previous code hashed the
        # constant bytes b"{password}", so every user got the same hash.
        # NOTE: MD5 keeps the example short; it is not suitable for storing
        # real passwords.
        self.password_hash = md5(password.encode()).hexdigest()


user = User("Mimi", "Wang", "0000")
user.fullname  # Mimi Wang
user.password_hash  # 4a7d1ed414474e4033ac29ccb8653d9b
# Reading the password is refused. The error is caught so that the rest of
# the script keeps running.
try:
    user.password
except AttributeError as err:
    print(err)  # password is not readable.
# Name lookup order is Local -> Enclosing -> Global -> Built-in.
min([1, 2, 31])  # builtins min
min = "global min"  # from here on the module-level name hides the builtin


def outer():
    # we can do "global min" here to change global
    min = "enclosing min"

    def inner():
        # we can do "nonlocal min" here to change enclosing
        min = "local min"
from abc import ABC, abstractmethod


class Base(ABC):
    """Interface: subclasses must override both ``foo`` and ``do``.

    Neither ``Base`` nor a subclass that leaves one of them abstract can be
    instantiated.
    """

    @property
    @abstractmethod
    def foo(self):
        ...

    @abstractmethod
    def do(self):
        ...
from dataclasses import InitVar, dataclass, field
from typing import List


@dataclass
class InventoryItem:
    name: str
    unit_price: float = field(default=0.0)
    quantity_on_hand: int = field(default=0, repr=False)  # left out of repr()
    parts: List[str] = field(default_factory=list)  # fresh list per instance
    parts_number: InitVar[int] = 0  # init-only: passed to __post_init__, not stored

    def __post_init__(self, parts_number):
        """Append ``part1`` .. ``part<parts_number>`` to ``parts``."""
        self.parts.extend([f"part{i}" for i in range(1, parts_number + 1)])


item = InventoryItem("product", parts_number=2)
# InventoryItem(name='product', unit_price=0.0, parts=['part1', 'part2'])
def getClass(x):
    """Show that a class statement may appear anywhere, even inside a loop.

    For ``x == 1`` the class is re-created on each of the 11 iterations and
    the last version (``a == 10``) is returned; otherwise nothing is returned.
    """
    if x == 1:
        for i in range(11):
            class Example:
                a = i
        return Example


cls = getClass(1)
cls.b = "123"  # classes are objects, so attributes can be added afterwards
print(cls.a, cls.b)  # 10 123
def add_with_b(b):
    """Return a function that adds ``b`` to its argument (a closure)."""
    def add(a):
        return a + b  # `b` is remembered from the enclosing call
    return add


add4 = add_with_b(4)
add4(3)  # 7
add4(7)  # 11
class Cat:
    def __repr__(self):
        return f"({self.name}: {self.age})"


# Objects are dynamic: attributes can be attached after creation.
listOfCats = []
attrs = [{"name": "meow1", "age": 5}, {"name": "meow2", "age": 10}]
for attr in attrs:
    cat = Cat()
    for key, val in attr.items():
        setattr(cat, key, val)
    listOfCats.append(cat)
print(listOfCats)
# [(meow1: 5), (meow2: 10)]
# A def statement is ordinary code: it can run inside a loop. `say` is
# rebound on every pass and reads `i` when it is called, not when defined.
for i in range(100):
    def say():
        print(i)


def returnFunc(a):
    """Return a multiplier by ``a`` when ``a < 100``, otherwise an adder.

    The returned function prints its result instead of returning it.
    """
    if a < 100:
        def mul(b):
            print(a * b)
        return mul
    else:
        def add(b):
            print(a + b)
        return add
from collections import defaultdict
# A missing key is created on first access, using list() as its value.
d = defaultdict(list)
d["a"] = [1, 2, 3]  # plain assignment still works
d["b"].append(4)  # "b" did not exist: it starts as [] and then gets 4
d["c"].extend([5, 6])
# defaultdict(<class 'list'>, {'a': [1, 2, 3], 'b': [4], 'c': [5, 6]})
from collections import OrderedDict

location = ["C", "B", "A"]
population = [32, 46, 12]
# Build the mapping straight from the (key, value) pairs.
d = OrderedDict(zip(location, population))
# OrderedDict([('C', 32), ('B', 46), ('A', 12)])

d["D"] = 44  # new keys go to the end
# OrderedDict([('C', 32), ('B', 46), ('A', 12), ('D', 44)])

d.popitem(last=False)  # remove from the front
# OrderedDict([('B', 46), ('A', 12), ('D', 44)])

d.move_to_end("D", last=False)  # last=False moves the key to the front
# OrderedDict([('D', 44), ('B', 46), ('A', 12)])
from collections import Counter

c = Counter({"cats": 4, "dogs": 8})
# Counter({'dogs': 8, 'cats': 4})

c.update({"birds": 10})  # update() adds to the counts
# Counter({'birds': 10, 'dogs': 8, 'cats': 4})

c -= Counter(birds=5)  # subtraction keeps only positive counts
# Counter({'dogs': 8, 'birds': 5, 'cats': 4})

c.most_common(2)
# [('dogs', 8), ('birds', 5)]
from collections import namedtuple
# Field names may be given as one comma- or space-separated string.
Dog = namedtuple("Dog", "name, age")
d1 = Dog("funny", 4)
features = ["happy", 3]
# _make() builds an instance from any iterable of field values.
d2 = Dog._make(features)
# Dog(name='happy', age=3)
d2._asdict()
# {'name': 'happy', 'age': 3}  (a plain dict since Python 3.8; an OrderedDict before)
from collections import deque

li = [40, 30, 50, 46, 39, 44]
window = 3  # number of values averaged together

# Let's compute the moving average over `window` values.
# Start with the first window - 1 values plus a 0 placeholder, so that the
# first popleft() removes the placeholder and not real data.
d = deque(li[:window - 1])
d.appendleft(0)
s = sum(d)
for elem in li[window - 1:]:
    # Slide the window: add the newcomer, subtract the value falling out.
    s += elem - d.popleft()
    d.append(elem)
    print(s / window)
# 40.0, 42.0, 45.0, 43.0
from itertools import count, islice

gen = count(2.5, 0.5)  # start, step
# count() never ends on its own, so islice() caps the demo at four values.
for x in islice(gen, 4):
    print(x)
# 2.5, 3.0, 3.5, 4.0  (a bare `for x in gen` would be non-stop)
from itertools import cycle, islice

gen = cycle([1, 2, 3])
# cycle() repeats forever, so islice() caps the demo at five values.
for x in islice(gen, 5):
    print(x)
# 1, 2, 3, 1, 2  (a bare `for x in gen` would be non-stop)
from itertools import repeat


class Cat:
    ...


# repeat() yields the very same object each time, hence the identical
# addresses in the output below.
gen = repeat(Cat(), 2)
for cat in gen:
    print(cat)
# <__main__.Cat object at 0x0000019AC1C5D348>
# <__main__.Cat object at 0x0000019AC1C5D348>
import operator
from itertools import accumulate
# Running totals; pass `func` to fold with another binary operation.
gen = accumulate([1, 2, 3, 4])
list(gen) # [1, 3, 6, 10]
gen = accumulate([1, 2, 3, 4], func=operator.mul)
list(gen) # [1, 2, 6, 24]
from itertools import chain
# Iterate over several iterables one after another.
gen = chain([1, 2], [3, 4])
list(gen) # [1, 2, 3, 4]
gen = chain("AB", "CD")
list(gen) # ['A', 'B', 'C', 'D']
from itertools import compress
# Keep the items whose selector in the second iterable is truthy.
gen = compress([1, 2, 3], [1, 0, 1])
gen = compress([1, 2, 3], [True, False, True]) # same
list(gen) # [1, 3]
from itertools import filterfalse
# The opposite of filter(): keep the items for which the predicate is false.
gen = filterfalse(lambda x: x%2 == 0, [1, 2, 3])
list(gen) # [1, 3]
from itertools import groupby

# groupby() only merges *consecutive* items with equal keys, which is why
# "A" shows up twice below.
gen = groupby("AABBCCCAA")  # default func = lambda x: x
for k, g in gen:
    print(k, list(g))
# A ['A', 'A']
# B ['B', 'B']
# C ['C', 'C', 'C']
# A ['A', 'A']

gen = groupby([1, 2, 3, 4], lambda x: x // 3)
for k, g in gen:
    print(k, list(g))
# 0 [1, 2]
# 1 [3, 4]

gen = groupby([("A", 100), ("B", 200), ("C", 600)], lambda x: x[1] > 500)
for k, g in gen:
    print(k, list(g))
# False [('A', 100), ('B', 200)]
# True [('C', 600)]
from itertools import islice

# islice() is slicing for arbitrary iterables (no negative indices).
gen = islice([1, 2, 3], 2)  # equals to A[:2]
list(gen)  # [1, 2]
gen = islice("ABCD", 2, 4)  # equals to A[2:4]
list(gen)  # ['C', 'D']
gen = islice("ABCD", 0, None, 2)  # equals to A[::2]
list(gen)  # ['A', 'C']
from itertools import starmap
# starmap() unpacks each item with * before calling the function.
# with only one argument
gen = starmap(lambda x: x.lower(), "ABCD")
list(gen) # ['a', 'b', 'c', 'd']
# with 2 arguments
gen = starmap(lambda x, y: x + y, [(1, 2), (3, 4)])
list(gen) # [3, 7]
# with different numbers of arguments
gen = starmap(lambda *keys: sum(keys) / len(keys), [[3, 8, 3], [4, 2]])
list(gen) # [4.666..., 3.0]
from itertools import dropwhile, takewhile

# takewhile() stops at the first item that fails the predicate, even if later
# items would pass again.
gen = takewhile(lambda x: x < 2, [1, 2, 3, 2, 1])
list(gen)  # [1]
gen = takewhile(lambda x: x.isupper(), "ABCdefgHIJ")
list(gen)  # ['A', 'B', 'C']

# dropwhile() skips the leading items that pass, then yields everything else.
gen = dropwhile(lambda x: x < 2, [1, 2, 3, 2, 1])
list(gen)  # [2, 3, 2, 1]
gen = dropwhile(lambda x: x.isupper(), "ABCdefgHIJ")
list(gen)  # ['d', 'e', 'f', 'g', 'H', 'I', 'J']
from itertools import zip_longest
gen = zip_longest("ABC", ("X", "Y"))
list(gen) # [('A', 'X'), ('B', 'Y'), ('C', None)]
gen = zip_longest("ABC", [1, 2], fillvalue=-1)
list(gen) # [('A', 1), ('B', 2), ('C', -1)]
from itertools import product
# Cartesian product. Every result is a tuple such as ('A', 'C'); the comments
# below write each tuple as a joined string for brevity.
gen = product("AB", "CD")
list(gen) # [AC, AD, BC, BD]
gen = product("AB", repeat=2)
list(gen) # [AA, AB, BA, BB]
# repeat=2 repeats the whole argument list: product("AB", "CD", "AB", "CD")
gen = product("AB", "CD", repeat=2)
list(gen)
# [ACAC, ACAD, ACBC, ACBD,
# ADAC, ADAD, ADBC, ADBD,
# BCAC, BCAD, BCBC, BCBD,
# BDAC, BDAD, BDBC, BDBD]
from itertools import combinations, combinations_with_replacement, permutations

# Every result is a tuple such as ('A', 'B'); the comments below write each
# tuple as a joined string for brevity.

# permutations: order matters, no element is reused.
gen = permutations("ABC")  # same as r=3
list(gen)  # [ABC, ACB, BAC, BCA, CAB, CBA]
gen = permutations("ABC", r=2)
list(gen)  # [AB, AC, BA, BC, CA, CB]
gen = permutations("ABC", r=1)
list(gen)  # [A, B, C]

# combinations: order does not matter, no element is reused.
gen = combinations("ABC", 1)
list(gen)
# [A, B, C]
gen = combinations("ABC", 2)
list(gen)
# [AB, AC, BC]
gen = combinations("ABC", 3)
list(gen)
# [ABC]

# combinations_with_replacement: an element may be picked more than once.
gen = combinations_with_replacement("ABC", 1)
list(gen)
# [A, B, C]
gen = combinations_with_replacement("ABC", 2)
list(gen)
# [AA, AB, AC,
#  BB, BC,
#  CC]
gen = combinations_with_replacement("ABC", 3)
list(gen)
# [AAA, AAB, AAC, ABB, ABC, ACC,
#  BBB, BBC, BCC,
#  CCC]
from functools import reduce
# Folds from the left, starting with the initial value 100:
# ((((100 - 1) - 2) - 3) - 4) - 5
reduce(lambda x, y: x - y, [1, 2, 3, 4, 5], 100) # 85
first_name = "Kain"
last_name = "Mccarthy"
print(f"Hi, I'm {first_name} {last_name}.") # Hi, I'm Kain Mccarthy.
pi = 3.14159265359
print(f"{pi:.2f}") # 3.14
d = {"name": "Shelly"}
print(f"She is {d['name']}") # She is Shelly
i = 1000000
print(f"{i:,}") # 1,000,000
# Ref:
# * https://youtu.be/nghuHvKLhJA
# * https://blog.louie.lu/2017/08/08/outdate-python-string-format-and-fstring/
a = 100_000_000
b = 10_000_000
c = 1_0_0
print(f"{a+b+c:,}") # 110,000,100
# Ref:
# * https://youtu.be/C-gEQdGVXbk&t=140
long_list = [i for i in range(100_000_000)]
long_set = set(long_list)
%%time
100_000_000 in long_list
# False
# Wall time: 1.26 s
%%time
100_000_000 in long_set
# False
# Wall time: 0 ns
# Ref:
# * https://stackoverflow.com/questions/2831212/python-sets-vs-lists/17945009
# * https://youtu.be/r3R3h5ly_8g?t=1010
a, b = 1, 2
a # 1
b # 2
a, b = b, a
a # 2
b # 1
# Ref:
# * https://youtu.be/VBokjWj_cEA?list=LL&t=445
x = 0  # sample value so the snippet runs on its own
if x < 1:
    x += 1
else:
    x -= 1
# equivalent to:
x = (x + 1) if (x < 1) else (x - 1)
# Ref:
# * https://www.youtube.com/watch?v=C-gEQdGVXbk&t=34s
arr = ["a", "b", "c"]
# enumerate() yields (index, element) pairs, so no manual counter is needed.
for index, element in enumerate(arr):
    print(index, element)
# 0 a
# 1 b
# 2 c

# `start` changes only the first number, not which elements are visited.
for index, element in enumerate(arr, start=3):
    print(index, element)
# 3 a
# 4 b
# 5 c
# Ref
# * https://youtu.be/VBokjWj_cEA?list=LL&t=190
for text in "to be or not to be".split():
    if text.strip().startswith("o"):
        print(f"Found it! `{text}`")
        break
else:
    # Runs only when the loop finished without hitting `break`.
    print("Not found")
# Found it! `or`
# Ref:
# * https://www.youtube.com/watch?v=Dh-0lAyc3Bc
try:
    print(1/1)
except Exception as e:
    print(e)
else:
    print("Safe")  # executed when except didn't happen
finally:
    print("Done")  # Always executed
# 1.0
# Safe
# Done
# Ref:
# * https://youtu.be/VBokjWj_cEA?list=LL&t=1331
def func(a: str, b: int = 3) -> str:
    """Return ``a`` repeated ``b`` times."""
    return a*b


# Annotations are stored as metadata only; nothing is checked at runtime.
func.__annotations__  # {'a': <class 'str'>, 'b': <class 'int'>, 'return': <class 'str'>}
func("hi")  # hihihi
func("hi", 5)  # hihihihihi
def func(a: "str longer than 5", b: 1+2 = 3) -> "str longer b times":
    """Return ``a`` repeated ``b`` times."""
    return a*b


# An annotation can be any expression; it is evaluated once, at definition
# time, and never enforced ("hi" is accepted despite the first annotation).
func.__annotations__  # {'a': 'str longer than 5', 'b': 3, 'return': 'str longer b times'}
func("hi")  # hihihi
func("ohayou", 2)  # ohayouohayou
from typing import Any, Dict, Iterable, List, Union


def func(a: List[int], b: Union[str, int], c: Dict[str, int], d: Iterable, e: Any):
    """Print something about each argument to illustrate ``typing`` hints.

    ``c`` must contain the key ``"something"``; a ``KeyError`` is raised
    otherwise.
    """
    print(len(a))
    print(f"{b} can be str or int.")
    print(f"{c['something']} will return int.")
    for i in d:
        print(i)
    print(f"{type(e)} can be any type.")
# Ref:
# * https://myapollo.com.tw/zh-tw/python-typing-module/
# Three ways to write a body that does nothing yet. Each def below rebinds
# the same name; they are alternatives, not overloads.

# Style 1
def my_abstract_method(self):
    pass


# Style 2
def my_abstract_method(self):
    ...


# Style 3
def my_abstract_method(self):
    """
    This function is ...
    """
# Ref:
# * https://stackoverflow.com/questions/55274977/when-is-the-usage-of-the-python-ellipsis-to-be-preferred-over-pass
# * https://stackoverflow.com/questions/772124/what-does-the-ellipsis-object-do
#%%
1+1
# 2
# Ref:
# * https://code.visualstudio.com/docs/python/jupyter-support-py
%time sleep(0.3)
# Wall time: 310 ms
%timeit sleep(0.3)
# 311 ms ± 2.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%%time
for i in range(10):
sleep(0.1)
# Wall time: 1.09 s
%%timeit
for i in range(10):
sleep(0.1)
# 1.09 s ± 2.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Ref:
# * https://stackoverflow.com/questions/17579357/time-time-vs-timeit-timeit
# * https://blog.csdn.net/shuibuzhaodeshiren/article/details/86650688
!pip install -U memory_profiler
%load_ext memory_profiler
%memit [i for i in range(1000)]
# peak memory: 51.31 MiB, increment: 0.36 MiB
%%memit
l = []
for x in range(10000):
l.append(x*2)
# peak memory: 52.76 MiB, increment: 0.70 MiB
# Ref:
# * https://pypi.org/project/memory-profiler/
# * https://ipython-books.github.io/44-profiling-the-memory-usage-of-your-code-with-memory_profiler/
from pathlib import Path

sub_folder = Path("subfolder/subfolder")
sub_folder.mkdir(parents=True, exist_ok=True)  # creates both levels if needed

file_ = sub_folder / "test.txt"  # "/" accepts plain strings
file_.touch()
file_.write_text("Hello")
file_.read_text()  # 'Hello'
file_.unlink()  # delete the file

# rmdir() removes one empty directory: only the inner "subfolder" goes away,
# the outer one stays.
sub_folder.rmdir()
np.array([[1, 2], [3, 4], [5, 6]]) # create from list
np.zeros((3, 3)) # create filled with 0's
np.ones((2, 4, 4)) # create filled with 1's
np.empty((5, 2)) # create with speed
np.arange(2, 10, 3) # create array from range (start, end, step_size)
np.linspace(5, 50, 20) # create a linear space (start, end, num_elements)
# create from random generator
rng = np.random.default_rng(seed=42)
rng.random((2, 4))
rng.normal(3, 2.5, size=(2, 4)) # sample from N(3, 6.25)
rng.integers(low=2, high=10, size=(10, 2)) # random integer matrix
# NOTE(review): `a` and `b` are not defined in this snippet; the axis=2 call
# presumably expects arrays with at least three dimensions.
np.sort(a, axis=None) # flatten first, then sort: the result is always 1-D
np.sort(a, axis=-1)[::-1] # sorted copy, then reversed along the first axis (descending order when `a` is 1-D)
a.sort() # in place, along the last axis
a[::-1].sort() # sorts a reversed view in place; for 1-D `a` this leaves `a` in descending order
np.concatenate((a, b), axis=None) # both arrays are flattened before joining
np.concatenate((a, b), axis=2) # join along the third axis
a = np.arange(5)  # [0, 1, 2, 3, 4]
b = np.ones(5, dtype=int)  # [1, 1, 1, 1, 1]
# Operators work element by element.
a + b  # [1 2 3 4 5]
a - b  # [-1 0 1 2 3]
a ** 2  # [ 0 1 4 9 16]  (power is **; ^ is bitwise XOR and would give [2 3 0 1 6])
a * 10  # [ 0 10 20 30 40]
a > 2  # [False False False True True]
np.sqrt(a)  # [0. , 1. , 1.41421356, 1.73205081, 2. ]
a*b  # [0 1 2 3 4]
a@b  # 10  (dot product)
A = np.random.default_rng(42).random((2, 4))
# [[0.77395605, 0.43887844, 0.85859792, 0.69736803],
# [0.09417735, 0.97562235, 0.7611397 , 0.78606431]])
A.max() # 0.97562235
A.max(axis=0) # [0.77395605, 0.97562235, 0.85859792, 0.78606431]
A.max(axis=1) # [0.85859792, 0.97562235]
A.mean() # 0.6732255180088094
A.mean(axis=0) # [0.4340667 , 0.7072504 , 0.80986881, 0.74171617]
A.mean(axis=1) # [0.69220011, 0.65425093]
# Index and slicing arrays
x[1, 3] == x[1][3]
y[1:5:2, ::3]
# Indexing arrays
x[np.array([0, 1, 2, -1, -2])]
y[np.array([1, 2, 3]), 1:4:2]
y[np.array([1, 2]), np.array([-1, -1])]
# Masking arrays
x[x>5]
x[(x%2==0) | (x>7)]
y[[True]*3 + [False] + [True] + [False], 2::2]
# Ellipsis syntax
x[-1, ..., 3] # same as x[-1, :, 3]
x[:3, ...] # same as x[0:3, :, :] and x[0:3] and x[:3]
x[::2, ..., np.array([0, 2])] # same as x[0:5:2, :, np.array([0, 2])]
A = np.array([[[1, 2, 3], [4, 5, 6]], [[4, 6, 8], [2, 1, 6]]])
A.shape # (2, 2, 3)
A = A.reshape(3, 2, 2) # (3, 2, 2)
A = A[np.newaxis, ...] # (1, 3, 2, 2)
A = np.expand_dims(A, axis=4) # (1, 3, 2, 2, 1)
A = A.flatten() # (12,)
A = A.reshape(2, -1, 2) # (2, 3, 2)
# shallow copy: values will change on every variable
a = np.arange(10).reshape(5, 2)
b = a.view()
c = a.reshape(-1)
d = a[:3, :1]
# deep copy: copy and create an entirely new array
a = np.arange(10000000)
b = a[:100].copy()
del a
# scalar broadcasting
a = np.array([1, 2, 3])
a * 3 # [3, 6, 9]
# general broadcasting
a = np.ones( (8, 1, 6, 1))
b = np.zeros( (7, 1, 5))
(a*b).shape # 8, 7, 6, 5
# outer sum (a column broadcast against a row)
a = np.arange(4)[:, np.newaxis] # (4, 1)
b = np.array([1, 2, 3]) # (3,)
a + b # (4, 3)
# [0] + [1, 2, 3] = [1 2 3]
# [1] [2 3 4]
# [2] [3 4 5]
# [3] [4 5 6]
# Create Series
pd.Series([1, 2, 3, 4, 5])
pd.Series(np.arange(1, 6), index=list("abcde"))
pd.Series({"a": 100, "b": 50, "c": 120})
pd.Series("hi", index=list("12345"))
# Create DataFrame
pd.DataFrame({
"col_1": [1, 2, 3, 4, 5],
"col_2": np.arange(1, 6),
"col_3": pd.Series(np.arange(1, 7), index=list("abc123")),
}, index=list("abcde"))
pd.DataFrame(
[
{"a": 1, "b": 2},
{"b": 10, "c": 5},
{"a": 55, "b": 489, "c": 32, "d": 590},
],
index=["first", "second", "third"],
columns=list("ab")
)
pd.DataFrame(
np.arange(10).reshape(2, 5),
# [[0,1,2,3,4], [5,6,7,8,9]]
index=pd.date_range("20200101", periods=2),
columns=list("abcde"))
# Viewing
df.head(2)
df.tail(3)
df.index
df.columns
df.to_numpy()
df.sort_index()
df.sort_values("col_name")
Single Column | Multiple Columns | Continuous Columns | All Columns | |
---|---|---|---|---|
Single Row | df.loc[row, column] or df.at[row, column] |
df.loc[row, [column, column]] |
df.loc[row, column:column] |
df.loc[row] |
Multiple Rows | df.loc[[row, row], column] |
df.loc[[row, row], [column, column]] |
df.loc[[row, row], column:column] |
df.loc[[row, row]] |
Continuous Rows | df.loc[row:row, column] |
df.loc[row:row, [column, column]] |
df.loc[row:row, column:column] |
df[row:row] |
All Rows | df[column] |
df[[column, column]] or df.loc[:, [column, column]] |
df.loc[:, column:column] |
df |
# The iloc/iat equivalents in the comments assume that row1..row5 and
# col1..col5 sit at positions 0..4.
df["col1"]
df[["col1", "col2"]]
df["row1":"row5"]
df.loc["row1", "col1"] # df.iloc[0, 0]
df.at["row1", "col1"] # df.iat[0, 0]
df.loc["row1", ["col1", "col2"]] # df.iloc[0, [0, 1]]
# Label slices include BOTH endpoints; position slices exclude the stop.
df.loc["row1", "col1":"col5"] # df.iloc[0, 0:5]
df.loc[["row1", "row2"]] # df.iloc[[0, 1]]
df.loc["row1":"row5", "col1"] # df.iloc[0:5, 0]
# Boolean masks
df[(df["col1"] > 18)]
df[(df > 6) & (df < 25)]
df[df["col1"].isin([10, 15, 0])]
`df.iloc` is the same as `df.loc`, but selects by integer position instead of label.
`df.iat` is the same as `df.at`, but selects by integer position instead of label.
- Details
# Modify columns
df["col1"] += 10
df.loc[:, "col1"] = "bar"
df.loc[:, ["col1", "col3"]] = np.arange(12).reshape(6, 2)
# Modify single element
df.loc["row1", "col1"] = 0
df.iloc[0, 0] = 1
# Modify by boolean indexing
df[df < 100] = -df
# Append
df["total"] = df.sum(axis=1).to_numpy()
df["gt50000"] = df["total"] > 50000  # same name that pop() removes below
df["foo"] = "bar"  # a scalar is broadcast to every row
# Insert
df.insert(0, "col0", df["col2"][:2])  # col_index, col_name, values
# Delete column
del df["total"]
df.drop(columns=["foo"], inplace=True)  # same as `df.drop(["foo"], axis=1)`
gt50000 = df.pop("gt50000")  # removes the column and returns it
# Delete row
df.drop(["e", "d"], inplace=True)
# Handle NaN
miss_df.dropna(how='any')
miss_df.fillna(value=10000000)
# Arithmetic
df + df2
df - df.iloc[0]
1 / df
# Numpy
np.sqrt(df)
np.max(df, axis=1)
# Built-in
df.mean()
df.max(axis=1)
# Apply
df.apply(np.cumsum, axis=1)
df.apply(lambda x: x.sum() / x.size) # x is one column (a Series) at a time, since axis defaults to 0
# Series
s.value_counts()
s.str.upper()
s.str.split("-").str.get(0)
# Concat rows
pd.concat([df[:3], df.iloc[7:, :2]])
# Merge two DataFrame
pd.merge(df, df2, on="name", how="right")
# Groupby
df.groupby("col_A").sum()
df.groupby(["col_A", "col_B"]).max()
# Categorical - discrete
df["grade"] = df["grade"].astype("category")
# rename_categories() returns a new Series. Assigning to `.cat.categories`
# directly is no longer supported by current pandas.
df["grade"] = df["grade"].cat.rename_categories(["Bad", "Good", "Excellent"])
df.sort_values(by="grade")
df.groupby("grade").size()
# Categorical - continuous
# range(0, 120, 20) gives 6 bin edges, i.e. 5 bins, one per label.
df["grade-labels"] = pd.cut(df["score"], bins=range(0, 120, 20), labels=list("EDCBA"))
# Rename Columns
df.columns = ["col_one", "col_two"]
df = df.add_prefix("Xx_")
df = df.add_suffix("_xX")
df.columns = df.columns.str.replace("Xx", "Oo")
df.columns = df.columns.str.replace("xX", "oO")
# Reverse Row or Column Order
df.loc[::-1].reset_index(drop=True) # reverse rows
df.loc[:, ::-1] # reverse columns
# Split DataFrame into 2 random subsets
sub1 = df.sample(frac=0.75, random_state=42)
sub2 = df.drop(sub1.index)  # everything that was not sampled
# sort_index() reorders the rows together with their labels. Assigning a
# sorted index (`sub.index = sub.index.sort_values()`) would only relabel
# the rows and attach the wrong label to each one.
sub1 = sub1.sort_index()
sub2 = sub2.sort_index()
# Filter by Category (or Largest Category)
df[df.genre.isin(["A", "D"])]
df[~df.genre.isin(["A", "D"])]
df[df.genre.isin(df.genre.value_counts().nlargest(1).index)]
# Split String into Multiple Columns
df[["first", "last"]] = df["name"].str.split(' ', expand=True)
df["city"] = df["location"].str.split(", ", expand=True)[0]
# Change Display Options (Not Change Data)
pd.set_option("display.float_format", "${:.2f}".format)
pd.reset_option("display.float_format")
# Style a DataFrame
# One format string per column, keyed by column name.
style = {"Date": "{:%Y/%m/%d}", "Value": "${:d}", "Volume": "{:,}"}
# hide(axis="index") replaces Styler.hide_index(), which pandas 2 removed.
df.style.format(style) \
    .hide(axis="index") \
    .highlight_max("Value", color="red") \
    .highlight_min("Value", color="green") \
    .bar("Area", color="orange", align="zero") \
    .background_gradient(subset="Volume", cmap="Greens") \
    .set_caption("Random Chart")
import matplotlib.pyplot as plt
# with this magic function, we can skip `plt.show()`
%matplotlib inline
plt.plot(np.sin(np.linspace(0, 10, 100)), "*-b", lw=2, markersize=5, label="sin(x)")
plt.plot(np.log(np.arange(100)), c="g", ls="--", marker=".", lw=2, markersize=5, label="log(x)")
plt.xlabel("X here")
plt.ylabel("Y here")
plt.title("sin(x) and log(x)")
plt.grid()
plt.legend()
plt.text(x=70, y=-1, s="hahahaha")
plt.annotate("wow \nmax", xy=(16, 1), xytext=(40, 0.9), arrowprops={"facecolor": "orange", "shrink": 0.05})
plt.annotate("wow \nmax again", xy=(78, 1), xytext=(95, 0.9), arrowprops={"facecolor": "red", "shrink": 0.05})
# Object-oriented style
fig1, ax = plt.subplots()
ax.plot(...)
fig2, axs = plt.subplots(2, 1)
axs[0].plot(...)
axs[1].plot(...)
# Pyplot style
plt.figure(1)
plt.title("Figure 1")
plt.figure(2)
plt.subplot(311)
plt.title("Figure 2")
plt.subplot(323)
plt.subplot(324)
plt.subplot(337)
plt.subplot(338)
plt.subplot(339)
# `...` marks data left out of the cheat sheet; use complete lists to run it.
years = [1.1, 1.3, 1.5, 2.0, 2.2, ...]
salary = [39343.00, 46205.00, 37731.00, 43525.00, 39891.00, ...]
salary_mean = np.mean(salary)
# Line Plots
plt.plot(years,
         salary,
         marker="o",
         markersize=5,
         lw=2,
         ls="-",
         )
# Filling Areas
plt.fill_between(years,
                 salary,
                 salary_mean,
                 # A list cannot be compared with a number element-wise;
                 # convert it to an array to get a boolean mask.
                 where=(np.asarray(salary) > salary_mean),
                 alpha=.4,
                 color="green",
                 edgecolor="black",
                 interpolate=True,
                 label="On Average"
                 )
import matplotlib.dates as mdates
dates = np.arange(np.datetime64("2021-01-01"), np.datetime64("2021-01-22"))
prices = np.random.default_rng(42).normal(500, 30, len(dates))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%a, %d %m"))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=7))
plt.gca().xaxis.set_minor_locator(mdates.DayLocator())
plt.plot_date(dates, prices,
ls="solid",
c="orange",
marker="^",
markersize=10)
plt.grid()
plt.tight_layout()
# `...` marks data left out of the cheat sheet.
temperature = [14.2, 16.4, 11.9, 15.2, ...]
ice_cream_sales = [215, 325, 185, 332, ...]
# Scale the sales by their vector norm to get color values.
colors = np.array(ice_cream_sales) / np.linalg.norm(ice_cream_sales)
plt.scatter(temperature, ice_cream_sales,
s=ice_cream_sales, # set the size according to the ice cream sales
c=colors, # set the colors according to the ice cream sales
cmap="Greens", # preferred color type
edgecolor="black", # the edge color of points
lw=0.5, # the edge width of points
alpha=.75,
)
plt.xlabel("temperature")
plt.ylabel("ice cream price")
plt.yscale("log") # use log scale on y-axis to handle outliers
cbar = plt.colorbar()
cbar.set_label("Expensive")
plt.tight_layout()
# Bar Charts
# `...` marks data left out of the cheat sheet.
# NOTE(review): `salary_py` and `salary_js` are not defined in this snippet;
# presumably lists shaped like `salary_all`.
ages = [25, 26, 27, 28, 29, ...]
salary_all = [38496, 42000, 46752, 49320, 53200, ...]
index = np.arange(len(ages))
width = 0.25
# Three bars per age, shifted by one bar width so they sit side by side.
plt.bar(index - width, salary_all, width=width, label="All Devs")
plt.bar(index, salary_py, width=width, label="Python")
plt.bar(index + width, salary_js, width=width, label="JavaScript")
plt.xticks(ticks=index, labels=ages)
plt.title("Median Salary (USD) by Age")
plt.xlabel("Ages")
plt.ylabel("Median Salary (USD)")
plt.legend()
plt.tight_layout()
# Horizontal Bar Charts
language = ['JavaScript', 'HTML/CSS', 'SQL', 'Python', ...]
popularity = [59219, 55466, 47544, 36443, ...]
plt.barh(language, popularity)
plt.title("Most Popular Languages")
plt.xlabel("Number of People Who Use")
plt.tight_layout()
grade = ["A", "B", "C", "D", "E"]
number = [10, 18, 23, 8, 5]
explode = [0.1, 0, 0, 0, 0]
plt.pie(number,
labels=grade,
shadow=True,
autopct="%1.1f%%",
pctdistance=0.6,
startangle=90,
explode=explode
)
plt.title("Test Grade")
plt.tight_layout()
height_stats = np.random.default_rng(42).normal(160, 15, 1000)
interval_bin = [120, 130, 140, 150, 160, 170, 180, 190, 200]
# density=True normalizes the bars so they share a scale with the density
# curve drawn below.
plt.hist(height_stats, bins=interval_bin,
         edgecolor="black", lw=1, density=True)
# Plot the probability density curve
import scipy.stats as ss
# gaussian_kde is public API of scipy.stats; the `ss.kde` submodule is a
# deprecated private namespace.
density = ss.gaussian_kde(height_stats)
index = np.arange(120, 200)
plt.plot(index,
         density.evaluate(index),
         color="pink",
         lw=3,
         ls="--",
         label="Probability Density")
# Plot the mean line
plt.axvline(np.mean(height_stats), c="orange", lw=5, label="Height Mean")
plt.legend()
plt.title("Height Stats")
plt.xlabel("Heights")
plt.ylabel("Probability Density")
plt.tight_layout()
years = [1950, 1960, 1970, 1980, 1990, 2000, 2010, 2018]
population_by_continent = {
'africa': [228, 284, 365, 477, 631, 814, 1044, 1275],
'americas': [340, 425, 519, 619, 727, 840, 943, 1006],
'asia': [1394, 1686, 2120, 2625, 3202, 3714, 4169, 4560],
'europe': [220, 253, 276, 295, 310, 303, 294, 293],
'oceania': [12, 15, 19, 22, 26, 31, 36, 39],
}
# One series per continent, stacked in the dict's insertion order.
y = population_by_continent.values()
labels = population_by_continent.keys()
colors = ["#96ceb4", "#ffeead", "#ff6f69", "#ffcc5c", "#88d8b0"]
# NOTE(review): newer Matplotlib releases appear to have renamed this style
# to "seaborn-v0_8" - confirm against the installed version.
plt.style.use("seaborn")
plt.stackplot(years, y, labels=labels, colors=colors)
plt.legend(loc="upper left")
plt.title("World Population")
plt.xlabel("Year")
plt.ylabel("Population (Millions)")
plt.tight_layout()
import matplotlib.image as mpimg

# NOTE(review): passing a URL to imread() is deprecated in Matplotlib and may
# not work on the installed version - confirm, or download the file first.
img = mpimg.imread("https://www.catster.com/wp-content/uploads/1970/01/Am-ShortHair-breed_getty1140883355-768x513.png")
plt.imshow(img)
# Applying pseudocolor schemes
plt.imshow(img[..., 0], cmap="gray")  # first channel only
plt.colorbar()
# Flipping Photos Vertically or Horizontally
plt.imshow(img[::-1])  # Reverse at the first axis == vertical flip
plt.imshow(img[:, ::-1])  # Reverse at the second axis == horizontal flip
# Switch Style
plt.style.use("seaborn-pastel")
# Data
x = np.random.default_rng(42).integers(0, 100, 100)
y = (2*x+1) * np.random.default_rng(43).normal(5, 1, 100)
# NOTE(review): no sklearn import is visible here; `sklearn.linear_model`
# presumably needs `import sklearn.linear_model` first - confirm.
regr = sklearn.linear_model.LinearRegression()
# x[:, np.newaxis] turns the 1-D arrays into single-column 2-D arrays.
regr.fit(x[:, np.newaxis], y[:, np.newaxis])
regr_line = regr.predict(x[:, np.newaxis])
# Plotting with fancy color and colormap
plt.scatter(x, y, c=y, alpha=0.25, cmap="plasma")
plt.plot(x, regr_line,
color="darkviolet",
alpha=0.5,
lw=5, ls="-",
label="regression line")
plt.title("Linear Regression Test")
plt.xlabel("X")
plt.ylabel("y")
plt.legend()
plt.colorbar()
x = np.array(range(1, 5))
y = x**2
df = pd.DataFrame(zip(x, y), columns=["col_1", "col_2"])


# Plotting with data parameter
def plot():
    """Draw col_2 against col_1 from the module-level ``df``."""
    sns.lineplot(x="col_1", y="col_2", data=df)


# Seaborn Styles
sns.set_style("white")
# Scaling the plots
sns.set_context("paper", font_scale=1.5)
# Changing the figure Size
plt.figure(figsize=(8, 4))  # width, height
# Using Seaborn with Matplotlib
plt.subplot(211)
plt.title("Square X")
plot()
# Seaborn Styles Context Manager
# The temporary style is applied to the subplot created inside the block.
with sns.axes_style("darkgrid"):
    plt.subplot(212)
    plot()
plt.tight_layout()
# Sequential Palette
palette = sns.color_palette("YlGn")
sns.palplot(palette)
plt.title("YlGn Colormap (Sequential)")
# Diverging Palette
palette = sns.color_palette("coolwarm")
sns.palplot(palette)
plt.title("coolwarm Colormap (Diverging)")
# Qualitative Palette
palette = sns.color_palette("Pastel2")
sns.palplot(palette)
plt.title("Pastel2 Colormap (Qualitative)")
data = sns.load_dataset("iris")
plt.figure(figsize=(11, 3))
plt.subplot(121)
sns.lineplot(x="sepal_length", y="sepal_width", data=data)
plt.subplot(122)
sns.lineplot(x="petal_length", y="petal_width", data=data)
grid = sns.FacetGrid(data, col="species")
grid.map(plt.plot, "sepal_width")
x_vars = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
y_vars = ["species"]
grid = sns.PairGrid(data, x_vars=x_vars, y_vars=y_vars)
grid.map(sns.barplot)