import pandas as pd
for col in df.columns:
series = df[col]
new_ser = pd.Series()
new_df = pd.DataFrame()
sample_list_1 = [10, 20, 30]
sample_ser_1 = pd.Series(sample_list_1)
sample_ser_1
0 10 1 20 2 30 dtype: int64
# len of list(values) shall be the same
a = {'name': ['a', 'b', 'c'], 'gender': ['f','m','m']}
a = pd.DataFrame(a)
a
name | gender | |
---|---|---|
0 | a | f |
1 | b | m |
2 | c | m |
# keys can be different
b = {'name': 'a', 'gender': 'f', 'age': 5}, {'name': 'b', 'gender': 'm', 'age': 15}, {'name': 'c', 'gender':'m', 'age': 25}, {'name': 'c', 'gender':'m'}
b = pd.DataFrame(b)
b
age | gender | name | |
---|---|---|---|
0 | 5.0 | f | a |
1 | 15.0 | m | b |
2 | 25.0 | m | c |
3 | NaN | m | c |
pd.read_csv(path)
path = "train.csv"
file = pd.read_csv(path)
file.head()
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
print(type(file.survived))
print(type(file))
<class 'pandas.core.series.Series'> <class 'pandas.core.frame.DataFrame'>
# If import a dataset without a header
file_no_header = pd.read_csv(path, header = None) # use index as a temporary header (row index)
file_no_header.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked |
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |
# rename the header
file_no_header = file_no_header.rename(columns={0: "a", 1: "b"})
file_no_header.head()
a | b | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked |
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |
# rename the index
file_no_header = file_no_header.rename(index={0: "a", 1: "b"})
file_no_header.head()
a | b | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
---|---|---|---|---|---|---|---|---|---|---|---|
a | survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked |
b | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |
# If you have index in the dataset
file_index_col = pd.read_csv(path, index_col = ['name', 'survived']) # use 'name' as column index
file_index_col.head()
pclass | sex | age | sibsp | parch | ticket | fare | cabin | embarked | ||
---|---|---|---|---|---|---|---|---|---|---|
name | survived | |||||||||
Braund, Mr. Owen Harris | 0 | 3 | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 1 | 1 | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
Heikkinen, Miss. Laina | 1 | 3 | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 1 | 1 | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
Allen, Mr. William Henry | 0 | 3 | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
file.columns
Index(['survived', 'pclass', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare', 'cabin', 'embarked'], dtype='object')
file.tail()
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
886 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.00 | NaN | S |
887 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.00 | B42 | S |
888 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.45 | NaN | S |
889 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.00 | C148 | C |
890 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.75 | NaN | Q |
file.sample() # default: output 1 row
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
616 | 0 | 3 | Danbom, Mr. Ernst Gilbert | male | 34.0 | 1 | 1 | 347080 | 14.4 | NaN | S |
file.sample(5)
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
20 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.0 | 0 | 0 | 239865 | 26.0000 | NaN | S |
91 | 0 | 3 | Andreasson, Mr. Paul Edvin | male | 20.0 | 0 | 0 | 347466 | 7.8542 | NaN | S |
251 | 0 | 3 | Strom, Mrs. Wilhelm (Elna Matilda Persson) | female | 29.0 | 1 | 1 | 347054 | 10.4625 | G6 | S |
791 | 0 | 2 | Gaskell, Mr. Alfred | male | 16.0 | 0 | 0 | 239865 | 26.0000 | NaN | S |
133 | 1 | 2 | Weisz, Mrs. Leopold (Mathilde Francoise Pede) | female | 29.0 | 1 | 0 | 228414 | 26.0000 | NaN | S |
file.info() # see data type, data size, and na value
<class 'pandas.core.frame.DataFrame'> RangeIndex: 891 entries, 0 to 890 Data columns (total 11 columns): survived 891 non-null int64 pclass 891 non-null int64 name 891 non-null object sex 891 non-null object age 714 non-null float64 sibsp 891 non-null int64 parch 891 non-null int64 ticket 891 non-null object fare 891 non-null float64 cabin 204 non-null object embarked 889 non-null object dtypes: float64(2), int64(4), object(5) memory usage: 76.6+ KB
file.describe() # only for float/integer
survived | pclass | age | sibsp | parch | fare | |
---|---|---|---|---|---|---|
count | 891.000000 | 891.000000 | 714.000000 | 891.000000 | 891.000000 | 891.000000 |
mean | 0.383838 | 2.308642 | 29.699118 | 0.523008 | 0.381594 | 32.204208 |
std | 0.486592 | 0.836071 | 14.526497 | 1.102743 | 0.806057 | 49.693429 |
min | 0.000000 | 1.000000 | 0.420000 | 0.000000 | 0.000000 | 0.000000 |
25% | 0.000000 | 2.000000 | 20.125000 | 0.000000 | 0.000000 | 7.910400 |
50% | 0.000000 | 3.000000 | 28.000000 | 0.000000 | 0.000000 | 14.454200 |
75% | 1.000000 | 3.000000 | 38.000000 | 1.000000 | 0.000000 | 31.000000 |
max | 1.000000 | 3.000000 | 80.000000 | 8.000000 | 6.000000 | 512.329200 |
file.age.mean()
29.69911764705882
file.age.sum()
21205.17
file.age.min()
0.42
file.age.max()
80.0
file.count()
survived 891 pclass 891 name 891 sex 891 age 714 sibsp 891 parch 891 ticket 891 fare 891 cabin 204 embarked 889 dtype: int64
file.nunique()
survived 2 pclass 3 name 891 sex 2 age 88 sibsp 7 parch 7 ticket 681 fare 248 cabin 147 embarked 3 dtype: int64
file.pclass.value_counts()
3 491 1 216 2 184 Name: pclass, dtype: int64
df['col_name']
df[True/ False condition]
df.loc/ df.iloc
file['name'].head()
0 Braund, Mr. Owen Harris 1 Cumings, Mrs. John Bradley (Florence Briggs Th... 2 Heikkinen, Miss. Laina 3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 Allen, Mr. William Henry Name: name, dtype: object
file[['name', 'survived']].head()
name | survived | |
---|---|---|
0 | Braund, Mr. Owen Harris | 0 |
1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 1 |
2 | Heikkinen, Miss. Laina | 1 |
3 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 1 |
4 | Allen, Mr. William Henry | 0 |
# select 2 columns and top 5th-9th rows
file[['name', 'survived']][5:10]
name | survived | |
---|---|---|
5 | Moran, Mr. James | 0 |
6 | McCarthy, Mr. Timothy J | 0 |
7 | Palsson, Master. Gosta Leonard | 0 |
8 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | 1 |
9 | Nasser, Mrs. Nicholas (Adele Achem) | 1 |
# iloc select: index
file.iloc[5:10,:8] # [5:10] end_value excl.
survived | pclass | name | sex | age | sibsp | parch | ticket | |
---|---|---|---|---|---|---|---|---|
5 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 |
6 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 |
7 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 |
8 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 |
9 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 |
# loc select: column/ row name
file.loc[5:10,:'ticket'] # [5:10] end_value inclu.
survived | pclass | name | sex | age | sibsp | parch | ticket | |
---|---|---|---|---|---|---|---|---|
5 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 |
6 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 |
7 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 |
8 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 |
9 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 |
10 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 |
file.iloc[[0,1],[0,7]]
survived | ticket | |
---|---|---|
0 | 0 | A/5 21171 |
1 | 1 | PC 17599 |
file.loc[[0,1],['survived','ticket']]
survived | ticket | |
---|---|---|
0 | 0 | A/5 21171 |
1 | 1 | PC 17599 |
file[file.age > 60] # == file[file['age'] > 60]
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
33 | 0 | 2 | Wheadon, Mr. Edward H | male | 66.0 | 0 | 0 | C.A. 24579 | 10.5000 | NaN | S |
54 | 0 | 1 | Ostby, Mr. Engelhart Cornelius | male | 65.0 | 0 | 1 | 113509 | 61.9792 | B30 | C |
96 | 0 | 1 | Goldschmidt, Mr. George B | male | 71.0 | 0 | 0 | PC 17754 | 34.6542 | A5 | C |
116 | 0 | 3 | Connors, Mr. Patrick | male | 70.5 | 0 | 0 | 370369 | 7.7500 | NaN | Q |
170 | 0 | 1 | Van der hoef, Mr. Wyckoff | male | 61.0 | 0 | 0 | 111240 | 33.5000 | B19 | S |
252 | 0 | 1 | Stead, Mr. William Thomas | male | 62.0 | 0 | 0 | 113514 | 26.5500 | C87 | S |
275 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 63.0 | 1 | 0 | 13502 | 77.9583 | D7 | S |
280 | 0 | 3 | Duane, Mr. Frank | male | 65.0 | 0 | 0 | 336439 | 7.7500 | NaN | Q |
326 | 0 | 3 | Nysveen, Mr. Johan Hansen | male | 61.0 | 0 | 0 | 345364 | 6.2375 | NaN | S |
438 | 0 | 1 | Fortune, Mr. Mark | male | 64.0 | 1 | 4 | 19950 | 263.0000 | C23 C25 C27 | S |
456 | 0 | 1 | Millet, Mr. Francis Davis | male | 65.0 | 0 | 0 | 13509 | 26.5500 | E38 | S |
483 | 1 | 3 | Turkula, Mrs. (Hedwig) | female | 63.0 | 0 | 0 | 4134 | 9.5875 | NaN | S |
493 | 0 | 1 | Artagaveytia, Mr. Ramon | male | 71.0 | 0 | 0 | PC 17609 | 49.5042 | NaN | C |
545 | 0 | 1 | Nicholson, Mr. Arthur Ernest | male | 64.0 | 0 | 0 | 693 | 26.0000 | NaN | S |
555 | 0 | 1 | Wright, Mr. George | male | 62.0 | 0 | 0 | 113807 | 26.5500 | NaN | S |
570 | 1 | 2 | Harris, Mr. George | male | 62.0 | 0 | 0 | S.W./PP 752 | 10.5000 | NaN | S |
625 | 0 | 1 | Sutton, Mr. Frederick | male | 61.0 | 0 | 0 | 36963 | 32.3208 | D50 | S |
630 | 1 | 1 | Barkworth, Mr. Algernon Henry Wilson | male | 80.0 | 0 | 0 | 27042 | 30.0000 | A23 | S |
672 | 0 | 2 | Mitchell, Mr. Henry Michael | male | 70.0 | 0 | 0 | C.A. 24580 | 10.5000 | NaN | S |
745 | 0 | 1 | Crosby, Capt. Edward Gifford | male | 70.0 | 1 | 1 | WE/P 5735 | 71.0000 | B22 | S |
829 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0000 | B28 | NaN |
851 | 0 | 3 | Svensson, Mr. Johan | male | 74.0 | 0 | 0 | 347060 | 7.7750 | NaN | S |
file[(file.age > 60) & (file.sex == 'female')] # & == AND
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
275 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 63.0 | 1 | 0 | 13502 | 77.9583 | D7 | S |
483 | 1 | 3 | Turkula, Mrs. (Hedwig) | female | 63.0 | 0 | 0 | 4134 | 9.5875 | NaN | S |
829 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0000 | B28 | NaN |
file[(file.age > 60) | (file.sex == 'female')] # | == OR
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
8 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
9 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
10 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
11 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
14 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.0 | 0 | 0 | 350406 | 7.8542 | NaN | S |
15 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.0 | 0 | 0 | 248706 | 16.0000 | NaN | S |
18 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Maria Vande... | female | 31.0 | 1 | 0 | 345763 | 18.0000 | NaN | S |
19 | 1 | 3 | Masselmani, Mrs. Fatima | female | NaN | 0 | 0 | 2649 | 7.2250 | NaN | C |
22 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.0 | 0 | 0 | 330923 | 8.0292 | NaN | Q |
24 | 0 | 3 | Palsson, Miss. Torborg Danira | female | 8.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
25 | 1 | 3 | Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... | female | 38.0 | 1 | 5 | 347077 | 31.3875 | NaN | S |
28 | 1 | 3 | O'Dwyer, Miss. Ellen "Nellie" | female | NaN | 0 | 0 | 330959 | 7.8792 | NaN | Q |
31 | 1 | 1 | Spencer, Mrs. William Augustus (Marie Eugenie) | female | NaN | 1 | 0 | PC 17569 | 146.5208 | B78 | C |
32 | 1 | 3 | Glynn, Miss. Mary Agatha | female | NaN | 0 | 0 | 335677 | 7.7500 | NaN | Q |
33 | 0 | 2 | Wheadon, Mr. Edward H | male | 66.0 | 0 | 0 | C.A. 24579 | 10.5000 | NaN | S |
38 | 0 | 3 | Vander Planke, Miss. Augusta Maria | female | 18.0 | 2 | 0 | 345764 | 18.0000 | NaN | S |
39 | 1 | 3 | Nicola-Yarred, Miss. Jamila | female | 14.0 | 1 | 0 | 2651 | 11.2417 | NaN | C |
40 | 0 | 3 | Ahlin, Mrs. Johan (Johanna Persdotter Larsson) | female | 40.0 | 1 | 0 | 7546 | 9.4750 | NaN | S |
41 | 0 | 2 | Turpin, Mrs. William John Robert (Dorothy Ann ... | female | 27.0 | 1 | 0 | 11668 | 21.0000 | NaN | S |
43 | 1 | 2 | Laroche, Miss. Simonne Marie Anne Andree | female | 3.0 | 1 | 2 | SC/Paris 2123 | 41.5792 | NaN | C |
44 | 1 | 3 | Devaney, Miss. Margaret Delia | female | 19.0 | 0 | 0 | 330958 | 7.8792 | NaN | Q |
47 | 1 | 3 | O'Driscoll, Miss. Bridget | female | NaN | 0 | 0 | 14311 | 7.7500 | NaN | Q |
49 | 0 | 3 | Arnold-Franchi, Mrs. Josef (Josefine Franchi) | female | 18.0 | 1 | 0 | 349237 | 17.8000 | NaN | S |
52 | 1 | 1 | Harper, Mrs. Henry Sleeper (Myna Haxtun) | female | 49.0 | 1 | 0 | PC 17572 | 76.7292 | D33 | C |
53 | 1 | 2 | Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkin... | female | 29.0 | 1 | 0 | 2926 | 26.0000 | NaN | S |
54 | 0 | 1 | Ostby, Mr. Engelhart Cornelius | male | 65.0 | 0 | 1 | 113509 | 61.9792 | B30 | C |
56 | 1 | 2 | Rugg, Miss. Emily | female | 21.0 | 0 | 0 | C.A. 31026 | 10.5000 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
809 | 1 | 1 | Chambers, Mrs. Norman Campbell (Bertha Griggs) | female | 33.0 | 1 | 0 | 113806 | 53.1000 | E8 | S |
813 | 0 | 3 | Andersson, Miss. Ebba Iris Alfrida | female | 6.0 | 4 | 2 | 347082 | 31.2750 | NaN | S |
816 | 0 | 3 | Heininen, Miss. Wendla Maria | female | 23.0 | 0 | 0 | STON/O2. 3101290 | 7.9250 | NaN | S |
820 | 1 | 1 | Hays, Mrs. Charles Melville (Clara Jennings Gr... | female | 52.0 | 1 | 1 | 12749 | 93.5000 | B69 | S |
823 | 1 | 3 | Moor, Mrs. (Beila) | female | 27.0 | 0 | 1 | 392096 | 12.4750 | E121 | S |
829 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0000 | B28 | NaN |
830 | 1 | 3 | Yasbeck, Mrs. Antoni (Selini Alexander) | female | 15.0 | 1 | 0 | 2659 | 14.4542 | NaN | C |
835 | 1 | 1 | Compton, Miss. Sara Rebecca | female | 39.0 | 1 | 1 | PC 17756 | 83.1583 | E49 | C |
842 | 1 | 1 | Serepeca, Miss. Augusta | female | 30.0 | 0 | 0 | 113798 | 31.0000 | NaN | C |
849 | 1 | 1 | Goldenberg, Mrs. Samuel L (Edwiga Grabowska) | female | NaN | 1 | 0 | 17453 | 89.1042 | C92 | C |
851 | 0 | 3 | Svensson, Mr. Johan | male | 74.0 | 0 | 0 | 347060 | 7.7750 | NaN | S |
852 | 0 | 3 | Boulos, Miss. Nourelain | female | 9.0 | 1 | 1 | 2678 | 15.2458 | NaN | C |
853 | 1 | 1 | Lines, Miss. Mary Conover | female | 16.0 | 0 | 1 | PC 17592 | 39.4000 | D28 | S |
854 | 0 | 2 | Carter, Mrs. Ernest Courtenay (Lilian Hughes) | female | 44.0 | 1 | 0 | 244252 | 26.0000 | NaN | S |
855 | 1 | 3 | Aks, Mrs. Sam (Leah Rosen) | female | 18.0 | 0 | 1 | 392091 | 9.3500 | NaN | S |
856 | 1 | 1 | Wick, Mrs. George Dennick (Mary Hitchcock) | female | 45.0 | 1 | 1 | 36928 | 164.8667 | NaN | S |
858 | 1 | 3 | Baclini, Mrs. Solomon (Latifa Qurban) | female | 24.0 | 0 | 3 | 2666 | 19.2583 | NaN | C |
862 | 1 | 1 | Swift, Mrs. Frederick Joel (Margaret Welles Ba... | female | 48.0 | 0 | 0 | 17466 | 25.9292 | D17 | S |
863 | 0 | 3 | Sage, Miss. Dorothy Edith "Dolly" | female | NaN | 8 | 2 | CA. 2343 | 69.5500 | NaN | S |
865 | 1 | 2 | Bystrom, Mrs. (Karolina) | female | 42.0 | 0 | 0 | 236852 | 13.0000 | NaN | S |
866 | 1 | 2 | Duran y More, Miss. Asuncion | female | 27.0 | 1 | 0 | SC/PARIS 2149 | 13.8583 | NaN | C |
871 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
874 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
875 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
879 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
880 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
882 | 0 | 3 | Dahlberg, Miss. Gerda Ulrika | female | 22.0 | 0 | 0 | 7552 | 10.5167 | NaN | S |
885 | 0 | 3 | Rice, Mrs. William (Margaret Norton) | female | 39.0 | 0 | 5 | 382652 | 29.1250 | NaN | Q |
887 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
888 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
333 rows × 11 columns
df.append()
del df['col_name']
df.drop()
df.pop()
# If we want to insert a new column called `fare_lv`
a_file = file.copy()
a_file['fare_lv'] = ''
a_file.head()
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | fare_lv | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | |
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | |
2 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | |
4 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# We have an existing pandas series called `fare_lv_series`
print(type(fare_lv_series), '\n')
print(fare_lv_series.head(3))
<class 'pandas.core.series.Series'> 0 low 1 high 2 low Name: fare, dtype: category Categories (3, object): [low < medium < high]
# We insert `fare_lv_series` as values in `fare_lv` column in a_file
a_file['fare_lv'] = fare_lv_series
a_file.head(5)
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | fare_lv | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | low |
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | high |
2 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | low |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | high |
4 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | low |
# We have an existing pandas dataframe called `add_row`
add_row.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5 entries, 0 to 4 Data columns (total 11 columns): pclass 5 non-null int64 name 5 non-null object sex 5 non-null object age 5 non-null float64 sibsp 5 non-null int64 parch 5 non-null int64 ticket 5 non-null object fare 5 non-null float64 cabin 0 non-null object embarked 5 non-null object survived 5 non-null int64 dtypes: float64(2), int64(4), object(5) memory usage: 520.0+ bytes
add_row
pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | survived | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | Kelly, Mr. James | male | 34.5 | 0 | 0 | 330911 | 7.8292 | NaN | Q | 1 |
1 | 3 | Wilkes, Mrs. James (Ellen Needs) | female | 47.0 | 1 | 0 | 363272 | 7.0000 | NaN | S | 1 |
2 | 2 | Myles, Mr. Thomas Francis | male | 62.0 | 0 | 0 | 240276 | 9.6875 | NaN | Q | 1 |
3 | 3 | Wirz, Mr. Albert | male | 27.0 | 0 | 0 | 315154 | 8.6625 | NaN | S | 1 |
4 | 3 | Hirvonen, Mrs. Alexander (Helga E Lindqvist) | female | 22.0 | 1 | 1 | 3101298 | 12.2875 | NaN | S | 1 |
# To insert `add_row` into a_file
a_file = a_file.append(add_row)
a_file.tail(10)
C:\Users\angela\Anaconda3\lib\site-packages\pandas\core\frame.py:6692: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'. sort=sort)
age | cabin | embarked | fare | fare_lv | name | parch | pclass | sex | sibsp | survived | ticket | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
886 | 27.0 | NaN | S | 13.0000 | medium | Montvila, Rev. Juozas | 0 | 2 | male | 0 | 0 | 211536 |
887 | 19.0 | B42 | S | 30.0000 | high | Graham, Miss. Margaret Edith | 0 | 1 | female | 0 | 1 | 112053 |
888 | NaN | NaN | S | 23.4500 | medium | Johnston, Miss. Catherine Helen "Carrie" | 2 | 3 | female | 1 | 0 | W./C. 6607 |
889 | 26.0 | C148 | C | 30.0000 | high | Behr, Mr. Karl Howell | 0 | 1 | male | 0 | 1 | 111369 |
890 | 32.0 | NaN | Q | 7.7500 | low | Dooley, Mr. Patrick | 0 | 3 | male | 0 | 0 | 370376 |
0 | 34.5 | NaN | Q | 7.8292 | NaN | Kelly, Mr. James | 0 | 3 | male | 0 | 1 | 330911 |
1 | 47.0 | NaN | S | 7.0000 | NaN | Wilkes, Mrs. James (Ellen Needs) | 0 | 3 | female | 1 | 1 | 363272 |
2 | 62.0 | NaN | Q | 9.6875 | NaN | Myles, Mr. Thomas Francis | 0 | 2 | male | 0 | 1 | 240276 |
3 | 27.0 | NaN | S | 8.6625 | NaN | Wirz, Mr. Albert | 0 | 3 | male | 0 | 1 | 315154 |
4 | 22.0 | NaN | S | 12.2875 | NaN | Hirvonen, Mrs. Alexander (Helga E Lindqvist) | 1 | 3 | female | 1 | 1 | 3101298 |
del df['col_name']
del a_file['cabin']
a_file.head(5)
age | embarked | fare | fare_lv | name | parch | pclass | sex | sibsp | survived | ticket | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | S | 7.2500 | low | Braund, Mr. Owen Harris | 0 | 3 | male | 1 | 0 | A/5 21171 |
1 | 38.0 | C | 71.2833 | high | Cumings, Mrs. John Bradley (Florence Briggs Th... | 0 | 1 | female | 1 | 1 | PC 17599 |
2 | 26.0 | S | 7.9250 | low | Heikkinen, Miss. Laina | 0 | 3 | female | 0 | 1 | STON/O2. 3101282 |
3 | 35.0 | S | 53.1000 | high | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 0 | 1 | female | 1 | 1 | 113803 |
4 | 35.0 | S | 8.0500 | low | Allen, Mr. William Henry | 0 | 3 | male | 0 | 0 | 373450 |
df.pop()
a_file.pop('name').head()
0 Braund, Mr. Owen Harris 1 Cumings, Mrs. John Bradley (Florence Briggs Th... 2 Heikkinen, Miss. Laina 3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 Allen, Mr. William Henry Name: name, dtype: object
a_file.head()
age | embarked | fare | fare_lv | parch | pclass | sex | sibsp | survived | ticket | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 22.0 | S | 7.2500 | low | 0 | 3 | male | 1 | 0 | A/5 21171 |
1 | 38.0 | C | 71.2833 | high | 0 | 1 | female | 1 | 1 | PC 17599 |
2 | 26.0 | S | 7.9250 | low | 0 | 3 | female | 0 | 1 | STON/O2. 3101282 |
3 | 35.0 | S | 53.1000 | high | 0 | 1 | female | 1 | 1 | 113803 |
4 | 35.0 | S | 8.0500 | low | 0 | 3 | male | 0 | 0 | 373450 |
df.drop()
a_file.drop(columns = ['parch', 'sibsp']).head()
age | embarked | fare | fare_lv | pclass | sex | survived | ticket | |
---|---|---|---|---|---|---|---|---|
0 | 22.0 | S | 7.2500 | low | 3 | male | 0 | A/5 21171 |
1 | 38.0 | C | 71.2833 | high | 1 | female | 1 | PC 17599 |
2 | 26.0 | S | 7.9250 | low | 3 | female | 1 | STON/O2. 3101282 |
3 | 35.0 | S | 53.1000 | high | 1 | female | 1 | 113803 |
4 | 35.0 | S | 8.0500 | low | 3 | male | 0 | 373450 |
a_file.drop(index = [0, 1]).head()
# if not `a_file = a_file.drop(index = [0, 1])`, then the drop execution won't be saved as a_file
age | embarked | fare | fare_lv | parch | pclass | sex | sibsp | survived | ticket | |
---|---|---|---|---|---|---|---|---|---|---|
2 | 26.0 | S | 7.9250 | low | 0 | 3 | female | 0 | 1 | STON/O2. 3101282 |
3 | 35.0 | S | 53.1000 | high | 0 | 1 | female | 1 | 1 | 113803 |
4 | 35.0 | S | 8.0500 | low | 0 | 3 | male | 0 | 0 | 373450 |
5 | NaN | Q | 8.4583 | low | 0 | 3 | male | 0 | 0 | 330877 |
6 | 54.0 | S | 51.8625 | high | 0 | 1 | male | 0 | 0 | 17463 |
print(file.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 11 columns):
survived 891 non-null int64
pclass 891 non-null int64
name 891 non-null object
sex 891 non-null object
age 714 non-null float64
sibsp 891 non-null int64
parch 891 non-null int64
ticket 891 non-null object
fare 891 non-null float64
cabin 204 non-null object
embarked 889 non-null object
dtypes: float64(2), int64(4), object(5)
memory usage: 76.6+ KB
age_na = file[file.age.isnull()]
age_na.head(5)
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
5 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
17 | 1 | 2 | Williams, Mr. Charles Eugene | male | NaN | 0 | 0 | 244373 | 13.0000 | NaN | S |
19 | 1 | 3 | Masselmani, Mrs. Fatima | female | NaN | 0 | 0 | 2649 | 7.2250 | NaN | C |
26 | 0 | 3 | Emir, Mr. Farred Chehab | male | NaN | 0 | 0 | 2631 | 7.2250 | NaN | C |
28 | 1 | 3 | O'Dwyer, Miss. Ellen "Nellie" | female | NaN | 0 | 0 | 330959 | 7.8792 | NaN | Q |
age_na.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 177 entries, 5 to 888 Data columns (total 11 columns): survived 177 non-null int64 pclass 177 non-null int64 name 177 non-null object sex 177 non-null object age 0 non-null float64 sibsp 177 non-null int64 parch 177 non-null int64 ticket 177 non-null object fare 177 non-null float64 cabin 19 non-null object embarked 177 non-null object dtypes: float64(2), int64(4), object(5) memory usage: 16.6+ KB
age_na.age.fillna(0).sample(5)
837 0.0 464 0.0 295 0.0 223 0.0 511 0.0 Name: age, dtype: float64
sample_age = file.copy()
# int(sample_age.age.mean()) == 29
age_na.age.fillna(int(sample_age.age.mean())).sample(5)
485 29.0 364 29.0 459 29.0 783 29.0 475 29.0 Name: age, dtype: float64
# fill na value with the value ahead
# eg. if row 5 has na value and row 4 has non-na value, value in row 5 will be filled with value in row 4
# `age` in row 5, 17, 19 were na value
sample_age.fillna(method = 'ffill').head(20)
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | C85 | S |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | C123 | S |
5 | 0 | 3 | Moran, Mr. James | male | 35.0 | 0 | 0 | 330877 | 8.4583 | C123 | Q |
6 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
7 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | E46 | S |
8 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | E46 | S |
9 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | E46 | C |
10 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
11 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
12 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.0 | 0 | 0 | A/5. 2151 | 8.0500 | C103 | S |
13 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.0 | 1 | 5 | 347082 | 31.2750 | C103 | S |
14 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.0 | 0 | 0 | 350406 | 7.8542 | C103 | S |
15 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.0 | 0 | 0 | 248706 | 16.0000 | C103 | S |
16 | 0 | 3 | Rice, Master. Eugene | male | 2.0 | 4 | 1 | 382652 | 29.1250 | C103 | Q |
17 | 1 | 2 | Williams, Mr. Charles Eugene | male | 2.0 | 0 | 0 | 244373 | 13.0000 | C103 | S |
18 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Maria Vande... | female | 31.0 | 1 | 0 | 345763 | 18.0000 | C103 | S |
19 | 1 | 3 | Masselmani, Mrs. Fatima | female | 31.0 | 0 | 0 | 2649 | 7.2250 | C103 | C |
sample_age.dropna() # default: how = 'any'
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
6 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
10 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
11 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
21 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.0 | 0 | 0 | 248698 | 13.0000 | D56 | S |
23 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
27 | 0 | 1 | Fortune, Mr. Charles Alexander | male | 19.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
52 | 1 | 1 | Harper, Mrs. Henry Sleeper (Myna Haxtun) | female | 49.0 | 1 | 0 | PC 17572 | 76.7292 | D33 | C |
54 | 0 | 1 | Ostby, Mr. Engelhart Cornelius | male | 65.0 | 0 | 1 | 113509 | 61.9792 | B30 | C |
62 | 0 | 1 | Harris, Mr. Henry Birkhardt | male | 45.0 | 1 | 0 | 36973 | 83.4750 | C83 | S |
66 | 1 | 2 | Nye, Mrs. (Elizabeth Ramell) | female | 29.0 | 0 | 0 | C.A. 29395 | 10.5000 | F33 | S |
75 | 0 | 3 | Moen, Mr. Sigurd Hansen | male | 25.0 | 0 | 0 | 348123 | 7.6500 | F G73 | S |
88 | 1 | 1 | Fortune, Miss. Mabel Helen | female | 23.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
92 | 0 | 1 | Chaffee, Mr. Herbert Fuller | male | 46.0 | 1 | 0 | W.E.P. 5734 | 61.1750 | E31 | S |
96 | 0 | 1 | Goldschmidt, Mr. George B | male | 71.0 | 0 | 0 | PC 17754 | 34.6542 | A5 | C |
97 | 1 | 1 | Greenfield, Mr. William Bertram | male | 23.0 | 0 | 1 | PC 17759 | 63.3583 | D10 D12 | C |
102 | 0 | 1 | White, Mr. Richard Frasar | male | 21.0 | 0 | 1 | 35281 | 77.2875 | D26 | S |
110 | 0 | 1 | Porter, Mr. Walter Chamberlain | male | 47.0 | 0 | 0 | 110465 | 52.0000 | C110 | S |
118 | 0 | 1 | Baxter, Mr. Quigg Edmond | male | 24.0 | 0 | 1 | PC 17558 | 247.5208 | B58 B60 | C |
123 | 1 | 2 | Webber, Miss. Susan | female | 32.5 | 0 | 0 | 27267 | 13.0000 | E101 | S |
124 | 0 | 1 | White, Mr. Percival Wayland | male | 54.0 | 0 | 1 | 35281 | 77.2875 | D26 | S |
136 | 1 | 1 | Newsom, Miss. Helen Monypeny | female | 19.0 | 0 | 2 | 11752 | 26.2833 | D47 | S |
137 | 0 | 1 | Futrelle, Mr. Jacques Heath | male | 37.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
139 | 0 | 1 | Giglio, Mr. Victor | male | 24.0 | 0 | 0 | PC 17593 | 79.2000 | B86 | C |
148 | 0 | 2 | Navratil, Mr. Michel ("Louis M Hoffman") | male | 36.5 | 0 | 2 | 230080 | 26.0000 | F2 | S |
151 | 1 | 1 | Pears, Mrs. Thomas (Edith Wearne) | female | 22.0 | 1 | 0 | 113776 | 66.6000 | C2 | S |
170 | 0 | 1 | Van der hoef, Mr. Wyckoff | male | 61.0 | 0 | 0 | 111240 | 33.5000 | B19 | S |
174 | 0 | 1 | Smith, Mr. James Clinch | male | 56.0 | 0 | 0 | 17764 | 30.6958 | A7 | C |
177 | 0 | 1 | Isham, Miss. Ann Elizabeth | female | 50.0 | 0 | 0 | PC 17595 | 28.7125 | C49 | C |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
737 | 1 | 1 | Lesurer, Mr. Gustave J | male | 35.0 | 0 | 0 | PC 17755 | 512.3292 | B101 | C |
741 | 0 | 1 | Cavendish, Mr. Tyrell William | male | 36.0 | 1 | 0 | 19877 | 78.8500 | C46 | S |
742 | 1 | 1 | Ryerson, Miss. Susan Parker "Suzette" | female | 21.0 | 2 | 2 | PC 17608 | 262.3750 | B57 B59 B63 B66 | C |
745 | 0 | 1 | Crosby, Capt. Edward Gifford | male | 70.0 | 1 | 1 | WE/P 5735 | 71.0000 | B22 | S |
748 | 0 | 1 | Marvin, Mr. Daniel Warner | male | 19.0 | 1 | 0 | 113773 | 53.1000 | D30 | S |
751 | 1 | 3 | Moor, Master. Meier | male | 6.0 | 0 | 1 | 392096 | 12.4750 | E121 | S |
759 | 1 | 1 | Rothes, the Countess. of (Lucy Noel Martha Dye... | female | 33.0 | 0 | 0 | 110152 | 86.5000 | B77 | S |
763 | 1 | 1 | Carter, Mrs. William Ernest (Lucile Polk) | female | 36.0 | 1 | 2 | 113760 | 120.0000 | B96 B98 | S |
765 | 1 | 1 | Hogeboom, Mrs. John C (Anna Andrews) | female | 51.0 | 1 | 0 | 13502 | 77.9583 | D11 | S |
772 | 0 | 2 | Mack, Mrs. (Mary) | female | 57.0 | 0 | 0 | S.O./P.P. 3 | 10.5000 | E77 | S |
779 | 1 | 1 | Robert, Mrs. Edward Scott (Elisabeth Walton Mc... | female | 43.0 | 0 | 1 | 24160 | 211.3375 | B3 | S |
781 | 1 | 1 | Dick, Mrs. Albert Adrian (Vera Gillespie) | female | 17.0 | 1 | 0 | 17474 | 57.0000 | B20 | S |
782 | 0 | 1 | Long, Mr. Milton Clyde | male | 29.0 | 0 | 0 | 113501 | 30.0000 | D6 | S |
789 | 0 | 1 | Guggenheim, Mr. Benjamin | male | 46.0 | 0 | 0 | PC 17593 | 79.2000 | B82 B84 | C |
796 | 1 | 1 | Leader, Dr. Alice (Farnham) | female | 49.0 | 0 | 0 | 17465 | 25.9292 | D17 | S |
802 | 1 | 1 | Carter, Master. William Thornton II | male | 11.0 | 1 | 2 | 113760 | 120.0000 | B96 B98 | S |
806 | 0 | 1 | Andrews, Mr. Thomas Jr | male | 39.0 | 0 | 0 | 112050 | 0.0000 | A36 | S |
809 | 1 | 1 | Chambers, Mrs. Norman Campbell (Bertha Griggs) | female | 33.0 | 1 | 0 | 113806 | 53.1000 | E8 | S |
820 | 1 | 1 | Hays, Mrs. Charles Melville (Clara Jennings Gr... | female | 52.0 | 1 | 1 | 12749 | 93.5000 | B69 | S |
823 | 1 | 3 | Moor, Mrs. (Beila) | female | 27.0 | 0 | 1 | 392096 | 12.4750 | E121 | S |
835 | 1 | 1 | Compton, Miss. Sara Rebecca | female | 39.0 | 1 | 1 | PC 17756 | 83.1583 | E49 | C |
853 | 1 | 1 | Lines, Miss. Mary Conover | female | 16.0 | 0 | 1 | PC 17592 | 39.4000 | D28 | S |
857 | 1 | 1 | Daly, Mr. Peter Denis | male | 51.0 | 0 | 0 | 113055 | 26.5500 | E17 | S |
862 | 1 | 1 | Swift, Mrs. Frederick Joel (Margaret Welles Ba... | female | 48.0 | 0 | 0 | 17466 | 25.9292 | D17 | S |
867 | 0 | 1 | Roebling, Mr. Washington Augustus II | male | 31.0 | 0 | 0 | PC 17590 | 50.4958 | A24 | S |
871 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
872 | 0 | 1 | Carlsson, Mr. Frans Olof | male | 33.0 | 0 | 0 | 695 | 5.0000 | B51 B53 B55 | S |
879 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
887 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
183 rows × 11 columns
sample_age.dropna(how = 'all')
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
5 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
6 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
7 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
8 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
9 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
10 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
11 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
12 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.0 | 0 | 0 | A/5. 2151 | 8.0500 | NaN | S |
13 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.0 | 1 | 5 | 347082 | 31.2750 | NaN | S |
14 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.0 | 0 | 0 | 350406 | 7.8542 | NaN | S |
15 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.0 | 0 | 0 | 248706 | 16.0000 | NaN | S |
16 | 0 | 3 | Rice, Master. Eugene | male | 2.0 | 4 | 1 | 382652 | 29.1250 | NaN | Q |
17 | 1 | 2 | Williams, Mr. Charles Eugene | male | NaN | 0 | 0 | 244373 | 13.0000 | NaN | S |
18 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Maria Vande... | female | 31.0 | 1 | 0 | 345763 | 18.0000 | NaN | S |
19 | 1 | 3 | Masselmani, Mrs. Fatima | female | NaN | 0 | 0 | 2649 | 7.2250 | NaN | C |
20 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.0 | 0 | 0 | 239865 | 26.0000 | NaN | S |
21 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.0 | 0 | 0 | 248698 | 13.0000 | D56 | S |
22 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.0 | 0 | 0 | 330923 | 8.0292 | NaN | Q |
23 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
24 | 0 | 3 | Palsson, Miss. Torborg Danira | female | 8.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
25 | 1 | 3 | Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... | female | 38.0 | 1 | 5 | 347077 | 31.3875 | NaN | S |
26 | 0 | 3 | Emir, Mr. Farred Chehab | male | NaN | 0 | 0 | 2631 | 7.2250 | NaN | C |
27 | 0 | 1 | Fortune, Mr. Charles Alexander | male | 19.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
28 | 1 | 3 | O'Dwyer, Miss. Ellen "Nellie" | female | NaN | 0 | 0 | 330959 | 7.8792 | NaN | Q |
29 | 0 | 3 | Todoroff, Mr. Lalio | male | NaN | 0 | 0 | 349216 | 7.8958 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
861 | 0 | 2 | Giles, Mr. Frederick Edward | male | 21.0 | 1 | 0 | 28134 | 11.5000 | NaN | S |
862 | 1 | 1 | Swift, Mrs. Frederick Joel (Margaret Welles Ba... | female | 48.0 | 0 | 0 | 17466 | 25.9292 | D17 | S |
863 | 0 | 3 | Sage, Miss. Dorothy Edith "Dolly" | female | NaN | 8 | 2 | CA. 2343 | 69.5500 | NaN | S |
864 | 0 | 2 | Gill, Mr. John William | male | 24.0 | 0 | 0 | 233866 | 13.0000 | NaN | S |
865 | 1 | 2 | Bystrom, Mrs. (Karolina) | female | 42.0 | 0 | 0 | 236852 | 13.0000 | NaN | S |
866 | 1 | 2 | Duran y More, Miss. Asuncion | female | 27.0 | 1 | 0 | SC/PARIS 2149 | 13.8583 | NaN | C |
867 | 0 | 1 | Roebling, Mr. Washington Augustus II | male | 31.0 | 0 | 0 | PC 17590 | 50.4958 | A24 | S |
868 | 0 | 3 | van Melkebeke, Mr. Philemon | male | NaN | 0 | 0 | 345777 | 9.5000 | NaN | S |
869 | 1 | 3 | Johnson, Master. Harold Theodor | male | 4.0 | 1 | 1 | 347742 | 11.1333 | NaN | S |
870 | 0 | 3 | Balkic, Mr. Cerin | male | 26.0 | 0 | 0 | 349248 | 7.8958 | NaN | S |
871 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
872 | 0 | 1 | Carlsson, Mr. Frans Olof | male | 33.0 | 0 | 0 | 695 | 5.0000 | B51 B53 B55 | S |
873 | 0 | 3 | Vander Cruyssen, Mr. Victor | male | 47.0 | 0 | 0 | 345765 | 9.0000 | NaN | S |
874 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
875 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
876 | 0 | 3 | Gustafsson, Mr. Alfred Ossian | male | 20.0 | 0 | 0 | 7534 | 9.8458 | NaN | S |
877 | 0 | 3 | Petroff, Mr. Nedelio | male | 19.0 | 0 | 0 | 349212 | 7.8958 | NaN | S |
878 | 0 | 3 | Laleff, Mr. Kristo | male | NaN | 0 | 0 | 349217 | 7.8958 | NaN | S |
879 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
880 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
881 | 0 | 3 | Markun, Mr. Johann | male | 33.0 | 0 | 0 | 349257 | 7.8958 | NaN | S |
882 | 0 | 3 | Dahlberg, Miss. Gerda Ulrika | female | 22.0 | 0 | 0 | 7552 | 10.5167 | NaN | S |
883 | 0 | 2 | Banfield, Mr. Frederick James | male | 28.0 | 0 | 0 | C.A./SOTON 34068 | 10.5000 | NaN | S |
884 | 0 | 3 | Sutehall, Mr. Henry Jr | male | 25.0 | 0 | 0 | SOTON/OQ 392076 | 7.0500 | NaN | S |
885 | 0 | 3 | Rice, Mrs. William (Margaret Norton) | female | 39.0 | 0 | 5 | 382652 | 29.1250 | NaN | Q |
886 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
887 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
888 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
889 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
890 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
sample_age.dropna(subset = ['cabin']) # if cabin has na value --> remove the row
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
6 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
10 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
11 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
21 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.0 | 0 | 0 | 248698 | 13.0000 | D56 | S |
23 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
27 | 0 | 1 | Fortune, Mr. Charles Alexander | male | 19.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
31 | 1 | 1 | Spencer, Mrs. William Augustus (Marie Eugenie) | female | NaN | 1 | 0 | PC 17569 | 146.5208 | B78 | C |
52 | 1 | 1 | Harper, Mrs. Henry Sleeper (Myna Haxtun) | female | 49.0 | 1 | 0 | PC 17572 | 76.7292 | D33 | C |
54 | 0 | 1 | Ostby, Mr. Engelhart Cornelius | male | 65.0 | 0 | 1 | 113509 | 61.9792 | B30 | C |
55 | 1 | 1 | Woolner, Mr. Hugh | male | NaN | 0 | 0 | 19947 | 35.5000 | C52 | S |
61 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0000 | B28 | NaN |
62 | 0 | 1 | Harris, Mr. Henry Birkhardt | male | 45.0 | 1 | 0 | 36973 | 83.4750 | C83 | S |
66 | 1 | 2 | Nye, Mrs. (Elizabeth Ramell) | female | 29.0 | 0 | 0 | C.A. 29395 | 10.5000 | F33 | S |
75 | 0 | 3 | Moen, Mr. Sigurd Hansen | male | 25.0 | 0 | 0 | 348123 | 7.6500 | F G73 | S |
88 | 1 | 1 | Fortune, Miss. Mabel Helen | female | 23.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
92 | 0 | 1 | Chaffee, Mr. Herbert Fuller | male | 46.0 | 1 | 0 | W.E.P. 5734 | 61.1750 | E31 | S |
96 | 0 | 1 | Goldschmidt, Mr. George B | male | 71.0 | 0 | 0 | PC 17754 | 34.6542 | A5 | C |
97 | 1 | 1 | Greenfield, Mr. William Bertram | male | 23.0 | 0 | 1 | PC 17759 | 63.3583 | D10 D12 | C |
102 | 0 | 1 | White, Mr. Richard Frasar | male | 21.0 | 0 | 1 | 35281 | 77.2875 | D26 | S |
110 | 0 | 1 | Porter, Mr. Walter Chamberlain | male | 47.0 | 0 | 0 | 110465 | 52.0000 | C110 | S |
118 | 0 | 1 | Baxter, Mr. Quigg Edmond | male | 24.0 | 0 | 1 | PC 17558 | 247.5208 | B58 B60 | C |
123 | 1 | 2 | Webber, Miss. Susan | female | 32.5 | 0 | 0 | 27267 | 13.0000 | E101 | S |
124 | 0 | 1 | White, Mr. Percival Wayland | male | 54.0 | 0 | 1 | 35281 | 77.2875 | D26 | S |
128 | 1 | 3 | Peter, Miss. Anna | female | NaN | 1 | 1 | 2668 | 22.3583 | F E69 | C |
136 | 1 | 1 | Newsom, Miss. Helen Monypeny | female | 19.0 | 0 | 2 | 11752 | 26.2833 | D47 | S |
137 | 0 | 1 | Futrelle, Mr. Jacques Heath | male | 37.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
139 | 0 | 1 | Giglio, Mr. Victor | male | 24.0 | 0 | 0 | PC 17593 | 79.2000 | B86 | C |
148 | 0 | 2 | Navratil, Mr. Michel ("Louis M Hoffman") | male | 36.5 | 0 | 2 | 230080 | 26.0000 | F2 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
751 | 1 | 3 | Moor, Master. Meier | male | 6.0 | 0 | 1 | 392096 | 12.4750 | E121 | S |
759 | 1 | 1 | Rothes, the Countess. of (Lucy Noel Martha Dye... | female | 33.0 | 0 | 0 | 110152 | 86.5000 | B77 | S |
763 | 1 | 1 | Carter, Mrs. William Ernest (Lucile Polk) | female | 36.0 | 1 | 2 | 113760 | 120.0000 | B96 B98 | S |
765 | 1 | 1 | Hogeboom, Mrs. John C (Anna Andrews) | female | 51.0 | 1 | 0 | 13502 | 77.9583 | D11 | S |
772 | 0 | 2 | Mack, Mrs. (Mary) | female | 57.0 | 0 | 0 | S.O./P.P. 3 | 10.5000 | E77 | S |
776 | 0 | 3 | Tobin, Mr. Roger | male | NaN | 0 | 0 | 383121 | 7.7500 | F38 | Q |
779 | 1 | 1 | Robert, Mrs. Edward Scott (Elisabeth Walton Mc... | female | 43.0 | 0 | 1 | 24160 | 211.3375 | B3 | S |
781 | 1 | 1 | Dick, Mrs. Albert Adrian (Vera Gillespie) | female | 17.0 | 1 | 0 | 17474 | 57.0000 | B20 | S |
782 | 0 | 1 | Long, Mr. Milton Clyde | male | 29.0 | 0 | 0 | 113501 | 30.0000 | D6 | S |
789 | 0 | 1 | Guggenheim, Mr. Benjamin | male | 46.0 | 0 | 0 | PC 17593 | 79.2000 | B82 B84 | C |
796 | 1 | 1 | Leader, Dr. Alice (Farnham) | female | 49.0 | 0 | 0 | 17465 | 25.9292 | D17 | S |
802 | 1 | 1 | Carter, Master. William Thornton II | male | 11.0 | 1 | 2 | 113760 | 120.0000 | B96 B98 | S |
806 | 0 | 1 | Andrews, Mr. Thomas Jr | male | 39.0 | 0 | 0 | 112050 | 0.0000 | A36 | S |
809 | 1 | 1 | Chambers, Mrs. Norman Campbell (Bertha Griggs) | female | 33.0 | 1 | 0 | 113806 | 53.1000 | E8 | S |
815 | 0 | 1 | Fry, Mr. Richard | male | NaN | 0 | 0 | 112058 | 0.0000 | B102 | S |
820 | 1 | 1 | Hays, Mrs. Charles Melville (Clara Jennings Gr... | female | 52.0 | 1 | 1 | 12749 | 93.5000 | B69 | S |
823 | 1 | 3 | Moor, Mrs. (Beila) | female | 27.0 | 0 | 1 | 392096 | 12.4750 | E121 | S |
829 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0000 | B28 | NaN |
835 | 1 | 1 | Compton, Miss. Sara Rebecca | female | 39.0 | 1 | 1 | PC 17756 | 83.1583 | E49 | C |
839 | 1 | 1 | Marechal, Mr. Pierre | male | NaN | 0 | 0 | 11774 | 29.7000 | C47 | C |
849 | 1 | 1 | Goldenberg, Mrs. Samuel L (Edwiga Grabowska) | female | NaN | 1 | 0 | 17453 | 89.1042 | C92 | C |
853 | 1 | 1 | Lines, Miss. Mary Conover | female | 16.0 | 0 | 1 | PC 17592 | 39.4000 | D28 | S |
857 | 1 | 1 | Daly, Mr. Peter Denis | male | 51.0 | 0 | 0 | 113055 | 26.5500 | E17 | S |
862 | 1 | 1 | Swift, Mrs. Frederick Joel (Margaret Welles Ba... | female | 48.0 | 0 | 0 | 17466 | 25.9292 | D17 | S |
867 | 0 | 1 | Roebling, Mr. Washington Augustus II | male | 31.0 | 0 | 0 | PC 17590 | 50.4958 | A24 | S |
871 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
872 | 0 | 1 | Carlsson, Mr. Frans Olof | male | 33.0 | 0 | 0 | 695 | 5.0000 | B51 B53 B55 | S |
879 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
887 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
204 rows × 11 columns
sample_age.drop_duplicates()
# how to check if we have no duplicates
# sample_age.drop_duplicates().info() == sample_age.info()
survived | pclass | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
5 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
6 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
7 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
8 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
9 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
10 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
11 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
12 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.0 | 0 | 0 | A/5. 2151 | 8.0500 | NaN | S |
13 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.0 | 1 | 5 | 347082 | 31.2750 | NaN | S |
14 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.0 | 0 | 0 | 350406 | 7.8542 | NaN | S |
15 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.0 | 0 | 0 | 248706 | 16.0000 | NaN | S |
16 | 0 | 3 | Rice, Master. Eugene | male | 2.0 | 4 | 1 | 382652 | 29.1250 | NaN | Q |
17 | 1 | 2 | Williams, Mr. Charles Eugene | male | NaN | 0 | 0 | 244373 | 13.0000 | NaN | S |
18 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Maria Vande... | female | 31.0 | 1 | 0 | 345763 | 18.0000 | NaN | S |
19 | 1 | 3 | Masselmani, Mrs. Fatima | female | NaN | 0 | 0 | 2649 | 7.2250 | NaN | C |
20 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.0 | 0 | 0 | 239865 | 26.0000 | NaN | S |
21 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.0 | 0 | 0 | 248698 | 13.0000 | D56 | S |
22 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.0 | 0 | 0 | 330923 | 8.0292 | NaN | Q |
23 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
24 | 0 | 3 | Palsson, Miss. Torborg Danira | female | 8.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
25 | 1 | 3 | Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... | female | 38.0 | 1 | 5 | 347077 | 31.3875 | NaN | S |
26 | 0 | 3 | Emir, Mr. Farred Chehab | male | NaN | 0 | 0 | 2631 | 7.2250 | NaN | C |
27 | 0 | 1 | Fortune, Mr. Charles Alexander | male | 19.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
28 | 1 | 3 | O'Dwyer, Miss. Ellen "Nellie" | female | NaN | 0 | 0 | 330959 | 7.8792 | NaN | Q |
29 | 0 | 3 | Todoroff, Mr. Lalio | male | NaN | 0 | 0 | 349216 | 7.8958 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
861 | 0 | 2 | Giles, Mr. Frederick Edward | male | 21.0 | 1 | 0 | 28134 | 11.5000 | NaN | S |
862 | 1 | 1 | Swift, Mrs. Frederick Joel (Margaret Welles Ba... | female | 48.0 | 0 | 0 | 17466 | 25.9292 | D17 | S |
863 | 0 | 3 | Sage, Miss. Dorothy Edith "Dolly" | female | NaN | 8 | 2 | CA. 2343 | 69.5500 | NaN | S |
864 | 0 | 2 | Gill, Mr. John William | male | 24.0 | 0 | 0 | 233866 | 13.0000 | NaN | S |
865 | 1 | 2 | Bystrom, Mrs. (Karolina) | female | 42.0 | 0 | 0 | 236852 | 13.0000 | NaN | S |
866 | 1 | 2 | Duran y More, Miss. Asuncion | female | 27.0 | 1 | 0 | SC/PARIS 2149 | 13.8583 | NaN | C |
867 | 0 | 1 | Roebling, Mr. Washington Augustus II | male | 31.0 | 0 | 0 | PC 17590 | 50.4958 | A24 | S |
868 | 0 | 3 | van Melkebeke, Mr. Philemon | male | NaN | 0 | 0 | 345777 | 9.5000 | NaN | S |
869 | 1 | 3 | Johnson, Master. Harold Theodor | male | 4.0 | 1 | 1 | 347742 | 11.1333 | NaN | S |
870 | 0 | 3 | Balkic, Mr. Cerin | male | 26.0 | 0 | 0 | 349248 | 7.8958 | NaN | S |
871 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
872 | 0 | 1 | Carlsson, Mr. Frans Olof | male | 33.0 | 0 | 0 | 695 | 5.0000 | B51 B53 B55 | S |
873 | 0 | 3 | Vander Cruyssen, Mr. Victor | male | 47.0 | 0 | 0 | 345765 | 9.0000 | NaN | S |
874 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
875 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
876 | 0 | 3 | Gustafsson, Mr. Alfred Ossian | male | 20.0 | 0 | 0 | 7534 | 9.8458 | NaN | S |
877 | 0 | 3 | Petroff, Mr. Nedelio | male | 19.0 | 0 | 0 | 349212 | 7.8958 | NaN | S |
878 | 0 | 3 | Laleff, Mr. Kristo | male | NaN | 0 | 0 | 349217 | 7.8958 | NaN | S |
879 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
880 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
881 | 0 | 3 | Markun, Mr. Johann | male | 33.0 | 0 | 0 | 349257 | 7.8958 | NaN | S |
882 | 0 | 3 | Dahlberg, Miss. Gerda Ulrika | female | 22.0 | 0 | 0 | 7552 | 10.5167 | NaN | S |
883 | 0 | 2 | Banfield, Mr. Frederick James | male | 28.0 | 0 | 0 | C.A./SOTON 34068 | 10.5000 | NaN | S |
884 | 0 | 3 | Sutehall, Mr. Henry Jr | male | 25.0 | 0 | 0 | SOTON/OQ 392076 | 7.0500 | NaN | S |
885 | 0 | 3 | Rice, Mrs. William (Margaret Norton) | female | 39.0 | 0 | 5 | 382652 | 29.1250 | NaN | Q |
886 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
887 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
888 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
889 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
890 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
pd.to_datetime(series)
df = pd.read_csv("superstore_dataset2011-2015.csv", encoding = 'ISO-8859-1')
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 51290 entries, 0 to 51289 Data columns (total 24 columns): Row ID 51290 non-null int64 Order ID 51290 non-null object Order Date 51290 non-null object Ship Date 51290 non-null object Ship Mode 51290 non-null object Customer ID 51290 non-null object Customer Name 51290 non-null object Segment 51290 non-null object City 51290 non-null object State 51290 non-null object Country 51290 non-null object Postal Code 9994 non-null float64 Market 51290 non-null object Region 51290 non-null object Product ID 51290 non-null object Category 51290 non-null object Sub-Category 51290 non-null object Product Name 51290 non-null object Sales 51290 non-null float64 Quantity 51290 non-null int64 Discount 51290 non-null float64 Profit 51290 non-null float64 Shipping Cost 51290 non-null float64 Order Priority 51290 non-null object dtypes: float64(5), int64(2), object(17) memory usage: 9.4+ MB
df.sample(2).T
20640 | 5100 | |
---|---|---|
Row ID | 26584 | 47068 |
Order ID | ID-2013-30544 | IR-2012-3550 |
Order Date | 13-06-2013 | 5/9/2012 |
Ship Date | 15-06-2013 | 10/9/2012 |
Ship Mode | First Class | Standard Class |
Customer ID | CL-12700 | TB-11400 |
Customer Name | Craig Leslie | Tom Boeckenhauer |
Segment | Home Office | Consumer |
City | Ho Chi Minh City | Khomeynishahr |
State | Ho Chí Minh City | Esfahan |
Country | Vietnam | Iran |
Postal Code | NaN | NaN |
Market | APAC | EMEA |
Region | Southeast Asia | EMEA |
Product ID | TEC-MA-10004298 | TEC-SAM-10001017 |
Category | Technology | Technology |
Sub-Category | Machines | Phones |
Product Name | Panasonic Calculator, Durable | Samsung Speaker Phone, Cordless |
Sales | 83.664 | 756.54 |
Quantity | 2 | 6 |
Discount | 0.17 | 0 |
Profit | -16.176 | 128.52 |
Shipping Cost | 11.38 | 82.55 |
Order Priority | High | High |
del df['Row ID']
df['Order Date'] = pd.to_datetime(df['Order Date'])
df['Ship Date'] = pd.to_datetime(df['Ship Date'])
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 51290 entries, 0 to 51289 Data columns (total 23 columns): Order ID 51290 non-null object Order Date 51290 non-null datetime64[ns] Ship Date 51290 non-null datetime64[ns] Ship Mode 51290 non-null object Customer ID 51290 non-null object Customer Name 51290 non-null object Segment 51290 non-null object City 51290 non-null object State 51290 non-null object Country 51290 non-null object Postal Code 9994 non-null float64 Market 51290 non-null object Region 51290 non-null object Product ID 51290 non-null object Category 51290 non-null object Sub-Category 51290 non-null object Product Name 51290 non-null object Sales 51290 non-null float64 Quantity 51290 non-null int64 Discount 51290 non-null float64 Profit 51290 non-null float64 Shipping Cost 51290 non-null float64 Order Priority 51290 non-null object dtypes: datetime64[ns](2), float64(5), int64(1), object(15) memory usage: 9.0+ MB
df['Order Weekday'] = df['Order Date'].dt.day_name()
df['Ship Weekday'] = df['Ship Date'].dt.day_name()
df['Order Month'] = df['Order Date'].dt.month
df['Ship Month'] = df['Ship Date'].dt.month
df['Order Year'] = df['Order Date'].dt.year
df['Ship Year'] = df['Ship Date'].dt.year
# to convert unixtime to readable time format
# df.convert_time = pd.to_datetime(df.game_action_ts, unit = 'ms')
df.sample(4)
Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | City | State | Country | ... | Discount | Profit | Shipping Cost | Order Priority | Order Weekday | Ship Weekday | Order Month | Ship Month | Order Year | Ship Year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
34880 | IN-2014-20338 | 2014-09-21 | 2014-09-26 | Standard Class | VB-21745 | Victoria Brennan | Corporate | Manila | National Capital | Philippines | ... | 0.45 | 2.679 | 1.99 | Medium | Sunday | Friday | 9 | 9 | 2014 | 2014 |
3631 | MX-2012-163279 | 2012-01-06 | 2012-03-06 | Second Class | RD-19930 | Russell D'Ascenzo | Consumer | Ciego de Ávila | Ciego de Ávila | Cuba | ... | 0.00 | 2.800 | 12.53 | Critical | Friday | Tuesday | 1 | 3 | 2012 | 2012 |
11712 | ES-2013-5056342 | 2013-09-12 | 2013-12-15 | Standard Class | RD-19480 | Rick Duston | Consumer | Peterborough | England | United Kingdom | ... | 0.00 | 89.700 | 26.26 | Low | Thursday | Sunday | 9 | 12 | 2013 | 2013 |
50774 | TU-2014-9480 | 2014-07-31 | 2014-03-08 | First Class | SC-10050 | Sample Company A | Home Office | Istanbul | Istanbul | Turkey | ... | 0.60 | -7.596 | 1.40 | Medium | Thursday | Saturday | 7 | 3 | 2014 | 2014 |
4 rows × 29 columns
pd.pivot_table(data, values = 'col_name', index = 'col_name', columns = 'col_name', aggfunc = 'func')
temp = pd.pivot_table(df, values = 'Sales', index = 'Country', columns = 'Order Year', aggfunc = 'sum')
temp.sort_values(by = 'Country').head(10)
Order Year | 2011 | 2012 | 2013 | 2014 |
---|---|---|---|---|
Country | ||||
Afghanistan | 1729.41000 | 9071.820 | 4242.81000 | 6629.28000 |
Albania | 1707.54000 | 948.120 | 821.25000 | 411.21000 |
Algeria | 8539.80000 | 9288.990 | 5478.60000 | 12784.20000 |
Angola | 6416.91000 | 3172.980 | 7919.55000 | 8044.56000 |
Argentina | 8739.30512 | 7037.005 | 17955.92804 | 23779.54512 |
Armenia | 136.26000 | NaN | 20.49000 | NaN |
Australia | 137580.06000 | 203950.842 | 268971.84900 | 314733.10200 |
Austria | 21734.49000 | 18105.000 | 23178.00000 | 29521.56000 |
Azerbaijan | 802.92000 | 479.640 | 2692.98000 | 1655.97000 |
Bahrain | NaN | NaN | 669.18000 | NaN |
type(temp)
pandas.core.frame.DataFrame
str_as_value = pd.pivot_table(df, values = 'Customer ID', index = 'Order Weekday', aggfunc = pd.Series.nunique)
str_as_value.head(10)
Customer ID | |
---|---|
Order Weekday | |
Friday | 1310 |
Monday | 1290 |
Saturday | 1146 |
Sunday | 917 |
Thursday | 1299 |
Tuesday | 1335 |
Wednesday | 1310 |
temp3 = pd.pivot_table(df, values = ['Profit', 'Sales'], index = 'Order Weekday',
aggfunc = {'Profit':'sum',
'Sales':['sum', 'max']})
temp3
Profit | Sales | ||
---|---|---|---|
sum | max | sum | |
Order Weekday | |||
Friday | 243802.83544 | 22638.480 | 2.144812e+06 |
Monday | 246526.55710 | 13999.960 | 2.067053e+06 |
Saturday | 176486.55222 | 8749.950 | 1.433806e+06 |
Sunday | 104117.90698 | 17499.950 | 8.346382e+05 |
Thursday | 241183.07994 | 11199.968 | 2.057502e+06 |
Tuesday | 249788.05098 | 10499.970 | 2.150076e+06 |
Wednesday | 205552.30862 | 9892.740 | 1.954614e+06 |
df.groupby(by = 'col_name').agg(func)
# example
df = df.groupby('game_name').agg({'col_1':'sum', 'col_2':'sum'})
df_g = df.groupby(by = ['Country']).sum()
df_g.sample(5)
Postal Code | Sales | Quantity | Discount | Profit | Shipping Cost | Order Month | Ship Month | Order Year | Ship Year | |
---|---|---|---|---|---|---|---|---|---|---|
Country | ||||||||||
Burundi | 0.0 | 267.72000 | 4 | 0.000 | 103.08000 | 18.51 | 14 | 18 | 4023 | 4023 |
Central African Republic | 0.0 | 2377.56000 | 15 | 0.000 | 468.54000 | 403.38 | 32 | 16 | 14088 | 14088 |
Panama | 0.0 | 51539.92752 | 1426 | 157.644 | -17723.45248 | 5416.24 | 2795 | 2924 | 780994 | 781001 |
Bolivia | 0.0 | 11588.97000 | 174 | 0.202 | 2229.35000 | 1234.83 | 357 | 350 | 96615 | 96615 |
Slovakia | 0.0 | 865.32000 | 18 | 0.000 | 115.26000 | 86.10 | 31 | 46 | 16093 | 16093 |
df_g1 = df.groupby(by = ['Country', 'Order Year']).agg({
'Quantity': 'mean',
'Profit': 'mean',
'Sales': 'mean',
'Shipping Cost': 'mean'})
df_g1.head(10)
Quantity | Profit | Sales | Shipping Cost | ||
---|---|---|---|---|---|
Country | Order Year | ||||
Afghanistan | 2011 | 5.111111 | 32.660000 | 192.156667 | 18.357778 |
2012 | 4.066667 | 128.276000 | 604.788000 | 75.201333 | |
2013 | 3.750000 | 95.745000 | 353.567500 | 30.488333 | |
2014 | 4.000000 | 110.172632 | 348.909474 | 33.743684 | |
Albania | 2011 | 3.000000 | 53.508000 | 341.508000 | 61.636000 |
2012 | 1.600000 | 58.260000 | 189.624000 | 14.462000 | |
2013 | 2.000000 | 20.990000 | 273.750000 | 89.953333 | |
2014 | 3.666667 | 29.170000 | 137.070000 | 16.073333 | |
Algeria | 2011 | 2.806452 | 56.010000 | 275.477419 | 33.336774 |
2012 | 2.282609 | 48.218478 | 201.934565 | 18.865652 |
df.sort_values(by = 'col_name', ascending = True)
df_g1.sort_values(by = 'Profit', ascending = False).head(5)
Quantity | Profit | Sales | Shipping Cost | ||
---|---|---|---|---|---|
Country | Order Year | ||||
Lesotho | 2013 | 12.00 | 455.76 | 3799.0800 | 369.730000 |
Montenegro | 2014 | 3.25 | 321.69 | 1001.0925 | 93.937500 |
Slovenia | 2014 | 9.00 | 269.73 | 658.2600 | 89.095000 |
Estonia | 2012 | 2.50 | 245.10 | 941.2800 | 82.495000 |
Mauritania | 2013 | 4.00 | 212.74 | 504.3000 | 66.823333 |
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']},
index=[0, 1, 2, 3])
df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
'B': ['B4', 'B5', 'B6', 'B7'],
'C': ['C4', 'C5', 'C6', 'C7'],
'D': ['D4', 'D5', 'D6', 'D7']},
index=[4, 5, 6, 7])
df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
'B': ['B8', 'B9', 'B10', 'B11'],
'C': ['C8', 'C9', 'C10', 'C11'],
'D': ['D8', 'D9', 'D10', 'D11']},
index=[8, 9, 10, 11])
df4 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
'B': ['B8', 'B9', 'B10', 'B11'],
'C': ['C8', 'C9', 'C10', 'C11'],
'D': ['D8', 'D9', 'D10', 'D11']},
index=[0, 1, 2, 3])
df1
A | B | C | D | |
---|---|---|---|---|
0 | A0 | B0 | C0 | D0 |
1 | A1 | B1 | C1 | D1 |
2 | A2 | B2 | C2 | D2 |
3 | A3 | B3 | C3 | D3 |
df3
A | B | C | D | |
---|---|---|---|---|
8 | A8 | B8 | C8 | D8 |
9 | A9 | B9 | C9 | D9 |
10 | A10 | B10 | C10 | D10 |
11 | A11 | B11 | C11 | D11 |
pd.concat([df1, df3])
A | B | C | D | |
---|---|---|---|---|
0 | A0 | B0 | C0 | D0 |
1 | A1 | B1 | C1 | D1 |
2 | A2 | B2 | C2 | D2 |
3 | A3 | B3 | C3 | D3 |
8 | A8 | B8 | C8 | D8 |
9 | A9 | B9 | C9 | D9 |
10 | A10 | B10 | C10 | D10 |
11 | A11 | B11 | C11 | D11 |
pd.concat([df1, df3], axis = 1)
A | B | C | D | A | B | C | D | |
---|---|---|---|---|---|---|---|---|
0 | A0 | B0 | C0 | D0 | NaN | NaN | NaN | NaN |
1 | A1 | B1 | C1 | D1 | NaN | NaN | NaN | NaN |
2 | A2 | B2 | C2 | D2 | NaN | NaN | NaN | NaN |
3 | A3 | B3 | C3 | D3 | NaN | NaN | NaN | NaN |
8 | NaN | NaN | NaN | NaN | A8 | B8 | C8 | D8 |
9 | NaN | NaN | NaN | NaN | A9 | B9 | C9 | D9 |
10 | NaN | NaN | NaN | NaN | A10 | B10 | C10 | D10 |
11 | NaN | NaN | NaN | NaN | A11 | B11 | C11 | D11 |
df4
A | B | C | D | |
---|---|---|---|---|
0 | A8 | B8 | C8 | D8 |
1 | A9 | B9 | C9 | D9 |
2 | A10 | B10 | C10 | D10 |
3 | A11 | B11 | C11 | D11 |
pd.concat([df1, df4])
A | B | C | D | |
---|---|---|---|---|
0 | A0 | B0 | C0 | D0 |
1 | A1 | B1 | C1 | D1 |
2 | A2 | B2 | C2 | D2 |
3 | A3 | B3 | C3 | D3 |
0 | A8 | B8 | C8 | D8 |
1 | A9 | B9 | C9 | D9 |
2 | A10 | B10 | C10 | D10 |
3 | A11 | B11 | C11 | D11 |
pd.concat([df1, df4], axis = 1)
A | B | C | D | A | B | C | D | |
---|---|---|---|---|---|---|---|---|
0 | A0 | B0 | C0 | D0 | A8 | B8 | C8 | D8 |
1 | A1 | B1 | C1 | D1 | A9 | B9 | C9 | D9 |
2 | A2 | B2 | C2 | D2 | A10 | B10 | C10 | D10 |
3 | A3 | B3 | C3 | D3 | A11 | B11 | C11 | D11 |
df1.join(df2, how = 'left', on = 'col_name')
df1.merge(df2, how = 'left', left_on = 'col_name', right_on = 'col_name')
df1.join(df2, how = 'left', on = 'col_name', sort = False).reset_index()
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
'key2': ['K0', 'K1', 'K0', 'K1'],
'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
'key2': ['K0', 'K0', 'K0', 'K0'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']})
left
key1 | key2 | A | B | |
---|---|---|---|---|
0 | K0 | K0 | A0 | B0 |
1 | K0 | K1 | A1 | B1 |
2 | K1 | K0 | A2 | B2 |
3 | K2 | K1 | A3 | B3 |
right
key1 | key2 | C | D | |
---|---|---|---|---|
0 | K0 | K0 | C0 | D0 |
1 | K1 | K0 | C1 | D1 |
2 | K1 | K0 | C2 | D2 |
3 | K2 | K0 | C3 | D3 |
left.merge(right, on = ['key1', 'key2'], how = 'left')
key1 | key2 | A | B | C | D | |
---|---|---|---|---|---|---|
0 | K0 | K0 | A0 | B0 | C0 | D0 |
1 | K0 | K1 | A1 | B1 | NaN | NaN |
2 | K1 | K0 | A2 | B2 | C1 | D1 |
3 | K1 | K0 | A2 | B2 | C2 | D2 |
4 | K2 | K1 | A3 | B3 | NaN | NaN |
left.merge(right, on = ['key1', 'key2'], how = 'left')
key1 | key2 | A | B | C | D | |
---|---|---|---|---|---|---|
0 | K0 | K0 | A0 | B0 | C0 | D0 |
1 | K0 | K1 | A1 | B1 | NaN | NaN |
2 | K1 | K0 | A2 | B2 | C1 | D1 |
3 | K1 | K0 | A2 | B2 | C2 | D2 |
4 | K2 | K1 | A3 | B3 | NaN | NaN |
left.join(right, how = 'left', lsuffix = '_l', rsuffix = '_r')
key1_l | key2_l | A | B | key1_r | key2_r | C | D | |
---|---|---|---|---|---|---|---|---|
0 | K0 | K0 | A0 | B0 | K0 | K0 | C0 | D0 |
1 | K0 | K1 | A1 | B1 | K1 | K0 | C1 | D1 |
2 | K1 | K0 | A2 | B2 | K1 | K0 | C2 | D2 |
3 | K2 | K1 | A3 | B3 | K2 | K0 | C3 | D3 |
left = left.set_index(['key1', 'key2'])
right = right.set_index(['key1', 'key2'])
left
A | B | ||
---|---|---|---|
key1 | key2 | ||
K0 | K0 | A0 | B0 |
K1 | A1 | B1 | |
K1 | K0 | A2 | B2 |
K2 | K1 | A3 | B3 |
right
C | D | ||
---|---|---|---|
key1 | key2 | ||
K0 | K0 | C0 | D0 |
K1 | K0 | C1 | D1 |
K0 | C2 | D2 | |
K2 | K0 | C3 | D3 |
# join can only by index, if have repeated column name, need r/lsuffix for rename
left.join(right, lsuffix = '_l', rsuffix = '_r')
A | B | C | D | ||
---|---|---|---|---|---|
key1 | key2 | ||||
K0 | K0 | A0 | B0 | C0 | D0 |
K1 | A1 | B1 | NaN | NaN | |
K1 | K0 | A2 | B2 | C1 | D1 |
K0 | A2 | B2 | C2 | D2 | |
K2 | K1 | A3 | B3 | NaN | NaN |
left.merge(right, on = ['key1', 'key2'], how = 'left')
A | B | C | D | ||
---|---|---|---|---|---|
key1 | key2 | ||||
K0 | K0 | A0 | B0 | C0 | D0 |
K1 | A1 | B1 | NaN | NaN | |
K1 | K0 | A2 | B2 | C1 | D1 |
K0 | A2 | B2 | C2 | D2 | |
K2 | K1 | A3 | B3 | NaN | NaN |
left = left.reset_index()
left
key1 | key2 | A | B | |
---|---|---|---|---|
0 | K0 | K0 | A0 | B0 |
1 | K0 | K1 | A1 | B1 |
2 | K1 | K0 | A2 | B2 |
3 | K2 | K1 | A3 | B3 |
df.to_csv(path)
df.to_excel(path)
df.to_json(path)
save_path = 'tutorial_left.csv'
left.to_csv(save_path)
save_path = 'tutorial_left.xlsx'
left.to_excel(save_path, sheet_name = 'left')