Skip to content
Snippets Groups Projects
Commit 31f42551 authored by Andrii Skyba
Browse files

lab3

parent e35766aa
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:24359ac1-37a0-4e08-aa35-b787d396aeef tags:
``` python
# Plain arithmetic: (23 + 4) % 5 is 2, so the cell evaluates to 3.
(23 + 4) % 5 + 1
```
%% Output
3
%% Cell type:code id:3f35943f-cf4d-486c-9459-85227daab9cd tags:
``` python
import pandas as pd
# Read the national names CSV and preview its first eight rows.
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
first_rows = data.iloc[:8]  # positional slice: the same rows head(8) returns
print(first_rows)
```
%% Output
Id Name Year Gender Count
0 1 Mary 1880 F 7065
1 2 Anna 1880 F 2604
2 3 Emma 1880 F 2003
3 4 Elizabeth 1880 F 1939
4 5 Minnie 1880 F 1746
5 6 Margaret 1880 F 1578
6 7 Ida 1880 F 1472
7 8 Alice 1880 F 1414
%% Cell type:code id:80b2aade-5879-440b-98bf-0e7d616fe025 tags:
``` python
import pandas as pd
# Read the national names CSV and preview its last eight rows.
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
last_rows = data.iloc[-8:]  # positional slice: the same rows tail(8) returns
print(last_rows)
```
%% Output
Id Name Year Gender Count
1825425 1825426 Zo 2014 M 5
1825426 1825427 Zyeir 2014 M 5
1825427 1825428 Zyel 2014 M 5
1825428 1825429 Zykeem 2014 M 5
1825429 1825430 Zymeer 2014 M 5
1825430 1825431 Zymiere 2014 M 5
1825431 1825432 Zyran 2014 M 5
1825432 1825433 Zyrin 2014 M 5
%% Cell type:code id:bced77b4-a7c2-4539-8e33-4bdc5c99ffa5 tags:
``` python
import pandas as pd
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
summary = data.describe()
print(summary)
```
%% Output
Id Year Count
count 1.825433e+06 1.825433e+06 1.825433e+06
mean 9.127170e+05 1.972620e+03 1.846879e+02
std 5.269573e+05 3.352891e+01 1.566711e+03
min 1.000000e+00 1.880000e+03 5.000000e+00
25% 4.563590e+05 1.949000e+03 7.000000e+00
50% 9.127170e+05 1.982000e+03 1.200000e+01
75% 1.369075e+06 2.001000e+03 3.200000e+01
max 1.825433e+06 2.014000e+03 9.968000e+04
%% Cell type:code id:930bb0db-c5d5-4073-b90b-4b2faf54b09b tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Distinct values of the Name column; missing values are left out of the count.
distinct_names = data['Name'].dropna().unique()
unique_names_count = len(distinct_names)
print(unique_names_count)
```
%% Output
93889
%% Cell type:code id:229bb8f5-b2f2-43f9-ba48-2f6aeae5402e tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Number of distinct names within each Gender group.
name_column = data['Name']
unique_names_by_gender = name_column.groupby(data['Gender']).nunique()
print(unique_names_by_gender.to_string())
```
%% Output
Gender
F 64911
M 39199
%% Cell type:code id:8a21170a-3453-486c-bb23-72c038b0b35b tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Keep only the rows for gender 'M' recorded in 2010.
is_male = data['Gender'] == 'M'
is_2010 = data['Year'] == 2010
male_names_2010 = data[is_male & is_2010]
# Order by Count, largest first, and keep the first five rows.
ranked_2010 = male_names_2010.sort_values(by='Count', ascending=False)
top_5_male_names_2010 = ranked_2010.iloc[:5]
print(top_5_male_names_2010)
```
%% Output
Id Name Year Gender Count
1677392 1677393 Jacob 2010 M 22082
1677393 1677394 Ethan 2010 M 17985
1677394 1677395 Michael 2010 M 17308
1677395 1677396 Jayden 2010 M 17152
1677396 1677397 William 2010 M 17030
%% Cell type:code id:5d703d4a-5925-4825-ae73-d7c0f6612d81 tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Find the label of the row with the largest Count, then fetch that row.
peak_row_label = data['Count'].idxmax()
most_popular_name = data.loc[peak_row_label]
print(f"The most popular name is '{most_popular_name['Name']}' in {most_popular_name['Year']}")
```
%% Output
The most popular name is 'Linda' in 1947
%% Cell type:code id:8f7cca93-5963-495c-8781-4954b8a34af5 tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
counts = data['Count']
min_count = counts.min()
# Number of rows whose Count equals the smallest Count in the file.
at_minimum = counts == min_count
min_count_records = int(at_minimum.sum())
print(min_count_records)
```
%% Output
254615
%% Cell type:code id:aa57c3b3-df2b-4baa-8106-673180a18fff tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Distinct names per year; the grouped result is ordered by Year.
unique_names_per_year = data['Name'].groupby(data['Year']).nunique()
first_five_years = unique_names_per_year.iloc[:5]
print(first_five_years.to_string())
```
%% Output
Year
1880 1889
1881 1830
1882 2012
1883 1962
1884 2158
%% Cell type:code id:5c4b1d82-dfdd-4e4a-adba-413837a45349 tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Distinct names per year, then the year where that number peaks.
unique_names_per_year = data['Name'].groupby(data['Year']).nunique()
year_with_most_unique_names = unique_names_per_year.idxmax()
# Look the peak value up by its year label instead of recomputing the maximum.
max_unique_names = unique_names_per_year.loc[year_with_most_unique_names]
print(f"year {year_with_most_unique_names} with {max_unique_names} unique names.")
```
%% Output
year 2008 with 32488 unique names.
%% Cell type:code id:4ad64ee0-09bf-447f-a791-2236d164f7dc tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Restrict to 2008, then read the Name of the row with the largest Count.
names_2008 = data.loc[data['Year'].eq(2008)]
top_row_label = names_2008['Count'].idxmax()
most_popular_name_2008 = names_2008.loc[top_row_label, 'Name']
print(most_popular_name_2008)
```
%% Output
Jacob
%% Cell type:code id:dcb3f7a6-4a51-4e0b-b59a-66847b85829a tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Sum Count per (Year, Gender), then spread Gender into columns 'F' and 'M'.
yearly_totals = data.groupby(['Year', 'Gender'])['Count'].sum()
births_per_year_gender = yearly_totals.unstack()
# Count the years in which the 'F' total exceeds the 'M' total.
female_ahead = births_per_year_gender['F'].gt(births_per_year_gender['M'])
years_more_female_births = female_ahead.sum()
print(years_more_female_births)
```
%% Output
54
%% Cell type:code id:df949e94-f0bd-4b0d-8d96-0e875eb48a67 tags:
``` python
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Sum Count per (Year, Gender), then spread Gender into one column per gender.
yearly_totals = data.groupby(['Year', 'Gender'])['Count'].sum()
births_per_year_gender = yearly_totals.unstack()
# One line per gender; label the axes through the Axes object the plot returns.
ax = births_per_year_gender.plot()
ax.set_title('Total births per year by gender')
ax.set_xlabel('Year')
ax.set_ylabel('Total births per year')
plt.show()
```
%% Output
%% Cell type:code id:94ffcd43-3945-4371-ad44-a883a842f33d tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# For every name, how many distinct Gender values it appears with.
gender_neutral_names = data['Gender'].groupby(data['Name']).nunique()
# A name seen with two genders is counted as gender-neutral.
appears_with_both = gender_neutral_names.eq(2)
neutral_name_count = appears_with_both.sum()
print(neutral_name_count)
```
%% Output
10221
%% Cell type:code id:c23dc98e-acc4-4c29-b46d-d6a646fff899 tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Total Count over all rows where the name is 'Barbara' and the gender is 'M'.
is_barbara = data['Name'].eq('Barbara')
is_male = data['Gender'].eq('M')
barbara_boys_count = data.loc[is_barbara & is_male, 'Count'].sum()
print(barbara_boys_count)
```
%% Output
4139
%% Cell type:code id:8a96540b-6d88-47af-9ec0-267df3c57426 tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Names that appear with two distinct Gender values.
gender_neutral_names = data.groupby('Name')['Gender'].nunique()
neutral_names = gender_neutral_names[gender_neutral_names == 2].index
# Filter the frame once and reuse it. The original also built an unused
# per-name year count from a second, identical filter; that is dropped.
neutral_rows = data[data['Name'].isin(neutral_names)]
# Total Count per name, largest first; print the 15 leading names.
most_popular_neutral_names = neutral_rows.groupby('Name')['Count'].sum().sort_values(ascending=False)
print(most_popular_neutral_names.head(15).index)
```
%% Output
Index(['James', 'John', 'Robert', 'Michael', 'Mary', 'William', 'David',
'Joseph', 'Richard', 'Charles', 'Thomas', 'Christopher', 'Daniel',
'Elizabeth', 'Patricia'],
dtype='object', name='Name')
%% Cell type:code id:8341cc37-c11d-4366-a039-423cda5bc1f3 tags:
``` python
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# Yearly Count totals for each of the two names (all genders combined).
john_rows = data[data['Name'].eq('John')]
mary_rows = data[data['Name'].eq('Mary')]
john_data = john_rows.groupby('Year')['Count'].sum()
mary_data = mary_rows.groupby('Year')['Count'].sum()
# Draw both series on the same axes, John first, then Mary.
for label, yearly_total in (('John', john_data), ('Mary', mary_data)):
    plt.plot(yearly_total.index, yearly_total.values, label=label)
plt.title("Distribution of the names 'John' and 'Mary' over the years")
plt.xlabel('Year')
plt.ylabel('Count')
plt.legend(title='Name')
plt.show()
```
%% Output
%% Cell type:code id:cb409075-b622-4a6d-a44b-e91050ef343a tags:
``` python
import pandas as pd
data = pd.read_csv(r"C:\Users\skiba\Downloads\NationalNames.csv")
# For every year, the label of the row holding that year's largest Count.
top_row_labels = data.groupby('Year')['Count'].idxmax()
# Pull those rows, keep three columns, and renumber the result from zero.
top_rows = data.loc[top_row_labels, ['Year', 'Name', 'Count']]
most_popular_names_per_year = top_rows.reset_index(drop=True)
print(most_popular_names_per_year)
```
%% Output
Year Name Count
0 1880 John 9655
1 1881 John 8769
2 1882 John 9557
3 1883 John 8894
4 1884 John 9388
.. ... ... ...
130 2010 Isabella 22883
131 2011 Sophia 21816
132 2012 Sophia 22267
133 2013 Sophia 21147
134 2014 Emma 20799
[135 rows x 3 columns]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment