Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
Python Lab
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Andrii Skyba
Python Lab
Commits
e69af998
Commit
e69af998
authored
8 months ago
by
Andrii Skyba
Browse files
Options
Downloads
Patches
Plain Diff
lab 4,5,6
parent
31f42551
Branches
main
No related tags found
No related merge requests found
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
Lab4.ipynb
+943
-0
943 additions, 0 deletions
Lab4.ipynb
Lab5.ipynb
+187
-0
187 additions, 0 deletions
Lab5.ipynb
Lab6.ipynb
+262
-0
262 additions, 0 deletions
Lab6.ipynb
with
1392 additions
and
0 deletions
Lab4.ipynb
0 → 100644
+
943
−
0
View file @
e69af998
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "37adaf49-0222-4be7-8977-5957e21e6792",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting xlrd\n",
" Downloading xlrd-2.0.1-py2.py3-none-any.whl.metadata (3.4 kB)\n",
"Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)\n",
"Installing collected packages: xlrd\n",
"Successfully installed xlrd-2.0.1\n"
]
}
],
"source": [
"!pip install xlrd"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "687a9237-70e6-4c71-9f07-b2312dd79040",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting openpyxl\n",
" Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)\n",
"Collecting et-xmlfile (from openpyxl)\n",
" Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)\n",
"Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)\n",
"Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)\n",
"Installing collected packages: et-xmlfile, openpyxl\n",
"Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5\n"
]
}
],
"source": [
"!pip install openpyxl\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "af57f689-9056-483e-b262-81a4561030c9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country</th>\n",
" <th>Energy Supply</th>\n",
" <th>Energy Supply per Capita</th>\n",
" <th>% Renewable</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>American Samoa</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.641026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Bolivia</td>\n",
" <td>3.360000e+08</td>\n",
" <td>32</td>\n",
" <td>31.477120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>164</th>\n",
" <td>South Korea</td>\n",
" <td>1.100700e+10</td>\n",
" <td>221</td>\n",
" <td>2.279353</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Country Energy Supply Energy Supply per Capita % Renewable\n",
"3 American Samoa NaN ... 0.641026\n",
"24 Bolivia 3.360000e+08 32 31.477120\n",
"164 South Korea 1.100700e+10 221 2.279353"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Імпорт необхідних бібліотек\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Завантаження файлу Excel\n",
"file_path = 'C:/Users/skiba/python_khpi/En_In.xls'\n",
"energy_data = pd.read_excel(file_path, skiprows=17, usecols=\"C:F\")\n",
"\n",
"# Перейменування стовпців\n",
"energy_data.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']\n",
"\n",
"# Перетворення 'Energy Supply' у ГДж, заміна '...' на NaN\n",
"energy_data['Energy Supply'] = pd.to_numeric(energy_data['Energy Supply'], errors='coerce') * 1_000_000\n",
"\n",
"# Очищення назв країн від цифр та тексту в дужках\n",
"energy_data['Country'] = energy_data['Country'].str.replace(r'\\d+|\\(.*\\)', '', regex=True).str.strip()\n",
"\n",
"# Перейменування вказаних країн\n",
"country_renames = {\n",
" \"Republic of Korea\": \"South Korea\", \n",
" \"United States of America\": \"United States\", \n",
" \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\", \n",
" \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"\n",
"}\n",
"energy_data['Country'] = energy_data['Country'].replace(country_renames)\n",
"\n",
"# Фільтрація для перевірки результату (American Samoa, South Korea, Bolivia)\n",
"filtered_countries = energy_data.loc[energy_data['Country'].isin(['American Samoa', 'South Korea', 'Bolivia'])]\n",
"filtered_countries\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c4b4031d-5205-44e0-b0fa-e3585b18ec53",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country Name</th>\n",
" <th>Country Code</th>\n",
" <th>Indicator Name</th>\n",
" <th>Indicator Code</th>\n",
" <th>1960</th>\n",
" <th>1961</th>\n",
" <th>1962</th>\n",
" <th>1963</th>\n",
" <th>1964</th>\n",
" <th>1965</th>\n",
" <th>...</th>\n",
" <th>2006</th>\n",
" <th>2007</th>\n",
" <th>2008</th>\n",
" <th>2009</th>\n",
" <th>2010</th>\n",
" <th>2011</th>\n",
" <th>2012</th>\n",
" <th>2013</th>\n",
" <th>2014</th>\n",
" <th>2015</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Aruba</td>\n",
" <td>ABW</td>\n",
" <td>GDP at market prices (constant 2010 US$)</td>\n",
" <td>NY.GDP.MKTP.KD</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.467704e+09</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 60 columns</p>\n",
"</div>"
],
"text/plain": [
" Country Name Country Code Indicator Name \\\n",
"0 Aruba ABW GDP at market prices (constant 2010 US$) \n",
"\n",
" Indicator Code 1960 1961 1962 1963 1964 1965 ... 2006 2007 2008 \\\n",
"0 NY.GDP.MKTP.KD NaN NaN NaN NaN NaN NaN ... NaN NaN NaN \n",
"\n",
" 2009 2010 2011 2012 2013 2014 2015 \n",
"0 NaN 2.467704e+09 NaN NaN NaN NaN NaN \n",
"\n",
"[1 rows x 60 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"import pandas as pd\n",
"\n",
"\n",
"gdp_data = pd.read_csv('C:/Users/skiba/python_khpi/gpd.csv', skiprows=4)\n",
"\n",
"\n",
"country_renames = {\n",
" \"Korea, Rep.\": \"South Korea\", \n",
" \"Iran, Islamic Rep.\": \"Iran\", \n",
" \"Hong Kong SAR, China\": \"Hong Kong\"\n",
"}\n",
"gdp_data['Country Name'] = gdp_data['Country Name'].replace(country_renames)\n",
"\n",
"\n",
"gdp_data.head(1)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "22abd678-8561-410c-ab1d-58ee49394567",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(15, 20)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rank</th>\n",
" <th>Documents</th>\n",
" <th>Citable documents</th>\n",
" <th>Citations</th>\n",
" <th>Self-citations</th>\n",
" <th>Citations per document</th>\n",
" <th>H index</th>\n",
" <th>Energy Supply</th>\n",
" <th>Energy Supply per Capita</th>\n",
" <th>% Renewable</th>\n",
" <th>2006</th>\n",
" <th>2007</th>\n",
" <th>2008</th>\n",
" <th>2009</th>\n",
" <th>2010</th>\n",
" <th>2011</th>\n",
" <th>2012</th>\n",
" <th>2013</th>\n",
" <th>2014</th>\n",
" <th>2015</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>China</th>\n",
" <td>1</td>\n",
" <td>127050</td>\n",
" <td>126767</td>\n",
" <td>597237</td>\n",
" <td>411683</td>\n",
" <td>4.70</td>\n",
" <td>138</td>\n",
" <td>1.271910e+11</td>\n",
" <td>93</td>\n",
" <td>19.75491</td>\n",
" <td>3.992331e+12</td>\n",
" <td>4.559041e+12</td>\n",
" <td>4.997775e+12</td>\n",
" <td>5.459247e+12</td>\n",
" <td>6.039659e+12</td>\n",
" <td>6.612490e+12</td>\n",
" <td>7.124978e+12</td>\n",
" <td>7.672448e+12</td>\n",
" <td>8.230121e+12</td>\n",
" <td>8.797999e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>United States</th>\n",
" <td>2</td>\n",
" <td>96661</td>\n",
" <td>94747</td>\n",
" <td>792274</td>\n",
" <td>265436</td>\n",
" <td>8.20</td>\n",
" <td>230</td>\n",
" <td>9.083800e+10</td>\n",
" <td>286</td>\n",
" <td>11.57098</td>\n",
" <td>1.479230e+13</td>\n",
" <td>1.505540e+13</td>\n",
" <td>1.501149e+13</td>\n",
" <td>1.459484e+13</td>\n",
" <td>1.496437e+13</td>\n",
" <td>1.520402e+13</td>\n",
" <td>1.554216e+13</td>\n",
" <td>1.577367e+13</td>\n",
" <td>1.615662e+13</td>\n",
" <td>1.654857e+13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Japan</th>\n",
" <td>3</td>\n",
" <td>30504</td>\n",
" <td>30287</td>\n",
" <td>223024</td>\n",
" <td>61554</td>\n",
" <td>7.31</td>\n",
" <td>134</td>\n",
" <td>1.898400e+10</td>\n",
" <td>149</td>\n",
" <td>10.23282</td>\n",
" <td>5.496542e+12</td>\n",
" <td>5.617036e+12</td>\n",
" <td>5.558527e+12</td>\n",
" <td>5.251308e+12</td>\n",
" <td>5.498718e+12</td>\n",
" <td>5.473738e+12</td>\n",
" <td>5.569102e+12</td>\n",
" <td>5.644659e+12</td>\n",
" <td>5.642884e+12</td>\n",
" <td>5.669563e+12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Rank Documents Citable documents Citations Self-citations \\\n",
"Country \n",
"China 1 127050 126767 597237 411683 \n",
"United States 2 96661 94747 792274 265436 \n",
"Japan 3 30504 30287 223024 61554 \n",
"\n",
" Citations per document H index Energy Supply \\\n",
"Country \n",
"China 4.70 138 1.271910e+11 \n",
"United States 8.20 230 9.083800e+10 \n",
"Japan 7.31 134 1.898400e+10 \n",
"\n",
" Energy Supply per Capita % Renewable 2006 \\\n",
"Country \n",
"China 93 19.75491 3.992331e+12 \n",
"United States 286 11.57098 1.479230e+13 \n",
"Japan 149 10.23282 5.496542e+12 \n",
"\n",
" 2007 2008 2009 2010 \\\n",
"Country \n",
"China 4.559041e+12 4.997775e+12 5.459247e+12 6.039659e+12 \n",
"United States 1.505540e+13 1.501149e+13 1.459484e+13 1.496437e+13 \n",
"Japan 5.617036e+12 5.558527e+12 5.251308e+12 5.498718e+12 \n",
"\n",
" 2011 2012 2013 2014 \\\n",
"Country \n",
"China 6.612490e+12 7.124978e+12 7.672448e+12 8.230121e+12 \n",
"United States 1.520402e+13 1.554216e+13 1.577367e+13 1.615662e+13 \n",
"Japan 5.473738e+12 5.569102e+12 5.644659e+12 5.642884e+12 \n",
"\n",
" 2015 \n",
"Country \n",
"China 8.797999e+12 \n",
"United States 1.654857e+13 \n",
"Japan 5.669563e+12 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"import pandas as pd\n",
"\n",
"\n",
"scimago_data = pd.read_excel('C:/Users/skiba/python_khpi/scimagojr.xlsx')\n",
"\n",
"scimago_top15 = scimago_data[scimago_data['Rank'] <= 15]\n",
"\n",
"energy_data = pd.read_excel('C:/Users/skiba/python_khpi/En_In.xls', skiprows=17, usecols=\"C:F\")\n",
"energy_data.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']\n",
"energy_data['Energy Supply'] = pd.to_numeric(energy_data['Energy Supply'], errors='coerce') * 1_000_000\n",
"energy_data['Country'] = energy_data['Country'].str.replace(r'\\d+|\\(.*\\)', '', regex=True).str.strip()\n",
"country_renames = {\n",
" \"Republic of Korea\": \"South Korea\", \n",
" \"United States of America\": \"United States\", \n",
" \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\", \n",
" \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"\n",
"}\n",
"energy_data['Country'] = energy_data['Country'].replace(country_renames)\n",
"\n",
"gdp_data = pd.read_csv('C:/Users/skiba/python_khpi/gpd.csv', skiprows=4)\n",
"gdp_data.rename(columns={'Country Name': 'Country'}, inplace=True)\n",
"gdp_filtered = gdp_data[['Country', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']]\n",
"\n",
"gdp_country_renames = {\n",
" \"Korea, Rep.\": \"South Korea\", \n",
" \"Iran, Islamic Rep.\": \"Iran\", \n",
" \"Hong Kong SAR, China\": \"Hong Kong\"\n",
"}\n",
"\n",
"# Заміна назв країн у GDP із використанням .loc\n",
"gdp_filtered.loc[:, 'Country'] = gdp_filtered['Country'].replace(gdp_country_renames)\n",
"\n",
"\n",
"\n",
"# Об'єднання даних\n",
"merged_data = scimago_top15.merge(energy_data, how='inner', on='Country')\n",
"merged_data = merged_data.merge(gdp_filtered, how='inner', on='Country')\n",
"\n",
"\n",
"final_columns = ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations', \n",
" 'Citations per document', 'H index', 'Energy Supply', 'Energy Supply per Capita', \n",
" '% Renewable', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']\n",
"final_data = merged_data.set_index('Country')[final_columns]\n",
"\n",
"print(final_data.shape) \n",
"final_data.head(3) \n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e1294e32-5a5b-4632-aaed-f247d7c944cb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Country\n",
"United States 1.536434e+13\n",
"China 6.348609e+12\n",
"Japan 5.542208e+12\n",
"Germany 3.493025e+12\n",
"France 2.681725e+12\n",
"United Kingdom 2.487907e+12\n",
"Brazil 2.189794e+12\n",
"Italy 2.120175e+12\n",
"India 1.769297e+12\n",
"Canada 1.660647e+12\n",
"Russian Federation 1.565459e+12\n",
"Spain 1.418078e+12\n",
"Australia 1.164043e+12\n",
"South Korea 1.106715e+12\n",
"Iran 4.441558e+11\n",
"Name: avgGDP, dtype: float64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def task_eight(data):\n",
" # Обчислення середнього значення ВВП за період з 2006 по 2015 рік\n",
" avg_gdp = data.loc[:, '2006':'2015'].mean(axis=1)\n",
" # Сортування за спаданням\n",
" avg_gdp_sorted = avg_gdp.sort_values(ascending=False)\n",
" \n",
" avg_gdp_sorted.name = 'avgGDP'\n",
" return avg_gdp_sorted\n",
"\n",
"\n",
"task_eight_result = task_eight(final_data)\n",
"task_eight_result\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c303dc26-4820-49af-bf8e-b504883c9c39",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('France', 153345695364.24023)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Функція для обчислення зміни ВВП для країни з 5-м найвищим середнім ВВП\n",
"def task_nine(data):\n",
" # Обчислення середнього ВВП кожної країни за останні 10 років та сортування за спаданням\n",
" avg_gdp = data.loc[:, '2006':'2015'].mean(axis=1).sort_values(ascending=False)\n",
" # Отримання країни з 5-м найвищим середнім ВВП\n",
" fifth_country = avg_gdp.index[4]\n",
" # Обчислення зміни ВВП за останні 10 років для цієї країни\n",
" gdp_change = data.loc[fifth_country, '2015'] - data.loc[fifth_country, '2006']\n",
" return (fifth_country, gdp_change)\n",
"\n",
"\n",
"task_nine_result = task_nine(final_data)\n",
"task_nine_result\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0f5c8f0e-f124-43bb-8181-9a77c0e959ff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('Brazil', 69.64803)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Функція для визначення країни з максимальним відсотком поновлюваних джерел енергії\n",
"def task_ten(data):\n",
" # Знаходимо країну з найбільшим % Renewable\n",
" max_renewable_country = data['% Renewable'].idxmax()\n",
" # Отримуємо значення максимального % Renewable\n",
" max_renewable_value = data.loc[max_renewable_country, '% Renewable']\n",
" return (max_renewable_country, max_renewable_value)\n",
"\n",
"\n",
"task_ten_result = task_ten(final_data)\n",
"task_ten_result\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "3d83b785-0fad-4101-99af-88bda4360a73",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('Japan', 127409395.97315437)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Функція для оцінки чисельності населення та визначення шостої країни за населенням\n",
"def task_eleven(data):\n",
" # Оцінка чисельності населення\n",
" data['Estimated Population'] = data['Energy Supply'] / data['Energy Supply per Capita']\n",
" # Сортування за населенням за спаданням\n",
" sorted_population = data['Estimated Population'].sort_values(ascending=False)\n",
" # Отримання 6-ї країни за населенням\n",
" sixth_country = sorted_population.index[5]\n",
" sixth_population = sorted_population.iloc[5]\n",
" return (sixth_country, sixth_population)\n",
"\n",
"\n",
"task_eleven_result = task_eleven(final_data)\n",
"task_eleven_result\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "4b01c65d-49cd-4a01-a7d1-e7c024543761",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7940010435442946"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Функція для обчислення кореляції між цитованими документами на душу населення та енергопостачанням на душу населення\n",
"def task_twelve(data):\n",
" # Оцінка чисельності населення\n",
" data['Estimated Population'] = data['Energy Supply'] / data['Energy Supply per Capita']\n",
" # Обчислення цитованих документів на душу населення\n",
" data['Cited Documents per Capita'] = data['Citable documents'] / data['Estimated Population']\n",
" # Переконуємося, що дані числові, і видаляємо пропущені значення\n",
" data_cleaned = data[['Cited Documents per Capita', 'Energy Supply per Capita']].dropna()\n",
" data_cleaned = data_cleaned.apply(pd.to_numeric, errors='coerce').dropna() # Забезпечуємо числовий тип даних\n",
" # Обчислення кореляції\n",
" correlation = data_cleaned['Cited Documents per Capita'].corr(data_cleaned['Energy Supply per Capita'])\n",
" return correlation\n",
"\n",
"task_twelve_result = task_twelve(final_data)\n",
"task_twelve_result\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "cfb7eaa8-6bc1-4df5-99ad-bf373fd8124d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Country\n",
"Australia 0\n",
"Brazil 1\n",
"Canada 1\n",
"China 1\n",
"France 1\n",
"Germany 1\n",
"India 0\n",
"Iran 0\n",
"Italy 1\n",
"Japan 0\n",
"Russian Federation 1\n",
"South Korea 0\n",
"Spain 1\n",
"United Kingdom 0\n",
"United States 0\n",
"Name: High Renewable, dtype: int32"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Функція для створення нового стовпця на основі медіани % Renewable\n",
"def task_thirteen(data):\n",
" # Обчислення медіани для % Renewable\n",
" renewable_median = data['% Renewable'].median()\n",
" # Створення нового стовпця: 1, якщо % Renewable >= медіани, інакше 0\n",
" data['High Renewable'] = (data['% Renewable'] >= renewable_median).astype(int)\n",
" # Повертаємо Series, відсортований за Rank у порядку зростання, індексований за назвою країни\n",
" sorted_series = data['High Renewable'].sort_index(ascending=True)\n",
" return sorted_series\n",
"\n",
"task_thirteen_result = task_thirteen(final_data)\n",
"task_thirteen_result\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d2402833-9146-4a85-9efa-a63f81268620",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>size</th>\n",
" <th>sum</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Continent</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Asia</th>\n",
" <td>5</td>\n",
" <td>2898666386.6106</td>\n",
" <td>579733277.32212</td>\n",
" <td>6.790979e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Australia</th>\n",
" <td>1</td>\n",
" <td>23316017.316017</td>\n",
" <td>23316017.316017</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Europe</th>\n",
" <td>6</td>\n",
" <td>457929667.216372</td>\n",
" <td>76321611.202729</td>\n",
" <td>3.464767e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>North America</th>\n",
" <td>2</td>\n",
" <td>352855249.48025</td>\n",
" <td>176427624.740125</td>\n",
" <td>1.996696e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>South America</th>\n",
" <td>1</td>\n",
" <td>205915254.237288</td>\n",
" <td>205915254.237288</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" size sum mean std\n",
"Continent \n",
"Asia 5 2898666386.6106 579733277.32212 6.790979e+08\n",
"Australia 1 23316017.316017 23316017.316017 NaN\n",
"Europe 6 457929667.216372 76321611.202729 3.464767e+07\n",
"North America 2 352855249.48025 176427624.740125 1.996696e+08\n",
"South America 1 205915254.237288 205915254.237288 NaN"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def task_forteen(data):\n",
" # Словник континентів\n",
" ContinentDict = {\n",
" 'China':'Asia', 'United States':'North America', 'Japan':'Asia',\n",
" 'United Kingdom':'Europe', 'Russian Federation':'Europe', 'Canada':'North America',\n",
" 'Germany':'Europe', 'India':'Asia', 'France':'Europe', 'South Korea':'Asia',\n",
" 'Italy':'Europe', 'Spain':'Europe', 'Iran':'Asia', 'Australia':'Australia',\n",
" 'Brazil':'South America'\n",
" }\n",
" \n",
" data['Estimated Population'] = data['Energy Supply'] / data['Energy Supply per Capita']\n",
" data['Continent'] = data.index.to_series().map(ContinentDict)\n",
" \n",
" continent_stats = data.groupby('Continent')['Estimated Population'].agg(['size', 'sum', 'mean', 'std'])\n",
" return continent_stats\n",
"\n",
"task_forteen_result = task_forteen(final_data)\n",
"task_forteen_result\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "791927f6-9b3d-47b8-b852-405420083db6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1200x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"def task_fifteen(data):\n",
" continent_colors = {\n",
" 'Asia': 'red', 'Australia': 'yellow', 'Europe': 'green', \n",
" 'North America': 'blue', 'South America': 'orange'\n",
" }\n",
" \n",
" data['Color'] = data['Continent'].map(continent_colors)\n",
" \n",
" # Побудова бульбашкової діаграми\n",
" plt.figure(figsize=(12, 8))\n",
" plt.scatter(\n",
" data['Rank'], data['% Renewable'], \n",
" s=data['2015'] / 1e10, # Масштабування розміру бульбашок\n",
" c=data['Color'], alpha=0.6, edgecolors=\"w\", linewidth=0.5\n",
" )\n",
" \n",
" for i, country in enumerate(data.index):\n",
" plt.text(data['Rank'][i], data['% Renewable'][i], country, ha='center', va='center', fontsize=8)\n",
"\n",
" # Підпис осей та заголовок\n",
" plt.xlabel('Rank')\n",
" plt.ylabel('% Renewable')\n",
" plt.title('Bubble chart')\n",
" plt.show()\n",
"\n",
"\n",
"task_fifteen(final_data)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bcca3423-4203-453a-afe1-bd0df4be4ff6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
%% Cell type:code id:37adaf49-0222-4be7-8977-5957e21e6792 tags:
```
python
!
pip
install
xlrd
```
%% Output
Collecting xlrd
Downloading xlrd-2.0.1-py2.py3-none-any.whl.metadata (3.4 kB)
Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
Installing collected packages: xlrd
Successfully installed xlrd-2.0.1
%% Cell type:code id:687a9237-70e6-4c71-9f07-b2312dd79040 tags:
```
python
!
pip
install
openpyxl
```
%% Output
Collecting openpyxl
Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
%% Cell type:code id:af57f689-9056-483e-b262-81a4561030c9 tags:
```
python
# Імпорт необхідних бібліотек
import
pandas
as
pd
import
numpy
as
np
# Load the energy workbook: skip the first 17 rows and keep Excel columns C-F.
file_path = 'C:/Users/skiba/python_khpi/En_In.xls'
energy_data = pd.read_excel(file_path, skiprows=17, usecols="C:F")

# Give the four retained columns readable names.
energy_data.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']

# Coerce every numeric column, not only 'Energy Supply': the sheet marks
# missing values with '...', and any column left uncoerced keeps that string
# and stays object-typed. errors='coerce' turns such cells into NaN.
for column in ('Energy Supply', 'Energy Supply per Capita', '% Renewable'):
    energy_data[column] = pd.to_numeric(energy_data[column], errors='coerce')

# Scale 'Energy Supply' by 1,000,000 (per the original author's note this
# converts the values to gigajoules).
energy_data['Energy Supply'] = energy_data['Energy Supply'] * 1_000_000

# Strip digits and parenthesised text from country names, then trim whitespace.
energy_data['Country'] = energy_data['Country'].str.replace(r'\d+|\(.*\)', '', regex=True).str.strip()

# Replace the long official names with the short ones used in the rest of the lab.
country_renames = {
    "Republic of Korea": "South Korea",
    "United States of America": "United States",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "China, Hong Kong Special Administrative Region": "Hong Kong"
}
energy_data['Country'] = energy_data['Country'].replace(country_renames)

# Spot check of the cleaning on three countries (American Samoa, South Korea, Bolivia).
filtered_countries = energy_data.loc[energy_data['Country'].isin(['American Samoa', 'South Korea', 'Bolivia'])]
filtered_countries
```
%% Output
Country Energy Supply Energy Supply per Capita % Renewable
3 American Samoa NaN ... 0.641026
24 Bolivia 3.360000e+08 32 31.477120
164 South Korea 1.100700e+10 221 2.279353
%% Cell type:code id:c4b4031d-5205-44e0-b0fa-e3585b18ec53 tags:
```
python
import
pandas
as
pd
# Read the GDP table; the first four lines of the CSV are skipped.
gdp_data = pd.read_csv('C:/Users/skiba/python_khpi/gpd.csv', skiprows=4)

# Map the spellings used in this file to the short names used elsewhere in the lab.
country_renames = {
    "Korea, Rep.": "South Korea",
    "Iran, Islamic Rep.": "Iran",
    "Hong Kong SAR, China": "Hong Kong"
}
renamed_countries = gdp_data['Country Name'].replace(country_renames)
gdp_data['Country Name'] = renamed_countries

# Show the first row as a quick check of the loaded table.
gdp_data.head(1)
```
%% Output
Country Name Country Code Indicator Name \
0 Aruba ABW GDP at market prices (constant 2010 US$)
Indicator Code 1960 1961 1962 1963 1964 1965 ... 2006 2007 2008 \
0 NY.GDP.MKTP.KD NaN NaN NaN NaN NaN NaN ... NaN NaN NaN
2009 2010 2011 2012 2013 2014 2015
0 NaN 2.467704e+09 NaN NaN NaN NaN NaN
[1 rows x 60 columns]
%% Cell type:code id:22abd678-8561-410c-ab1d-58ee49394567 tags:
```
python
import
pandas
as
pd
# Journal ranking table; only rows with Rank <= 15 take part in the merge.
scimago_data = pd.read_excel('C:/Users/skiba/python_khpi/scimagojr.xlsx')
scimago_top15 = scimago_data[scimago_data['Rank'] <= 15]

# Energy indicators: skip the first 17 rows and keep Excel columns C-F.
energy_data = pd.read_excel('C:/Users/skiba/python_khpi/En_In.xls', skiprows=17, usecols="C:F")
energy_data.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']

# Coerce every numeric column so '...' placeholders become NaN and the
# columns carry a numeric dtype into the merged table.
for column in ('Energy Supply', 'Energy Supply per Capita', '% Renewable'):
    energy_data[column] = pd.to_numeric(energy_data[column], errors='coerce')

# Scale 'Energy Supply' by 1,000,000 (per the original author's note this
# converts the values to gigajoules).
energy_data['Energy Supply'] = energy_data['Energy Supply'] * 1_000_000

# Strip digits and parenthesised text from country names, then trim whitespace.
energy_data['Country'] = energy_data['Country'].str.replace(r'\d+|\(.*\)', '', regex=True).str.strip()
country_renames = {
    "Republic of Korea": "South Korea",
    "United States of America": "United States",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "China, Hong Kong Special Administrative Region": "Hong Kong"
}
energy_data['Country'] = energy_data['Country'].replace(country_renames)

# GDP table: skip the first four lines and align the key column name with the other tables.
gdp_data = pd.read_csv('C:/Users/skiba/python_khpi/gpd.csv', skiprows=4)
gdp_data.rename(columns={'Country Name': 'Country'}, inplace=True)

gdp_country_renames = {
    "Korea, Rep.": "South Korea",
    "Iran, Islamic Rep.": "Iran",
    "Hong Kong SAR, China": "Hong Kong"
}

# Take an explicit copy of the needed columns. Assigning into a bare column
# subset of gdp_data is chained assignment, which pandas warns about and may
# not apply reliably; with .copy() the rename below targets an independent
# frame and gdp_data itself is left unchanged.
gdp_years = [str(year) for year in range(2006, 2016)]
gdp_filtered = gdp_data[['Country'] + gdp_years].copy()
gdp_filtered['Country'] = gdp_filtered['Country'].replace(gdp_country_renames)

# Inner-join the three tables on the country name.
merged_data = scimago_top15.merge(energy_data, how='inner', on='Country')
merged_data = merged_data.merge(gdp_filtered, how='inner', on='Country')

# Final column order: ranking metrics, energy indicators, then GDP for 2006-2015.
final_columns = ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',
                 'Citations per document', 'H index', 'Energy Supply', 'Energy Supply per Capita',
                 '% Renewable', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']
final_data = merged_data.set_index('Country')[final_columns]

print(final_data.shape)
final_data.head(3)
```
%% Output
(15, 20)
Rank Documents Citable documents Citations Self-citations \
Country
China 1 127050 126767 597237 411683
United States 2 96661 94747 792274 265436
Japan 3 30504 30287 223024 61554
Citations per document H index Energy Supply \
Country
China 4.70 138 1.271910e+11
United States 8.20 230 9.083800e+10
Japan 7.31 134 1.898400e+10
Energy Supply per Capita % Renewable 2006 \
Country
China 93 19.75491 3.992331e+12
United States 286 11.57098 1.479230e+13
Japan 149 10.23282 5.496542e+12
2007 2008 2009 2010 \
Country
China 4.559041e+12 4.997775e+12 5.459247e+12 6.039659e+12
United States 1.505540e+13 1.501149e+13 1.459484e+13 1.496437e+13
Japan 5.617036e+12 5.558527e+12 5.251308e+12 5.498718e+12
2011 2012 2013 2014 \
Country
China 6.612490e+12 7.124978e+12 7.672448e+12 8.230121e+12
United States 1.520402e+13 1.554216e+13 1.577367e+13 1.615662e+13
Japan 5.473738e+12 5.569102e+12 5.644659e+12 5.642884e+12
2015
Country
China 8.797999e+12
United States 1.654857e+13
Japan 5.669563e+12
%% Cell type:code id:e1294e32-5a5b-4632-aaed-f247d7c944cb tags:
```
python
def task_eight(data):
    """Return each row's mean over the '2006'..'2015' columns, largest first.

    Args:
        data: DataFrame containing the columns '2006' through '2015'.

    Returns:
        A Series named 'avgGDP', indexed like ``data`` and sorted in
        descending order.
    """
    # Label-based slice: takes every column from '2006' to '2015' inclusive.
    yearly_gdp = data.loc[:, '2006':'2015']
    ranking = yearly_gdp.mean(axis=1).sort_values(ascending=False)
    ranking.name = 'avgGDP'
    return ranking
task_eight_result
=
task_eight
(
final_data
)
task_eight_result
```
%% Output
Country
United States 1.536434e+13
China 6.348609e+12
Japan 5.542208e+12
Germany 3.493025e+12
France 2.681725e+12
United Kingdom 2.487907e+12
Brazil 2.189794e+12
Italy 2.120175e+12
India 1.769297e+12
Canada 1.660647e+12
Russian Federation 1.565459e+12
Spain 1.418078e+12
Australia 1.164043e+12
South Korea 1.106715e+12
Iran 4.441558e+11
Name: avgGDP, dtype: float64
%% Cell type:code id:c303dc26-4820-49af-bf8e-b504883c9c39 tags:
```
python
# GDP change for the country with the 5th highest average GDP
def task_nine(data):
    """Return the 2006 -> 2015 GDP change of the 5th-ranked country.

    Countries are ranked by their mean over the '2006'..'2015' column range,
    highest first; the fifth entry of that ranking is selected.

    Args:
        data: DataFrame indexed by country with columns '2006' through '2015'.

    Returns:
        A tuple ``(country, change)`` where ``change`` is the '2015' value
        minus the '2006' value for that country.

    Raises:
        IndexError: if ``data`` has fewer than five rows.
    """
    ranking = (
        data.loc[:, '2006':'2015']
        .mean(axis=1)
        .sort_values(ascending=False)
    )
    country = ranking.index[4]  # position 4 is the fifth-highest average

    gdp_start, gdp_end = (data.loc[country, year] for year in ('2006', '2015'))
    return (country, gdp_end - gdp_start)
task_nine_result
=
task_nine
(
final_data
)
task_nine_result
```
%% Output
('France', 153345695364.24023)
%% Cell type:code id:0f5c8f0e-f124-43bb-8181-9a77c0e959ff tags:
```
python
# Country with the largest share of renewable energy
def task_ten(data):
    """Return the country with the highest '% Renewable' and that value.

    Args:
        data: DataFrame indexed by country with a '% Renewable' column.

    Returns:
        A tuple ``(country, percent_renewable)``.
    """
    top_country = data['% Renewable'].idxmax()
    top_share = data.loc[top_country, '% Renewable']
    return (top_country, top_share)
task_ten_result
=
task_ten
(
final_data
)
task_ten_result
```
%% Output
('Brazil', 69.64803)
%% Cell type:code id:3d83b785-0fad-4101-99af-88bda4360a73 tags:
```
python
# Estimate population and find the sixth most populous country
def task_eleven(data):
    """Return the sixth most populous country and its estimated population.

    Population is estimated as 'Energy Supply' divided by
    'Energy Supply per Capita'. The estimate is held in a local Series, so
    the caller's DataFrame is not modified by this function.

    Args:
        data: DataFrame indexed by country with 'Energy Supply' and
            'Energy Supply per Capita' columns.

    Returns:
        A tuple ``(country, estimated_population)`` for the entry in sixth
        position when the estimates are sorted in descending order.

    Raises:
        IndexError: if ``data`` has fewer than six rows.
    """
    estimated_population = (
        data['Energy Supply'] / data['Energy Supply per Capita']
    )
    ranked = estimated_population.sort_values(ascending=False)

    # Position 5 is the sixth-largest estimate.
    return (ranked.index[5], ranked.iloc[5])
task_eleven_result
=
task_eleven
(
final_data
)
task_eleven_result
```
%% Output
('Japan', 127409395.97315437)
%% Cell type:code id:4b01c65d-49cd-4a01-a7d1-e7c024543761 tags:
```
python
# Correlation between citable documents per capita and energy supply per capita
def task_twelve(data):
    """Correlate citable documents per capita with energy supply per capita.

    Population is estimated as 'Energy Supply' / 'Energy Supply per Capita',
    and citable documents per capita as 'Citable documents' / population.
    All intermediate values live in a local frame, so the caller's
    DataFrame is not modified by this function.

    Args:
        data: DataFrame with 'Energy Supply', 'Energy Supply per Capita'
            and 'Citable documents' columns.

    Returns:
        The correlation coefficient produced by ``Series.corr`` (Pearson by
        default) between the two per-capita series.
    """
    estimated_population = (
        data['Energy Supply'] / data['Energy Supply per Capita']
    )
    per_capita = pd.DataFrame({
        'Cited Documents per Capita':
            data['Citable documents'] / estimated_population,
        'Energy Supply per Capita': data['Energy Supply per Capita'],
    })

    # Force numeric dtypes (non-numeric entries become NaN), then drop every
    # row that is missing either value; one pass covers both cases.
    per_capita = per_capita.apply(pd.to_numeric, errors='coerce').dropna()

    return per_capita['Cited Documents per Capita'].corr(
        per_capita['Energy Supply per Capita']
    )
task_twelve_result
=
task_twelve
(
final_data
)
task_twelve_result
```
%% Output
0.7940010435442946
%% Cell type:code id:cfb7eaa8-6bc1-4df5-99ad-bf373fd8124d tags:
```
python
# Flag countries whose % Renewable is at or above the median
def task_thirteen(data):
    """Return a 0/1 flag per country for '% Renewable' >= the median.

    The result is ordered by ascending 'Rank'. The previous implementation
    called ``sort_index``, which ordered the result alphabetically by
    country name instead of by rank as its own comment described. The flag
    is built on a sorted copy, so the caller's DataFrame is not modified.

    Args:
        data: DataFrame indexed by country with 'Rank' and '% Renewable'
            columns.

    Returns:
        An integer Series named 'High Renewable', indexed by country, with
        1 where '% Renewable' is at or above the median and 0 otherwise.
    """
    renewable_median = data['% Renewable'].median()

    # sort_values returns a new frame; a stable sort keeps the original
    # relative order of any rows that share the same rank.
    by_rank = data.sort_values('Rank', kind='stable')

    high_renewable = (by_rank['% Renewable'] >= renewable_median).astype(int)
    return high_renewable.rename('High Renewable')
task_thirteen_result
=
task_thirteen
(
final_data
)
task_thirteen_result
```
%% Output
Country
Australia 0
Brazil 1
Canada 1
China 1
France 1
Germany 1
India 0
Iran 0
Italy 1
Japan 0
Russian Federation 1
South Korea 0
Spain 1
United Kingdom 0
United States 0
Name: High Renewable, dtype: int32
%% Cell type:code id:d2402833-9146-4a85-9efa-a63f81268620 tags:
```
python
def task_forteen(data):
    """Summarise estimated population per continent.

    Adds two columns to ``data`` in place: 'Estimated Population'
    ('Energy Supply' / 'Energy Supply per Capita') and 'Continent' (looked
    up from the country name in the index). Countries missing from the
    lookup table get NaN as their continent.

    Args:
        data: DataFrame indexed by country name with 'Energy Supply' and
            'Energy Supply per Capita' columns.

    Returns:
        A DataFrame indexed by continent with the columns 'size', 'sum',
        'mean' and 'std' of the estimated population.
    """
    # Continent membership, listed per continent and inverted below into a
    # country -> continent lookup.
    countries_by_continent = {
        'Asia': ('China', 'Japan', 'India', 'South Korea', 'Iran'),
        'North America': ('United States', 'Canada'),
        'Europe': (
            'United Kingdom',
            'Russian Federation',
            'Germany',
            'France',
            'Italy',
            'Spain',
        ),
        'Australia': ('Australia',),
        'South America': ('Brazil',),
    }
    continent_of = {
        country: continent
        for continent, countries in countries_by_continent.items()
        for country in countries
    }

    supply = data['Energy Supply']
    supply_per_capita = data['Energy Supply per Capita']
    data['Estimated Population'] = supply / supply_per_capita
    data['Continent'] = data.index.to_series().map(continent_of)

    population_by_continent = data.groupby('Continent')['Estimated Population']
    return population_by_continent.agg(['size', 'sum', 'mean', 'std'])
task_forteen_result
=
task_forteen
(
final_data
)
task_forteen_result
```
%% Output
size sum mean std
Continent
Asia 5 2898666386.6106 579733277.32212 6.790979e+08
Australia 1 23316017.316017 23316017.316017 NaN
Europe 6 457929667.216372 76321611.202729 3.464767e+07
North America 2 352855249.48025 176427624.740125 1.996696e+08
South America 1 205915254.237288 205915254.237288 NaN
%% Cell type:code id:791927f6-9b3d-47b8-b852-405420083db6 tags:
```
python
import
matplotlib.pyplot
as
plt
def task_fifteen(data):
    """Draw a bubble chart of '% Renewable' against 'Rank'.

    Each country is one bubble: its area is the '2015' column divided by
    1e10, its color is chosen by continent, and the country name is drawn
    at the bubble's center. The color lookup is kept in a local Series, so
    no column is added to the caller's DataFrame.

    Args:
        data: DataFrame indexed by country name with 'Rank', '% Renewable',
            '2015' and 'Continent' columns. In this notebook 'Continent' is
            added by ``task_forteen``, which therefore has to run first.

    Raises:
        KeyError: if ``data`` has no 'Continent' column.
    """
    continent_colors = {
        'Asia': 'red',
        'Australia': 'yellow',
        'Europe': 'green',
        'North America': 'blue',
        'South America': 'orange',
    }

    if 'Continent' not in data.columns:
        raise KeyError(
            "'Continent' column is missing; run task_forteen(data) first"
        )

    colors = data['Continent'].map(continent_colors)
    ranks = data['Rank']
    renewable = data['% Renewable']

    # Draw the bubble chart
    plt.figure(figsize=(12, 8))
    plt.scatter(
        ranks,
        renewable,
        s=data['2015'] / 1e10,  # scale the raw values down to marker sizes
        c=colors,
        alpha=0.6,
        edgecolors="w",
        linewidth=0.5,
    )

    # Iterate the values directly instead of data['Rank'][i]: an integer key
    # on a label-indexed Series relies on a positional fallback that pandas
    # has deprecated.
    for country, rank, share in zip(data.index, ranks, renewable):
        plt.text(rank, share, country, ha='center', va='center', fontsize=8)

    # Axis labels and title
    plt.xlabel('Rank')
    plt.ylabel('% Renewable')
    plt.title('Bubble chart')
    plt.show()
task_fifteen
(
final_data
)
```
%% Output
%% Cell type:code id:bcca3423-4203-453a-afe1-bd0df4be4ff6 tags:
```
python
```
This diff is collapsed.
Click to expand it.
Lab5.ipynb
0 → 100644
+
187
−
0
View file @
e69af998
This diff is collapsed.
Click to expand it.
Lab6.ipynb
0 → 100644
+
262
−
0
View file @
e69af998
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment