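Refactored the Modra and Infond parsing code into functions (parse_modra, parse_infond), moved the output under a __main__ guard, and cleaned up the code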

This commit is contained in:
Deni 2021-09-21 15:48:44 +02:00
parent f76b804f0c
commit b8ef451357

View File

@@ -4,6 +4,8 @@ import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import pandas as pd import pandas as pd
from multiprocessing import Process
# URLs to parse # URLs to parse
modra_URL = "https://www.modra.si/skladi-in-podskladi/" modra_URL = "https://www.modra.si/skladi-in-podskladi/"
infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov" infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"
@@ -15,20 +17,19 @@ headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleW
############################################################################### ###############################################################################
# Parse Modra Zavarovalnica # Parse Modra Zavarovalnica
# #
def parse_modra():
a = requests.get(modra_URL, headers = headers)
df = pd.read_html(a.text, thousands=None)[0]
a = requests.get(modra_URL, headers = headers) # Rename Columns
df.rename(columns = {'VEP ? Vrednost enote premoženja':'VEP', 'Sklad':'SKLAD'}, inplace = True)
df_list = pd.read_html(a.text, thousands=None) # Drop all columns except the ones we want
df = df_list[0] df = df.filter(['SKLAD', 'VEP'])
# Rename Columns # Drop all rows except the ones we want
df.rename(columns = {'VEP ? Vrednost enote premoženja':'VEP', 'Sklad':'SKLAD'}, inplace = True) return df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni podsklad") ]
# Drop all columns except the ones we want
df = df.filter(['SKLAD', 'VEP'])
# Drop all rows except the ones we want
subset = df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni podsklad") ]
@@ -36,33 +37,30 @@ subset = df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni po
# Parse Sava Infond Skladi # Parse Sava Infond Skladi
# #
a = requests.get(infond_URL, headers = headers) def parse_infond():
df_list = pd.read_html(a.text, thousands=None) a = requests.get(infond_URL, headers = headers)
df = pd.read_html(a.text, thousands=None)[0]
# Drop all columns except the ones we want
df = df.filter(['SKLAD', 'VEP'])
# Drop all columns except the ones we want # Cleanup the "SKLAD" name
df_list[0] = df_list[0].filter(['SKLAD', 'VEP']) a = df.at[23,'SKLAD']
df.at[23,'SKLAD'] = a.split()[0]+' '+a.split()[1]
df.at[23,'VEP'] = a.split()[2]
# Cleanup the "SKLAD" name a = df.at[15,'SKLAD']
a = df_list[0].at[23,'SKLAD'] df.at[15,'SKLAD'] = a.split()[0]+' '+a.split()[1]
df_list[0].at[23,'SKLAD'] = a.split()[0]+' '+a.split()[1] df.at[15,'VEP'] = a.split()[2]
df_list[0].at[23,'VEP'] = a.split()[2]
a = df_list[0].at[15,'SKLAD']
df_list[0].at[15,'SKLAD'] = a.split()[0]+' '+a.split()[1]
df_list[0].at[15,'VEP'] = a.split()[2]
# Drop all rows except the ones we want
df = df_list[0]
subset1 = df[(df["SKLAD"]=="Infond Defensive") | (df["SKLAD"] =="Infond Technology") ]
# Drop all rows except the ones we want
return df[(df["SKLAD"]=="Infond Defensive") | (df["SKLAD"] =="Infond Technology") ]
############################################################################### ###############################################################################
# Create new datatable and output it # Create new datatable and output it
# #
if __name__ == '__main__':
output_table = pd.concat([subset, subset1], axis=0) output_table = pd.concat([parse_modra(), parse_infond()], axis=0)
output_table = output_table.reset_index(drop=True) output_table = output_table.reset_index(drop=True)
print(output_table) print(output_table)