|
|
|
@ -4,6 +4,8 @@ import requests
|
|
|
|
|
from bs4 import BeautifulSoup |
|
|
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
from multiprocessing import Process |
|
|
|
|
|
|
|
|
|
# URLs to parse
# Page holding the Modra Zavarovalnica fund table.
modra_URL = "https://www.modra.si/skladi-in-podskladi/"
# Page holding the Sava Infond mutual-fund price list.
infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"
|
|
|
@ -15,20 +17,19 @@ headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleW
|
|
|
|
|
############################################################################### |
|
|
|
|
# Parse Modra Zavarovalnica |
|
|
|
|
# |
|
|
|
|
def parse_modra():
    """Fetch the Modra Zavarovalnica fund table and return the tracked rows.

    Downloads ``modra_URL`` with the module-level ``headers``, reads the
    first HTML table found on the page, renames its columns to
    ``SKLAD``/``VEP`` and keeps only the rows whose ``SKLAD`` value is
    "Dinamični podsklad" or "Zajamčeni podsklad".

    Returns:
        pandas.DataFrame: the matching rows, limited to the ``SKLAD`` and
        ``VEP`` columns, keeping the row labels of the scraped table.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: raised by ``pandas.read_html`` when the page contains
            no table.
        KeyError: if the table has no ``SKLAD`` column after renaming.
    """
    # Function-scope import so the block does not depend on edits elsewhere.
    from io import StringIO

    # A timeout keeps a stalled server from hanging the script forever, and
    # raise_for_status stops us from parsing an error page as if it were data.
    response = requests.get(modra_URL, headers=headers, timeout=30)
    response.raise_for_status()

    # Newer pandas releases deprecate passing literal HTML to read_html, so
    # hand it a file-like object. thousands=None switches off
    # thousands-separator handling while parsing cell values.
    tables = pd.read_html(StringIO(response.text), thousands=None)
    funds = tables[0]

    # Rename columns.
    # NOTE(review): the '?' in the source column name may be a mis-encoded
    # dash; if the live header differs, the rename is a no-op and filter()
    # below silently drops the price column - confirm against the page.
    funds = funds.rename(
        columns={'VEP ? Vrednost enote premoženja': 'VEP', 'Sklad': 'SKLAD'}
    )

    # Drop all columns except the ones we want.
    funds = funds.filter(['SKLAD', 'VEP'])

    # Drop all rows except the ones we want.
    wanted = ["Dinamični podsklad", "Zajamčeni podsklad"]
    return funds[funds["SKLAD"].isin(wanted)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -36,33 +37,30 @@ subset = df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni po
|
|
|
|
|
# Parse Sava Infond Skladi |
|
|
|
|
# |
|
|
|
|
|
|
|
|
|
def parse_infond():
    """Fetch the Sava Infond price list and return the tracked funds.

    Downloads ``infond_URL`` with the module-level ``headers``, reads the
    first HTML table found on the page, keeps the ``SKLAD`` and ``VEP``
    columns, repairs two rows whose ``SKLAD`` cell carries extra tokens,
    and keeps only the rows whose ``SKLAD`` value is "Infond Defensive" or
    "Infond Technology".

    Returns:
        pandas.DataFrame: the matching rows, limited to the ``SKLAD`` and
        ``VEP`` columns, keeping the row labels of the scraped table.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: raised by ``pandas.read_html`` when the page contains
            no table.
        KeyError: if row label 23 or 15 (or the ``SKLAD`` column) is
            missing from the table.
        IndexError: if one of those cells has fewer than three
            whitespace-separated tokens.
    """
    # Function-scope import so the block does not depend on edits elsewhere.
    from io import StringIO

    # A timeout keeps a stalled server from hanging the script forever, and
    # raise_for_status stops us from parsing an error page as if it were data.
    response = requests.get(infond_URL, headers=headers, timeout=30)
    response.raise_for_status()

    # Newer pandas releases deprecate passing literal HTML to read_html, so
    # hand it a file-like object. thousands=None switches off
    # thousands-separator handling while parsing cell values.
    tables = pd.read_html(StringIO(response.text), thousands=None)

    # Drop all columns except the ones we want.
    funds = tables[0].filter(['SKLAD', 'VEP'])

    # Clean up the "SKLAD" name: in these rows the cell holds more than the
    # fund name, so keep the first two tokens as the name and move the third
    # token into VEP.
    # NOTE(review): the row labels are tied to the current layout of the
    # scraped table; if the site reorders its rows this touches the wrong
    # funds or raises - confirm against the live page.
    for row_label in (23, 15):
        tokens = funds.at[row_label, 'SKLAD'].split()
        funds.at[row_label, 'SKLAD'] = tokens[0] + ' ' + tokens[1]
        funds.at[row_label, 'VEP'] = tokens[2]

    # Drop all rows except the ones we want.
    wanted = ["Infond Defensive", "Infond Technology"]
    return funds[funds["SKLAD"].isin(wanted)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
############################################################################### |
|
|
|
|
# Create new datatable and output it |
|
|
|
|
# |
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the scrape only when this file is executed as a script.
    frames = [parse_modra(), parse_infond()]
    # Stack both result sets vertically; ignore_index=True renumbers the
    # rows from 0, replacing the separate reset_index(drop=True) step.
    output_table = pd.concat(frames, axis=0, ignore_index=True)
    print(output_table)
|
|
|
|