parser-skladi/parse.py

70 lines
2.1 KiB
Python
Raw Permalink Normal View History

2021-09-21 14:43:16 +02:00
#!/usr/bin/env python3
2024-02-19 08:15:17 +01:00
# parse.py
2021-09-21 14:43:16 +02:00
from io import StringIO
from multiprocessing import Process

import pandas as pd
import requests
from bs4 import BeautifulSoup
2021-09-21 14:43:16 +02:00
# Pages whose first HTML table is scraped: one for Modra (parse_modra) and
# one for Infond (parse_infond).
modra_URL = "https://www.modra.si/skladi-in-podskladi/"
infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"
# Browser-like User-Agent sent with every request; without it the Modra
# request does not work. The same headers are also sent to Infond.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
###############################################################################
# Parse Modra Zavarovalnica
#
def parse_modra(timeout=30):
    """Fetch the Modra fund table and return the rows of the tracked sub-funds.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame limited to the 'SKLAD' and 'VEP' columns, containing only
        the rows whose 'SKLAD' is 'Dinamični podsklad' or
        'Zajamčeni podsklad'. The original row labels are kept.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        ValueError: If pandas finds no table in the returned page.
    """
    response = requests.get(modra_URL, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas. thousands=None turns off
    # thousands-separator handling so the values are not reinterpreted.
    df = pd.read_html(StringIO(response.text), thousands=None)[0]

    # Normalise the column names to the ones used by the rest of the script.
    # NOTE(review): the '?' in the long header may be a garbled dash from the
    # page; rename() silently skips a key that does not match, which would
    # leave no 'VEP' column after the filter below - verify against the site.
    df = df.rename(columns={'VEP ? Vrednost enote premoženja': 'VEP', 'Sklad': 'SKLAD'})

    # Keep only the columns we want.
    df = df.filter(['SKLAD', 'VEP'])

    # Keep only the rows we want.
    wanted = ["Dinamični podsklad", "Zajamčeni podsklad"]
    return df[df["SKLAD"].isin(wanted)]
2021-09-21 14:43:16 +02:00
###############################################################################
# Parse Sava Infond Skladi
#
def parse_infond(timeout=30):
    """Fetch the Infond fund table and return the rows of the tracked funds.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame limited to the 'SKLAD' and 'VEP' columns, containing only
        the rows whose 'SKLAD' is 'Infond Globalni defenzivni' or
        'Infond Tehnologija'. The original row labels are kept.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        ValueError: If pandas finds no table in the returned page.
        KeyError, IndexError, AttributeError: If the table no longer has the
            layout the hard-coded clean-up below expects.
    """
    response = requests.get(infond_URL, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas. thousands=None turns off
    # thousands-separator handling so the values are not reinterpreted.
    df = pd.read_html(StringIO(response.text), thousands=None)[0]

    # Keep only the columns we want.
    df = df.filter(['SKLAD', 'VEP'])

    # Clean up the "SKLAD" name: in these rows the cell holds the fund name
    # followed by further whitespace-separated text. The first `name_words`
    # tokens are kept as the name and the token right after them is stored
    # as the row's VEP.
    # NOTE(review): the row labels are tied to the page's current table
    # layout and must be updated if rows are added, removed or reordered.
    rows_to_fix = (
        (11, 2),  # (row label, number of words in the fund name)
        (19, 3),
    )
    for row, name_words in rows_to_fix:
        tokens = df.at[row, 'SKLAD'].split()
        df.at[row, 'SKLAD'] = ' '.join(tokens[:name_words])
        df.at[row, 'VEP'] = tokens[name_words]

    # Keep only the rows we want.
    wanted = ["Infond Globalni defenzivni", "Infond Tehnologija"]
    return df[df["SKLAD"].isin(wanted)]
2021-09-21 14:43:16 +02:00
###############################################################################
# Create new datatable and output it
#
if __name__ == '__main__':
    # Scrape both providers (Modra first, then Infond), stack the results
    # vertically and renumber the rows from 0 so the printed index is
    # continuous across the two sources.
    frames = [parse_modra(), parse_infond()]
    output_table = pd.concat(frames, axis=0).reset_index(drop=True)
    print(output_table)