#!/usr/bin/env python3
# parse.py
"""Scrape fund unit values (VEP) from Modra Zavarovalnica and Sava Infond.

Each parser downloads one page, reads the first HTML table on it and returns
a DataFrame reduced to the columns ``SKLAD`` and ``VEP`` and to the funds of
interest.  Run as a script, the two results are concatenated and printed.
"""

from io import StringIO
from multiprocessing import Process

import requests
from bs4 import BeautifulSoup
import pandas as pd

# URLs to parse
modra_URL = "https://www.modra.si/skladi-in-podskladi/"
infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"

# Fake browser headers, otherwise Modra won't work
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/39.0.2171.95 Safari/537.36'
    )
}

# Seconds to wait for a server before giving up; without a timeout
# requests.get() can block forever.
REQUEST_TIMEOUT = 30

# Funds kept in the output, spelled exactly as they must appear in "SKLAD".
MODRA_FUNDS = ("Dinamični podsklad", "Zajamčeni podsklad")
# An earlier version of the Infond filter used the English names
# "Infond Defensive" and "Infond Technology".
INFOND_FUNDS = ("Infond Globalni defenzivni", "Infond Tehnologija")


###############################################################################
# Shared helpers
#
def _fetch_first_table(url: str) -> pd.DataFrame:
    """Download *url* and return the first HTML table on the page.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within
            ``REQUEST_TIMEOUT`` seconds.
        ValueError: raised by ``pandas.read_html`` when the page has no table.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail here with a clear HTTP error instead of letting pandas complain
    # about a missing table on an error page.
    response.raise_for_status()
    # Passing literal HTML to read_html is deprecated (pandas >= 2.1), so the
    # text is wrapped in a file-like object.  thousands=None switches off
    # pandas' thousands-separator handling.
    return pd.read_html(StringIO(response.text), thousands=None)[0]


def _split_fused_vep(df: pd.DataFrame, fund_names) -> pd.DataFrame:
    """Repair rows whose "SKLAD" cell holds a fund name followed by its value.

    For every row whose "SKLAD" text starts with one of *fund_names* and
    continues with a further token containing a digit, "SKLAD" is set to the
    plain fund name and that token is written to "VEP".  Rows are located by
    content rather than by position, so a reordered table is still handled.

    NOTE(review): presumably the token following the name is the VEP printed
    in the same cell -- verify against the live page.
    """
    fixes = []
    for row_label, cell in df['SKLAD'].items():
        if not isinstance(cell, str):
            # NaN or other non-text cells cannot contain a fund name.
            continue
        words = cell.split()
        for name in fund_names:
            name_words = name.split()
            count = len(name_words)
            if len(words) > count and words[:count] == name_words:
                value = words[count]
                # Only a token with a digit is treated as a value, so that a
                # different fund with a longer name is not misread.
                if any(ch.isdigit() for ch in value):
                    fixes.append((row_label, name, value))
                break
    # Apply after the scan so the column is not modified while iterating it.
    for row_label, name, value in fixes:
        df.at[row_label, 'SKLAD'] = name
        df.at[row_label, 'VEP'] = value
    return df


###############################################################################
# Parse Modra Zavarovalnica
#
def parse_modra() -> pd.DataFrame:
    """Return the "SKLAD"/"VEP" rows of the wanted Modra sub-funds."""
    df = _fetch_first_table(modra_URL)
    # Rename columns
    df = df.rename(columns={'VEP ? Vrednost enote premoženja': 'VEP',
                            'Sklad': 'SKLAD'})
    # Drop all columns except the ones we want
    df = df.filter(['SKLAD', 'VEP'])
    # Drop all rows except the ones we want
    return df[df["SKLAD"].isin(MODRA_FUNDS)]


###############################################################################
# Parse Sava Infond Skladi
#
def parse_infond() -> pd.DataFrame:
    """Return the "SKLAD"/"VEP" rows of the wanted Infond funds."""
    df = _fetch_first_table(infond_URL)
    # Drop all columns except the ones we want
    df = df.filter(['SKLAD', 'VEP'])
    # Clean up "SKLAD" cells that carry the value after the fund name
    df = _split_fused_vep(df, INFOND_FUNDS)
    # Drop all rows except the ones we want
    return df[df["SKLAD"].isin(INFOND_FUNDS)]


###############################################################################
# Create new datatable and output it
#
def main() -> None:
    """Fetch both sources, stack them into one table and print it."""
    output_table = pd.concat([parse_modra(), parse_infond()], axis=0)
    output_table = output_table.reset_index(drop=True)
    print(output_table)


if __name__ == '__main__':
    main()