Ich erhalte folgende Fehlermeldung, weil soup.find den Text auf Yahoo Finance nicht mehr findet:
Invalid figure 'Total Cash Flow From Operating Activities' passed.
Aus dieser url:
Scrape Yahoo Finance Income Statement with Python
„Dies wird ein wenig schwieriger, weil "Net Income" in ein <strong>-Tag eingeschlossen ist.“
Kann mir jemand erklären, warum dieser Code für "Total Cash Flow From Operating Activities" nicht funktioniert und wie ich feststellen kann, ob etwas in einem <strong>-Tag steht?
Code:
import re, requests
from bs4 import BeautifulSoup
import sys
"""
import os # file system operations
import re # regular expressions
import pandas as pd # pandas... the best time series library out there
import datetime as dt # date and time functions
import io
"""
# search with regular expressions
# "CrumbStore":\{"crumb":"(?<crumb>[^"]+)"\}
def get_crumb():
    """Download the Yahoo Finance history page for AAPL and return its crumb.

    The crumb is read from the ``"CrumbStore":{"crumb":"..."}`` snippet that
    is embedded in the page's HTML. The value of the ``B`` cookie and the
    crumb are printed for diagnostic purposes.

    Returns:
        The crumb string. If several lines contain a crumb, the one from the
        last matching line is returned.

    Raises:
        ValueError: If no line of the downloaded page contains a crumb.
    """
    url = 'https://uk.finance.yahoo.com/quote/AAPL/history'  # url for a ticker symbol, with a download link
    r = requests.get(url)  # download page
    txt = r.text  # extract html

    # The cookie is only printed, so a missing 'B' cookie must not abort the
    # lookup; .get() yields None instead of raising KeyError.
    cookie = r.cookies.get('B')
    print('Cookie: ', cookie)

    # The string we need looks like this: "CrumbStore":{"crumb":"lQHxbbYOBCq"}
    # Raw string so that the escaped braces are not treated as (invalid)
    # string escape sequences.
    pattern = re.compile(r'.*"CrumbStore":\{"crumb":"(?P<crumb>[^"]+)"\}')

    crumb = None
    for line in txt.splitlines():
        m = pattern.match(line)
        if m is not None:
            crumb = m.group('crumb')

    if crumb is None:
        # Without this check the function would fail with an opaque
        # UnboundLocalError when the page layout changes.
        raise ValueError("No crumb found in the response from " + url)

    print('Crumb=', crumb)
    return crumb
def periodic_figure_values(soup, yahoo_figure):
    """Return the numeric values found in the table row labelled *yahoo_figure*.

    The label is first searched for inside a ``<strong>`` tag (figures printed
    in bold), in which case the row is the tag's grandparent. Otherwise it is
    searched for directly inside a ``<td>``, whose parent is the row. If
    neither search succeeds the program terminates via ``sys.exit`` with an
    error message.

    Args:
        soup: Parsed page offering ``find`` (presumably a BeautifulSoup
            object -- confirm against the caller).
        yahoo_figure: Label of the figure; it is compiled as a regular
            expression for the search.

    Returns:
        A list of ints, one per value cell, each multiplied by 1000. A cell
        containing only "-" counts as 0 and parenthesised numbers become
        negative.
    """
    figure_re = re.compile(yahoo_figure)

    bold_label = soup.find("strong", text=figure_re)  # works for the figures printed in bold
    if bold_label:
        row = bold_label.parent.parent
    else:
        plain_label = soup.find("td", text=figure_re)  # works for any other available figure
        if not plain_label:
            sys.exit("Invalid figure '" + yahoo_figure + "' passed.")
        row = plain_label.parent

    values = []
    # The first <td> holds the figure name, so it is skipped.
    for cell in row.find_all("td")[1:]:
        cell_text = cell.text.strip()
        if cell_text == yahoo_figure:
            # Indented figures repeat the label in a further cell.
            continue
        digits = cell_text.replace(",", "").replace("(", "-").replace(")", "")
        amount = 0 if digits == "-" else int(digits)
        values.append(amount * 1000)
    return values
def financials_soup(ticker_symbol, statement, quarterly=False):
    """Download a Yahoo Finance statement page and return it parsed.

    Args:
        ticker_symbol: Ticker symbol placed in the ``s`` query parameter.
        statement: Statement code; must be "is", "bs" or "cf". Any other
            value terminates the program via ``sys.exit`` with a message.
        quarterly: When false (the default), "&annual" is appended to the
            URL.

    Returns:
        The downloaded page parsed by BeautifulSoup with "html.parser".
    """
    if statement not in ("is", "bs", "cf"):
        return sys.exit("Invalid financial statement code '" + statement + "' passed.")

    crumb = get_crumb()
    query = "?s=" + ticker_symbol + "&crumb=" + crumb
    if not quarterly:
        query += "&annual"

    response = requests.get("https://finance.yahoo.com/q/" + statement + query)
    return BeautifulSoup(response.text, "html.parser")
# Fetch Apple's "cf" statement page and print the values of one of its rows.
print(
    periodic_figure_values(
        financials_soup("AAPL", "cf"),
        "Total Cash Flow From Operating Activities",
    )
)
Edit:
Ich konnte Ergebnisse erhalten, indem ich die Funktion financials_soup wie folgt geändert habe:
def financials_soup(ticker_symbol, statement, quarterly=False):
    """Download a Yahoo Finance statement page and return it parsed.

    Args:
        ticker_symbol: Ticker symbol, used both in the URL path and in the
            ``p`` query parameter.
        statement: Statement name; must be "financials", "balance-sheet" or
            "cash-flow". Any other value terminates the program via
            ``sys.exit`` with a message.
        quarterly: When false (the default), "&annual" is appended to the
            URL.

    Returns:
        The downloaded page parsed by BeautifulSoup with "html.parser".
    """
    if statement not in ("financials", "balance-sheet", "cash-flow"):
        return sys.exit("Invalid financial statement code '" + statement + "' passed.")

    crumb = get_crumb()
    page_path = "https://finance.yahoo.com/quote/" + ticker_symbol + "/" + statement
    query = "?p=" + ticker_symbol + "&crumb=" + crumb
    if not quarterly:
        query += "&annual"

    response = requests.get(page_path + query)
    return BeautifulSoup(response.text, "html.parser")
Zroq, vielen Dank für Ihre Hilfe. Mit Ihren Anregungen konnte ich die gewünschten Ergebnisse von Yahoo Finance erzielen. Darf ich fragen, was Sie mit „Inspektion“ gemeint haben? Wird dazu die URL einfach in den Webbrowser kopiert und eingefügt? –