Hello,
I have a local URL that I can only access from within the company network, and I would like to read data from that URL into a BigQuery table. I have tested my Python code locally, and I am able to read the data and write it into the BigQuery table. However, after moving the code to a Cloud Function, I get the error: "An error occurred while fetching data from http://10.XX.XX.XX: HTTPConnectionPool(host='10.XX.XX.XX', port=80): Max retries exceeded with url: (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x3e6b5d14b470>, 'Connection to 10.XX.XX.XX timed out. (connect timeout=None)'))"
Does anyone know how I can configure GCP so the Cloud Function can reach this internal URL? I have tried creating subnets and firewall rules in a VPC with that IP range, but it does not work. (Note: a Cloud Function has no route to a private on-premises IP by default — it typically needs a Serverless VPC Access connector attached, plus Cloud VPN or Interconnect from that VPC to the company network.)
import functions_framework
import os
import requests
import pandas as pd
from google.cloud import bigquery
from pandas_gbq import to_gbq
from io import StringIO
from datetime import datetime, timedelta
from flask import Flask, jsonify
# credentialsPath is not needed when running on GCP: the Cloud Function's
# service account supplies Application Default Credentials automatically.
client = bigquery.Client()  # NOTE(review): appears unused — pandas_gbq.to_gbq performs the write; confirm before removing
# BigQuery configuration
PROJECT_ID = "projectsample"        # GCP project that owns the destination dataset
DATASET_TABLE = "sample.testtable"  # destination table in "dataset.table" form
# List of URLs (internal/on-prem hosts; a "YYYYMMDD-YYYYMMDD" range is appended
# to each base URL inside main() before fetching)
base_urls = [
    "http://10.XX.XX.XX1",
    "http://10.XX.XX.XX2"
]
def get_date_range():
    """Return the reporting window as a (start, end) pair of YYYYMMDD strings.

    The window ends now and starts five days earlier.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=5)
    fmt = "%Y%m%d"
    return window_start.strftime(fmt), window_end.strftime(fmt)
def fetch_data(url, timeout=60):
    """Fetch a CSV document from *url* and return it as a DataFrame.

    A 'source_url' column is appended so each row can be traced back to the
    URL it came from. On any request or CSV-parse failure the error is
    printed and an empty DataFrame is returned, so the caller can simply
    skip this URL.

    Args:
        url: Full URL to GET; the response body is expected to be UTF-8 CSV.
        timeout: Seconds to wait for the connection/response. The previous
            code passed no timeout, so requests waited forever — exactly the
            "connect timeout=None" hang seen in the Cloud Function error.

    Returns:
        pandas.DataFrame with the CSV rows plus 'source_url', or an empty
        DataFrame on failure.
    """
    try:
        # Always bound the request: requests defaults to no timeout at all.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        csv_file = StringIO(response.content.decode('utf-8'))
        df = pd.read_csv(csv_file)
        df['source_url'] = url
        return df
    except requests.RequestException as e:
        print(f"An error occurred while fetching data from {url}: {e}")
        return pd.DataFrame()
    except pd.errors.ParserError as e:
        print(f"An error occurred while parsing the CSV from {url}: {e}")
        return pd.DataFrame()
@functions_framework.http
def main(request):
    """HTTP entry point: pull CSVs for the last 5 days and load them into BigQuery.

    Builds one URL per entry in base_urls by appending the date range,
    fetches each, concatenates the non-empty results, and replaces the
    contents of DATASET_TABLE. Returns a JSON status payload.
    """
    start_date, end_date = get_date_range()

    # Fetch every source and keep only the frames that contained data.
    frames = [
        frame
        for frame in (
            fetch_data(f"{base}{start_date}-{end_date}") for base in base_urls
        )
        if not frame.empty
    ]

    # Guard clause: nothing fetched means nothing to load.
    if not frames:
        return jsonify({"message": "No data was fetched from any URL.", "status": "failure"}), 400

    combined = pd.concat(frames, ignore_index=True)
    print(combined)
    to_gbq(combined, DATASET_TABLE, project_id=PROJECT_ID, if_exists='replace')
    return jsonify({"message": f"Data successfully written to {DATASET_TABLE}", "status": "success"}), 200
Thanks