Quick question here for anyone that can answer. I’m using a Python script to run the streaming API. I’ve been trying to get real time notifications of company status changes (i.e. from Active to Liquidation or Administration etc).
I’m using this end point: “https://stream.companieshouse.gov.uk/insolvency-cases”
Now I’ve been using this for the last few days and I’ve been testing it out with 3 companies that recently went into liquidation as per the Gazette Online. I’ve been waiting for Companies House to update their system and reflect the change. So far the streaming API has not picked up the change.
However, I’m also using a REST API to double check i don’t miss the notifications. Despite running the streaming API, I’ve noticed that i have missed notifications for 2 companies. This leads me to consider that maybe I should be using the following streaming end point instead: “https://stream.companieshouse.gov.uk/companies”
Can someone offer advice please? I assume it’s user error on my end because I’m new to using this but i just wanted to double check if I’m missing something obvious.
Here is my python script:
import requests
import json
import pandas as pd
from datetime import datetime
from openpyxl import load_workbook
Constants
EXCEL_FILE = r"C:\Users\User\Desktop\Company Watchlist.xlsx"
SHEET_NAME = “Tracked Companies”
STREAMING_URL = “https://stream.companieshouse.gov.uk/insolvency-cases”
API_KEY = “Redacted”
Function to load the company watchlist
def load_company_watchlist(file_path, sheet_name):
try:
# Load the Excel sheet into a DataFrame
df = pd.read_excel(file_path, sheet_name=sheet_name)
print(“Company watchlist loaded successfully.”) # Debug
return df
except Exception as e:
print(f"Error loading company watchlist: {e}")
raise
Function to save updates to the Excel file
def save_updates(file_path, sheet_name, updates):
try:
wb = load_workbook(file_path)
ws = wb[sheet_name]
for update in updates:
row = update['row']
ws[f"D{row}"] = update['date_of_last_check']
ws[f"E{row}"] = update['notes']
print(f"Updated row {row} in spreadsheet.") # Debug: Log spreadsheet updates
wb.save(file_path)
print("Spreadsheet saved successfully.") # Debug: Confirm save
except Exception as e:
print(f"Error saving updates to spreadsheet: {e}")
raise
Function to process insolvency updates
def process_data(data, df):
updates =
company_number = data.get(“company_number”, None)
insolvency_details = data.get(“data”, None) # Key field for insolvency events
# Skip if the company number is not in the watchlist
if company_number not in df['Company Number'].values:
return updates # Skip irrelevant data
print(f"Company number {company_number} found in watchlist.") # Debug: Matching company
# Find the corresponding row in the DataFrame
row = df[df['Company Number'] == company_number].index[0] + 2 # Adjust for Excel 1-based index
current_status = df.at[row - 2, "Last Status"] # Current status in Column C
# Extract new status or other relevant insolvency details
new_status = insolvency_details.get("case_type", "Unknown") if insolvency_details else "Unknown"
print(f"Current status: {current_status}, New status: {new_status}") # Debug: Compare statuses
# Compare current status (Column C) with new status from the API
if current_status != new_status:
print(f"Status changed for {company_number}. Updating spreadsheet.") # Debug: Status change
date_of_last_check = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
notes = f"Status changed from {current_status} to {new_status}"
# Prepare updates for Excel
updates.append({
"row": row,
"date_of_last_check": date_of_last_check,
"notes": notes
})
# Update the DataFrame for tracking (in memory)
df.at[row - 2, "Last Status"] = new_status
else:
print(f"No status change for {company_number}.") # Debug
return updates
Main function to connect to the Streaming API
def main():
try:
# Load company watchlist
df = load_company_watchlist(EXCEL_FILE, SHEET_NAME)
# Connect to the Companies House Streaming API
with requests.get(STREAMING_URL, auth=(API_KEY, ""), stream=True) as response:
if response.status_code == 200:
print("Connected to Companies House Streaming API.")
for line in response.iter_lines():
if line:
data = json.loads(line.decode('utf-8'))
company_number = data.get("company_number", None)
# Extract company number from resource_uri if not provided
if not company_number and 'resource_uri' in data:
company_number = data['resource_uri'].split('/')[2] # Extract from URI
# Log invalid data if company_number is still None
if not company_number:
print(f"Invalid data received: {data}")
print("Skipping invalid data (no company number).")
continue # Skip to the next line
# Filter out irrelevant companies
if company_number in df['Company Number'].values:
print("Relevant insolvency data received:", data) # Debug: Print relevant data
updates = process_data(data, df)
if updates:
save_updates(EXCEL_FILE, SHEET_NAME, updates)
print(f"Updates saved to {EXCEL_FILE}.")
else:
print(f"Irrelevant company number {company_number}. Skipping.") # Debug for irrelevant data
else:
print(f"Failed to connect. HTTP Status: {response.status_code}")
except Exception as e:
print(f"Error occurred: {e}")
Run the script
if name == “main”:
main()
Thank you to whoever responds.