# Using pywin32 to query Outlook and extract email metadata and attachments.
# August 19, 2024
#
# Prerequisite: install pywin32 (https://github.com/mhammond/pywin32) with:
#     pip install pywin32
import os
import datetime
import win32com.client
def save_attachments(start_date, end_date, names, file_extensions, save_folder):
    """
    Save matching attachments from the default Outlook inbox to ``save_folder``.

    An email is considered when it was received between ``start_date`` and
    ``end_date`` and its 'To' or 'CC' line contains any entry of ``names``
    (case-sensitive substring match). Of its attachments, only those whose
    file name ends with one of ``file_extensions`` are written; each saved
    file is named ``<received timestamp>_<attachment file name>``.

    :param start_date: Start date as a datetime object
    :param end_date: End date as a datetime object
    :param names: List of names to check in 'To' or 'CC' fields
    :param file_extensions: List of file extensions to check
        (e.g., ['pdf', 'xlsx']); matched case-insensitively, and a leading
        dot is optional
    :param save_folder: Folder path where attachments will be saved; it is
        created when it does not exist yet
    """
    inbox_folder_id = 6  # value passed to GetDefaultFolder to select the inbox
    mail_item_class = 43  # 43 is the constant for MailItem in Outlook

    # Use the 12-hour clock (%I) so the hour agrees with the AM/PM marker (%p);
    # combining %H with %p would produce strings such as "15:30 PM".
    outlook_time_format = "%m/%d/%Y %I:%M %p"

    # Normalise extensions to lower-case ".ext" so that "pdf", ".pdf" and "PDF"
    # behave the same and a name like "reportpdf" is not mistaken for a PDF.
    suffixes = tuple("." + ext.lower().lstrip(".") for ext in file_extensions)

    # Attachment names come from the sender; neutralise characters that are
    # path separators or otherwise not allowed in Windows file names.
    unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

    if save_folder:
        os.makedirs(save_folder, exist_ok=True)

    outlook = win32com.client.Dispatch("Outlook.Application")
    namespace = outlook.GetNamespace("MAPI")
    inbox = namespace.GetDefaultFolder(inbox_folder_id)

    date_filter = (
        f"[ReceivedTime] >= '{start_date.strftime(outlook_time_format)}'"
        f" AND [ReceivedTime] <= '{end_date.strftime(outlook_time_format)}'"
    )
    items = inbox.Items.Restrict(date_filter)

    for item in items:
        # The inbox can hold non-mail items; only mail items are processed.
        if item.Class != mail_item_class:
            continue

        recipient_fields = (item.To or "", item.CC or "")
        if not any(name in field for field in recipient_fields for name in names):
            continue

        received_time = item.ReceivedTime.strftime("%Y-%m-%d_%H-%M-%S")
        saved_count = 0
        for attachment in item.Attachments:
            attachment_name = attachment.FileName
            # An empty ``suffixes`` tuple never matches, so nothing is saved.
            if not attachment_name.lower().endswith(suffixes):
                continue
            safe_name = attachment_name.translate(unsafe_chars)
            filepath = os.path.join(save_folder, f"{received_time}_{safe_name}")
            attachment.SaveAsFile(filepath)
            saved_count += 1

        # Report only emails for which at least one file was actually written.
        if saved_count:
            print(f"Attachments saved for email: {item.Subject}")
if __name__ == "__main__":
    # Search window: the 30 days leading up to the moment the script starts.
    window_end = datetime.datetime.now()
    window_length = datetime.timedelta(days=30)

    save_attachments(
        start_date=window_end - window_length,
        end_date=window_end,
        # Display names looked for in the 'To' / 'CC' lines.
        names=["Investment Analytics", "Luke Miloszewski"],
        # Attachment types to keep.
        file_extensions=["pdf", "xlsx"],
        # Output folder for the saved attachments.
        save_folder="C:/Users/lmiloszewski/code/aaa/outlook",
    )