Simplifying Excel Tasks with Python: Data Handling and Testing Tutorial

Working with Python and Excel is actually quite handy, especially if you are into it. In this article, you can see how to create Excel files, write data and formulas into them and read these. Pretty much simple CRUD methods.

The interesting part of the article are the tests and the test coverage tools, provided in Python.

This is how test coverage looks like.

This is how the file with the excel operations looks like:

import pandas as pd
from openpyxl import load_workbook


def read_excel(file_path):
    df = pd.read_excel(file_path)
    print("Original Data:")
    print(df)
    return df

def manipulate_data(df):
    df['Total'] = df['Quantity'] * df['UnitPrice']
    df['TotalWithVAT'] = 0
    print("Manipulated Data:")
    print(df)
    return df

def write_to_excel_with_formulas(df, output_file_path):
    df.to_excel(output_file_path, index=False)
    workbook = load_workbook(output_file_path)
    worksheet = workbook.active

    for row in range(2, len(df)+2):
        cell = worksheet.cell(row=row, column=4)
        cell.value = f'=C{row}*1.2'

    workbook.save(output_file_path)
    print(f"Data with formulas has been written to {output_file_path}")

if __name__ == "__main__":
    input_file_path = 'input.xlsx'
    output_file_path = 'output.xlsx'

    data = read_excel(input_file_path)
    manipulated_data = manipulate_data(data)
    write_to_excel_with_formulas(manipulated_data, output_file_path)

The following functions are tested:

  • read_excel(file_path)
  • manipulate_data(df)
  • write_to_excel_with_formulas(df, output_file_path)

    This is how the functions are tested:

import unittest
import os
import pandas as pd
from excel_operations import read_excel, manipulate_data, write_to_excel_with_formulas

class TestExcelOperations(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.input_file_path = 'test_input.xlsx'
        cls.output_file_path = 'test_output.xlsx'

        df = pd.DataFrame({
            'Quantity': [10, 20, 30],
            'UnitPrice': [100, 200, 300]
        })
        df.to_excel(cls.input_file_path, index=False)

    def test_read_excel(self):
        df = read_excel(self.input_file_path)
        self.assertEqual(len(df), 3)
        self.assertTrue('Quantity' in df)
        self.assertTrue('UnitPrice' in df)


    def test_manipulate_data(self):
        df = read_excel(self.input_file_path)
        manipulated_df = manipulate_data(df)

        self.assertEqual(len(manipulated_df), 3)
        self.assertTrue('Total' in manipulated_df)
        self.assertTrue('TotalWithVAT' in manipulated_df)

        expected_total_series = pd.Series([1000, 4000, 30*300], name='Total')
        pd.testing.assert_series_equal(manipulated_df['Total'],expected_total_series,check_names=True )

    def test_write_to_excel_with_formulas(self):
        df = read_excel(self.input_file_path)
        manipulated_df = manipulate_data(df)
        write_to_excel_with_formulas(manipulated_df, self.output_file_path)
        self.assertTrue(os.path.exists(self.output_file_path))

    @classmethod
    def tearDownClass(cls):
        os.remove(cls.input_file_path)
        os.remove(cls.output_file_path)
        

if __name__ == '__main__':
    unittest.main()
    

The command to test the functions is:

  • python -m unittest excel_operations_test.py

Then for the test coverage, these are the commands used:

  • coverage run -m unittest discover
  • coverage report
  • coverage html

To open the index.html with the default browser (or the Excel file) from the terminal:

  • start .\htmlcov\index.html
  • start excel.exe .\test_output.xlsx

This is the YouTube video with the live coding for that one:

Simplifying Excel Tasks with Python: Data Handling and Testing Tutorial

Thank you for watching!

Enjoy it!