Using Python and Python-Docx to build Word Docs

Using Python and Python-Docx to build Word Docs

As someone who loves automating things, I've learned to... well... not. It's often not worth it.

I'll look to automate if I think it will improve the accuracy of a process that needs to be particularly accurate (e.g. calculating commissions)[1]. I'll also shoot for automation in cases like the following: at the office, I maintain the commission and bonus plans for the sales team. Altogether there are 30ish plans to maintain... most of the language is the same, but each doc is unique.

For a while we were managing these files simply as individual Word documents, which was a nightmare: making the exact same update to 30ish files is a tedious time sink; making the exact same update to a subset of 30ish files gets very confusing very quickly.

Recently, I took over management of the plans with a mind to introduce some consistency and efficiency to the process. I'm sure people who manage contracts professionally purchase amazing software to automate this process; I used Python and python-docx.

Now, I have a single repository for content and I generate all of the plans dynamically. I don't think I saw anything quite like this online when I was building it out, so I thought I would share the concept using the sample code below. While I personally use it for sales plans, I could see it working well for vendor contracts, annual performance reviews,[2] etc.

Things I used:

# Add document content as variables
#
# Every piece of text that can appear in a generated document lives here, so
# a wording change is made once and picked up by every plan that uses it.

# --- Title block ---
top_header = 'Official Document'
top_header_date_USA = 'Effective June 1, 2020'
top_header_date_NZL = 'Effective 1 June 2020'

opening_paragraph = 'Nulla dictum, lacus non tempor tincidunt, urna ligula vulputate lorem, a blandit nisl lacus ut felis. Suspendisse potenti. Nam ut tempus ante. Sed in erat sed ligula sagittis ornare. Aenean sagittis tortor suscipit, dapibus felis vitae, dapibus nunc. Phasellus diam leo, laoreet a interdum pretium, vulputate vel orci.'

# --- Widgets section ---
heading_widgets_USA = 'USA Widgets'
heading_widgets_NZL = 'New Zealand Widgets'
widgets_paragraph = 'Cras pharetra, mi quis faucibus eleifend, justo orci tincidunt lacus, at consectetur felis nisi id tellus. '

third_paragraph = 'Duis eu erat quis felis ultrices congue.'

# --- Colors section (one or two sentences per person) ---
heading_colors = 'Colors'

colorJohnDoe = 'Your color is silver.'

colorJimDoe = 'Your color is red.'

firstColorJillDoe = 'Your first color is navy.'  # Could also place in an array
# Trailing period added so this sentence matches the other color sentences.
secondColorJillDoe = 'Your second color is plum.'

firstColorJaneDoe = 'Your first color is green.'  # Could also place in an array
secondColorJaneDoe = 'Your second color is pink.'

# --- Acknowledgement / signature section ---
header_acknowledgement = 'Acknowledgement'
ive_read = 'I have read this document.'
Signed = 'Signed:'
a_long_line = '____________________________________________________'
a_short_line = '__________'
signature_line_spacing = '   '
signature_word_spacing = '                              '
the_date = 'Dated'
person_A = 'Person A'
person_B = 'Person B'

# --- Table column headings ---
Bikes = 'Bikes'
Cars = 'Cars'
Scooters = 'Scooters'
Skates = 'Skates'

# --- Output file names, one per person ---
johnDoeWordDocCreate = 'John Doe File.docx'
jimDoeWordDocCreate = 'Jim Doe File.docx'
janeDoeWordDocCreate = 'Jane Doe File.docx'
jillDoeWordDocCreate = 'Jill Doe File.docx'

# --- Table row labels ---
March = 'March'
June = 'June'
September = 'September'
December = 'December'

# --- Table cell values ---
cell_text_1 = '1'
cell_text_2 = '2'
cell_text_3 = '3'
cell_text_4 = '4'
cell_text_5 = '5'
cell_text_6 = '6'
cell_text_7 = '7'
cell_text_8 = '8'
cell_text_9 = '9'
cell_text_10 = '10'
cell_text_11 = '11'
cell_text_12 = '12'
cell_text_13 = '13'
cell_text_14 = '14'
cell_text_15 = '15'
cell_text_16 = '16'

cell_text_25 = '25'
cell_text_40 = '40'

# Constants

# Table identifiers.
TABLE_ONE, TABLE_TWO, TABLE_THREE = 1, 2, 3

# Cell widths in inches (wrapped with Inches() where they are applied).
CELL_WIDTH = 0.9
CELL_WIDTH_WIDE = 1.8

#Imports
from docx.shared import Pt
from docx.shared import Inches
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.shared import OxmlElement, qn
import shutil #to copy template file with header/footer

# Create custom styles (e.g. 'Embedded Heading 1') in a Word doc and
#       save the blank doc: 'The Base Doc.docx'.
#       Each new doc will use that blank file as its base, which allows you to reference the
#       custom styles
# NOTE: 'The Base Doc.docx' is opened by relative path, so it is resolved
#       against the current working directory.
# NOTE(review): mainDocument is not referenced again in the code shown here;
#       each plan further down opens its own copy of the base doc.
mainDocument = Document('The Base Doc.docx')


# Functions to add building blocks to the doc

def _addTitle(doc, date_line):
    """Append the title block and the opening paragraph to ``doc``.

    Adds, in order: the shared top header and ``date_line`` (both centred,
    both styled 'Embedded Heading 1'), one empty paragraph for spacing, and
    the shared opening paragraph.

    Args:
        doc: Document object to append to; it must provide
            ``add_paragraph`` and the 'Embedded Heading 1' style.
        date_line: Region-formatted effective-date text shown under the
            top header.
    """
    for heading_text in (top_header, date_line):
        heading = doc.add_paragraph(heading_text, style='Embedded Heading 1')
        heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

    doc.add_paragraph()  # blank line for spacing

    doc.add_paragraph(opening_paragraph)


def addTitleUsa(doc):
    """Append the title block using the USA-formatted effective date."""
    _addTitle(doc, top_header_date_USA)


def addTitleNzl(doc):
    """Append the title block using the NZL-formatted effective date."""
    _addTitle(doc, top_header_date_NZL)

def addWidgets(doc, number_of_rows, number_of_columns, country):
    """Append the widgets section: heading, intro text, table and closing text.

    Args:
        doc: Document object to append to.
        number_of_rows: Row count of the table that is created.
        number_of_columns: Column count of the table that is created.
        country: 'NZL' or 'USA' selects the matching section heading; any
            other value produces the heading 'Error / Country Missing'.
    """
    # Choose the heading text first so the paragraph is added in one place.
    if country == 'NZL':
        section_heading = heading_widgets_NZL
    elif country == 'USA':
        section_heading = heading_widgets_USA
    else:
        section_heading = 'Error / Country Missing'

    doc.add_paragraph(section_heading, style='Embedded Heading 2')
    doc.add_paragraph(widgets_paragraph)

    widget_table = doc.add_table(rows=number_of_rows, cols=number_of_columns)

    doc.add_paragraph()  # blank line between the table and the next paragraph
    doc.add_paragraph(third_paragraph)

    # The table is filled after the paragraphs that follow it are added.
    populateTable(widget_table)

def addColorsUsa(doc, firstColor, secondColor):
    """Append the colors section with two indented color sentences.

    Args:
        doc: Document object to append to.
        firstColor: Text of the first color paragraph.
        secondColor: Text of the second color paragraph.
    """
    doc.add_paragraph(heading_colors, style='Embedded Heading 2')

    # Both sentences get the same 0.25 inch left indent.
    for color_sentence in (firstColor, secondColor):
        paragraph = doc.add_paragraph(color_sentence)
        paragraph.paragraph_format.left_indent = Inches(0.25)


def addColorsNzl(doc, color):
    """Append the colors section with a single, unindented color sentence.

    Args:
        doc: Document object to append to.
        color: Text of the color paragraph.
    """
    # Section heading first, then the person's sentence in the default style.
    doc.add_paragraph(
        heading_colors,
        style='Embedded Heading 2',
    )
    doc.add_paragraph(color)


def addAcknowledgement(doc):
    """Append the acknowledgement section with one signature block per signer.

    Each signature block is an empty paragraph, a rule made of a long and a
    short underscore line, and a caption with the signer label and the date
    label.

    Args:
        doc: Document object to append to.
    """
    doc.add_paragraph(header_acknowledgement, style='Embedded Heading 2')
    doc.add_paragraph(ive_read)
    doc.add_paragraph(Signed, style='Embedded Heading 3')

    signature_rule = a_long_line + signature_line_spacing + a_short_line
    for signer in (person_A, person_B):
        doc.add_paragraph()  # blank line above each signature block
        doc.add_paragraph(signature_rule)
        doc.add_paragraph(signer + signature_word_spacing + the_date)

def _setCell(table, row, col, text, width):
    """Write ``text`` into cell (row, col), set its width in inches, return the cell."""
    cell = table.cell(row, col)
    cell.text = text
    cell.width = Inches(width)
    return cell


def _centerVertically(cell):
    """Append a ``w:vAlign`` element with value "center" to the cell's properties."""
    tcPr = cell._tc.get_or_add_tcPr()
    tcVAlign = OxmlElement('w:vAlign')
    tcVAlign.set(qn('w:val'), "center")
    tcPr.append(tcVAlign)


def populateTable(table):
    """Fill the widgets table with its headings, row labels and values.

    Layout written by this function (row, column):
      * row 0, columns 1-4: the Bikes / Cars / Scooters / Skates headings;
      * rows 1-4, column 0: the March / June / September / December labels;
      * rows 1-4, columns 1-2: one value per month;
      * row 1, columns 3 and 4: a single value each, vertically centred,
        after which rows 1-4 of that column are merged into one cell.
    Cell (0, 0) is left untouched.

    Args:
        table: Table to fill. Cells up to index (4, 4) are addressed, so it
            needs at least 5 rows and 5 columns.
    """
    # NOTE(review): confirm that the table object honours ``left_indent``;
    # it is assigned here exactly as before.
    table.left_indent = Inches(1.50)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    # table.style = 'TableGrid'

    # Header row: column 0 is skipped so the headings sit above the values.
    for col, label in enumerate((Bikes, Cars, Scooters, Skates), start=1):
        _setCell(table, 0, col, label, CELL_WIDTH)

    # One tuple per table row: (row label, column 1 value, column 2 value).
    monthly_rows = (
        (March, cell_text_2, cell_text_15),
        (June, cell_text_12, cell_text_6),
        (September, cell_text_13, cell_text_11),
        (December, cell_text_12, cell_text_6),
    )
    for row, (month, first_value, second_value) in enumerate(monthly_rows, start=1):
        _setCell(table, row, 0, month, CELL_WIDTH_WIDE)
        _setCell(table, row, 1, first_value, CELL_WIDTH)
        _setCell(table, row, 2, second_value, CELL_WIDTH)

    # Columns 3 and 4 carry a single value in row 1 that will span rows 1-4.
    for col, value in ((3, cell_text_25), (4, cell_text_40)):
        spanning_cell = _setCell(table, 1, col, value, CELL_WIDTH)
        _centerVertically(spanning_cell)
        spanning_cell.add_paragraph()

    # Merge rows 1-4 of column 4, then of column 3.
    for col in (4, 3):
        table.cell(1, col).merge(table.cell(4, col))



# Region specific build functions

def buildUsaPlan(doc, firstColor, secondColor, rowNo, colNo):
    """Assemble a complete USA document in ``doc``.

    Sections are appended in order: title, widgets, colors, acknowledgement.

    Args:
        doc: Document object to append to.
        firstColor: Text of the first color paragraph.
        secondColor: Text of the second color paragraph.
        rowNo: Row count for the widgets table.
        colNo: Column count for the widgets table.
    """
    addTitleUsa(doc)
    addWidgets(
        doc,
        number_of_rows=rowNo,
        number_of_columns=colNo,
        country='USA',
    )
    addColorsUsa(doc, firstColor=firstColor, secondColor=secondColor)
    addAcknowledgement(doc)



def buildNzlPlan(doc, color, rowNo, colNo, country):
    """Assemble a complete NZL document in ``doc``.

    Sections are appended in order: title, widgets, colors, acknowledgement.

    Args:
        doc: Document object to append to.
        color: Text of the single color paragraph.
        rowNo: Row count for the widgets table.
        colNo: Column count for the widgets table.
        country: Accepted but never read; the widgets section is always
            built with 'NZL'.
    """
    addTitleNzl(doc)
    addWidgets(
        doc,
        number_of_rows=rowNo,
        number_of_columns=colNo,
        country='NZL',
    )
    addColorsNzl(doc, color=color)
    addAcknowledgement(doc)



# Create and save each individual plan
#
# Every plan starts from its own freshly loaded copy of the base document so
# the custom styles are available and no content leaks between plans.
# The last argument of buildNzlPlan is its `country` parameter, so the country
# code is passed there rather than one of the TABLE_* constants. buildNzlPlan
# does not read that argument, so the generated files are unchanged.

# John Doe (NZL plan)
johnDoeDocument = Document('The Base Doc.docx')
buildNzlPlan(johnDoeDocument, colorJohnDoe, 5, 5, 'NZL')
johnDoeDocument.save(johnDoeWordDocCreate)

# Jim Doe (NZL plan)
jimDoeDocument = Document('The Base Doc.docx')
buildNzlPlan(jimDoeDocument, colorJimDoe, 5, 5, 'NZL')
jimDoeDocument.save(jimDoeWordDocCreate)

# Jane Doe (USA plan)
janeDoeDocument = Document('The Base Doc.docx')
buildUsaPlan(janeDoeDocument, firstColorJaneDoe, secondColorJaneDoe, 5, 5)
janeDoeDocument.save(janeDoeWordDocCreate)

# Jill Doe (USA plan)
jillDoeDocument = Document('The Base Doc.docx')
buildUsaPlan(jillDoeDocument, firstColorJillDoe, secondColorJillDoe, 5, 5)
jillDoeDocument.save(jillDoeWordDocCreate)

Two examples:
docNZL
docUSA


  1. Not coincidentally, I also calculate commissions in Python at the moment. (In the future I may tuck those calculations right into Salesforce.) ↩︎

  2. Years ago, I tried a similar "single repository" concept in Excel for annual performance reviews; it was a vastly less elegant solution. ↩︎

  3. At work I'm on a Windows machine. ↩︎