For fun, here’s an example using COM interop, but I encourage you to work with Open XML instead.
# Load the Python Standard and DesignScript Libraries
import sys
import clr
import System
from System import Array
from System.Collections.Generic import List, IList, Dictionary
from System.Reflection import BindingFlags
clr.AddReference("System.Runtime.InteropServices")
import System.Runtime.InteropServices
clr.AddReference('Microsoft.VisualBasic.Core')
import Microsoft.VisualBasic
from Microsoft.VisualBasic import Information
import traceback
import time
# Script inputs, read from the host-provided IN list:
#   IN[0] -> path handed to Word's Documents.Open
#   IN[1] -> the search terms that get looked up and highlighted
file_path, lst_search_term = IN[0], IN[1]

# Accumulates (range start, search term) tuples for every match found.
lst_range_found = []

# Placeholder passed for optional COM arguments that are left unspecified.
missing = System.Type.Missing
class WordlEnum:
    """
    Integer values of the Word interop enum members used by this script.

    The values are hard-coded as System.Int32 so that the Word interop
    library does not have to be loaded through clr just to read enum members.
    """
    # WdReplace
    Word_WdReplace_wdReplaceNone = System.Int32(0)
    Word_WdReplace_wdReplaceOne = System.Int32(1)
    Word_WdReplace_wdReplaceAll = System.Int32(2)
    # WdFindWrap
    Word_WdFindWrap_wdFindContinue = System.Int32(1)
    # WdColorIndex
    Word_WdColorIndex_wdYellow = System.Int32(7)
# ---------------------------------------------------------------------------
# Open `file_path` in Word through late-bound COM calls, highlight every match
# of each search term in yellow, and return the matches as a list of
# (range start, search term) tuples sorted by range start.
# ---------------------------------------------------------------------------

# Upper bound on Find.Execute calls per search term, so the wrap-around search
# can never loop forever.
MAX_FIND_ITERATIONS = 5000


def _com_get(target, name):
    """Return the value of property `name` on the late-bound COM object `target`."""
    return target.GetType().InvokeMember(name, BindingFlags.GetProperty, None, target, None)


def _com_set(target, name, value):
    """Assign `value` to property `name` on the late-bound COM object `target`."""
    target.GetType().InvokeMember(name, BindingFlags.SetProperty, None, target, (value, ))


def _com_call(target, name, *args):
    """Invoke method `name` on the late-bound COM object `target` and return its result.

    When no arguments are given, None is passed as the argument array.
    """
    return target.GetType().InvokeMember(name, BindingFlags.InvokeMethod, None, target, args if args else None)


# clear session: start a throw-away Word instance and quit it immediately
# NOTE(review): presumably meant to reset a stale Word session - confirm it is still needed
word_application = System.Activator.CreateInstance(System.Type.GetTypeFromProgID("Word.Application", True))
_com_call(word_application, "Quit")
word_application = None

# working instance, made visible so the highlighted document can be inspected
word_application = System.Activator.CreateInstance(System.Type.GetTypeFromProgID("Word.Application", True))
_com_set(word_application, "Visible", True)
documents = _com_get(word_application, "Documents")

# Accept a single string as well as a list of terms: iterating a bare string
# would search for each of its characters separately.
if lst_search_term is None:
    search_terms = []
elif isinstance(lst_search_term, str):
    search_terms = [lst_search_term]
else:
    search_terms = lst_search_term

# Positional arguments of Find.Execute; the text itself is set through Find.Text.
execute_args = (missing,    # FindText
                False,      # MatchCase
                True,       # MatchWholeWord
                missing,    # MatchWildcards
                missing,    # MatchSoundsLike
                missing,    # MatchAllWordForms
                True,       # Forward
                WordlEnum.Word_WdFindWrap_wdFindContinue,   # Wrap
                True,       # Format
                missing,    # ReplaceWith
                missing)    # Replace

lst_range_found = []
# Same content as lst_range_found, kept as a set for O(1) "already seen" checks.
seen_matches = set()
try:
    doc = _com_call(documents, "Open", file_path)
    for search_term in search_terms:
        # nothing to look for
        if search_term is None or search_term == "":
            continue
        select_obj = _com_get(word_application, "Selection")
        find_obj = _com_get(select_obj, "Find")
        _com_call(find_obj, "ClearFormatting")
        _com_set(find_obj, "Text", search_term)
        # execute the search engine, highlight, and store result with the start range number
        for _ in range(MAX_FIND_ITERATIONS):
            if not _com_call(find_obj, "Execute", *execute_args):
                # no (more) match for this term
                break
            found_range = _com_get(select_obj, "Range")
            r_start = _com_get(found_range, "Start")
            match = (r_start, search_term)
            if match in seen_matches:
                # the search wrapped around to a match that is already recorded
                break
            _com_set(found_range, "HighlightColorIndex", WordlEnum.Word_WdColorIndex_wdYellow)
            seen_matches.add(match)
            lst_range_found.append(match)
except Exception:
    # best effort: report the failure and still return what was found so far
    print(traceback.format_exc())

# The script never closes the document or quits this Word instance.
# NOTE(review): presumably left open on purpose so the highlights can be reviewed - confirm
lst_range_found.sort(key=lambda match: match[0])
OUT = lst_range_found