I don’t know why the “unexpected indent” error can’t give me a line number to look at. It drives me crazy whenever I run into it. I’ve looked through this code ten times and can’t spot the problem. I even copied it into PyCharm and can’t see any errors there relating to it.
# Load the Python Standard and DesignScript Libraries
import sys
import clr
clr.AddReference('ProtoGeometry')
from Autodesk.DesignScript.Geometry import *
import System
clr.AddReference("Microsoft.Office.Interop.Word")
import Microsoft.Office.Interop.Word as Word
import re
from itertools import groupby
from operator import itemgetter
########################################################################
# Start-up: open the Word document handed to this node and collect its
# paragraphs. Everything below reads the module-level names bound here.
# The inputs to this node will be stored as a list in the IN variables.
wordDoc = IN[0]  # first node input; presumably the path of the Word file - TODO confirm
missing = System.Type.Missing  # placeholder for an optional interop argument that is not supplied
word_application = Word.ApplicationClass()
word_application.visible = False
# Third positional argument is True. NOTE(review): per the Word interop
# Documents.Open signature this should be ReadOnly - confirm.
document = word_application.Documents.Open(wordDoc, missing, True)
wdoc = document.Range()
# Get the document text and its paragraph collection.
rText = wdoc.Text  # not read again anywhere in the visible script
rParagraphs = wdoc.Paragraphs
pCount = rParagraphs.Count
startIndex = 0  # reassigned later, just before the "Find start index" search
#*******************Class********************************
class WordParagraph(object):
    """Plain record describing one paragraph extracted from the Word document.

    Attributes (all stored exactly as passed to the constructor):
        stringContent        -- paragraph text
        stringLength         -- length of stringContent
        paraType             -- paragraph kind; the script uses "bullet",
                                "letter" or "item"
        paraUnderlineStart   -- list of underline run start offsets, or ["x"]
        paraUnderlineLength  -- list of underline run lengths, or ["x"]
        listLetter           -- list label, or 0 when there is none
        listLevel            -- list nesting level, or "x" when not applicable
        paraTab              -- tab flag (0 or 1)
        bulletStart          -- bullet start offsets
        bulletLength         -- bullet lengths
    """

    def __init__(self, stringContent, stringLength, paraType, paraUnderlineStart, paraUnderlineLength, listLetter, listLevel, paraTab, bulletStart, bulletLength):
        # Assigned in the same order as the parameters so the signature and
        # the attribute list can be compared at a glance.
        self.stringContent = stringContent
        self.stringLength = stringLength
        self.paraType = paraType
        self.paraUnderlineStart = paraUnderlineStart
        self.paraUnderlineLength = paraUnderlineLength
        self.listLetter = listLetter
        self.listLevel = listLevel
        self.paraTab = paraTab
        self.bulletStart = bulletStart
        self.bulletLength = bulletLength

    def __repr__(self):
        # Short, debugger-friendly summary; the remaining fields are
        # available as attributes.
        return "WordParagraph(paraType=%r, stringLength=%r, stringContent=%r)" % (
            self.paraType,
            self.stringLength,
            self.stringContent,
        )
# Holds one WordParagraph instance per paragraph kept by the main loop.
paragraphList = []
#******************Temp Parameters************************
# Working variables; the main loop overwrites them for every paragraph.
pContent = "" # becomes WordParagraph.stringContent
pStringLength = 0 # becomes WordParagraph.stringLength
pParaType = 0 # becomes WordParagraph.paraType
# How the main loop derives paraType:
# ListFormat.ListType == 2 -> "bullet"
# ListFormat.ListType == 3 -> "letter" (simple numbered list; labels can be letters)
# anything else -> a manually typed list marker if one is found, otherwise "item"
pListLevel = 0 # nesting level for bullet lists; set to "x" when not needed
pListLetter = 0 # list label; stays 0 when the paragraph has none
pParaUnderline = [] # one list of underlined character indexes per kept paragraph; returned in OUT
pTabbedStatus = 0 # becomes WordParagraph.paraTab
frontTrim = 0 # offset subtracted from underline indexes in the main loop
#***Test Parameters(TO BE DELETED)***
# Debug collections filled by the main loop. Only stringContentTest is
# returned in OUT; the others are not read again in the visible script.
stringContentTest = []
stringLengthTest = []
paraTypeTest = []
listLevelTest = []
listLetterTest = []
paraUnderStartTest = []
paraUnderLengthTest = []
paraTabbedStatusTest = []
#********************Main for loop*******************
def _group_underline_runs(indexes, front_trim, max_length):
    """Collapse ascending character indexes into runs of consecutive values.

    indexes    -- ascending 0-based positions of underlined characters,
                  measured in the untrimmed paragraph text
    front_trim -- number of characters removed from the front of the
                  paragraph text; subtracted from every run start
    max_length -- upper bound applied to each run length

    Returns (starts, lengths) as two parallel lists of equal size. When
    indexes is empty both lists are ["x"], the marker the rest of the script
    uses for "no underline".
    """
    if not indexes:
        return ["x"], ["x"]

    starts = []
    lengths = []
    run_start = previous = indexes[0]
    for index in indexes[1:]:
        if index != previous + 1:
            # Gap found: close the run that ended at `previous`.
            starts.append(run_start - front_trim)
            lengths.append(min(previous - run_start + 1, max_length))
            run_start = index
        previous = index
    # Close the final run (also covers the single-index case).
    starts.append(run_start - front_trim)
    lengths.append(min(previous - run_start + 1, max_length))
    return starts, lengths


# Walk the document paragraph by paragraph, classify each one, locate its
# underlined runs and store the result as a WordParagraph in paragraphList.
# NOTE(review): range(pCount - 1) together with the "iP + 1" lookup visits
# paragraphs 1 .. pCount - 1, so the final paragraph is never read. Kept
# unchanged; confirm whether that is intended.
for iP in range(pCount - 1):
    pRange = rParagraphs[iP + 1].Range
    # Paragraphs of two characters or fewer are ignored.
    if pRange.Characters.Count <= 2:
        continue

    #stringContent
    originalContent = pRange.Text
    upperContent = originalContent.upper()
    pContent = upperContent.strip()
    # Underline indexes are measured in the untrimmed text, so remember how
    # many leading characters strip() removed. Counting them directly covers
    # any number of tabs (the previous regex chain always stopped at the
    # single-tab case) as well as leading spaces.
    frontTrim = len(upperContent) - len(upperContent.lstrip())
    stringContentTest.append(originalContent)

    #paraType, listLevel, and listLetter
    listType = pRange.ListFormat.ListType
    if listType == 2:  # Word bullet list
        pParaType = "bullet"
        pListLevel = pRange.ListFormat.ListLevelNumber
        pListLetter = 0
    elif listType == 3:  # Word simple numbered list
        pParaType = "letter"
        pListLevel = "x"
        pListLetter = pRange.ListFormat.ListString
    elif re.match("^.[.]\t", pContent):  # manually typed label such as "A.<tab>"
        pParaType = "letter"
        pListLevel = "x"
        pListLetter = pContent[0:2]
        pContent = pContent[3:]
        # Three more characters were dropped from the front; add them to the
        # whitespace already counted instead of overwriting that count.
        frontTrim += 3
    elif pContent.startswith("- "):  # dash with space
        pParaType = "bullet"
        pListLevel = 1
        pListLetter = 0
        pContent = pContent[2:]
        frontTrim += 2
    elif pContent.startswith("-"):  # dash
        pParaType = "bullet"
        pListLevel = 1
        pListLetter = 0
        pContent = pContent[1:]
        frontTrim += 1
    else:
        # NOTE(review): the former "^\t\t-" branch (list level 2) could never
        # match because pContent is already stripped, so it was removed.
        # Detecting nested manual bullets would need the leading tab count.
        pParaType = "item"
        pListLevel = "x"
        pListLetter = 0
    paraTypeTest.append(pParaType)
    listLevelTest.append(pListLevel)
    listLetterTest.append(pListLetter)

    #paraUnderline
    # The Characters collection is read with "i + 1" while the stored index
    # is the 0-based i.
    cRange = pRange.Characters
    paraUnderIndex = [i for i in range(cRange.Count) if cRange[i + 1].Underline == 1]
    pParaUnderline.append(paraUnderIndex)

    #stringLength
    pStringLength = len(pContent)
    stringLengthTest.append(pStringLength) #!!!!!DELETE!!!!!!

    # Group the underlined indexes into (start, length) runs relative to the
    # trimmed text; run lengths are capped at the trimmed text length.
    groupedIndex, groupedRange = _group_underline_runs(paraUnderIndex, frontTrim, pStringLength)

    #Tabbed
    if pParaType == "letter" or pContent.startswith(("OUTLINE", "DIVISION", "SECTION")):
        pTabbedStatus = 0
    else:
        pTabbedStatus = 1
    paraTabbedStatusTest.append(pTabbedStatus)

    #Append the pContent for testing
    stringContentTest.append(pContent)
    paraUnderStartTest.append(groupedIndex)
    paraUnderLengthTest.append(groupedRange)

    #bulletStart and bulletLength
    # NOTE(review): non-bullet paragraphs pass empty lists here. The old code
    # set two unused "x" variables in that case; confirm whether ["x"] was
    # the intended value.
    groupedBulletStart = []
    groupedBulletLength = []
    if pParaType == "bullet":
        groupedBulletStart.append([0])
        groupedBulletLength.append([pStringLength])

    #Create the list of paragraphs as class instances
    paragraphList.append(WordParagraph(pContent, pStringLength, pParaType, groupedIndex, groupedRange, pListLetter, pListLevel, pTabbedStatus, groupedBulletStart, groupedBulletLength))
#***************Concat Bullet list under 1000 characters*******************
# Consecutive "bullet" paragraphs are merged into the first bullet of their
# group as long as the combined stringLength stays below 1000. Merged
# paragraphs are recorded in delList and removed afterwards.
#
# concatListStart is the index of the paragraph currently collecting bullets,
# or None when no group is open. None (not 0) is the "no group" marker so a
# bullet at index 0 can open a group too.
concatListStart = None
delList = []
# Every paragraph is visited, including the last one.
for i in range(len(paragraphList)):
    current = paragraphList[i]
    if current.paraType != "bullet":
        # Any non-bullet paragraph closes the open group.
        concatListStart = None
        continue

    if concatListStart is not None:
        group = paragraphList[concatListStart]
        if group.stringLength + current.stringLength < 1000:
            # Underline offsets of the merged paragraph are shifted by the
            # group's length *before* this merge.
            # NOTE(review): the "\n" separator is not counted in stringLength
            # or in this offset - confirm against the consumer of OUT.
            offset = group.stringLength
            group.stringContent = group.stringContent + "\n" + current.stringContent
            if current.paraUnderlineLength != ["x"]:
                # Carry over every underline run of the merged paragraph,
                # not only the first one.
                shifted = [start + offset for start in current.paraUnderlineStart]
                if group.paraUnderlineLength == ["x"]:
                    # The group had no underline yet: replace the marker.
                    group.paraUnderlineStart = shifted
                    group.paraUnderlineLength = list(current.paraUnderlineLength)
                else:
                    group.paraUnderlineStart.extend(shifted)
                    group.paraUnderlineLength.extend(current.paraUnderlineLength)
            group.stringLength += current.stringLength
            delList.append(i)
            continue

    # Either no group is open or this bullet does not fit into it: the
    # bullet opens a new group if it is short enough, otherwise none is open.
    concatListStart = i if current.stringLength < 1000 else None

# Removes the paragraphs (class instances) that have been concatenated.
# delList is ascending, so deleting from the back keeps the remaining
# indexes valid.
for i in reversed(delList):
    del paragraphList[i]
###########################Find start index##############################
# Position of the first paragraph whose text matches
# "OUTLINE ... SPECIFICATIONS" at its beginning; 0 when none matches.
# Everything before that paragraph is dropped.
startIndex = next(
    (position
     for position, paragraph in enumerate(paragraphList)
     if re.match("^OUTLINE .* SPECIFICATIONS", paragraph.stringContent)),
    0
)
paragraphList = paragraphList[startIndex:]
# Split the paragraph records into one flat list per attribute, each in
# paragraphList order, so they can be returned through OUT.
outStringContent = [para.stringContent for para in paragraphList]
outStringLength = [para.stringLength for para in paragraphList]
outParaType = [para.paraType for para in paragraphList]
outParaUnderlineStart = [para.paraUnderlineStart for para in paragraphList]
outParaUnderlineLength = [para.paraUnderlineLength for para in paragraphList]
outListLetter = [para.listLetter for para in paragraphList]
outListLevel = [para.listLevel for para in paragraphList]
outParaTab = [para.paraTab for para in paragraphList]
outBulletStart = [para.bulletStart for para in paragraphList]
outBulletLength = [para.bulletLength for para in paragraphList]
##################Check for underline errors####################
# Collect the positions of paragraphs whose last underline run
# (start + length - 1) lies beyond the paragraph's string length.
# Paragraphs marked "x" (no underline) are skipped.
underLengthCheck = [
    position
    for position, (textLength, runStarts, runLengths) in enumerate(
        zip(outStringLength, outParaUnderlineStart, outParaUnderlineLength))
    if runLengths[0] != "x" and textLength < runStarts[-1] + runLengths[-1] - 1
]
########################
# Shut Word down and drop the reference to the application object.
# NOTE(review): this only runs when everything above succeeded; an earlier
# exception would leave the hidden Word instance running - consider try/finally.
word_application.Quit()
word_application = None
# Node output: the per-attribute lists, followed by the raw underline indexes
# (pParaUnderline), the debug text list (stringContentTest), the failed
# underline checks (underLengthCheck) and the bullet start/length lists.
OUT = outStringContent, outStringLength, outParaType, outParaUnderlineStart, outParaUnderlineLength, outListLetter, outListLevel, outParaTab, pParaUnderline, stringContentTest, underLengthCheck, outBulletStart, outBulletLength