Thursday, June 13, 2019

Generating a List of Unique Words

Two great and useful macros here:

Number one

Sub UniqueWordList()
Dim wList As New Collection
Dim wrd
Dim chkwrd
Dim sTemp As String
Dim k As Long

For Each wrd In ActiveDocument.Range.Words
sTemp = Trim(LCase(wrd))
If sTemp >= "a" And sTemp <= "z" Then k = 0 For Each chkwrd In wList k = k + 1 If chkwrd = sTemp Then GoTo nw If chkwrd > sTemp Then
wList.Add Item:=sTemp, Before:=k
GoTo nw
End If
Next chkwrd
wList.Add Item:=sTemp
End If
nw:
Next wrd

sTemp = "There are " & ActiveDocument.Range.Words.Count & " words "
sTemp = sTemp & "in the document, before this summary, but there "
sTemp = sTemp & "are only " & wList.Count & " unique words."

ActiveDocument.Range.Select
Selection.Collapse Direction:=wdCollapseEnd
Selection.TypeText vbCrLf & sTemp & vbCrLf
For Each chkwrd In wList
Selection.TypeText chkwrd & vbCrLf
Next chkwrd
End Sub

Number 2

From Ken Endacott on 2016-10-15 07:40:30 - Depending on the vocabulary of the author, large documents can have over 5000 unique words. The above macro displays one word per line which means that the list will occupy around 100 pages. The macro below will write the list of words into a new document with five columns per page.

Sub UniqueWordList()
Dim wList As New Collection
Dim wrd
Dim chkwrd
Dim sTemp As String
Dim k As Long

For Each wrd In ActiveDocument.Range.words
sTemp = Trim(LCase(wrd))
If sTemp >= "a" And sTemp <= "z" Then k = 0 For Each chkwrd In wList k = k + 1 If chkwrd = sTemp Then GoTo nw If chkwrd > sTemp Then
wList.Add Item:=sTemp, Before:=k
GoTo nw
End If
Next chkwrd
wList.Add Item:=sTemp
End If
nw:
Next wrd

sTemp = "There are " & ActiveDocument.Range.words.Count & " words " & _
"in " & ActiveDocument.Name & ", but there "
Documents.Add
With ActiveDocument.Styles("Normal")
With .ParagraphFormat
.SpaceBefore = 0
.SpaceAfter = 0
.LineSpacingRule = wdLineSpaceSingle
End With
.NextParagraphStyle = "Normal"
.NoSpaceBetweenParagraphsOfSameStyle = False
.Font.Name = "Times New Roman"
.Font.Size = 10
End With

sTemp = sTemp & "are only " & wList.Count & " unique words."
Selection.TypeText vbCrLf & sTemp & vbCrLf & vbCrLf

Selection.InsertBreak Type:=wdSectionBreakContinuous
With Selection.PageSetup.TextColumns
.SetCount NumColumns:=5
.EvenlySpaced = True
.LineBetween = True
.Width = CentimetersToPoints(2.8)
.Spacing = CentimetersToPoints(0.4)
End With

For Each chkwrd In wList
Selection.TypeText chkwrd & vbCrLf
Next chkwrd
End Sub

If you want to know how often a word appears then the macro UniqueWordCount will list unique words and give a count of how many times each word appears in the document.

Sub UniqueWordCount()
Dim wList As New Collection
Dim wCount As New Collection
Dim wrd
Dim chkwrd
Dim sTemp As String
Dim k As Long
Dim j As Long

For Each wrd In ActiveDocument.Range.words
sTemp = Trim(LCase(wrd))
If sTemp >= "a" And sTemp <= "z" Then k = 0 For Each chkwrd In wList k = k + 1 If chkwrd = sTemp Then j = wCount(k) + 1 wCount.Remove (k) If k > wCount.Count Or wCount.Count = 1 Then
wCount.Add Item:=j
Else
wCount.Add Item:=j, Before:=k
End If
GoTo nw
End If
If chkwrd > sTemp Then
wList.Add Item:=sTemp, Before:=k
wCount.Add Item:=1, Before:=k
GoTo nw
End If
Next chkwrd
wList.Add Item:=sTemp
wCount.Add Item:=1
End If
nw:
Next wrd

sTemp = "There are " & ActiveDocument.Range.words.Count & " words " & _
"in " & ActiveDocument.Name & ", but there "
Documents.Add
With ActiveDocument.Styles("Normal")
With .ParagraphFormat
.SpaceBefore = 0
.SpaceAfter = 0
.LineSpacingRule = wdLineSpaceSingle
End With
.NextParagraphStyle = "Normal"
.NoSpaceBetweenParagraphsOfSameStyle = False
.Font.Name = "Times New Roman"
.Font.Size = 10
End With

sTemp = sTemp & "are only " & wList.Count & " unique words."
Selection.TypeText vbCrLf & sTemp & vbCrLf & vbCrLf

Selection.InsertBreak Type:=wdSectionBreakContinuous
With Selection.PageSetup.TextColumns
.SetCount NumColumns:=5
.EvenlySpaced = True
.LineBetween = True
.Width = CentimetersToPoints(2.8)
.Spacing = CentimetersToPoints(0.4)
End With

For k = 1 To wList.Count
Selection.TypeText wList(k) & "(" & wCount(k) & ")" & vbCrLf
Next k
End Sub

No comments: