|
发表于 2008-9-14 17:39:22
|
显示全部楼层
罗云彬 《windows环境下32为汇编语言设计》 第10章 内存管理和文件操作 10.2 文件操作 第379页
有一个对文本文件的英语单词进行统计的小程序 WordCount 汇编写的
看中很久了
算法介绍如下
采用树型结构的办法。树的每个接点有个计数器,还有26个子结点指针,子结点指针根据单词中的下一个字母是A-Z分别指向不同的子结点,假如我们遇到一个单词“ver”,就为它创建4层结点,第1层结点中的第22个指针(代表V)指向第2层,第2层结点中的第5个指针(代表E)再指向第3层结点,同样第3层结点中第19个指针(代表R)指向第4层结点,并将第4层结点的计数中增加1表示遇到了一个“Ver”单词。如果遇到一个单词“var”,那么在第2层结点中的A位置讲出现一个分支指向另一个第2层结点。这样一来,数据树最大可能的深度就是单词的最大长度。
结构中设置了26个指针用来指向下一层结点,分别代表下一个字母分别是A-Z的情况,如果没有下一个字母,则指针设置为NULL,dwCount字段用来计数。
每当程序遇到一个单词的开始字母,就从第1层结点开始处理,如果对应位置的指针已经存在,则继续移动下去,如果不存在,表示以前还没有遇到这样的单词,那么就新申请一块内存建立一个下层结点,依此顺序随着单词中的字母一层层处理下去,直到遇到单词结尾的时候在当前结点数的计数上加1就可以了。
在输出结果的时候,程序用递归的方法遍历整个树,并将每个计数不是0的结点写到记录文件中。
今天正好,改成vb版本了。
也顺便练习一下 用类实现链表结构
这个是用空间换时间的做法,在扫描的时候只需要一次FOR循环
但是输出结果的时候要遍历树,需要循环或者递归。
要重复数据多的时候才能体现这个算法的优势。同一个单词重复上百次,用的空间也不用增加。
代码如下
CWord.cls
- Private m_lpLetter(0 To 25) As CWord
- Private dwCount As Long
- Private dwDepth As Long
- Private dwIndex As Long
- Private m_Value As String
- Public Property Let tValue(ByVal vData As String)
- m_Value = vData
- End Property
- Public Property Get tValue() As String
- tValue = m_Value
- End Property
- Public Property Let Count(ByVal vData As Long)
- dwCount = vData
- End Property
- Public Property Get Count() As Long
- Count = dwCount
- End Property
- Public Property Let Depth(ByVal vData As Long)
- dwDepth = vData
- End Property
- Public Property Get Depth() As Long
- Depth = dwDepth
- End Property
- Public Property Let iIndex(ByVal vData As Long)
- dwIndex = vData
- End Property
- Public Property Get iIndex() As Long
- iIndex = dwIndex
- End Property
- Public Property Let NextLetter(Index As Integer, ByVal vData As CWord)
- Set m_lpLetter(Index) = vData
- End Property
- Public Property Get NextLetter(Index As Integer) As CWord
- Set NextLetter = m_lpLetter(Index)
- End Property
复制代码
Form1.frm
- Dim root As CWord, p As CWord
-
- Dim dwCount As Long
- Dim dwIndex As Long
- Private Sub Command1_Click()
- Set root = New CWord
- dwCount = 0
- dwIndex = 0
- Dim bytWord() As Byte
- Dim strword() As String
- Dim strtmp As String
- 'strtmp = "I LOVE Love YOU i me too to"
- 'strtmp = StrConv(strtmp, vbLowerCase)
- 'bytWord = StrConv(strtmp, vbFromUnicode)
- Open App.Path & "\input.txt" For Binary As #1
- ReDim bytWord(FileLen(App.Path & "\input.txt"))
- Get #1, , bytWord
- Close
- Dim i As Long
- Set p = root
- For i = 0 To UBound(bytWord)
- Call CountLetter(LCase(bytWord(i)))
- 'Call CountLetter(bytWord(i))
- Next
- 'MsgBox UBound(bytWord)
- '读文件的时候,似乎默认最后一个字符是0
- 'Call CountLetter(0)
- 'MsgBox dwCount
- 'MsgBox dwIndex
- MsgBox "共" & dwCount & "个单词,不重复单词共" & dwIndex & "个", vbInformation
- ReDim strword(0 To dwIndex - 1)
- Call WalkTree(strword(), root)
- 'MsgBox strword(dwIndex - 1)
- strtmp = Join(strword, " ")
- bytWord = StrConv(strtmp, vbFromUnicode)
- Open App.Path & "\output.txt" For Binary As #2
- Put #2, , bytWord
- Close
- End Sub
- Private Sub CountLetter(b As Byte)
- Dim n As Integer
- Dim q As CWord
- Dim dwDepth As Long
- Static strtmp As String
- 'LCase
- 'If b >= 65 And b <= 90 Then b = b + 32
- If b >= 97 And b <= 122 Then
- strtmp = strtmp & Chr(b)
- n = b - 97
- If p.NextLetter(n) Is Nothing Then
- dwDepth = p.Depth
- Set q = New CWord
- p.NextLetter(n) = q
- q.Depth = dwDepth + 1
- Set p = q
- Else
- Set p = p.NextLetter(n)
- End If
- Else
- If p.Count = 0 Then
- p.iIndex = dwIndex
- p.tValue = strtmp
- dwIndex = dwIndex + 1
- End If
- p.Count = p.Count + 1
- If p.Depth Then dwCount = dwCount + 1
- Set p = root
- strtmp = ""
- End If
- End Sub
-
- Private Sub WalkTree(StrArr() As String, q As CWord)
- Dim i As Integer
- If q.Depth > 0 And q.Count > 0 Then
- StrArr(q.iIndex) = q.tValue
- End If
- For i = 0 To 25
- If Not (q.NextLetter(i) Is Nothing) Then
- Call WalkTree(StrArr, q.NextLetter(i))
- End If
- Next
- End Sub
复制代码
原版的汇编代码没什么注释,那我也不加了。算法原理已经说了,大家可以按照自己的理解重新写都可以。 |
本帖子中包含更多资源
您需要 登录 才可以下载或查看,没有帐号?立即注册
x
|