本文实例为大家分享了英文单词统计器的 PHP 实现,供大家参考,具体内容如下:
程序开始运行后,按"浏览"按钮选择一个英文文档,再按"统计 Statistics"按钮,即可得到按字母顺序列出的所有单词,以及每个单词出现的次数。
用于测试的数据文档: data.txt
驱动程序:word.php
output.php 和 StringTokenizer.php 是被 word.php 引用(require)的程序,必须与 word.php 放在同一个文件夹中
1. words_statistics_PHP.png
2. word.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
<html>
<style>
td { background-color:#CF6; width:100px; margin:5px; }
</style>
<body>
<?php
/**
 * English word statistics - driver page.
 *
 * Usage: choose an English text file with the file input, then press the
 * submit button. The page prints every distinct word in alphabetical order
 * together with the number of times it occurs.
 *
 * Author: Tongchun Xu (Open Source China community)
 * Completion date: 10 June 2016
 */
require("StringTokenizer.php");
require("output.php");

// Only process the upload when the form was actually submitted. Testing with
// empty() avoids the "undefined index" notice on the initial GET request.
if (!empty($_POST['submit'])) {
    // A missing or malformed upload entry is reported like "no file uploaded".
    $error = (isset($_FILES["file"]["error"]) && is_int($_FILES["file"]["error"]))
        ? $_FILES["file"]["error"]
        : UPLOAD_ERR_NO_FILE;

    if ($error > 0) {
        echo "Error: " . $error . "<br />";
    } else {
        $path = $_FILES["file"]["tmp_name"];

        // Read the whole file in one call. Unlike fread($h, filesize(...)),
        // this also works for an empty file, and there is no handle to leak.
        // is_uploaded_file() guards against a forged tmp_name.
        $str = is_uploaded_file($path) ? file_get_contents($path) : false;
        if ($str === false) {
            die("Unable to open file!");
        }

        // Every character in this set separates words.
        $delim = "?\\,. /:!\"()\t\n\r\f%";
        $st = new StringTokenizer($str, $delim);
        echo '找到字符串: ' . $st->countTokens();

        // Insert each token into the sorted list; duplicates bump a counter.
        $list = new LinkedList();
        while ($st->hasMoreTokens()) {
            $list->orderInsert($st->nextToken());
        }

        $list->words_count();
        $list->traversal();
    }
}
?>
<h2>英文文档单词统计 Statistics on English words</h2>
<p>程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计 Statistics"钮, 即可得到按字母顺序列出的所有单词,及其出现的次数</p>
<form action="word.php" method="post" enctype="multipart/form-data">
<label for="file">英文文档名 File Name:</label>
<input type="file" name="file" id="file" />
<input type="submit" name="submit" value="统计 Statistics" />
</form>
</body>
</html>
3. output.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
<meta charset="utf-8" />
<?php
/**
 * Node and LinkedList: a singly linked list that stores strings in the
 * byte-wise order defined by strcmp() and records how many times each
 * string was inserted through orderInsert().
 *
 * The <meta> tag above is emitted whenever this file is included; it is kept
 * so that pages including the file continue to declare UTF-8.
 *
 * Author: Tongchun Xu (Open Source China community)
 * Completion date: 10 June 2016
 */

/**
 * One cell of the list.
 */
class Node
{
    /** @var mixed The stored string (word). */
    public $data;

    /** @var int Number of times the word has been inserted. */
    public $frequency;

    /** @var Node|null The following cell, or null at the end of the list. */
    public $next;

    /**
     * @param mixed     $data      The word to store.
     * @param Node|null $next      The successor cell.
     * @param int       $frequency Initial occurrence count.
     */
    public function __construct($data, $next = null, $frequency = 1)
    {
        $this->data = $data;
        $this->next = $next;
        $this->frequency = $frequency;
    }
}

/**
 * Singly linked list with a sentinel head node.
 */
class LinkedList
{
    /** @var Node Sentinel head; its data is a placeholder and never reported. */
    private $head;

    /**
     * Unused by this class. Declared explicitly (instead of being created
     * dynamically in the constructor) so existing readers still see null
     * without triggering the dynamic-property deprecation of PHP 8.2.
     *
     * @var null
     */
    public $first = null;

    public function __construct()
    {
        $this->head = new Node("dummy 傀儡");
    }

    /**
     * @return bool True when the list holds no words.
     */
    public function isEmpty()
    {
        return $this->head->next === null;
    }

    /**
     * Inserts $data at its sorted position, or increments the frequency of
     * the existing node when the same string is already stored.
     *
     * @param string $data The word to record.
     * @return void
     */
    public function orderInsert($data)
    {
        $existing = $this->find($data);
        if ($existing !== null) {
            $existing->frequency++;
            return;
        }

        // Stop at the last node whose successor is not smaller than $data.
        // This also covers the empty list, where the head has no successor.
        $prev = $this->head;
        while ($prev->next !== null && strcmp($data, $prev->next->data) > 0) {
            $prev = $prev->next;
        }
        $prev->next = new Node($data, $prev->next);
    }

    /**
     * Appends $data at the end of the list without regard to ordering.
     *
     * @param string $data The word to append.
     * @return void
     */
    public function insertLast($data)
    {
        $tail = $this->head;
        while ($tail->next !== null) {
            $tail = $tail->next;
        }
        $tail->next = new Node($data);
    }

    /**
     * Looks up the node storing exactly $value.
     *
     * The comparison is byte-wise (strcmp). A loose == comparison would treat
     * numeric-looking words such as "10" and "1e1" as equal.
     *
     * @param string $value The word to look for.
     * @return Node|null The matching node, or null when absent or when the
     *                   list is empty.
     */
    public function find($value)
    {
        for ($node = $this->head->next; $node !== null; $node = $node->next) {
            if (strcmp($node->data, $value) === 0) {
                return $node;
            }
        }
        return null;
    }

    /**
     * Echoes the list as an HTML table, 11 cells per row, each cell showing
     * a word and its frequency. Echoes a notice when the list is empty.
     *
     * @return void
     */
    public function traversal()
    {
        if ($this->isEmpty()) {
            echo "链表为空!";
            return;
        }

        echo "输出结果:<table><tr>";
        $n = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            // Words come from an uploaded file, so escape them for HTML.
            $word = htmlspecialchars((string) $p->data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<td>" . $word . "<br>出现次数:" . $p->frequency . "</td>";
            $n++;
            if ($n % 11 == 0) {
                echo "</tr><tr>";
            }
        }
        echo "</tr></table>";
    }

    /**
     * Echoes the number of nodes (distinct words) in the list, or a notice
     * when the list is empty.
     *
     * @return void
     */
    public function words_count()
    {
        if ($this->isEmpty()) {
            echo "<br>没有储存字符串 <br>";
            return;
        }

        // Count every node, including the first one.
        $counter = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            $counter++;
        }
        echo "***共有单词 " . $counter . " 个***";
    }
}
4. StringTokenizer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
<?php
/**
 * The string tokenizer class allows an application to break a string into
 * tokens.
 *
 * The input is split once, in the constructor, so each instance keeps its own
 * cursor. (PHP's strtok() has a single global cursor, which makes two
 * tokenizers used at the same time interfere with each other.)
 *
 * Example:
 *
 *     $str   = "this is:@\t\n a test!";
 *     $delim = " !@:'\t\n\0";            // characters that separate tokens
 *     $st    = new StringTokenizer($str, $delim);
 *     echo 'Total tokens: ' . $st->countTokens() . '<br/>';
 *     while ($st->hasMoreTokens()) {
 *         echo $st->nextToken() . '<br/>';
 *     }
 *
 * prints "Total tokens: 4" followed by: this, is, a, test.
 *
 * @author Azeem Michael
 */
class StringTokenizer
{
    /** @var string The string being tokenized. */
    private $string;

    /** @var string The set of delimiter characters. */
    private $delim;

    /** @var string[] All tokens of $string, in order. */
    private $tokens;

    /** @var int Index in $tokens of the token nextToken() returns next. */
    private $position = 0;

    /**
     * Constructs a string tokenizer for the specified string.
     *
     * @param string $str   String to tokenize.
     * @param string $delim The set of delimiters (each character separates
     *                      tokens); defaults to " \n\r\t\0".
     */
    public function __construct($str, $delim = " \n\r\t\0")
    {
        $this->string = (string) $str;
        $this->delim = (string) $delim;
        $this->tokens = self::split($this->string, $this->delim);
    }

    /**
     * Returns how many tokens remain, i.e. how many times nextToken() can
     * still be called before it returns false. Does not move the cursor.
     *
     * @return int Number of remaining tokens.
     */
    public function countTokens()
    {
        return count($this->tokens) - $this->position;
    }

    /**
     * Tests whether more tokens are available. Does not move the cursor; call
     * nextToken() to advance.
     *
     * @return bool True if at least one token remains, false otherwise.
     */
    public function hasMoreTokens()
    {
        return $this->position < count($this->tokens);
    }

    /**
     * Returns the next token and advances the cursor by one.
     *
     * @return string|false The next token, or false when none remain.
     */
    public function nextToken()
    {
        if (!$this->hasMoreTokens()) {
            return false;
        }
        return $this->tokens[$this->position++];
    }

    /**
     * Splits $str on any character of $delim, skipping empty tokens.
     *
     * @param string $str   The input string.
     * @param string $delim The delimiter characters.
     * @return string[] The non-empty tokens in order of appearance.
     */
    private static function split($str, $delim)
    {
        $tokens = array();
        $length = strlen($str);
        $pos = 0;

        while ($pos < $length) {
            // Skip a run of delimiter characters.
            $pos += strspn($str, $delim, $pos);
            if ($pos >= $length) {
                break;
            }
            // Take everything up to the next delimiter as one token.
            $run = strcspn($str, $delim, $pos);
            $tokens[] = substr($str, $pos, $run);
            $pos += $run;
        }

        return $tokens;
    }
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。