一 什么是集合和映射?
集合是高級(jí)數(shù)據(jù)結(jié)構(gòu)的一種匕争,它們的底層可以用多種方式來實(shí)現(xiàn)避乏,就像之前的棧和隊(duì)列一樣。集合就像一個(gè)容器一樣甘桑,我們這里實(shí)現(xiàn)的集合里邊存儲(chǔ)的數(shù)據(jù)是不能重復(fù)的拍皮,這樣的數(shù)據(jù)結(jié)構(gòu)可以幫助我們更加快速的進(jìn)行統(tǒng)計(jì)。講到這里是不是發(fā)現(xiàn)和我們之前的二分搜索樹非常相似扇住,其實(shí)這一次我們就會(huì)使用二分搜索樹作為集合的底層實(shí)現(xiàn)春缕。
二 二分搜索樹實(shí)現(xiàn)集合Set
1?? 實(shí)現(xiàn)集合Set接口
package com.mufeng.set;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public interface Set<E> {
/**
* 向集合中添加元素(實(shí)現(xiàn)去重的操作)
* @param e
*/
void add(E e);
/**
* 從集合中刪除元素
* @param e
*/
void remove(E e);
/**
* 查詢集合中是否有相同元素
* @param e
* @return
*/
boolean contains(E e);
/**
* 獲取集合中元素的個(gè)數(shù)
* @return
*/
int getSize();
/**
* 查看集合是否為空
* @return
*/
boolean isEmpty();
}
2?? 實(shí)現(xiàn)具體的Set類
package com.mufeng.set;
import com.mufeng.binarySearchTree.BST;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public class BSTSet<E extends Comparable<E>> implements Set<E> {
/**
* 引入二分搜索樹
*/
private BST<E> bst;
/**
* 初始化bst
*/
public BSTSet(){
bst = new BST<>();
}
/**
* 向集合中添加元素(實(shí)現(xiàn)去重添加)
* @param e
*/
@Override
public void add(E e) {
bst.add(e);
}
/**
* 從集合中刪除一個(gè)元素
* @param e
*/
@Override
public void remove(E e) {
bst.remove(e);
}
/**
* 查看集合中是否包含元素e
* @param e
* @return
*/
@Override
public boolean contains(E e) {
return bst.contains(e);
}
/**
* 獲取集合中元素的個(gè)數(shù)
* @return
*/
@Override
public int getSize() {
return bst.size();
}
/**
* 查看集合是否為空
* @return
*/
@Override
public boolean isEmpty() {
return bst.isEmpty();
}
}
其實(shí)不難發(fā)現(xiàn),在使用二分搜索樹實(shí)現(xiàn)集合的時(shí)候艘蹋,我們沒有做多余的操作锄贼,因?yàn)槲覀冏约簩?shí)現(xiàn)的二分搜索樹已經(jīng)完全包含了集合的所有特性;
三 測(cè)試
1?? 編寫測(cè)試工具類
package com.mufeng.set;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Locale;
import java.io.File;
import java.io.BufferedInputStream;
import java.io.IOException;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public class FileOperation {
// 讀取文件名稱為filename中的內(nèi)容女阀,并將其中包含的所有詞語放進(jìn)words中
public static boolean readFile(String filename, ArrayList<String> words){
if (filename == null || words == null){
System.out.println("filename is null or words is null");
return false;
}
// 文件讀取
Scanner scanner;
try {
File file = new File(filename);
if(file.exists()){
FileInputStream fis = new FileInputStream(file);
scanner = new Scanner(new BufferedInputStream(fis), "UTF-8");
scanner.useLocale(Locale.ENGLISH);
}
else
return false;
}
catch(IOException ioe){
System.out.println("Cannot open " + filename);
return false;
}
// 簡(jiǎn)單分詞
// 這個(gè)分詞方式相對(duì)簡(jiǎn)陋, 沒有考慮很多文本處理中的特殊問題
// 在這里只做demo展示用
if (scanner.hasNextLine()) {
String contents = scanner.useDelimiter("\\A").next();
int start = firstCharacterIndex(contents, 0);
for (int i = start + 1; i <= contents.length(); )
if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {
String word = contents.substring(start, i).toLowerCase();
words.add(word);
start = firstCharacterIndex(contents, i);
i = start + 1;
} else
I++;
}
return true;
}
// 尋找字符串s中宅荤,從start的位置開始的第一個(gè)字母字符的位置
private static int firstCharacterIndex(String s, int start){
for( int i = start ; i < s.length() ; i ++ )
if( Character.isLetter(s.charAt(i)) )
return I;
return s.length();
}
}
2?? 引入測(cè)試文件
a-tale-of-two-cities.txt
pride-and-prejudice.txt
3?? 編寫測(cè)試程序
package com.mufeng;
import com.mufeng.set.BSTSet;
import com.mufeng.set.FileOperation;
import java.util.ArrayList;
public class Main {
public static void main(String[] args) {
System.out.println("Pride and Prejudice");
ArrayList<String> words1 = new ArrayList<>();
if(FileOperation.readFile("pride-and-prejudice.txt", words1)) {
System.out.println("Total words: " + words1.size());
BSTSet<String> set1 = new BSTSet<>();
for (String word : words1)
set1.add(word);
System.out.println("Total different words: " + set1.getSize());
}
System.out.println();
System.out.println("A Tale of Two Cities");
ArrayList<String> words2 = new ArrayList<>();
if(FileOperation.readFile("a-tale-of-two-cities.txt", words2)){
System.out.println("Total words: " + words2.size());
BSTSet<String> set2 = new BSTSet<>();
for(String word: words2)
set2.add(word);
System.out.println("Total different words: " + set2.getSize());
}
}
}
測(cè)試結(jié)果
四 鏈表實(shí)現(xiàn)集合Set
package com.mufeng.set;
import com.mufeng.linkedList.LinkedList;
/**
* Created by wb-yxk397023 on 2018/7/4.
*/
public class LinkedListSet<E> implements Set<E> {
/**
* 引入鏈表
*/
private LinkedList<E> linkedList;
/**
* 初始化
*/
public LinkedListSet(){
linkedList = new LinkedList<>();
}
/**
* 向集合中添加元素(去重添加)
* @param e
*/
@Override
public void add(E e) {
// 實(shí)現(xiàn)去重添加元素e
if (!linkedList.contains(e)){
linkedList.addFrist(e);
}
}
/**
* 從集合中刪除元素
* @param e
*/
@Override
public void remove(E e) {
linkedList.removeElement(e);
}
/**
* 查看集合是否包含元素e
* @param e
* @return
*/
@Override
public boolean contains(E e) {
return linkedList.contains(e);
}
/**
* 獲取集合中元素的個(gè)數(shù)
* @return
*/
@Override
public int getSize() {
return linkedList.getSize();
}
/**
* 查看集合是否為空
* @return
*/
@Override
public boolean isEmpty() {
return linkedList.isEmpty();
}
}
五 測(cè)試
package com.mufeng;
import com.mufeng.set.FileOperation;
import com.mufeng.set.LinkedListSet;
import java.util.ArrayList;
public class Main {
public static void main(String[] args) {
System.out.println("Pride and Prejudice");
ArrayList<String> words1 = new ArrayList<>();
if(FileOperation.readFile("pride-and-prejudice.txt", words1)) {
System.out.println("Total words: " + words1.size());
LinkedListSet<String> set1 = new LinkedListSet<>();
for (String word : words1)
set1.add(word);
System.out.println("Total different words: " + set1.getSize());
}
System.out.println();
System.out.println("A Tale of Two Cities");
ArrayList<String> words2 = new ArrayList<>();
if(FileOperation.readFile("a-tale-of-two-cities.txt", words2)){
System.out.println("Total words: " + words2.size());
LinkedListSet<String> set2 = new LinkedListSet<>();
for(String word: words2)
set2.add(word);
System.out.println("Total different words: " + set2.getSize());
}
}
}
測(cè)試結(jié)果
六 兩種實(shí)現(xiàn)方式的性能對(duì)比
1?? 測(cè)試類
package com.mufeng;
import com.mufeng.set.BSTSet;
import com.mufeng.set.FileOperation;
import com.mufeng.set.LinkedListSet;
import com.mufeng.set.Set;
import java.util.ArrayList;
public class Main {
private static double testSet(Set<String> set, String filename){
long startTime = System.nanoTime();
System.out.println(filename);
ArrayList<String> words = new ArrayList<>();
if(FileOperation.readFile(filename, words)) {
System.out.println("Total words: " + words.size());
for (String word : words)
set.add(word);
System.out.println("Total different words: " + set.getSize());
}
long endTime = System.nanoTime();
return (endTime - startTime) / 1000000000.0;
}
public static void main(String[] args) {
String filename = "pride-and-prejudice.txt";
BSTSet<String> bstSet = new BSTSet<>();
double time1 = testSet(bstSet, filename);
System.out.println("BST Set: " + time1 + " s");
System.out.println();
LinkedListSet<String> linkedListSet = new LinkedListSet<>();
double time2 = testSet(linkedListSet, filename);
System.out.println("Linked List Set: " + time2 + " s");
}
}
測(cè)試結(jié)果