元字符
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates regex metacharacters: escaping, whitespace escapes and the shorthand classes.
 *
 * <p>Shorthand classes as documented for {@code java.util.regex.Pattern}:
 * {@code \d} = {@code [0-9]}, {@code \D} = {@code [^0-9]};
 * {@code \w} = {@code [a-zA-Z_0-9]}, {@code \W} = {@code [^\w]};
 * {@code \s} = {@code [ \t\n\x0B\f\r]}, {@code \S} = {@code [^\s]}.
 */
public class TestMetaChar {
    // '[' and ']' are metacharacters, so they are escaped to match the literal text myArray[0].
    private static final Pattern FIRST_ELEMENT = Pattern.compile("myArray\\[0\\]");
    // One literal backslash: the regex is \\ and every backslash is doubled again in Java source.
    private static final Pattern BACKSLASH = Pattern.compile("\\\\");
    // CR+LF twice in a row, i.e. an empty line in CRLF text. The original author left a TODO asking
    // why this never matches: the sample text built in main() contains no CR or LF characters.
    private static final Pattern BLANK_LINE = Pattern.compile("\r\n\r\n", Pattern.MULTILINE);
    // \d matches exactly one digit, so myArray[10] is skipped; myArray\[\d+\] would be needed.
    private static final Pattern ONE_DIGIT_INDEX = Pattern.compile("myArray\\[\\d\\]");
    // Requires exactly six characters, so a run of five digits cannot match
    // (making the last \d optional would allow it).
    private static final Pattern SIX_CHARS = Pattern.compile("\\w\\d\\w\\d\\w\\d");

    /**
     * Runs every demo pattern against its sample text and prints the matches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String js = "var myArray = new Array(); \n"
                + "... \n"
                + "if (myArray[0] == 0 || myArray[10] == 10) { \n"
                + "... \n"
                + "}";

        // Only the first hit is reported for the escaped-bracket pattern.
        Matcher firstElement = FIRST_ELEMENT.matcher(js);
        boolean found = firstElement.find();
        if (found) {
            System.out.println(firstElement.group());
        }

        printEach(BACKSLASH, "\\home\\ben\\sales", "");

        String records = "101, ben forta "
                + "102, jim james "
                + " "
                + "103, roberta robertson ";
        printEach(BLANK_LINE, records, "ok");

        printEach(ONE_DIGIT_INDEX, js, "");
        printEach(SIX_CHARS, "11213 a1c2e3 48075 48237 m1b4f2 90046 h1h2h3 123456", "");
    }

    /** Prints every match of {@code regex} in {@code input}, each followed by {@code suffix}. */
    private static void printEach(Pattern regex, String input, String suffix) {
        for (Matcher m = regex.matcher(input); m.find(); ) {
            System.out.println(m.group() + suffix);
        }
    }
}
// output:
// myArray[0]
// \
// \
// \
// myArray[0]
// a1c2e3
// m1b4f2
// h1h2h3
// 123456
匹配次數
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates regex quantifiers: {@code +}, {@code *}, {@code ?}, {@code {n}}, {@code {m,n}},
 * {@code {n,}} and the difference between greedy and lazy matching.
 *
 * <p>The e-mail patterns escape the dot that separates the domain labels; an unescaped dot
 * (or none at all) would accept addresses whose domain contains no dot.
 */
public class TestRepeatMatch {
    // Naive e-mail pattern: \w never matches '.', so for ben.forta@forta.com only
    // forta@forta.com is found, and sub-domains are cut off after the first label pair.
    private static final Pattern pattern = Pattern.compile("\\w+@\\w+\\.\\w+");
    // Dots are allowed in the local part and the domain; a final ".label" is mandatory.
    private static final Pattern pattern04 = Pattern.compile("[\\w.]+@[\\w.]+\\.\\w+");
    // [0-9]+ is one or more digits (same as \d+; '+' is shorthand for {1,}).
    private static final Pattern pattern02 = Pattern.compile("[0-9]+");
    // [0-9+] is exactly one character: a digit or a literal '+'. Inside a character class
    // '+' has no special meaning, so it does not have to be escaped.
    private static final Pattern pattern03 = Pattern.compile("[0-9+]");
    // An address must not start with '.': the local part has to begin with a word character,
    // after which word characters and dots may follow ('*' is shorthand for {0,}).
    private static final Pattern pattern05 = Pattern.compile("\\w+[\\w.]*@[\\w.]+\\.\\w+");
    // '?' makes the preceding element optional (shorthand for {0,1}).
    private static final Pattern pattern06 = Pattern.compile("https?://[\\w./]+");
    // Exact repetition count: six hex digits.
    private static final Pattern pattern07 = Pattern.compile("#[\\da-fA-F]{6}");
    // Repetition interval.
    private static final Pattern pattern08 = Pattern.compile("\\d{1,2}[/-]\\d{1,2}[/-]\\d{2,4}");
    // Minimum repetition: dollar amounts with at least three integer digits.
    private static final Pattern pattern09 = Pattern.compile("\\$\\d{3,}");
    // Greedy: '.*' takes as much as possible, so both bold elements become one match.
    private static final Pattern pattern11 = Pattern.compile("<[Bb]>.*</[Bb]>");
    // Lazy: '.*?' stops at the first closing tag. Lazy forms: *? for *, +? for +, {n,}? for {n,}.
    private static final Pattern pattern10 = Pattern.compile("<[Bb]>.*?</[Bb]>");

    /**
     * Runs every demo pattern against its sample text and prints the matches,
     * separated by numbered banner lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String txt = "send personal email to ben@forta.com or ben.forta@forta.com. for questions " +
                "about a book use support@forta.com or ben@urgent.forta.com. feel free to send" +
                "unsolicited email to spam@forta.com (wouldn't it be" +
                "nice if it were that simple, huh?). 88 1 + 6";
        printMatches(pattern, txt);
        System.out.println("===================1====================");
        printMatches(pattern02, txt);
        System.out.println("===================2====================");
        printMatches(pattern03, txt);
        System.out.println("===================3====================");
        printMatches(pattern04, txt);
        System.out.println("===================4====================");
        String txt02 = "hello .ben@forta.com is my email address";
        printMatches(pattern05, txt02);
        System.out.println("===================5====================");
        String txt03 = "the url is http://www.forta.com/, to connect" +
                "securely use https://www.forta.com/ instead.";
        printMatches(pattern06, txt03);
        System.out.println("===================6====================");
        String html = "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>";
        printMatches(pattern07, html);
        System.out.println("===================7====================");
        String txt05 = "4/8/03\n" +
                "10-6-2004\n" +
                "2/2/2\n" +
                "01-01-01\n";
        printMatches(pattern08, txt05);
        System.out.println("===================8====================");
        String price = "$496.80 $1290.69 $26.43 $613.42 $7.61 $414.90 $25.00";
        printMatches(pattern09, price);
        System.out.println("===================9====================");
        String htm = "<B>i am a B</B> <b>i am a b too</b>";
        printMatches(pattern11, htm);
        System.out.println("===================10====================");
        printMatches(pattern10, htm);
    }

    /** Prints every match of {@code regex} in {@code input}, one per line. */
    private static void printMatches(Pattern regex, String input) {
        Matcher matcher = regex.matcher(input);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}
// output:
// ben@forta.com
// forta@forta.com
// support@forta.com
// ben@urgent.forta
// spam@forta.com
// ===================1====================
// 88
// 1
// 6
// ===================2====================
// 8
// 8
// 1
// +
// 6
// ===================3====================
// ben@forta.com
// ben.forta@forta.com
// support@forta.com
// ben@urgent.forta.com
// spam@forta.com
// ===================4====================
// ben@forta.com
// ===================5====================
// http://www.forta.com/
// https://www.forta.com/
// ===================6====================
// #336633
// #ffffff
// ===================7====================
// 4/8/03
// 10-6-2004
// 01-01-01
// ===================8====================
// $496
// $1290
// $613
// $414
// ===================9====================
// <B>i am a B</B> <b>i am a b too</b>
// ===================10====================
// <B>i am a B</B>
// <b>i am a b too</b>
匹配范圍
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates character classes: explicit sets, ranges and negated sets.
 */
public class TestRangeMatch {
    // '.' accepts any character, so this would also accept cat.xls (not exercised in main).
    private static final Pattern ANY_A_ANY = Pattern.compile(".a.\\.xls");
    // Unanchored, so this would also find "sa1.xls" inside usa1.xls (not exercised in main).
    private static final Pattern N_OR_S = Pattern.compile("[ns]a.\\.xls");
    // Compiled WITHOUT Pattern.MULTILINE: '^' anchors only at the start of the whole input,
    // so nothing in the sample list matches. With MULTILINE (or a leading "(?m)") '^' would
    // anchor at every line start.
    private static final Pattern ANCHORED_SINGLE_LINE = Pattern.compile("^[ns]a.\\.xls");
    // Character sets give case-insensitivity for selected positions only.
    private static final Pattern REGEX_WORD = Pattern.compile("[Rr]eg[eE]x");
    // [0123456789] can be shortened to the range [0-9] (low to high) or to \d.
    // '-' is a metacharacter only inside [...].
    private static final Pattern ANCHORED_DIGIT =
            Pattern.compile("^[ns]a[0123456789]\\.xls", Pattern.MULTILINE);
    private static final Pattern HEX_COLOR = Pattern.compile(
            "#[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]");
    // Negated set: '^' right after '[' negates every character and range in the set.
    private static final Pattern NON_DIGIT_SUFFIX = Pattern.compile("[ns]a[^0-9]\\.xls");

    /**
     * Runs the demo patterns against a file list and two text samples and prints the matches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String fileLs = "sales1.xls\n"
                + "orders3.xls\n"
                + "sales2.xls\n"
                + "sales.xls\n"
                + "sales3.xls\n"
                + "apac1.xls\n"
                + "europe2.xls\n"
                + "na1.xls\n"
                + "na2.xls\n"
                + "cat.xls\n"
                + "usa1.xls\n"
                + "sam.xls\n"
                + "sa1.xls\n";

        dump(ANCHORED_SINGLE_LINE, fileLs);
        System.out.println("================");
        dump(ANCHORED_DIGIT, fileLs);

        dump(REGEX_WORD, "The phrase 'regular expression' is often abbreviated as RegEx or regex");
        dump(HEX_COLOR, "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>");
        dump(NON_DIGIT_SUFFIX, fileLs);
    }

    /** Prints every match of {@code regex} in {@code input}, one per line. */
    private static void dump(Pattern regex, String input) {
        for (Matcher m = regex.matcher(input); m.find(); ) {
            System.out.println(m.group());
        }
    }
}
// output:
//================
// na1.xls
// na2.xls
// sa1.xls
// RegEx
// regex
// #336633
// #ffffff
// sam.xls
匹配位置
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TestLocationMatch {
private static Pattern pattern = Pattern.compile("cat");
// \s會(huì)“消費(fèi)”空格
private static Pattern pattern1 = Pattern.compile("\\scat\\s");
// 使用向后向前查找 定位消費(fèi)內(nèi)容
private static Pattern pattern1_ref = Pattern.compile("(?<=\\s)cat(?=\\s)");
// \b是單詞邊界(不會(huì)消費(fèi)空格): \b匹配這樣一個(gè)位置--位于能夠用來構(gòu)成單詞的字符(字母谬擦,數(shù)字切距,下劃線,也就是與\w匹配的字符)
// 和一個(gè)不能用來構(gòu)成單詞的字符(也就是與\W匹配的字符)之間
// \b只匹配一個(gè)位置惨远,不匹配任何字符谜悟,所以"\bcat\b"匹配到的字符串長(zhǎng)度是3(c,a,t)
private static Pattern pattern2 = Pattern.compile("\\bcat\\b");
private static Pattern pattern3 = Pattern.compile("\\bcap");
private static Pattern pattern4 = Pattern.compile("cap\\b");
// ^匹配整個(gè)字符串的開始位置 注意:^在正則字符集合起到求非作用
private static Pattern pattern5 = Pattern.compile("<\\?xml.*?\\?>");
private static Pattern pattern6 = Pattern.compile("\\s*^<\\?xml.*?\\?>");
// $匹配整個(gè)字符串的結(jié)尾位置 例:web頁面里,</html>標(biāo)簽后面不應(yīng)該再有任何內(nèi)容
// 可以使用這個(gè)模式檢查: </[Hh][Tt][Mm][Ll]>\s*$
private static Pattern pattern7 = Pattern.compile("(?m)//.*$");
public static void main(String[] args) {
String txt = "the cat scattered his food all over the room";
String txt2 = "the captain wore his cap and cape proudly as \n" +
"he sat listening to the recap of how his" +
"crew saved the men from a capsized vessel";
System.out.println(matchAll(pattern, txt));
System.out.println("=====================1========================");
System.out.println("【有空格】" + matchAll(pattern1, txt));
System.out.println("=====================2========================");
System.out.println("【無空格】" + matchAll(pattern1_ref, txt));
System.out.println("=====================3========================");
System.out.println("【無空格】" + matchAll(pattern2, txt));
System.out.println("=====================4========================");
System.out.println(replaceAll(pattern3, txt2, "@@@"));
System.out.println("=====================5========================");
// 如果不對(duì)$轉(zhuǎn)義北秽,將會(huì)拋出異常:Exception in thread "main" java.lang.IllegalArgumentException: Illegal group reference
/**
* https://blog.csdn.net/qq_37502106/article/details/88642840
* 可以看到這里面對(duì)“$”符號(hào)和"\\"符號(hào)進(jìn)行了處理葡幸。出現(xiàn)以上錯(cuò)誤的原因是:String的replaceAll(regex, replacement)方法的第一個(gè)參數(shù)支持正則表達(dá)式,
* 如果參數(shù)replacement中出現(xiàn)符號(hào)“$”,會(huì)按照$1$2的分組模式進(jìn)行匹配贺氓。當(dāng)編譯器發(fā)現(xiàn)“$”后跟的不是整數(shù)的時(shí)候蔚叨,就會(huì)拋出“Illegal group reference”的異常。
*
* 處理辦法:用JDK提供的方法辙培,對(duì)特殊字符進(jìn)行處理:
* replacement = java.util.regex.Matcher.quoteReplacement(replacement);
* 或者手動(dòng)轉(zhuǎn)義:\\$\\$\\$
*/
System.out.println(replaceAll(pattern4, txt2, Matcher.quoteReplacement("$$$")));
System.out.println("=====================6========================");
String txt3 = "this xml file has a bad start\n" +
" <?xml version='1.0' encoding='UTF-8' ?> \n" +
"<beans xmlns=\"http://www.springframework.org/schema/beans\"\n" +
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
" xsi:schemaLocation=\"http://www.springframework.org/schema/beans\n" +
" http://www.springframework.org/schema/beans/spring-beans.xsd\">\n" +
"</beans>";
System.out.println(matchAll(pattern5, txt3).size() == 1 ? "合格xml" : "不合格xml");
System.out.println("=====================7========================");
System.out.println(matchAll(pattern6, txt3).size() == 1 ? "合格xml" : "不合格xml");
System.out.println("=====================8========================");
String js = "ar URL= {\n" +
" addBatchTemplate: '/myApp/addBatchTemplate',//添加模板\n" +
" getBackDeviceTypeList:'/myApp/switcher-device-template/getFrontDeviceTypeList', //獲取型號(hào)列表\n" +
" loadConfig: '/myApp/loadConfig',//post 下發(fā)配置\n" +
" isInvalidEdit: '/myApp/isInvalidEdit',//權(quán)限判斷\n" +
" isInvalidConfig: '/myApp/isInvalidConfig',//權(quán)限判斷\n" +
" getBatchTemplateInfo:'/myApp/getBatchTemplateInfo', //查詢信息\n" +
" };\n";
System.out.println(matchAll(pattern7, js));
}
private static List<String> matchAll(Pattern pattern, String source) {
List<String> matchLs = new ArrayList<>(10);
Matcher matcher = pattern.matcher(source);
while (matcher.find())
matchLs.add(matcher.group());
return matchLs;
}
private static String replaceAll(Pattern pattern, String source, String replaceStr) {
/**
* String的replaceAll也是使用正則:Pattern.compile(regex).matcher(this).replaceAll(replacement);
* 而正則replaceAll的底層使用sb拼接結(jié)果: return text.toString(); 所以不會(huì)對(duì)源字符串造成副作用
*/
Matcher matcher = pattern.matcher(source);
return matcher.replaceAll(replaceStr);
}
}
// output:
// [cat, cat]
// =====================1========================
// 【有空格】[ cat ]
// =====================2========================
// 【無空格】[cat]
// =====================3========================
// 【無空格】[cat]
// =====================4========================
// the @@@tain wore his @@@ and @@@e proudly as
// he sat listening to the recap of how hiscrew saved the men from a @@@sized vessel
// =====================5========================
// the captain wore his $$$ and cape proudly as
// he sat listening to the re$$$ of how hiscrew saved the men from a capsized vessel
// =====================6========================
// 合格xml
// =====================7========================
// 不合格xml
// =====================8========================
// [//添加模板, //獲取型號(hào)列表, //post 下發(fā)配置, //權(quán)限判斷, //權(quán)限判斷, //查詢信息]
子表達式
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates subexpressions (groups): scoping a quantifier, shortening a repeated
 * pattern, and controlling the reach of the alternation operator {@code |}.
 */
public class TestSubExp {
    // A quantifier binds only to the single element before it: this is "&nbsp" followed by
    // two or more ';', so it matches "&nbsp;;;;;" but never "&nbsp;&nbsp;".
    private static final Pattern pattern = Pattern.compile("&nbsp;{2,}");
    // Grouping the entity makes {2,} repeat the whole "&nbsp;".
    private static final Pattern pattern2 = Pattern.compile("(&nbsp;){2,}");
    // Rough IPv4 shape; the separating dots are escaped so only a literal '.' is accepted.
    private static final Pattern pattern3 =
            Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");
    // Same shape, shortened with a repeated group.
    private static final Pattern pattern4 = Pattern.compile("(\\d{1,3}\\.){3}\\d{1,3}");
    // '|' splits the WHOLE expression: this means (19)|(20\d{2}) and finds "19" or "20xx".
    private static final Pattern pattern5 = Pattern.compile("19|20\\d{2}");
    // Grouping limits the alternation to the century prefix.
    private static final Pattern pattern6 = Pattern.compile("(19|20)\\d{2}");

    /**
     * One IPv4 octet (0-255). A regex cannot express a numeric range directly, so the range
     * is split into four textual cases:
     * 250-255 {@code 25[0-5]}, 200-249 {@code 2[0-4]\d}, 100-199 {@code 1\d{2}},
     * and any one- or two-digit number {@code \d{1,2}}.
     * The 2xx cases cannot be merged into {@code 2[0-5][0-5]}: that would reject 206-209.
     *
     * <p>Alternatives are tried left to right, so the longest ones come first; with
     * {@code \d{1,2}} in front, "200" would be consumed as "20".
     */
    static String quarter = "(25[0-5])|(2[0-4]\\d)|(1\\d{2})|(\\d{1,2})";
    // The rough patterns above accept illegal addresses such as 666.77.8.999. Here every octet
    // is range-checked and \b on both sides prevents matching a slice of a longer digit run
    // (e.g. "66.77.8.99" inside "666.77.8.999").
    private static final Pattern pattern7 =
            Pattern.compile("\\b((" + quarter + ")\\.){3}(" + quarter + ")\\b");

    /**
     * Runs the demo patterns and prints the results, separated by numbered banner lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "hello, my name is mike jordan, and i am" +
                "the best basketball player, rank No&nbsp;&nbsp;1";
        System.out.println(replaceAll(pattern, s, "空格"));
        System.out.println("=====================1========================");
        System.out.println(replaceAll(pattern2, s, "空格"));
        System.out.println("=====================2========================");
        String s1 = "ping 12.159.46.200 ... ";
        System.out.println(matchAll(pattern3, s1));
        System.out.println("=====================3========================");
        System.out.println(matchAll(pattern4, s1));
        System.out.println("=====================4========================");
        String s2 = "ID: 042" +
                "SEX: M" +
                "BIRTH: 1967-08-17" +
                "DIE: 2067-08-17" +
                "STATUS: Active";
        System.out.println(matchAll(pattern5, s2));
        System.out.println("=====================5========================");
        System.out.println(matchAll(pattern6, s2));
        System.out.println("=====================6========================");
        String s3 = "illegal ip: 666.77.8.999" +
                "legal ip: 12.159.46.200";
        System.out.println(matchAll(pattern7, s3));
        System.out.println("=====================7========================");
    }

    /**
     * Collects every match of {@code pattern} in {@code source}, in order of occurrence.
     *
     * @return the matched substrings; empty when nothing matches
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> matchLs = new ArrayList<>(10);
        Matcher matcher = pattern.matcher(source);
        while (matcher.find()) {
            matchLs.add(matcher.group());
        }
        return matchLs;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * The result is a new string; {@code source} itself is not modified.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        Matcher matcher = pattern.matcher(source);
        return matcher.replaceAll(replaceStr);
    }
}
// output:
// hello, my name is mike jordan, and i amthe best basketball player, rank No&nbsp;&nbsp;1
// =====================1========================
// hello, my name is mike jordan, and i amthe best basketball player, rank No空格1
// =====================2========================
// [12.159.46.200]
// =====================3========================
// [12.159.46.200]
// =====================4========================
// [19, 2067]
// =====================5========================
// [1967, 2067]
// =====================6========================
// [12.159.46.200]
// =====================7========================
前后查找
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates lookbehind {@code (?<=...)} and lookahead {@code (?=...)}: assertions that
 * must hold around a match but are not part of the matched text.
 */
public class TestLookAround {
    // Tag plus content; group 1 is the tag name, group 2 the content.
    private static final Pattern TAGGED_TITLE =
            Pattern.compile("<(title)>(.*)</\\1>", Pattern.CASE_INSENSITIVE);
    // Content only. The parentheses that delimit the lookbehind / lookahead themselves are not
    // capturing groups; the (title) inside the lookbehind is group 1.
    private static final Pattern TITLE_CONTENT =
            Pattern.compile("(?<=<(title)>).*(?=</\\1>)", Pattern.CASE_INSENSITIVE);
    // Price matching, step by step: any number, number with '$', number preceded by '$'.
    private static final Pattern ANY_NUMBER = Pattern.compile("[0-9.]+");
    private static final Pattern DOLLAR_NUMBER = Pattern.compile("\\$[0-9.]+");
    private static final Pattern NUMBER_AFTER_DOLLAR = Pattern.compile("(?<=\\$)[0-9.]+");

    /**
     * Alternative demo: prints the groups of the lookaround pattern, then shows how nested
     * groups are numbered (by the position of their opening parenthesis).
     *
     * @param args ignored
     */
    public static void main01(String[] args) {
        String source = "<HEAD>"
                + "<TITLE>BEN FORTA'S HOMEPAGE</title>"
                + "</HEAD>";
        for (Matcher m = TITLE_CONTENT.matcher(source); m.find(); ) {
            System.out.println(m.group(0));
            System.out.println(m.group(1));
        }
        System.out.println("======");

        Pattern nested = Pattern.compile("((<(title)>)(.*)(</(title)>))", Pattern.CASE_INSENSITIVE);
        for (Matcher m = nested.matcher(source); m.find(); ) {
            // 0: whole match; 1: outermost group; 2: opening tag; 3: name inside the opening tag;
            // 4: content; 5: closing tag; 6: name inside the closing tag.
            for (int group = 0; group <= 6; group++) {
                System.out.println(m.group(group));
            }
        }
    }

    /**
     * Runs the demo patterns and prints the results, separated by numbered banner lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "<HEAD>"
                + "<TITLE>BEN FORTA'S HOMEPAGE</TITLE>"
                + "</HEAD>";
        // Whole match, tags included.
        System.out.println(matchAll(TAGGED_TITLE, s));
        System.out.println("=====================1========================");
        // Only the part captured by the content group.
        System.out.println(matchTagContent(TAGGED_TITLE, s));
        System.out.println("=====================2========================");
        // Same content via lookaround: the tags are asserted, not consumed.
        System.out.println(matchAll(TITLE_CONTENT, s));
        System.out.println("=====================3========================");

        String s1 = "ABC01: $23.45"
                + "HGG42: $5.31"
                + "CFMX1: $899.00"
                + "XTC99: $69.96"
                + "Total items found: 4";
        System.out.println(matchAll(ANY_NUMBER, s1));
        System.out.println("=====================4========================");
        System.out.println(matchAll(DOLLAR_NUMBER, s1));
        System.out.println("=====================5========================");
        System.out.println(matchAll(NUMBER_AFTER_DOLLAR, s1));
    }

    /**
     * Collects capture group 2 of every match of {@code pattern} in {@code source}.
     *
     * @return the captured substrings, in order of occurrence
     */
    private static List<String> matchTagContent(Pattern pattern, String source) {
        List<String> contents = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            contents.add(m.group(2));
        }
        return contents;
    }

    /**
     * Collects every match of {@code pattern} in {@code source}, in order of occurrence.
     *
     * @return the matched substrings; empty when nothing matches
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> hits = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            hits.add(m.group());
        }
        return hits;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * The result is a new string; {@code source} itself is not modified.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        return pattern.matcher(source).replaceAll(replaceStr);
    }
}
// output:
// [<TITLE>BEN FORTA'S HOMEPAGE</TITLE>]
// =====================1========================
// [BEN FORTA'S HOMEPAGE]
// =====================2========================
// [BEN FORTA'S HOMEPAGE]
// =====================3========================
// [01, 23.45, 42, 5.31, 1, 899.00, 99, 69.96, 4]
// =====================4========================
// [$23.45, $5.31, $899.00, $69.96]
// =====================5========================
// [23.45, 5.31, 899.00, 69.96]
回溯引用
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates backreferences: {@code \1} inside a pattern and {@code $1}, {@code $2}, ...
 * inside a replacement string. A backreference can only refer to a capturing group.
 */
public class TestBackTraceRef {
    // Level-1 headings only; same idea as <[Hh]1>.*?</[Hh]1>.
    private static final Pattern H1_ONLY =
            Pattern.compile("<h1>.*?</h1>", Pattern.CASE_INSENSITIVE);
    // Any heading level, but opening and closing level are independent, so <H2>...</H3> passes.
    private static final Pattern ANY_HEADING =
            Pattern.compile("<h[1-6]>.*?</h[1-6]>", Pattern.CASE_INSENSITIVE);
    // \1 forces the closing tag to repeat what group 1 captured, rejecting mismatched pairs.
    private static final Pattern BALANCED_HEADING =
            Pattern.compile("<(h[1-6])>.*?</\\1>", Pattern.CASE_INSENSITIVE);
    // A word followed by whitespace and the same text again.
    private static final Pattern REPEATED_WORD = Pattern.compile("\\s(\\w+)\\s\\1");
    // Groups referenced from replacement strings.
    private static final Pattern EMAIL = Pattern.compile("(\\w+[\\w.]*@[\\w.]+\\.\\w+)");
    private static final Pattern PHONE = Pattern.compile("(\\d{3})-(\\d{3})-(\\d{4})");

    /**
     * Runs the demo patterns and prints the results, separated by numbered banner lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String html = "<BODY>"
                + "<H1>welcome to general expression lesson</H1>"
                + "<H2>regexp is a good tool to handle string</H2>"
                + "<H2>you can be a regex master</H2>"
                + "<H2>this is not valid</H3>"
                + "</BODY>";
        System.out.println(matchAll(H1_ONLY, html));
        System.out.println("=====================1========================");
        System.out.println(matchAll(ANY_HEADING, html));
        System.out.println("=====================2========================");
        System.out.println(matchAll(BALANCED_HEADING, html));
        System.out.println("=====================3========================");

        String s = "this is a block of of text,"
                + "several words here are are"
                + "repeated, and and they"
                + "should not be";
        System.out.println(matchAll(REPEATED_WORD, s));
        System.out.println("=====================4========================");

        // In a replacement string $1, $2, ... insert what the corresponding group captured.
        String s1 = "hi, ben@forta.com is my email address";
        String linked = replaceAll(EMAIL, s1, "<A HREF='mailto:$1'>$1</A>");
        System.out.println(linked);
        System.out.println("=====================5========================");

        String s2 = "313-555-1234 \n"
                + "248-555-9999 \n"
                + "810-555-9000 \n";
        String reformatted = replaceAll(PHONE, s2, "($1) $2-$3");
        System.out.println(reformatted);
    }

    /**
     * Collects every match of {@code pattern} in {@code source}, in order of occurrence.
     *
     * @return the matched substrings; empty when nothing matches
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> hits = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            hits.add(m.group());
        }
        return hits;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * The result is a new string; {@code source} itself is not modified.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        return pattern.matcher(source).replaceAll(replaceStr);
    }
}
// output:
// [<H1>welcome to general expression lesson</H1>]
// =====================1========================
// [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>, <H2>this is not valid</H3>]
// =====================2========================
// [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>]
// =====================3========================
// [ of of, are are, and and]
// =====================4========================
// hi, <A HREF='mailto:ben@forta.com'>ben@forta.com</A> is my email address
// =====================5========================
// (313) 555-1234
// (248) 555-9999
// (810) 555-9000
特殊替換
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates per-match replacement with {@code appendReplacement} / {@code appendTail}:
 * every occurrence of "java" (any case) is rewritten, alternating between upper and lower case.
 */
public class SpecialReplace {
    /**
     * Rewrites the sample sentence and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Pattern javaWord = Pattern.compile("java", Pattern.CASE_INSENSITIVE);
        Matcher hits = javaWord.matcher("java Java JAVA JAva I love Java and you ?");
        StringBuffer rewritten = new StringBuffer();

        int hitNumber = 0;
        while (hits.find()) {
            hitNumber++;
            // Odd-numbered hits (1st, 3rd, ...) become upper case, even-numbered ones lower case.
            String replacement;
            if (hitNumber % 2 == 0) {
                replacement = "java";
            } else {
                replacement = "JAVA";
            }
            hits.appendReplacement(rewritten, replacement);
        }
        // Copy whatever follows the last match.
        hits.appendTail(rewritten);

        System.out.println(rewritten);
    }
}
// output:
// JAVA java JAVA java I love JAVA and you ?
多行匹配
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates {@code Pattern.MULTILINE}: with the flag {@code ^} anchors at the start of
 * every line, without it only at the start of the whole input.
 */
public class TestMultilineMatch {
    /**
     * Applies the same anchored pattern with and without MULTILINE and prints both results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String fileLs = "sales1.xls\n"
                + "orders3.xls\n"
                + "sales2.xls\n"
                + "sales.xls\n"
                + "sales3.xls\n"
                + "apac1.xls\n"
                + "europe2.xls\n"
                + "na1.xls\n"
                + "na2.xls\n"
                + "cat.xls\n"
                + "usa1.xls\n"
                + "sam.xls\n"
                + "sa1.xls\n";

        // Passing the flag is equivalent to embedding it: Pattern.compile("(?m)^[ns]a.\\.xls").
        Pattern perLine = Pattern.compile("^[ns]a.\\.xls", Pattern.MULTILINE);
        System.out.println(perLine.matcher(fileLs).replaceAll("多行匹配"));

        // Without the flag only the very first line could match, and it does not.
        Pattern wholeInput = Pattern.compile("^[ns]a.\\.xls");
        System.out.println(wholeInput.matcher(fileLs).replaceAll("整串匹配"));
    }
}
// output:
// sales1.xls
// orders3.xls
// sales2.xls
// sales.xls
// sales3.xls
// apac1.xls
// europe2.xls
// 多行匹配
// 多行匹配
// cat.xls
// usa1.xls
// 多行匹配
// 多行匹配
//
// sales1.xls
// orders3.xls
// sales2.xls
// sales.xls
// sales3.xls
// apac1.xls
// europe2.xls
// na1.xls
// na2.xls
// cat.xls
// usa1.xls
// sam.xls
// sa1.xls
貪婪模式
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates greedy versus lazy quantifiers on a list of sibling elements.
 */
public class TestGreedyAndLazyMode {
    /**
     * Replaces the first match of a greedy and of a lazy pattern and prints both results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String string = "<books><book>西游記</book><book>三國演義</book><book>水滸傳</book></books>";

        // Greedy '.*' runs to the LAST closing tag, so all three elements form one match.
        Pattern greedy = Pattern.compile("<(book)>.*</\\1>", Pattern.CASE_INSENSITIVE);
        System.out.println(greedy.matcher(string).replaceFirst("貪婪模式"));

        // Lazy '.*?' stops at the FIRST closing tag, so only the first element is replaced.
        Pattern lazy = Pattern.compile("<(book)>.*?</\\1>");
        System.out.println(lazy.matcher(string).replaceFirst("懶惰模式"));
    }
}
// output:
// <books>貪婪模式</books>
// <books>懶惰模式<book>三國演義</book><book>水滸傳</book></books>
純文本和點
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates literal-text patterns, case-insensitive matching and the {@code .} wildcard.
 */
public class TestPureTextAndDot {
    // Plain literals (declared for reference, not exercised in main).
    private static final Pattern BEN_EXACT = Pattern.compile("Ben");
    private static final Pattern MY_EXACT = Pattern.compile("my");
    /** Matching is case-sensitive by default; the flag switches that off for this pattern. */
    private static final Pattern BEN_ANY_CASE = Pattern.compile("ben", Pattern.CASE_INSENSITIVE);
    /**
     * '.' matches any single character, including a literal '.', but by default not a line
     * terminator (the SQL LIKE counterpart is '_').
     */
    private static final Pattern SALES_PLUS_ONE = Pattern.compile("sales.");
    private static final Pattern ANY_A_ANY_XLS = Pattern.compile(".a.\\.xls");

    /**
     * Runs the demo patterns against a sentence and a file list and prints the matches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String txt = "Hello, my name is Ben. Please visit my01 website at http://www.forta.com/.";
        dump(BEN_ANY_CASE, txt);

        String fileLs = "sales1.xls\n"
                + "orders3.xls\n"
                + "sales2.xls\n"
                + "sales.xls\n"
                + "sales3.xls\n"
                + "apac1.xls\n"
                + "europe2.xls\n"
                + "na1.xls\n"
                + "na2.xls\n"
                + "sa1.xls\n";
        dump(SALES_PLUS_ONE, fileLs);
        System.out.println("==================================================================");
        dump(ANY_A_ANY_XLS, fileLs);
    }

    /** Prints every match of {@code regex} in {@code input}, one per line. */
    private static void dump(Pattern regex, String input) {
        for (Matcher m = regex.matcher(input); m.find(); ) {
            System.out.println(m.group());
        }
    }
}
// output:
// Ben
// sales1
// sales2
// sales.
// sales3
// ==================================================================
// na1.xls
// na2.xls
// sa1.xls
java正則api
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Contrasts the three matching entry points of {@code Matcher} ({@code matches},
 * {@code find}, {@code lookingAt}) and shows the effect of the DOTALL and MULTILINE flags.
 */
public class MathcesAndFindDiff {
    private static final String PLAIN_RULE = "==============================================";

    /**
     * Walks one matcher through matches(), find(), reset() and lookingAt(), printing each
     * result together with the matched text and its start index.
     *
     * <p>NOTE(review): the original author's notes and the recorded transcript say that the
     * failed matches() call moves the following find() to index 4. That depends on Matcher
     * internals; confirm it on the JDK in use before relying on it.
     *
     * @param args ignored
     */
    public static void main1(String[] args) {
        Pattern threeToFiveDigits = Pattern.compile("\\d{3,5}");
        String charSequence = "123-34345-234-00";
        Matcher m = threeToFiveDigits.matcher(charSequence);

        // matches() must cover the entire input, so it fails here.
        print(m.matches());
        System.out.println(PLAIN_RULE);

        // First find() after the failed matches(): print where that match starts.
        m.find();
        print(m.start());
        System.out.println(PLAIN_RULE);

        // reset() puts the matcher back at the beginning of the input.
        m.reset();

        // First find(): result, then matched text and start index.
        print(m.find());
        System.out.println("=====================1========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================2========================");

        // Second find() continues after the previous match.
        print(m.find());
        System.out.println("=====================3========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================4========================");

        // lookingAt() always starts at the beginning of the region and only needs a matching
        // prefix, so calling it twice reports the same match twice.
        print(m.lookingAt());
        System.out.println("=====================5========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================6========================");

        print(m.lookingAt());
        System.out.println("=====================7========================");
        print(m.group() + " - " + m.start());
        System.out.println("=====================8========================");
    }

    /** Prints {@code o} followed by a line break. */
    private static void print(Object o) {
        System.out.println(o);
    }

    /**
     * Tries three patterns against a two-line input, once with default flags and once with
     * DOTALL | MULTILINE, and prints whether each variant finds a match.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String input = "I dream of engines\nmore engines, all day long";
        System.out.println("INPUT:" + input);
        System.out.println();

        String[] patt = {"engines.more engines", "ines\nmore", "engines$"};
        for (String regex : patt) {
            System.out.println("PATTERN:" + regex);

            boolean foundByDefault = Pattern.compile(regex).matcher(input).find();
            System.out.println("DEFAULT match " + foundByDefault);

            // DOTALL lets '.' match line terminators; MULTILINE lets '$' match at each line end.
            int flags = Pattern.DOTALL | Pattern.MULTILINE;
            boolean foundWithFlags = Pattern.compile(regex, flags).matcher(input).find();
            System.out.println("Multiline match " + foundWithFlags);

            System.out.println();
        }
    }
}
// output 1:
// false
// ==============================================
// 4
// ==============================================
// true
// =====================1========================
// 123 - 0
// =====================2========================
// true
// =====================3========================
// 34345 - 4
// =====================4========================
// true
// =====================5========================
// 123 - 0
// =====================6========================
// true
// =====================7========================
// 123 - 0
// =====================8========================
// output2:
// INPUT:I dream of engines
// more engines, all day long
//
// PATTERN:engines.more engines
// DEFAULT match false
// Multiline match true
//
// PATTERN:ines
// more
// DEFAULT match true
// Multiline match true
//
// PATTERN:engines$
// DEFAULT match false
// Multiline match true