Java正則

元字符

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates regex metacharacters: escaping special characters, matching
 * line-break characters, and the shorthand character classes.
 */
public class TestMetaChar {
    // '[' and ']' are metacharacters, so they are escaped to be matched literally.
    private static final Pattern pattern = Pattern.compile("myArray\\[0\\]");
    // Matches one literal backslash (a backslash must itself be escaped, in the regex and in Java).
    private static final Pattern pattern02 = Pattern.compile("\\\\");
    // Matches an empty line in CRLF-terminated text: the line break ending the previous line
    // immediately followed by the line break of the empty line itself.
    // No MULTILINE flag is needed because the pattern uses neither ^ nor $.
    private static final Pattern pattern03 = Pattern.compile("\\r\\n\\r\\n");
    // Shorthand character classes (see the java.util.regex.Pattern documentation):
    //   \d is [0-9]             \D is [^0-9]   (ranges are written low-to-high)
    //   \w is [a-zA-Z_0-9]      \W is [^\w]
    //   \s is [ \t\n\x0B\f\r]   \S is [^\s]
    // A single \d cannot match myArray[10]; that needs a quantifier: myArray\[\d+\]
    private static final Pattern pattern04 = Pattern.compile("myArray\\[\\d\\]");

    // Requires exactly six characters alternating "word character, digit", so a run of only
    // five digits does not match (\w\d\w\d\w\d? would accept five digits).
    private static final Pattern pattern05 = Pattern.compile("\\w\\d\\w\\d\\w\\d");

    /**
     * Runs every demo pattern against its sample text and prints the matches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String js = "var myArray = new Array(); \n" +
                "... \n" +
                "if (myArray[0] == 0 || myArray[10] == 10) { \n" +
                "... \n" +
                "}";
        Matcher matcher = pattern.matcher(js);
        if (matcher.find()) {
            System.out.println(matcher.group());
        }

        String url = "\\home\\ben\\sales";
        Matcher matcher1 = pattern02.matcher(url);
        while (matcher1.find()) {
            System.out.println(matcher1.group());
        }

        // The records are CRLF-terminated and separated by one truly empty line; the previous
        // sample contained no line breaks at all, which is why pattern03 never matched.
        String txt = "101, ben forta\r\n" +
                "102, jim james\r\n" +
                "\r\n" +
                "103, roberta robertson\r\n";
        Matcher matcher2 = pattern03.matcher(txt);
        while (matcher2.find()) {
            // Print the control characters in escaped form so the match is visible on the console.
            String visible = matcher2.group().replace("\r", "\\r").replace("\n", "\\n");
            System.out.println(visible + "ok");
        }

        Matcher matcher3 = pattern04.matcher(js);
        while (matcher3.find()) {
            System.out.println(matcher3.group());
        }

        String txt2 = "11213 a1c2e3 48075 48237 m1b4f2 90046 h1h2h3 123456";
        Matcher matcher4 = pattern05.matcher(txt2);
        while (matcher4.find()) {
            System.out.println(matcher4.group());
        }
    }
}

// output:
//        myArray[0]
//        \
//        \
//        \
//        \r\n\r\nok
//        myArray[0]
//        a1c2e3
//        m1b4f2
//        h1h2h3
//        123456

匹配次數

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates regex quantifiers (+, *, ?, {n}, {m,n}, {n,}) and the difference
 * between greedy and lazy matching.
 */
public class TestRepeatMatch {
    // Simple e-mail pattern. It cannot match ben.forta@forta.com in full because \w is
    // [a-zA-Z0-9_] and never matches the '.' inside the user name.
    // The dot between the domain labels is escaped so it only matches a literal '.'.
    private static final Pattern pattern = Pattern.compile("\\w+@\\w+\\.\\w+");
    // Allows '.' in the user name and the domain; a literal dot is required before the last label.
    private static final Pattern pattern04 = Pattern.compile("[\\w.]+@[\\w.]+\\.\\w+");
    // [0-9]+ matches one or more digits (same as \d+); [0-9+] matches ONE character that is a
    // digit or '+'. Inside a character class '+' is literal and does not need escaping.
    // + is equivalent to {1,}
    private static final Pattern pattern02 = Pattern.compile("[0-9]+");
    private static final Pattern pattern03 = Pattern.compile("[0-9+]");

    // An address must not start with '.', but pattern04 would return ".ben@forta.com".
    // pattern05 therefore demands a leading \w+; * matches zero or more times, same as {0,}.
    private static final Pattern pattern05 = Pattern.compile("\\w+[\\w.]*@[\\w.]+\\.\\w+");
    // ? matches zero times or once, same as {0,1}
    private static final Pattern pattern06 = Pattern.compile("https?://[\\w./]+");
    // Exact repetition count
    private static final Pattern pattern07 = Pattern.compile("#[\\da-fA-F]{6}");
    // Repetition count range
    private static final Pattern pattern08 = Pattern.compile("\\d{1,2}[/-]\\d{1,2}[/-]\\d{2,4}");
    // Minimum repetition count: dollar amounts with at least three integer digits
    private static final Pattern pattern09 = Pattern.compile("\\$\\d{3,}");

    // Greedy: .* grabs as much as possible, so it runs to the LAST closing tag.
    private static final Pattern pattern11 = Pattern.compile("<[Bb]>.*</[Bb]>");
    // Lazy quantifiers prevent over-matching:
    //   greedy    lazy
    //   *         *?
    //   +         +?
    //   {n,}      {n,}?
    private static final Pattern pattern10 = Pattern.compile("<[Bb]>.*?</[Bb]>");

    /**
     * Runs every demo pattern against its sample text and prints the matches,
     * separated by numbered divider lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String txt = "send personal email to ben@forta.com or ben.forta@forta.com. for questions " +
                "about a book use support@forta.com or ben@urgent.forta.com. feel free to send" +
                "unsolicited email to spam@forta.com (wouldn't it be" +
                "nice if it were that simple, huh?). 88 1 + 6";

        printMatches(pattern, txt);
        System.out.println("===================1====================");

        printMatches(pattern02, txt);
        System.out.println("===================2====================");

        printMatches(pattern03, txt);
        System.out.println("===================3====================");

        printMatches(pattern04, txt);
        System.out.println("===================4====================");

        String txt02 = "hello .ben@forta.com is my email address";
        printMatches(pattern05, txt02);
        System.out.println("===================5====================");

        String txt03 = "the url is http://www.forta.com/, to connect" +
                "securely use https://www.forta.com/ instead.";
        printMatches(pattern06, txt03);
        System.out.println("===================6====================");

        String html = "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>";
        printMatches(pattern07, html);
        System.out.println("===================7====================");

        String txt05 = "4/8/03\n" +
                "10-6-2004\n" +
                "2/2/2\n" +
                "01-01-01\n";
        printMatches(pattern08, txt05);
        System.out.println("===================8====================");

        String price = "$496.80 $1290.69 $26.43 $613.42 $7.61 $414.90 $25.00";
        printMatches(pattern09, price);
        System.out.println("===================9====================");

        String htm = "<B>i am a B</B> <b>i am a b too</b>";
        printMatches(pattern11, htm);
        System.out.println("===================10====================");

        printMatches(pattern10, htm);
    }

    /** Prints every match of {@code regex} in {@code text}, one per line. */
    private static void printMatches(Pattern regex, String text) {
        Matcher matcher = regex.matcher(text);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}

// output:
//          ben@forta.com
//          forta@forta.com
//          support@forta.com
//          ben@urgent.forta
//          spam@forta.com
//          ===================1====================
//          88
//          1
//          6
//          ===================2====================
//          8
//          8
//          1
//          +
//          6
//          ===================3====================
//          ben@forta.com
//          ben.forta@forta.com
//          support@forta.com
//          ben@urgent.forta.com
//          spam@forta.com
//          ===================4====================
//          ben@forta.com
//          ===================5====================
//          http://www.forta.com/
//          https://www.forta.com/
//          ===================6====================
//          #336633
//          #ffffff
//          ===================7====================
//          4/8/03
//          10-6-2004
//          01-01-01
//          ===================8====================
//          $496
//          $1290
//          $613
//          $414
//          ===================9====================
//          <B>i am a B</B> <b>i am a b too</b>
//          ===================10====================
//          <B>i am a B</B>
//          <b>i am a b too</b>

匹配范圍

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates character sets: plain sets, ranges, and negated sets.
 */
public class TestRangeMatch {

    // Too loose: '.' accepts any character, so this also matches cat.xls.
    private static final Pattern pattern = Pattern.compile(".a.\\.xls");
    // Still too loose: it matches the "sa1.xls" inside usa1.xls.
    private static final Pattern pattern02 = Pattern.compile("[ns]a.\\.xls");
    // Anchored at the start of each line. Pattern.MULTILINE makes ^ match after every line
    // terminator; it is equivalent to Pattern.compile("(?m)^[ns]a.\\.xls").
    // Without the flag ^ only matches at the very start of the input and nothing in the
    // sample list would be found.
    private static final Pattern pattern03 = Pattern.compile("^[ns]a.\\.xls", Pattern.MULTILINE);
    // Character sets are handy for ignoring case in only part of a pattern.
    private static final Pattern pattern04 = Pattern.compile("[Rr]eg[eE]x");

    // [0123456789] can be shortened to the range [0-9] (low-to-high) or to \d.
    // '-' is a metacharacter only inside [].
    private static final Pattern pattern05 = Pattern.compile("^[ns]a[0123456789]\\.xls", Pattern.MULTILINE);

    // Six hexadecimal digits written out without a quantifier.
    private static final Pattern pattern06 = Pattern.compile("#[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]");

    // Negated set: a leading ^ inside [] negates every character and range in the set.
    private static final Pattern pattern07 = Pattern.compile("[ns]a[^0-9]\\.xls");

    /**
     * Runs the demo patterns against the sample texts and prints the matches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String fileLs = "sales1.xls\n" +
                "orders3.xls\n" +
                "sales2.xls\n" +
                "sales.xls\n" +
                "sales3.xls\n" +
                "apac1.xls\n" +
                "europe2.xls\n" +
                "na1.xls\n" +
                "na2.xls\n" +
                "cat.xls\n" +
                "usa1.xls\n" +
                "sam.xls\n" +
                "sa1.xls\n";
        Matcher matcher03 = pattern03.matcher(fileLs);
        while (matcher03.find()) {
            System.out.println(matcher03.group());
        }

        System.out.println("================");

        Matcher matcher = pattern05.matcher(fileLs);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }

        String txt = "The phrase 'regular expression' is often abbreviated as RegEx or regex";
        Matcher matcher1 = pattern04.matcher(txt);
        while (matcher1.find()) {
            System.out.println(matcher1.group());
        }

        String html = "<body bgcolor='#336633' text='#ffffff' width='666' height='444'>";
        Matcher matcher2 = pattern06.matcher(html);
        while (matcher2.find()) {
            System.out.println(matcher2.group());
        }

        Matcher matcher3 = pattern07.matcher(fileLs);
        while (matcher3.find()) {
            System.out.println(matcher3.group());
        }
    }
}

// output:
//        na1.xls
//        na2.xls
//        sam.xls
//        sa1.xls
//================
//        na1.xls
//        na2.xls
//        sa1.xls
//        RegEx
//        regex
//        #336633
//        #ffffff
//        sam.xls

匹配位置

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates position matching: word boundaries, look-around as a non-consuming
 * alternative to whitespace, and the string/line anchors ^ and $.
 */
public class TestLocationMatch {

    private static final Pattern pattern = Pattern.compile("cat");
    // \s "consumes" the surrounding whitespace: it becomes part of the match.
    private static final Pattern pattern1 = Pattern.compile("\\scat\\s");
    // Look-behind / look-ahead check the whitespace without consuming it.
    private static final Pattern pattern1_ref = Pattern.compile("(?<=\\s)cat(?=\\s)");
    // \b is a word boundary: the position between a word character (letter, digit,
    // underscore - whatever \w matches) and a non-word character (whatever \W matches).
    // \b matches a position only, never a character, so the text matched by
    // "\bcat\b" has length 3 (c, a, t).
    private static final Pattern pattern2 = Pattern.compile("\\bcat\\b");
    private static final Pattern pattern3 = Pattern.compile("\\bcap");
    private static final Pattern pattern4 = Pattern.compile("cap\\b");

    // ^ matches the start of the whole input. (Inside a character class ^ means negation.)
    private static final Pattern pattern5 = Pattern.compile("<\\?xml.*?\\?>");
    // The XML declaration must be the first thing in the input, optionally preceded by
    // whitespace. The anchor has to come first: with "\s*^" the \s* could only ever match
    // the empty string, so leading whitespace was never tolerated.
    private static final Pattern pattern6 = Pattern.compile("^\\s*<\\?xml.*?\\?>");
    // $ matches the end of the whole input. Example: nothing may follow the </html> tag of
    // a web page, which can be checked with: </[Hh][Tt][Mm][Ll]>\s*$

    // (?m) switches to multi-line mode, so $ matches at the end of every line.
    private static final Pattern pattern7 = Pattern.compile("(?m)//.*$");

    /**
     * Runs the demo patterns against the sample texts and prints the results,
     * separated by numbered divider lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String txt = "the cat scattered his food all over the room";

        String txt2 = "the captain wore his cap and cape proudly as \n" +
                "he sat listening to the recap of how his" +
                "crew saved the men from a capsized vessel";

        System.out.println(matchAll(pattern, txt));
        System.out.println("=====================1========================");
        System.out.println("【有空格】" + matchAll(pattern1, txt));
        System.out.println("=====================2========================");
        System.out.println("【無空格】" + matchAll(pattern1_ref, txt));
        System.out.println("=====================3========================");
        System.out.println("【無空格】" + matchAll(pattern2, txt));
        System.out.println("=====================4========================");
        System.out.println(replaceAll(pattern3, txt2, "@@@"));
        System.out.println("=====================5========================");

        // Without escaping, "$$$" as a replacement throws
        // java.lang.IllegalArgumentException: Illegal group reference.
        // In a replacement string '$' introduces a group reference ($1, $2, ...), so a '$'
        // that is not followed by a group number is rejected.
        // (Background: https://blog.csdn.net/qq_37502106/article/details/88642840)
        // Remedy: quote the replacement with Matcher.quoteReplacement(replacement),
        // or escape by hand: \\$\\$\\$
        System.out.println(replaceAll(pattern4, txt2, Matcher.quoteReplacement("$$$")));
        System.out.println("=====================6========================");

        String txt3 = "this xml file has a bad start\n" +
                "  <?xml version='1.0' encoding='UTF-8' ?> \n" +
                "<beans xmlns=\"http://www.springframework.org/schema/beans\"\n" +
                "    xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
                "    xsi:schemaLocation=\"http://www.springframework.org/schema/beans\n" +
                "                        http://www.springframework.org/schema/beans/spring-beans.xsd\">\n" +
                "</beans>";

        System.out.println(matchAll(pattern5, txt3).size() == 1 ? "合格xml" : "不合格xml");
        System.out.println("=====================7========================");
        System.out.println(matchAll(pattern6, txt3).size() == 1 ? "合格xml" : "不合格xml");
        System.out.println("=====================8========================");

        String js = "ar URL= {\n" +
                "            addBatchTemplate: '/myApp/addBatchTemplate',//添加模板\n" +
                "            getBackDeviceTypeList:'/myApp/switcher-device-template/getFrontDeviceTypeList', //獲取型號(hào)列表\n" +
                "            loadConfig: '/myApp/loadConfig',//post 下發(fā)配置\n" +
                "            isInvalidEdit: '/myApp/isInvalidEdit',//權(quán)限判斷\n" +
                "            isInvalidConfig: '/myApp/isInvalidConfig',//權(quán)限判斷\n" +
                "            getBatchTemplateInfo:'/myApp/getBatchTemplateInfo', //查詢信息\n" +
                "        };\n";
        System.out.println(matchAll(pattern7, js));
    }

    /**
     * Collects every match of {@code pattern} in {@code source}.
     *
     * @param pattern compiled pattern to search with
     * @param source  text to search
     * @return the matched substrings in order of occurrence; empty if there is no match
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> matchLs = new ArrayList<>(10);
        Matcher matcher = pattern.matcher(source);
        while (matcher.find()) {
            matchLs.add(matcher.group());
        }
        return matchLs;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * String.replaceAll is implemented the same way
     * (Pattern.compile(regex).matcher(this).replaceAll(replacement)); the result is a new
     * string, so the source string is left untouched.
     *
     * @param pattern    compiled pattern to search with
     * @param source     text to search
     * @param replaceStr replacement; '$' and '\' have special meaning in it
     * @return the text with all matches replaced
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        Matcher matcher = pattern.matcher(source);
        return matcher.replaceAll(replaceStr);
    }
}

// output:
//        [cat, cat]
//        =====================1========================
//        【有空格】[ cat ]
//        =====================2========================
//        【無空格】[cat]
//        =====================3========================
//        【無空格】[cat]
//        =====================4========================
//        the @@@tain wore his @@@ and @@@e proudly as
//        he sat listening to the recap of how hiscrew saved the men from a @@@sized vessel
//        =====================5========================
//        the captain wore his $$$ and cape proudly as
//        he sat listening to the re$$$ of how hiscrew saved the men from a capsized vessel
//        =====================6========================
//        合格xml
//        =====================7========================
//        不合格xml
//        =====================8========================
//        [//添加模板, //獲取型號(hào)列表, //post 下發(fā)配置, //權(quán)限判斷, //權(quán)限判斷, //查詢信息]

子表達式

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates sub-expressions (groups): scoping a quantifier, shortening a pattern,
 * raising the precedence of an alternation, and validating IPv4 addresses.
 */
public class TestSubExp {

    // {2,} only applies to the element right before it, so this matches "&nbsp" followed by
    // two or more ';' - not a repeated "&nbsp;".
    private static final Pattern pattern = Pattern.compile("&nbsp;{2,}");
    // Grouping scopes the quantifier: (&nbsp;){2,} matches "&nbsp;&nbsp;". Whatever is
    // wrapped in () is one sub-expression.
    private static final Pattern pattern2 = Pattern.compile("(&nbsp;){2,}");

    // Rough IP match. The dots are escaped; an unescaped '.' would accept any separator.
    private static final Pattern pattern3 = Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");
    // Grouping shortens the same pattern.
    private static final Pattern pattern4 = Pattern.compile("(\\d{1,3}\\.){3}\\d{1,3}");

    // "19|20\d{2}" only finds "19" or "20xx": | splits the WHOLE expression into its left
    // and right side, i.e. it is read as (19)|(20\d{2}).
    private static final Pattern pattern5 = Pattern.compile("19|20\\d{2}");
    // Grouping raises the precedence of the alternation, giving a proper four-digit year.
    private static final Pattern pattern6 = Pattern.compile("(19|20)\\d{2}");

    /**
     * One part of an exact IPv4 match.
     *
     * The rough pattern above accepts illegal addresses such as 666.77.8.999. A legal address
     * has four dot-separated numbers, each one byte (0~255). A regex cannot express a numeric
     * range directly, so the range is spelled out as alternatives:
     *
     *   250~255   25[0-5]     three digits starting with 25, third digit 0~5
     *   200~249   2[0-4]\d    three digits starting with 2, second digit 0~4
     *   100~199   1\d{2}      three digits starting with 1
     *   0~99      \d{1,2}     any one- or two-digit number
     *
     * The first two cannot be merged into 2[0-5][0-5]: that would miss 206~209, 216~219, ...
     *
     * Alternatives are tried left to right and the first one that lets the overall match
     * succeed wins, so the longer alternatives are listed first.
     */
    static final String quarter = "25[0-5]|2[0-4]\\d|1\\d{2}|\\d{1,2}";

    // The look-arounds forbid a digit directly before or after the address. Without them
    // the pattern matched the inside of an illegal address ("66.77.8.99" from
    // "666.77.8.999") and truncated legal ones ("12.159.46.20" from "12.159.46.200").
    private static final Pattern pattern7 =
            Pattern.compile("(?<!\\d)((" + quarter + ")\\.){3}(" + quarter + ")(?!\\d)");

    /**
     * Runs the demo patterns against the sample texts and prints the results,
     * separated by numbered divider lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        String s = "hello, my name is mike&nbsp;jordan, and i am" +
                "the best basketball player, rank No&nbsp;&nbsp;1";

        System.out.println(replaceAll(pattern, s, "空格"));
        System.out.println("=====================1========================");
        System.out.println(replaceAll(pattern2, s, "空格"));
        System.out.println("=====================2========================");

        String s1 = "ping 12.159.46.200 ... ";
        System.out.println(matchAll(pattern3, s1));
        System.out.println("=====================3========================");
        System.out.println(matchAll(pattern4, s1));
        System.out.println("=====================4========================");

        String s2 = "ID: 042" +
                "SEX: M" +
                "BIRTH: 1967-08-17" +
                "DIE: 2067-08-17" +
                "STATUS: Active";
        System.out.println(matchAll(pattern5, s2));
        System.out.println("=====================5========================");
        System.out.println(matchAll(pattern6, s2));
        System.out.println("=====================6========================");

        String s3 = "illegal ip: 666.77.8.999" +
                "legal ip: 12.159.46.200";
        System.out.println(matchAll(pattern7, s3));
        System.out.println("=====================7========================");
    }

    /**
     * Collects every match of {@code pattern} in {@code source}.
     *
     * @param pattern compiled pattern to search with
     * @param source  text to search
     * @return the matched substrings in order of occurrence; empty if there is no match
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> matchLs = new ArrayList<>(10);
        Matcher matcher = pattern.matcher(source);
        while (matcher.find()) {
            matchLs.add(matcher.group());
        }
        return matchLs;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * String.replaceAll is implemented the same way
     * (Pattern.compile(regex).matcher(this).replaceAll(replacement)); the result is a new
     * string, so the source string is left untouched.
     *
     * @param pattern    compiled pattern to search with
     * @param source     text to search
     * @param replaceStr replacement; '$' and '\' have special meaning in it
     * @return the text with all matches replaced
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        Matcher matcher = pattern.matcher(source);
        return matcher.replaceAll(replaceStr);
    }
}

// output:
//        hello, my name is mike&nbsp;jordan, and i amthe best basketball player, rank No&nbsp;&nbsp;1
//        =====================1========================
//        hello, my name is mike&nbsp;jordan, and i amthe best basketball player, rank No空格1
//        =====================2========================
//        [12.159.46.200]
//        =====================3========================
//        [12.159.46.200]
//        =====================4========================
//        [19, 2067]
//        =====================5========================
//        [1967, 2067]
//        =====================6========================
//        [12.159.46.200]
//        =====================7========================

前后查找

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates look-behind {@code (?<=...)} and look-ahead {@code (?=...)}: both check the
 * surrounding text without making it part of the match.
 */
public class TestLookAround {

    // Tag plus content
    private static Pattern pattern = Pattern.compile("<(title)>(.*)</\\1>", Pattern.CASE_INSENSITIVE);
    // Content only: the tags are checked by look-around and stay out of the match
    private static Pattern pattern2 = Pattern.compile("(?<=<(title)>).*(?=</\\1>)", Pattern.CASE_INSENSITIVE);
    // Every number, not just the prices
    private static Pattern pattern3 = Pattern.compile("[0-9.]+");

    // Prices including the dollar sign
    private static Pattern pattern4 = Pattern.compile("\\$[0-9.]+");
    // Look-behind: prices without the dollar sign
    private static Pattern pattern5 = Pattern.compile("(?<=\\$)[0-9.]+");

    /**
     * Alternative demo showing how groups are numbered. In
     * (?<=<(title)>).*(?=</\1>) the parentheses that delimit the look-behind and the
     * look-ahead are not counted as groups, so (title) is group 1.
     *
     * @param args ignored
     */
    public static void main01(String[] args) {
        String source = "<HEAD>"
                + "<TITLE>BEN FORTA'S HOMEPAGE</title>"
                + "</HEAD>";

        for (Matcher m = pattern2.matcher(source); m.find(); ) {
            System.out.println(m.group(0));
            System.out.println(m.group(1));
        }

        System.out.println("======");

        // Nested groups are numbered by the position of their opening parenthesis:
        // 1 = outermost, 2 = opening tag, 3 = its tag name, 4 = content,
        // 5 = closing tag, 6 = its tag name.
        Pattern nested = Pattern.compile("((<(title)>)(.*)(</(title)>))", Pattern.CASE_INSENSITIVE);
        for (Matcher m = nested.matcher(source); m.find(); ) {
            for (int group = 0; group <= 6; group++) {
                System.out.println(m.group(group));
            }
        }
    }

    /**
     * Runs the demo patterns against the sample texts and prints the results,
     * separated by numbered divider lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "<HEAD>"
                + "<TITLE>BEN FORTA'S HOMEPAGE</TITLE>"
                + "</HEAD>";

        // the whole match
        List<String> wholeTag = matchAll(pattern, s);
        System.out.println(wholeTag);
        System.out.println("=====================1========================");

        // only the part matched by one sub-expression of the match
        List<String> viaGroup = matchTagContent(pattern, s);
        System.out.println(viaGroup);
        System.out.println("=====================2========================");

        // the same content obtained through look-around
        List<String> viaLookAround = matchAll(pattern2, s);
        System.out.println(viaLookAround);
        System.out.println("=====================3========================");

        String s1 = "ABC01: $23.45"
                + "HGG42: $5.31"
                + "CFMX1: $899.00"
                + "XTC99: $69.96"
                + "Total items found: 4";

        System.out.println(matchAll(pattern3, s1));
        System.out.println("=====================4========================");
        System.out.println(matchAll(pattern4, s1));
        System.out.println("=====================5========================");
        System.out.println(matchAll(pattern5, s1));
    }

    /**
     * Collects, for every match, the text captured by group 2.
     *
     * @param pattern compiled pattern with at least two capturing groups
     * @param source  text to search
     * @return the group-2 captures in order of occurrence
     */
    private static List<String> matchTagContent(Pattern pattern, String source) {
        List<String> captured = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            captured.add(m.group(2));
        }
        return captured;
    }

    /**
     * Collects every match of {@code pattern} in {@code source}.
     *
     * @param pattern compiled pattern to search with
     * @param source  text to search
     * @return the matched substrings in order of occurrence
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> found = new ArrayList<>(10);
        for (Matcher m = pattern.matcher(source); m.find(); ) {
            found.add(m.group());
        }
        return found;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * String.replaceAll is implemented the same way
     * (Pattern.compile(regex).matcher(this).replaceAll(replacement)); the result is a new
     * string, so the source string is left untouched.
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        return pattern.matcher(source).replaceAll(replaceStr);
    }
}

// output:
//        [<TITLE>BEN FORTA'S HOMEPAGE</TITLE>]
//        =====================1========================
//        [BEN FORTA'S HOMEPAGE]
//        =====================2========================
//        [BEN FORTA'S HOMEPAGE]
//        =====================3========================
//        [01, 23.45, 42, 5.31, 1, 899.00, 99, 69.96, 4]
//        =====================4========================
//        [$23.45, $5.31, $899.00, $69.96]
//        =====================5========================
//        [23.45, 5.31, 899.00, 69.96]

回溯引用

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates back-references: inside a pattern ({@code \1}) and inside a
 * replacement string ({@code $1}).
 */
public class TestBackTraceRef {

    // First-level headings: <[Hh]1>.*?</[Hh]1>
    private static final Pattern pattern = Pattern.compile("<h1>.*?</h1>", Pattern.CASE_INSENSITIVE);

    // Headings of any level: <[Hh][1-6]>.*?</[Hh][1-6]>
    // This also accepts mismatched tags such as <H2>...</H3>.
    private static final Pattern pattern1 = Pattern.compile("<h[1-6]>.*?</h[1-6]>", Pattern.CASE_INSENSITIVE);

    // A back-reference rejects mismatched tags: \1 must repeat what group 1 captured.
    // Back-references can only refer to sub-expressions (groups).
    private static final Pattern pattern2 = Pattern.compile("<(h[1-6])>.*?</\\1>", Pattern.CASE_INSENSITIVE);

    // Repeated words. The trailing \b makes sure the second occurrence is a whole word;
    // without it " a an" would be reported as a repetition of "a".
    private static final Pattern pattern3 = Pattern.compile("\\s(\\w+)\\s\\1\\b");

    // Patterns whose groups are referenced from the replacement string
    private static final Pattern pattern4 = Pattern.compile("(\\w+[\\w.]*@[\\w.]+\\.\\w+)");
    private static final Pattern pattern5 = Pattern.compile("(\\d{3})-(\\d{3})-(\\d{4})");

    /**
     * Runs the demo patterns against the sample texts and prints the results,
     * separated by numbered divider lines.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String html = "<BODY>" +
                "<H1>welcome to general expression lesson</H1>" +
                "<H2>regexp is a good tool to handle string</H2>" +
                "<H2>you can be a regex master</H2>" +
                "<H2>this is not valid</H3>" +
                "</BODY>";
        System.out.println(matchAll(pattern, html));
        System.out.println("=====================1========================");
        System.out.println(matchAll(pattern1, html));
        System.out.println("=====================2========================");
        System.out.println(matchAll(pattern2, html));
        System.out.println("=====================3========================");

        // Each fragment ends with a space so that words of adjacent fragments do not run
        // together (previously "are" + "repeated" became "arerepeated").
        String s = "this is a block of of text, " +
                "several words here are are " +
                "repeated, and and they " +
                "should not be";
        System.out.println(matchAll(pattern3, s));
        System.out.println("=====================4========================");

        // In a replacement string $1, $2, ... refer to what the groups captured.
        String s1 = "hi, ben@forta.com is my email address";
        System.out.println(replaceAll(pattern4, s1, "<A HREF='mailto:$1'>$1</A>"));
        System.out.println("=====================5========================");

        String s2 = "313-555-1234 \n" +
                "248-555-9999 \n" +
                "810-555-9000 \n";
        System.out.println(replaceAll(pattern5, s2, "($1) $2-$3"));
    }

    /**
     * Collects every match of {@code pattern} in {@code source}.
     *
     * @param pattern compiled pattern to search with
     * @param source  text to search
     * @return the matched substrings in order of occurrence; empty if there is no match
     */
    private static List<String> matchAll(Pattern pattern, String source) {
        List<String> matchLs = new ArrayList<>(10);
        Matcher matcher = pattern.matcher(source);
        while (matcher.find()) {
            matchLs.add(matcher.group());
        }
        return matchLs;
    }

    /**
     * Replaces every match of {@code pattern} in {@code source} with {@code replaceStr}.
     * String.replaceAll is implemented the same way
     * (Pattern.compile(regex).matcher(this).replaceAll(replacement)); the result is a new
     * string, so the source string is left untouched.
     *
     * @param pattern    compiled pattern to search with
     * @param source     text to search
     * @param replaceStr replacement; may reference groups with $1, $2, ...
     * @return the text with all matches replaced
     */
    private static String replaceAll(Pattern pattern, String source, String replaceStr) {
        Matcher matcher = pattern.matcher(source);
        return matcher.replaceAll(replaceStr);
    }
}

// output:
//        [<H1>welcome to general expression lesson</H1>]
//        =====================1========================
//        [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>, <H2>this is not valid</H3>]
//        =====================2========================
//        [<H1>welcome to general expression lesson</H1>, <H2>regexp is a good tool to handle string</H2>, <H2>you can be a regex master</H2>]
//        =====================3========================
//        [ of of,  are are,  and and]
//        =====================4========================
//        hi, <A HREF='mailto:ben@forta.com'>ben@forta.com</A> is my email address
//        =====================5========================
//        (313) 555-1234
//        (248) 555-9999
//        (810) 555-9000 

特殊替換

import java.util.regex.Matcher;
        import java.util.regex.Pattern;

/**
 * Demonstrates match-by-match replacement with Matcher.appendReplacement and
 * Matcher.appendTail: every occurrence of "java" (in any case) is rewritten, alternating
 * between upper and lower case.
 */
public class SpecialReplace {
    public static void main(String[] args) {
        Matcher occurrences = Pattern.compile("java", Pattern.CASE_INSENSITIVE)
                .matcher("java Java JAVA JAva I love Java and you ?");
        StringBuffer result = new StringBuffer();

        // The 1st, 3rd, 5th ... match becomes upper case, the 2nd, 4th ... lower case.
        boolean upperCase = true;
        while (occurrences.find()) {
            String replacement;
            if (upperCase) {
                replacement = "JAVA";
            } else {
                replacement = "java";
            }
            occurrences.appendReplacement(result, replacement);
            upperCase = !upperCase;
        }
        // Copy whatever follows the last match.
        occurrences.appendTail(result);

        System.out.println(result);
    }
}

// output:
//          JAVA java JAVA java I love JAVA and you ?

多行匹配

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Contrasts the ^ anchor with and without Pattern.MULTILINE on a multi-line file list.
 */
public class TestMultilineMatch {

    /**
     * Replaces the matches of the same anchored pattern twice - once in multi-line mode,
     * once in the default mode - and prints both results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String fileLs = "sales1.xls\n"
                + "orders3.xls\n"
                + "sales2.xls\n"
                + "sales.xls\n"
                + "sales3.xls\n"
                + "apac1.xls\n"
                + "europe2.xls\n"
                + "na1.xls\n"
                + "na2.xls\n"
                + "cat.xls\n"
                + "usa1.xls\n"
                + "sam.xls\n"
                + "sa1.xls\n";

        // Pattern.MULTILINE switches on multi-line matching; it is equivalent to
        // Pattern.compile("(?m)^[ns]a.\\.xls"). ^ then matches at the start of every line.
        Pattern lineAnchored = Pattern.compile("^[ns]a.\\.xls", Pattern.MULTILINE);
        System.out.println(lineAnchored.matcher(fileLs).replaceAll("多行匹配"));

        // Default mode: ^ only matches at the very start of the input.
        Pattern inputAnchored = Pattern.compile("^[ns]a.\\.xls");
        System.out.println(inputAnchored.matcher(fileLs).replaceAll("整串匹配"));
    }
}

// output:
//        sales1.xls
//        orders3.xls
//        sales2.xls
//        sales.xls
//        sales3.xls
//        apac1.xls
//        europe2.xls
//        多行匹配
//        多行匹配
//        cat.xls
//        usa1.xls
//        多行匹配
//        多行匹配
//
//        sales1.xls
//        orders3.xls
//        sales2.xls
//        sales.xls
//        sales3.xls
//        apac1.xls
//        europe2.xls
//        na1.xls
//        na2.xls
//        cat.xls
//        usa1.xls
//        sam.xls
//        sa1.xls

貪婪模式

import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TestGreedyAndLazyMode {
    /**
     * Contrasts a greedy quantifier ({@code .*}) with a lazy one ({@code .*?}).
     *
     * <p>Both patterns look for a {@code <book>...</book>} element (the closing tag is tied
     * to the opening one through back-reference {@code \1}) and replace only the first match.
     * The greedy form stretches to the last closing tag; the lazy form stops at the first one.
     * Each result is printed on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String xml = "<books><book>西游記</book><book>三國演義</book><book>水滸傳</book></books>";

        // Greedy: ".*" takes as much as it can, so all three <book> elements are consumed.
        String greedyResult = Pattern.compile("<(book)>.*</\\1>", Pattern.CASE_INSENSITIVE)
                .matcher(xml)
                .replaceFirst("貪婪模式");
        System.out.println(greedyResult);

        // Lazy: ".*?" takes as little as it can, so only the first <book> element is consumed.
        String lazyResult = Pattern.compile("<(book)>.*?</\\1>")
                .matcher(xml)
                .replaceFirst("懶惰模式");
        System.out.println(lazyResult);
    }
}

// output: 
//      <books>貪婪模式</books>
//      <books>懶惰模式<book>三國演義</book><book>水滸傳</book></books>

純文本和點

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TestPureTextAndDot {

    /** Plain literal text; not exercised by {@link #main}. */
    private static final Pattern BEN_EXACT = Pattern.compile("Ben");
    /** Plain literal text; not exercised by {@link #main}. */
    private static final Pattern MY_EXACT = Pattern.compile("my");
    /**
     * Regexes are case-sensitive by default; a case-insensitive option can be supplied
     * when the pattern is compiled.
     */
    private static final Pattern BEN_ANY_CASE = Pattern.compile("ben", Pattern.CASE_INSENSITIVE);

    /**
     * {@code .} matches any single character (letter, digit, punctuation, even a literal dot)
     * except a line break; the SQL counterpart is {@code _}.
     */
    private static final Pattern SALES_PLUS_ONE = Pattern.compile("sales.");
    /** Any char, then 'a', then any char, then a literal ".xls" (the dot is escaped). */
    private static final Pattern X_A_X_DOT_XLS = Pattern.compile(".a.\\.xls");

    /**
     * Prints every match of the case-insensitive "ben" pattern in a sample sentence, then
     * every match of the two dot patterns in a newline-separated file listing, with a
     * separator line between the two listing searches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String sentence = "Hello, my name is Ben. Please visit my01 website at http://www.forta.com/.";
        printAllMatches(BEN_ANY_CASE, sentence);

        String fileLs = "sales1.xls\n" + "orders3.xls\n" + "sales2.xls\n" + "sales.xls\n"
                + "sales3.xls\n" + "apac1.xls\n" + "europe2.xls\n"
                + "na1.xls\n" + "na2.xls\n" + "sa1.xls\n";
        printAllMatches(SALES_PLUS_ONE, fileLs);

        System.out.println("==================================================================");

        printAllMatches(X_A_X_DOT_XLS, fileLs);
    }

    /** Prints each successive match of {@code pattern} in {@code input} on its own line. */
    private static void printAllMatches(Pattern pattern, String input) {
        for (Matcher m = pattern.matcher(input); m.find(); ) {
            System.out.println(m.group());
        }
    }
}

// output:
//        Ben
//        sales1
//        sales2
//        sales.
//        sales3
//        ==================================================================
//        na1.xls
//        na2.xls
//        sa1.xls

Java 正則 API

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MathcesAndFindDiff {

    /**
     * Walks a single {@link Matcher} through {@code matches()}, {@code find()},
     * {@code reset()} and {@code lookingAt()} and prints what each call reports, so the
     * differences between the three matching methods can be compared side by side.
     *
     * <p>The order of the calls matters: each one depends on the matcher state left behind
     * by the previous one.
     *
     * @param args ignored
     */
    public static void main1(String[] args){
        Matcher digits = Pattern.compile("\\d{3,5}").matcher("123-34345-234-00");

        // matches() requires the ENTIRE input to match, so it fails for this input.
        print(digits.matches());
        System.out.println("==============================================");

        // find() without an intervening reset(): per the original author's notes and recorded
        // output, the failed matches() had already consumed the leading "123", so this find()
        // lands on "34345" (start 4) rather than on "123" (start 0). This is observed
        // behaviour; dropping the matches() call above would make this print 0.
        digits.find();
        print(digits.start());
        System.out.println("==============================================");

        // Back to a clean state: the next find() starts from the beginning of the input.
        digits.reset();

        // First find(): result, then matched text and its start index.
        print(digits.find());
        System.out.println("=====================1========================");
        print(describe(digits));
        System.out.println("=====================2========================");

        // Second find(): continues after the previous match.
        print(digits.find());
        System.out.println("=====================3========================");
        print(describe(digits));
        System.out.println("=====================4========================");

        // lookingAt() attempts to match the input, starting at the beginning of the region,
        // against the pattern; it is a prefix match and does not need to consume everything.
        // First lookingAt(): result, then matched text and its start index.
        print(digits.lookingAt());
        System.out.println("=====================5========================");
        print(describe(digits));
        System.out.println("=====================6========================");

        // Second lookingAt(): always anchored at the region start, so the result repeats.
        print(digits.lookingAt());
        System.out.println("=====================7========================");
        print(describe(digits));
        System.out.println("=====================8========================");
    }

    /** Formats the current match of {@code m} as {@code "<matched text> - <start index>"}. */
    private static String describe(Matcher m) {
        return m.group() + " - " + m.start();
    }

    /** Prints {@code o} followed by a line break on standard output. */
    private static void print(Object o){
        System.out.println(o);
    }

    /**
     * Compares default matching with {@code DOTALL | MULTILINE} matching.
     *
     * <p>For each of three sample patterns, prints whether {@code find()} succeeds on a
     * two-line input, first with no flags and then with both flags set.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String input = "I dream of engines\nmore engines, all day long";
        System.out.println("INPUT:" + input);
        System.out.println();

        String[] patt = {"engines.more engines", "ines\nmore", "engines$"};
        for (String regex : patt) {
            System.out.println("PATTERN:" + regex);

            // No flags: '.' stops at line breaks and '$' only matches at the end of input.
            Matcher plain = Pattern.compile(regex).matcher(input);
            System.out.println("DEFAULT match " + plain.find());

            // DOTALL lets '.' match any character, line breaks included;
            // MULTILINE lets '$' match at the end of each line.
            Matcher flagged = Pattern.compile(regex, Pattern.DOTALL | Pattern.MULTILINE).matcher(input);
            System.out.println("Multiline match " + flagged.find());
            System.out.println();
        }
    }
}

// output 1:
//        false
//        ==============================================
//        4
//        ==============================================
//        true
//        =====================1========================
//        123 - 0
//        =====================2========================
//        true
//        =====================3========================
//        34345 - 4
//        =====================4========================
//        true
//        =====================5========================
//        123 - 0
//        =====================6========================
//        true
//        =====================7========================
//        123 - 0
//        =====================8========================

// output2:
//        INPUT:I dream of engines
//        more engines, all day long
//
//        PATTERN:engines.more engines
//        DEFAULT match false
//        Multiline match true
//
//        PATTERN:ines
//        more
//        DEFAULT match true
//        Multiline match true
//
//        PATTERN:engines$
//        DEFAULT match false
//        Multiline match true
©著作權歸作者所有,轉載或內容合作請聯繫作者
  • 序言:七十年代末逻淌,一起剝皮案震驚了整個(gè)濱河市么伯,隨后出現(xiàn)的幾起案子,更是在濱河造成了極大的恐慌卡儒,老刑警劉巖田柔,帶你破解...
    沈念sama閱讀 211,194評(píng)論 6 490
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件,死亡現(xiàn)場(chǎng)離奇詭異骨望,居然都是意外死亡硬爆,警方通過查閱死者的電腦和手機(jī),發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 90,058評(píng)論 2 385
  • 文/潘曉璐 我一進(jìn)店門擎鸠,熙熙樓的掌柜王于貴愁眉苦臉地迎上來摆屯,“玉大人,你說我怎么就攤上這事糠亩。” “怎么了准验?”我有些...
    開封第一講書人閱讀 156,780評(píng)論 0 346
  • 文/不壞的土叔 我叫張陵赎线,是天一觀的道長(zhǎng)。 經(jīng)常有香客問我糊饱,道長(zhǎng)垂寥,這世上最難降的妖魔是什么? 我笑而不...
    開封第一講書人閱讀 56,388評(píng)論 1 283
  • 正文 為了忘掉前任另锋,我火速辦了婚禮滞项,結(jié)果婚禮上,老公的妹妹穿的比我還像新娘夭坪。我一直安慰自己文判,他們只是感情好,可當(dāng)我...
    茶點(diǎn)故事閱讀 65,430評(píng)論 5 384
  • 文/花漫 我一把揭開白布室梅。 她就那樣靜靜地躺著戏仓,像睡著了一般疚宇。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上赏殃,一...
    開封第一講書人閱讀 49,764評(píng)論 1 290
  • 那天敷待,我揣著相機(jī)與錄音,去河邊找鬼仁热。 笑死榜揖,一個(gè)胖子當(dāng)著我的面吹牛,可吹牛的內(nèi)容都是我干的抗蠢。 我是一名探鬼主播举哟,決...
    沈念sama閱讀 38,907評(píng)論 3 406
  • 文/蒼蘭香墨 我猛地睜開眼,長(zhǎng)吁一口氣:“原來是場(chǎng)噩夢(mèng)啊……” “哼物蝙!你這毒婦竟也來了炎滞?” 一聲冷哼從身側(cè)響起,我...
    開封第一講書人閱讀 37,679評(píng)論 0 266
  • 序言:老撾萬榮一對(duì)情侶失蹤诬乞,失蹤者是張志新(化名)和其女友劉穎册赛,沒想到半個(gè)月后,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體震嫉,經(jīng)...
    沈念sama閱讀 44,122評(píng)論 1 303
  • 正文 獨(dú)居荒郊野嶺守林人離奇死亡森瘪,尸身上長(zhǎng)有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點(diǎn)故事閱讀 36,459評(píng)論 2 325
  • 正文 我和宋清朗相戀三年,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了票堵。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片扼睬。...
    茶點(diǎn)故事閱讀 38,605評(píng)論 1 340
  • 序言:一個(gè)原本活蹦亂跳的男人離奇死亡,死狀恐怖悴势,靈堂內(nèi)的尸體忽然破棺而出窗宇,到底是詐尸還是另有隱情,我是刑警寧澤特纤,帶...
    沈念sama閱讀 34,270評(píng)論 4 329
  • 正文 年R本政府宣布军俊,位于F島的核電站,受9級(jí)特大地震影響捧存,放射性物質(zhì)發(fā)生泄漏粪躬。R本人自食惡果不足惜,卻給世界環(huán)境...
    茶點(diǎn)故事閱讀 39,867評(píng)論 3 312
  • 文/蒙蒙 一昔穴、第九天 我趴在偏房一處隱蔽的房頂上張望镰官。 院中可真熱鬧,春花似錦吗货、人聲如沸泳唠。這莊子的主人今日做“春日...
    開封第一講書人閱讀 30,734評(píng)論 0 21
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽警检。三九已至孙援,卻和暖如春,著一層夾襖步出監(jiān)牢的瞬間扇雕,已是汗流浹背拓售。 一陣腳步聲響...
    開封第一講書人閱讀 31,961評(píng)論 1 265
  • 我被黑心中介騙來泰國打工, 沒想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留镶奉,地道東北人础淤。 一個(gè)月前我還...
    沈念sama閱讀 46,297評(píng)論 2 360
  • 正文 我出身青樓,卻偏偏與公主長(zhǎng)得像哨苛,于是被迫代替她去往敵國和親鸽凶。 傳聞我的和親對(duì)象是個(gè)殘疾皇子,可洞房花燭夜當(dāng)晚...
    茶點(diǎn)故事閱讀 43,472評(píng)論 2 348

推薦閱讀更多精彩內(nèi)容