package main
import (
"fmt"
"regexp"
)
// articleRegexp matches anchor tags whose href points at an article or
// subject page on m.xxx.cn or api.xxx.cn. The dots in the host name are
// escaped so they match only a literal '.', not an arbitrary character.
// Note that the `.*` / `.+` parts are greedy, so several anchors on one
// line can be reported as a single match.
var articleRegexp = regexp.MustCompile(`<a.*href="https://(m|api)\.xxx\.cn(.*)/(article|subjects)/\d+"(.*)>(.+)</a>`)

// contractionRegexp matches an apostrophe followed by one or two word
// characters, e.g. the "'m" in "I'm".
var contractionRegexp = regexp.MustCompile(`((\')\w{1,2})`)

// main prints every match of articleRegexp in a sample HTML fragment and
// every match of contractionRegexp in a sample English sentence.
func main() {
	text := `<p>更多分析師觀點詳見
<a target="_blank" rel="noopener">
一周策略前瞻:周期之火破滅了蔓彩?</a></p>
<p><strong>來看主題:</strong></p>
<p><strong>
1治笨、<a class="" >PPP</a> :
</strong>
<a target="_blank" rel="noopener">
國辦發(fā)文力促民資參與PPP,經(jīng)濟(jì)回落下行業(yè)有望再成>穩(wěn)增長抓手 </a></p>
<p>參見上文邏輯赤嚼,此處不多說了旷赖。地產(chǎn)板塊也是類似。</p>`
	//var SubjectRegexp = regexp.MustCompile(`<a href="https://[[:ascii:]]*">(?P<ct>.*)</a>`)
	// A negative count asks FindAllString for every match.
	fmt.Println(articleRegexp.FindAllString(text, -1))
	//fmt.Println(SubjectRegexp.FindAllString(text, -1))
	//fmt.Println(SubjectRegexp.ReplaceAllString(text, `${ct}`))
	text2 := `I'm singing while you're dancing.`
	fmt.Println(contractionRegexp.FindAllString(text2, -1))
}
Output:
[<a target="_blank" rel="noopener">一周策略前瞻:周期之火破滅了?</a> <a class="" >PPP</a> :</strong><a target="_blank" rel="noopener">國辦發文力促民資參與PPP,經濟回落下行業有望再成穩增長抓手 </a>]
['m 're]
1.MustCompile(...)
VS Compile(...)
// Compile compiles expr by delegating to the internal compile helper, passing
// the syntax.Perl flags and false as the final argument, and returns the
// helper's *Regexp and error unchanged.
func Compile(expr string) (*Regexp, error) {
	re, err := compile(expr, syntax.Perl, false)
	return re, err
}
MustCompile 實際上調用的是 Compile,只是多加了錯誤檢測:如果編譯出錯,就直接 panic。
// MustCompile calls Compile on str and returns the resulting *Regexp.
// If Compile reports an error, MustCompile panics with a message that
// contains the quoted expression and the error text.
func MustCompile(str string) *Regexp {
	re, err := Compile(str)
	if err != nil {
		panic(`regexp: Compile(` + quote(str) + `): ` + err.Error())
	}
	return re
}
2. MatchString 檢測字符串是否匹配正則,參數為被檢測的字符串,返回布爾值
// MatchString reports whether the string s contains any match of the
// regular expression re. It delegates to re.doMatch, passing nil for the
// first two arguments and s as the string input.
func (re *Regexp) MatchString(s string) bool {
	matched := re.doMatch(nil, nil, s)
	return matched
}
3. FindAllString(...)
有兩個參數:第一個參數為要處理的字符串,第二個參數為要獲取的匹配結果數量;如果為負數,則取出所有滿足條件的匹配結果
// FindAllString is the 'All' version of FindString: it returns a slice
// holding the text of all successive matches of the expression in s, as
// defined by the 'All' description in the package comment.
// A negative n is replaced by len(s)+1 before searching.
// A return value of nil indicates no match.
func (re *Regexp) FindAllString(s string, n int) []string {
	limit := n
	if limit < 0 {
		limit = len(s) + 1
	}
	var found []string
	re.allMatches(s, nil, limit, func(loc []int) {
		// Allocate lazily so that "no match" naturally yields nil.
		if found == nil {
			found = make([]string, 0, startSize)
		}
		// loc[0] and loc[1] delimit the whole match within s.
		found = append(found, s[loc[0]:loc[1]])
	})
	return found
}
其中核心是調用私有方法 allMatches 來獲取結果。該方法的第一個參數為要處理的文本字符串;第二個參數為字節切片,在 FindAllString 中傳入的是 nil;第三個參數為 FindAllString 的第二個參數 n;第四個參數為一個函數,它負責把所有匹配到的結果收集起來。
// allMatches repeatedly searches the input for matches of re and hands the
// index slice of each accepted match to deliver. It stops after n accepted
// matches, when no further match is found, or once the search position has
// moved past the end of the input.
// Find matches in slice b if b is non-nil, otherwise find matches in string s.
func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
// end is the length of whichever input is in use.
var end int
if b == nil {
end = len(s)
} else {
end = len(b)
}
// pos is where the next search starts, i counts the matches delivered so
// far, and prevMatchEnd is the end offset of the previous match (-1 before
// the first one).
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
if len(matches) == 0 {
// Nothing more to find.
break
}
accept := true
if matches[1] == pos {
// We've found an empty match.
if matches[0] == prevMatchEnd {
// We don't allow an empty match right
// after a previous match, so ignore it.
accept = false
}
// Step over one rune so that the next search makes progress.
var width int
// TODO: use step()
if b == nil {
_, width = utf8.DecodeRuneInString(s[pos:end])
} else {
_, width = utf8.DecodeRune(b[pos:end])
}
if width > 0 {
pos += width
} else {
// Nothing left to decode: move pos beyond end so the loop condition fails.
pos = end + 1
}
} else {
// The match ends beyond pos: resume searching at its end.
pos = matches[1]
}
prevMatchEnd = matches[1]
if accept {
// deliver receives the result of re.pad(matches); only accepted
// matches count toward the limit n.
deliver(re.pad(matches))
i++
}
}
}
// The pattern "a." is the letter 'a' followed by one more character.
re := regexp.MustCompile("a.")
// n = -1: return every match.
fmt.Println(re.FindAllString("paranormal", -1))
// n = 2: return at most two matches.
fmt.Println(re.FindAllString("paranormal", 2))
// Successive matches do not overlap, so "graal" yields the single match "aa".
fmt.Println(re.FindAllString("graal", -1))
// No match: FindAllString returns nil, which Println shows as [].
fmt.Println(re.FindAllString("none", -1))
Output:
[ar an al]
[ar an]
[aa]
[]
4.ReplaceAllString(...)
將所有匹配到的結果替換為指定的字符串。第二個參數給出了用來替換的值。
// ReplaceAllString returns the string built by re.replaceAll for src, where
// each replacement is produced by re.expand with repl as the template.
// Only the whole-match index pair is requested unless repl contains "$",
// in which case index pairs for every subexpression are requested as well.
func (re *Regexp) ReplaceAllString(src, repl string) string {
	nmatch := 2
	if strings.Contains(repl, "$") {
		nmatch = 2 * (re.numSubexp + 1)
	}
	expandInto := func(dst []byte, match []int) []byte {
		return re.expand(dst, repl, nil, src, match)
	}
	return string(re.replaceAll(nil, src, nmatch, expandInto))
}
replaceAll
// replaceAll builds and returns a byte slice in which the unmatched parts of
// the input are copied through and each match is replaced by whatever repl
// appends. The input is bsrc when it is non-nil, otherwise src.
// nmatch is the number of match-index slots requested from doExecute; it is
// capped at re.prog.NumCap. repl receives the output built so far together
// with the match indices and must return the extended output.
func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
lastMatchEnd := 0 // end position of the most recent match
searchPos := 0 // position where we next look for a match
var buf []byte
// endPos is the length of whichever input is in use.
var endPos int
if bsrc != nil {
endPos = len(bsrc)
} else {
endPos = len(src)
}
if nmatch > re.prog.NumCap {
nmatch = re.prog.NumCap
}
// dstCap is handed to doExecute as an empty slice over a two-element array;
// presumably it serves as scratch space for the result — confirm in doExecute.
var dstCap [2]int
for searchPos <= endPos {
a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
if len(a) == 0 {
break // no more matches
}
// Copy the unmatched characters before this match.
if bsrc != nil {
buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
} else {
buf = append(buf, src[lastMatchEnd:a[0]]...)
}
// Now insert a copy of the replacement string, but not for a
// match of the empty string immediately after another match.
// (Otherwise, we get double replacement for patterns that
// match both empty and nonempty strings.)
if a[1] > lastMatchEnd || a[0] == 0 {
buf = repl(buf, a)
}
lastMatchEnd = a[1]
// Advance past this match; always advance at least one character.
var width int
if bsrc != nil {
_, width = utf8.DecodeRune(bsrc[searchPos:])
} else {
_, width = utf8.DecodeRuneInString(src[searchPos:])
}
if searchPos+width > a[1] {
// The match ends before the next rune boundary: step over one rune.
searchPos += width
} else if searchPos+1 > a[1] {
// This clause is only needed at the end of the input
// string. In that case, DecodeRuneInString returns width=0.
searchPos++
} else {
// Otherwise continue searching right after the match.
searchPos = a[1]
}
}
// Copy the unmatched characters after the last match.
if bsrc != nil {
buf = append(buf, bsrc[lastMatchEnd:]...)
} else {
buf = append(buf, src[lastMatchEnd:]...)
}
return buf
}
5. ReplaceAllStringFunc
// ConvertTabToEmptyString runs TabRegExp over text and, inside every matched
// substring, replaces all occurrences of the first literal below with the
// second one.
//
// NOTE(review): as written, both literals appear to be a single space, which
// would make the replacement a no-op; the function name suggests the first
// one was meant to be a tab. Confirm against the original source.
func ConvertTabToEmptyString(text string) string {
	replaceInMatch := func(matchedStr string) string {
		return strings.ReplaceAll(matchedStr, " ", " ")
	}
	return TabRegExp.ReplaceAllStringFunc(text, replaceInMatch)
}
6.FindAllStringSubmatch
找出所有匹配到的字符串及其子組的列表;第二個參數小於 0,表示返回全部匹配
// FindAllStringSubmatch is the 'All' version of FindStringSubmatch: it
// returns one []string per successive match of the expression in s, as
// defined by the 'All' description in the package comment. Element j of
// each inner slice is the text delimited by the j-th index pair of the
// match; a pair whose start is negative leaves that element as "".
// A negative n is replaced by len(s)+1 before searching.
// A return value of nil indicates no match.
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
	if n < 0 {
		n = len(s) + 1
	}
	var all [][]string
	collect := func(loc []int) {
		// Allocate lazily so that "no match" yields nil.
		if all == nil {
			all = make([][]string, 0, startSize)
		}
		groups := make([]string, len(loc)/2)
		for g := range groups {
			lo, hi := loc[2*g], loc[2*g+1]
			if lo < 0 {
				// Negative start: leave the empty string in place.
				continue
			}
			groups[g] = s[lo:hi]
		}
		all = append(all, groups)
	}
	re.allMatches(s, nil, n, collect)
	return all
}
示例:
// AlliRemLinkUrls rewrites the Content of every article in articleArr.
// For each match of LinkUrlRegExp found in the content, the first remaining
// occurrence of the whole matched text is replaced by the match's third
// submatch. Matches are collected once from the content as it was before
// any replacement for that article.
func AlliRemLinkUrls(articleArr []*Article) {
	for _, art := range articleArr {
		// Ranging over a nil result is a no-op, so no explicit nil check is needed.
		for _, groups := range LinkUrlRegExp.FindAllStringSubmatch(art.Content, -1) {
			whole, replacement := groups[0], groups[3]
			art.Content = strings.Replace(art.Content, whole, replacement, 1)
		}
	}
}