# python-正则表达式

#!/usr/local/bin/python3
# -*- coding:utf-8 -*-

import re

# ----------正则表达式符号----------
r'''
'.'     默认匹配除\n之外的任意一个字符,若指定flag DOTALL,则匹配任意字符,包括换行
'.+'    匹配一个或多个任意字符(默认不包括换行)
'^'     匹配字符开头,若指定flags MULTILINE,这种也可以匹配上(r"^a","\nabc\neee",flags=re.MULTILINE)
        注意:在 match中无卵用
'$'     匹配字符结尾,或re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()也可以
        注:'a$'指待查的字符串的结尾为'a'
'*'     匹配*号前的字符0次或多次,re.findall("ab*","cabb3abcbbac")  结果为['abb', 'ab', 'a']
'+'     匹配前一个字符1次或多次,re.findall("ab+","ab+cd+abb+bba") 结果['ab', 'abb']
'?'     匹配前一个字符1次或0次
'{m}'   匹配前一个字符m次
'{n,m}' 匹配前一个字符n到m次,re.findall("ab{1,3}","abb abc abbcbbb") 结果['abb', 'ab', 'abb']
'|'     匹配|左或|右的字符,re.search("abc|ABC","ABCBabcCD").group() 结果'ABC'
'(...)' 分组匹配,re.search("(abc){2}a(123|456)c", "abcabca456c").group() 结果 abcabca456c


'\A'    只从字符开头匹配,re.search("\Aabc","alexabc") 是匹配不到的
'\Z'    匹配字符结尾,同$
'\d'    匹配数字0-9
'\D'    匹配非数字
'\w'    匹配[A-Za-z0-9_](字母、数字和下划线)
'\W'    匹配非[A-Za-z0-9_]
'\s'    匹配空白字符,如空格、\t、\n、\r , re.search("\s+","ab\tc1\n3").group() 结果 '\t'

'(?P<name>...)' 分组匹配 re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",\
    "371481199306143242").groupdict() 结果{'province': '3714', 'city': '81', 'birthday': '1993'}

------------------------------------------------
最常用的匹配语法:
    re.match 从头开始匹配
    re.search 匹配包含
    re.findall 把所有匹配到的字符放到以列表中的元素返回
    re.split    以匹配到的字符当做列表分隔符
    re.sub      匹配字符并替换

几个匹配模式:
    1:re.I(re.IGNORECASE): 忽略大小写(括号内是完整写法,下同)
        案例:print(re.search("[a-z]+", "abcdA", flags = re.I).group())
        结果:abcdA
    2:M(MULTILINE): 多行模式,改变'^'和'$'的行为(参见上文对'^'和'$'的说明)
    3:S(DOTALL): 点任意匹配模式,改变'.'的行为
        案例:print(re.search(r".+", "\nabc\neee", flags=re.S).group())

'''

# ---------- Matching functions ----------

# match: the pattern must match starting at the very first character.
res = re.compile(".+").match("Zhangyu520Mahongyan")
print(res[0])  # Result: Zhangyu520Mahongyan

# search: scans the whole string and returns only the first match.
# "M.+n": '.+' is greedy, so the match runs from the first 'M' up to the LAST
# 'n' in the string, regardless of how many other 'n' characters lie between.
res1 = re.compile("M.+n").search("Zhangyu521MahongyanMahongyan250n")
print(res1.group(0))  # Result: MahongyanMahongyan250n
# "M[a-z]+n": '[a-z]' replaces '.', so the run cannot cross the next uppercase
# 'M'; the match ends at the last 'n' of the first lowercase run.
res2 = re.compile("M[a-z]+n").search("Zhangyu521MahongyanMahongyan250")
print(res2.group(0))  # Result: Mahongyan
res3 = re.compile("#[a-zA-Z]+#").search("Zhangyu#Mahongyan#Zhangyu")
print(res3.group(0))  # Result: #Mahongyan#
# Feed the text matched by res3 into a second search.
res4 = re.compile("M[a-z]+g").search(res3[0])
print(res4.group(0))  # Result: Mahong

# '?' makes the preceding character optional: "aaa?" accepts "aa" or "aaa".
res5 = re.compile("aaa?").search("aalex")
print(res5[0])  # Result: aa

# '{3}' requires exactly three consecutive digits; isolated digits are skipped.
res6 = re.compile("[0-9]{3}").search("aa1x2a345aa")
print(res6[0])  # Result: 345

# findall: returns every non-overlapping match as a plain list of strings,
# so there is no group() method to call on the result.
# '{1,3}' means one to three repetitions, taken greedily.
res7 = re.compile("[0-9]{1,3}").findall("aa1x2a3456aa")
print(res7)  # Result: ['1', '2', '345', '6']

# With alternation, search stops at the first hit ...
res8 = re.compile("abc|ABC").search("ABCBabcCD")
print(res8[0])  # Result: ABC
# ... while findall collects all of them.
res9 = re.compile("abc|ABC").findall("ABCBabcCD")
print(res9)  # Result: ['ABC', 'abc']

# Patterns are written as raw strings so that regex escapes such as '\|' reach
# the re module untouched and do not trigger "invalid escape sequence" warnings.

# Without parentheses '{2}' applies only to the preceding character ('c').
res10 = re.search(r"abc{2}", "alexabccc")
print(res10.group())    # Result: abcc
# With parentheses '{2}' repeats the whole group "abc".
res10 = re.search(r"(abc){2}", "alexabcabc")
print(res10.group())    # Result: abcabc
# '\|' matches a literal pipe character instead of acting as alternation.
res11 = re.search(r"(abc){2}\|", "alexabcabc|")
print(res11.group())    # Result: abcabc|
# The escaped sequence "||=" is grouped and must occur twice.
res12 = re.search(r"(abc){2}(\|\|\=){2}", "alexabcabc||=||=")
print(res12.group())    # Result: abcabc||=||=

# Character-class shortcuts. Raw strings keep the backslashes intact for the
# regex engine and avoid "invalid escape sequence" warnings.

# '\D' matches any non-digit character.
res13 = re.search(r"\D+", "123$-a")
print(res13.group())    # Result: $-a

# '\w' matches word characters (letters, digits and underscore).
res14 = re.search(r"\w+", "1ddDFR23$- \r\na")
print(res14.group())    # Result: 1ddDFR23

# '\W' matches non-word characters, which includes whitespace.
res15 = re.search(r"\W+", "1ddDFR23$- \r\na")
print(res15.group())    # Result: '$- \r\n' (the trailing space, CR and LF are part of the match)

# '\s' matches whitespace characters (space, \t, \n, \r, ...).
res16 = re.search(r"\s+", "1ddDFR23$- \r\na")
# Print the matched text rather than the Match object itself.
print(res16.group())    # Result: ' \r\n'

# split: every run of digits acts as a separator between list items.
res16 = re.compile("[0-9]+").split("abc12de3f45GH")
print(res16)  # Result: ['abc', 'de', 'f', 'GH']

# sub: replace every run of digits with '|'.
res17 = re.compile("[0-9]+").sub("|", "abc12de3f45GH")
print(res17)  # Result: abc|de|f|GH
# count=2 limits the replacement to the first two runs of digits.
res18 = re.compile("[0-9]+").sub("|", "abc12de3f45GH", count=2)
print(res18)  # Result: abc|de|f45GH

# ---------- Examples ----------
# Named groups: (?P<name>...) captures text that groupdict() returns keyed by
# the group name.
a = re.search("(?P<id>[0-9]+)(?P<name>[a-zA-Z]+)", "abcd1234daf@34").groupdict()
print(a["id"])    # Result: 1234
print(a["name"])  # Result: daf

# groupdict()'s optional argument is the default value used for groups that
# did not take part in the match; it is not a key selector, so it is omitted.
b = re.search(
    "(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",
    "371481199306143242",
).groupdict()
print(b)  # Result: {'province': '3714', 'city': '81', 'birthday': '1993'}

if __name__ == '__main__':
    # The examples run at module level, so there is nothing more to do here.
    pass

# 相关推荐