Python                                                          Home : www.sharetechnote.com Python - Regular Expression    A regular expression is a standard notation for defining a pattern in a string. This is most widely used to find a substring using a pattern. You can easily google a lot of information on Regular Expression (I think Wikipedia : Regular Expression would be a pretty good start), but it would be almost impossible to understand the details without practicing on your own. I will just keep posting a bunch of examples as my own cheatsheet and practice. I hope this helps you as well.       Example 1 : '\d' findall() ===================================================   import re   p = re.compile('\d') print("re.compile('\d')", "=" , p.findall('1 little 10 little 1000 little indians'))   p = re.compile('\d.') print("re.compile('\d.')", "=" ,p.findall('1 little 10 little 1000 little indians'))   p = re.compile('\d..') print("re.compile('\d..')", "=" ,p.findall('1 little 10 little 1000 little indians'))   p = re.compile('\d{2}') print("re.compile('\d{2}')", "=" ,p.findall('1 little 10 little 1000 little indians'))   p = re.compile('\d{3}') print("re.compile('\d{3}')", "=" ,p.findall('1 little 10 little 1000 little indians'))   p = re.compile('\d+') print("re.compile('\d+')", "=" ,p.findall('1 little 10 little 1000 little indians'))   p = re.compile('\d?') print("re.compile('\d?')", "=" ,p.findall('1 little 10 little 1000 little indians'))   p = re.compile('\d*') print("re.compile('\d*')", "=" ,p.findall('1 little 10 little 1000 little indians'))   Result :----------------------------------------------------------------------   re.compile('\d') = ['1', '1', '0', '1', '0', '0', '0'] re.compile('\d.') = ['1 ', '10', '10', '00'] re.compile('\d..') = ['1 l', '10 ', '100', '0 l'] re.compile('\d{2}') = ['10', '10', '00'] re.compile('\d{3}') = ['100'] re.compile('\d+') = ['1', '10', '1000'] re.compile('\d?') = ['1', '', '', '', '', '', '', '', '', '1', '0', '', '', '', 
'', '', '', '', '', '1', '0', '0', '0', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''] re.compile('\d*') = ['1', '', '', '', '', '', '', '', '', '10', '', '', '', '', '', '', '', '', '1000', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']     Example 2 : '^[ ... ]' match() ===================================================   # ^ : start # \$ : end #[...] : character set (matches any one of the listed characters) # * : zero or more repetitions of the preceding item   import re   p = re.compile('^[a-z]*\$') print("re.compile('^[a-z]*\$');p.match('helloworld')", "\n\t" , p.match('helloworld'))   p = re.compile('^[a-z]*\$') print("re.compile('^[a-z]*\$');p.match('hello world')", "\n\t"  , p.match('hello world'))   p = re.compile('^[a-z\s]*\$') print("re.compile('^[a-z\s]*\$');p.match('hello world')", "\n\t" , p.match('hello world'))   p = re.compile('^[a-z]*\$') print("re.compile('^[a-z]*\$');p.match('HelloWorld')", "\n\t"  , p.match('HelloWorld'))   p = re.compile('^[a-zA-Z]*\$') print("re.compile('^[a-zA-Z]*\$');p.match('HelloWorld')", "\n\t"  , p.match('HelloWorld'))   p = re.compile('^[a-zA-Z\s]*\$') print("re.compile('^[a-zA-Z\s]*\$');p.match('Hello World')", "\n\t"  , p.match('Hello World'))   p = re.compile('^[a-zA-Z\s]*\$') print("re.compile('^[a-zA-Z\s]*\$');p.match('Hello1234 World')", "\n\t"  , p.match('Hello1234 World'))   p = re.compile('^[a-zA-Z0-9\s]*\$') print("re.compile('^[a-zA-Z0-9\s]*\$');p.match('Hello1234 World')", "\n\t"  , p.match('Hello1234 World'))   p = re.compile('^[a-zA-Z0-9\s]*\$') print("re.compile('^[a-zA-Z0-9\s]*\$');p.match('###Hello1234 World')", "\n\t"  , p.match('###Hello1234 World'))   p = re.compile('[a-zA-Z0-9#\s]*\$') print("re.compile('[a-zA-Z0-9#\s]*\$');p.match('###Hello1234 World')", "\n\t"  , p.match('###Hello1234 World'))   p = re.compile('.*[a-zA-Z0-9\s]*\$') print("re.compile('.*[a-zA-Z0-9\s]*\$');p.match('###Hello1234 World')", "\n\t"  , p.match('###Hello1234 World'))     Result :----------------------------------------------------------------------   // 
^[a-z]*\$ <-- alphabet only and all lower case any number of characters, no space //'helloworld'  match this criteria re.compile('^[a-z]*\$');p.match('helloworld')      <_sre.SRE_Match object; span=(0, 10), match='helloworld'>   // ^[a-z]*\$ <-- alphabet only and  all lower case, any number of characters, no space //'hello world' doesn't match this criteria since there is a space in it re.compile('^[a-z]*\$');p.match('hello world')      None   // ^[a-z\s]*\$ <-- alphabet only and all lower case, any number of characters, any number of white space //'hello world' match this criteria re.compile('^[a-z\s]*\$');p.match('hello world')      <_sre.SRE_Match object; span=(0, 11), match='hello world'>   // ^[a-z]*\$ <-- alphabet only and all lower case any number of characters, no space //'HelloWorld' doesn't match this criteria since it has capital letters in it. re.compile('^[a-z]*\$');p.match('HelloWorld')      None   // ^[a-zA-Z]*\$ <-- alphabet only and lower or upper case, any number of characters, no space //'HelloWorld' match this criteria. re.compile('^[a-zA-Z]*\$');p.match('HelloWorld')      <_sre.SRE_Match object; span=(0, 10), match='HelloWorld'>   // ^[a-zA-Z\s]*\$ <-- alphabet only and lower or upper case, any number of characters, any number of space //'Hello World' match this criteria re.compile('^[a-zA-Z\s]*\$');p.match('Hello World')      <_sre.SRE_Match object; span=(0, 11), match='Hello World'>   // ^[a-zA-Z\s]*\$ <-- alphabet only and lower or upper case, any number of characters, any number of space //'Hello1234 World' doesn't match this criteria because it has numbers in it. re.compile('^[a-zA-Z\s]*\$');p.match('Hello1234 World')      None   // ^[a-zA-Z0-9\s]*\$ <-- alphabet only and lower or upper case, numbers, any number of characters, //any number of space //'Hello1234 World' match this criteria . 
re.compile('^[a-zA-Z0-9\s]*\$');p.match('Hello1234 World')      <_sre.SRE_Match object; span=(0, 15), match='Hello1234 World'>   // ^[a-zA-Z0-9\s]*\$ <-- alphabet only and lower or upper case, numbers, any number of characters, //any number of space //'###Hello1234 World' doesn't match this criteria because it has non-alphanumeric characters (#) in it re.compile('^[a-zA-Z0-9\s]*\$');p.match('###Hello1234 World')      None   // [a-zA-Z0-9#\s]*\$ <-- alphabet only and lower or upper case, numbers, any number of characters, //any number of #,any number of space //'###Hello1234 World' match this criteria re.compile('[a-zA-Z0-9#\s]*\$');p.match('###Hello1234 World')      <_sre.SRE_Match object; span=(0, 18), match='###Hello1234 World'>   //.*[a-zA-Z0-9\s]*\$ <-- ignore any number of characters (here the leading ###) before the specified pattern is found. //alphabet only and lower or upper case, numbers, any number of characters, any number of space //'###Hello1234 World' match this criteria re.compile('.*[a-zA-Z0-9\s]*\$');p.match('###Hello1234 World')      <_sre.SRE_Match object; span=(0, 18), match='###Hello1234 World'>       Reference :