| Author | 
              Topic: Regular expression with examples  |  
           
         |  
        
          
            
              
                
                	
                  
                    
                      Alex_Raj member offline     |  
                    
                      |   |  
                    
                      
                        
                          
                            | posts: | 
                            99 |  
                          
                            | joined: | 
                            05/16/2006 |  
                          
                            | from: | 
                            San Jose, CA |  
                         
                       |  
                    | 
                  | 
                
                  
                    
                       |  
                    
                       |  
                    
                      
                        
                          | Regular expression with examples |  
                        
                          A regular expression is a sequence of characters that defines a pattern, which is mainly used for:  finding matches -- for a given string, is any match found for the specific pattern?   validation/assertion -- for a given string, does the whole string fit the specific pattern?
 
  Example: Finding match -- the regular expression '[hc]at' matches 'cat' and 'hat' in string "The cat wears a hat."
 
    String regex = "[hc]at"; 
    String input = "The cat wears a hat.";
    System.out.println("Regex: " + regex);
    System.out.println("Input: " + input);
	
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(input);
		
    boolean found = false;
    while (matcher.find()) {
        System.out.println("Found '" + matcher.group() + 
                           "' from " + matcher.start() + 
                           " to " + matcher.end());
        found = true;
    }
    if(!found)
        System.out.println("No match found!");
  output:
 
    Regex: [hc]at
    Input: The cat wears a hat.
    Found 'cat' from 4 to 7
    Found 'hat' from 16 to 19
 
 
  Example: Validation/assertion -- the input telephone number "415-555-1234" matches the pattern '\d{3}-\d{3}-\d{4}'.
 
    String regex = "\\d{3}-\\d{3}-\\d{4}"; 
    String input = "415-555-1234";
    System.out.println("Regex: " + regex);
    System.out.println("Input: " + input);
  		
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(input);
  		
    if (matcher.matches()) {
        System.out.println("Match!");
    } else {
        System.out.println("Does not match!");
    }
  output:
 
    Regex: \d{3}-\d{3}-\d{4}
    Input: 415-555-1234
    Match!
  |  
                        
                           |  
                        |  
                    
                       |  
                    
                       |  
                    |  
                |  
        
          
            
              
                
                	
                  
                    
                      Alex_Raj member offline     |  
                    
                      |   |  
                    
                      
                        
                          
                            | posts: | 
                            99 |  
                          
                            | joined: | 
                            05/16/2006 |  
                          
                            | from: | 
                            San Jose, CA |  
                         
                       |  
                    | 
                  | 
                
                  
                    
                       |  
                    
                       |  
                    
                      
                        
                          | Special characters in regular expression |  
                        
                          The following characters are metacharacters used in constructing regular expressions. They must be escaped with '\' when they are meant to be matched as literal characters.
  Metacharacters
 
\      -- The backslash
[]     -- Matches a single character that is contained within the brackets.
[^ ]   -- Matches a single character that is NOT contained within the brackets.
[ && ] -- Denotes an intersection like [a-g&&[e-z]], which matches 'e', 'f', or 'g'.
[ - ]  -- Denotes a range like [a-z]. Otherwise, as in [abc-], it is a literal character.
^      -- Matches the starting position within the string.
$      -- Matches the ending position within the string.
()     -- Defines a group or scope.
|      -- Logical OR
*      -- Matches the preceding element zero or more times. For example, 'ab*' matches 'a', 'ab', 
       -- 'abb', etc. '(ab)*' matches '', 'ab', 'abab', and so on. '[ab]*' matches '', 'a', 'b',
       -- 'ab', 'ba', 'aabab', and so on.
?      -- Matches the preceding element zero or one time. x? = (x|E), where E denotes the empty string.
+      -- Matches the preceding element one or more times. x+ = xx*
{m}    -- Matches the preceding element exactly m times.
{m,}   -- Matches the preceding element at least m times.
{m,n}  -- Matches the preceding element at least m but not more than n times.
 
  Predefined characters
 
 
.  -- Any single character, for example, '.at' matches 'bat', 'cat', or 'hat'. But it is a
   -- literal character within [ ], for example, '[.]at' matches '.at' only.
\d -- A digit: [0-9]
\D -- A non-digit: [^0-9]
\s -- A whitespace: [ \t\n\x0B\f\r]
\S -- A non-whitespace: [^\s]
\w -- A word character: [a-zA-Z0-9_]
\W -- A non-word character: [^\w]
 
 
  |  
                        
                           |  
                        |  
                    
                       |  
                    
                       |  
                    |  
                |  
        
          
            
              
                
                	
                  
                    
                      Alex_Raj member offline     |  
                    
                      |   |  
                    
                      
                        
                          
                            | posts: | 
                            99 |  
                          
                            | joined: | 
                            05/16/2006 |  
                          
                            | from: | 
                            San Jose, CA |  
                         
                       |  
                    | 
                  | 
                
                  
                    
                       |  
                    
                       |  
                    
                      
                        
                          | Lookahead and lookbehind assertions |  
                        
                          
 
a(?=b)	-- positive lookahead:  Is there any item matching 'a' that is followed by 'b'?
a(?!b)	-- negative lookahead:  Is there any item matching 'a' that is NOT followed by 'b'?
(?<=a)b	-- positive lookbehind: Is there any item matching 'b' that is preceded by 'a'?
(?<!a)b	-- negative lookbehind: Is there any item matching 'b' that is NOT preceded by 'a'?
 
  The simpler forms:
 
(?=x)	-- positive lookahead:  Is there any empty string '' that is followed by 'x'?
(?!x)	-- negative lookahead:  Is there any empty string '' that is NOT followed by 'x'?
(?<=x)	-- positive lookbehind: Is there any empty string '' that is preceded by 'x'?
(?<!x)	-- negative lookbehind: Is there any empty string '' that is NOT preceded by 'x'?
 
 
 
 
  Example: Lookahead assertion -- Find the 'black' color used for 'cat'.
 
 
    String regex = "black(?= cat)"; 
    String input = "The black cat wears a black hat.";
    System.out.println("Regex: " + regex);
    System.out.println("Input: " + input);
	
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(input);
		
    boolean found = false;
    while (matcher.find()) {
        System.out.println("Found '" + matcher.group() + 
                           "' from " + matcher.start() + 
                           " to " + matcher.end());
        found = true;
    }
    if(!found)
        System.out.println("No match found!");
 
  The output:
 
 
    Regex: black(?= cat)
    Input: The black cat wears a black hat.
    Found 'black' from 4 to 9
 
 
  Example: Lookahead assertion -- Find the 'black' color that is NOT used for 'cat'.
 
 
    String regex = "black(?! cat)"; 
    String input = "The black cat wears a black hat.";
    System.out.println("Regex: " + regex);
    System.out.println("Input: " + input);
	
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(input);
		
    boolean found = false;
    while (matcher.find()) {
        System.out.println("Found '" + matcher.group() + 
                           "' from " + matcher.start() + 
                           " to " + matcher.end());
        found = true;
    }
    if(!found)
        System.out.println("No match found!");
 
  The output:
 
 
    Regex: black(?! cat)
    Input: The black cat wears a black hat.
    Found 'black' from 22 to 27
 
 
  Example: Lookbehind assertion -- Find the 'cat' with 'black' color.
 
 
    String regex = "(?<=black )cat"; 
    String input = "The black cat and the white cat.";
    System.out.println("Regex: " + regex);
    System.out.println("Input: " + input);
	
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(input);
		
    boolean found = false;
    while (matcher.find()) {
        System.out.println("Found '" + matcher.group() + 
                           "' from " + matcher.start() + 
                           " to " + matcher.end());
        found = true;
    }
    if(!found)
        System.out.println("No match found!");
 
  The output:
 
 
    Regex: (?<=black )cat
    Input: The black cat and the white cat.
    Found 'cat' from 10 to 13
 
 
  Example: Lookbehind assertion -- Find the 'cat' that does NOT have 'black' color.
 
 
    String regex = "(?<!black )cat"; 
    String input = "The black cat and the white cat.";
    System.out.println("Regex: " + regex);
    System.out.println("Input: " + input);
	
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(input);
		
    boolean found = false;
    while (matcher.find()) {
        System.out.println("Found '" + matcher.group() + 
                           "' from " + matcher.start() + 
                           " to " + matcher.end());
        found = true;
    }
    if(!found)
        System.out.println("No match found!");
 
  The output:
 
 
    Regex: (?<!black )cat
    Input: The black cat and the white cat.
    Found 'cat' from 28 to 31
  |  
                        
                           |  
                        |  
                    
                       |  
                    
                       |  
                    |  
                |  
        
          
            
              
                
                	
                  
                    
                      Alex_Raj member offline     |  
                    
                      |   |  
                    
                      
                        
                          
                            | posts: | 
                            99 |  
                          
                            | joined: | 
                            05/16/2006 |  
                          
                            | from: | 
                            San Jose, CA |  
                         
                       |  
                    | 
                  | 
                
                  
                    
                       |  
                    
                       |  
                    
                      
                        
                          | Lookahead assertion examples |  
                        
                          Example: Find the empty string '' that is immediately followed by a lower-case character
 
    String regex = "(?=[a-z])"; 
    String input = "123a567b9";
    System.out.println("Regex: " + regex);
    System.out.println("Input: " + input);
	
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(input);
		
    boolean found = false;
    while (matcher.find()) {
        System.out.println("Found '" + matcher.group() + 
                           "' from " + matcher.start() + 
                           " to " + matcher.end());
        found = true;
    }
    if(!found)
        System.out.println("No match found!");
 
  The output:
 
 
    Regex: (?=[a-z])
    Input: 123a567b9
    Found '' from 3 to 3
    Found '' from 7 to 7
 
 
  Example: Find a string which 1) starts with '', 2) is immediately followed by a lower-case character at that position, and 3) has a total length of 2 characters of any kind
 
    String regex = "(?=[a-z]).{2}"; 
    String input = "123a567b9";
 
  The output:
 
 
    Regex: (?=[a-z]).{2}
    Input: 123a567b9
    Found 'a5' from 3 to 5
    Found 'b9' from 7 to 9
 
 
  Example: Find a string which 1) starts with '', 2) is immediately followed by a lower-case character at that position, and 3) has a total length of 3 characters of any kind
 
    String regex = "(?=[a-z]).{3}"; 
    String input = "123a567b9";
 
  The output:
 
 
    Regex: (?=[a-z]).{3}
    Input: 123a567b9
    Found 'a56' from 3 to 6
 
  |  
                        
                           |  
                        |  
                    
                       |  
                    
                       |  
                    |  
                |  
        
          
            
              
                
                	
                  
                    
                      Alex_Raj member offline     |  
                    
                      |   |  
                    
                      
                        
                          
                            | posts: | 
                            99 |  
                          
                            | joined: | 
                            05/16/2006 |  
                          
                            | from: | 
                            San Jose, CA |  
                         
                       |  
                    | 
                  | 
                
                  
                    
                       |  
                    
                       |  
                    
                      
                        
                          | Regular expression examples |  
                        
                          Example: IPv4 address
 
String REGEX_IPv4 = "(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])" + 
                    "\\." + 
                    "(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)" +
                    "\\." +
                    "(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)" +
                    "\\." +
                    "(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])";
 
 
  Example: IPv6 address
 
String REGEX_IPv6 = "(?i)" + "[[a-f][0-9]:/.+*%\\[\\]]+";
 
 
 
  Example: Email address
 
String REGEX_EMAIL = "[a-zA-Z0-9_+.%\\-]{1,256}" +
                     "\\@" +
                     "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" +
                     "(" + 
                     "\\.[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" +
                     ")+";
 
 
  Example: WEB URL
 
String WEB_URL = "(?i)" + 
                 "\\b(https?|ftp|file)://" + 
                 "[a-zA-Z0-9+&@#/%?=~_|!:,.;\\-]*[a-zA-Z0-9+&@#/%=~_|\\-]";
 
  Example: Password strength validity
  The password strength criteria require:
 
8+ characters length 
1+ Special Character
1+ letters in Upper Case
2+ numerals (0-9)
3+ letters in Lower Case
 
 
 
String REGEX_PWD = 
  "^" +                          // starting      
  "" +                           // empty string   
  "(?=.*[\\W])" +                // followed by at least 1 special char   
  "(?=.*[A-Z])" +                // followed by at least 1 upper-case char
  "(?=.*[0-9].*[0-9])" +         // followed by at least 2 digits
  "(?=.*[a-z].*[a-z].*[a-z])" +  // followed by at least 3 lower-case chars
  ".{8,}" +                      // followed by any (.) chars with total length of at least 8
  "$";                           // ending
 
  |  
                        
                           |  
                        |  
                    
                       |  
                    
                       |  
                    |  
                |  
        
          
            
              
                
                	
                  
                    
                      Alex_Raj member offline     |  
                    
                      |   |  
                    
                      
                        
                          
                            | posts: | 
                            99 |  
                          
                            | joined: | 
                            05/16/2006 |  
                          
                            | from: | 
                            San Jose, CA |  
                         
                       |  
                    | 
                  | 
                
                  
                    
                       |  
                    
                       |  
                    
                      
                        
                          | matches() vs find() |  
                        
                           matches() returns true -- if the whole string matches the pattern  find() returns true -- if any substring matches the pattern
  Example
 
 
	Pattern pattern = Pattern.compile("\\d+");
	System.out.println("Pattern: '\\d+'");
	String input = "123A45B6";
	System.out.println("Inout 1: " + input);
	System.out.println("--- Result #1 -------");
	Matcher matcher = pattern.matcher(input);			
	//System.out.println("matches()? : " + matcher.matches()); // matcher.matches() will spoil the cursor
	while(matcher.find()){
		System.out.println("found at   : " + matcher.start());
		System.out.println("found end  : " + matcher.end());
		System.out.println("found what : " + matcher.group());
	}
	System.out.println("matches()? : " + matcher.matches());
	
	System.out.println("\n=============================\n");
	
	pattern = Pattern.compile("\\d+");
	System.out.println("Pattern: '\\d+'");
	input = "123456";
	System.out.println("Inout 2: " + input);
	System.out.println("--- Result #2 -------");
	matcher = pattern.matcher(input);			
	//System.out.println("matches()? : " + matcher.matches()); // matcher.matches() will spoil the cursor
	while(matcher.find()){
		System.out.println("found at   : " + matcher.start());
		System.out.println("found end  : " + matcher.end());
		System.out.println("found what : " + matcher.group());
	}
	System.out.println("matches()? : " + matcher.matches());
  Output:
 
Pattern: '\d+'
Inout 1: 123A45B6
--- Result #1 -------
found at   : 0
found end  : 3
found what : 123
found at   : 4
found end  : 6
found what : 45
found at   : 7
found end  : 8
found what : 6
matches()? : false
=============================
Pattern: '\d+'
Inout 2: 123456
--- Result #2 -------
found at   : 0
found end  : 6
found what : 123456
matches()? : true
 
  |  
                        
                           |  
                        |  
                    
                       |  
                    
                       |  
                    |  
                |  
      |