正则表达式
正则表达式基础:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scratch pad of {@code java.util.regex} examples.
 *
 * <p>Every example inside {@link #main(String[])} is commented out, so running the class
 * as-is prints nothing. Uncomment one section at a time to try it; several sections declare
 * the same local names ({@code p}, {@code m}, {@code s}) and cannot be enabled together.
 * The trailing {@code // true} / {@code // false} notes are the results the examples print.
 */
public class Test {
    // Regex quick reference:
    //   .    any single character
    //   *    zero or more
    //   +    one or more
    //   ?    zero or one
    //   []   matches one character out of the set
    //   \s   whitespace character          \S   non-whitespace character
    //   \w   word character [a-zA-Z_0-9]   \W   non-word character
    //   \d   digit                         \D   non-digit
    //   \b   word boundary                 \B   non-word boundary
    //   ^    start of line; inside [] it negates the set
    //   $    end of line

    /**
     * Entry point holding the (commented-out) example sections.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // A first look at regular expressions
        /*
        p("abc".matches("..."));
        p("a8729a".replaceAll("\\d", "-"));
        // A Pattern is the compiled form of a regular expression
        Pattern p = Pattern.compile("[a-z]{3}");
        // A Matcher is the result of applying the pattern to a string
        Matcher m = p.matcher("fgh");
        p(m.matches());
        p("fgha".matches("[a-z]{3}"));
        */

        // Getting to know . * + ?
        /*
        p("a".matches("."));       // true
        p("aa".matches("aa"));     // true
        p("aaaa".matches("a*"));   // true
        p("aaaa".matches("a+"));   // true
        p("".matches("a*"));       // true
        p("aaaa".matches("a?"));   // false
        p("".matches("a?"));       // true
        p("a".matches("a?"));      // true
        p("43243243242".matches("\\d{3,100}"));   // true
        p("192.168.0.aaa".matches("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"));   // false
        p("192".matches("[0-2][0-9][0-9]"));   // true
        */

        // Getting to know []: one [] matches exactly one character
        /*
        p("a".matches("[abc]"));          // true
        p("a".matches("[^abc]"));         // ^ negates the set: false
        p("A".matches("[a-zA-Z]"));       // true
        p("A".matches("[a-z]|[A-Z]"));    // same as above: true
        p("A".matches("[a-z[A-Z]]"));     // same as above: true
        p("R".matches("[A-Z&&[RFG]]"));   // true
        */

        // Getting to know \s \w \d
        /*
        // The subject needs four whitespace characters for \s{4} to match, hence the leading space.
        p(" \n\r\t".matches("\\s{4}"));   // true
        p("".matches("\\S"));             // false
        p("a_8".matches("\\w{3}"));       // true
        p("abc888&^%".matches("[a-z]{1,3}\\d+[&^#%]+"));   // true
        p("\\".matches("\\\\"));          // true
        */

        // Getting to know ^ \b $
        /*
        p("hellosir".matches("^h.*"));     // true
        p("hellosir".matches(".*ir$"));    // true
        // \b is a word boundary: it needs the space after "hello" ...
        p("hello sir".matches("^h[a-z]{1,3}o\\b.*"));   // true
        // ... and fails when "hello" runs straight into "sir".
        p("hellosir".matches("^h[a-z]{1,3}o\\b.*"));    // false
        // Blank line
        p("\n".matches("^[\\s&&[^\\n]]*\\n$"));
        */

        // Email
        // NOTE: the sample address below is a placeholder; the original one was replaced
        // with "[email protected]" by the tool that extracted this text.
        // p("user@example.com".matches("\\w{1,}@\\w{1,}(\\.com)$"));

        // URL
        // p("http://www.baidu.com/".matches("^((http|https|ftp)://)[\\w-\\.]{1,}\\.(com|cn|net|org|com\\.cn)(/?|/[\\w-\\./]*)"));

        // Searching
        /*
        Pattern p = Pattern.compile("\\d{3,5}");
        String s = "123-33333-444-00";
        Matcher m = p.matcher(s);
        // matches() always tries to match the entire string
        p(m.matches());
        m.reset();
        // find() scans left to right for the next matching substring
        p(m.find());
        p(m.start() + "-" + m.end());
        p(m.find());
        p(m.start() + "-" + m.end());
        p(m.find());
        p(m.start() + "-" + m.end());
        p(m.find());
        // p(m.start() + "-" + m.end());
        // lookingAt() always starts matching from the beginning of the input
        p(m.lookingAt());   // true
        p(m.lookingAt());   // true
        */

        // Searching
        /*
        Pattern p = Pattern.compile("java");
        Matcher m = p.matcher("javaJavaJAVaIloveJAVAyouhateJavafasdfas");
        while (m.find()) {
            p(m.group());
        }
        */

        // Replacing
        /*
        // Original note said "ignore case when compiling", but no CASE_INSENSITIVE flag is
        // passed here, so only the lowercase "java" is replaced.
        Pattern p = Pattern.compile("java");
        Matcher m = p.matcher("javaJavaJAVaIloveJAVAyouhateJavafasdfas");
        p(m.replaceAll("JAVA"));
        */

        // Replacing
        /*
        Pattern p = Pattern.compile("java", Pattern.CASE_INSENSITIVE);   // ignore case when compiling
        Matcher m = p.matcher("javaJavaJAVaIloveJAVAyouhateJavafasdfas");
        StringBuffer sb = new StringBuffer();
        int i = 0;
        while (m.find()) {
            i++;
            if (i % 2 == 0) {
                m.appendReplacement(sb, "java");
            } else {
                m.appendReplacement(sb, "JAVA");
            }
        }
        m.appendTail(sb);
        p(sb);
        */

        // Regex capture groups
        /*
        Pattern p = Pattern.compile("(\\d{3,5})([a-z]{2})");
        String s = "123aa-34345bb-234cc-00";
        Matcher m = p.matcher(s);
        while (m.find()) {
            p(m.group(1));   // print the first captured group
        }
        */
    }

    /**
     * Prints {@code o} followed by a line break on standard output.
     *
     * @param o value to print
     */
    public static void p(Object o) {
        System.out.println(o);
    }
}

抓取网页中的Email:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Prints every {@code name@host.com}-style address found in a saved HTML file,
 * one address per output line.
 */
public class EmailSpider {
    /** File scanned when no path is given on the command line. */
    private static final String DEFAULT_FILE = "C:\\index.html";

    /**
     * One or more word characters, {@code @}, one or more word characters, then
     * {@code .com} ending at a word boundary.
     *
     * <p>The pattern is deliberately not anchored with {@code $}: with {@code find()} an
     * end-of-line anchor would only ever report an address that is the very last thing on a
     * line, which is rarely the case inside HTML markup. Compiled once because it is applied
     * to every line of the file.
     */
    private static final Pattern EMAIL = Pattern.compile("\\w+@\\w+\\.com\\b");

    /**
     * Reads the file line by line and prints each address found.
     *
     * @param args optional; {@code args[0]} is the file to scan, defaulting to
     *             {@code C:\index.html} when absent
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String file = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        // try-with-resources closes the reader even when reading fails part-way through.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = br.readLine()) != null) {
                parse(line);
            }
        }
    }

    /** Prints every address in {@code line}, in left-to-right order. */
    private static void parse(String line) {
        Matcher m = EMAIL.matcher(line);
        while (m.find()) {
            System.out.println(m.group());
        }
    }
}