html转txt,防止XSS漏洞
/** * html转txt * * @param inputString * @param count * @return */ public static String filterHtmlToText(String input) { String htmlStr = input; // 含html标签的字符串 String textStr = ""; java.util.regex.Pattern p_script; java.util.regex.Matcher m_script; java.util.regex.Pattern p_style; java.util.regex.Matcher m_style; java.util.regex.Pattern p_html; java.util.regex.Matcher m_html; try { // 定义script的正则表达式{或<script[^>]*?>[\\s\\S]*?<\\/script> String regEx_script = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"; // 定义style的正则表达式{或<style[^>]*?>[\\s\\S]*?<\\/style> String regEx_style = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>"; String regEx_html = "[<>].*"; // 定义HTML标签的正则表达式 p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE); m_script = p_script.matcher(htmlStr); htmlStr = m_script.replaceAll(""); // 过滤script标签 p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE); m_style = p_style.matcher(htmlStr); htmlStr = m_style.replaceAll(""); // 过滤style标签 p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE); m_html = p_html.matcher(htmlStr); htmlStr = m_html.replaceAll(""); // 过滤html标签 textStr = htmlStr;// replaceAll(" ", ""); textStr = textStr.replaceAll("……", "……"); textStr = textStr.replaceAll(" ", "").trim(); textStr = textStr.replaceAll("\"", ""); textStr = textStr.replaceAll("\r", ""); textStr.replaceAll("\n", ""); } catch (Exception e) { logger.error("filterHtmlToText(String)", e); } return textStr; }
相关推荐
Lzs 2020-10-23
聚合室 2020-11-16
零 2020-09-18
Justhavefun 2020-10-22
jacktangj 2020-10-14
ChaITSimpleLove 2020-10-06
Andrea0 2020-09-18
周游列国之仕子 2020-09-15
afanti 2020-09-16
88234852 2020-09-15
YClimb 2020-09-15
风雨断肠人 2020-09-04
卖口粥湛蓝的天空 2020-09-15
stulen 2020-09-15
pythonxuexi 2020-09-06
abfdada 2020-08-26
梦的天空 2020-08-25