本文介绍了正则表达式忽略 <script> 标签的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧! 问题描述 29岁程序员,3月因学历无情被辞! 很抱歉,我对正则表达式(Regex)知之甚少,甚至不完全明白这个正则表达式在做什么(它不是我写的 - source),只知道它会搜索某个特定的词语,以便将其高亮显示。 这是正则表达式: /(\b$term|$term\b)(?!([^<]+)?>)/iu 问题是我需要确保它不会匹配 <script> 和 </script> 标签之间的任何内容。我知道 script 标签有很多种写法,但我真正需要的只是让它忽略 <script 和 /script> 之间的所有文本,同时考虑到 script 与 < 之间可能存在空格的情况,例如 < script 或 /script >。 有人能按这种方式修改它吗?我会通知编写这个正则表达式的插件作者,以便在以后的版本中加入这一修改。 编辑:这是它所在的函数:
function relevanssi_highlight_terms($ excerpt,$ query){ $ type = get_option(relevanssi_highlight); if(none== $ type){ return $ excerpt; } switch($ type){ casemark://谢谢Jeff Byrnes $ start_emp =< mark>; $ end_emp =< / mark>; 休息; 案例strong: $ start_emp =< strong>; $ end_emp =< / strong>; 休息; caseem: $ start_emp =< em>; $ end_emp =< / em>; 休息; casecol: $ col = get_option(relevanssi_txt_col); if(!$ col)$ col =#ff0000; $ start_emp =< span style ='color:$ col'>; $ end_emp =< / span>; 休息; casebgcol: $ col = get_option(relevanssi_bg_col); if(!$ col)$ col =#ff0000; $ start_emp =< span style ='background-color:$ col'>; $ end_emp =< / span>; 休息; casecss: $ css = get_option(relevanssi_css); if(!$ css)$ css =color:#ff0000; $ start_emp =< span style ='$ css'>; $ end_emp =< / span>; 休息; caseclass: $ css = get_option(relevanssi_class); if(!$ css)$ css =relevanssi-query-term; $ start_emp =< span class ='$ css'>; $ end_emp =< / span>; 休息; 默认值:返回$摘录; } $ start_emp_token =* [/; $ end_emp_token =\] *; if(function_exists('mb_internal_encoding')) mb_internal_encoding(UTF-8); $ terms = array_keys(relevanssi_tokenize($ query,$ remove_stopwords = true)); $ phrases = relevanssi_extract_phrases(stripslashes($ query)); $ non_phrase_terms = array(); foreach($ phrase作为$ phrase){ $ phrase_terms = array_keys(relevanssi_tokenize($ phrase,false)); foreach($ terms as $ term){ if(!in_array($ term,$ phrase_terms)){ $ non_phrase_terms [] = $ term; } } $ terms = $ non_phrase_terms; $ terms [] = $ phrase; } usort($ terms,'relevanssi_strlen_sort'); get_option('relevanssi_word_boundaries','on')=='on'? 
$ word_boundaries = true:$ word_boundaries = false; foreach($ terms as $ term){ $ pr_term = preg_quote($ term,'/');如果($ word_boundaries){ $ excerpt = preg_replace(/(\ b $ pr_term | $ pr_term\b)(?!([^ >)/ iu,$ start_emp_token。'\\1'。$ end_emp_token,$ excerpt); } else { $ excerpt = preg_replace(/($ pr_term)(?!([^ >)/ iu,$ start_emp_token。'\\ \\\1'。$ end_emp_token,$ excerpt); } //感谢http://pureform.wordpress.com/2008/01/04/matching-a-word-characters-outside-of-html-tags/ } $摘录= relevanssi_remove_nested_highlights($摘录,$ start_emp_token,$ end_emp_token); $ excerpt = str_replace($ start_emp_token,$ start_emp,$ excerpt); $ excerpt = str_replace($ end_emp_token,$ end_emp,$ excerpt); $ excerpt = str_replace($ end_emp。$ start_emp,,$ excerpt); if(function_exists('mb_ereg_replace')){ $ pattern = $ end_emp。 '\s *'。 $ start_emp; $ excerpt = mb_ereg_replace($ pattern,,$ excerpt); } return $摘录; 
解决方案 由于后行断言(lookbehind assertions)的长度必须固定,您无法用它们来查找位于被搜索词之前某处的 <script> 标签。 因此,在替换完目标词的所有匹配项之后,您需要进行第二遍处理,把出现在 <script> 标签内部的已修饰词还原回去。 
#提供一些示例数据 $摘录='我的名字是bob! 
鲍勃很酷。 < script type =text / javascript> var bobby =即使您已经在script标签内标记了术语< em> bob< / em>,它也能正常工作。; alert(bobby); var bob = 5; < / script> 是的,bob这个词很好。 $ start_emp_token ='< em>'; $ end_emp_token ='< / em>'; $ pr_term ='bob'; #取代所有内容(不在标签中) $ excerpt = preg_replace(/(\ b $ pr_term | $ pr_term \b)(?!([^ #取消一些替换 $ excerpt = preg_replace_callback('#(< script(?:[^>] *)>)(。*?)(< / script>)#是', create_function('$ matches','global $ start_emp_token,$ end_emp_token,$ pr_term; return $ matches [1] .str_replace ($ start_emp_token $ pr_term $ end_emp_token,$ pr_term,$ matches [2])。$ matches [3];'), $ excerpt); var_dump($ excerpt); 以上代码产生以下输出: $ b< em> bob< / em>很酷。 < script type =text / javascript> var bobby =即使您已经标记术语< em> bob< / em> ; alert(bobby); var bob = 5; < / script> 是的,单词< em> bob< / em>正常工作。 
I apologise as I have very little knowledge about Regex and I don't even understand exactly what this regex is doing (I didn't write it - source), apart from the fact that it searches for a certain term so that it can be highlighted. Here is the Regex: /(\b$term|$term\b)(?!([^<]+)?>)/iu The problem is I need to make sure it doesn't match anything between <script> and </script> tags. Now I know there are many variations of how a script tag can be written, but really all I need it to do is ignore any text between <script and /script>, taking into account possible whitespace between script and <, like < script or /script >. Is anyone able to modify it in this way? 
I will notify the plugin's author, who wrote this regex, so that the change can be included in future releases. Edit: Here is the function it originates from:
function relevanssi_highlight_terms($excerpt, $query) { $type = get_option("relevanssi_highlight"); if ("none" == $type) { return $excerpt; } switch ($type) { case "mark": // thanks to Jeff Byrnes
$start_emp = "<mark>"; $end_emp = "</mark>"; break; case "strong": $start_emp = "<strong>"; $end_emp = "</strong>"; break; case "em": $start_emp = "<em>"; $end_emp = "</em>"; break; case "col": $col = get_option("relevanssi_txt_col"); if (!$col) $col = "#ff0000"; $start_emp = "<span style='color: $col'>"; $end_emp = "</span>"; break; case "bgcol": $col = get_option("relevanssi_bg_col"); if (!$col) $col = "#ff0000"; $start_emp = "<span style='background-color: $col'>"; $end_emp = "</span>"; break; case "css": $css = get_option("relevanssi_css"); if (!$css) $css = "color: #ff0000"; $start_emp = "<span style='$css'>"; $end_emp = "</span>"; break; case "class": $css = get_option("relevanssi_class"); if (!$css) $css = "relevanssi-query-term"; $start_emp = "<span class='$css'>"; $end_emp = "</span>"; break; default: return $excerpt; } $start_emp_token = "*[/"; $end_emp_token = "\]*"; if ( function_exists('mb_internal_encoding') ) mb_internal_encoding("UTF-8"); $terms = array_keys(relevanssi_tokenize($query, $remove_stopwords = true)); $phrases = relevanssi_extract_phrases(stripslashes($query)); $non_phrase_terms = array(); foreach ($phrases as $phrase) { $phrase_terms = array_keys(relevanssi_tokenize($phrase, false)); foreach ($terms as $term) { if (!in_array($term, $phrase_terms)) { $non_phrase_terms[] = $term; } } $terms = $non_phrase_terms; $terms[] = $phrase; } usort($terms, 'relevanssi_strlen_sort'); get_option('relevanssi_word_boundaries', 'on') == 'on' ? 
$word_boundaries = true : $word_boundaries = false; foreach ($terms as $term) { $pr_term = preg_quote($term, '/'); if ($word_boundaries) { $excerpt = preg_replace("/(\b$pr_term|$pr_term\b)(?!([^<]+)?>)/iu", $start_emp_token . '\\1' . $end_emp_token, $excerpt); } else { $excerpt = preg_replace("/($pr_term)(?!([^<]+)?>)/iu", $start_emp_token . '\\1' . $end_emp_token, $excerpt); } // thanks to http://pureform.wordpress.com/2008/01/04/matching-a-word-characters-outside-of-html-tags/
} $excerpt = relevanssi_remove_nested_highlights($excerpt, $start_emp_token, $end_emp_token); $excerpt = str_replace($start_emp_token, $start_emp, $excerpt); $excerpt = str_replace($end_emp_token, $end_emp, $excerpt); $excerpt = str_replace($end_emp . $start_emp, "", $excerpt); if (function_exists('mb_ereg_replace')) { $pattern = $end_emp . '\s*' . $start_emp; $excerpt = mb_ereg_replace($pattern, " ", $excerpt); } return $excerpt;}
解决方案 Since lookbehind assertions must have a fixed length, you cannot use them to look for a <script> tag somewhere before the searched term. So, after you replace all the occurrences of the desired term, you need a second pass to revert those occurrences of the modified term that appear inside a <script> tag.
# provide some sample data
$excerpt = 'My name is bob!And bob is cool.<script type="text/javascript">var bobby = "It works fine even if you already have tagged the term <em>bob</em> inside the script tag.";alert(bobby);var bob = 5;</script>Yeah, the word "bob" works fine.';
$start_emp_token = '<em>';
$end_emp_token = '</em>';
$pr_term = 'bob';
# replace everything (not in a tag)
$excerpt = preg_replace("/(\b$pr_term|$pr_term\b)(?!([^<]+)?>)/iu", $start_emp_token . '$1' . 
$end_emp_token, $excerpt);
# undo some of the replacements
$excerpt = preg_replace_callback('#(<script(?:[^>]*)>)(.*?)(</script>)#is', create_function( '$matches', 'global $start_emp_token, $end_emp_token, $pr_term; return $matches[1].str_replace("$start_emp_token$pr_term$end_emp_token", "$pr_term", $matches[2]).$matches[3];' ), $excerpt);
var_dump($excerpt);
The code above produces the following output:
string(271) "My name is <em>bob</em>!And <em>bob</em> is cool.<script type="text/javascript">var bobby = "It works fine even if you already have tagged the term <em>bob</em> inside the script tag.";alert(bobby);var bob = 5;</script>Yeah, the word "<em>bob</em>" works fine."
这篇关于正则表达式忽略 <script> 标签的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持! 上岸,阿里云! 08-12 13:08