如果我們有如下一段數據,我們想獲取其中img標籤的src(以及_src)屬性的值。
<p>橋邊姑娘,我把你放心上</p><p><img src="https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg" _src="https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg" style="width: 550px; height: 317px;"/></p>
具體實現
這裏我們分兩步走,先用正則匹配出整個img標籤,然後再從標籤內容中取出src(以及_src)屬性引號內的值。具體代碼如下:
/** Matches an {@code <img ...>} tag; group 2 is the text between the tag name and the closing {@code >}. */
private static final Pattern IMAGE_TAG_PATTERN = Pattern.compile("<(img|IMG)(.*?)>");

/**
 * Matches a double-quoted {@code src} attribute; group 2 is the quoted value.
 * The negative lookbehind rejects a match that is merely the tail of a longer
 * attribute name such as {@code _src} or {@code data-src}.
 */
private static final Pattern IMAGE_SRC_PATTERN = Pattern.compile("(?<![\\w-])(src|SRC)=\"(.*?)\"");

/** Matches a double-quoted {@code _src} attribute; group 2 is the quoted value. */
private static final Pattern IMAGE__SRC_PATTERN = Pattern.compile("(?<![\\w-])(_src|_SRC)=\"(.*?)\"");

/**
 * Prints the {@code src} and {@code _src} attribute values of every {@code <img>} tag in the input.
 *
 * <p>All {@code src} values are printed first, one per line with the prefix {@code "src:"},
 * followed by all {@code _src} values with the prefix {@code "_src_:"}. Tags that lack the
 * attribute, or whose value is blank, are skipped.
 *
 * @param srcStr HTML fragment to scan; {@code null} or empty input prints nothing
 */
public static void matchImgSrcTag(String srcStr) {
    if (srcStr == null || srcStr.isEmpty()) {
        return;
    }
    // Each pass scans the input with its own Matcher. Sharing one Matcher between
    // the two passes would leave the second pass with nothing left to find.
    for (String value : findImgAttributeValues(srcStr, IMAGE_SRC_PATTERN)) {
        System.out.println("src:" + value);
    }
    for (String value : findImgAttributeValues(srcStr, IMAGE__SRC_PATTERN)) {
        System.out.println("_src_:" + value);
    }
}

/** Collects, in document order, the non-blank group-2 value of {@code attributePattern} from each img tag. */
private static List<String> findImgAttributeValues(String html, Pattern attributePattern) {
    List<String> values = new ArrayList<>();
    Matcher tagMatcher = IMAGE_TAG_PATTERN.matcher(html);
    while (tagMatcher.find()) {
        // Step 1: isolate the attribute text of this img tag.
        String attributes = tagMatcher.group(2).trim();
        // Step 2: pull the requested attribute out of that text (first occurrence only).
        Matcher attributeMatcher = attributePattern.matcher(attributes);
        if (!attributeMatcher.find()) {
            continue;
        }
        String value = attributeMatcher.group(2).trim();
        if (!value.isEmpty()) {
            values.add(value);
        }
    }
    return values;
}
測試驗證
/**
 * Demo entry point: runs {@code matchImgSrcTag} on a sample HTML fragment that
 * contains one img tag carrying both a src and a _src attribute.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String sampleHtml = "<p>橋邊姑娘,我把你放心上</p><p><img src=\"https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg\" _src=\"https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg\" style=\"width: 550px; height: 317px;\"/></p>";
    matchImgSrcTag(sampleHtml);
}
output:
src:https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg
_src_:https://n.sinaimg.cn/news/transform/20171113/puY7-fynship2141885.jpg