本文整理了Java中org.htmlcleaner.HtmlCleaner.getOpenTags()
方法的一些代码示例,展示了HtmlCleaner.getOpenTags()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。HtmlCleaner.getOpenTags()
方法的具体详情如下:
包路径:org.htmlcleaner.HtmlCleaner
类名称:HtmlCleaner
方法名:getOpenTags
暂无
代码示例来源:origin: fivesmallq/web-data-extractor
/**
 * Tells whether the given token may be placed inside the most recently opened tag.
 *
 * @param token the token about to be added
 * @param cleanTimeValues per-run state used to look up the open tags
 * @return the result of {@code allowsItem(token)} on the last open tag's info;
 *         {@code true} when there is no last open tag or it carries no tag info
 */
private boolean isAllowedInLastOpenTag(BaseToken token, CleanTimeValues cleanTimeValues) {
    TagPos innermost = getOpenTags(cleanTimeValues).getLastTagPos();
    // With no open tag, or no info describing it, nothing restricts the token.
    if (innermost == null || innermost.info == null) {
        return true;
    }
    return innermost.info.allowsItem(token);
}
代码示例来源:origin: amplafi/htmlcleaner
/**
 * Checks whether the fatal tag declared for the given tag, if any, is currently open.
 *
 * @param tag tag info to inspect; may be {@code null}
 * @param cleanTimeValues per-run state used to look up the open tags
 * @return {@code true} when {@code tag} is {@code null}, when it declares no fatal tag,
 *         or when the declared fatal tag exists among the open tags; {@code false} otherwise
 */
private boolean isFatalTagSatisfied(TagInfo tag, CleanTimeValues cleanTimeValues) {
    if (tag == null) {
        return true;
    }
    String requiredOpenTag = tag.getFatalTag();
    if (requiredOpenTag == null) {
        // No fatal tag declared, so there is nothing to satisfy.
        return true;
    }
    return getOpenTags(cleanTimeValues).tagExists(requiredOpenTag);
}
代码示例来源:origin: amplafi/htmlcleaner
/**
 * Decides whether the last open tag accepts the given token as content.
 *
 * @param token the token to test
 * @param cleanTimeValues per-run state used to look up the open tags
 * @return {@code true} if there is no last open tag, if it has no tag info,
 *         or if its tag info reports that it allows the token
 */
private boolean isAllowedInLastOpenTag(BaseToken token, CleanTimeValues cleanTimeValues) {
    TagPos lastOpen = getOpenTags(cleanTimeValues).getLastTagPos();
    // Short-circuits left to right: the info is only consulted when both the
    // position and its info are present.
    return lastOpen == null
            || lastOpen.info == null
            || lastOpen.info.allowsItem(token);
}
代码示例来源:origin: amplafi/htmlcleaner
/**
 * Hands a token over to an open tag that can hold it, so it can be moved there later.
 *
 * <p>The token is silently dropped when the last open tag's info permits ignoring it,
 * when no open tag is found to place it in, or when the node stored at the chosen
 * position is {@code null}.
 *
 * @param nodeList list of nodes indexed by the positions recorded in the open tags
 * @param tokenToAdd the token to attach to the chosen open tag
 * @param cleanTimeValues per-run state used to look up the open tags
 */
private void saveToLastOpenTag(List nodeList, Object tokenToAdd, CleanTimeValues cleanTimeValues) {
    TagPos last = getOpenTags(cleanTimeValues).getLastTagPos();
    if (last != null && last.info != null && last.info.isIgnorePermitted()) {
        // The last open tag allows this kind of content to be ignored.
        return;
    }

    TagPos rubbishPos = getOpenTags(cleanTimeValues).findTagToPlaceRubbish();
    if (rubbishPos == null) {
        return;
    }

    TagNode startTagToken = (TagNode) nodeList.get(rubbishPos.position);
    // Guard against an empty slot: presumably entries of the node list can be
    // cleared to null during cleaning, and dereferencing one would throw a
    // NullPointerException.
    if (startTagToken != null) {
        startTagToken.addItemForMoving(tokenToAdd);
    }
}
代码示例来源:origin: amplafi/htmlcleaner
/**
 * Reports every still-open tag as unclosed and then closes them all.
 *
 * <p>An {@code ErrorType.UnclosedTag} error is fired for each open tag position;
 * afterwards, if a first open tag position was found, the snippet starting there is closed.
 *
 * @param nodeList list of nodes indexed by the positions recorded in the open tags
 * @param cleanTimeValues per-run state used to look up the open tags
 */
private void closeAll(List nodeList, CleanTimeValues cleanTimeValues) {
    // Looked up before the errors are fired, as in the original ordering.
    TagPos earliestOpen = getOpenTags(cleanTimeValues).findFirstTagPos();

    for (TagPos openPos : getOpenTags(cleanTimeValues).list) {
        TagNode unclosed = (TagNode) nodeList.get(openPos.position);
        properties.fireHtmlError(true, unclosed, ErrorType.UnclosedTag);
    }

    if (earliestOpen == null) {
        return;
    }
    closeSnippet(nodeList, earliestOpen, null, cleanTimeValues);
}
代码示例来源:origin: fivesmallq/web-data-extractor
/**
 * Attaches a token to an open tag that can hold it, so it can be moved there later.
 *
 * <p>Nothing happens when the last open tag's info permits ignoring the token, when no
 * open tag is found to place it in, or when the node at the chosen position is {@code null}.
 *
 * @param nodeList list of nodes indexed by the positions recorded in the open tags
 * @param tokenToAdd the token to attach to the chosen open tag
 * @param cleanTimeValues per-run state used to look up the open tags
 */
private void saveToLastOpenTag(List nodeList, Object tokenToAdd, CleanTimeValues cleanTimeValues) {
    TagPos innermost = getOpenTags(cleanTimeValues).getLastTagPos();

    // The token can simply be ignored when the last open tag permits it.
    boolean ignorable = innermost != null
            && innermost.info != null
            && innermost.info.isIgnorePermitted();
    if (ignorable) {
        return;
    }

    // Otherwise look for an open tag where the token can live.
    TagPos target = getOpenTags(cleanTimeValues).findTagToPlaceRubbish();
    if (target == null) {
        return;
    }

    TagNode host = (TagNode) nodeList.get(target.position);
    if (host != null) {
        host.addItemForMoving(tokenToAdd);
    }
}
代码示例来源:origin: fivesmallq/web-data-extractor
/**
 * Reports every still-open tag as unclosed and then closes them all.
 *
 * <p>An {@code ErrorType.UnclosedTag} error is fired for each open tag position. If the
 * current thread is found interrupted while reporting, {@code handleInterruption()} is
 * called and the method returns without closing anything.
 *
 * @param nodeList list of nodes indexed by the positions recorded in the open tags
 * @param cleanTimeValues per-run state used to look up the open tags
 */
private void closeAll(List nodeList, CleanTimeValues cleanTimeValues) {
    // Looked up before the errors are fired, as in the original ordering.
    TagPos earliestOpen = getOpenTags(cleanTimeValues).findFirstTagPos();

    for (TagPos openPos : getOpenTags(cleanTimeValues).list) {
        // The interruption status is checked before each report.
        if (Thread.currentThread().isInterrupted()) {
            handleInterruption();
            return;
        }
        TagNode unclosed = (TagNode) nodeList.get(openPos.position);
        properties.fireHtmlError(true, unclosed, ErrorType.UnclosedTag);
    }

    if (earliestOpen == null) {
        return;
    }
    closeSnippet(nodeList, earliestOpen, null, cleanTimeValues);
}
代码示例来源:origin: amplafi/htmlcleaner
/**
 * Checks whether the given tag declares a required parent that still has to be added
 * in the current context.
 *
 * @param tag tag info to inspect; may be {@code null}
 * @param cleanTimeValues per-run state used to look up the open tags
 * @return {@code false} when {@code tag} is {@code null} or declares no required parent.
 *         Otherwise the open tags are scanned from the most recent backwards for the first
 *         one that {@code tag.isHigher(...)} accepts: the result is {@code true} if none is
 *         found, or if the one found sits at or before the position of the open fatal tag
 *         (position {@code -1} when no fatal tag is declared or open)
 */
private boolean mustAddRequiredParent(TagInfo tag, CleanTimeValues cleanTimeValues) {
    if (tag == null) {
        return false;
    }
    String requiredParent = tag.getRequiredParent();
    if (requiredParent == null) {
        return false;
    }

    // Position of the open fatal tag; stays at -1 when none is declared or open.
    int fatalTagPosition = -1;
    String fatalTag = tag.getFatalTag();
    if (fatalTag != null) {
        TagPos fatalTagPos = getOpenTags(cleanTimeValues).findTag(fatalTag);
        if (fatalTagPos != null) {
            fatalTagPosition = fatalTagPos.position;
        }
    }

    // Walk the open tags from the end and stop at the first one that this tag
    // reports as higher.
    ListIterator<TagPos> it =
            getOpenTags(cleanTimeValues).list.listIterator(getOpenTags(cleanTimeValues).list.size());
    while (it.hasPrevious()) {
        TagPos currTagPos = it.previous();
        if (tag.isHigher(currTagPos.name)) {
            return currTagPos.position <= fatalTagPosition;
        }
    }
    return true;
}
代码示例来源:origin: fivesmallq/web-data-extractor
/**
 * Checks whether at least one of the fatal tags declared for the given tag is currently open.
 *
 * @param tag tag info to inspect; may be {@code null}
 * @param cleanTimeValues per-run state used to look up the open tags
 * @return {@code true} when {@code tag} is {@code null}, when it declares no fatal tags,
 *         or when any declared fatal tag exists among the open tags; {@code false} otherwise
 */
private boolean isFatalTagSatisfied(TagInfo tag, CleanTimeValues cleanTimeValues) {
    if (tag == null) {
        return true;
    }
    if (tag.getFatalTags().isEmpty()) {
        // No fatal tags declared, so there is nothing to satisfy.
        return true;
    }
    for (String fatalTagName : tag.getFatalTags()) {
        if (getOpenTags(cleanTimeValues).tagExists(fatalTagName)) {
            // One open fatal tag is enough; no need to inspect the rest.
            return true;
        }
    }
    return false;
}
代码示例来源:origin: fivesmallq/web-data-extractor
private void reopenBrokenNode(ListIterator
TagNode closedByPresidence = toReopen;
TagNode copy = closedByPresidence.makeCopy();
copy.setAutoGenerated(true);
copy.removeAttribute("id");
nodeIterator.add(copy);
getOpenTags(cleanTimeValues).addTag(closedByPresidence.getName(), nodeIterator.previousIndex());
}
代码示例来源:origin: amplafi/htmlcleaner
private void reopenBrokenNode(ListIterator
TagNode closedByPresidence = toReopen;
TagNode copy = closedByPresidence.makeCopy();
copy.setAutoGenerated(true);
copy.removeAttribute("id");
nodeIterator.add(copy);
getOpenTags(cleanTimeValues).addTag(closedByPresidence.getName(), nodeIterator.previousIndex());
}
代码示例来源:origin: fivesmallq/web-data-extractor
for (String fatalTag : tag.getFatalTags()) {
if (fatalTag != null) {
TagPos tagPos = getOpenTags(cleanTimeValues).findTag(fatalTag);
if (tagPos != null) {
fatalTagPosition = tagPos.position;
for (String requiredTag : tag.getRequiredParentTags()) {
if (requiredTag != null) {
TagPos currTagPos = getOpenTags(cleanTimeValues).findTag(requiredTag);
if (currTagPos != null) {
requiredTagMissing = currTagPos.position <= fatalTagPosition;
ListIterator it = getOpenTags(cleanTimeValues).list.listIterator(getOpenTags(cleanTimeValues).list.size());
while (it.hasPrevious()) {
TagPos currTagPos = (TagPos) it.previous();
代码示例来源:origin: fivesmallq/web-data-extractor
getOpenTags(cleanTimeValues).removeTag(newTagNode.getName());
tagNode = newTagNode;
} else {
代码示例来源:origin: amplafi/htmlcleaner
getOpenTags(cleanTimeValues).removeTag( newTagNode.getName() );
tagNode = newTagNode;
} else {
代码示例来源:origin: fivesmallq/web-data-extractor
TagPos matchingPosition = getOpenTags(cleanTimeValues).findTag(tagName);
TagInfo tag = getTagInfo(tagName, cleanTimeValues);
TagPos lastTagPos = getOpenTags(cleanTimeValues).isEmpty() ? null : getOpenTags(cleanTimeValues).getLastTagPos();
TagInfo lastTagInfo = lastTagPos == null ? null : getTagInfo(lastTagPos.name, cleanTimeValues);
closeSnippet(nodeList, lastTagPos, startTagToken, cleanTimeValues);
nodeIterator.previous();
} else if (tag != null && tag.hasPermittedTags() && getOpenTags(cleanTimeValues).someAlreadyOpen(tag.getPermittedTags())) {
nodeIterator.set(null);
} else if (tag != null && tag.isUnique() && getOpenTags(cleanTimeValues).tagEncountered(tagName)) {
nodeIterator.set(null);
properties.fireHtmlError(true, startTagToken, ErrorType.UniqueTagDuplicated);
getOpenTags(cleanTimeValues).addTag(tagName, nodeIterator.previousIndex());
if (getOpenTags(cleanTimeValues).getLastTagPos() == null) {
cleanTimeValues._headTags.add(new ProxyTagNode((CommentNode) token, cleanTimeValues.bodyNode));
代码示例来源:origin: amplafi/htmlcleaner
} else {
TagPos matchingPosition = getOpenTags(cleanTimeValues).findTag(tagName);
TagInfo tag = getTagInfoProvider().getTagInfo(tagName);
TagPos lastTagPos = getOpenTags(cleanTimeValues).isEmpty() ? null : getOpenTags(cleanTimeValues).getLastTagPos();
TagInfo lastTagInfo = lastTagPos == null ? null : getTagInfoProvider().getTagInfo(lastTagPos.name);
closeSnippet(nodeList, lastTagPos, startTagToken, cleanTimeValues);
nodeIterator.previous();
} else if ( tag != null && tag.hasPermittedTags() && getOpenTags(cleanTimeValues).someAlreadyOpen(tag.getPermittedTags()) ) {
nodeIterator.set(null);
} else if ( tag != null && tag.isUnique() && getOpenTags(cleanTimeValues).tagEncountered(tagName) ) {
nodeIterator.set(null);
properties.fireHtmlError(true, startTagToken, ErrorType.UniqueTagDuplicated);
getOpenTags(cleanTimeValues).addTag( tagName, nodeIterator.previousIndex() );
if (getOpenTags(cleanTimeValues).getLastTagPos()==null) {
cleanTimeValues._headTags.add(new ProxyTagNode((CommentNode)token, cleanTimeValues.bodyNode));