package com.xebialabs.deployit;

import java.util.Arrays;
import java.util.regex.Pattern;
import org.owasp.html.HtmlChangeListener;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.HtmlSanitizer;
import org.owasp.html.HtmlStreamRenderer;
import org.owasp.html.PolicyFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Cleans HTML fragments against a fixed allow-list of elements and attributes.
 *
 * <p>Tags and attributes rejected by the policy are dropped from the output and reported
 * through the class logger at WARN level.
 */
public class Sanitizer {

    private static final Logger logger = LoggerFactory.getLogger(Sanitizer.class);

    /**
     * The allow-list policy, compiled once via {@code toFactory()}.
     *
     * <p>A {@link HtmlPolicyBuilder} is mutable and must not be shared between threads, so the
     * builder itself is not kept around; only the reusable factory it produces is stored.
     */
    private static final PolicyFactory POLICY = new HtmlPolicyBuilder().allowStandardUrlProtocols()
            // Allow title="..." on any element.
            .allowAttributes("title").globally()
            // Allow href="..." on <a> elements.
            .allowAttributes("href").onElements("a")
            // Defeat link spammers.
            .requireRelNofollowOnLinks()
            // Allow lang= with an alphabetic value (2 to 20 letters) on any element.
            .allowAttributes("lang").matching(Pattern.compile("[a-zA-Z]{2,20}")).globally()
            // The align attribute on <p> elements may take any of the values below (case-insensitive).
            .allowAttributes("align").matching(true, "center", "left", "right", "justify", "char").onElements("p")
            // These elements are allowed.
            .allowElements("a", "p", "div", "i", "b", "em", "blockquote", "tt", "strong", "br", "ul", "ol", "li")
            // Custom slashdot tags.
            // These could be rewritten in the sanitizer using an ElementPolicy.
            .allowElements("quote", "ecode")
            .toFactory();

    /**
     * Logs everything the policy strips. The listener holds no state, so a single instance
     * serves every call; the context argument is unused.
     */
    private static final HtmlChangeListener<Object> DISCARD_LOGGER = new HtmlChangeListener<Object>() {
        @Override
        public void discardedTag(final Object context, final String elementName) {
            logger.warn("Found errors during sanitation: discarded tag {}", elementName);
        }

        @Override
        public void discardedAttributes(final Object context, final String tagName, final String... attributeNames) {
            logger.warn("Found errors during sanitation: discarded attributes {} on tag {}", Arrays.toString(attributeNames), tagName);
        }
    };

    /**
     * Sanitizes the given HTML fragment.
     *
     * @param input the HTML to clean; may be {@code null}
     * @return the sanitized HTML containing only allowed elements and attributes, or an empty
     *         string when {@code input} is {@code null}
     */
    public static String sanitize(String input) {
        if (input == null) {
            // Nothing to sanitize; answer with an empty result rather than risking an NPE.
            return "";
        }
        // PolicyFactory.sanitize renders into its own buffer with a no-op bad-HTML handler,
        // so no null handler is ever handed to the renderer.
        return POLICY.sanitize(input, DISCARD_LOGGER, null);
    }
}
