Test paragraph.
Unwanted div, ought to be stripped out. </Div>
< cusTom>Handles custom tags and is case insensitive.< / CUstom>
Currently it always forces elements to lowercase.
To be used for general XML, this would
NEED to change.
Finds attributes written with different quoting styles (double-quoted, single-quoted or unquoted) and standardises them to double quotes.
Check out how this image's attributes change:
stuff
With this code...
//Demo: clean the value of the element with id "sample" and show the result.
//Each key is a permitted tag; null permits no attributes, an array lists the permitted ones.
alert(
  cleanHTML(
    document.getElementById('sample').value,
    {
      em: null,
      br: null,
      hr: null,
      p: null,
      img: ['href', 'height', 'width'],
      custom: null
    }
  )
);
/**
 * Lists the names of an object's own enumerable properties.
 *
 * @param {Object} object - The object to inspect; null/undefined yields an empty array.
 * @returns {string[]} The own enumerable property names, in for...in order.
 */
function propertiesToArray(object) {
  var result = [];
  for (var p in object) {
    //for...in also walks the prototype chain; keep only the object's own properties
    if (Object.prototype.hasOwnProperty.call(object, p)) {
      result.push(p);
    }
  }
  return result;
}
/**
 * Removes every HTML element from a string except the permitted ones, and
 * rebuilds each permitted element with a lowercase tag name and only its
 * permitted attributes (always emitted as name="value").
 *
 * Works in three passes: permitted tags are turned into [#[...]#] tokens,
 * every remaining <...> span is deleted, then the tokens are turned back
 * into tags. Literal '#' characters are doubled first so that existing text
 * can never be mistaken for a token, and are restored at the end.
 *
 * NOTE(review): this is regex based, not a real HTML parser. Anything that is
 * not a complete <...> span (e.g. an unterminated '<') is left untouched, so
 * do not treat the result as a security-grade sanitiser without review.
 *
 * @param {string} HTML - The markup to clean.
 * @param {Object} allowedElements - Maps each permitted tag name (lowercase,
 *     since attribute lists are looked up by the lowercased tag name) to null
 *     for "no attributes" or to an array of permitted attribute names.
 * @returns {string} The cleaned markup.
 */
function cleanHTML(HTML, allowedElements) {
  //tag names are spliced into a RegExp below, so neutralise any metacharacters
  var names = propertiesToArray(allowedElements);
  for (var n = 0; n < names.length; n++) {
    names[n] = names[n].replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  //rebuild a permitted tag as a token carrying only its permitted attributes
  var parseAttributes = function(matchedSubstring, ref1, ref2, ref3, ref4) {
    var attributes = '';
    ref2 = ref2.toLowerCase();
    var allowedAttributes = allowedElements[ref2];
    if (allowedAttributes && allowedAttributes.length) {
      //case-insensitive lookup of the permitted attribute names
      var allowedLookup = {};
      for (var i = 0; i < allowedAttributes.length; i++) {
        allowedLookup[String(allowedAttributes[i]).toLowerCase()] = true;
      }
      //walk every name=value pair in order, so that text inside one
      //attribute's quoted value is never mistaken for another attribute
      var m, re = /([^\s=\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))/g;
      while ((m = re.exec(ref3)) !== null) {
        if (allowedLookup[m[1].toLowerCase()] === true) {
          var value = (m[2] !== undefined) ? m[2] : ((m[3] !== undefined) ? m[3] : m[4]);
          //'"' would end the rebuilt attribute early and '<' would be eaten
          //by the strip-all pass below, so both are written as entities
          attributes += ' ' + m[1] + '="' + value.replace(/"/g, '&quot;').replace(/</g, '&lt;') + '"';
        }
      }
    }
    return '[#[' + ref1 + ref2 + attributes + ref4 + ']#]';
  };

  //escape any possible existing tokens
  HTML = HTML.replace(/#/g, '##');

  //convert permitted elements to tokens; skipped when nothing is permitted,
  //because an empty alternation would match every tag
  if (names.length > 0) {
    //(?![^\s\/>]) stops a permitted name matching a longer tag name (p vs pre);
    //[^>]*? lets attribute values contain '/', e.g. URLs
    HTML = HTML.replace(new RegExp('<\\s*(\\/?)\\s*(' + names.join('|') + ')(?![^\\s\\/>])([^>]*?)\\s*(\\/?)\\s*>', 'ig'), parseAttributes);
  }

  //delete ALL other HTML elements
  HTML = HTML.replace(/<[^>]*>/g, '');

  //convert tokens back to HTML elements; the lazy match allows ']' inside
  //attribute values, and ']#]' cannot occur inside a token because every
  //'#' from the input was doubled
  HTML = HTML.replace(/\[#\[([\s\S]+?)\]#\]/g, '<$1>');

  //return result, unescaping possible tokens
  return HTML.replace(/##/g, '#');
}
Format of allowedElements
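Provide an object where each property name is a permitted element tag (in lowercase) and each value is either null, if no attributes are allowed, or an array of the attribute names to permit. For example, {em: null, img: ['height', 'width']} keeps em elements without attributes and img elements with only their height and width attributes.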