The open source JTidy project does an excellent job of converting HTML files to the newer XHTML standard. The following code shows how to invoke JTidy programmatically from Java:
/*In: C:Data_Localxmldocs est.htmlOut: C:Data_Localxmldocs estXHTML.xml*/import org.w3c.tidy.Tidy;import java.io.FileInputStream;import java.io.FileOutputStream;import org.w3c.dom.Document;public class HTML_to_XHTML{ public static void main(String[] args){ try{ FileInputStream FIS=new FileInputStream("C://Data_Local //xml//docs//test.html"); FileOutputStream FOS=new FileOutputStream("C://Data_Local //xml//docs//testXHTML.xml"); Tidy T=new Tidy(); Document D=T.parseDOM(FIS,FOS); } catch (java.io.FileNotFoundException e) {System.out.println(e.getMessage());} } }}