The open-source JTidy project does an excellent job of converting HTML files to the newer XHTML standard. The following code shows how to invoke JTidy programmatically from Java:
/*
In: C:\Data_Local\xml\docs\test.html
Out: C:\Data_Local\xml\docs\testXHTML.xml
*/
import org.w3c.tidy.Tidy;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.w3c.dom.Document;
public class HTML_to_XHTML{
public static void main(String[] args){
try{
FileInputStream FIS=new FileInputStream("C://Data_Local
//xml//docs//test.html");
FileOutputStream FOS=new FileOutputStream("C://Data_Local
//xml//docs//testXHTML.xml");
Tidy T=new Tidy();
Document D=T.parseDOM(FIS,FOS);
}
catch (java.io.FileNotFoundException e)
{System.out.println(e.getMessage());}
}
}
}