1.在build.gradle中安装所需依赖:
implementation group: 'com.itextpdf', name: 'itextpdf', version: '5.5.13'
implementation group: 'com.itextpdf.tool', name: 'xmlworker', version: '5.5.13'
implementation group: 'org.jsoup', name: 'jsoup', version: '1.15.3'
2.创建工具类,实现转换方法
/**
* convert the html to pdf.
*/
public void htmlToPdf(String oldFilePath, String newFilePath) throws IOException, com.itextpdf.text.DocumentException {
Document doc = Jsoup.parse(new File(oldFilePath), "UTF-8");
// jsoup标准化标签,生成闭合标签
doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
com.itextpdf.text.Document document = new com.itextpdf.text.Document(PageSize.A4, 36, 36, 36, 36);
PdfWriter pdfWriter = PdfWriter.getInstance(document, new FileOutputStream(newFilePath));
document.open();
//html to pdf, base64 image support.
final TagProcessorFactory tagProcessorFactory = Tags.getHtmlTagProcessorFactory();
tagProcessorFactory.removeProcessor(HTML.Tag.IMG);
tagProcessorFactory.addProcessor(new ImageTagRefreshFilter(), HTML.Tag.IMG);
//设置中文字体
final CssFilesImpl cssFiles = new CssFilesImpl();
cssFiles.add(XMLWorkerHelper.getInstance().getDefaultCSS());
final StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);
final HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(asianFontRefreshFilter));
hpc.setAcceptUnknown(true).autoBookmark(true).setTagFactory(tagProcessorFactory);
final HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(document, pdfWriter));
final Pipeline<?> pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
final XMLWorker worker = new XMLWorker(pipeline, true);
final Charset charset = StandardCharsets.UTF_8;
final XMLParser xmlParser = new XMLParser(true, worker, charset);
InputStream inputStream = new ByteArrayInputStream(doc.html().getBytes());
xmlParser.parse(inputStream, charset);
// XMLWorkerHelper.getInstance().parseXHtml(pdfWriter, document, inputStream, Charset.forName("UTF-8"));
document.close();
}
3.base64过滤类:
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Element;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.codec.Base64;
import com.itextpdf.tool.xml.NoCustomContextException;
import com.itextpdf.tool.xml.Tag;
import com.itextpdf.tool.xml.WorkerContext;
import com.itextpdf.tool.xml.exceptions.RuntimeWorkerException;
import com.itextpdf.tool.xml.html.HTML;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
public class ImageTagRefreshFilter extends com.itextpdf.tool.xml.html.Image {
/**
* html to pdf, base64 image support.
* */
@Override
public List<Element> end(final WorkerContext ctx, final Tag tag, final List<Element> currentContent) {
final Map<String, String> attributes = tag.getAttributes();
String src = attributes.get(HTML.Attribute.SRC);
List<Element> elements = new ArrayList<Element>(1);
if (null != src && src.length() > 0) {
Image img = null;
if (src.startsWith("data:image/")) {
final String base64Data = src.substring(src.indexOf(",") + 1);
try {
img = Image.getInstance(Base64.decode(base64Data));
} catch (Exception e) {
throw new RuntimeException(e);
}
if (img != null) {
try {
final HtmlPipelineContext htmlPipelineContext = getHtmlPipelineContext(ctx);
elements.add(getCssAppliers().apply(new Chunk((com.itextpdf.text.Image) getCssAppliers().apply(img, tag, htmlPipelineContext), 0, 0, true), tag,
htmlPipelineContext));
} catch (NoCustomContextException e) {
throw new RuntimeWorkerException(e);
}
}
}
if (img == null) {
elements = super.end(ctx, tag, currentContent);
}
}
return elements;
}
}
4.字体类代码,window用户可在C:\windows\font\中寻找自己所需字体即可。我这里用的为黑体:
simhei.ttf
import com.itextpdf.text.Font;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
@Component
public class AsianFontRefreshFilter extends XMLWorkerFontProvider {
//此处写字体文件的绝对路径
private String fontPath;
@Override
public Font getFont(String fontname, String encoding, float size, final int style) {
try {
//字体文件绝对路径
BaseFont bfChinese = BaseFont.createFont(fontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
return new Font(bfChinese, size, style);
} catch (Exception e) {
e.printStackTrace();
}
return super.getFont(fontname, encoding, size, style);
}
}
效果如下:
html页面预览:
pdf页面预览:
文章来源地址https://www.toymoban.com/news/detail-627124.html文章来源:https://www.toymoban.com/news/detail-627124.html
到了这里,关于java中使用Jsoup和Itext实现将html转换为PDF的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!