python - 将 matplotlib 对象加载到 reportlab

这个问题在这里已经有答案：

Is there a matplotlib flowable for ReportLab?

(4 个回答)

5年前关闭。

我正在尝试将 matplotlib 对象加载到 reportlab 中。
这是我的代码:

from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Image
from matplotlib import pyplot as plt

def __get_img_data():
    """
    Return the binary PNG data of the current matplotlib plot.

    The plot is rendered through a temporary file on disk. Both the
    placeholder created by ``NamedTemporaryFile`` and the ``.png`` file
    written by ``plt.savefig`` are removed before returning, even when
    saving or reading fails.
    """
    # Imported locally so the helper does not depend on module-level
    # imports that are missing from the visible import block.
    import os
    from tempfile import NamedTemporaryFile

    # Only the unique name is needed; close the handle right away so it
    # does not stay open for the lifetime of the process.
    with NamedTemporaryFile(delete=False) as img_file:
        base_name = img_file.name
    png_name = base_name + '.png'
    try:
        # Spell out the extension instead of relying on savefig adding
        # '.png' to an extension-less file name.
        plt.savefig(png_name)
        with open(png_name, 'rb') as png_file:
            return png_file.read()
    finally:
        # Clean up both files regardless of whether the export succeeded.
        for path in (base_name, png_name):
            if os.path.exists(path):
                os.remove(path)

def get_plot():
    """Draw the plot and return its image data as bytes.

    The current figure is closed afterwards, even if exporting it fails,
    so figures do not pile up across calls.
    """
    # HERE I PLOT SOME STUFF
    try:
        return __get_img_data()
    finally:
        plt.close()

class NumberedCanvas(canvas.Canvas):
    """Canvas subclass passed to ``doc.build`` as ``canvasmaker``.

    Stub: the initializer is empty in this snippet.
    """

    def __init__(self):
        """Placeholder initializer; does nothing."""

class ReportTemplate:
    """Report builder that embeds the matplotlib plot as an image flowable."""

    def __init__(self):
        """No state is set up in this snippet."""

    def _header_footer(self, canvas, doc):
        """Page callback handed to ``doc.build``; a no-op here."""

    def get_data(self):
        """Assemble the story, embed the plot image and build the document.

        The plot bytes are written to ``/tmp/x.png`` so that ``Image`` can be
        given a file name; the file is removed again before the build.

        NOTE(review): ``usable_width``, ``doc``, ``obj`` and ``os`` are not
        defined in the visible snippet -- presumably they come from the
        surrounding code; confirm before reuse.
        """
        story = ['hello']
        ## HERE I WANT TO ADD THE IMAGE
        png_path = '/tmp/x.png'
        png_bytes = get_plot()
        with open(png_path, 'wb') as out:
            out.write(png_bytes)
        story.append(Image(png_path, width=usable_width, height=usable_width))
        os.remove(png_path)
        ######
        doc.build(
            story,
            onFirstPage=self._header_footer,
            onLaterPages=self._header_footer,
            canvasmaker=NumberedCanvas
        )
        # blah blah
        return obj

我的目标是将绘图图像插入到报告中。
这工作正常，但我不想写入临时文件。
我尝试过安装 PIL，因为我读到有人借助 PIL 的图像库来实现这一点，但是一旦安装了 PIL，由于 Pillow 版本不兼容，我代码的另一部分就会出错。

最佳答案

pdfrw 文档很烂

这个问题的第一个回答中讨论的 pdfrw 示例之所以有点笨拙，唯一的原因是 pdfrw 的文档很糟糕。由于文档糟糕，该示例的作者 @Larry-Meyn 以 rst2pdf 的 vectorpdf 扩展作为起点，而该扩展同样没有真正的文档，并且必须同时应付 rst2pdf 和 pdfrw 的各种怪癖(而且它比您需要的更通用，因为它可以让 rst2pdf 显示现有 PDF 中任意页面的任意矩形区域)。拉里居然能让它运行起来，这令人惊讶，我向他致敬。

我完全有资格这么说，因为我是 pdfrw 的作者并且对 rst2pdf 做出了一些贡献，包括那个 vectorpdf 扩展。

但是您可能无论如何都想使用 pdfrw

直到一个月前我才真正开始关注 stackoverflow，而且 pdfrw 本身也停滞了好几年，但我现在在这里，我认为你应该再看看 pdfrw，即使它的文档仍然很糟糕。

为什么？ 因为如果你输出到一个png文件，你的图像会被光栅化，如果你使用pdfrw，它会保持矢量格式，这意味着它在任何比例下都会看起来很好。

所以我修改了你答案的 png 示例

你的 png 示例不是一个完整的程序——doc.build 的参数没有定义，样式没有定义，还缺少一些导入等等。但它已经足够接近，可以看出大致意图并让它运行起来。

编辑 -- 我刚刚注意到这个示例实际上是 Larry 示例的修改版本，所以那个示例仍然非常有价值，因为它在某些方面比这个示例功能更全。

在我解决了这些问题并获得了一些输出后，我添加了一个选项以能够使用 png 或 pdf，因此您可以看到不同之处。下面的程序将创建两个不同的 PDF 文件，您可以自己比较结果。

import cStringIO
from matplotlib import pyplot as plt
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Image, Flowable
from reportlab.lib.units import inch
from reportlab.lib.styles import getSampleStyleSheet

from pdfrw import PdfReader, PdfDict
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl

# Load reportlab's sample stylesheet once and keep its 'Normal' paragraph
# style available at module level.
styles = getSampleStyleSheet()
style = styles['Normal']

def form_xo_reader(imgdata):
    """Read a one-page PDF and return that page as a pdfrw form object.

    ``imgdata`` is handed to ``PdfReader``. The document must contain
    exactly one page; otherwise the unpacking below raises ``ValueError``.
    """
    [only_page] = PdfReader(imgdata).pages
    return pagexobj(only_page)


class PdfImage(Flowable):
    """Flowable that draws either a pdfrw form object or a raster image.

    ``img_data`` is drawn as a PDF form when it is a ``PdfDict`` (what
    ``form_xo_reader`` returns); anything else is passed to
    ``canv.drawImage`` unchanged. ``width`` and ``height`` give the size the
    image occupies on the page.
    """

    def __init__(self, img_data, width=200, height=200):
        self.img_width = width
        self.img_height = height
        self.img_data = img_data

    def wrap(self, width, height):
        """Report the fixed image size; the available space is ignored."""
        return self.img_width, self.img_height

    def drawOn(self, canv, x, y, _sW=0):
        """Draw the image on ``canv`` at ``(x, y)``.

        ``_sW`` is the spare horizontal space; when it is positive and the
        instance has an ``hAlign`` attribute, ``x`` is shifted to centre or
        right-align the image.

        Raises:
            ValueError: if ``hAlign`` is not a recognised alignment value.
        """
        if _sW > 0 and hasattr(self, 'hAlign'):
            # The TA_* constants were referenced without ever being
            # imported, so reaching this branch raised NameError. Import
            # them where they are needed.
            from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
            a = self.hAlign
            if a in ('CENTER', 'CENTRE', TA_CENTER):
                x += 0.5*_sW
            elif a in ('RIGHT', TA_RIGHT):
                x += _sW
            elif a not in ('LEFT', TA_LEFT):
                raise ValueError("Bad hAlign value " + str(a))
        canv.saveState()
        img = self.img_data
        if isinstance(img, PdfDict):
            # Scale the form so that its bounding box fills the requested
            # size. NOTE(review): this uses BBox[2]/BBox[3] as width/height,
            # which assumes the box starts at (0, 0) -- confirm.
            xscale = self.img_width / img.BBox[2]
            yscale = self.img_height / img.BBox[3]
            canv.translate(x, y)
            canv.scale(xscale, yscale)
            canv.doForm(makerl(canv, img))
        else:
            canv.drawImage(img, x, y, self.img_width, self.img_height)
        canv.restoreState()

def _filler_paragraphs(style, count=10):
    """Return ``count`` numbered filler paragraphs, each followed by a spacer."""
    flowables = []
    for i in range(count):
        bogustext = ("Paragraph number %s. " % i)
        flowables.append(Paragraph(bogustext, style))
        flowables.append(Spacer(1, 0.2*inch))
    return flowables


def make_report(outfn, use_pdfrw):
    """Write a demo PDF to ``outfn`` with a matplotlib plot between text.

    Args:
        outfn: Output file name handed to ``SimpleDocTemplate``.
        use_pdfrw: When true, the plot is saved as PDF and embedded through
            ``form_xo_reader`` (vector); otherwise it is saved as PNG and
            embedded through ``ImageReader`` (raster).
    """
    fig = plt.figure(figsize=(4, 3))
    try:
        plt.plot([1,2,3,4],[1,4,9,26])
        plt.ylabel('some numbers')
        # NOTE: cStringIO exists on Python 2 only; io.BytesIO is the
        # Python 3 equivalent.
        imgdata = cStringIO.StringIO()
        fig.savefig(imgdata, format='pdf' if use_pdfrw else 'png')
    finally:
        # The rendered bytes are already in the buffer; close the figure so
        # it does not stay registered with pyplot after every call.
        plt.close(fig)
    imgdata.seek(0)
    reader = form_xo_reader if use_pdfrw else ImageReader
    image = reader(imgdata)

    doc = SimpleDocTemplate(outfn)
    style = styles["Normal"]
    img = PdfImage(image, width=200, height=200)

    # Layout: top spacer, ten paragraphs, the plot, ten more paragraphs.
    story = [Spacer(0, inch)]
    story.extend(_filler_paragraphs(style))
    story.append(img)
    story.extend(_filler_paragraphs(style))

    doc.build(story)

# Produce both variants so the raster and vector results can be compared.
make_report(outfn="hello_png.pdf", use_pdfrw=False)
make_report(outfn="hello_pdf.pdf", use_pdfrw=True)

这种方法有什么缺点？

第一个明显的缺点是现在需要 pdfrw，但可以从 PyPI 获得。

下一个缺点是，如果您将大量 matplotlib 图放入文档中，我认为这种技术将复制字体等资源，因为我不相信 reportlab 足够聪明以注意到重复项。

我相信这个问题可以通过将所有的图输出到同一个 PDF 的不同页面来解决。我还没有真正用 matplotlib 试过，但 pdfrw 完全能够将现有 PDF 的每一页转换为单独的 flowable。

因此，如果您有很多绘图并且它们使最终的 PDF 变得太大，您可以研究一下这个办法，或者只是尝试某个 PDF 优化器，看看是否有帮助。无论如何，那是留待日后解决的另一个问题。

关于python - 将 matplotlib 对象加载到 reportlab，我们在Stack Overflow上找到一个类似的问题：https://stackoverflow.com/questions/31712386/

PDFrw

python - 将 matplotlib 对象加载到 reportlab