| 解析报表模板文件获取模板的数据集和SQL语句,并保存为Excel   完整代码 import json
import pandas as pd
from lxml import etree
import os
result_li = []
for root, dirs, files in os.walk(".\\reportlets", topdown=False):
    for name in files:
        abs_name = os.path.join(os.path.abspath(root), name)
        # fineReport .cpt .frm 本身是xml文件,故可直接按行匹配字符查找是否与对象有关
        if abs_name[-4:] not in [".cpt", ".frm"]:
            continue
        # 打开.cpt .frm
        with open(abs_name, mode="rb") as fr:
            xslt_content = fr.read()
        xml_root = etree.XML(xslt_content)
        tabel_data_el = xml_root.xpath("//*/TableDataMap//TableData[@class='com.fr.data.impl.DBTableData']")
        for db_data in tabel_data_el:
            # 数据集名称
            date_source_name = db_data.get("name")
            # 转 string
            string = etree.tostring(db_data, encoding='utf-8').decode('utf-8')
            data_source = etree.XML(string)
            conn = data_source.xpath("/TableData/Connection/DatabaseName/text()")
            conn_collect = []
            for j in conn:
                # 数据链接名字
                conn_collect.append(str(j).strip())
            conn_string = "".join(conn_collect)
            query = data_source.xpath("/TableData/Query/text()")
            query_collect = []
            for j in query:
                # 数据集SQL
                query_collect.append(str(j).strip())
            query_string = "".join(query_collect)
            data_source_json = {
                "report": abs_name,
                "data_source": date_source_name,
                "conn": conn_string,
                "query": query_string
            }
            result_li.append(data_source_json)
            print(json.dumps(data_source_json, ensure_ascii=False))
            print("-----------------------------------")
df = pd.DataFrame(result_li)
df.to_excel("./data_source.xlsx", engine="openpyxl", index=False)
 |