import email from email.message import Message from email.policy import default import json from datetime import datetime from lxml import etree def clean_xml(xml_string): """移除 XML 中的命名空间,包括 xmlns 声明""" parser = etree.XMLParser(remove_blank_text=True) root = etree.XML(xml_string, parser) # 遍历所有元素,移除命名空间 for elem in root.iter(): if '}' in elem.tag: # 如果有命名空间 elem.tag = elem.tag.split('}', 1)[1] # 移除命名空间 if 'version' in elem.attrib: del elem.attrib['version'] etree.cleanup_namespaces(root) return etree.tostring(root, pretty_print=True, encoding="unicode") def parse_xml_to_dict(xml): """ 将xml文件解析成字典形式,参考tensorflow的recursive_parse_xml_to_dict Args: xml: xml tree obtained by parsing XML file contents using lxml.etree Returns: Python dictionary holding XML contents. """ if len(xml) == 0: # 遍历到底层,直接返回tag对应的信息 return{xml.tag: xml.text} result= {} for child in xml: child_result= parse_xml_to_dict(child) # 递归遍历标签信息 if child.tag not in ['pictureInfo', 'ANPR', "PictureURLInfo"]: result[child.tag] = child_result[child.tag] else: if child.tag not in result: # 因为object可能有多个,所以需要放入列表里 result[child.tag] = [] result[child.tag].append(child_result[child.tag]) return {xml.tag: result} def parse_multipart(data, boundary): # 按 boundary 分割数据 parts = data.split(boundary) parsed_data = {} for part in parts: # print("part : ", part) part = part.strip() # print("part strip: ", part) if not part: continue # 获取头部和主体部分 headers, _, body = part.partition(b"\r\n\r\n") # print("headers :", headers) if not headers: continue header_dict = email.message_from_bytes(headers, policy=default) # print("header_dict : ", header_dict) # 获取 Content-Disposition content_disposition = header_dict.get("Content-Disposition", "") # print("content_disposition :", content_disposition) if "name=" in content_disposition: name = content_disposition.split('name="')[1].split('"')[0] filename = None if 'filename="' in content_disposition: filename = content_disposition.split('filename="')[1].split('"')[0] # 提取 Content-Type content_type = header_dict.get("Content-Type", "").strip() # 根据 Content-Type 处理内容 if content_type == "application/json": body_content = json.loads(body.decode("utf-8")) elif content_type == "application/xml": body = body.decode('utf-8').replace("