Fetching Spark Web UI Information

Posted on 2019-12-08, in bigdata

The script below scrapes the Spark standalone master web UI (default port 8080) with regular expressions and prints the cluster summary, the workers table, and the running and completed applications tables.

# -*- coding: utf-8 -*-
import re

import requests


def _get_requests(url):
    """Fetch the master UI page and return its HTML, or "" on failure."""
    response = requests.get(url)
    content = ""
    if response.status_code == 200:
        content = response.content.decode()
    return content


def _get_spark_common(body):
    """Print the cluster summary from the <ul class="unstyled"> block."""
    s = re.findall(r'<ul class="unstyled".*</ul>', body, re.S)[0]
    s = re.sub(r'\s{2,}|<a.*?>|</a>', ' ', s)
    print_format = "{:15}{}".format
    print("********************************")
    for k, v in re.findall(r'<strong>(.*?)</strong>\s*(.*?)\s*</li>', s):
        print(print_format(k, v))
    print("\n")


def _get_spark_works(body):
    """Print the workers table."""
    s = re.findall(r'<div class="aggregated-workers collapsible-table".*?</div>', body, re.S)[0]
    s = re.sub(r'<a.*?>|</a>', "", s)
    s = re.sub(r'\s+', " ", s)
    head = re.findall(r'<th .*?>(.*?)</th>', s)
    work_info = [re.findall(r'<td.*?>\s*(.*?)\s*</td>', x)
                 for x in re.findall(r'<tr>.*?</tr>', s)]
    work_info.insert(0, head)
    print_format = "{:<46}{:25}{:10}{:15}{}".format
    print("****************Workers (%s)****************" % (len(work_info) - 1))
    for row in work_info:
        print(print_format(*row))
    print("\n")


def _get_spark_running_applications(body):
    """Print the running applications table."""
    s = re.findall(r'<div class="aggregated-activeApps.*?</div>', body, re.S)[0]
    s = re.sub(r'\s+', " ", s)
    # flags must be passed by keyword: the fourth positional argument
    # of re.sub is count, not flags.
    s = re.sub(r'<a.*?>|</a>|<form.*?>.*</form>', "", s, flags=re.S)
    head = re.findall(r'<th .*?>(.*?)</th>', s)
    run_work_info = [re.findall(r'<td.*?>\s*(.*?)\s*</td>', x)
                     for x in re.findall(r'<tr>.*?</tr>', s)]
    run_work_info.insert(0, head)
    print_format = "{:<28}{:20}{:10}{:25}{:25}{:10}{:13}{}".format
    print("****************Running Applications (%s)****************" % (len(run_work_info) - 1))
    for row in run_work_info:
        print(print_format(*row))
    print("\n")


def _get_spark_completed_applications(body):
    """Print the completed applications table."""
    s = re.findall(r'<div class="aggregated-completedApps.*?</div>', body, re.S)[0]
    s = re.sub(r'\s+', " ", s)
    s = re.sub(r'<a.*?>|</a>|<form.*?>.*</form>', "", s, flags=re.S)
    head = re.findall(r'<th .*?>(.*?)</th>', s)
    run_work_info = [re.findall(r'<td.*?>\s*(.*?)\s*</td>', x)
                     for x in re.findall(r'<tr>.*?</tr>', s)]
    run_work_info.insert(0, head)
    print_format = "{:<28}{:20}{:10}{:25}{:25}{:10}{:13}{}".format
    print("****************Completed Applications (%s)****************" % (len(run_work_info) - 1))
    for row in run_work_info:
        print(print_format(*row))
    print("\n")


if __name__ == "__main__":
    url = "http://192.168.100.10:8080/"
    body = _get_requests(url)
    _get_spark_common(body)
    _get_spark_works(body)
    _get_spark_running_applications(body)
    _get_spark_completed_applications(body)
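Regex scraping of the UI pages is brittle across Spark versions. The standalone master also serves a machine-readable snapshot of the same information; below is a minimal sketch assuming your Spark version exposes a /json endpoint on the master UI port. The endpoint path and the field names ("workers", "activeapps", and so on) are assumptions to verify against your own cluster, not something taken from the script above.

# A minimal sketch, assuming the standalone master serves /json on the UI port.
import requests


def get_master_state(master_url):
    """Fetch the master's JSON status snapshot and return it as a dict."""
    response = requests.get(master_url.rstrip("/") + "/json/")
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    state = get_master_state("http://192.168.100.10:8080")
    # Field names below are assumptions; inspect the actual response
    # on your Spark version before relying on them.
    for worker in state.get("workers", []):
        print(worker.get("id"), worker.get("state"))
    for app in state.get("activeapps", []):
        print(app.get("id"), app.get("name"))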
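If pulling in a dependency is acceptable, an HTML parser is also more robust than hand-written regexes. Here is a sketch of the workers-table extraction with BeautifulSoup, assuming beautifulsoup4 is installed (pip install beautifulsoup4); the CSS class name is taken from the regexes in the script above and may differ across Spark versions.

# A sketch with BeautifulSoup instead of regexes, under the assumptions above.
from bs4 import BeautifulSoup


def parse_workers(html):
    """Return the workers table as a list of rows (header row first)."""
    soup = BeautifulSoup(html, "html.parser")
    div = soup.find("div", class_="aggregated-workers")
    rows = []
    if div is not None:
        for tr in div.find_all("tr"):
            cells = [c.get_text(strip=True) for c in tr.find_all(["th", "td"])]
            if cells:
                rows.append(cells)
    return rows

Because the parser walks the DOM rather than matching text, this keeps working when attribute order or whitespace in the markup changes, which is exactly what tends to break the regex version.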