author    Magnus Granberg <zorry@gentoo.org> 2022-07-13 21:49:23 +0200
committer Magnus Granberg <zorry@gentoo.org> 2022-07-13 21:49:23 +0200
commit    1250899a4c17be77c99048576090ae1b32e6d227 (patch)
tree      50a5d71ef0ab9218e02979aeb04c24edfdcb5e34
parent    Add support log docker worker (diff)
download  tinderbox-cluster-1250899a.tar.gz
          tinderbox-cluster-1250899a.tar.bz2
          tinderbox-cluster-1250899a.zip
Use log docker for log parser
Signed-off-by: Magnus Granberg <zorry@gentoo.org>
-rw-r--r--  bin/ci_log_parser                      |  20
-rw-r--r--  buildbot_gentoo_ci/logs/log_parser.py  | 180
-rw-r--r--  buildbot_gentoo_ci/steps/logs.py       |  88
-rw-r--r--  py/log_parser.py                       | 159
4 files changed, 212 insertions(+), 235 deletions(-)
diff --git a/bin/ci_log_parser b/bin/ci_log_parser
deleted file mode 100644
index 6401a49..0000000
--- a/bin/ci_log_parser
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2021 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-import argparse
-import sys
-from buildbot_gentoo_ci.logs.log_parser import runLogParser
-
-def main():
-    # get filename, project_uuid and default_project_uuid
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-f", "--file", required=True)
-    parser.add_argument("-u", "--uuid", required=True)
-    args = parser.parse_args()
-    runLogParser(args)
-    sys.exit()
-
-if __name__ == "__main__":
-    main()
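
The standalone bin/ci_log_parser entry point is removed; the same argparse interface now lives in py/log_parser.py (added below), so the worker can invoke the parser directly. A sketch of the invocation, with placeholders instead of a real log filename and uuid:

    python3 log_parser.py -f <full_logname>.log.gz -u <project_uuid>
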
diff --git a/buildbot_gentoo_ci/logs/log_parser.py b/buildbot_gentoo_ci/logs/log_parser.py
deleted file mode 100644
index b890c12..0000000
--- a/buildbot_gentoo_ci/logs/log_parser.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# Copyright 2021 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-import sys
-from multiprocessing import Pool, cpu_count
-import re
-import io
-import gzip
-import json
-import os
-from sqlalchemy.ext.declarative import declarative_base
-import sqlalchemy as sa
-
-Base = declarative_base()
-
-class ProjectsPattern(Base):
-    __tablename__ = "projects_pattern"
-    id = sa.Column(sa.Integer, primary_key=True)
-    project_uuid = sa.Column(sa.String(36), nullable=False)
-    search = sa.Column(sa.String(50), nullable=False)
-    start = sa.Column(sa.Integer, default=0)
-    end = sa.Column(sa.Integer, default=0)
-    status = sa.Column(sa.Enum('info', 'warning', 'ignore', 'error'), default='info')
-    type = sa.Column(sa.Enum('info', 'qa', 'compile', 'configure', 'install', 'postinst', 'prepare', 'pretend', 'setup', 'test', 'unpack', 'ignore', 'issues', 'misc', 'elog'), default='info')
-    search_type = sa.Column(sa.Enum('in', 'startswith', 'endswith', 'search'), default='in')
-
-def getDBSession(config):
-    #FIXME: Read the user/pass from file
-    engine = sa.create_engine(config['database'])
-    Session = sa.orm.sessionmaker(bind=engine)
-    return Session()
-
-def getMultiprocessingPool(config):
-    return Pool(processes=int(config['core']))
-
-def addPatternToList(Session, pattern_list, uuid):
-    for project_pattern in Session.query(ProjectsPattern).filter_by(project_uuid=uuid).all():
-        # check if the search pattern is valid
-        project_pattern_search = project_pattern.search
-        try:
-            re.compile(project_pattern_search)
-        except re.error:
-            print("Invalid regex pattern")
-            print(project_pattern.search)
-            print(project_pattern.id)
-        else:
-            pattern_dict = {}
-            pattern_dict['id'] = project_pattern.id
-            pattern_dict['project_uuid'] = project_pattern.project_uuid
-            pattern_dict['search'] = project_pattern_search
-            pattern_dict['start'] = project_pattern.start
-            pattern_dict['end'] = project_pattern.end
-            pattern_dict['status'] = project_pattern.status
-            pattern_dict['type'] = project_pattern.type
-            pattern_dict['search_type'] = project_pattern.search_type
-            pattern_list.append(pattern_dict)
-    return pattern_list
-
-def get_log_search_pattern(Session, uuid, default_uuid):
-    # get the patterns from the projects
-    # and add them to log_search_pattern_list
-    log_search_pattern_list = []
-    log_search_pattern_list = addPatternToList(Session, log_search_pattern_list, uuid)
-    log_search_pattern_list = addPatternToList(Session, log_search_pattern_list, default_uuid)
-    return log_search_pattern_list
-
-def search_buildlog(log_search_pattern_list, logfile_text_dict, tmp_index, max_text_lines):
-    # get the text line to search
-    text_line = logfile_text_dict[tmp_index]
-    summary_dict = {}
-    # loop through the pattern list looking for a match
-    for search_pattern in log_search_pattern_list:
-        search_hit = False
-        ignore_line = False
-        # check if we should ignore the line
-        #FIXME: take the ignore line pattern from the db
-        if re.search('^>>> /', text_line):
-            ignore_line = True
-        #if else re.search('./\w+/'):
-        #    pass
-        else:
-            # search for a match
-            if search_pattern['search_type'] == 'in':
-                if search_pattern['search'] in text_line:
-                    search_hit = True
-            if search_pattern['search_type'] == 'startswith':
-                if text_line.startswith(search_pattern['search']):
-                    search_hit = True
-            if search_pattern['search_type'] == 'endswith':
-                if text_line.endswith(search_pattern['search']):
-                    search_hit = True
-            if search_pattern['search_type'] == 'search':
-                if re.search(search_pattern['search'], text_line):
-                    search_hit = True
-        # add the line if the pattern matches
-        if search_hit:
-            summary_dict[tmp_index] = {}
-            summary_dict[tmp_index]['text'] = text_line
-            summary_dict[tmp_index]['type'] = search_pattern['type']
-            summary_dict[tmp_index]['status'] = search_pattern['status']
-            summary_dict[tmp_index]['id'] = search_pattern['id']
-            summary_dict[tmp_index]['search_pattern'] = search_pattern['search']
-            # add upper text lines if requested
-            # max 5
-            if search_pattern['start'] != 0:
-                i = tmp_index - search_pattern['start'] - 1
-                match = True
-                while match:
-                    i = i + 1
-                    if i < (tmp_index - 9) or i == tmp_index:
-                        match = False
-                    else:
-                        if not i in summary_dict:
-                            summary_dict[i] = {}
-                            summary_dict[i]['text'] = logfile_text_dict[i]
-                            summary_dict[i]['type'] = 'info'
-                            summary_dict[i]['status'] = 'info'
-                            summary_dict[i]['id'] = 0
-                            summary_dict[i]['search_pattern'] = 'auto'
-            # add lower text lines if requested
-            # max 5
-            if search_pattern['end'] != 0:
-                i = tmp_index
-                end = tmp_index + search_pattern['end']
-                match = True
-                while match:
-                    i = i + 1
-                    if i > max_text_lines or i > end:
-                        match = False
-                    else:
-                        if not i in summary_dict:
-                            summary_dict[i] = {}
-                            summary_dict[i]['text'] = logfile_text_dict[i]
-                            summary_dict[i]['type'] = 'info'
-                            summary_dict[i]['status'] = 'info'
-                            summary_dict[i]['id'] = 0
-                            summary_dict[i]['search_pattern'] = 'auto'
-    if not ignore_line or not search_hit:
-        # we add all lines that start with ' * ' as info
-        # we add all lines that start with '>>>' as info
-        if text_line.startswith(' * ') or text_line.startswith('>>>'):
-            if not tmp_index in summary_dict:
-                summary_dict[tmp_index] = {}
-                summary_dict[tmp_index]['text'] = text_line
-                summary_dict[tmp_index]['type'] = 'info'
-                summary_dict[tmp_index]['status'] = 'info'
-                summary_dict[tmp_index]['id'] = 0
-                summary_dict[tmp_index]['search_pattern'] = 'auto'
-    if summary_dict == {}:
-        return False
-    return summary_dict
-
-def getConfigSettings():
-    configpath = os.getcwd().split('workers/')[0]
-    with open(configpath + 'logparser.json') as f:
-        config = json.load(f)
-    return config
-
-def runLogParser(args):
-    index = 1
-    max_text_lines = 0
-    logfile_text_dict = {}
-    config = getConfigSettings()
-    Session = getDBSession(config)
-    mp_pool = getMultiprocessingPool(config)
-    #NOTE: The patterns are from the https://github.com/toralf/tinderbox/tree/master/data files.
-    # They are stored in a db instead of files.
-    log_search_pattern_list = get_log_search_pattern(Session, args.uuid, config['default_uuid'])
-    Session.close()
-    for text_line in io.TextIOWrapper(io.BufferedReader(gzip.open(args.file)), encoding='utf8', errors='ignore'):
-        logfile_text_dict[index] = text_line.strip('\n')
-        index = index + 1
-    max_text_lines = index
-    # run the parser patterns on the lines
-    for tmp_index, text in logfile_text_dict.items():
-        res = mp_pool.apply_async(search_buildlog, (log_search_pattern_list, logfile_text_dict, tmp_index, max_text_lines,))
-        if res.get():
-            print(json.dumps(res.get()))
-    mp_pool.close()
-    mp_pool.join()
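
Note the behavioural difference: the deleted parser dispatched on search_type ('in', 'startswith', 'endswith', 'search') and could attach up to five surrounding context lines via the start/end columns, while the new py/log_parser.py below runs every pattern through re.search() alone. Roughly, the old search types map onto plain regexes like this (a sketch, not code from the commit):

    import re

    def to_regex(search, search_type):
        # approximate regex equivalents of the dropped search_type dispatch
        if search_type == 'in':
            return re.escape(search)        # substring anywhere in the line
        if search_type == 'startswith':
            return '^' + re.escape(search)  # anchored at the line start
        if search_type == 'endswith':
            return re.escape(search) + '$'  # anchored at the line end
        return search                       # 'search' is already a regex
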
diff --git a/buildbot_gentoo_ci/steps/logs.py b/buildbot_gentoo_ci/steps/logs.py
index 6bd3279..e139cf9 100644
--- a/buildbot_gentoo_ci/steps/logs.py
+++ b/buildbot_gentoo_ci/steps/logs.py
@@ -86,6 +86,12 @@ class SetupPropertys(BuildStep):
         self.setProperty("default_project_data", default_project_data, 'default_project_data')
         self.setProperty("version_data", version_data, 'version_data')
         self.setProperty("status", 'completed', 'status')
+        if self.getProperty('faild_cpv'):
+            log_cpv = self.getProperty('faild_cpv')
+        else:
+            log_cpv = self.getProperty('cpv')
+        self.setProperty("log_cpv", log_cpv, 'log_cpv')
+        self.descriptionDone = 'Running log checker on ' + log_cpv
         return SUCCESS

 class SetupParserBuildLoger(BuildStep):
@@ -102,24 +108,42 @@
     @defer.inlineCallbacks
     def run(self):
+        self.aftersteps_list = []
         workdir = yield os.path.join(self.master.basedir, 'workers', self.getProperty('build_workername'), str(self.getProperty("project_build_data")['buildbot_build_id']))
-        if self.getProperty('faild_cpv'):
-            log_cpv = self.getProperty('log_build_data')[self.getProperty('faild_cpv')]
-        else:
-            log_cpv = self.getProperty('log_build_data')[self.getProperty('cpv')]
+        log_cpv = self.getProperty('log_build_data')[self.getProperty('log_cpv')]
+        mastersrc_log = yield os.path.join(workdir, log_cpv['full_logname'])
+        log_py = 'log_parser.py'
+        config_log_py = 'logparser.json'
+        mastersrc_py = yield os.path.join(self.master.basedir, log_py)
+        mastersrc_config = yield os.path.join(self.master.basedir, config_log_py)
+        # Upload logfile to worker
+        self.aftersteps_list.append(steps.FileDownload(
+            mastersrc=mastersrc_log,
+            workerdest=log_cpv['full_logname']
+        ))
+        # Upload log parser py code
+        self.aftersteps_list.append(steps.FileDownload(
+            mastersrc=mastersrc_py,
+            workerdest=log_py
+        ))
+        # Upload log parser py config
+        self.aftersteps_list.append(steps.FileDownload(
+            mastersrc=mastersrc_config,
+            workerdest=config_log_py
+        ))
+        # Run the log parser code
         command = []
-        command.append('ci_log_parser')
+        command.append('python3')
+        command.append(log_py)
         command.append('-f')
         command.append(log_cpv['full_logname'])
         command.append('-u')
         command.append(self.getProperty('project_data')['uuid'])
-        self.aftersteps_list = []
-        self.aftersteps_list.append(master_steps.MasterSetPropertyFromCommand(
+        self.aftersteps_list.append(steps.SetPropertyFromCommand(
             name = 'RunBuildLogParser',
             haltOnFailure = True,
             flunkOnFailure = True,
             command=command,
-            workdir=workdir,
             strip=False,
             extract_fn=PersOutputOfLogParser
         ))
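
steps.SetPropertyFromCommand hands the command's stdout to extract_fn, which returns a dict of build properties to set. PersOutputOfLogParser is defined elsewhere in this module; a minimal sketch, assuming it collects the JSON objects that py/log_parser.py prints one per line (the property name here is a guess, not taken from the commit):

    import json

    def PersOutputOfLogParser(rc, stdout, stderr):
        # collect one summary entry per matched log line
        summary_log_dict = {}
        for line in stdout.split('\n'):
            try:
                data = json.loads(line)
            except ValueError:
                continue
            for line_index, match in data.items():
                summary_log_dict[int(line_index)] = match
        return {'summary_log_dict': summary_log_dict}
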
@@ -296,19 +320,15 @@ class MakeIssue(BuildStep):
         separator1 = '\n'
         separator2 = ' '
         log = yield self.addLog('issue')
-        if self.getProperty('faild_cpv'):
-            cpv = self.getProperty('faild_cpv')
-        else:
-            cpv = self.getProperty('cpv')
-        self.error_dict['cpv'] = cpv
+        self.error_dict['cpv'] = self.getProperty('log_cpv')
         yield log.addStdout('Title:' + '\n')
-        yield log.addStdout(separator2.join([cpv, '-', self.error_dict['title']]) + separator1)
+        yield log.addStdout(separator2.join([self.getProperty('log_cpv'), '-', self.error_dict['title']]) + separator1)
         yield log.addStdout('Summary:' + '\n')
         for line in self.summary_log_list:
             yield log.addStdout(line + '\n')
         yield log.addStdout('Attachments:' + '\n')
         yield log.addStdout('emerge_info.log' + '\n')
-        log_cpv = self.getProperty('log_build_data')[cpv]
+        log_cpv = self.getProperty('log_build_data')[self.getProperty('log_cpv')]
         yield log.addStdout(log_cpv['full_logname'] + '\n')
         yield log.addStdout('world.log' + '\n')
@@ -389,9 +409,9 @@ class setBuildbotLog(BuildStep):
             yield log.addStdout(line + '\n')
         return SUCCESS

-class SetupParserEmergeInfoLog(BuildStep):
+class ReadEmergeInfoLog(BuildStep):

-    name = 'SetupParserEmergeInfoLog'
+    name = 'ReadEmergeInfoLog'
     description = 'Running'
     descriptionDone = 'Ran'
     descriptionSuffix = None
@@ -404,21 +424,22 @@ class SetupParserEmergeInfoLog(BuildStep):
     @defer.inlineCallbacks
     def run(self):
+        emerge_info_output = {}
+        emerge_info_list = []
+        emerge_package_info = []
+        # Read the file and add it to a property
         workdir = yield os.path.join(self.master.basedir, 'workers', self.getProperty('build_workername'), str(self.getProperty("project_build_data")['buildbot_build_id']))
-        command = []
-        command.append('cat')
-        command.append('emerge_info.txt')
-        self.aftersteps_list = []
-        self.aftersteps_list.append(master_steps.MasterSetPropertyFromCommand(
-            name = 'RunEmergeInfoLogParser',
-            haltOnFailure = True,
-            flunkOnFailure = True,
-            command=command,
-            workdir=workdir,
-            strip=False,
-            extract_fn=PersOutputOfEmergeInfo
-        ))
-        yield self.build.addStepsAfterCurrentStep(self.aftersteps_list)
+        with open(os.path.join(workdir, 'emerge_info.txt'), encoding='utf-8') as source:
+            emerge_info = source.read()
+        # set the emerge_info_output property
+        for line in emerge_info.split('\n'):
+            if line.startswith('['):
+                emerge_package_info.append(line)
+            else:
+                emerge_info_list.append(line)
+        emerge_info_output['emerge_info'] = emerge_info_list
+        emerge_info_output['emerge_package_info'] = emerge_package_info
+        self.setProperty("emerge_info_output", emerge_info_output, 'emerge_info_output')
         return SUCCESS

 class setEmergeInfoLog(BuildStep):
@@ -483,10 +504,7 @@ class Upload(BuildStep):
     @defer.inlineCallbacks
     def run(self):
-        if self.getProperty('faild_cpv'):
-            log_cpv = self.getProperty('log_build_data')[self.getProperty('faild_cpv')]
-        else:
-            log_cpv = self.getProperty('log_build_data')[self.getProperty('cpv')]
+        log_cpv = self.getProperty('log_build_data')[self.getProperty('log_cpv')]
         bucket = self.getProperty('project_data')['uuid'] + '-' + 'logs'
         file_path = yield os.path.join(self.master.basedir, 'workers', self.getProperty('build_workername'), str(self.getProperty("project_build_data")['buildbot_build_id']), log_cpv['full_logname'])
         aftersteps_list = []
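
The renamed ReadEmergeInfoLog step above stops shelling out to cat on the master and instead reads the master's copy of emerge_info.txt directly. Its split relies on Portage's package rows starting with '[', so an illustrative line such as

    [ebuild   R    ] dev-lang/python-3.9.13:3.9::gentoo  USE="..." 0 KiB

lands in emerge_package_info, while everything else (the plain emerge --info output) goes into emerge_info.
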
diff --git a/py/log_parser.py b/py/log_parser.py
new file mode 100644
index 0000000..dd48295
--- /dev/null
+++ b/py/log_parser.py
@@ -0,0 +1,159 @@
+# Copyright 2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import sys
+from multiprocessing import Pool, cpu_count
+import re
+import io
+import gzip
+import json
+import os
+from sqlalchemy.ext.declarative import declarative_base
+import sqlalchemy as sa
+import argparse
+
+Base = declarative_base()
+
+class ProjectsPattern(Base):
+    __tablename__ = "projects_pattern"
+    id = sa.Column(sa.Integer, primary_key=True)
+    project_uuid = sa.Column(sa.String(36), nullable=False)
+    search = sa.Column(sa.String(50), nullable=False)
+    start = sa.Column(sa.Integer, default=0)
+    end = sa.Column(sa.Integer, default=0)
+    status = sa.Column(sa.Enum('info', 'warning', 'ignore', 'error'), default='info')
+    type = sa.Column(sa.Enum('info', 'qa', 'compile', 'configure', 'install', 'postinst', 'prepare', 'pretend', 'setup', 'test', 'unpack', 'ignore', 'issues', 'misc', 'elog'), default='info')
+    search_type = sa.Column(sa.Enum('in', 'startswith', 'endswith', 'search'), default='in')
+
+def get_pattern_dict(project_pattern):
+    pattern_dict = {}
+    pattern_dict['id'] = project_pattern.id
+    pattern_dict['project_uuid'] = project_pattern.project_uuid
+    pattern_dict['search'] = project_pattern.search
+    pattern_dict['status'] = project_pattern.status
+    pattern_dict['type'] = project_pattern.type
+    return pattern_dict
+
+def addPatternToList(Session, log_search_pattern, uuid):
+    for project_pattern in Session.query(ProjectsPattern).filter_by(project_uuid=uuid).all():
+        # check if the search pattern is valid
+        project_pattern_search = project_pattern.search
+        try:
+            re.compile(project_pattern_search)
+        except re.error:
+            print("Invalid regex pattern")
+            print(project_pattern.search)
+            print(project_pattern.id)
+        else:
+            if project_pattern.type == 'ignore':
+                log_search_pattern['ignore'].append(get_pattern_dict(project_pattern))
+            elif project_pattern.type == 'test':
+                log_search_pattern['test'].append(get_pattern_dict(project_pattern))
+            else:
+                log_search_pattern['default'].append(get_pattern_dict(project_pattern))
+    return log_search_pattern
+
+def get_log_search_pattern(Session, uuid, default_uuid):
+    # get the patterns from the projects and add them to log_search_pattern
+    log_search_pattern = {}
+    log_search_pattern['ignore'] = []
+    log_search_pattern['default'] = []
+    log_search_pattern['test'] = []
+    log_search_pattern = addPatternToList(Session, log_search_pattern, uuid)
+    log_search_pattern = addPatternToList(Session, log_search_pattern, default_uuid)
+    return log_search_pattern
+
+def get_search_pattern_match(log_search_pattern, text_line):
+    for search_pattern in log_search_pattern:
+        if re.search(search_pattern['search'], text_line):
+            return search_pattern
+    return False
+
+def search_buildlog(log_search_pattern, text_line, index):
+    summary = {}
+    #FIXME: add check for test
+    # don't log ignored lines
+    if get_search_pattern_match(log_search_pattern['ignore'], text_line):
+        return False
+    # search the default patterns
+    search_pattern_match = get_search_pattern_match(log_search_pattern['default'], text_line)
+    if search_pattern_match:
+        summary[index] = dict(
+            text=text_line,
+            type=search_pattern_match['type'],
+            status=search_pattern_match['status'],
+            id=search_pattern_match['id'],
+            search_pattern=search_pattern_match['search']
+        )
+        return summary
+    # we add all lines that start with ' * ' or '>>>' as info
+    if text_line.startswith(' * ') or text_line.startswith('>>>'):
+        summary[index] = dict(
+            text=text_line,
+            type='info',
+            status='info',
+            id=0,
+            search_pattern='auto'
+        )
+        return summary
+    return False
+
+def getConfigSettings():
+    #configpath = os.getcwd()
+    with open('logparser.json') as f:
+        config = json.load(f)
+    return config
+
+def getDBSession(config):
+    engine = sa.create_engine(config['database'])
+    Session = sa.orm.sessionmaker(bind=engine)
+    return Session()
+
+def getMultiprocessingPool(config):
+    return Pool(processes=int(config['core']))
+
+def getJsonResult(results):
+    for r in results:
+        try:
+            value = r.get()
+        except Exception as e:
+            print(f'Failed with: {e}')
+        else:
+            if value:
+                print(json.dumps(value), flush=True)
+
+def runLogParser(args):
+    index = 1
+    logfile_text_dict = {}
+    config = getConfigSettings()
+    Session = getDBSession(config)
+    #mp_pool = getMultiprocessingPool(config)
+    summary = {}
+    #NOTE: The patterns are from the https://github.com/toralf/tinderbox/tree/master/data files.
+    # They are stored in a db instead of files.
+    log_search_pattern = get_log_search_pattern(Session, args.uuid, config['default_uuid'])
+    Session.close()
+    # read the log file into a dict
+    for text_line in io.TextIOWrapper(io.BufferedReader(gzip.open(args.file)), encoding='utf8', errors='ignore'):
+        logfile_text_dict[index] = text_line.strip('\n')
+        index = index + 1
+    # run the search patterns on the text lines
+    #params = [(log_search_pattern, text, line_index,) for line_index, text in logfile_text_dict.items()]
+    with getMultiprocessingPool(config) as pool:
+        results = list(pool.apply_async(search_buildlog, args=(log_search_pattern, text, line_index,)) for line_index, text in logfile_text_dict.items())
+        #results = pool.starmap(search_buildlog, params)
+        getJsonResult(results)
+        pool.close()
+        pool.join()
+
+def main():
+    # get filename, project_uuid and default_project_uuid
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-f", "--file", required=True)
+    parser.add_argument("-u", "--uuid", required=True)
+    args = parser.parse_args()
+    runLogParser(args)
+    sys.exit()
+
+if __name__ == "__main__":
+    main()
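
The new script expects a logparser.json in its working directory (copied to the worker by SetupParserBuildLoger above) holding the three keys it reads: database, core and default_uuid. A minimal example, with placeholder values that are not taken from the commit:

    {
        "database": "mysql://ci_user:ci_pass@localhost/gentoo_ci",
        "core": 4,
        "default_uuid": "00000000-0000-0000-0000-000000000000"
    }

With that in place it is run as python3 log_parser.py -f <logfile>.log.gz -u <project_uuid>; every matched line is printed as a one-line JSON object keyed on the log line number, which the buildbot step then parses back out of stdout.
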