pym/repoman/utilities.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631

# repoman: Utilities
# Copyright 2007-2011 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

"""This module contains utility functions to help repoman find ebuilds to
scan"""

from __future__ import print_function

# Names exported by "from <this module> import *".
__all__ = [
	"detect_vcs_conflicts",
	"editor_is_executable",
	"FindPackagesToScan",
	"FindPortdir",
	"FindVCS",
	"format_qa_output",
	"get_commit_message_with_editor",
	"get_commit_message_with_stdin",
	"have_profile_dir",
	"parse_metadata_use",
	"UnknownHerdsError",
	"check_metadata",
	"UpdateChangeLog"
]

import errno
import io
from itertools import chain
import logging
import pwd
import sys
import time
import textwrap
import difflib
import shutil
from tempfile import mkstemp

from portage import os
from portage import subprocess_getstatusoutput
from portage import _encodings
from portage import _unicode_decode
from portage import _unicode_encode
from portage import output
from portage.localization import _
from portage.output import red, green
from portage.process import find_binary
from portage import exception
from portage import util
# Module-level alias so the functions below can call normalize_path() directly.
normalize_path = util.normalize_path
# Runs at import time.
# NOTE(review): presumably configures the logging setup used by the
# logging.* calls in this module -- confirm in portage.util.
util.initialize_logger()

# Python 3 has no "basestring" builtin; alias it to str so code in this
# module that refers to basestring works on both major versions.
if sys.hexversion >= 0x3000000:
	basestring = str

def detect_vcs_conflicts(options, vcs):
	"""Determine if the checkout has problems like cvs conflicts.

	Only 'cvs' and 'svn' are handled; for any other vcs value this is a
	no-op. If you want more vcs support here just keep adding if blocks...
	This could be better.

	TODO(antarus): Also this should probably not call sys.exit() as
	repoman is run on >1 packages and one failure should not cause
	subsequent packages to fail.

	Args:
		options - parsed command line options; only options.pretend is read
			(when true, the update command is logged instead of executed)
		vcs - A string identifying the version control system in use
	Returns:
		None (calls sys.exit on fatal problems)
	"""
	if vcs not in ('cvs', 'svn'):
		return

	if vcs == 'cvs':
		logging.info("Performing a " + output.green("cvs -n up") + \
			" with a little magic grep to check for updates.")
		# The backslash is doubled so the shell still receives '\?' without
		# relying on an invalid Python escape sequence.
		retval = subprocess_getstatusoutput("cvs -n up 2>/dev/null | " + \
			"egrep '^[^\\?] .*' | " + \
			"egrep -v '^. .*/digest-[^/]+|^cvs server: .* -- ignored$'")
	else:
		logging.info("Performing a " + output.green("svn status -u") + \
			" with a little magic grep to check for updates.")
		retval = subprocess_getstatusoutput("svn status -u 2>&1 | " + \
			"egrep -v '^.  +.*/digest-[^/]+' | " + \
			"head -n-1")

	myupdates = []
	for line in retval[1].splitlines():
		if not line:
			continue
		if line[0] not in " UPMARD": # unmodified(svn),Updates,Patches,Modified,Added,Removed/Replaced(svn),Deleted(svn)
			# Stray Manifest is fine, we will readd it anyway.
			if line[0] == '?' and line[1:].lstrip() == 'Manifest':
				continue
			logging.error(red("!!! Please fix the following issues reported " + \
				"from " + vcs + ": ")+green("(U,P,M,A,R,D are ok)"))
			logging.error(red("!!! Note: This is a pretend/no-modify pass..."))
			logging.error(retval[1])
			sys.exit(1)
		elif vcs == 'cvs' and line[0] in "UP":
			myupdates.append(line[2:])
		elif vcs == 'svn' and len(line) > 8 and line[8] == '*':
			# Column 9 of "svn status -u" flags out-of-date items; the
			# length check keeps short lines from raising IndexError.
			myupdates.append(line[9:].lstrip(" 1234567890"))

	if not myupdates:
		return

	logging.info(green("Fetching trivial updates..."))
	update_cmd = vcs + " update " + " ".join(myupdates)
	if options.pretend:
		logging.info("(" + update_cmd + ")")
		return

	# NOTE(review): file names are interpolated into a shell command
	# unquoted; names containing whitespace or shell metacharacters
	# will not be passed through correctly.
	status = os.system(update_cmd)
	if status != os.EX_OK:
		logging.critical("!!! " + vcs + " exited with an error. Terminating.")
		# os.system() returns a wait status, not an exit code. Handing it
		# to sys.exit() directly would truncate e.g. 256 (exit code 1) to
		# 0 and make the failure look like success.
		if os.WIFEXITED(status):
			sys.exit(os.WEXITSTATUS(status))
		sys.exit(1)


def have_profile_dir(path, maxdepth=3, filename="profiles.desc"):
	"""
	Look for a profiles/<filename> entry in 'path' or one of its parents.

	Starting at 'path', at most 'maxdepth' directories are examined, moving
	one level up each time, and the search stops once "/" is reached.

	@param path: directory to start searching from
	@param maxdepth: number of directory levels to examine
	@param filename: file expected inside the profiles/ directory
	@returns: the normalized path of the first directory that contains
		profiles/<filename>, or None if none was found.
	"""
	candidate = path
	levels_left = maxdepth
	while True:
		if candidate == "/" or not levels_left:
			return None
		marker = os.path.join(candidate, "profiles", filename)
		if os.path.exists(marker):
			return normalize_path(candidate)
		# step up to the parent directory
		candidate = normalize_path(candidate + "/..")
		levels_left -= 1

def _inner_text_parts(element):
	"""
	Return the text fragments found inside 'element', in document order.

	This emulates the Element.itertext() method from python-2.7: the
	element's own text comes first, followed by, for every child, the
	child's inner text and then the child's tail. The tail of 'element'
	itself is not included, since it lies outside of the element.
	"""
	parts = []
	if element.text is not None:
		parts.append(element.text)
	for child in element:
		parts.extend(_inner_text_parts(child))
		if child.tail is not None:
			parts.append(child.tail)
	return parts

def parse_metadata_use(xml_tree):
	"""
	Records are wrapped in XML as per GLEP 56.

	@param xml_tree: object whose findall("use") yields the 'use' elements
	@returns: a dict keyed by USE flag name. Each value is a dict mapping
		the flag's 'restrict' attribute (None when the attribute is absent)
		to the flag description with runs of whitespace collapsed to
		single spaces.
	@raises exception.ParseError: if a 'flag' element has no 'name'
		attribute.
	"""
	uselist = {}

	# It's possible to have multiple 'use' elements, and the DTD allows
	# use elements containing no flag elements.
	for usetag in xml_tree.findall("use"):
		for flag in usetag.findall("flag"):
			pkg_flag = flag.get("name")
			if pkg_flag is None:
				raise exception.ParseError("missing 'name' attribute for 'flag' tag")
			# (flag_restrict can be None)
			flag_restrict = flag.get("restrict")

			description = " ".join("".join(_inner_text_parts(flag)).split())
			uselist.setdefault(pkg_flag, {})[flag_restrict] = description

	return uselist

class UnknownHerdsError(ValueError):
	"""Raised when one or more herd names are not known.

	The message lists every offending name in double quotes, separated
	by commas.
	"""
	def __init__(self, herd_names):
		if len(herd_names) == 1:
			noun = 'herd'
		else:
			noun = 'herds'
		quoted_names = ['"%s"' % name for name in herd_names]
		message = 'Unknown %s %s' % (noun, ','.join(quoted_names))
		super(UnknownHerdsError, self).__init__(message)


def check_metadata_herds(xml_tree, herd_base):
	"""
	Verify that every 'herd' element of xml_tree names a known herd.

	Elements without text are ignored; surrounding whitespace is stripped
	from the names before they are passed to herd_base.known_herd().

	@raises UnknownHerdsError: listing all names that herd_base does not
		know, if there are any.
	"""
	unknown_herds = []
	for node in xml_tree.findall('herd'):
		if node.text is None:
			continue
		herd_name = node.text.strip()
		if not herd_base.known_herd(herd_name):
			unknown_herds.append(herd_name)

	if unknown_herds:
		raise UnknownHerdsError(unknown_herds)

def check_metadata(xml_tree, herd_base):
	"""
	Run the metadata checks on xml_tree.

	The herd check is skipped when herd_base is None; otherwise any
	exception raised by check_metadata_herds() propagates to the caller.
	"""
	if herd_base is None:
		return
	check_metadata_herds(xml_tree, herd_base)

def FindPackagesToScan(settings, startdir, reposplit):
	""" Try to find packages that need to be scanned

	Args:
		settings - portage.config instance, preferably repoman_settings;
			only settings.categories is read
		startdir - directory that repoman was run in
		reposplit - sequence of path components; its length selects the
			level (1 = root of the tree, 2 = category, 3 = package) and
			its trailing entries name the category and package
	Returns:
		A list of 'category/package' directories to scan (empty when the
		category is not valid or the level is not 1, 2 or 3)
	"""

	def AddPackagesInDir(path):
		""" Return a 'category/package' entry for each package dir in path """
		ret = []
		for d in os.listdir(path):
			# skip VCS bookkeeping and hidden entries
			if d == 'CVS' or d.startswith('.'):
				continue
			p = os.path.join(path, d)

			if os.path.isdir(p):
				# keep only the last two path components
				cat_pkg_dir = os.path.join(*p.split(os.path.sep)[-2:])
				logging.debug('adding %s to scanlist' % cat_pkg_dir)
				ret.append(cat_pkg_dir)
		return ret

	scanlist = []
	repolevel = len(reposplit)
	if repolevel == 1: # root of the tree, startdir = repodir
		for cat in settings.categories:
			path = os.path.join(startdir, cat)
			if not os.path.isdir(path):
				continue
			scanlist.extend(AddPackagesInDir(path))
	elif repolevel == 2: # category level, startdir = catdir
		# we only want 1 segment of the directory, is why we use catdir instead of startdir
		# NOTE(review): with two components, [-2] selects the first one
		# rather than the last; confirm against the caller's reposplit.
		catdir = reposplit[-2]
		if catdir not in settings.categories:
			# logging.warning() is used because the logging.warn() alias is
			# deprecated and no longer exists in current Python releases.
			logging.warning('%s is not a valid category according to profiles/categories, ' \
				'skipping checks in %s' % (catdir, catdir))
		else:
			scanlist = AddPackagesInDir(catdir)
	elif repolevel == 3: # pkgdir level, startdir = pkgdir
		catdir = reposplit[-2]
		pkgdir = reposplit[-1]
		if catdir not in settings.categories:
			logging.warning('%s is not a valid category according to profiles/categories, ' \
				'skipping checks in %s' % (catdir, catdir))
		else:
			path = os.path.join(catdir, pkgdir)
			logging.debug('adding %s to scanlist' % path)
			scanlist.append(path)
	return scanlist


def format_qa_output(formatter, stats, fails, dofull, dofail, options, qawarnings):
	"""Write the QA summary into the given formatter

	Every category with a positive count gets one line holding its name
	and the count, styled "WARN" if the category is in qawarnings and
	"BAD" otherwise. When dofull is false, the failures recorded for the
	category follow, one per line.

	Args:
		formatter - a subclass of Formatter
		stats - a dict of qa status items (category -> count)
		fails - a dict of qa status failures (category -> list of messages)
		dofull - when true, only the counts are written
		dofail - boolean to decide if failure was hard or soft; with
			hard failures the details of warning categories are left out
		options - object whose 'mode' attribute is compared with 'full';
			in full mode nothing is left out or truncated
		qawarnings - container of categories that are only warnings

	Returns:
		None (modifies formatter)
	"""
	full = options.mode == 'full'
	for category, number in stats.items():
		# we only want key value pairs where value > 0
		if not number > 0:
			continue

		formatter.add_literal_data(_unicode_decode("  " + category.ljust(30)))
		formatter.push_style("WARN" if category in qawarnings else "BAD")
		formatter.add_literal_data(_unicode_decode(str(number)))
		formatter.pop_style()
		formatter.add_line_break()

		if dofull:
			continue
		if not full and dofail and category in qawarnings:
			# warnings are considered noise when there are failures
			continue

		fails_list = fails[category]
		if not full and len(fails_list) > 12:
			# outside of full mode show at most 12 failures per category
			fails_list = fails_list[:12]
		for failure in fails_list:
			formatter.add_literal_data(_unicode_decode("   " + failure))
			formatter.add_line_break()


def editor_is_executable(editor):
	"""
	Check whether the command named by an EDITOR string can be executed.

	The string is split with shlex_split() and only its first component
	is examined: an absolute path has to be an executable regular file,
	anything else is looked up via find_binary().

	@param editor: An EDITOR value from the environment.
	@type: string
	@rtype: bool
	@returns: True if an executable is found, False otherwise.
	"""
	argv = util.shlex_split(editor)
	if argv:
		program = argv[0]
		if os.path.isabs(program):
			return os.access(program, os.X_OK) and os.path.isfile(program)
		return find_binary(program) is not None
	return False


def get_commit_message_with_editor(editor, message=None):
	"""
	Execute editor with a temporary file as its argument
	and return the file content afterwards.

	The temporary file is pre-filled with instructions and, if given,
	with the lines of 'message', all as '#' comment lines. Lines starting
	with '#' are removed from the result. The temporary file is deleted
	before returning.

	@param editor: An EDITOR value from the environment
	@type: string
	@param message: An iterable of lines to show in the editor.
	@type: iterable
	@rtype: string or None
	@returns: A string on success or None if an error occurs (the editor
		did not exit successfully, or the file is gone afterwards).
	"""
	fd, filename = mkstemp()
	try:
		try:
			os.write(fd, _unicode_encode(_(
				"\n# Please enter the commit message " + \
				"for your changes.\n# (Comment lines starting " + \
				"with '#' will not be included)\n"),
				encoding=_encodings['content'], errors='backslashreplace'))
			if message:
				os.write(fd, b"#\n")
				for line in message:
					os.write(fd, _unicode_encode("#" + line,
						encoding=_encodings['content'], errors='backslashreplace'))
		finally:
			# close the descriptor even if a write fails, so it is not leaked
			os.close(fd)

		retval = os.system(editor + " '%s'" % filename)
		if not (os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == os.EX_OK):
			return None

		try:
			with io.open(_unicode_encode(filename,
				encoding=_encodings['fs'], errors='strict'),
				mode='r', encoding=_encodings['content'], errors='replace'
				) as edited_file:
				mylines = edited_file.readlines()
		except (IOError, OSError) as e:
			# io.open() raises IOError on Python 2, where it is not a
			# subclass of OSError; a missing file means "no message".
			if e.errno != errno.ENOENT:
				raise
			return None
		return "".join(line for line in mylines if not line.startswith("#"))
	finally:
		# best-effort cleanup; the editor may already have removed the file
		try:
			os.unlink(filename)
		except OSError:
			pass


def get_commit_message_with_stdin():
	"""
	Prompt for a commit message and read it from standard input.

	Lines are read until end of file is reached.

	@rtype: string
	@returns: everything that was read, with line endings preserved.
	"""
	print("Please enter a commit message. Use Ctrl-d to finish or Ctrl-c to abort.")
	collected = []
	line = sys.stdin.readline()
	# readline() returns an empty string at end of file
	while line:
		collected.append(line)
		line = sys.stdin.readline()
	return "".join(collected)


def FindPortdir(settings):
	""" Try to figure out what repo we are in and whether we are in a regular
	tree or an overlay.

	Basic logic is:

	1. Determine what directory we are in. The PWD environment variable is
	   preferred over getcwd() when both refer to the same directory, so
	   that symlinks in the path are kept.
	2. Record the (device, inode) pair of that directory and of each of
	   its parents up to "/".
	3. Iterate over PORTDIR_OVERLAY. The first overlay whose (device,
	   inode) pair was recorded in step 2 becomes portdir_overlay. If a
	   profiles/profiles.desc is found between our location and the
	   overlay, it becomes portdir as well.
	4. If no overlay matched, search upwards for profiles/repo_name and
	   use the directory containing it as portdir_overlay (and as portdir
	   if profiles/profiles.desc is found as well).
	5. If there is still no portdir_overlay: when PORTDIR (with a trailing
	   separator) starts with our location, use PORTDIR; otherwise search
	   upwards for profiles/profiles.desc. portdir is set to the same
	   value.
	6. If we haven't found portdir_overlay by now, log a critical error
	   and give up.
	7. If we haven't found a portdir yet, set portdir to PORTDIR.

	Args:
		settings - portage.config instance, preferably repoman_settings;
			the "PORTDIR_OVERLAY" and "PORTDIR" keys are read
	Returns:
		A list [portdir, portdir_overlay, location] of normalized paths,
		or the tuple (None, None, None) if portdir_overlay could not be
		determined.
	"""

	portdir = None
	portdir_overlay = None
	location = os.getcwd()
	# Note: this local string shadows the file-level "pwd" module import for
	# the rest of this function.
	pwd = os.environ.get('PWD', '')
	if pwd and pwd != location and os.path.realpath(pwd) == location:
		# getcwd() returns the canonical path but that makes it hard for repoman to
		# orient itself if the user has symlinks in their portage tree structure.
		# We use os.environ["PWD"], if available, to get the non-canonical path of
		# the current working directory (from the shell).
		location = pwd

	location = normalize_path(location)

	# Map (st_dev, st_ino) -> path for location and every parent directory,
	# so that overlays can be matched by identity rather than by spelling.
	path_ids = {}
	p = location
	s = None
	while True:
		s = os.stat(p)
		path_ids[(s.st_dev, s.st_ino)] = p
		if p == "/":
			break
		p = os.path.dirname(p)
	# From here on location always ends with a slash.
	if location[-1] != "/":
		location += "/"

	for overlay in settings["PORTDIR_OVERLAY"].split():
		overlay = os.path.realpath(overlay)
		try:
			s = os.stat(overlay)
		except OSError:
			# overlay cannot be stat'ed; ignore it
			continue
		# Replace the overlay path by the spelling recorded while walking up
		# from location (None if the overlay is not location or a parent).
		overlay = path_ids.get((s.st_dev, s.st_ino))
		if overlay is None:
			continue
		if overlay[-1] != "/":
			overlay += "/"
		# NOTE(review): always-true guard, looks like a leftover from a
		# removed condition.
		if True:
			portdir_overlay = overlay
			# Part of location below the overlay; the number of slashes in it
			# limits how many levels have_profile_dir() examines.
			subdir = location[len(overlay):]
			if subdir and subdir[-1] != "/":
				subdir += "/"
			if have_profile_dir(location, subdir.count("/")):
				portdir = portdir_overlay
			# only the first matching overlay is used
			break

	# Couldn't match location with anything from PORTDIR_OVERLAY,
	# so fall back to have_profile_dir() checks alone. Assume that
	# an overlay will contain at least a "repo_name" file while a
	# master repo (portdir) will contain at least a "profiles.desc"
	# file.
	if not portdir_overlay:
		portdir_overlay = have_profile_dir(location, filename="repo_name")
		if portdir_overlay:
			subdir = location[len(portdir_overlay):]
			if subdir and subdir[-1] != os.sep:
				subdir += os.sep
			if have_profile_dir(location, subdir.count(os.sep)):
				portdir = portdir_overlay

	if not portdir_overlay:
		# True when PORTDIR is location itself or lies below it.
		if (settings["PORTDIR"] + os.path.sep).startswith(location):
			portdir_overlay = settings["PORTDIR"]
		else:
			portdir_overlay = have_profile_dir(location)
		portdir = portdir_overlay
	
	if not portdir_overlay:
		msg = 'Repoman is unable to determine PORTDIR or PORTDIR_OVERLAY' + \
			' from the current working directory'
		logging.critical(msg)
		# Note: a tuple here, while the success path returns a list.
		return (None, None, None)

	if not portdir:
		portdir = settings["PORTDIR"]

	if not portdir_overlay.endswith('/'):
		portdir_overlay += '/'
	
	if not portdir.endswith('/'):
		portdir += '/'

	return [normalize_path(x) for x in (portdir, portdir_overlay, location)]

def FindVCS():
	""" Try to figure out in what VCS' working tree we are.

	Returns:
		A list with the names of the VCSes detected for the current
		working directory ('cvs', 'svn', 'git', 'bzr', 'hg'). Each name
		appears at most once; the list is empty if nothing was found.
	"""

	outvcs = []

	def seek(depth = None):
		""" Seek for VCSes that have a top-level data directory only.

		Starts in the current directory and moves up one parent at a
		time, for at most 'depth' levels (no limit if depth is None),
		stopping at the first level where something is found or at the
		filesystem root.
		"""
		retvcs = []
		pathprep = ''

		while depth is None or depth > 0:
			if os.path.isdir(os.path.join(pathprep, '.git')):
				retvcs.append('git')
			if os.path.isdir(os.path.join(pathprep, '.bzr')):
				retvcs.append('bzr')
			if os.path.isdir(os.path.join(pathprep, '.hg')):
				retvcs.append('hg')
			if os.path.isdir(os.path.join(pathprep, '.svn')):  # >=1.7
				retvcs.append('svn')

			if retvcs:
				break
			pathprep = os.path.join(pathprep, '..')
			# stop once the parent chain has reached the root directory
			if os.path.realpath(pathprep).strip('/') == '':
				break
			if depth is not None:
				depth = depth - 1

		return retvcs

	# Level zero VCS-es.
	if os.path.isdir('CVS'):
		outvcs.append('cvs')
	if os.path.isdir('.svn'):  # <1.7
		outvcs.append('svn')

	# If we already found one of 'level zeros', just take a quick look
	# at the current directory. Otherwise, seek parents till we get
	# something or reach root.
	if outvcs:
		# seek(1) inspects the current directory too and therefore finds
		# the same '.svn' again; skip names that are already listed so a
		# plain svn checkout is not reported as two working copies.
		for vcs_name in seek(1):
			if vcs_name not in outvcs:
				outvcs.append(vcs_name)
	else:
		outvcs = seek()

	return outvcs

def _changelog_user():
	""" Return the 'Name <email>' identity used to sign ChangeLog entries. """
	env = os.environ
	if 'GENTOO_COMMITTER_NAME' in env and 'GENTOO_COMMITTER_EMAIL' in env:
		return '%s <%s>' % (env['GENTOO_COMMITTER_NAME'],
			env['GENTOO_COMMITTER_EMAIL'])
	if 'GENTOO_AUTHOR_NAME' in env and 'GENTOO_AUTHOR_EMAIL' in env:
		return '%s <%s>' % (env['GENTOO_AUTHOR_NAME'],
			env['GENTOO_AUTHOR_EMAIL'])
	if 'ECHANGELOG_USER' in env:
		return env['ECHANGELOG_USER']
	# Fall back to the password database entry of the current user. The
	# fields are read by name so that "_" is not rebound as a throwaway.
	pw_entry = pwd.getpwuid(os.getuid())
	gecos = pw_entry.pw_gecos.split(',')[0]  # bug #80011
	return '%s <%s@gentoo.org>' % (gecos, pw_entry.pw_name)

def UpdateChangeLog(pkgdir, category, package, new, removed, changed, msg, pretend):
	""" Write an entry to an existing ChangeLog, or create a new one.

	The new ChangeLog is assembled in a temporary file: a fresh header,
	the new entry, then the lines of the old ChangeLog that do not start
	with '#'. A diff between old and new content is printed. Unless
	'pretend' is set, the temporary file then replaces pkgdir/ChangeLog.

	Args:
		pkgdir - directory containing the ChangeLog
		category, package - used for the header line
		new, removed, changed - lists of file names; added files are
			prefixed with '+', removed ones with '-'. ChangeLog and
			Manifest are left out of the new and changed lists, and every
			new '*.ebuild' gets a '*name (date)' line
		msg - the message for the entry (re-wrapped to 80 columns)
		pretend - if true, only show the diff and change nothing
	Returns:
		True if there was no previous ChangeLog content, False if there
		was, and None on error (running as root without an explicit
		identity, or an I/O failure, which is logged).
	"""

	# figure out who to write as
	user = _changelog_user()

	if '<root@' in user:
		err = 'Please set ECHANGELOG_USER or run as non-root'
		logging.critical(err)
		return None

	cl_path = os.path.join(pkgdir, 'ChangeLog')
	fd, clnew_path = mkstemp()

	try:
		# the with block closes the temporary file on every path
		with os.fdopen(fd, 'w+') as f:
			# create an empty ChangeLog.new with correct header first
			f.write('# ChangeLog for %s/%s\n' % (category, package))
			year = time.strftime('%Y')
			f.write('# Copyright 1999-%s Gentoo Foundation; Distributed under the GPL v2\n' % year)
			f.write('# $Header: $\n')
			f.write('\n')

			# write new ChangeLog entry
			date = time.strftime('%d %b %Y')
			newebuild = False
			for fn in new:
				if not fn.endswith('.ebuild'):
					continue
				# basename without the '.ebuild' suffix
				ebuild = fn.split(os.sep)[-1][0:-7]
				f.write('*%s (%s)\n' % (ebuild, date))
				newebuild = True
			if newebuild:
				f.write('\n')
			new = ['+' + elem for elem in new if elem not in ['ChangeLog', 'Manifest']]
			removed = ['-' + elem for elem in removed]
			changed = [elem for elem in changed if elem not in ['ChangeLog', 'Manifest']]
			mesg = '%s; %s %s:' % (date, user, \
					', '.join(chain(new,removed,changed)))
			for line in textwrap.wrap(mesg, 80, \
					initial_indent='  ', subsequent_indent='  ', \
					break_on_hyphens=False):
				f.write('%s\n' % line)
			for line in textwrap.wrap(msg, 80, \
					initial_indent='  ', subsequent_indent='  '):
				f.write('%s\n' % line)

			# append stuff from old ChangeLog
			cl_lines = []
			if os.path.exists(cl_path):
				with open(cl_path, 'r') as c:
					cl_lines = c.readlines()
				for index, line in enumerate(cl_lines):
					# skip the headers
					if line.startswith('#'):
						# normalise to $Header: $ to avoid pointless diff line
						if line.startswith('# $Header:'):
							cl_lines[index] = '# $Header: $\n'
						continue
					f.write(line)

			# read back what was written, for the diff below
			f.seek(0)
			clnew_lines = f.readlines()

		# show diff (do we want to keep on doing this, or only when
		# pretend?)
		for line in difflib.unified_diff(cl_lines, clnew_lines, \
				fromfile=cl_path, tofile=cl_path + '.new', n=0):
			print(line.rstrip())
		print()

		if pretend:
			# remove what we've done
			os.remove(clnew_path)
		else:
			# mkstemp() creates the file readable by its owner only, so
			# carry over the mode of the old ChangeLog, or use 0644 for a
			# new one, instead of installing an owner-only ChangeLog.
			try:
				cl_mode = os.stat(cl_path).st_mode & 0o7777
			except OSError:
				cl_mode = 0o644
			# rename ChangeLog.new to ChangeLog
			shutil.move(clnew_path, cl_path)
			os.chmod(cl_path, cl_mode)

		return not cl_lines
	except (IOError, OSError) as e:
		# OSError is listed explicitly because it is distinct from IOError
		# on Python 2 and is what os.remove() and os.chmod() raise there.
		err = 'Repoman is unable to create/write to Changelog.new file: %s' % (e,)
		logging.critical(err)
		# try to remove if possible
		try:
			os.remove(clnew_path)
		except OSError:
			pass
		return None