From 88c18ffe936e602c45dcaa7500961623c49697ca Mon Sep 17 00:00:00 2001
From: fuzzyray <fuzzyray@gentoo.org>
Date: Wed, 20 May 2009 21:35:12 +0000
Subject: Handle unicode encoding when dumping to stdout and start migration to
 using StringIO

svn path=/trunk/gentoolkit/; revision=645
---
 bin/glsa-check                  |  5 +++-
 pym/gentoolkit/glsa/__init__.py | 51 +++++++++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/bin/glsa-check b/bin/glsa-check
index f33bc23..9b99451 100644
--- a/bin/glsa-check
+++ b/bin/glsa-check
@@ -5,6 +5,7 @@
 
 import os
 import sys
+import codecs
 try:
 	import portage
 except ImportError:
@@ -199,7 +200,9 @@ for p in params[:]:
 
 glsalist.extend([g for g in params if g not in glsalist])
 
-def summarylist(myglsalist, fd1=sys.stdout, fd2=sys.stderr):
+def summarylist(myglsalist, fd1=sys.stdout, fd2=sys.stderr, encoding="utf-8"):
+	fd1 = codecs.getwriter(encoding)(fd1)
+	fd2 = codecs.getwriter(encoding)(fd2)
 	if not quiet:
 		fd2.write(white("[A]")+" means this GLSA was already applied,\n")
 		fd2.write(green("[U]")+" means the system is not affected and\n")
diff --git a/pym/gentoolkit/glsa/__init__.py b/pym/gentoolkit/glsa/__init__.py
index 313c1f4..b325bd5 100644
--- a/pym/gentoolkit/glsa/__init__.py
+++ b/pym/gentoolkit/glsa/__init__.py
@@ -16,8 +16,10 @@ __author__ = "Marius Mauch <genone@gentoo.org>"
 import os
 import sys
 import urllib
+import codecs
 import re
 import xml.dom.minidom
+from StringIO import StringIO
 
 if sys.version_info[0:2] < (2,3):
 	raise NotImplementedError("Python versions below 2.3 have broken XML code " \
@@ -175,7 +177,7 @@ def getListElements(listnode):
 		rValue.append(getText(li, format="strip"))
 	return rValue
 
-def getText(node, format):
+def getText(node, format, textfd = None):
 	"""
 	This is the main parser function. It takes a node and traverses
 	recursive over the subnodes, getting the text of each (and the
@@ -193,50 +195,58 @@ def getText(node, format):
 					replaces multiple spaces with one space.
 					I{xml} does some more formatting, depending on the
 					type of the encountered nodes.
+	@type	textfd: writable file-like object
+	@param	textfd: the file-like object to write the output to
 	@rtype:		String
 	@return:	the (formatted) content of the node and its subnodes
+			except if textfd was not none
 	"""
-	rValue = ""
+	if not textfd:
+		textfd = StringIO()
+		returnNone = False
+	else:
+		returnNone = True
 	if format in ["strip", "keep"]:
 		if node.nodeName in ["uri", "mail"]:
-			rValue += node.childNodes[0].data+": "+node.getAttribute("link")
+			textfd.write(node.childNodes[0].data+": "+node.getAttribute("link"))
 		else:
 			for subnode in node.childNodes:
 				if subnode.nodeName == "#text":
-					rValue += subnode.data
+					textfd.write(subnode.data)
 				else:
-					rValue += getText(subnode, format)
-	else:
+					getText(subnode, format, textfd)
+	else: # format = "xml"
 		for subnode in node.childNodes:
 			if subnode.nodeName == "p":
 				for p_subnode in subnode.childNodes:
 					if p_subnode.nodeName == "#text":
-						rValue += p_subnode.data.strip()
+						textfd.write(p_subnode.data.strip())
 					elif p_subnode.nodeName in ["uri", "mail"]:
-						rValue += p_subnode.childNodes[0].data
-						rValue += " ( "+p_subnode.getAttribute("link")+" )"
-				rValue += NEWLINE_ESCAPE
+						textfd.write(p_subnode.childNodes[0].data)
+						textfd.write(" ( "+p_subnode.getAttribute("link")+" )")
+				textfd.write(NEWLINE_ESCAPE)
 			elif subnode.nodeName == "ul":
 				for li in getListElements(subnode):
-					rValue += "-"+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" "
+					textfd.write("-"+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" ")
 			elif subnode.nodeName == "ol":
 				i = 0
 				for li in getListElements(subnode):
 					i = i+1
-					rValue += str(i)+"."+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" "
+					textfd.write(str(i)+"."+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" ")
 			elif subnode.nodeName == "code":
-				rValue += getText(subnode, format="keep").replace("\n", NEWLINE_ESCAPE)
-				if rValue[-1*len(NEWLINE_ESCAPE):] != NEWLINE_ESCAPE:
-					rValue += NEWLINE_ESCAPE
+				textfd.write(getText(subnode, format="keep").lstrip().replace("\n", NEWLINE_ESCAPE))
+				textfd.write(NEWLINE_ESCAPE)
 			elif subnode.nodeName == "#text":
-				rValue += subnode.data
+				textfd.write(subnode.data)
 			else:
 				raise GlsaFormatException("Invalid Tag found: ", subnode.nodeName)
+	if returnNone:
+		return None
+	rValue = textfd.getvalue()
 	if format == "strip":
 		rValue = rValue.strip(" \n\t")
 		rValue = re.sub("[\s]{2,}", " ", rValue)
-	# Hope that the utf conversion doesn't break anything else
-	return rValue.encode("utf_8")
+	return rValue
 
 def getMultiTagsText(rootnode, tagname, format):
 	"""
@@ -591,16 +601,17 @@ class Glsa:
 		self.services = self.affected.getElementsByTagName("service")
 		return None
 
-	def dump(self, outstream=sys.stdout):
+	def dump(self, outstream=sys.stdout, encoding="utf-8"):
 		"""
 		Dumps a plaintext representation of this GLSA to I{outfile} or 
 		B{stdout} if it is ommitted. You can specify an alternate
-		I{encoding} if needed (default is latin1).
+		I{encoding} if needed (default is utf-8).
 		
 		@type	outstream: File
 		@param	outfile: Stream that should be used for writing
 						 (defaults to sys.stdout)
 		"""
+		outstream = codecs.getwriter(encoding)(outstream)
 		width = int(self.config["PRINTWIDTH"])
 		outstream.write(center("GLSA %s: \n%s" % (self.nr, self.title), width)+"\n")
 		outstream.write((width*"=")+"\n")
-- 
cgit v1.2.3-65-gdbad