Fri Feb 24 00:31:28 UTC 2006  Alberto Bertogli <albertogli@telpin.com.ar>
  * Rewrite encoding handling.
  This patch rewrites fixu8() and affects a couple of places where decode() was
  used directly.
  
  It has the benefit of removing the ugly previous function, replacing it
  with...  well, another ugly function, but with a different kind of ugliness.
  
  It also supports multiple encodings, so if the first one fails, a second one
  is tried (and so on).
  
  There still are some corner cases with files named in alternative encodings,
  but it should work much better than the older code.
diff -rN -u old-darcsweb/config.py.sample new-darcsweb/config.py.sample
--- old-darcsweb/config.py.sample	2014-10-30 14:26:56.000000000 +0000
+++ new-darcsweb/config.py.sample	2014-10-30 14:26:56.000000000 +0000
@@ -80,6 +80,9 @@
 	# like 'utf-8' or 'UTF8') if you expect darcsweb to work properly.
 	# This is because to workaround a bug in darcs we need to do some
 	# codec mangling and it needs special cases for UTF8.
+	# You can, optionally, specify multiple encodings; they're tried in
+	# order, and if one fails to decode a string, the next one is tried.
+	# Example: repoencoding = "utf8", "latin1"
 	repoencoding = "latin1"
 
 	# as with the base configuration, the footer is also optional, and it
diff -rN -u old-darcsweb/darcsweb.cgi new-darcsweb/darcsweb.cgi
--- old-darcsweb/darcsweb.cgi	2014-10-30 14:26:56.000000000 +0000
+++ new-darcsweb/darcsweb.cgi	2014-10-30 14:26:56.000000000 +0000
@@ -88,42 +88,24 @@
 
 # I _hate_ this.
 def fixu8(s):
-	openpos = s.find('[_')
-	if openpos < 0:
-		# small optimization to avoid the conversion to utf8 and
-		# entering the loop
-		if type(s) == unicode:
-			# workaround for python < 2.4
-			return s.encode('utf8')
-		else:
-			return s.decode(config.repoencoding).encode('utf8')
+	"""Calls _fixu8(), which does the real work, line by line. Otherwise
+	we choose the wrong encoding for big buffers and end up messing
+	output."""
+	n = []
+	for i in s.split('\n'):
+		n.append(_fixu8(i))
+	return string.join(n, '\n')
+
+def _fixu8(s):
+	if type(s) == unicode:
+		return s.encode('utf8', 'replace')
+	for e in config.repoencoding:
+		try:
+			return s.decode(e).encode('utf8', 'replace')
+		except UnicodeDecodeError:
+			pass
+	raise 'DecodingError', config.repoencoding
 
-	s = s.encode(config.repoencoding).decode('raw_unicode_escape')
-	while openpos >= 0:
-		closepos = s.find('_]', openpos)
-		if closepos < 0:
-			# not closed, probably just luck
-			break
-
-		# middle should be something like 'c3', so we get it by
-		# removing the first three characters ("[_\")
-		middle = s[openpos + 3:closepos]
-		if len(middle) == 2:
-			# now we turn middle into the character "\xc3"
-			char = chr(int(middle, 16))
-
-			# finally, replace s with our new improved string, and
-			# repeat the ugly procedure
-			char = char.decode(config.repoencoding)
-			mn = '[_\\' + middle + '_]'
-			s = s.replace(mn, char, 1)
-		openpos = s.find('[_', openpos + 1)
-
-	if config.repoencoding != 'utf8':
-		s = s.encode('utf8')
-	else:
-		s = s.encode('raw_unicode_escape', 'replace')
-	return s
 
 def escape(s):
 	s = xml_escape(s)
@@ -585,7 +567,7 @@
 	"""Runs darcs on the repodir with the given params, return a file
 	object with its output."""
 	os.chdir(config.repodir)
-	cmd = config.darcspath + "darcs " + params
+	cmd = 'DARCS_DONT_ESCAPE_8BIT=1 ' + config.darcspath + "darcs " + params
 	inf, outf = os.popen4(cmd, 't')
 	darcs_runs.append(params)
 	return outf
@@ -643,6 +625,24 @@
 					return i
 		return ''
 
+class XmlInputWrapper:
+	def __init__(self, fd):
+		self.fd = fd
+		self.times = 0
+		self._read = self.read
+
+	def read(self, *args, **kwargs):
+		self.times += 1
+		if self.times == 1:
+			return '<?xml version="1.0" encoding="utf-8"?>\n'
+		s = self.fd.read(*args, **kwargs)
+		if not s:
+			return s
+		return fixu8(s)
+
+	def close(self, *args, **kwargs):
+		return self.fd.close(*args, **kwargs)
+
 
 # patch parsing, we get them through "darcs changes --xml-output"
 class BuildPatchList(xml.sax.handler.ContentHandler):
@@ -812,7 +812,7 @@
 
 	# get the xml output and parse it
 	xmlf = run_darcs("changes --xml-output " + params)
-	parser.parse(xmlf)
+	parser.parse(XmlInputWrapper(xmlf))
 	xmlf.close()
 
 	return handler
@@ -1009,7 +1009,7 @@
 
 def print_diff(dsrc):
 	for l in dsrc:
-		l = l.decode(config.repoencoding, 'replace').encode('utf-8')
+		l = fixu8(l)
 
 		# remove the trailing newline
 		if len(l) > 1:
@@ -1035,7 +1035,7 @@
 
 def print_darcs_diff(dsrc):
 	for l in dsrc:
-		l = l.decode(config.repoencoding, 'replace').encode('utf-8')
+		l = fixu8(l)
 
 		if not l.startswith("    "):
 			# comments and normal stuff
@@ -2185,7 +2185,13 @@
 				repodir = rdir
 				repodesc = desc
 				repourl = url
-				repoencoding = c.repoencoding
+
+				# repoencoding must be a tuple
+				if c.repoencoding is str:
+					repoencoding = (c.repoencoding, )
+				else:
+					repoencoding = c.repoencoding
+
 				if 'footer' in dir(c):
 					footer = c.footer
 			config.__setattr__(name, tmp_config)