Usuari:TronaBot/Python/ortobot.py

    De Viquipèdia

    Mòdul article requerix el fitxer Usuari:TronaBot/Python/common.py.

    # -*- coding: utf-8 -*-
    
    import sys, os, time, random, re
    from platform import system as platfsys
    import webbrowser
    from platform import system as platfsys
    # Pick the directory that holds the bot libraries according to the host OS.
    on_win = "windows" == platfsys().lower()
    if on_win:
    	home = r"E:\\iShare\SugarSync\My Python scripts"
    else:
    	home = "/home/pasqual/public_html/"
    # Make the framework folder and the user helper folder importable before
    # the imports that follow.
    for folder in ("pywikilib", "pyuserlib"):
    	sys.path.append(os.path.join(home, folder))
    
    #pywikilib
    import wikipedia as pywikilib, pagegenerators as pg
    from replace import ReplaceRobot
    from pywikibot import i18n
    import editarticle
    
    # Imports predefined replacements tasks from fixes.py
    import fixes
    
    #user implementations
    from common import ArgumentHandler, File
    from common import format_string, get_diffs
    
    class Replacing(ReplaceRobot):
    	"""
    	ReplaceRobot variant that works on (pattern, new, topic) triples.

    	Every replacement carries a topic key. While replacing, the robot counts
    	the additions attributed to each topic and appends a "topic (count)"
    	list to the edit summary. `categories` optionally maps a topic key to
    	the label shown in that summary.
    	"""

    	# Placeholder "<:de:> word", resolved after each replacement to either
    	# "d'word" or "de word".
    	_DE_PLACEHOLDER = re.compile(u"<:de:> (\\w+)", re.U)
    	# Initial letters that make the preposition elide. A unicode pattern is
    	# required so the accented letters match as characters rather than as
    	# UTF-8 bytes; capitalised words are matched as well.
    	_ELIDING_INITIAL = re.compile(u"[aeiouàèéíòóúh]", re.U | re.I)

    	def __init__(
    			self, generator, replacements, exceptions=None, acceptall=False,
    			allowoverlap=False, recursive=False, addedCat=None, sleep=None,
    			editSummary='', articles=None, exctitles=None, categories=None
    		):
    		"""
    		Parameters:
    			generator    - iterable yielding the pages to process
    			replacements - list of (pattern, new, topic) triples; `new` may
    			               be a list, in which case one item is picked at
    			               random for each call
    			exceptions   - dict of exception lists; the keys 'inside' and
    			               'inside-tags' are read by doReplacements
    			acceptall    - save without asking when True
    			allowoverlap - forwarded to replaceExcept
    			recursive    - repeat the replacements until the text is stable
    			addedCat     - title of a category to add to changed pages
    			sleep        - seconds to sleep before each replacement, or None
    			editSummary  - base text of the edit summary
    			articles     - file object; when given, titles are written to
    			               it instead of editing the wiki
    			exctitles    - file object receiving titles excepted with 'x'
    			categories   - dict mapping topic keys to summary labels
    		"""
    		# A fresh dict per instance instead of a shared mutable default.
    		if exceptions is None:
    			exceptions = {}
    		self.generator = generator
    		self.replacements = replacements
    		self.exceptions = exceptions
    		self.acceptall = acceptall
    		self.allowoverlap = allowoverlap
    		self.recursive = recursive
    		if addedCat:
    			# The framework module is bound to the name `pywikilib` in this
    			# file; the name `pywikibot` is not defined here.
    			site = pywikilib.getSite()
    			self.addedCat = pywikilib.Page(site, addedCat, defaultNamespace=14)
    		self.sleep = sleep
    		# Some function to set default editSummary should probably be added
    		self.edit_summary = editSummary
    		self.articles = articles
    		self.exctitles = exctitles

    		# An edit counter to split the file by 100 titles if -save or -savenew
    		# is on, and to display the number of edited articles otherwise.
    		self.editcounter = 0
    		# A counter for saved exceptions
    		self.exceptcounter = 0
    		ReplaceRobot.__init__(
    			self, generator, replacements, exceptions, acceptall, allowoverlap,
    			recursive, addedCat, sleep, editSummary, articles, exctitles
    		)
    		self.categories = categories
    		# topic key -> number of additions counted in the last
    		# doReplacements call
    		self.counter = {}

    	def _resolve_de_placeholders(self, text):
    		"""
    		Returns `text` with every "<:de:> word" placeholder rewritten as
    		"d'word" when the word starts with a vowel or h, else "de word".
    		"""
    		def rewrite(match):
    			word = match.group(1)
    			if self._ELIDING_INITIAL.match(word):
    				return u"d'%s" % word
    			return u"de %s" % word
    		return self._DE_PLACEHOLDER.sub(rewrite, text)

    	def _save_title(self, page):
    		"""
    		Writes the page title to the articles file instead of editing.
    		"""
    		# The counter is incremented before splitLine() is evaluated, in
    		# the same order as before.
    		self.editcounter += 1
    		self.articles.write(u'#%s\n%s'
    			% (page.title(asLink=True, textlink=True),
    			   self.splitLine()))
    		self.articles.flush() # For the peace of our soul :-)

    	def doReplacements(self, original_text):
    		"""
    		Returns the text which is generated by applying all replacements to
    		the given text.

    		When the text changes, self.editSummary is rebuilt from the base
    		summary plus the per-topic counts, and printed.
    		"""
    		new_text = old_text = original_text
    		exceptions = []
    		for key in ("inside-tags", "inside"):
    			if key in self.exceptions:
    				exceptions += self.exceptions[key]
    		# Reset the per-topic counters for this call.
    		for r in self.replacements:
    			self.counter[r[-1]] = 0

    		for old, new, repl in self.replacements:
    			if isinstance(new, list):
    				new = random.choice(new)
    			if self.sleep is not None:
    				time.sleep(self.sleep)
    			new_text = pywikilib.replaceExcept(
    				new_text, old, new, exceptions, allowoverlap=self.allowoverlap
    			)
    			if "<:de:>" in new_text:
    				new_text = self._resolve_de_placeholders(new_text)
    			if new_text != old_text:
    				added, removed, kept = get_diffs(new_text, old_text)
    				self.counter[repl] += len(added)
    				old_text = new_text

    		if new_text != original_text:
    			# categories is optional; fall back to the raw topic key.
    			labels = self.categories or {}
    			# Ascending by count, as before.
    			# NOTE(review): the original wrapped this in a no-op reversed();
    			# descending order may have been intended - confirm.
    			tallies = sorted((c, r) for r, c in self.counter.items() if c > 0)
    			parts = ["%s (%i)" % (labels.get(r, r), c) for c, r in tallies]
    			if parts:
    				detail = " i ".join(
    					c for c in [", ".join(parts[:-1]), parts[-1]] if c)
    				self.editSummary = format_string(
    					"$1 $2", self.edit_summary, detail)
    			else:
    				# The text changed but no topic counted an addition (for
    				# example a pure deletion): keep the base summary.
    				self.editSummary = self.edit_summary
    			msg = format_string("&ysummary: $1", self.editSummary)
    			pywikilib.output(msg)

    		return new_text

    	def run(self):
    		"""
    		Starts the robot.

    		For every page of the generator: skips excepted titles and texts,
    		applies the replacements, shows the diff and either asks the user
    		what to do or, with acceptall, saves directly. With an articles
    		file the titles are written to that file instead of editing.
    		"""
    		# Run the generator which will yield Pages which might need to be
    		# changed.
    		for page in self.generator:
    			if self.isTitleExcepted(page.title()):
    				pywikilib.output(
    					u'Skipping %s because the title is on the exceptions list.'
    					% page.title(asLink=True))
    				continue
    			# Best-effort record of the page being processed; the script's
    			# resume option reads it back. KeyboardInterrupt is no longer
    			# swallowed here.
    			try:
    				File("lastpage", "ortobot").backup(page.title())
    			except Exception:
    				pywikilib.output(
    					u'Could not record %s as the last processed page.'
    					% page.title(asLink=True))
    			try:
    				# Load the page's text from the wiki
    				original_text = page.get(get_redirect=True)
    				if not (self.articles or page.canBeEdited()):
    					pywikilib.output(u"You can't edit page %s"
    						% page.title(asLink=True))
    					continue
    			except pywikilib.NoPage:
    				pywikilib.output(u'Page %s not found' % page.title(asLink=True))
    				continue
    			new_text = original_text
    			while True:
    				if self.isTextExcepted(new_text):
    					pywikilib.output(
    	u'Skipping %s because it contains text that is on the exceptions list.'
    						% page.title(asLink=True))
    					break
    				new_text = self.doReplacements(new_text)
    				if new_text == original_text:
    					pywikilib.output(u'No changes were necessary in %s'
    						% page.title(asLink=True))
    					break
    				if self.recursive:
    					# Repeat until one more pass changes nothing.
    					newest_text = self.doReplacements(new_text)
    					while (newest_text != new_text):
    						new_text = newest_text
    						newest_text = self.doReplacements(new_text)
    				if hasattr(self, "addedCat"):
    					cats = page.categories()
    					if self.addedCat not in cats:
    						cats.append(self.addedCat)
    						new_text = pywikilib.replaceCategoryLinks(new_text,
    							cats)
    				# Show the title of the page we're working on.
    				# Highlight the title in purple.
    				pywikilib.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
    					% page.title())
    				pywikilib.showDiff(original_text, new_text)
    				if self.acceptall:
    					break
    				if self.exctitles:
    					choice = pywikilib.inputChoice(
    							u'Do you want to accept these changes?',
    							['Yes', 'No', 'no+eXcept', 'Edit',
    							 'open in Browser', 'All', 'Quit'],
    							['y', 'N', 'x', 'e', 'b', 'a', 'q'], 'N')
    				else:
    					choice = pywikilib.inputChoice(
    							u'Do you want to accept these changes?',
    							['Yes', 'No', 'Edit', 'open in Browser', 'All',
    							 'Quit'],
    							['y', 'N', 'e', 'b', 'a', 'q'], 'N')
    				if choice == 'e':
    					editor = editarticle.TextEditor()
    					as_edited = editor.edit(original_text)
    					# if user didn't press Cancel
    					if as_edited and as_edited != new_text:
    						new_text = as_edited
    					continue
    				if choice == 'b':
    					webbrowser.open("http://%s%s" % (
    						page.site.hostname(),
    						page.site.nice_get_address(page.title())
    					))
    					# Message key with the 'pywikibot-' prefix, like the
    					# other i18n.input keys used in this file.
    					i18n.input('pywikibot-enter-finished-browser')
    					try:
    						original_text = page.get(get_redirect=True, force=True)
    					except pywikilib.NoPage:
    						pywikilib.output(u'Page %s has been deleted.'
    							% page.title())
    						break
    					new_text = original_text
    					continue
    				if choice == 'q':
    					self.writeEditCounter()
    					self.writeExceptCounter()
    					return
    				if choice == 'a':
    					# Leave the loop; the acceptall branch below saves.
    					self.acceptall = True
    				if choice == 'x': #May happen only if self.exctitles isn't None
    					self.exctitles.write(
    						u"ur'^%s$',\n" % re.escape(page.title()))
    					self.exctitles.flush()
    					self.exceptcounter += 1
    				if choice == 'y':
    					if not self.articles:
    						# Primary behaviour: working on wiki
    						page.put_async(new_text, self.editSummary)
    						self.editcounter += 1
    						# Bug: this increments even if put_async fails
    						# This is separately in two clauses of if for
    						# future purposes to get feedback form put_async
    					else:
    						#Save the title for later processing instead of editing
    						self._save_title(page)
    				# choice must be 'N'
    				break
    			if self.acceptall and new_text != original_text:
    				if not self.articles:
    					#Primary behaviour: working on wiki
    					try:
    						page.put(new_text, self.editSummary)
    						self.editcounter += 1 #increment only on success
    					except pywikilib.EditConflict:
    						pywikilib.output(u'Skipping %s because of edit conflict'
    							% (page.title(),))
    					except pywikilib.SpamfilterError as e:
    						pywikilib.output(
    							u'Cannot change %s because of blacklist entry %s'
    							% (page.title(), e.url))
    					except pywikilib.PageNotSaved as error:
    						pywikilib.error(u'putting page: %s'
    							% (error.args,))
    					except pywikilib.LockedPage:
    						pywikilib.output(u'Skipping %s (locked page)'
    							% (page.title(),))
    				else:
    					#Save the title for later processing instead of editing
    					self._save_title(page)

    		#Finally:
    		self.writeEditCounter()
    		self.writeExceptCounter()
    
    def main():
    	"""
    	Builds the replacement list, the exceptions and the page generator from
    	the parsed command line, then runs a Replacing robot over the pages.

    	Reads the module-level `args` object, which the script entry point
    	creates before calling this function. Replacements come from the
    	command line, from a replacement file, from interactive input or from a
    	predefined fix in the `fixes` module. Returns None; returns early when
    	the fix is unknown, has no replacements, no generator could be built or
    	an output file cannot be opened.
    	"""
    	# Imported locally: the module header does not import codecs, although
    	# the replacement file and both output files are opened with it.
    	import codecs

    	add_cat = None
    	gen = None
    	# summary message
    	summary_commandline = False
    	# Array which will collect commandline parameters.
    	# First element is original text, second element is replacement text.
    	commandline_replacements = []
    	# A list of (old, new) pairs or (old, new, topic) triples; pairs get
    	# their topic when the patterns are compiled below.
    	replacements = []
    	# Don't edit pages which contain certain texts.
    	exceptions = {
    		'title':         [],
    		'text-contains': [],
    		'inside':        [],
    		'inside-tags':   [],
    		'require-title': [], # using a separate requirements dict needs some
    	}                        # major refactoring of code.

    	# Should the elements of 'replacements' and 'exceptions' be interpreted
    	# as regular expressions?
    	regex = False
    	# Predefined fixes from dictionary 'fixes' (see above).
    	fix = None

    	# the dump's path, either absolute or relative, which will be used
    	# if -xml flag is present
    	xmlFilename = None
    	# the dumped article to start with; stays None unless -xmlstart is given
    	xmlStart = None
    	useSql = False
    	PageTitles = []
    	# will become True when the user presses a ('yes to all') or uses the
    	# -always flag.
    	acceptall = False
    	# Will become True if the user inputs the commandline parameter -nocase
    	caseInsensitive = False
    	# Will become True if the user inputs the commandline parameter -dotall
    	dotall = False
    	# Will become True if the user inputs the commandline parameter -multiline
    	multiline = False
    	# Do all hits when they overlap
    	allowoverlap = False
    	# Do not recurse replacement
    	recursive = False
    	# This is the maximum number of pages to load per query
    	maxquerysize = 60
    	# This factory is responsible for processing command line arguments
    	# that are also used by other scripts and that determine on which pages
    	# to work on.
    	genFactory = pg.GeneratorFactory()
    	# Load default summary message.
    	# BUG WARNING: This is probably incompatible with the -lang parameter.
    	editSummary = i18n.twtranslate(pywikilib.getSite(), 'replace-replacing',
    		{'description': u''})
    	# Between a regex and another (using -fix) sleep some time (not to waste
    	# too much CPU
    	sleep = None
    	# Do not save the page titles, rather work on wiki
    	filename = None # The name of the file to save titles
    	titlefile = None # The file object itself
    	# If we save, primary behaviour is append rather then new file
    	append = True
    	# Default: don't write titles to exception file and don't read them.
    	excoutfilename = None # The name of the file to save exceptions
    	excoutfile = None # The file object itself
    	# excinfilename: reserved for later use (reading back exceptions)
    	# If we save exceptions, primary behaviour is append
    	excappend = True

    	# abridged topic names for the summary specification
    	categories = None

    	# Read commandline parameters. An option given without a value appears
    	# as a bool, in which case the value is asked interactively.
    	if args.regex:
    		regex = True
    	if args.xmlstart:
    		if isinstance(args.xmlstart, bool):
    			xmlStart = pywikilib.input(
    				u'Please enter the dumped article to start with:')
    		else:
    			xmlStart = args.xmlstart
    	if args.xml:
    		if isinstance(args.xml, bool):
    			xmlFilename = i18n.input('pywikibot-enter-xml-filename')
    		else:
    			xmlFilename = args.xml
    	if args.sql:
    		useSql = True
    	if args.page:
    		# Work on a copy so the parsed arguments are not modified.
    		PageTitles = list(args.page) if isinstance(args.page, list) \
    			else [args.page]
    		for page in list(PageTitles):
    			if isinstance(page, bool):
    				PageTitles.remove(page)
    				PageTitles.append(pywikilib.input(
    					u'Which page do you want to change?'))
    	if args.saveexcnew:
    		excappend = False
    		if isinstance(args.saveexcnew, bool):
    			excoutfilename = pywikilib.input(
    				u'Please enter the filename to save the excepted titles' +
    				u'\n(will be deleted if exists):')
    		else:
    			excoutfilename = args.saveexcnew
    	if args.saveexc:
    		if isinstance(args.saveexc, bool):
    			excoutfilename = pywikilib.input(
    				u'Please enter the filename to save the excepted titles:')
    		else:
    			excoutfilename = args.saveexc
    	if args.savenew:
    		append = False
    		if isinstance(args.savenew, bool):
    			filename = pywikilib.input(
    				u'Please enter the filename to save the titles' +
    				u'\n(will be deleted if exists):')
    		else:
    			filename = args.savenew
    	if args.save:
    		if isinstance(args.save, bool):
    			filename = pywikilib.input(
    				u'Please enter the filename to save the titles:')
    		else:
    			filename = args.save
    	if args.replacementfile:
    		if isinstance(args.replacementfile, bool):
    			replacefile = pywikilib.input(
    				u'Please enter the filename to read replacements from:')
    		else:
    			replacefile = args.replacementfile
    		try:
    			# One entry per line; strip a leading BOM and the line ending.
    			commandline_replacements.extend(
    				[x.lstrip(u'\uFEFF').rstrip('\r\n')
    				for x in codecs.open(replacefile, 'r', 'utf-8')])
    		except IOError:
    			raise pywikilib.Error(
    		   '\n%s cannot be opened. Try again :-)' % replacefile)
    	if args.excepttitle:
    		exceptions['title'] = args.excepttitle
    	if args.requiretitle:
    		exceptions['require-title'] = args.requiretitle
    	if args.excepttext:
    		exceptions['text-contains'] = args.excepttext
    	if args.exceptinside:
    		exceptions['inside'] = args.exceptinside
    	if args.exceptinsidetag:
    		# The value lives on the argument handler; the bare name was
    		# undefined.
    		exceptions['inside-tags'] = args.exceptinsidetag
    	if args.fix:
    		fix = args.fix
    	if args.sleep:
    		sleep = args.sleep
    	if args.always:
    		acceptall = True
    	if args.recursive:
    		recursive = True
    	if args.nocase:
    		caseInsensitive = True
    	if args.dotall:
    		dotall = True
    	if args.multiline:
    		multiline = True
    	if args.addcat:
    		add_cat = args.addcat
    	if args.summary:
    		editSummary = args.summary
    		summary_commandline = True
    	if args.allowoverlap:
    		allowoverlap = True
    	if args.query:
    		maxquerysize = args.query
    	for arg in args.raw:
    		# Arguments the generator factory does not take and that are not
    		# options are replacement texts.
    		if not genFactory.handleArg(arg) and not arg.startswith("-"):
    			commandline_replacements.append(arg)

    	if pywikilib.verbose:
    		pywikilib.output(u"commandline_replacements: " +
    			', '.join(commandline_replacements))

    	if (len(commandline_replacements) % 2):
    		raise pywikilib.Error('require even number of replacements.')
    	elif (len(commandline_replacements) == 2 and fix is None):
    		replacements.append(
    			(commandline_replacements[0], commandline_replacements[1])
    		)
    		if not summary_commandline:
    			editSummary = i18n.twtranslate(
    				pywikilib.getSite(),
    				'replace-replacing',
    				{
    					'description': ' (-%s +%s)'% (
    						commandline_replacements[0],
    						commandline_replacements[1]
    					)
    				}
    			)
    	elif (len(commandline_replacements) > 1):
    		if (fix is None):
    			# Consecutive arguments form (old, new) pairs.
    			pairs = list(zip(commandline_replacements[0::2],
    				commandline_replacements[1::2]))
    			replacements.extend(pairs)
    			if not summary_commandline:
    				replacementsDescription = '(%s)' % ', '.join(
    					[('-' + pair[0] + ' +' + pair[1]) for pair in pairs]
    				)
    				editSummary = i18n.twtranslate(
    					pywikilib.getSite(),
    					'replace-replacing',
    					{
    						'description': replacementsDescription
    					}
    				)
    		else:
    			raise pywikilib.Error(
    				'Specifying -fix with replacements is undefined'
    			)
    	elif fix is None:
    		old = pywikilib.input(u'Please enter the text that should be replaced:')
    		new = pywikilib.input(u'Please enter the new text:')
    		change = '(-' + old + ' +' + new
    		replacements.append((old, new))
    		while True:
    			old = pywikilib.input(
    					u'Please enter another text that should be replaced,' +
    					u'\nor press Enter to start:')
    			if old == '':
    				change += ')'
    				break
    			new = i18n.input('pywikibot-enter-new-text')
    			change += ' & -%s +%s' % (old, new)
    			replacements.append((old, new))
    		if not summary_commandline:
    			default_summary_message = i18n.twtranslate(pywikilib.getSite(),
    				'replace-replacing',
    				{'description': change})
    			pywikilib.output(u'The summary message will default to: %s'
    				% default_summary_message)
    			summary_message = pywikilib.input(
    				u'Press Enter to use this default message, or enter a ' +
    				u'description of the\nchanges your bot will make:')
    			if summary_message == '':
    				summary_message = default_summary_message
    			editSummary = summary_message

    	else:
    		# Perform one of the predefined actions.
    		fixname = fix # Save the name for passing to exceptions function.
    		try:
    			fix = fixes.fixes[fix]
    		except KeyError:
    			pywikilib.output(u'Available predefined fixes are: %s'
    				% fixes.fixes.keys())
    			return
    		if "regex" in fix:
    			regex = fix['regex']
    		if "msg" in fix:
    			if isinstance(fix['msg'], basestring):
    				editSummary = i18n.twtranslate(pywikilib.getSite(),
    					str(fix['msg']))
    			else:
    				editSummary = pywikilib.translate(pywikilib.getSite(),
    					fix['msg'])
    		# Optional topic -> label mapping used in the edit summary.
    		if fix.get("categories"):
    			categories = fix['categories']

    		if "exceptions" in fix:
    			exceptions = fix['exceptions']
    			# Try to append common extensions for multiple fixes.
    			# It must be either a dictionary or a function that returns a dict.
    			if 'include' in exceptions:
    				incl = exceptions['include']
    				if callable(incl):
    					baseExcDict = incl(fixname)
    				else:
    					baseExcDict = incl
    				if baseExcDict:
    					for l in baseExcDict:
    						try:
    							exceptions[l].extend(baseExcDict[l])
    						except KeyError:
    							exceptions[l] = baseExcDict[l]
    		if "recursive" in fix:
    			recursive = fix['recursive']
    		if "nocase" in fix:
    			caseInsensitive = fix['nocase']
    		try:
    			replacements = fix['replacements']
    			# enable regex/replacements as a dictionary for different langs
    			if isinstance(replacements, dict):
    				replacements = replacements[pywikilib.getSite().lang]
    		except KeyError:
    			pywikilib.output(
    				u"No replacements given in fix.")
    			return

    	# Set the regular expression flags
    	flags = re.UNICODE
    	if caseInsensitive:
    		flags = flags | re.IGNORECASE
    	if dotall:
    		flags = flags | re.DOTALL
    	if multiline:
    		flags = flags | re.MULTILINE

    	# Pre-compile all regular expressions here to save time later. A new
    	# list is built so the list owned by the fixes module is not
    	# overwritten in place.
    	compiled = []
    	for entry in replacements:
    		if len(entry) == 2:
    			# Command line and interactive replacements are plain pairs;
    			# the robot needs a topic, so the searched text stands in.
    			old, new = entry
    			topic = old
    		else:
    			old, new, topic = entry
    		if not regex:
    			old = re.escape(old)
    		compiled.append((re.compile(old, flags), new, topic))
    	replacements = compiled

    	# 'inside-tags' is not in this list: those entries stay uncompiled.
    	for exceptionCategory in [
    			'title', 'require-title', 'text-contains', 'inside']:
    		if exceptionCategory in exceptions:
    			patterns = exceptions[exceptionCategory]
    			if not regex:
    				patterns = [re.escape(pattern) for pattern in patterns]
    			patterns = [re.compile(pattern, flags) for pattern in patterns]
    			exceptions[exceptionCategory] = patterns

    	if xmlFilename:
    		# Not imported at module level; ReplaceRobot comes from the same
    		# module.
    		from replace import XmlDumpReplacePageGenerator
    		# NOTE(review): the stock generator is assumed to iterate
    		# (old, new) pairs, so the topic is dropped here - confirm against
    		# the installed replace.py.
    		gen = XmlDumpReplacePageGenerator(
    			xmlFilename, xmlStart,
    			[(old, new) for (old, new, topic) in replacements],
    			exceptions)
    	elif useSql:
    		# Not imported at module level; ReplaceRobot comes from the same
    		# module.
    		from replace import prepareRegexForMySQL
    		whereClause = 'WHERE (%s)' % ' OR '.join(
    			["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
    			 for (old, new, topic) in replacements])
    		# `exceptions` is a dict of lists, so iterating it directly yields
    		# key strings. NOTE(review): only the compiled 'text-contains'
    		# patterns are used as page-text exclusions - confirm this is the
    		# intended filter.
    		textExceptions = exceptions.get('text-contains', [])
    		if textExceptions:
    			exceptClause = 'AND NOT (%s)' % ' OR '.join(
    				["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
    				 for exc in textExceptions])
    		else:
    			exceptClause = ''
    		query = u"""
    SELECT page_namespace, page_title
    FROM page
    JOIN text ON (page_id = old_id)
    %s
    %s
    LIMIT 200""" % (whereClause, exceptClause)
    		gen = pg.MySQLPageGenerator(query)
    	elif PageTitles:
    		pages = [pywikilib.Page(pywikilib.getSite(), PageTitle)
    				 for PageTitle in PageTitles]
    		gen = iter(pages)

    	gen = genFactory.getCombinedGenerator(gen)
    	if not gen:
    		# syntax error, show help text from the top of this file
    		pywikilib.showHelp('replace')
    		return

    	preloadingGen = pg.PreloadingGenerator(gen, pageNumber=maxquerysize)

    	# Finally we open the file for page titles or set parameter article to None
    	if filename:
    		try:
    			# This opens in strict error mode, that means bot will stop
    			# on encoding errors with ValueError.
    			# See http://docs.python.org/library/codecs.html#codecs.open
    			titlefile = codecs.open(filename, encoding='utf-8',
    				mode='a' if append else 'w')
    		except IOError:
    			pywikilib.output("%s cannot be opened for writing." %
    				filename)
    			return
    	# The same process with exceptions file:
    	if excoutfilename:
    		try:
    			excoutfile = codecs.open(
    				excoutfilename, encoding='utf-8',
    				mode='a' if excappend else 'w')
    		except IOError:
    			pywikilib.output("%s cannot be opened for writing." %
    				excoutfilename)
    			# Do not leave the already opened title file behind.
    			if titlefile:
    				titlefile.close()
    			return
    	bot = Replacing(preloadingGen, replacements, exceptions, acceptall,
    		allowoverlap, recursive, add_cat, sleep, editSummary,
    		titlefile, excoutfile, categories)
    	try:
    		bot.run()
    	finally:
    		# Just for the spirit of programming (they were flushed)
    		if titlefile:
    			titlefile.close()
    		if excoutfile:
    			excoutfile.close()
    
    if __name__ == '__main__':
    	# Script entry point: parse the command line, optionally rebuild it to
    	# resume from the last recorded page, then run main().
    	try:
    		#pywikilib.verbose = True
    		# main() reads this module-level object, so it must be named `args`.
    		args = ArgumentHandler()
    		args.parse_arguments()
    		if args.resume:
    			last_title = File("lastpage", "ortobot").load()
    			pywikilib.output(u'last article: "%s"' % last_title)
    			resume_template = (
    				u'-fix:auto -family:wikipedia_o -recursive -always '
    				u'-query:125 -sleep:1 -start:"%s"'
    			)
    			# Parse the rebuilt command line, starting at the recorded page.
    			args.parse_arguments(resume_template % last_title)
    		main()
    	except KeyboardInterrupt:
    		pywikilib.output(format_string("\n&r(:cancel·lat per l'usuari:)"))
    	finally:
    		pywikilib.stopme()