Wikipedia:編集回数の多いウィキペディアンの一覧/一覧データを生成する方法

一覧データを生成する方法の一例を説明します。 このページで生成方法を説明する一覧データのページは、次のとおりです。

このページで説明する方法による一覧の生成には、Javaプログラムをコンパイル/実行する方法について、若干の知識が必要となります(高度な知識は必要ありません)。 一覧の生成では、コンピュータを使い、そのコンピュータ上でJava開発/実行環境とJavaプログラムとを使います。 この方法で一覧データを生成する場合、ある程度のメモリを要します。

前提

編集

AnonymousUsers.java

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Set of user names that are to be shown under a placeholder instead of
 * their real name.
 *
 * <p>The names are read from the file {@code anonymous.txt} in the current
 * working directory, one entry per line. Each entry must carry a
 * {@code User:} prefix, optionally preceded by list numbering such as
 * {@code "12. "}. Reading stops at the first empty line or at end of file.
 */
class AnonymousUsers {

	private static final String INPUT_FILE_NAME = "anonymous.txt";

	/** Optional leading whitespace and list number, followed by the "User:" prefix. */
	private static final Pattern ENTRY_PREFIX = Pattern.compile("^\\s*\\d*\\.?\\s*User:");

	private final Map<String, String> users = new HashMap<String, String>();

	/**
	 * Loads the user names from {@code anonymous.txt}.
	 *
	 * <p>Whitespace around each name is removed, so an entry written as
	 * {@code "User:Example "} is stored as {@code "Example"}; without this a
	 * stray trailing blank makes the entry impossible to match.
	 *
	 * @throws FileNotFoundException if the input file does not exist
	 * @throws IOException if reading the input file fails
	 */
	public void initialize() throws FileNotFoundException, IOException {
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new FileReader(INPUT_FILE_NAME));
			String line;
			// An empty line terminates the list, exactly like end of file.
			while ((line = reader.readLine()) != null && line.length() != 0) {
				final String user = ENTRY_PREFIX.matcher(line).replaceFirst("").trim();
				if (user.length() == 0) {
					// A whitespace-only line must not register the empty name.
					continue;
				}
				users.put(user, user);
			}
		} finally {
			if (reader != null) {
				reader.close();
			}
		}
	}

	/**
	 * @param user user name without the {@code User:} prefix
	 * @return {@code true} if the name was listed in the input file
	 */
	public boolean contains(String user) {
		return users.containsKey(user);
	}

	/** @return the textual form of the underlying map, for debugging */
	public String toString() {
		return users.toString();
	}

}

Namespaces.java

import java.util.HashMap;
import java.util.Map;

/**
 * Lookup table from namespace name (the part of a page title in front of the
 * first colon) to the numeric namespace key.
 */
class Namespaces {

	/** Key returned for titles without a known namespace prefix. */
	public static final int MAIN_NAMESPACE = 0;

	private static final char NAMESPACE_SEPARATOR = ':';

	private final Map<String, Integer> map = new HashMap<String, Integer>();

	/**
	 * Registers a namespace.
	 *
	 * @param key namespace name as it appears in front of the colon
	 * @param ns numeric namespace key
	 */
	public void add(String key, int ns) {
		map.put(key, ns);
	}

	/**
	 * Determines the namespace of a page title.
	 *
	 * <p>The text before the first colon is looked up among the registered
	 * names. The prefix is taken with {@code indexOf}/{@code substring}
	 * rather than {@code split}, because {@code ":".split(":")} yields an
	 * empty array and indexing it would throw for titles made only of colons.
	 *
	 * @param text full page title
	 * @return the registered key for the title's prefix, or
	 *         {@link #MAIN_NAMESPACE} if the title has no colon or the prefix
	 *         is not registered
	 */
	public int ns(String text) {
		final int separatorIndex = text.indexOf(NAMESPACE_SEPARATOR);
		if (separatorIndex < 0) {
			return MAIN_NAMESPACE;
		}
		final Integer ns = map.get(text.substring(0, separatorIndex));
		return ns == null ? MAIN_NAMESPACE : ns.intValue();
	}

}

UnflaggedBots.java

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Set of account names that are to be treated as bots although they do not
 * carry the bot flag.
 *
 * <p>The names are read from the file {@code unflagged-bots.txt} in the
 * current working directory, one name per line, optionally preceded by list
 * numbering such as {@code "12. "}. Reading stops at the first empty line or
 * at end of file.
 */
public class UnflaggedBots {

	private static final String INPUT_FILE_NAME = "unflagged-bots.txt";

	/**
	 * Optional leading whitespace and list number. Note that the digits are
	 * stripped even without a following dot, so a name that itself starts
	 * with digits loses them.
	 */
	private static final Pattern ENTRY_PREFIX = Pattern.compile("^\\s*\\d*\\.?\\s*");

	private final Map<String, String> users = new HashMap<String, String>();

	/**
	 * Loads the bot names from {@code unflagged-bots.txt}.
	 *
	 * <p>Whitespace around each name is removed, so an entry written as
	 * {@code "Bluebot "} is stored as {@code "Bluebot"}; without this a stray
	 * trailing blank makes the entry impossible to match.
	 *
	 * @throws FileNotFoundException if the input file does not exist
	 * @throws IOException if reading the input file fails
	 */
	public void initialize() throws FileNotFoundException, IOException {
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new FileReader(INPUT_FILE_NAME));
			String line;
			// An empty line terminates the list, exactly like end of file.
			while ((line = reader.readLine()) != null && line.length() != 0) {
				final String user = ENTRY_PREFIX.matcher(line).replaceFirst("").trim();
				if (user.length() == 0) {
					// A whitespace-only line must not register the empty name.
					continue;
				}
				users.put(user, user);
			}
		} finally {
			if (reader != null) {
				reader.close();
			}
		}
	}

	/**
	 * @param user account name
	 * @return {@code true} if the name was listed in the input file
	 */
	public boolean contains(String user) {
		return users.containsKey(user);
	}

	/** @return the textual form of the underlying map, for debugging */
	public String toString() {
		return users.toString();
	}

}

User.java

/**
 * Mutable per-user record: numeric id, display text and four edit counters
 * (all edits, edits in the recent period, and the same two restricted to the
 * main namespace).
 */
class User {

	private int id;
	private String text;

	private int edits;
	private int editsInRecentDays;
	private int editsMain;
	private int editsMainInRecentDays;

	/** Creates a user with id 0, no text and all counters at zero. */
	public User() {
		this(0, null);
	}

	/**
	 * @param id numeric user id; 0 marks an IP address (see {@link #isIpAddress()})
	 * @param text user name or IP address text
	 */
	public User(int id, String text) {
		this.id = id;
		this.text = text;
	}

	// ---- identity ----------------------------------------------------

	public int getId() {
		return this.id;
	}

	public void setId(int id) {
		this.id = id;
	}

	public String getText() {
		return this.text;
	}

	public void setText(String text) {
		this.text = text;
	}

	/** @return {@code true} when the id is 0 */
	public boolean isIpAddress() {
		return 0 == this.id;
	}

	// ---- counters ----------------------------------------------------

	public int getEdits() {
		return this.edits;
	}

	public int getEditsInRecentDays() {
		return this.editsInRecentDays;
	}

	public int getEditsMain() {
		return this.editsMain;
	}

	public int getEditsMainInRecentDays() {
		return this.editsMainInRecentDays;
	}

	public void incrementEdits() {
		this.edits += 1;
	}

	public void incrementEditsInRecentDays() {
		this.editsInRecentDays += 1;
	}

	public void incrementEditsMain() {
		this.editsMain += 1;
	}

	public void incrementEditsMainInRecentDays() {
		this.editsMainInRecentDays += 1;
	}

	/** @return id, text, total edits and recent edits in a single line */
	public String toString() {
		final StringBuilder description = new StringBuilder();
		description.append("id: ").append(this.id);
		description.append(", text: ").append(this.text);
		description.append(", edits: ").append(this.edits);
		description.append(", editsRecentDays: ").append(this.editsInRecentDays);
		return description.toString();
	}

}

UserGroups.java

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Sysop and bot membership, extracted from a gzip-compressed
 * {@code user_groups} SQL dump.
 */
class UserGroups {

	public static final String SYSOP = "sysop";
	public static final String BOT = "bot";
	public static final String FILE_NAME_SUFFIX = "user_groups.sql.gz";

	/** Start of a data line, up to and including the first opening parenthesis. */
	private static final Pattern INSERT_PREFIX = Pattern.compile("^INSERT INTO `user_groups` VALUES \\(");
	/** Closing parenthesis and semicolon at the end of a data line. */
	private static final Pattern INSERT_SUFFIX = Pattern.compile("\\);$");
	/** Regular expression separating two value tuples. */
	private static final String TUPLE_SEPARATOR = "\\),\\(";

	private final Map<Integer, Integer> sysops = new HashMap<Integer, Integer>();
	private final Map<Integer, Integer> bots = new HashMap<Integer, Integer>();

	/**
	 * Reads the dump and remembers every user id whose group is
	 * {@code 'sysop'} or {@code 'bot'}. Lines that are not
	 * {@code INSERT INTO `user_groups`} statements are skipped; only the first
	 * two comma-separated fields of each tuple are looked at.
	 *
	 * @param inputStream gzip-compressed SQL dump; it is not closed here
	 * @throws IOException if decompressing or reading fails
	 */
	public void initialize(InputStream inputStream) throws IOException {
		final BufferedReader lines
				= new BufferedReader(new InputStreamReader(new GZIPInputStream(inputStream)));
		final String quotedSysop = "'" + SYSOP + "'";
		final String quotedBot = "'" + BOT + "'";
		for (String line = lines.readLine(); line != null; line = lines.readLine()) {
			if (!INSERT_PREFIX.matcher(line).find()) {
				continue;
			}
			final String withoutPrefix = INSERT_PREFIX.matcher(line).replaceFirst("");
			final String tuples = INSERT_SUFFIX.matcher(withoutPrefix).replaceFirst("");
			final String[] rows = tuples.split(TUPLE_SEPARATOR);
			for (int i = 0; i < rows.length; i++) {
				final StringTokenizer fields = new StringTokenizer(rows[i], ",");
				final Integer userId = Integer.valueOf(fields.nextToken());
				final String groupName = fields.nextToken();
				if (quotedSysop.equals(groupName)) {
					sysops.put(userId, userId);
				} else if (quotedBot.equals(groupName)) {
					bots.put(userId, userId);
				}
			}
		}
	}

	/**
	 * @param user numeric user id
	 * @return {@link #SYSOP} if the user is a sysop, otherwise {@link #BOT} if
	 *         the user is a bot, otherwise the empty string
	 */
	public String group(int user) {
		if (sysops.containsKey(user)) {
			return SYSOP;
		}
		if (bots.containsKey(user)) {
			return BOT;
		}
		return "";
	}

}

WikipediansByNumberOfEdits.java

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.EmptyStackException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.TimeZone;
import java.util.zip.GZIPInputStream;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Base class of the command-line tools that build tables of editors ranked by
 * edit count from a gzip-compressed XML history dump and a gzip-compressed
 * {@code user_groups} SQL dump.
 *
 * <p>Subclasses supply the wiki name, the printers that render the result and
 * whether IP addresses are counted. The run is configured through the system
 * properties {@code begin.date}, {@code end.date} (both {@code yyyy-MM-dd})
 * and {@code limit} (default 5000).
 */
public abstract class WikipediansByNumberOfEdits {
	
	private static final String YEARMONTH_FORMAT_STRING = "yyyy-MM";
	private static final String DATE_FORMAT_STRING = YEARMONTH_FORMAT_STRING + "-dd";
	/**
	 * Format of the {@code begin.date} / {@code end.date} properties; also
	 * used by printers to render the period.
	 * NOTE(review): this formatter uses the JVM default time zone while dump
	 * timestamps are parsed as UTC - confirm the tool is meant to run with a
	 * UTC default time zone.
	 */
	public static final DateFormat DATE_FORMAT = new SimpleDateFormat(DATE_FORMAT_STRING);
	private static final String TIME_FORMAT_STRING = "HH:mm:ss";
	
	private final Date dateStarted =  new Date();
	
	private static final String LIMIT_PROPERTY_KEY = "limit";
	private int limit = 0;
	
	/**
	 * Runs the whole job: validates the arguments, loads the user groups,
	 * parses the dump and lets every printer write its table to standard
	 * output (UTF-8). Progress and errors go to standard error; any failure
	 * terminates the JVM with exit status 1.
	 *
	 * @param args {@code args[0]} is the history dump, {@code args[1]} the
	 *             user_groups dump
	 */
	protected void execute(String[] args) {
		
		try {
			final int VALID_ARGUMENT_LENGTH = 2;
			if (args.length < VALID_ARGUMENT_LENGTH) {
				printUsage();
				System.exit(1);
			}
			System.err.println("Started. " + dateStarted);
			String limitText = System.getProperty(LIMIT_PROPERTY_KEY, "5000");
			limit = Integer.parseInt(limitText);
			final File dumpFile = new File(args[0]);
			fileNameCheck(dumpFile);
			final File userGroupsFile = new File(args[1]); 
			fileNameCheck(userGroupsFile);
			final PrintWriter writer = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
			final UserGroups userGroups = new UserGroups();
			InputStream userGroupsInputStream = null;
			try {
				// Keep a reference to the stream so that the finally block can
				// actually close it.
				userGroupsInputStream = new FileInputStream(userGroupsFile);
				userGroups.initialize(userGroupsInputStream);
			} finally {
				closeQuietly(userGroupsInputStream);
			}
			final DumpHandler dumpHandler = new DumpHandler();
			dumpHandler.setIpAddressesAreToBeCounted(getIpAddressesAreToBeCounted());
			InputStream dumpInputStream = null;
			try {
				dumpInputStream = new GZIPInputStream(new FileInputStream(dumpFile));
				SAXParserFactory.newInstance().newSAXParser().parse(dumpInputStream, dumpHandler);
			} finally {
				closeQuietly(dumpInputStream);
			}
			final WikipediansPrinter[] printers = createPrinters();
			for (WikipediansPrinter printer : printers) {
				printer.setWriter(writer);
				printer.setBeginTimestamp(dumpHandler.getBeginTimestamp());
				printer.setEndTimestamp(dumpHandler.getEndTimestamp());
				printer.setTotalEdits(dumpHandler.getRevisionCounter());
				printer.setTotalEditsInPeriod(dumpHandler.getRevisionInPeriodCounter());
				printer.print(dumpHandler.getUsers(), userGroups, limit);
				// Blank line between tables, but not after the last one.
				if (!printer.equals(printers[printers.length - 1])) {
					writer.println();
				}
			}
		} catch (NumberFormatException e) {
			System.err.println("The specified system property \"" + LIMIT_PROPERTY_KEY + "\" is not a valid integer.");
			System.err.println(e);
			System.exit(1);
		} catch (FileNotFoundException e) {
			System.err.println(e);
			System.exit(1);
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
			System.exit(1);
		} catch (SAXException e) {
			// A ParseException cause means a bad or missing date property;
			// a one-line message is enough for that.
			if (e.getCause() instanceof ParseException) {
				System.err.println(e);
			} else {
				e.printStackTrace();
			}
			System.exit(1);
		} catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		} finally {
			final Date dateEnded = new Date();
			System.err.println("Ended. " + dateEnded);
			final SimpleDateFormat dateFormat = new SimpleDateFormat(TIME_FORMAT_STRING);
			dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
			System.err.println("Elapsed: " + dateFormat.format(new Date(dateEnded.getTime() - dateStarted.getTime())));
		}

	}
	
	/** Closes the stream if it is non-null; a failure is only reported, never propagated. */
	private static void closeQuietly(InputStream inputStream) {
		if (inputStream != null) {
			try {
				inputStream.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	
	/** Prints an example command line to standard error. */
	private void printUsage() {
		System.err.print("Usage (example): java -Xmx500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=5000");
		System.err.print(" " + getClass().getName());
		System.err.print(" " + getWikiName() + "-20080501-stub-meta-history.xml.gz");
		System.err.print(" " + getWikiName() + "-20080501-" + UserGroups.FILE_NAME_SUFFIX);
		System.err.print(" > result.txt");
		System.err.println();
	}
	
	/**
	 * Warns, and pauses for five seconds, when the file name does not start
	 * with the wiki name of this tool.
	 */
	private void fileNameCheck(File file) {
		if (!file.getName().startsWith(getWikiName())) {
			System.err.println("WARNING: The specified file name '" + file.getName() + "' does not start with '" + getWikiName() + "'.");
			try {
				Thread.sleep(5000);
			} catch(InterruptedException e) {
				// Do not lose the interruption; only the pause is cut short.
				Thread.currentThread().interrupt();
			}
		}
	}
	
	/** @return prefix expected at the start of the dump file names, e.g. "jawiki" */
	protected abstract String getWikiName();
	
	/** @return the printers that render the collected data, in output order */
	protected abstract WikipediansPrinter[] createPrinters();
	
	/** @return whether revisions with user id 0 are counted; {@code true} by default */
	protected boolean getIpAddressesAreToBeCounted() {
		return true;
	}
	
	/**
	 * SAX handler that walks the dump and accumulates per-user edit counters.
	 */
	private static class DumpHandler extends DefaultHandler {
		
		private final Namespaces namespaces = new Namespaces(); 
		
		/** Names of the currently open elements, innermost on top. */
		private final Stack<String> elementStack = new Stack<String>();
		
		private Date beginTimestamp = null;
		private Date endTimestamp = null;
		
		public Date getBeginTimestamp() {
			return beginTimestamp;
		}

		public Date getEndTimestamp() {
			return endTimestamp;
		}

		/** Dump timestamps have "UTC" appended before parsing so that the trailing {@code z} matches. */
		private static final DateFormat TIMESTAMP_DUMP_FORMAT
								= new SimpleDateFormat(DATE_FORMAT_STRING + "'T'" + TIME_FORMAT_STRING + "'Z'z");
		
		private static final String BEGIN_DATE_PROPERTY_KEY = "begin.date";
		private static final String END_DATE_PROPERTY_KEY = "end.date";
		
		private boolean ipAddressesAreToBeCounted = true;
		
		public void setIpAddressesAreToBeCounted(boolean ipAddressesAreToBeCounted) {
			this.ipAddressesAreToBeCounted = ipAddressesAreToBeCounted;
		}
		
		private int editsInLastMonth = 0;
		private Calendar beginCalendar = Calendar.getInstance();
		private Set<String> usersEditedInLastMonth = new HashSet<String>();

		/**
		 * Reads the period from the system properties. The end of the period
		 * is the end date plus 23:59:59.
		 *
		 * @throws SAXException if a date property is missing or malformed
		 */
		public void startDocument() throws SAXException {
			beginTimestamp = getDateProperty(BEGIN_DATE_PROPERTY_KEY);
			final Calendar endTimestampCalendar = Calendar.getInstance();
			endTimestampCalendar.setTime(getDateProperty(END_DATE_PROPERTY_KEY));
			endTimestampCalendar.add(Calendar.HOUR, 23);
			endTimestampCalendar.add(Calendar.MINUTE, 59);
			endTimestampCalendar.add(Calendar.SECOND, 59);
			endTimestamp = endTimestampCalendar.getTime();
			beginCalendar.setTime(beginTimestamp);
		}
		
		/** Prints summary statistics to standard error. */
		public void endDocument() throws SAXException {
			System.err.println("Processed: " + revisionCounter);
			System.err.println("As of the last month"
								+ " (" + new SimpleDateFormat(YEARMONTH_FORMAT_STRING).format(beginTimestamp) + "),"
								+ " the Wikipedia received "
								+ (int)(editsInLastMonth / beginCalendar.getActualMaximum(Calendar.DATE))
								+ " edits a day.");
			System.err.println(usersEditedInLastMonth.size()
								+ " registered people (including bots) edited the Wikipedia in that month.");
//			System.err.println("Timestamp ParseException: " + timestampParseExceptionCount + " occured.");
//			System.err.println("User ID error: " + userIdErrorCount + " occured.");
			System.err.flush();
		}
		
		/**
		 * Parses a date-valued system property.
		 *
		 * @param key property name
		 * @return the parsed date
		 * @throws SAXException wrapping a {@link ParseException} if the
		 *         property is not set or does not match {@code yyyy-MM-dd}
		 */
		private static Date getDateProperty(String key) throws SAXException {
			final String property = System.getProperty(key);
			if (property == null) {
				// Parsing null would raise a NullPointerException that nothing
				// handles; report it like any other bad date instead.
				throw new SAXException(new ParseException(
						"The system property \"" + key + "\" is not set (expected format: "
						+ DATE_FORMAT_STRING + ").", 0));
			}
			try {
				return DATE_FORMAT.parse(property);
			} catch (ParseException e) {
				throw new SAXException(e);
			}
		}
		
		/**
		 * Pushes the element name and, for {@code namespace} elements, reads
		 * the numeric {@code key} attribute.
		 *
		 * @throws SAXException if the {@code key} attribute is not an integer
		 */
		public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
			String name = localName.equals("") ? qName : localName;
			elementStack.push(name);
			if (name.equals("namespace")) {
				String key = "";
				try {
					key = atts.getValue("key");
					ns = Integer.parseInt(key);
				} catch (NumberFormatException e) {
					throw new SAXException("ns: " + key, e);
				}
			}
		}
		
		/** Number of revisions that were processed (ignored revisions excluded). */
		private int revisionCounter = 0;
		
		int getRevisionCounter() {
			return revisionCounter;
		}

		/** Number of processed revisions whose timestamp lies in the period. */
		private int revisionInPeriodCounter = 0;
		
		int getRevisionInPeriodCounter() {
			return revisionInPeriodCounter;
		}
		
		// State of the <namespace> element being read.
		private int ns = 0;
		private String namespace = "";
		
		// Title of the <page> being read.
		private String pageTitle = "";
		
		// State of the <revision> being read; cleared by resetRevisionState().
		private int userId = 0;
		private String userIdString = "";
		private String userText = "";
		private String ipText = "";
		private Date timestamp = null;
		private String timestampString = "";
		
		private boolean ignoreRevision = false;
		
		/** Users keyed by user name or IP address text. */
		private Map<String, User> map = new HashMap<String, User>();
		
		public User[] getUsers() {
			return map.values().toArray(new User[map.size()]);
		}
		
		private int timestampParseExceptionCount = 0;
		private int userIdErrorCount = 0;
		
		/**
		 * Pops the element and finishes whatever it described: registers a
		 * namespace, clears the page title, parses a timestamp or accounts
		 * for a revision.
		 */
		public void endElement(String uri, String localName, String qName) throws SAXException {
			final String name = elementStack.pop();
			if (name.equals("namespace")) {
				namespaces.add(namespace, ns);
				ns = 0;
				namespace = "";
			} else if (name.equals("page")) {
				pageTitle = "";
			} else if (name.equals("timestamp")) {
				ignoreRevision = false;
				try {
					timestamp = TIMESTAMP_DUMP_FORMAT.parse(timestampString + "UTC");
				} catch (ParseException e) {
					timestampParseExceptionCount++;
					ignoreRevision = true;
				} finally {
					// Always clear the buffer; otherwise one bad timestamp
					// would be prepended to, and break, every later one.
					timestampString = "";
				}
			} else if (name.equals("revision")) {
				try {
					processRevision();
				} finally {
					// Also clear the state of ignored revisions so that it
					// cannot leak into the next revision.
					resetRevisionState();
				}
			}
		}
		
		/** Updates the counters for the revision whose end tag was just read. */
		private void processRevision() {
			if (!userIdString.equals("")) {
				try {
					userId = Integer.parseInt(userIdString);
				} catch (NumberFormatException e) {
					ignoreRevision = true;
				}
			}
			// A revision without a usable timestamp cannot be placed in time.
			if (ignoreRevision || timestamp == null) {
				return;
			}
			if (userText.equals("")) {
				// No user name was given: fall back to the IP address text.
				userText = ipText;
			}
			User user = null;
			if (ipAddressesAreToBeCounted || userId != 0) {
				user = map.get(userText);
				if (user == null) {
					user = new User(userId, userText);
					map.put(userText, user);
				}
				if (user.getId() < userId) {
					user.setId(userId);
				}
				if (user.getId() != userId) {
					userIdErrorCount++;
				}
				if (timestampBeroreOrEquals(timestamp)) {
					final boolean inPeriod = timestampIsInPeriod(timestamp);
					user.incrementEdits();
					if (inPeriod) {
						user.incrementEditsInRecentDays();
					}
					if (namespaces.ns(pageTitle) == Namespaces.MAIN_NAMESPACE) {
						user.incrementEditsMain();
						if (inPeriod) {
							user.incrementEditsMainInRecentDays();
						}
					}
				}
			}
			final Calendar calendar = Calendar.getInstance();
			calendar.setTime(timestamp);
			if (calendar.get(Calendar.YEAR) == beginCalendar.get(Calendar.YEAR)
					&& calendar.get(Calendar.MONTH) == beginCalendar.get(Calendar.MONTH)) {
				editsInLastMonth ++;
				if (user != null) {
					usersEditedInLastMonth.add(user.getText());
				}
			}
			if (timestampIsInPeriod(timestamp)) {
				revisionInPeriodCounter ++;
			}
			revisionCounter++;
			final int LOG_INTERVAL = 10000;
			if (revisionCounter % LOG_INTERVAL == 0) {
				System.err.println("Processed: " + revisionCounter);
			}
		}
		
		/** Clears everything that belongs to a single revision. */
		private void resetRevisionState() {
			userId = 0;
			userIdString = "";
			userText = "";
			ipText = "";
			timestamp = null;
			timestampString = "";
			ignoreRevision = false;
		}
		
		/** @return whether begin &lt;= timestamp &lt;= end */
		private boolean timestampIsInPeriod(Date timestamp) {
			return ( timestamp.equals(beginTimestamp) || timestamp.after(beginTimestamp) )
					&& timestampBeroreOrEquals(timestamp);
		}
		
		/** @return whether timestamp &lt;= end */
		private boolean timestampBeroreOrEquals(Date timestamp) {
			return ( timestamp.before(endTimestamp) || timestamp.equals(endTimestamp) );
		}
		
		/**
		 * Collects element text. The parser may deliver the text of one
		 * element in several calls, so every buffer is appended to.
		 */
		public void characters (char[] ch, int start, int length) {
			// Both the element and its parent are needed below; with fewer
			// than two open elements there is nothing to collect.
			if (elementStack.size() < 2) {
				return;
			}
			final String elementName = elementStack.peek();
			final String parentElementName = elementStack.elementAt(elementStack.size() - 2);
			final String string = new String(ch, start, length);
			if (elementName.equals("namespace")) {
				namespace += string;
			}
			if (elementName.equals("title")) {
				pageTitle += string;
			}
			if (elementName.equals("timestamp")) {
				timestampString += string;
			} else if (parentElementName.equals("contributor")) {
				if (elementName.equals("id")) {
					userIdString += string;
				} else if (elementName.equals("username")) {
					userText += string;
				} else if (elementName.equals("ip")) {
					// Buffered separately so that a second chunk of the same
					// address is not dropped; used only if no user name exists.
					ipText += string;
				}
			}
		}

	}

}

WikipediansByNumberOfEdits_en.java

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;

/**
 * Command-line tool for the English Wikipedia list ranked by total number of
 * edits. IP addresses are not counted, listed users are shown under a
 * placeholder, and accounts listed as unflagged bots are treated as bots.
 */
public class WikipediansByNumberOfEdits_en extends WikipediansByNumberOfEdits {

	private static AnonymousUsers ANONYMOUS_USERS = null;
	
	private static UnflaggedBots UNFLAGGED_BOTS = null;
	
	/**
	 * Loads both name lists and runs the tool.
	 * A failure to read either list ends the program with exit status 1.
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		ANONYMOUS_USERS = new AnonymousUsers();
		UNFLAGGED_BOTS = new UnflaggedBots();
		try {
			ANONYMOUS_USERS.initialize();
			UNFLAGGED_BOTS.initialize();
			final WikipediansByNumberOfEdits_en tool = new WikipediansByNumberOfEdits_en();
			tool.execute(args);
		} catch (IOException e) {
			// Also covers FileNotFoundException, which is an IOException.
			e.printStackTrace();
			System.exit(1);
		}
	}
	
	protected String getWikiName() {
		return "enwiki";
	}
	
	protected boolean getIpAddressesAreToBeCounted() {
		return false;
	}
	
	protected WikipediansPrinter[] createPrinters() {
		final Printer enPrinter = new Printer();
		enPrinter.setAnonymousUsers(ANONYMOUS_USERS);
		enPrinter.setUnflaggedBots(UNFLAGGED_BOTS);
		final WikipediansPrinter[] printers = new WikipediansPrinter[1];
		printers[0] = enPrinter;
		return printers;
	}
	
	/** Renders a single sortable table ordered by total edits. */
	private static class Printer extends WikipediansPrinter {
		
		private AnonymousUsers anonymousUsers = null;
		private UnflaggedBots unflaggedBots = null;
		
		public void setAnonymousUsers(AnonymousUsers anonymousUsers) {
			this.anonymousUsers = anonymousUsers;
		}
		
		public void setUnflaggedBots(UnflaggedBots unflaggedBots) {
			this.unflaggedBots = unflaggedBots;
		}
		
		protected int getTargetEdits(User user) {
			return user.getEdits();
		}
		
		protected int getTargetTotalEdits() {
			return getTotalEdits();
		}

		protected String getTableHeader() {
			return "Rank !! User !! Edits !! Edits in the past 30 days";
		}
		
		protected String getSpecialText() {
			return "Special";
		}
		
		protected String getUserText() {
			return "User";
		}
		
		protected String getSortable() {
			return SORTABLE;
		}
		
		/** Replaces the name of a listed user by a placeholder. */
		protected void processAnonymous(User user) {
			final boolean listed = anonymousUsers.contains(user.getText());
			if (listed) {
				user.setText("Place holder");
			}
		}
		
		/** Reports users without a group as bots when they are listed as unflagged bots. */
		protected String getGroup(User user, String group) {
			if (!group.equals("")) {
				return group;
			}
			return unflaggedBots.contains(user.getText()) ? UserGroups.BOT : group;
		}
		
		protected Comparator<User> createComparator() {
			return new UsersComparator();
		}
		
		/** Orders by total edits, then by recent edits, both descending. */
		private static class UsersComparator implements Comparator<User> {
			public int compare(User user1, User user2) {
				final int byEdits = user2.getEdits() - user1.getEdits();
				if (byEdits != 0) {
					return byEdits;
				}
				return user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
			}
		}

		protected void printEdits(User user) {
			final String totalColumn = " || " + user.getEdits();
			final String recentColumn = " || " + user.getEditsInRecentDays();
			getWriter().print(totalColumn);
			getWriter().print(recentColumn);
		}

	}
	
}

WikipediansByNumberOfRecentEdits_en.java

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;

/**
 * Command-line tool for the English Wikipedia list ranked by number of edits
 * in the configured period. IP addresses are not counted and listed users are
 * shown under a placeholder.
 */
public class WikipediansByNumberOfRecentEdits_en extends WikipediansByNumberOfEdits {

	private static AnonymousUsers ANONYMOUS_USERS = null;
	
	/**
	 * Loads the list of anonymised users and runs the tool.
	 * A failure to read the list ends the program with exit status 1.
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		ANONYMOUS_USERS = new AnonymousUsers();
		try {
			ANONYMOUS_USERS.initialize();
			final WikipediansByNumberOfRecentEdits_en tool = new WikipediansByNumberOfRecentEdits_en();
			tool.execute(args);
		} catch (IOException e) {
			// Also covers FileNotFoundException, which is an IOException.
			e.printStackTrace();
			System.exit(1);
		}
	}
	
	protected String getWikiName() {
		return "enwiki";
	}
	
	protected boolean getIpAddressesAreToBeCounted() {
		return false;
	}
	
	protected WikipediansPrinter[] createPrinters() {
		final Printer enPrinter = new Printer();
		enPrinter.setAnonymousUsers(ANONYMOUS_USERS);
		final WikipediansPrinter[] printers = new WikipediansPrinter[1];
		printers[0] = enPrinter;
		return printers;
	}
	
	/** Renders a single sortable table ordered by recent edits, preceded by the period. */
	private static class Printer extends WikipediansPrinter {
		
		private AnonymousUsers anonymousUsers = null;
		
		public void setAnonymousUsers(AnonymousUsers anonymousUsers) {
			this.anonymousUsers = anonymousUsers;
		}
		
		protected int getTargetEdits(User user) {
			return user.getEditsInRecentDays();
		}
		
		protected String getTableHeader() {
			return "Rank !! User !! Total Edits !! Recent Edits";
		}
		
		protected String getSpecialText() {
			return "Special";
		}
		
		protected String getUserText() {
			return "User";
		}
		
		protected String getSortable() {
			return SORTABLE;
		}
		
		/** Writes the period line followed by an empty line. */
		protected void printHeader() {
			final String from = DATE_FORMAT.format(getBeginTimestamp());
			final String to = DATE_FORMAT.format(getEndTimestamp());
			getWriter().println("Period: " + from + " &mdash; " + to + " (UTC)");
			getWriter().println();
		}
		
		/** Replaces the name of a listed user by a placeholder. */
		protected void processAnonymous(User user) {
			final boolean listed = anonymousUsers.contains(user.getText());
			if (listed) {
				user.setText("Place holder");
			}
		}
		
		protected Comparator<User> createComparator() {
			return new UsersComparator();
		}
		
		/** Orders by recent edits, then by total edits, both descending. */
		private static class UsersComparator implements Comparator<User> {
			public int compare(User user1, User user2) {
				final int byRecent = user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
				if (byRecent != 0) {
					return byRecent;
				}
				return user2.getEdits() - user1.getEdits();
			}
		}

		protected void printEdits(User user) {
			final String totalColumn = " || " + user.getEdits();
			final String recentColumn = " || " + user.getEditsInRecentDays();
			getWriter().print(totalColumn);
			getWriter().print(recentColumn);
		}
	}
	
}

WikipediansByNumberOfRecentEdits_ja.java

import java.util.Comparator;

/**
 * Command-line tool for the Japanese Wikipedia lists ranked by number of
 * edits in the configured period. Two tables are produced: main namespace
 * only, then all namespaces.
 */
public class WikipediansByNumberOfRecentEdits_ja extends WikipediansByNumberOfEdits {
	
	/**
	 * Runs the tool.
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		final WikipediansByNumberOfRecentEdits_ja tool = new WikipediansByNumberOfRecentEdits_ja();
		tool.execute(args);
	}
	
	protected String getWikiName() {
		return "jawiki";
	}
	
	protected WikipediansPrinter[] createPrinters() {
		final WikipediansPrinter[] printers = new WikipediansPrinter[2];
		printers[0] = new MainNamespacePrinter();
		printers[1] = new AllNamespacePrinter();
		return printers;
	}
	
	/** Texts and header layout shared by both tables. */
	private static abstract class Printer extends WikipediansPrinter {
		
		protected String getTableHeader() {
			return "順位 !! 利用者 !! 編集回数 !! 総編集回数";
		}
		
		protected String getSpecialText() {
			return "特別";
		}
		
		protected String getUserText() {
			return "利用者";
		}
				
		protected String getSortable() {
			return SORTABLE;
		}
		
		/** Writes the section heading, the period line and an empty line. */
		protected void printHeader() {
			final String from = DATE_FORMAT.format(getBeginTimestamp());
			final String to = DATE_FORMAT.format(getEndTimestamp());
			getWriter().print("== " + getSectionTitle() + " ==\n");
			getWriter().println("期間: " + from + " &mdash; " + to + " (UTC)");
			getWriter().println();
		}
		
		/** @return title of the section heading written before the table */
		protected abstract String getSectionTitle();
		
	}
	
	/** Table restricted to edits in the main namespace. */
	private static class MainNamespacePrinter extends Printer {
		
		protected int getTargetEdits(User user) {
			return user.getEditsMainInRecentDays();
		}
		
		public String getSectionTitle() {
			return "記事名前空間";
		}
		
		protected void printEdits(User user) {
			final String recentColumn = " || " + user.getEditsMainInRecentDays();
			final String totalColumn = " || " + user.getEditsMain();
			getWriter().print(recentColumn);
			getWriter().print(totalColumn);
		}
		
		protected Comparator<User> createComparator() {
			return new UsersComparator();
		}
		
		/** Orders by recent main-namespace edits, then by all main-namespace edits, both descending. */
		private static class UsersComparator implements Comparator<User> {
			public int compare(User user1, User user2) {
				final int byRecent = user2.getEditsMainInRecentDays() - user1.getEditsMainInRecentDays();
				if (byRecent != 0) {
					return byRecent;
				}
				return user2.getEditsMain() - user1.getEditsMain();
			}
		}

	}
	
	/** Table covering edits in all namespaces. */
	private static class AllNamespacePrinter extends Printer {
		
		protected int getTargetEdits(User user) {
			return user.getEditsInRecentDays();
		}
		
		public String getSectionTitle() {
			return "全名前空間";
		}
		
		protected void printEdits(User user) {
			final String recentColumn = " || " + user.getEditsInRecentDays();
			final String totalColumn = " || " + user.getEdits();
			getWriter().print(recentColumn);
			getWriter().print(totalColumn);
		}
		
		protected Comparator<User> createComparator() {
			return new UsersComparator();
		}
		
		/** Orders by recent edits, then by total edits, both descending. */
		private static class UsersComparator implements Comparator<User> {
			public int compare(User user1, User user2) {
				final int byRecent = user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
				if (byRecent != 0) {
					return byRecent;
				}
				return user2.getEdits() - user1.getEdits();
			}
		}

	}
	
}

WikipediansByNumberOfRecentEdits_zh.java

import java.util.Comparator;

/**
 * Command-line tool for the Chinese Wikipedia list ranked by number of edits
 * in the configured period. IP addresses are counted.
 */
public class WikipediansByNumberOfRecentEdits_zh extends WikipediansByNumberOfEdits {
	/**
	 * Runs the tool.
	 * @param args command-line arguments
	 */
	public static void main(String[] args) {
		final WikipediansByNumberOfRecentEdits_zh tool = new WikipediansByNumberOfRecentEdits_zh();
		tool.execute(args);
	}
	
	protected String getWikiName() {
		return "zhwiki";
	}
	
	protected boolean getIpAddressesAreToBeCounted() {
		return true;
	}
	
	protected WikipediansPrinter[] createPrinters() {
		final WikipediansPrinter[] printers = new WikipediansPrinter[1];
		printers[0] = new Printer();
		return printers;
	}
	
	/** Renders a single sortable table ordered by recent edits, preceded by the period. */
	private static class Printer extends WikipediansPrinter {
		
		protected int getTargetEdits(User user) {
			return user.getEditsInRecentDays();
		}
		
		protected String getTableHeader() {
			return "名次 !! 用户 !! 最近编辑次数 !! 累积编辑次数";
		}
		
		protected String getSpecialText() {
			return "Special";
		}
		
		protected String getUserText() {
			return "User";
		}
				
		protected String getSortable() {
			return SORTABLE;
		}
		
		/** Writes the period line followed by an empty line. */
		protected void printHeader() {
			final String from = DATE_FORMAT.format(getBeginTimestamp());
			final String to = DATE_FORMAT.format(getEndTimestamp());
			getWriter().println("期间: " + from + " &mdash; " + to + " (UTC)");
			getWriter().println();
		}
		
		protected Comparator<User> createComparator() {
			return new UsersComparator();
		}
		
		/** Orders by recent edits, then by total edits, both descending. */
		private static class UsersComparator implements Comparator<User> {
			public int compare(User user1, User user2) {
				final int byRecent = user2.getEditsInRecentDays() - user1.getEditsInRecentDays();
				if (byRecent != 0) {
					return byRecent;
				}
				return user2.getEdits() - user1.getEdits();
			}
		}

		protected void printEdits(User user) {
			final String recentColumn = " || " + user.getEditsInRecentDays();
			final String totalColumn = " || " + user.getEdits();
			getWriter().print(recentColumn);
			getWriter().print(totalColumn);
		}
		
	}
	
}

WikipediansPrinter.java

import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;

/**
 * Renders a ranked table of users in wiki table markup.
 *
 * <p>Subclasses decide which edit counter is ranked, how users are ordered
 * and which texts appear in the table; this class owns the ranking loop and
 * the summary statistics written to standard error.
 */
abstract class WikipediansPrinter {
	
	private PrintWriter writer = null;
	
	public PrintWriter getWriter() {
		return writer;
	}

	public void setWriter(PrintWriter writer) {
		this.writer = writer;
	}
	
	private Date beginTimestamp = null;
	private Date endTimestamp = null;
	
	public Date getBeginTimestamp() {
		return beginTimestamp;
	}

	public void setBeginTimestamp(Date beginTimestamp) {
		this.beginTimestamp = beginTimestamp;
	}

	public Date getEndTimestamp() {
		return endTimestamp;
	}

	public void setEndTimestamp(Date endTimestamp) {
		this.endTimestamp = endTimestamp;
	}
	
	private int totalEdits = 0;

	public void setTotalEdits(int totalEdits) {
		this.totalEdits = totalEdits;
	}

	public int getTotalEdits() {
		return totalEdits;
	}

	private int totalEditsInPeriod = 0;

	public void setTotalEditsInPeriod(int totalEditsInPeriod) {
		this.totalEditsInPeriod = totalEditsInPeriod;
	}
	
	/**
	 * @return the number of edits the listed share is compared against; the
	 *         edits in the period unless a subclass overrides it
	 */
	protected int getTargetTotalEdits() {
		return totalEditsInPeriod;
	}

	/**
	 * Sorts the users and writes the table, then reports summary statistics
	 * on standard error.
	 *
	 * <p>Users in the bot group get a row without a rank and do not advance
	 * the ranking. Users with the same target edit count share a rank. Output
	 * stops at the first user whose rank exceeds {@code limit}; that user is
	 * neither printed nor included in the statistics.
	 *
	 * @param users users to print; the array is sorted in place
	 * @param userGroups source of the group of each user id
	 * @param limit highest rank that is still printed
	 */
	public void print(User[] users, UserGroups userGroups, int limit) {
		try {
			printHeader();
			Arrays.sort(users, createComparator());
			writer.print("{| class=\"wikitable" + getSortable() + "\"");
			writer.println();
			writer.print("! " + getTableHeader());
			writer.println();
			int rank = 0;
			int prevCount = 0;
			int sameRank = 0;
			int totalEditsByListedUsers = 0;
			int numberOfListedEditors = 0;
			for (User user : users) {
				final String group = getGroup(user, userGroups.group(user.getId()));
				final String groupText = (group.equals("") ? "" : " (" + group + ")");
				final boolean isBot = group.equals(UserGroups.BOT);
				final int targetEdits = getTargetEdits(user);
				final String rankText;
				if (!isBot) {
					if (rank == 0) {
						rank++;
						sameRank = 1;
					} else if (targetEdits < prevCount) {
						// Skip the ranks taken up by the preceding tie.
						rank += sameRank;
						sameRank = 1;
					} else {
						sameRank++;
					}
					rankText = Integer.toString(rank);
					prevCount = targetEdits;
				} else {
					rankText = "";
				}
				if (rank > limit) {
					break;
				}
				// Account for the editor only now that the row is certain to
				// be printed.
				if (!isBot) {
					numberOfListedEditors++;
					totalEditsByListedUsers += targetEdits;
				}
				writer.print("|-");
				writer.println();
				writer.print("| " + rankText);
				writer.print(" || ");
				processAnonymous(user);
				if (user.getId() == 0) {
					writer.print("[[" + getSpecialText() + ":Contributions/" + user.getText() + "|" + user.getText() + "]]");
				} else {
					writer.print("[[" + getUserText() + ":" + user.getText() + "|" + user.getText() + "]]");
				}
				writer.print(groupText);
				printEdits(user);
				writer.println();

			}
			writer.print("|}");
			writer.println();
			// Guard both ratios: the list may contain no ranked editor, and
			// the dump may contain no edit in the period.
			final int averageEdits = numberOfListedEditors == 0
					? 0 : totalEditsByListedUsers / numberOfListedEditors;
			final int targetTotalEdits = getTargetTotalEdits();
			final float percentage = targetTotalEdits == 0
					? 0f : ((float)totalEditsByListedUsers / (float)targetTotalEdits) * 100;
			System.err.println("This list of " + limit + " editors represents " + totalEditsByListedUsers + " total edits,"
					+ " with an average of " + averageEdits + " per editor.");
			System.err.println("This accounts for "
					+ new DecimalFormat("#0.0").format(percentage) + "%"
					+ " of the " + targetTotalEdits + " total edits made to the Wikipedia.");
		} finally {
			writer.flush();
			System.err.flush();
		}
	}
	
	/** @return the edit count by which the user is ranked */
	protected abstract int getTargetEdits(User user);
	
	/** @return the header cells of the table, separated by {@code !!} */
	protected abstract String getTableHeader();
	
	/** @return the namespace name used in contribution links for users with id 0 */
	protected abstract String getSpecialText();
	
	/** @return the namespace name used in links to user pages */
	protected abstract String getUserText();
	
	/** @return the order in which users are listed */
	protected abstract Comparator<User> createComparator();
	
	/** Writes text in front of the table; nothing by default. */
	protected void printHeader() {
		return;
	}
	
	/** Writes the edit-count cells of a row, each introduced by {@code " || "}. */
	protected abstract void printEdits(User user);
	
	/** Hook to alter a user just before its name is printed; no-op by default. */
	protected void processAnonymous(User user) {
		return;
	}
	
	/**
	 * Hook to override the group of a user.
	 *
	 * @param user the user being printed
	 * @param group the group found in the user groups data
	 * @return the group to use; {@code group} unchanged by default
	 */
	protected String getGroup(User user, String group) {
		return group;
	}
	
	/** Class-name suffix that makes the table sortable. */
	protected final String SORTABLE = " sortable";
	
	/** @return the suffix appended to the table class; empty by default */
	protected String getSortable() {
		return "";
	}

}

手順

編集
  • データベースダンプが提供されているサイトからダンプデータをダウンロードします (https://download.wikimedia.org/) 。データベースダンプのデータの生成は不定期に行われています。必要となるのは次のファイルです。
  • Javaプログラムを実行して一覧データを生成します。
  • 出力されたテキストファイルはウィキテキスト(ウィキのソース)の形式になっていますので、テキストファイルをエディタで開いてコピーペーストしてウェブブラウザから一覧ページを更新することができます。
java -Xmx500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=200 WikipediansByNumberOfRecentEdits_ja jawiki-20080501-stub-meta-history.xml.gz jawiki-20080501-user_groups.sql.gz > result.txt

英語版の場合

編集
  • 英語版では、一覧に掲載されたくない方々については、一覧に掲載しない慣習になっているようです。次のようにしてください。
  • anonymous.txt の内容の例:
   1. User:Mikkalai
   2. User:Haemo
   3. User:Jeffrey O. Gustafson
   .
   .
   .

もしくは、

User:Mikkalai 
User:Haemo 
User:Jeffrey O. Gustafson 
   .
   .
   .
  • 英語版では、フラグありのボットに加えて、フラグなしのボットもボットとして扱う慣習になっているようです。次のようにしてください。
  • unflagged-bots.txt の内容の例:
   1. Bluebot
   2. AntiVandalBot
   3. MartinBot
   .
   .
   .

もしくは、

Bluebot 
AntiVandalBot 
MartinBot 
   .
   .
   .
java -Xmx1500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=5000 WikipediansByNumberOfRecentEdits_en enwiki-20080501-stub-meta-history.xml.gz enwiki-20080501-user_groups.sql.gz > result.txt
java -Xmx1500m -Dbegin.date=2008-04-01 -Dend.date=2008-04-30 -Dlimit=4000 WikipediansByNumberOfEdits_en enwiki-20080501-stub-meta-history.xml.gz enwiki-20080501-user_groups.sql.gz > result.txt