summaryrefslogtreecommitdiff
blob: 0ce706db99a6bb9d423880ae656c9710d71ab99a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
Listing pages through the MediaWiki API returns at most 500 results per
request (hi Patrick). To get a list of all pages in a larger wiki, we need to
run repeated queries, each one resuming (via `apfrom`) from the last page
title returned by the previous one.

Source: https://github.com/moy/Git-Mediawiki/issues/32
Author: anarcat https://github.com/anarcat

diff --git a/contrib/mw-to-git/git-remote-mediawiki.perl b/contrib/mw-to-git/git-remote-mediawiki.perl
index 8dd74a9..f2ce311 100755
--- a/contrib/mw-to-git/git-remote-mediawiki.perl
+++ b/contrib/mw-to-git/git-remote-mediawiki.perl
@@ -259,16 +259,29 @@ sub get_mw_tracked_categories {
 sub get_mw_all_pages {
 	my $pages = shift;
 	# No user-provided list, get the list of pages from the API.
-	my $mw_pages = $mediawiki->list({
+	my $query = { # kept in a variable so apfrom can be added between requests
 		action => 'query',
 		list => 'allpages',
 		aplimit => 'max'
-	});
-	if (!defined($mw_pages)) {
+	};
+	my $curpage; # title of the last page seen so far; undef until the first batch arrives
+	my $oldpage = ''; # value of $curpage when the previous request was issued
+	while (1) { # one API request per iteration; left via 'last' below
+	if (defined($curpage)) { # false only on the first iteration
+		if ($oldpage eq $curpage) { # previous batch did not advance the last title
+		    last;
+		}
+		$query->{apfrom} = $curpage; # next request starts from the last title seen
+		$oldpage = $curpage;
+	    }
+	    my $mw_pages = $mediawiki->list($query);
+	    if (!defined($mw_pages)) {
 		fatal_mw_error("get the list of wiki pages");
-	}
-	foreach my $page (@{$mw_pages}) {
+	    }
+	    foreach my $page (@{$mw_pages}) {
 		$pages->{$page->{title}} = $page;
+		$curpage = $page->{title}; # after the loop: title of the last page in this batch
+	    }
 	}
 	return;
 }