From 64cebf712e669b1e84428bbdca2a5653751c93ed Mon Sep 17 00:00:00 2001
From: Andreas Gohr <andi@splitbrain.org>
Date: Fri, 28 Jul 2017 10:03:33 +0200
Subject: [PATCH] made bin/wantedpages.php more flexible

* results can be sorted by wanted or origin
* second dimension can be skipped
* results should be easier to parse with standard unix tools now
* use proper options, not commands
---
 bin/wantedpages.php | 99 ++++++++++++++++++++++++++-------------------
 1 file changed, 57 insertions(+), 42 deletions(-)

diff --git a/bin/wantedpages.php b/bin/wantedpages.php
index a7f0ab70d..6887ac18e 100755
--- a/bin/wantedpages.php
+++ b/bin/wantedpages.php
@@ -1,8 +1,8 @@
 #!/usr/bin/php
 <?php
-if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__).'/../').'/');
+if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
 define('NOSESSION', 1);
-require_once(DOKU_INC.'inc/init.php');
+require_once(DOKU_INC . 'inc/init.php');
 
 /**
  * Find wanted pages
@@ -10,9 +10,14 @@ require_once(DOKU_INC.'inc/init.php');
 class WantedPagesCLI extends DokuCLI {
 
     const DIR_CONTINUE = 1;
-    const DIR_NS       = 2;
-    const DIR_PAGE     = 3;
-    private $show_pages = false;
+    const DIR_NS = 2;
+    const DIR_PAGE = 3;
+
+    private $skip = false;
+    private $sort = 'wanted';
+
+    private $result = array();
+
     /**
      * Register options and arguments on the given $options object
      *
@@ -21,18 +26,27 @@ class WantedPagesCLI extends DokuCLI {
      */
     protected function setup(DokuCLI_Options $options) {
         $options->setHelp(
-            'Outputs a list of wanted pages (pages which have internal links but do not yet exist).'
+            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
+            ' (the pages that are linkin to these missing pages).'
         );
         $options->registerArgument(
             'namespace',
             'The namespace to lookup. Defaults to root namespace',
             false
         );
-            $options->registerCommand(
-            'show-pages',
-            'Show wiki pages on which broken links (i.e. wanted pages) are found, listed as: wiki_page=>broken_link' 
+
+        $options->registerOption(
+            'sort',
+            'Sort by wanted or origin page',
+            's',
+            '(wanted|origin)'
         );
 
+        $options->registerOption(
+            'skip',
+            'Do not show the second dimension',
+            'k'
+        );
     }
 
     /**
@@ -46,28 +60,30 @@ class WantedPagesCLI extends DokuCLI {
     protected function main(DokuCLI_Options $options) {
 
         if($options->args) {
-            $startdir = dirname(wikiFN($options->args[0].':xxx'));
+            $startdir = dirname(wikiFN($options->args[0] . ':xxx'));
         } else {
             $startdir = dirname(wikiFN('xxx'));
         }
-        
-        $cmd = $options->getCmd();
-        if($cmd == 'show-pages') {
-            $this->show_pages = true;
-        }
 
-        $this->info("searching $startdir");
+        $this->skip = $options->getOpt('skip');
+        $this->sort = $options->getOpt('sort');
 
-        $wanted_pages = array();
+        $this->info("searching $startdir");
 
         foreach($this->get_pages($startdir) as $page) {
-            $wanted_pages = array_merge($wanted_pages, $this->internal_links($page));
+            $this->internal_links($page);
         }
-        $wanted_pages = array_unique($wanted_pages);
-        sort($wanted_pages);
-
-        foreach($wanted_pages as $page) {
-            print $page."\n";
+        ksort($this->result);
+        foreach($this->result as $main => $subs) {
+            if($this->skip) {
+                print "$main\n";
+            } else {
+                $subs = array_unique($subs);
+                sort($subs);
+                foreach($subs as $sub) {
+                    printf("%-40s %s\n", $main, $sub);
+                }
+            }
         }
     }
 
@@ -82,7 +98,7 @@ class WantedPagesCLI extends DokuCLI {
         if($entry == '.' || $entry == '..') {
             return WantedPagesCLI::DIR_CONTINUE;
         }
-        if(is_dir($basepath.'/'.$entry)) {
+        if(is_dir($basepath . '/' . $entry)) {
             if(strpos($entry, '_') === 0) {
                 return WantedPagesCLI::DIR_CONTINUE;
             }
@@ -105,7 +121,7 @@ class WantedPagesCLI extends DokuCLI {
         static $trunclen = null;
         if(!$trunclen) {
             global $conf;
-            $trunclen = strlen($conf['datadir'].':');
+            $trunclen = strlen($conf['datadir'] . ':');
         }
 
         if(!is_dir($dir)) {
@@ -113,17 +129,17 @@ class WantedPagesCLI extends DokuCLI {
         }
 
         $pages = array();
-        $dh    = opendir($dir);
+        $dh = opendir($dir);
         while(false !== ($entry = readdir($dh))) {
             $status = $this->dir_filter($entry, $dir);
             if($status == WantedPagesCLI::DIR_CONTINUE) {
                 continue;
             } else if($status == WantedPagesCLI::DIR_NS) {
-                $pages = array_merge($pages, $this->get_pages($dir.'/'.$entry));
+                $pages = array_merge($pages, $this->get_pages($dir . '/' . $entry));
             } else {
-                $page    = array(
-                    'id'   => pathID(substr($dir.'/'.$entry, $trunclen)),
-                    'file' => $dir.'/'.$entry,
+                $page = array(
+                    'id' => pathID(substr($dir . '/' . $entry, $trunclen)),
+                    'file' => $dir . '/' . $entry,
                 );
                 $pages[] = $page;
             }
@@ -133,35 +149,34 @@ class WantedPagesCLI extends DokuCLI {
     }
 
     /**
-     * Parse instructions and returns the non-existing links
+     * Parse instructions and add the non-existing links to the result array
      *
      * @param array $page array with page id and file path
-     * @return array
      */
     function internal_links($page) {
         global $conf;
         $instructions = p_get_instructions(file_get_contents($page['file']));
-        $links        = array();
-        $cns          = getNS($page['id']);
-        $exists       = false;
+        $cns = getNS($page['id']);
+        $exists = false;
         $pid = $page['id'];
         foreach($instructions as $ins) {
             if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink')) {
                 $mid = $ins[1][0];
                 resolve_pageid($cns, $mid, $exists);
                 if(!$exists) {
-                    list($mid) = explode('#', $mid); //record pages without hashs
-                    if($this->show_pages) {
-                    $links[] = "$pid => $mid";
-                    }                    
-                    else $links[] = $mid;
+                    list($mid) = explode('#', $mid); //record pages without hashes
+
+                    if($this->sort == 'origin') {
+                        $this->result[$pid][] = $mid;
+                    } else {
+                        $this->result[$mid][] = $pid;
+                    }
                 }
             }
         }
-        return $links;
     }
 }
 
 // Main
 $cli = new WantedPagesCLI();
-$cli->run();
\ No newline at end of file
+$cli->run();
-- 
GitLab