News:

Bored?  Looking to kill some time?  Want to chat with other SMF users?  Join us in IRC chat or Discord

Main Menu

Anyone interested in a raw xml sitemap generator? (2.0 only)

Started by Vekseid, June 29, 2009, 05:31:03 PM

Previous topic - Next topic

Vekseid

I designed a script that can build a sitemap index via a cron run. The mod won't index an entire site - this will, and it provides a nice starting point for making something more full-featured than the hack job it is now. It also does not hit the CPU on each and every load.

Change the settings as appropriate. You'll want to place it in your forum folder, and access it with
scriptname.php?key=key&a=xxx

Where xxx is one of
boards
topics# - where # is the remainder of the topic id modulo 10^$sitemap_digits, i.e. which topic sitemap to build (with $sitemap_digits set to 0 there is only one topic sitemap, so you just use topics)
index - actually builds a sitemap index using all of your available sitemaps.

This is a pretty horrible hackjob. I imagine, eventually, some forum is going to break a million threads, but by then SMF 3.0 should be out and this wouldn't work on it anyway.

I'm posting this with the expectation that people who will use it know what they are doing. I don't really have much time to offer free support, I am afraid, but feel free to butcher this into a proper script, just give credit please : )


<?php
  // Shared secret that has to be passed as the "key" GET parameter.
  // This script is supposed to be called via cron. To prevent abuse and
  // allow testing, we require that a key be passed.
  $key = 'changeme';

  // Refuse to do anything unless the caller supplied exactly this key.
  // The strict comparison avoids PHP's loose type juggling between
  // numeric-looking strings and also rejects array input such as key[]=x.
  if (!isset ($_GET['key']) || $_GET['key'] !== $key)
    {
      die ('Please pass your key as a get parameter.');
    }
    
  function 
db_query ($query)
    {
      
//echo 'Query: '.$query.'<br />';
      
$result mysql_query ($query) or die (mysql_error ().' <br /><b>Query:</b> '.$query);
      return 
$result;
    }

  
// Directory this script lives in. Settings.php is loaded from the same
// directory; it is expected to define $db_server, $db_user, $db_passwd,
// $db_name and $db_prefix, which are used below and by the actions.
define ('PATH_FORUM', dirname (__FILE__));
require (PATH_FORUM.'/Settings.php');

// Directory the sitemap files are written to.
$sitemap_path = realpath (PATH_FORUM.'/../sitemaps'); // Change this to the appropriate relative path.

// realpath() returns false when the directory does not exist. Stop here
// instead of silently building file names that start at the filesystem root.
if ($sitemap_path === false)
  {
    die ('The sitemap directory does not exist: '.PATH_FORUM.'/../sitemaps');
  }

$site_url = 'http://mysite.com/forums'; // Base URL of the forum (prefix of every board/topic link).
$map_url = 'http://mysite.com/sitemaps'; // Base URL of the sitemap files (prefix of every link in the index).
$sitemap_topics = 'sitemap.topics#.xml'; // Name of sitemap(s) for smf's topics.  # gets replaced by 0-4 digits.
$sitemap_digits = 0; // Number of digits. 0-4. Each sitemap can hold up to 50,000 entries.  How many entries you need depends on your posts per page.
$sitemap_boards = 'sitemap.boards.xml'; // Name of sitemap for smf's boards.
$sitemap_index = 'sitemap.index.xml'; // This script will autogenerate an index that includes additional defined sitemaps.
$sitemaps = array ('sitemap.main.xml', $sitemap_boards); // Any additional sitemaps, in an array. Their last-modified dates are read from the files automatically.

// Open the default MySQL connection used by db_query(); fail loudly rather
// than letting every later query die with a less helpful message.
mysql_connect ($db_server, $db_user, $db_passwd) or die ('Could not connect to the database server: '.mysql_error ());
mysql_select_db ($db_name) or die ('Could not select the database: '.mysql_error ());
  
  
  if (!isset (
$_GET['a']))
    {
      die (
'You need to set a=boards, a=index, or a=topics# where # is the sitemap # of topics being created.');
    }
    
  if (
$_GET['a'] == 'boards')
    {
      
$query 'select value from '.$db_prefix.'settings where variable="defaultMaxTopics"';
      
$result mysql_fetch_assoc (db_query ($query));
      
$topicsperpage $result['value'];
      
      
$query 'select DISTINCT b.id_board, b.child_level, b.count_posts, b.member_groups, COUNT(t.id_topic) as numtopics from '.$db_prefix.'boards as b inner join '.$db_prefix.'topics as t on t.id_board=b.id_board where b.member_groups LIKE "-1%" GROUP BY t.id_board';
      
$result db_query ($query);
      
      
$fp fopen ($sitemap_path.'/'.$sitemap_boards'w');
      
      
fwrite ($fp'<?xml version="1.0" encoding="UTF-8"?>
'."\n".'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'."\n");
     
      while ($row = mysql_fetch_assoc ($result))
        {
          $boardvalue = '.9';
         
          if ($row['count_posts'] > 0)
            {
              $boardvalue = '.5';
            }
          elseif ($row['child_level'] > 0)
            {
              $boardvalue = '.7';
            }
           
          $string = '   <url>'."\n".
                    '      <loc>'.$site_url.'/index.php?board='.$row['id_board'].'.0</loc>'."\n".
                    '      <changefreq>hourly</changefreq>'."\n".
                    '      <priority>'.$boardvalue.'</priority>'."\n".
                    '   </url>'."\n";
                   
          fwrite ($fp, $string);
         
          $numboardpages = intval (floor ($row['numtopics'] / $topicsperpage));
         
          for ($i = 0; $i < $numboardpages; $i++)
            {
              $start = $topicsperpage + $i * $topicsperpage;
             
              $string = '   <url>'."\n".
                        '      <loc>'.$site_url.'/index.php?board='.$row['id_board'].'.'.$start.'</loc>'."\n".
                        '      <changefreq>daily</changefreq>'."\n".
                        '      <priority>.1</priority>'."\n".
                        '   </url>'."\n";
                   
              fwrite ($fp, $string);
            }
        }
       
      fwrite ($fp, '</urlset>');
      fclose ($fp);
     
      mysql_free_result($result);
    }
  elseif ($_GET['a'] == 'index')
    {
      $fp = fopen ($sitemap_path.'/'.$sitemap_index, 'w');
     
      fwrite ($fp, '<?xml version="1.0" encoding="UTF-8"?>'."\n".'<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'."\n");
     
      $topicmapper = explode ('#', $sitemap_topics);
     
      if ($sitemap_digits)
        {
          $topicmaps = pow (10, $sitemap_digits);
         
          for ($i = 0; $i < $topicmaps; $i++)
            {
              $sitemaps[] = $topicmapper[0].$i.$topicmapper[1];
            }
        }
      else
        {
          $sitemaps[] = $topicmapper[0].$topicmapper[1];
        }
       
      foreach ($sitemaps as $map)
        {
          if (file_exists ($sitemap_path.'/'.$map))
            {
              $mtime = filemtime ($sitemap_path.'/'.$map);
              $date = date ('Y-m-d', $mtime);
             
              $string = '   <sitemap>'."\n".
                        '      <loc>'.$map_url.'/'.$map.'</loc>'."\n".
                        '      <lastmod>'.$date.'</lastmod>'."\n".
                        '   </sitemap>'."\n";
                       
              fwrite ($fp, $string);
            }
/*          else
            {
              echo $sitemap_path.'/'.$map.' not found.<br />';
            } // */
        }
     
      fwrite ($fp, '</sitemapindex>');
      fclose ($fp);
    }
  else
    {
      $map = explode ('s', $_GET['a']);
     
      if ($map[0] != 'topic')
        {
          die ('a needs to be topics#, where # is the id of the topic index.');
        }
     
      $query = 'select value from '.$db_prefix.'settings where variable="defaultMaxMessages"';
      $result = mysql_fetch_assoc (db_query ($query));
      $postsperpage = $result['value'];
      $modquery = '';
      $num = '';
       
      if ($sitemap_digits > 0)
        {
          if (!isset ($map[1]) && strlen ($map[1]))
            {
              die ('$sitemap_digits is over zero. a needs to be topics#, where # is the id of the topic index.');
            }
         
          $num = abs (intval ($map[1]));
         
          if ($num < pow (10, $sitemap_digits))
            {
              $modulo = pow (10, $sitemap_digits);
              $modquery = ' && id_topic % '.$modulo.' = '.$num;
            }
          else
            {
              die ('# in topics# needs to be an integer number with less than or equal to '.$sitemap_digits.' digits.');
            }
        }
       
      $query = 'select t.id_topic, t.is_sticky, t.num_replies, b.count_posts, m2.modified_time as fmodified_time, m1.poster_time as lposter_time, m1.modified_time as lmodified_time from '.$db_prefix.'topics as t inner join '.$db_prefix.'boards as b on b.id_board = t.id_board inner join '.$db_prefix.'messages as m1 on t.id_last_msg=m1.id_msg inner join '.$db_prefix.'messages as m2 on t.id_first_msg=m2.id_msg where b.member_groups LIKE "-1%" '.$modquery;
     
      $result = db_query ($query);
      $topicmapper = explode ('#', $sitemap_topics);
      $fp = fopen ($sitemap_path.'/'.$topicmapper[0].$num.$topicmapper[1], 'w');
      fwrite ($fp, '<?xml version="1.0" encoding="UTF-8"?>'."\n".'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'."\n");
     
      while ($row = mysql_fetch_assoc ($result))
        {
          $topicvalue = '.7';
          $nextvalue = '.5';
          $date = date ('Y-m-d', max ($row['fmodified_time'], $row['lposter_time'], $row['lmodified_time']));
         
          if ($row['count_posts'] > 0)
            {
              $topicvalue = '.5';
              $nextvalue = '.1';
            }
          elseif ($row['is_sticky'] || $row['num_replies'] > 24)
            {
              $topicvalue = '.9';
              $nextvalue = '.6';
            }
         
          $string = '   <url>'."\n".
                    '      <loc>'.$site_url.'/index.php?topic='.$row['id_topic'].'.0</loc>'."\n".
                    '      <lastmod>'.$date.'</lastmod>'."\n".
                    '      <changefreq>daily</changefreq>'."\n".
                    '      <priority>'.$topicvalue.'</priority>'."\n".
                    '   </url>'."\n";
                   
          fwrite ($fp, $string);
         
          $numtopicpages = intval (floor (($row['num_replies'] + 1) / $postsperpage));
         
          for ($i = 0; $i < $numtopicpages; $i++)
            {
              $start = $postsperpage + $i * $postsperpage;
             
              $string = '   <url>'."\n".
                        '      <loc>'.$site_url.'/index.php?topic='.$row['id_topic'].'.'.$start.'</loc>'."\n".
                        '      <lastmod>'.$date.'</lastmod>'."\n".
                        '      <changefreq>daily</changefreq>'."\n".
                        '      <priority>'.$nextvalue.'</priority>'."\n".
                        '   </url>'."\n";
                   
              fwrite ($fp, $string);
            }
        }
       
      fwrite ($fp, '</urlset>');
      fclose ($fp);
     
      mysql_free_result($result);
    }


Edit: Added thread lastmod
Adult Role Playing Forums - - Over five million posts - - Elliquiy's LAMP configuration (maybe NSFW)

Blog about Forums and Servers - - Twenty things to make Simple Machines Forum go faster

Private/Instant Message requests for free support will be ignored.

Advertisement: