#!/usr/bin/perl -wT
#
# tag-syndicate
#
# Copyright (c) 2006-2007 Max Kanat-Alexander. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# CGI script that re-publishes a LiveJournal user's RSS feed, keeping only
# the entries selected by tag.
#
# Query parameters:
#   user  - (required) journal user name.
#   tag   - (repeatable) keep entries carrying at least one of these tags;
#           an empty value selects untagged entries.
#   notag - (repeatable) drop entries carrying any of these tags; an empty
#           value drops untagged entries.
#
# When no "notag" parameter is given, the request is answered with a 301
# redirect to the journal's own data/rss URL (with the tags passed along
# as "?tag=a,b"). Otherwise the feed is mirrored into rss/<user>, filtered
# locally and printed as text/xml.

use strict;
use warnings;
use XML::Simple qw(:strict);
local $XML::Simple::PREFERRED_PARSER = 'XML::Parser';
use CGI;
use LWP::UserAgent;
use CGI::Util qw(escape);
use CGI::Carp qw(fatalsToBrowser);

# Maximum number of download attempts before giving up on the feed.
my $max_fetch_tries = 5;

# Returns the base URL (with trailing slash) of a user's journal.
# User names containing an underscore are addressed by path under
# users.livejournal.com; all others are addressed as a subdomain.
sub user_url {
    my ($username) = @_;
    if ($username =~ /_/) {
        return "http://users.livejournal.com/" . escape($username) . "/";
    }
    return "http://" . escape($username) . ".livejournal.com/";
}

# Returns every value of a possibly repeated query parameter.
# Uses multi_param() where the installed CGI.pm offers it (calling
# param() in list context warns there) and falls back to param().
sub param_list {
    my ($query, $name) = @_;
    return $query->can('multi_param')
        ? $query->multi_param($name)
        : $query->param($name);
}

my $cgi = CGI->new;
$cgi->charset('UTF-8');

# Set up script parameters
my $user = $cgi->param('user');
die "You must specify a user." unless defined $user && length $user;

my @tags     = param_list($cgi, 'tag');
my @not_tags = param_list($cgi, 'notag');

# Query values arrive as UTF-8 encoded bytes, while the feed parser yields
# character strings; decode so non-ASCII tags compare equal.
# utf8::decode leaves a value untouched when it is not valid UTF-8.
utf8::decode($_) for @tags, @not_tags;

my $base_url = user_url($user);
my $rss_url  = $base_url . 'data/rss';

# Without exclusions there is nothing to filter locally: send the client
# to the journal's own feed URL.
if (!@not_tags) {
    my $redirect_query =
        @tags ? "?tag=" . join(',', map { escape($_) } @tags) : '';
    # redirect() only builds the header text; it has to be printed.
    print $cgi->redirect(
        -status => '301 Moved Permanently',
        -uri    => "$rss_url$redirect_query",
    );
    exit;
}

# The channel link of the filtered feed points at the tag page when exactly
# one tag was requested, otherwise at the journal itself.
my $tag_url = $base_url;
if (@tags == 1) {
    $tag_url .= "tag/" . escape($tags[0]);
}

# Derive the cache file name from the user name. Every character outside
# [A-Za-z0-9_] becomes "_", and the result is untainted through an anchored
# capture so that it can be used in file operations under -T.
(my $user_file = $user) =~ s/[^A-Za-z0-9_]/_/g;
my ($safe_name) = $user_file =~ /\A([A-Za-z0-9_]+)\z/
    or die "Invalid User: $user";
my $rss_file = "rss/" . $safe_name;

# Get the RSS
my $ua = LWP::UserAgent->new(
    agent => 'Tag Syndicator 1.0: hostmaster@kanat.us',
);

my $size;
for my $attempt (1 .. $max_fetch_tries) {
    $ua->mirror($rss_url, $rss_file);

    # No file at all after a mirror attempt: nothing was ever fetched for
    # this user name.
    my @stat = stat $rss_file;
    die "Invalid User: $user" unless @stat;

    $size = $stat[7];
    last if $size;

    # Sometimes we get a zero-length file, so we try again. Remove the
    # empty file first: mirror() makes a conditional request when the
    # target exists, and a "not modified" answer would keep it empty.
    unlink $rss_file;
    sleep(1) if $attempt < $max_fetch_tries;
}
die "Could not fetch the RSS feed for user: $user" unless $size;

# Parse the RSS
my $parser = XML::Simple->new(
    ForceArray => 1,
    AttrIndent => 1,
    KeepRoot   => 1,
    KeyAttr    => [],
    XMLDecl    => q{},
);
my $xml_data = $parser->XMLin($rss_file);

my $channel = $xml_data->{rss}->[0]->{channel}->[0];
die "The feed for user $user is not a valid RSS document"
    unless ref $channel eq 'HASH';

my $items = $channel->{item} || [];

# Lookup tables for the requested and the excluded tags.
my %wanted   = map { $_ => 1 } @tags;
my %unwanted = map { $_ => 1 } @not_tags;

my @new_items;
foreach my $item (@$items) {
    my $categories = $item->{category};

    if (!$categories) {
        # Untagged entry: kept when no tag filter is active or the empty
        # tag was requested, unless the empty tag is excluded.
        next if @tags && !$wanted{''};
        next if $unwanted{''};
        push(@new_items, $item);
        next;
    }

    # Tagged entry: with a tag filter, at least one category must be
    # requested; in all cases no category may be excluded.
    next if @tags && !grep { $wanted{$_} } @$categories;
    next if grep { $unwanted{$_} } @$categories;
    push(@new_items, $item);
}
$channel->{item} = \@new_items;

# Also fix the title to be more tech-like, and link to only the tag
if ($user eq 'avatraxiom') {
    $channel->{title}->[0] = q{Max's Blog};
}
$channel->{link}->[0] = $tag_url;

# Use the first kept entry's pubDate as Last-Modified. Test each level
# before dereferencing so that no empty item or pubDate element is
# autovivified into the feed data, and omit the header when there is no
# usable date.
my @header_args = (-type => 'text/xml');
if (@new_items
    && ref $new_items[0] eq 'HASH'
    && ref $new_items[0]->{pubDate} eq 'ARRAY')
{
    my $last_modified = $new_items[0]->{pubDate}->[0];
    push(@header_args, -last_modified => $last_modified)
        if $last_modified && !ref $last_modified;
}

# print the XML
binmode STDOUT, ":utf8";
print $cgi->header(@header_args);
print $parser->XMLout($xml_data);

1;