| 1 |
|
|---|
| 2 |
use lib "/home/fml/public_html/lib"; |
|---|
| 3 |
use FeedMeLinks::Environment; |
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
use strict;
use File::Temp qw(tempfile);

# ---------------------------------------------------------------------------
# Bookmark import driver.
#
# Positional arguments:
#   $ARGV[0]  bookmarks file to import
#   $ARGV[1]  parser name (accepted but not used by this script)
#   $ARGV[2]  XSLT stylesheet that extracts links
#   $ARGV[3]  XSLT stylesheet that extracts tags
#
# The file format is detected by detect_bookmarks_format():
#   * "delicious-hybrid" is first converted with hybrid-to-xml.pl and then
#     handled as "delicious-API";
#   * "delicious-API" is transformed by xsltproc into LINKS.RAW / TAGS.RAW;
#   * anything else is parsed line by line as an indented <DT>/<H3>/<A>
#     bookmark listing, where every 4 columns of indentation is one folder
#     level.
#
# LINKS.RAW and TAGS.RAW are written next to the bookmarks file, and a
# <result ...>SUCCESS</result> line is printed on standard output.
#
# NOTE(review): the shell commands below interpolate the arguments unquoted,
# so paths containing spaces or shell metacharacters will misbehave. They are
# left as-is because the XSLT arguments may intentionally hold several words;
# confirm against the caller before quoting them.
# ---------------------------------------------------------------------------

# File-scoped state shared with the subs defined further down in this file.
my ($debug, $link, $tag, @tags, @links, @folks, $url, $hideGarbage, $dd, $glc, $mandatory_tag, $path, $filename, $bin, $format, $cmd, $depth, $folksPath, $lastDepth, $lastFolder, $description);

$glc         = 0;       # running line counter used by p()
$debug       = 0;
$hideGarbage = 1;       # suppress reports about unrecognised lines
$link = $tag = undef;
$lastDepth   = 0;

push @folks, "ROOT";    # bottom of the folder stack; never popped
$mandatory_tag = "imported links";
push @tags, $mandatory_tag;

$dd = "___DELIMETER___";    # field separator used in LINKS.RAW lines

$path = $filename = $ARGV[0];
my $parser    = $ARGV[1];   # unused; kept to document the argument order
my $link_xslt = $ARGV[2];
my $tag_xslt  = $ARGV[3];
$path =~ s:\/([^/]+)$::;    # drop the last path component -> directory
$bin = "/home/fml/public_html/bin";

# Only the bookmarks file is input for the <> loop below; without this the
# parser/XSLT arguments would be read as if they were bookmark data.
@ARGV = ( $ARGV[0] ) if @ARGV > 1;

my $success_code = "";
my $converted;              # temp file, only created for hybrid input

# Pop up to $levels folders off the stack, always keeping "ROOT".
my $close_folders = sub {
    my( $levels ) = @_;
    while( $levels > 0 && @folks > 1 ) {
        pop @folks;
        $levels--;
    }
};

$format = detect_bookmarks_format( $bin, $filename );
$format = "" unless defined $format;

print "format: |$format|\n" if( $debug );

if( $format eq "delicious-hybrid" ) {
    print "converting from hybrid format:\n" if( $debug );

    # Securely created, unpredictable name; also removed automatically at exit.
    my $tmp_fh;
    ( $tmp_fh, $converted ) = tempfile( "import-XXXXXX", DIR => "/tmp", UNLINK => 1 );
    close( $tmp_fh );

    $cmd = "$bin/hybrid-to-xml.pl $filename > $converted";
    print "cmd: $cmd\n";
    `$cmd`;
    $filename = $converted;

    if( $debug ) {
        my $out = `cat $filename`;
        print "$filename contains: \n</code><form><textarea rows='40' cols='150'>$out</textarea></form><code>\n\n";
    }
    $success_code = "converted from hybrid to stock xml";

    $format = "delicious-API";
}

# XML input is handled entirely by xsltproc; no line parsing is needed, so
# this runs before (not inside) the read loop.
if( $format eq "delicious-API" ) {

    $cmd = "xsltproc $link_xslt $filename > $path/LINKS.RAW\n";
    print "cmd:<br>$cmd\n" if( $debug );
    my $out = `$cmd`;
    print "cmd output was: $out\n";

    $cmd = "xsltproc $tag_xslt $filename | sort | uniq | grep -v '^\$' > $path/TAGS.RAW\n";
    print "cmd: $cmd\n";
    $out = `$cmd`;
    print "cmd output: $out\n";

    print "leaving the xml loop thing\n";
    print qq(<result delimeter="$dd" note="$success_code">SUCCESS</result>\n);
    unlink( $converted ) if defined $converted;
    exit 0;
}

while( my $line = <> ) {

    if( $line =~ m/^(\s+)<D/ ) {
        # Indented <DT>/<DL>/<DD> line: 4 columns of indent == one level.
        $depth = length( $1 ) / 4;

        # Snapshot the path BEFORE the stack changes: the entry flushed by
        # add_pending_link_or_tag() below belongs to the previous position.
        $folksPath = get_folks_path();

        # How many folder levels were closed since the last line.
        my $closed = ( $lastDepth || 0 ) - $depth;

        if( $line =~ m/<H3.+>(.*)<\/H3>/ ) {
            # Folder heading.
            my $folder = $1;
            add_pending_link_or_tag();
            $tag = $folder;
            if( $closed > 0 ) {
                d( "close " . $closed . " folders" );
                d( "found folder '$tag' (child of '" . $folks[ (scalar @folks) - 1 - $closed ] . "')" );
                $close_folders->( $closed );
            } else {
                d( "found folder '$tag' (child of '$folksPath')" );
            }
            # An untitled folder gets a placeholder name; "0" is a real title.
            push @folks, ( ( defined $tag && length $tag ) ? $tag : "__ROOT__" );

        } elsif( $line =~ m/<A HREF="([^"]+)"[^>]*>([^<]+)<\/A>/ ) {
            # Bookmark entry.
            my( $href, $title ) = ( $1, $2 );
            add_pending_link_or_tag();
            $url  = $href;
            $link = $title;

            if( $closed > 0 ) {
                d( "close " . $closed . " folders" );
                $close_folders->( $closed );
            }
            d( "found link: '$link' ($url)" );
        }

    } elsif( $line =~ m/^<DD>(.*)$/ ) {
        # Description for the pending link/folder, unless marked no-updated.
        my $text = $1;
        if( $line !~ m/no-updated/ ) {
            $description = $text;
            d( " found description for " . ($link ? "link '$link'" : " tag '$tag'" ) . ": $description" );
        }

    } elsif( $line =~ m/<\/DL><p>/ ) {
        # End of a folder listing: recognised, nothing to record. The stack
        # is adjusted from the indentation of the next entry instead.

    } else {
        if( $description ) {
            # Continuation line of a multi-line description.
            chomp( $line );
            $description = $description . "\n" . $line;
            d( " found description for " . ($link ? "link '$link'" : " tag '$tag'" ) . ": $description" );

        } elsif( $line !~ m/^\w*$/ ) {
            p( "XXXXXXXXXXXX GARBAGE FOUND: $line" ) unless $hideGarbage;
        }
    }

    $lastDepth = $depth;
}

# Flush whatever entry was still pending when the input ended.
$folksPath = get_folks_path();
add_pending_link_or_tag();
write_raw_files();
unlink( $converted ) if defined $converted;

exit( 0 );
|---|
| 144 |
|
|---|
| 145 |
|
|---|
| 146 |
|
|---|
| 147 |
# Ask the helper script which bookmark format a file is in.
#
# Arguments:
#   $bin      - directory containing detect-bookmarks-file-format.sh
#   $filename - path of the bookmarks file to inspect
#
# Returns the helper's standard output with one trailing newline removed, or
# an empty string when the helper could not be run or printed nothing.
sub detect_bookmarks_format {
    my( $bin, $filename ) = @_;

    my @args = defined $filename ? ( $filename ) : ();
    my $out;

    # List-form pipe open runs the helper without a shell, so a file name
    # containing spaces or shell metacharacters arrives as ONE literal
    # argument instead of being split or interpreted.
    if( open( my $pipe, '-|', "$bin/detect-bookmarks-file-format.sh", @args ) ) {
        local $/;           # slurp everything the helper prints
        $out = <$pipe>;
        close( $pipe );
    }

    $out = '' unless defined $out;
    chomp( $out );
    return $out;
}
|---|
| 154 |
|
|---|
| 155 |
# Write the collected links and tags to "$path/LINKS.RAW" and
# "$path/TAGS.RAW" (one entry per line) and print the SUCCESS result line.
#
# Reads the file-level @links, @tags, $path, $dd and $success_code.
# Side effect: @tags is sorted in place before it is written.
# Dies if either output file cannot be opened or closed.
sub write_raw_files() {
    @tags = sort @tags;

    # LINKS.RAW first, then TAGS.RAW.
    for my $job ( [ "$path/LINKS.RAW", \@links ], [ "$path/TAGS.RAW", \@tags ] ) {
        my( $file, $rows ) = @$job;

        # "or" (not "||") so that the die is attached to open() itself;
        # with "||" it bound to the file-name string and could never fire.
        open( my $out, '>', $file ) or die( "can't open $file: $!" );
        print {$out} "$_\n" for @$rows;
        # Buffered write errors only surface at close time.
        close( $out ) or die( "can't close $file: $!" );
    }

    print qq(<result delimeter="$dd" note="$success_code">SUCCESS</result>\n);
}
|---|
| 165 |
|
|---|
| 166 |
# Render the current folder stack as a single display path.
#
# Reads the file-level @folks list and returns its elements joined with
# " / " (for example "ROOT / News / Tech"); returns "" when the list is empty.
sub get_folks_path() {
    return join( " / ", @folks );
}
|---|
| 175 |
|
|---|
| 176 |
# Convert a folder display path into the delimiter-separated tag list stored
# with a link.
#
# Every " / " separator in the argument is replaced by the file-level $dd,
# and a leading "ROOT" is replaced by the file-level $mandatory_tag.
# Returns the converted copy; the caller's value is not modified.
sub strip_first {
    my( $tag_path ) = @_;

    for( $tag_path ) {
        s/ \/ /$dd/g;
        s/^ROOT/$mandatory_tag/;
    }

    return $tag_path;
}
|---|
| 182 |
|
|---|
| 183 |
# Flush the entry collected by the parser since the last flush.
#
# Works on file-level state:
#   * if a link title is pending, append "url<dd>title<dd>tag-list" to
#     @links (tag-list is strip_first() applied to $folksPath) and clear
#     $link and $description;
#   * otherwise, if a folder title is pending, append it to @tags and
#     clear $tag.
# Does nothing when neither is pending.
sub add_pending_link_or_tag() {
    # Test for "defined and non-empty" rather than truthiness, so that a
    # link or folder whose title is the string "0" is not silently dropped.
    if( defined $link && length $link ) {
        push @links, join( $dd, $url, $link, strip_first( $folksPath ) );
        $link = $description = undef;
    } elsif( defined $tag && length $tag ) {
        push @tags, $tag;
        $tag = undef;
    }
}
|---|
| 196 |
|
|---|
| 197 |
# Print one numbered diagnostic line: "<counter>> <message>\n".
#
# Increments the file-level counter $glc on every call.
# Takes the message as its only argument. The former empty prototype "()"
# declared that p() takes no arguments, which contradicted every call site
# and only went unnoticed because the calls are compiled before this
# definition.
sub p {
    my( $message ) = @_;
    ++$glc;
    $message = '' unless defined $message;
    print "$glc> $message\n";
}
|---|
| 202 |
|
|---|
| 203 |
# Debug-trace hook: accepts a message and deliberately does nothing.
#
# Declared without a prototype because every caller passes a message; the
# former empty prototype "()" only worked because the calls are compiled
# before this definition.
sub d {
    return;
}
|---|
| 206 |
|
|---|