root/feedmelinks/bin/import-bookmarks.pl

Revision 1424, 5.5 kB (checked in by jm3, 2 years ago)

import fixes

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1 #!/usr/local/bin/perl
2 use lib "/home/fml/public_html/lib";
3 use FeedMeLinks::Environment;
4 #use strict;
5
use File::Temp ();

# ---------------------------------------------------------------------------
# File-scoped state shared by the main parsing loop and the helper subs.
# ---------------------------------------------------------------------------
my ($debug, $link, $tag, @tags, @links, @folks, $url, $hideGarbage, $dd, $glc,
    $mandatory_tag, $path, $filename, $bin, $format, $cmd, $depth, $folksPath,
    $lastDepth, $lastFolder, $description);

$debug       = 0;       # set to 1 for verbose tracing on STDOUT
$glc         = 0;       # line counter that p() prefixes to its output
$hideGarbage = 1;       # when true, unparseable lines are skipped silently
$link = $tag = undef;   # the link / folder seen but not yet recorded
$depth = $lastDepth = 0;

# @folks is the stack of folders currently open; "ROOT" is a synthetic
# top-level entry that strip_first() later swaps for $mandatory_tag.
push @folks, "ROOT";
$mandatory_tag = "imported links";
push @tags, $mandatory_tag;

# Field separator used inside each LINKS.RAW record and echoed in the
# <result> line.  The misspelling is part of the emitted data, so leave it
# alone (presumably whatever reads the result depends on it -- confirm
# before changing).
$dd = "___DELIMETER___";

# Arguments: bookmarks file, parser, link XSLT, tag XSLT.
# ($parser is accepted but not used anywhere in the code visible here.)
$filename = $ARGV[0];
my $parser     = $ARGV[1];
my $link_xslt  = $ARGV[2];
my $tag_xslt   = $ARGV[3];

die "usage: $0 BOOKMARKS_FILE PARSER LINK_XSLT TAG_XSLT\n"
    unless defined $filename && length $filename;

# The output files are written into the directory that holds the bookmarks
# file, so strip the file name.  (The user name is encoded in that path --
# this practice could be considered a bit shady...)
$path = $filename;
if( $path =~ m{/} ) {
    $path =~ s{/[^/]+$}{};
} else {
    $path = ".";            # bare file name: use the current directory
}
$bin = "/home/fml/public_html/bin";

my $success_code = "";

# Path of the temporary XML file produced from a hybrid export; stays
# undefined unless a conversion actually happens.
my $converted;

# one of: delicious-hybrid, netscape, delicious-API
$format = detect_bookmarks_format( $bin, $filename );

print "format: |$format|\n" if( $debug );

if( $format eq "delicious-hybrid" ) {
    print "converting from hybrid format:\n" if( $debug );

    # Create the temp file atomically with an unpredictable name instead of
    # guessing "/tmp/import-<0..999>", which collides between concurrent
    # imports.  UNLINK => 1 removes it when the program exits.
    my $tmp_fh;
    ( $tmp_fh, $converted ) =
        File::Temp::tempfile( "import-XXXXXX", DIR => "/tmp", UNLINK => 1 );

    $cmd = "$bin/hybrid-to-xml.pl $filename > $converted";
    print "cmd: $cmd\n";

    # Run the converter without a shell so that spaces or metacharacters in
    # the uploaded file's path cannot alter the command.
    open( my $conv, "-|", "$bin/hybrid-to-xml.pl", $filename )
        or die "cannot run $bin/hybrid-to-xml.pl: $!\n";
    my $out = do { local $/; <$conv> };
    $out = "" unless defined $out;
    close( $conv )
        or warn "$bin/hybrid-to-xml.pl did not exit cleanly (status $?)\n";

    print {$tmp_fh} $out;
    close( $tmp_fh ) or die "cannot write $converted: $!\n";

    $filename = $converted;
    print "$filename contains: \n</code><form><textarea rows='40' cols='150'>$out</textarea></form><code>\n\n" if( $debug );
    $success_code = "converted from hybrid to stock xml";

    # The converted file is plain XML, so treat it as an API export from
    # here on.
    $format = "delicious-API";
}
50
# Quote one argument for safe interpolation into a /bin/sh command line.
my $sh_quote = sub {
    my( $arg ) = @_;
    $arg = "" unless defined $arg;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
};

# Remove the temporary converted file, but only when $filename actually
# points at it, i.e. this run created it.
my $remove_converted = sub {
    unlink( $converted )
        if defined $converted && defined $filename && $filename eq $converted;
};

# XML exports need no line-by-line parsing: two XSLT passes produce the
# LINKS.RAW and TAGS.RAW files directly.
if( $format eq "delicious-API" ) {

    $cmd = "xsltproc " . $sh_quote->( $link_xslt ) . " " . $sh_quote->( $filename )
         . " > " . $sh_quote->( "$path/LINKS.RAW" ) . "\n";
    print "cmd:<br>$cmd\n" if( $debug );
    my $out = `$cmd`;
    warn "xsltproc (links) exited with status $?\n" if $?;
    print "cmd output was: $out\n";

    $cmd = "xsltproc " . $sh_quote->( $tag_xslt ) . " " . $sh_quote->( $filename )
         . q{ | sort | uniq | grep -v '^$' > } . $sh_quote->( "$path/TAGS.RAW" ) . "\n";
    print "cmd: $cmd\n";
    $out = `$cmd`;
    print "cmd output: $out\n";

    print "leaving the xml loop thing\n";
    print qq(<result delimeter="$dd" note="$success_code">SUCCESS</result>\n);
    $remove_converted->();
    exit 0;
}

# Netscape-style bookmark files are parsed one line at a time.  Read only
# the bookmarks file itself; the remaining command-line arguments are not
# input files.
open( my $in, "<", $filename ) or die "can't open $filename: $!\n";

# Indentation depth at which each entry of @folks was opened, kept parallel
# to @folks.  The sentinel for "ROOT" is lower than any real depth, so ROOT
# is never closed.
my @folder_depths = ( -1 );

# Close every open folder that sits at or below the given depth and return
# how many were closed.  Only relative depths are compared, so any
# consistent indentation unit works.
my $close_folders = sub {
    my( $at_depth ) = @_;
    my $closed = 0;
    while( @folks > 1 && @folder_depths > 1 && $folder_depths[-1] >= $at_depth ) {
        pop @folks;
        pop @folder_depths;
        ++$closed;
    }
    return $closed;
};

while( my $line = <$in> ) {

    # match lines starting with whitespace followed by a <D...> element;
    # these are the lines that can carry a folder or a link
    if( $line =~ m/^(\s+)<D/ ) {
        $depth = length( $1 ) / 4;
        # this needs to change also, for IE

        # Snapshot the path BEFORE touching the folder stack: the pending
        # link (if any) belongs to the folders that were open when it was
        # seen, and add_pending_link_or_tag() reads $folksPath.
        $folksPath = get_folks_path();

        if( $line =~ m/<H3.+>(.*)<\/H3>/ ) { # folder line: grab the folder name
            my $folder_name = $1;

            add_pending_link_or_tag();
            $tag = $folder_name;

            my $closed = $close_folders->( $depth );
            d( "close $closed folders" ) if $closed;
            d( "found folder '$tag' (child of '$folks[-1]')" );

            push @folks, ( ( defined $tag && length $tag ) ? $tag : "__ROOT__" );
            push @folder_depths, $depth;

        } elsif( $line =~ m/<A HREF="([^"]+)"[^>]*>([^<]+)<\/A>/ ) { # link line: grab url and title
            my( $href, $title ) = ( $1, $2 );

            add_pending_link_or_tag();
            $url  = $href;
            $link = $title;

            my $closed = $close_folders->( $depth );
            d( "close $closed folders" ) if $closed;
            d( "found link: '$link' ($url)" );
        }

    } elsif( $line =~ m/^<DD>(.*)$/ ) { # start of a description line: grab the description

        # ignore firefox's (or is it sage's) internal housekeeping "no-updated" marker
        unless( $line =~ m/no-updated/ ) {
            $description = $1;
            d( "  found description for " . ($link ? "link '$link'" : " tag '$tag'" ) . ": $description" );
        }

    } elsif( $line =~ m/<\/DL><p>/ ) { # no-op; netscape list terminator

    } else { # continuation of a description
        if( $description ) {
            chomp( $line );
            $description = $description . "\n" . $line;
            d( "  found description for " . ($link ? "link '$link'" : " tag '$tag'" ) . ": $description" );

        } else {
            # Whitespace-only lines are expected; anything else is a line we
            # cannot parse.  (Use !~ here: "! $line =~ ..." would negate
            # $line first and match the result.)
            if( $line !~ m/^\s*$/ ) {
                p( "XXXXXXXXXXXX GARBAGE FOUND: $line" ) unless $hideGarbage;
            }
        }
    }

    $lastDepth = $depth;
    $lastFolder = $tag;
}
close( $in );

# Flush whatever link or folder was still pending at end of input, then
# write the output files and report success.
$folksPath = get_folks_path();
add_pending_link_or_tag();
write_raw_files();
$remove_converted->();

exit( 0 );
144
145 ## END ##
146
# Ask the helper script $bin/detect-bookmarks-file-format.sh which format
# the bookmarks file is in.
#
#   $bin      - directory containing the helper script
#   $filename - path of the bookmarks file to inspect
#
# Returns the script's output with the trailing newline removed, or the
# empty string when the script cannot be run or prints nothing.  The helper
# is executed directly (no shell), so spaces and shell metacharacters in
# $filename reach it unchanged as a single argument.
sub detect_bookmarks_format {
    my( $bin, $filename ) = @_;
    my $detector = "$bin/detect-bookmarks-file-format.sh";

    my $pipe;
    unless( open( $pipe, "-|", $detector, $filename ) ) {
        warn "cannot run $detector: $!\n";
        return "";
    }

    my $out = do { local $/; <$pipe> };
    close( $pipe );

    $out = "" unless defined $out;
    chomp( $out );
    return $out;
}
154
# Write the collected links and tags to $path/LINKS.RAW and $path/TAGS.RAW
# (one record per line; tags are sorted first, which reorders the shared
# @tags array), then print the <result> line to STDOUT.
#
# Dies if either file cannot be opened or fully written, so that SUCCESS is
# never reported for output that does not exist.
sub write_raw_files {
    @tags = sort @tags;

    my $links_file = "$path/LINKS.RAW";
    # "or", not "||": with "||" the die attaches to the file-name string and
    # can never fire.
    open( my $links_fh, ">", $links_file ) or die( "can't open $links_file: $!\n" );
    foreach my $record ( @links ) { print {$links_fh} "$record\n"; }
    close( $links_fh ) or die( "can't write $links_file: $!\n" );

    my $tags_file = "$path/TAGS.RAW";
    open( my $tags_fh, ">", $tags_file ) or die( "can't open $tags_file: $!\n" );
    foreach my $name ( @tags ) { print {$tags_fh} "$name\n"; }
    close( $tags_fh ) or die( "can't write $tags_file: $!\n" );

    print qq(<result delimeter="$dd" note="$success_code">SUCCESS</result>\n);
}
165
# Return the currently open folders (@folks) as a single string, outermost
# first, separated by " / ".  Returns the empty string when @folks is empty.
sub get_folks_path {
    return join( " / ", @folks );
}
175
# Turn a folder path as produced by get_folks_path() into a tag list:
# every " / " separator becomes $dd, and a leading "ROOT" is replaced by
# $mandatory_tag.  Returns the converted string; the argument is not
# modified.
sub strip_first {
    my( $folder_path ) = @_;

    $folder_path =~ s{ / }{$dd}g;

    if( index( $folder_path, "ROOT" ) == 0 ) {
        substr( $folder_path, 0, 4 ) = $mandatory_tag;
    }

    return $folder_path;
}
182
# Record whichever item is still pending from the previous bookmark line.
#
# A pending link ($link, with its $url) is appended to @links as
# "url<$dd>title<$dd>tag-list", where the tag list is derived from
# $folksPath by strip_first(); $link and $description are then cleared.
# Otherwise a pending folder name ($tag) is appended to @tags and cleared.
# Does nothing when neither is pending.
#
# "Pending" means defined and non-empty, so a link or folder whose title is
# the string "0" is recorded rather than silently dropped.
sub add_pending_link_or_tag {
    if( defined $link && length $link ) {
        my $tag_list = strip_first( $folksPath );
        push @links, join( $dd, $url, $link, $tag_list );
        $link = $description = undef;
    } elsif( defined $tag && length $tag ) {
        push @tags, $tag;
        $tag = undef;
    }
    return;
}
196
# Print a message to the currently selected output handle, prefixed with a
# running line number: "<n>> <message>\n".  Increments the shared counter
# $glc on every call.
#
#   $_[0] - the message text
sub p {
    my( $message ) = @_;
    ++$glc;
    print "$glc> " . $message . "\n";
}
202
# Debug trace: print "DEBUG> <message>\n" when $debug is true, otherwise do
# nothing.  $debug is 0 by default, so by default this prints nothing.
#
#   $_[0] - the message text
sub d {
    my( $message ) = @_;
    print "DEBUG> " . $message . "\n" if $debug;
    return;
}
206
Note: See TracBrowser for help on using the browser.