root/feedmelinks/bin/cvs2cl.pl

Revision 265, 85.5 kB (checked in by hirokai, 5 years ago)

etc.

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1 #!/bin/sh
2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
3 #!perl -w
4
5
6 ##############################################################
7 ###                                                        ###
8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
9 ###                                                        ###
10 ##############################################################
11
12 ## $Revision$
13 ## $Date$
14 ## $Author$
15 ##
16
17 use strict;
18
19 use File::Basename qw( fileparse );
20 use Getopt::Long   qw( GetOptions );
21 use Text::Wrap     qw( );
22 use Time::Local    qw( timegm );
23 use User::pwent    qw( getpwnam );
24
25 # The Plan:
26 #
27 # Read in the logs for multiple files, spit out a nice ChangeLog that
28 # mirrors the information entered during `cvs commit'.
29 #
30 # The problem presents some challenges. In an ideal world, we could
31 # detect files with the same author, log message, and checkin time --
32 # each <filelist, author, time, logmessage> would be a changelog entry.
33 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
34 # so checkins can span a range of times.  Also, the directory structure
35 # could be hierarchical.
36 #
37 # Another question is whether we really want to have the ChangeLog
38 # exactly reflect commits. An author could issue two related commits,
39 # with different log entries, reflecting a single logical change to the
40 # source. GNU style ChangeLogs group these under a single author/date.
41 # We try to do the same.
42 #
43 # So, we parse the output of `cvs log', storing log messages in a
44 # multilevel hash that stores the mapping:
45 #   directory => author => time => message => filelist
46 # As we go, we notice "nearby" commit times and store them together
47 # (i.e., under the same timestamp), so they appear in the same log
48 # entry.
49 #
50 # When we've read all the logs, we twist this mapping into
51 # a time => author => message => filelist mapping for each directory.
52 #
53 # If we're not using the `--distributed' flag, the directory is always
54 # considered to be `./', even as descend into subdirectories.
55
56 # Call Tree
57
58 # name                         number of lines (10.xii.03)
59 # parse_options                         192
60 # derive_changelog                       13
61 # +-maybe_grab_accumulation_date         38
62 # +-read_changelog                      277
63 #   +-maybe_read_user_map_file           94
64 #     +-run_ext                           9
65 #   +-read_file_path                     29
66 #   +-read_symbolic_name                 43
67 #   +-read_revision                      49
68 #   +-read_date_author_and_state         25
69 #     +-parse_date_author_and_state      20
70 #   +-read_branches                      36
71 # +-output_changelog                    424
72 #   +-pretty_file_list                  290
73 #     +-common_path_prefix               35
74 #   +-preprocess_msg_text                30
75 #     +-min                               1
76 #   +-mywrap                             16
77 #   +-last_line_len                       5
78 #   +-wrap_log_entry                    177
79 #
80 # Utilities
81 #
82 # xml_escape                              6
83 # slurp_file                             11
84 # debug                                   5
85 # version                                 2
86 # usage                                 142
87
88 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
89 #
90 # Note about a bug-slash-opportunity:
91 # -----------------------------------
92 #
93 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
94 # reveals it:
95 #
96 #   #!/usr/bin/perl -w
97 #
98 #   use Text::Wrap;
99 #
100 #   my $test_text =
101 #   "This script demonstrates a bug in Text::Wrap.  The very long line
102 #   following this paragraph will be relocated relative to the surrounding
103 #   text:
104 #
105 #   ====================================================================
106 #
107 #   See?  When the bug happens, we'll get the line of equal signs below
108 #   this paragraph, even though it should be above.";
109 #
110 #
111 #   # Print out the test text with no wrapping:
112 #   print "$test_text";
113 #   print "\n";
114 #   print "\n";
115 #
116 #   # Now print it out wrapped, and see the bug:
117 #   print wrap ("\t", "        ", "$test_text");
118 #   print "\n";
119 #   print "\n";
120 #
121 # If the line of equal signs were one shorter, then the bug doesn't
122 # happen.  Interesting.
123 #
124 # Anyway, rather than fix this in Text::Wrap, we might as well write a
125 # new wrap() which has the following much-needed features:
126 #
127 # * initial indentation, like current Text::Wrap()
128 # * subsequent line indentation, like current Text::Wrap()
129 # * user chooses among: force-break long words, leave them alone, or die()?
130 # * preserve existing indentation: chopped chunks from an indented line
131 #   are indented by same (like this line, not counting the asterisk!)
132 # * optional list of things to preserve on line starts, default ">"
133 #
134 # Note that the last two are essentially the same concept, so unify in
135 # implementation and give a good interface to controlling them.
136 #
137 # And how about:
138 #
139 # Optionally, when encounter a line pre-indented by same as previous
140 # line, then strip the newline and refill, but indent by the same.
141 # Yeah...
142
143 # Globals --------------------------------------------------------------------
144
145 use constant MAILNAME => "/etc/mailname";
146
147 # In case we have to print it out:
148 my $VERSION = '$Revision$';
149 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
150
151 ## Vars set by options:
152
153 # Print debugging messages?
154 my $Debug = 0;
155
156 # Just show version and exit?
157 my $Print_Version = 0;
158
159 # Just print usage message and exit?
160 my $Print_Usage = 0;
161
162 # What file should we generate (defaults to "ChangeLog")?
163 my $Log_File_Name = "ChangeLog";
164
165 # Grab most recent entry date from existing ChangeLog file, just add
166 # to that ChangeLog.
167 my $Cumulative = 0;
168
169 # `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
170 # as it guarantees at least one entry in the update changelog, which means
171 # that there will always be a date to extract for the next update.  The repeat
172 # entry can be removed in postprocessing, if necessary.
173
174 # MJP 2003-08-02
175 # I don't think this actually does anything useful
176 my $Update = 0;
177
178 # Expand usernames to email addresses based on a map file?
179 my $User_Map_File = '';
180 my $User_Passwd_File;
181 my $Mail_Domain;
182
183 # Output log in chronological order? [default is reverse chronological order]
184 my $Chronological_Order = 0;
185
186 # Grab user details via gecos
187 my $Gecos = 0;
188
189 # User domain for gecos email addresses
190 my $Domain;
191
192 # Output to a file or to stdout?
193 my $Output_To_Stdout = 0;
194
195 # Eliminate empty log messages?
196 my $Prune_Empty_Msgs = 0;
197
198 # Tags of which not to output
199 my %ignore_tags;
200
201 # Show only revisions with Tags
202 my %show_tags;
203
204 # Don't call Text::Wrap on the body of the message
205 my $No_Wrap = 1;
206
207 # Indentation of log messages
208 my $Indent = "  ";
209
210 # Don't do any pretty print processing
211 my $Summary = 0;
212
213 # Separates header from log message.  Code assumes it is either " " or
214 # "\n\n", so if there's ever an option to set it to something else,
215 # make sure to go through all conditionals that use this var.
216 my $After_Header = " ";
217
218 # XML Encoding
219 my $XML_Encoding = '';
220
221 # Format more for programs than for humans.
222 my $XML_Output = 0;
223 my $No_XML_Namespace = 0;
224 my $No_XML_ISO_Date = 0;
225
226 # Do some special tweaks for log data that was written in FSF
227 # ChangeLog style.
228 my $FSF_Style = 0;
229
230 # Show times in UTC instead of local time
231 my $UTC_Times = 0;
232
233 # Show times in output?
234 my $Show_Times = 1;
235
236 # Show day of week in output?
237 my $Show_Day_Of_Week = 1;
238
239 # Show revision numbers in output?
240 my $Show_Revisions = 1;
241
242 # Show dead files in output?
243 my $Show_Dead = 0;
244
245 # Hide dead trunk files which were created as a result of additions on a
246 # branch?
247 my $Hide_Branch_Additions = 1;
248
249 # Show tags (symbolic names) in output?
250 my $Show_Tags = 0;
251
252 # Show tags separately in output?
253 my $Show_Tag_Dates = 0;
254
255 # Show branches by symbolic name in output?
256 my $Show_Branches = 0;
257
258 # Show only revisions on these branches or their ancestors.
259 my @Follow_Branches;
260
261 # Don't bother with files matching this regexp.
262 my @Ignore_Files;
263
264 # How exactly we match entries.  We definitely want "o",
265 # and user might add "i" by using --case-insensitive option.
266 my $Case_Insensitive = 0;
267
268 # Maybe only show log messages matching a certain regular expression.
269 my $Regexp_Gate = '';
270
271 # Pass this global option string along to cvs, to the left of `log':
272 my $Global_Opts = '';
273
274 # Pass this option string along to the cvs log subcommand:
275 my $Command_Opts = '';
276
277 # Read log output from stdin instead of invoking cvs log?
278 my $Input_From_Stdin = 0;
279
280 # Don't show filenames in output.
281 my $Hide_Filenames = 0;
282
283 # Don't shorten directory names from filenames.
284 my $Common_Dir = 1;
285
286 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
287 # times that span a range of time. We assume that checkins will last no
288 # longer than $Max_Checkin_Duration seconds, and that similarly, no
289 # checkins will happen from the same users with the same message less
290 # than $Max_Checkin_Duration seconds apart.
291 my $Max_Checkin_Duration = 180;
292
293 # What to put at the front of [each] ChangeLog.
294 my $ChangeLog_Header = '';
295
296 # Whether to enable 'delta' mode, and for what start/end tags.
297 my $Delta_Mode = 0;
298 my $Delta_From = '';
299 my $Delta_To = '';
300
301 my $TestCode;
302
303 # Whether to parse filenames from the RCS filename, and if so what
304 # prefix to strip.
305 my $RCS_Root;
306
307 ## end vars set by options.
308
309 # latest observed times for the start/end tags in delta mode
310 my $Delta_StartTime = 0;
311 my $Delta_EndTime = 0;
312
313 # In 'cvs log' output, one long unbroken line of equal signs separates
314 # files:
315 my $file_separator = "======================================="
316                    . "======================================";
317
318 # In 'cvs log' output, a shorter line of dashes separates log messages
319 # within a file:
320 my $logmsg_separator = "----------------------------";
321
322 my $No_Ancestors = 0;
323
324 my $No_Extra_Indent = 0;
325
326 my $GroupWithinDate = 0;
327
328 # ----------------------------------------------------------------------------
329
330 package CVS::Utils::ChangeLog::EntrySet;
331
332 sub new {
333   my $class = shift;
334   my %self;
335   bless \%self, $class;
336 }
337
338 # -------------------------------------
339
340 sub output_changelog {
341   my $output_type = $XML_Output ? 'XML' : 'Text';
342   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
343   $output_class->new->output_changelog(@_);
344 }
345
346 # ----------------------------------------------------------------------------
347
348 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
349
350 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
351
352 use File::Basename qw( fileparse );
353
354 sub new {
355   my $class = shift;
356   bless \(my($ self)), $class;
357 }
358
359 # -------------------------------------
360
361 sub wday {
362   my $self = shift; my $class = ref $self;
363   my ($wday) = @_;
364
365   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
366 }
367
368 # -------------------------------------
369
370 sub header_line {
371   my $self = shift;
372   my ($time, $author, $lastdate) = @_;
373
374   my $header_line = '';
375
376   my (undef,$min,$hour,$mday,$mon,$year,$wday)
377     = $UTC_Times ? gmtime($time) : localtime($time);
378
379   my $date = $self->fdatetime($time);
380
381   if ($Show_Times) {
382     $header_line =
383       sprintf "%s  %s\n\n", $date, $author;
384   } else {
385     if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
386       if ( $GroupWithinDate ) {
387         $header_line = "$date\n\n";
388       } else {
389         $header_line = "$date  $author\n\n";
390       }
391     } else {
392       $header_line = '';
393     }
394   }
395 }
396
397 # -------------------------------------
398
399 sub preprocess_msg_text {
400   my $self = shift;
401   my ($text) = @_;
402
403   $text = $self->SUPER::preprocess_msg_text($text);
404
405   unless ( $No_Wrap ) {
406     # Strip off lone newlines, but only for lines that don't begin with
407     # whitespace or a mail-quoting character, since we want to preserve
408     # that kind of formatting.  Also don't strip newlines that follow a
409     # period; we handle those specially next.  And don't strip
410     # newlines that precede an open paren.
411     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
412
413     # If a newline follows a period, make sure that when we bring up the
414     # bottom sentence, it begins with two spaces.
415     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
416   }
417
418   return $text;
419 }
420
421 # -------------------------------------
422
423 # Here we take a bunch of qunks and convert them into printed
424 # summary that will include all the information the user asked for.
425 sub pretty_file_list {
426   my $self = shift;
427
428   return ''
429     if $Hide_Filenames;
430
431   my $qunksref = shift;
432
433   my @filenames;
434   my $beauty = '';          # The accumulating header string for this entry.
435   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
436   my %unanimous_tags;       # Tags found in all qunks
437   my %all_branches;         # Branches found in any qunk
438   my $fbegun = 0;           # Did we begin printing filenames yet?
439
440   my ($common_dir, $qunkrefs) =
441     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
442
443   my @qunkrefs = @$qunkrefs;
444
445   # Not XML output, so complexly compactify for chordate consumption.  At this
446   # point we have enough global information about all the qunks to organize
447   # them non-redundantly for output.
448
449   if ($common_dir) {
450     # Note that $common_dir still has its trailing slash
451     $beauty .= "$common_dir: ";
452   }
453
454   if ($Show_Branches)
455   {
456     # For trailing revision numbers.
457     my @brevisions;
458
459     foreach my $branch (keys (%all_branches))
460     {
461       foreach my $qunkref (@qunkrefs)
462       {
463         if ((defined ($qunkref->branch))
464             and ($qunkref->branch eq $branch))
465         {
466           if ($fbegun) {
467             # kff todo: comma-delimited in XML too?  Sure.
468             $beauty .= ", _BREAK_ ";
469           }
470           else {
471             $fbegun = 1;
472           }
473           my $fname = substr ($qunkref->filename, length ($common_dir));
474           $beauty .= $fname;
475           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
476
477           if ( $Show_Tags and defined $qunkref->tags ) {
478             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
479
480             if (@tags) {
481               $beauty .= " (tags: ";
482               $beauty .= join (', ', @tags);
483               $beauty .= ")";
484             }
485           }
486
487           if ($Show_Revisions) {
488             # Collect the revision numbers' last components, but don't
489             # print them -- they'll get printed with the branch name
490             # later.
491             $qunkref->revision =~ /.+\.([\d]+)$/;
492             push (@brevisions, $1);
493
494             # todo: we're still collecting branch roots, but we're not
495             # showing them anywhere.  If we do show them, it would be
496             # nifty to just call them revision "0" on a the branch.
497             # Yeah, that's the ticket.
498           }
499         }
500       }
501       $beauty .= " ($branch";
502       if (@brevisions) {
503         if ((scalar (@brevisions)) > 1) {
504           $beauty .= ".[";
505           $beauty .= (join (',', @brevisions));
506           $beauty .= "]";
507         }
508         else {
509           # Square brackets are spurious here, since there's no range to
510           # encapsulate
511           $beauty .= ".$brevisions[0]";
512         }
513       }
514       $beauty .= ")";
515     }
516   }
517
518   # Okay; any qunks that were done according to branch are taken care
519   # of, and marked as printed.  Now print everyone else.
520
521   my %fileinfo_printed;
522   foreach my $qunkref (@qunkrefs)
523   {
524     next if (defined ($qunkref->{'printed'}));   # skip if already printed
525
526     my $b = substr ($qunkref->filename, length ($common_dir));
527     # todo: Shlomo's change was this:
528     # $beauty .= substr ($qunkref->filename,
529     #              (($common_dir eq "./") ? '' : length ($common_dir)));
530     $qunkref->{'printed'} = 1;  # Set a mark bit.
531
532     if ($Show_Revisions || $Show_Tags || $Show_Dead)
533     {
534       my $started_addendum = 0;
535
536       if ($Show_Revisions) {
537         $started_addendum = 1;
538         $b .= " (";
539         $b .= $qunkref->revision;
540       }
541       if ($Show_Dead && $qunkref->state =~ /dead/)
542       {
543         # Deliberately not using $started_addendum. Keeping it simple.
544         $b .= "[DEAD]";
545       }
546       if ($Show_Tags && (defined $qunkref->tags)) {
547         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
548         if ((scalar (@tags)) > 0) {
549           if ($started_addendum) {
550             $b .= ", ";
551           }
552           else {
553             $b .= " (tags: ";
554           }
555           $b .= join (', ', @tags);
556           $started_addendum = 1;
557         }
558       }
559       if ($started_addendum) {
560         $b .= ")";
561       }
562     }
563
564     unless ( exists $fileinfo_printed{$b} ) {
565       if ($fbegun) {
566         $beauty .= ", ";
567       } else {
568         $fbegun = 1;
569       }
570       $beauty .= $b, $fileinfo_printed{$b} = 1;
571     }
572   }
573
574   # Unanimous tags always come last.
575   if ($Show_Tags && %unanimous_tags)
576   {
577     $beauty .= " (utags: ";
578     $beauty .= join (', ', sort keys (%unanimous_tags));
579     $beauty .= ")";
580   }
581
582   # todo: still have to take care of branch_roots?
583
584   $beauty = "$beauty:";
585
586   return $beauty;
587 }
588
589 # -------------------------------------
590
591 sub output_tagdate {
592   my $self = shift;
593   my ($fh, $time, $tag) = @_;
594
595   my $fdatetime = $self->fdatetime($time);
596   print $fh "$fdatetime  tag $tag\n\n";
597   return;
598 }
599
600 # -------------------------------------
601
602 sub format_body {
603   my $self = shift;
604   my ($msg, $files, $qunklist) = @_;
605
606   my $body;
607
608   if ( $No_Wrap and ! $Summary ) {
609     $msg = $self->preprocess_msg_text($msg);
610     $files = $self->mywrap("\t", "\t  ", "* $files") . "\n\n\t";
611     #$files =~ s:, :\n\t\t:g;
612     #$files = "\t* files:\n\t\t$files\n\t";
613     $msg =~ s/\n(.+)/\n$Indent$1/g;
614     unless ($After_Header eq " ") {
615       $msg =~ s/^(.+)/$Indent$1/g;
616     }
617     if ( $Hide_Filenames ) {
618       $body = $After_Header . $msg;
619     } else {
620       $body = $files . $After_Header . $msg;
621     }
622   } elsif ( $Summary ) {
623     my ($filelist, $qunk);
624     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
625
626     $msg = $self->preprocess_msg_text($msg);
627     #
628     #     Sort the files (qunks) according to the operation that was
629     # performed.  Files which were added have no line change
630     # indicator, whereas deleted files have state dead.
631     #
632     foreach $qunk ( @$qunklist ) {
633       if ( "dead" eq $qunk->state) {
634         push @DeletedQunks, $qunk;
635       } elsif ( ! defined $qunk->lines ) {
636         push @AddedQunks, $qunk;
637       } else {
638         push @ChangedQunks, $qunk;
639       }
640     }
641     #
642     #     The qunks list was  originally in tree search order.  Let's
643     # get that back.  The lists, if they exist, will be reversed upon
644     # processing.
645     #
646
647     #
648     #     Now write the three sections onto $filelist
649     #
650     if ( @DeletedQunks ) {
651       $filelist .= "\tDeleted:\n";
652       foreach $qunk ( @DeletedQunks ) {
653         $filelist .= "\t\t" . $qunk->filename;
654         $filelist .= " (" . $qunk->revision . ")";
655         $filelist .= "\n";
656       }
657       undef @DeletedQunks;
658     }
659
660     if ( @AddedQunks ) {
661       $filelist .= "\tAdded:\n";
662       foreach $qunk (@AddedQunks) {
663         $filelist .= "\t\t" . $qunk->filename;
664         $filelist .= " (" . $qunk->revision . ")";
665         $filelist .= "\n";
666       }
667       undef @AddedQunks ;
668     }
669
670     if ( @ChangedQunks ) {
671       $filelist .= "\tChanged:\n";
672       foreach $qunk (@ChangedQunks) {
673         $filelist .= "\t\t" . $qunk->filename;
674         $filelist .= " (" . $qunk->revision . ")";
675         $filelist .= ", \"" . $qunk->state . "\"";
676         $filelist .= ", lines: " . $qunk->lines;
677         $filelist .= "\n";
678       }
679       undef @ChangedQunks;
680     }
681
682     chomp $filelist;
683
684     if ( $Hide_Filenames ) {
685       $filelist = '';
686     }
687
688     $msg =~ s/\n(.*)/\n$Indent$1/g;
689     unless ( $After_Header eq " " or $FSF_Style ) {
690       $msg =~ s/^(.*)/$Indent$1/g;
691     }
692