attachment:man2wiki.text von ManPageImWiki - LinuxWiki.org

Dateianhang 'man2wiki.text'

#!/usr/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      @(#) man2html 1.2 97/08/12 12:57:30 @(#)
##  Author:
##      Earl Hood, ehood@medusa.acs.uci.edu
##  Description:
##      man2html is a Perl program to convert formatted nroff output
##      to HTML.
##      
##      Recommend command-line options based on platform:
##
##      Platform                Options
##      ---------------------------------------------------------------------
##      c2mp                    <None, the defaults should be okay>
##      hp9000s700/800          -leftm 1 -topm 8
##      sun4                    -sun
##      ---------------------------------------------------------------------
##
##---------------------------------------------------------------------------##
##  Copyright (C) 1995-1997     Earl Hood, ehood@medusa.acs.uci.edu
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##  
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##  
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##  02111-1307, USA
##---------------------------------------------------------------------------##

package Man2Wiki;

use Getopt::Long;

## Program name (basename of $0) and converter version
$PROG = $0;
$PROG =~ s/.*\///;
$VERSION = "3.0.1";

## Input and output filehandles; values that are already set are kept
$InFH  ||= \*STDIN;
$OutFH ||= \*STDOUT;

## Backspace character:  Used in overstriking detection
*bs = \"\b";

##      Map of section titles to the wiki heading markup that is put on
##      both sides of a detected heading line.  Change an entry to change
##      the heading level used for that section.
##
##      Each title is used as a regular expression (it is interpolated
##      into a pattern by do_it()), so special characters must be quoted
##      with care.
##
%SectionHead = ();

## Titles that get the standard heading markup
foreach my $title_re (
    '\S.*OPTIONS.*',
    'AUTHORS?',
    'BUGS',
    'COMPATIBILITY',
    'DEPENDENCIES',
    'DESCRIPTION',
    'DIAGNOSTICS',
    'ENVIRONMENT',
    'ERRORS',
    'EXAMPLES',
    'EXTERNAL INFLUENCES',
    'FILES',
    'LIMITATIONS',
    'NAME',
    'NOTES?',
    'OPTIONS',
    'REFERENCES',
    'RETURN VALUE',
    'SECTION.*:',
    'SEE ALSO',
    'STANDARDS CONFORMANCE',
    'STYLE CONVENTION',
    'SYNOPSIS',
    'SYNTAX',
    'WARNINGS',
) {
    $SectionHead{$title_re} = ' == ';
}

## Indented "Section ...:" lines get the longer markup
$SectionHead{'\s+Section.*:'} = ' === ';

## Markup used for a heading that matches none of the titles above
$HeadFallback = ' == ';

## Other globals

## Flags (all off by default; get_cli_opts() sets them from the options)
$Bare      = 0;         # Skip printing of the page head/foot
$Compress  = 0;         # Compress consecutive blank lines
$K         = 0;         # Process keyword search output
$NoDepage  = 0;         # Do not strip page information
$NoHeads   = 0;         # Do not detect section heads
$SeeAlso   = 0;         # Create xrefs only in the SEE ALSO section
$Solaris   = 0;         # Keyword search output is in Solaris format
$Sun       = 0;         # Section heads are not overstruck in the input

## Markup and text settings
$BTag      = "'''";     # Markup wrapped around overstruck text
$UTag      = "''";      # Markup wrapped around underlined text
$CgiUrl    = '';        # CGI URL expression
$Title     = '';        # Title

## Page geometry
$pgsz      = 66;        # Number of lines in a page
$hdsz      = 7;         # Top margin size
$ftsz      = 7;         # Bottom margin size
$txsz      = 52;        # Text body length
$leftmsz   = 0;         # Left margin size
$leftm     = '';        # Left margin pad

#############################################################################
##      Main Block
#############################################################################
{
    ## Parse the command line first; on failure (or -help) show the
    ## usage text, otherwise run the keyword-search or manpage converter.
    if (!get_cli_opts()) {
        usage();
    } elsif ($K) {
        man_k();
    } else {
        do_it();
    }
}

#############################################################################
##      Subroutines
#############################################################################

##  do_it() converts a formatted manpage read from $InFH into wiki markup
##  written to $OutFH.
##
##  The input is processed page by page: $hdsz header lines are skipped,
##  $txsz body lines are converted, and $ftsz footer lines are skipped.
##  The conversion loop is assembled as a string of Perl code (with one
##  generated test per %SectionHead entry) and then run with eval.
##  The page head and tail are printed unless $Bare is set.
##
sub do_it {

    ##  Define while loop and then eval it when used.  The reason
    ##  is to avoid the regular expression reevaluation in the
    ##  section head detection code.

    $doitcode =<<'EndOfDoItCode';

    my($line, $tmp, $i, $head, $preindent, $see_also, $do);

    $see_also = !$SeeAlso;
    print $OutFH "<!-- Manpage converted by man2html $VERSION -->\n";
    LOOP: while(!eof($InFH)) {
        $blank = 0;
        for ($i=0; $i < $hdsz; $i++) {
            last LOOP  unless defined($_ = <$InFH>);
        }
        for ($i=0; $i < $txsz; $i++) {
            last LOOP  unless defined($_ = <$InFH>);

            ## Check if compress consecutive blank lines
            if ($Compress and !/\S/) {
                if ($blank) { next; } else { $blank = 1; }
            } else {
                $blank = 0;
            }

            ## Try to check if line space is needed at page boundaries ##
            if (!$NoDepage && ($i==0 || $i==($txsz-1)) && !/^\s*$/) {
                /^(\s*)/;  $tmp = length($1);
                if ($do) {
                    if ($tmp < $preindent) { print $OutFH "\n"; }
                } else {
                    $do = 1;
                }
                $preindent = $tmp;
            } else {
                $do = 0;  $preindent = 0;
            }

            ## Interpret line
            $line = $_;
            entitize(\$_);              # Convert [$<>] to entity references

            ## Check for 'SEE ALSO' link only
            if (!$see_also && $CgiUrl && $SeeAlso) {
                ($tmp = $line) =~ s/.\010//go;
                if ($tmp =~ /^\s*SEE\s+ALSO\s*$/o) { $see_also = 1; }
                else { $see_also = 0; }
            }

            ## Create anchor links for manpage references
            s/((((.\010)+)?[\+_\.\w-])+\(((.\010)+)?
              \d((.\010)+)?\w?\))
             /make_xref($1)
             /geox  if $see_also;

            ## Emphasize underlined words
            # s/((_\010[^_])+[\.\(\)_]?(_\010[^_])+\)?)/emphasize($1)/oge;
            # s/((_\010[^_])+([\.\(\)_]?(_\010[^_])+)?)/emphasize($1)/oge;
            #
            # The previous expressions were trying to be clever about
            # detecting underlined text which contain non-alphanumeric
            # characters.  nroff will not underline non-alphanumeric
            # characters in an underlined phrase, and the above was trying
            # to detect that.  It does not work all the time, and it
            # screws up other text, so a simplified expression is used.

            s/((_\010[^_])+)/emphasize($1)/oge;

            $secth = 0;
            ## Check for strong text and headings
            if ($Sun || /.\010./o) {
                if (!$NoHeads) {
                    $line =~ s/.\010//go;
                    $tmp = $HeadFallback;
EndOfDoItCode

    ##  Create switch statement for detecting a heading.  One test is
    ##  generated per %SectionHead entry; the title pattern and the left
    ##  margin pad are interpolated into the generated code here.
    ##
    $doitcode .= "HEADSW: {\n";
    foreach $head (keys %SectionHead) {
        $doitcode .= join("", "\$tmp = '$SectionHead{$head}', ",
                              "\$secth = 1, last HEADSW  ",
                              "if \$line =~ /^$leftm$head/o;\n");
    }
    $doitcode .= "}\n";

    ##  Rest of routine
    ##
    $doitcode .=<<'EndOfDoItCode';
                    if ($secth || $line =~ /^$leftm\S/o) {
                        ## chomp, not chop: a final heading line without
                        ## a newline must not lose its last character
                        chomp $line;
                        $_ = $tmp . $line . $tmp;
                        s%<([^>]*)>$%</$1>%;
                        #$_ = "\n</PRE>\n" . $_ . "<PRE>\n";
                        $_ = "\n" . $_ . "\n";
                    } else {
                        s/(((.\010)+.)+)/strongize($1)/oge;
                    }
                } else {
                    s/(((.\010)+.)+)/strongize($1)/oge;
                }
            }
            print $OutFH $_;
        }

        for ($i=0; $i < $ftsz; $i++) {
            last LOOP  unless defined($_ = <$InFH>);
        }
    }
EndOfDoItCode


    ##  Perform processing.

    printhead()  unless $Bare;
    #print $OutFH "<PRE>\n";
    print $OutFH "\n";
    eval $doitcode;                     # $doitcode defined above
    ##  A compile or runtime error in the generated code (for example a
    ##  malformed section head pattern) would otherwise go unnoticed.
    warn "$PROG: manpage conversion failed: $@"  if $@;
    print $OutFH "\n";
    printtail()  unless $Bare;
}

##---------------------------------------------------------------------------
##
##  get_cli_opts() parses the command line with GetOptions and copies
##  the results into the global settings.
##
##  Returns 0 if option parsing fails or -help was given (the main block
##  then prints the usage text), otherwise 1.
##
sub get_cli_opts {
    return 0  unless
    GetOptions(
        "bare",         # Leave out the page head and tail
        "belem=s",      # Markup for overstriked text (def: ''')
        "botm=i",       # Number of lines for bottom margin (def: 7)
        "cgiurl=s",     # CGI URL for linking to other manpages
        "cgiurlexp=s",  # CGI URL Perl expr for linking to other manpages
        "compress",     # Compress consecutive blank lines
        "headmap=s",    # Filename of user section head map file
        "k",            # Process input from 'man -k' output.
        "leftm=i",      # Character width of left margin (def: 0)
        "nodepage",     # Do not remove pagination lines
        "noheads",      # Do not detect for section heads
        "pgsize=i",     # Number of lines in a page (def: 66)
        "seealso",      # Link to other manpages only in the SEE ALSO section
        "solaris",      # Parse 'man -k' output from a solaris system
        "sun",          # Section heads are not overstriked in input
        "title=s",      # Title of manpage (def: Not defined)
        "topm=i",       # Number of lines for top margin (def: 7)
        "uelem=s",      # Markup for underlined text (def: '')

        "help"          # Short usage message
    );
    return 0  if defined($opt_help);

    ## Page geometry: the text body is whatever the margins leave over
    $pgsz = $opt_pgsize || $pgsz;
    if (defined($opt_nodepage)) {
        $hdsz   = 0;
        $ftsz   = 0;
    } else {
        $hdsz   = $opt_topm  if defined($opt_topm);
        $ftsz   = $opt_botm  if defined($opt_botm);
    }
    $txsz       = $pgsz - ($hdsz + $ftsz);
    $leftmsz    = $opt_leftm  if defined($opt_leftm);
    $leftm      = ' ' x $leftmsz;

    $Bare       = defined($opt_bare);
    $Compress   = defined($opt_compress);
    $K          = defined($opt_k);
    $NoDepage   = defined($opt_nodepage);
    $NoHeads    = defined($opt_noheads);
    $SeeAlso    = defined($opt_seealso);
    $Solaris    = defined($opt_solaris);
    $Sun        = defined($opt_sun);

    $Title      = $opt_title || $Title;
    ## A plain -cgiurl is turned into a Perl expression returning the
    ## URL, so that both URL options are evaluated the same way later.
    $CgiUrl     = $opt_cgiurlexp ||
                        ($opt_cgiurl ? qq{return "$opt_cgiurl"} : '');

    ## Angle brackets are stripped from user supplied markup
    $BTag       = $opt_belem || $BTag;
    $UTag       = $opt_uelem || $UTag;
    $BTag       =~ s/[<>]//g;
    $UTag       =~ s/[<>]//g;

    if (defined($opt_headmap)) {
        ## require() throws on failure instead of returning false, so it
        ## has to be trapped for the warning to be issued at all.
        eval { require $opt_headmap; 1 }
            or warn "Unable to read $opt_headmap: $@";
    }
    1;
}

##---------------------------------------------------------------------------
##      printhead() writes the page title as a top-level wiki heading,
##      followed by a horizontal rule.  Nothing is written when no
##      title is set.
##
sub printhead {
    if ($Title) {
        print $OutFH "= $Title =\n----\n";
    }
}

##---------------------------------------------------------------------------
##      printtail() writes the page footer: a horizontal rule and two
##      credit lines.
##
sub printtail {
    my @footer = (
        '----',
        'Man(1) output converted with [http://www.oac.uci.edu/indiv/ehood/man2html.html man2html].',
        'Changed by FabianFranz, for use in Wikis',
    );
    print $OutFH join("\n", @footer), "\n";
}

##---------------------------------------------------------------------------
##      emphasize() removes the overstrike sequences (any character
##      followed by a backspace) from a piece of text and returns the
##      remaining text wrapped in the underline markup $UTag.
##
sub emphasize {
    my $plain = shift;
    $plain =~ s/.\010//g;
    return $UTag . $plain . $UTag;
}

##---------------------------------------------------------------------------
##      strongize() removes the overstrike sequences (any character
##      followed by a backspace) from a piece of text and returns the
##      remaining text wrapped in the overstrike markup $BTag.
##
sub strongize {
    my $plain = shift;
    $plain =~ s/.\010//g;
    return $BTag . $plain . $BTag;
}

##---------------------------------------------------------------------------
##      entitize() takes a reference to a line of text.  Every
##      substitution in it is commented out, so the referenced text is
##      left unchanged.
##
sub entitize {
    my($text_ref) = shift;

    ## Disabled: special characters in overstrike text ##
    #$$text_ref =~ s/_\010\&/strike('_', '&')/geo;
    #$$text_ref =~ s/_\010</strike('_', '<')/geo;
    #$$text_ref =~ s/_\010>/strike('_', '>')/geo;

    #$$text_ref =~ s/(\&\010)+\&/strike('&', '&')/geo;
    #$$text_ref =~ s/(<\010)+</strike('<', '<')/geo;
    #$$text_ref =~ s/(>\010)+>/strike('>', '>')/geo;

    ## Disabled: special characters in regular text.  The characters
    ## before and after have to be checked as well, because either
    ## might be a special character itself.
    #$$text_ref =~ s/([^\010]\&[^\010])/htmlize2($1)/geo;
    #$$text_ref =~ s/([^\010]<[^\010])/htmlize2($1)/geo;
    #$$text_ref =~ s/([^\010]>[^\010])/htmlize2($1)/geo;
}

##---------------------------------------------------------------------------
##      escape special characters in a string, in-place
##
##      htmlize() takes a reference to a string.  The in-place escaping
##      of &, < and > is commented out, so the referenced string is
##      left unchanged.
##
sub htmlize {
    my($str_ref) = shift;
    #$$str_ref =~ s/&/\&amp;/g;
    #$$str_ref =~ s/</\&lt;/g;
    #$$str_ref =~ s/>/\&gt;/g;
    #$$str_ref;
}

##---------------------------------------------------------------------------
##      htmlize2() is used by entitize.
##
##      htmlize2() takes a string by value.  The escaping of &, < and >
##      is commented out, so no escaping is performed.
##
sub htmlize2 {
    my($text) = shift;
    #$text =~ s/&/\&amp;/g;
    #$text =~ s/</\&lt;/g;
    #$text =~ s/>/\&gt;/g;
    #$text;
}

##---------------------------------------------------------------------------
##      strike converts HTML special characters in overstriked text
##      into entity references.  The entities are overstriked so
##      strongize() and emphasize() will recognize the entity to be
##      wrapped in tags.
##
##      Both arguments are accepted but not examined: every branch of
##      the former character dispatch is disabled except the one for an
##      underlined '<', so the result is always the underscore-overstruck
##      form of the text "&lt;".
##
sub strike {
    my($kind, $special) = @_;
    return "_$bs\&_${bs}l_${bs}t_${bs};";
}

##---------------------------------------------------------------------------
##      make_xref() converts a manpage crossreference into a hyperlink.
##
sub make_xref {
    my $str = shift;
    $str =~ s/.\010//g;                         # Drop overstrike sequences

    ## These three lexicals are in scope for the $CgiUrl expression
    ## evaluated below, so their names are kept as they are.
    my($title, $section, $subsection) =
        $str =~ /([\+_\.\w-]+)\((\d)(\w?)\)/;
    $title =~ s/\+/%2B/g;

    ## Turn "name(1)" into "name/1"
    $str =~ tr{(}{/};
    $str =~ tr/)//d;

    my($href) = (eval $CgiUrl);
    return '["' . $href . $str . '"]';
}

##---------------------------------------------------------------------------
##      man_k() process a keyword search.  The problem we have is there
##      is no standard for keyword search results from man.  Solaris
##      systems have a different enough format to warrant dealing
##      with it as a special case.  For other cases, we try our best.
##      Unfortunately, there are some lines of results that may be
##      skipped.
##
sub man_k {
    ## Results are collected per section bucket: '1' .. '9' for the
    ## single-digit sections, 'N' for everything else.  Within a bucket
    ## the key is the cleaned-up reference list of a result line.
    my(%desc_for, %subsec_for, %catchall_sec);

    printhead()  unless $Bare;
    print $OutFH "<!-- Man keyword results converted by ",
                      "man2html $VERSION -->\n";

    ENTRY: while (defined(my $entry = <$InFH>)) {
        ## Skip lines that do not look like "name(N) - description"
        next ENTRY  unless $entry =~ /\(\d\w?\)\s+-\s/;

        my($refs, $section, $subsection, $desc) =
            $entry =~ /^\s*(.*)\((\d)(\w?)\)\s*-\s*(.*)$/;

        if ($Solaris) {
            ## "<topic> <manpage>": keep the topic only
            $refs =~ s/^\s*([\+_\.\w-]+)\s+([\+_\.\w-]+)\s*$/$1/;
        } else {
            $refs =~ s/\s(and|or)\s/,/gi; # Convert and/or to commas
            ## NOTE(review): this only strips a one-character prefix
            ## before the colon -- confirm that is what is intended.
            $refs =~ s/^[^:\s]:\s*//;   # Remove prefixed whatis path
        }
        $refs =~ s/\s//g;               # Remove all whitespace
        $refs =~ s/,/, /g;              # Put space after comma
        htmlize(\$desc);                # Check for special chars in desc
        $desc =~ s/^(.)/\U$1/;          # Uppercase first letter in desc

        my $bucket = 'N';
        $bucket = $section  if $section =~ /\A[1-9]\z/;

        $desc_for{$bucket}{$refs}   = $desc;
        $subsec_for{$bucket}{$refs} = $subsection;
        $catchall_sec{$refs}        = $section  if $bucket eq 'N';
    }

    ## Sections 1 to 9 in order, then the catch-all section
    foreach my $number (1 .. 9) {
        print_mank_sec($desc_for{$number} || {}, $number,
                       $subsec_for{$number} || {});
    }
    print_mank_sec($desc_for{'N'} || {}, 'N', $subsec_for{'N'} || {},
                   \%catchall_sec);

    printtail()  unless $Bare;
}
##---------------------------------------------------------------------------
##      print_mank_sec() prints out manpage cross-refs of a specific section.
##
##      Arguments:
##        $sec     ref to hash: reference list => description
##        $sect    section label; 'N' marks the catch-all section
##        $secsub  ref to hash: reference list => subsection
##        $secsec  ref to hash: reference list => real section number
##                 (only consulted when $sect is 'N')
##
##      Nothing is printed when $sec has no entries.
##
sub print_mank_sec {
    my($sec, $sect, $secsub, $secsec) = @_;
    ## $title, $section and $subsection are in scope for the $CgiUrl
    ## expression evaluated below, so their names must stay as they are.
    my(@array, @refs, @links, $item, $title, $subsection, $section, $xref);
    $section = $sect;

    @array = sort keys %$sec;
    if (@array) {
        print $OutFH "== Section $section ==\n";
        foreach $item (@array) {
            ## The keys are stored as "a, b, c".  Splitting on the comma
            ## alone left a leading blank inside every link after the
            ## first, so the surrounding whitespace is consumed as well.
            @refs = split(/\s*,\s*/, $item);
            $section = $secsec->{$item}  if $sect eq 'N';
            $subsection = $secsub->{$item};

            ($title = $refs[0]) =~ s/\(\)//g;  # watch out for extra ()'s
            $xref = eval $CgiUrl;

            @links = ();
            foreach my $ref (@refs) {
                push @links, qq|["$xref$ref"]|;
            }
            print $OutFH " ",
                         join(", ", @links),
                         " ($section$subsection)\n",
                         ":: ",
                         $sec->{$item}, "\n";
        }
    }
}

##---------------------------------------------------------------------------
##
##      usage() prints the option summary, description and version
##      notice to $OutFH and then ends the program with exit status 0.
##
sub usage {
    my $help_text = <<"EndOfUsage";
Usage: $PROG [ options ] < infile > outfile
Options:
  -bare            : Do not put in HTML, HEAD, BODY tags
  -belem <elem>    : HTML Element for overstriked text (def: "B")
  -botm <#>        : Number of lines for bottom margin (def: 7)
  -cgiurl <url>    : URL for linking to other manpages
  -cgiurlexp <url> : Perl expression URL for linking to other manpages
  -compress        : Compress consective blank lines
  -headmap <file>  : Filename of user section head map file
  -help            : This message
  -k               : Process a keyword search result
  -leftm <#>       : Character width of left margin (def: 0)
  -nodepage        : Do not remove pagination lines
  -noheads         : Turn off section head detection
  -pgsize <#>      : Number of lines in a page (def: 66)
  -seealso         : Link to other manpages only in the SEE ALSO section
  -solaris         : Process keyword search result in Solaris format
  -sun             : Section heads are not overstriked in input
  -title <string>  : Title of manpage (def: Not defined)
  -topm <#>        : Number of lines for top margin (def: 7)
  -uelem <elem>    : HTML Element for underlined text (def: "I")

Description:
  $PROG takes formatted manpages from STDIN and converts it to HTML sent
  to STDOUT.  The -topm and -botm arguments are the number of lines to the
  main body text and NOT to the running headers/footers.

Version:
  $VERSION
  Copyright (C) 1995-1997  Earl Hood, ehood\@medusa.acs.uci.edu
  $PROG comes with ABSOLUTELY NO WARRANTY and $PROG may be copied only
  under the terms of the GNU General Public License, which may be found in
  the $PROG distribution.

EndOfUsage
    print {$OutFH} $help_text;
    exit 0;
}
Gespeicherte Dateianhänge

Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.
Sie dürfen keine Anhänge an diese Seite anhängen!