saving uncommitted changes in /etc prior to emerge run
This commit is contained in:
495
htdig/HtFileType-magic.mime
Normal file
495
htdig/HtFileType-magic.mime
Normal file
@@ -0,0 +1,495 @@
|
||||
# Magic data for the file(1) command
|
||||
#
|
||||
# The format is 4-5 columns:
|
||||
# Column #1: byte number to begin checking from, ">" indicates continuation
|
||||
# Column #2: type of data to match
|
||||
# Column #3: contents of data to match
|
||||
# Column #4: MIME type of result
|
||||
# Column #5: MIME encoding of result (optional)
|
||||
#
|
||||
# Modified by <mailto:lha@users.sourceforge.net> for compatibility with
|
||||
# different versions of file(1):
|
||||
# - Columns are separated by TABs (for traditional versions)
|
||||
# - spaces and '<'s within a column are escaped by '\' (for new versions)
|
||||
# - Hex numbers in strings are given as '\0x' (traditional) and '\x' (new)
|
||||
# - Null characters (\000) traditionally terminate strings, but now don't
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Localstuff: file(1) magic for locally observed files
|
||||
# Add any locally observed files here.
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# end local stuff
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Java
|
||||
|
||||
0 short 0xcafe
|
||||
>2 short 0xbabe application/java
|
||||
|
||||
# Microsoft WAVE format (*.wav)
|
||||
# [GRR 950115: probably all of the shorts and longs should be leshort/lelong]
|
||||
# Microsoft RIFF
|
||||
0 string RIFF audio/unknown
|
||||
# - WAVE format
|
||||
>8 string WAVE audio/x-wav
|
||||
>8 string AVI video/x-msvideo
|
||||
#
|
||||
0 belong 0x2e7261fd application/x-realaudio
|
||||
|
||||
# MPEG Layer 3 sound files
|
||||
0 beshort &0xffe0 audio/mpeg
|
||||
#MP3 with ID3 tag
|
||||
0 string ID3 audio/mpeg
|
||||
# Ogg/Vorbis
|
||||
0 string OggS audio/x-ogg
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# commands: file(1) magic for various shells and interpreters
|
||||
#
|
||||
#0 string :\ shell archive or commands for antique kernel text
|
||||
0 string #!/bin/sh application/x-shellscript
|
||||
0 string #!\ /bin/sh application/x-shellscript
|
||||
0 string #!/bin/csh application/x-shellscript
|
||||
0 string #!\ /bin/csh application/x-shellscript
|
||||
# korn shell magic, sent by George Wu, gwu@clyde.att.com
|
||||
0 string #!/bin/ksh application/x-shellscript
|
||||
0 string #!\ /bin/ksh application/x-shellscript
|
||||
0 string #!/bin/tcsh application/x-shellscript
|
||||
0 string #!\ /bin/tcsh application/x-shellscript
|
||||
0 string #!/usr/local/tcsh application/x-shellscript
|
||||
0 string #!\ /usr/local/tcsh application/x-shellscript
|
||||
0 string #!/usr/local/bin/tcsh application/x-shellscript
|
||||
0 string #!\ /usr/local/bin/tcsh application/x-shellscript
|
||||
# bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de)
|
||||
0 string #!/bin/bash application/x-shellscript
|
||||
0 string #!\ /bin/bash application/x-shellscript
|
||||
0 string #!/usr/local/bin/bash application/x-shellscript
|
||||
0 string #!\ /usr/local/bin/bash application/x-shellscript
|
||||
|
||||
#
|
||||
# zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson)
|
||||
0 string #!/usr/local/bin/zsh application/x-shellscript
|
||||
0 string #!\ /usr/local/bin/zsh application/x-shellscript
|
||||
0 string #!/usr/local/bin/ash application/x-shellscript
|
||||
0 string #!\ /usr/local/bin/ash application/x-shellscript
|
||||
#0 string #!/usr/local/bin/ae Neil Brown's ae
|
||||
#0 string #!\ /usr/local/bin/ae Neil Brown's ae
|
||||
0 string #!/bin/nawk application/x-nawk
|
||||
0 string #!\ /bin/nawk application/x-nawk
|
||||
0 string #!/usr/bin/nawk application/x-nawk
|
||||
0 string #!\ /usr/bin/nawk application/x-nawk
|
||||
0 string #!/usr/local/bin/nawk application/x-nawk
|
||||
0 string #!\ /usr/local/bin/nawk application/x-nawk
|
||||
0 string #!/bin/gawk application/x-gawk
|
||||
0 string #!\ /bin/gawk application/x-gawk
|
||||
0 string #!/usr/bin/gawk application/x-gawk
|
||||
0 string #!\ /usr/bin/gawk application/x-gawk
|
||||
0 string #!/usr/local/bin/gawk application/x-gawk
|
||||
0 string #!\ /usr/local/bin/gawk application/x-gawk
|
||||
#
|
||||
0 string #!/bin/awk application/x-awk
|
||||
0 string #!\ /bin/awk application/x-awk
|
||||
0 string #!/usr/bin/awk application/x-awk
|
||||
0 string #!\ /usr/bin/awk application/x-awk
|
||||
0 string BEGIN application/x-awk
|
||||
|
||||
# For Larry Wall's perl language. The ``eval'' line recognizes an
|
||||
# outrageously clever hack for USG systems.
|
||||
# Keith Waclena <keith@cerberus.uchicago.edu>
|
||||
0 string #!/bin/perl application/x-perl
|
||||
0 string #!\ /bin/perl application/x-perl
|
||||
0 string eval\ "exec\ /bin/perl application/x-perl
|
||||
0 string #!/usr/bin/perl application/x-perl
|
||||
0 string #!\ /usr/bin/perl application/x-perl
|
||||
0 string eval\ "exec\ /usr/bin/perl application/x-perl
|
||||
0 string #!/usr/local/bin/perl application/x-perl
|
||||
0 string #!\ /usr/local/bin/perl application/x-perl
|
||||
0 string eval\ "exec\ /usr/local/bin/perl application/x-perl
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# compress: file(1) magic for pure-compression formats (no archives)
|
||||
#
|
||||
# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc.
|
||||
#
|
||||
# Formats for various forms of compressed data
|
||||
# Formats for "compress" proper have been moved into "compress.c",
|
||||
# because it tries to uncompress it to figure out what's inside.
|
||||
# (Technically, "gzip", "bzip2" etc. are encodings, not mime-types,
|
||||
# and should also decompress to find out the type of data inside.)
|
||||
|
||||
# standard unix compress
|
||||
0 string \037\235 application/x-compress
|
||||
|
||||
# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
|
||||
0 string \037\213 application/x-gzip
|
||||
|
||||
# bzip2
|
||||
0 string BZh application/x-bzip2
|
||||
|
||||
0 string PK\003\004 application/x-zip
|
||||
|
||||
# According to gzip.h, this is the correct byte order for packed data.
|
||||
0 string \037\036 application/octet-stream
|
||||
#
|
||||
# This magic number is byte-order-independent.
|
||||
#
|
||||
0 short 017437 application/octet-stream
|
||||
|
||||
# XXX - why *two* entries for "compacted data", one of which is
|
||||
# byte-order independent, and one of which is byte-order dependent?
|
||||
#
|
||||
# compacted data
|
||||
0 short 0x1fff application/octet-stream
|
||||
0 string \377\037 application/octet-stream
|
||||
# huf output
|
||||
0 short 0145405 application/octet-stream
|
||||
|
||||
# Squeeze and Crunch...
|
||||
# These numbers were gleaned from the Unix versions of the programs to
|
||||
# handle these formats. Note that I can only uncrunch, not crunch, and
|
||||
# I didn't have a crunched file handy, so the crunch number is untested.
|
||||
# Keith Waclena <keith@cerberus.uchicago.edu>
|
||||
#0 leshort 0x76FF squeezed data (CP/M, DOS)
|
||||
#0 leshort 0x76FE crunched data (CP/M, DOS)
|
||||
|
||||
# Freeze
|
||||
#0 string \037\237 Frozen file 2.1
|
||||
#0 string \037\236 Frozen file 1.0 (or gzip 0.5)
|
||||
|
||||
# lzh?
|
||||
#0 string \037\240 LZH compressed data
|
||||
|
||||
257 string ustar\0 application/x-tar posix
|
||||
257 string ustar\040\040\0 application/x-tar gnu
|
||||
|
||||
0 short 070707 application/x-cpio
|
||||
0 short 0143561 application/x-cpio swapped
|
||||
|
||||
0 string =<ar> application/x-archive
|
||||
0 string !<arch> application/x-archive
|
||||
>8 string debian application/x-debian-package
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
#
|
||||
# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com)
|
||||
#
|
||||
0 beshort 0xedab
|
||||
>2 beshort 0xeedb application/x-rpm
|
||||
|
||||
0 lelong&0x8080ffff 0x0000081a application/x-arc lzw
|
||||
0 lelong&0x8080ffff 0x0000091a application/x-arc squashed
|
||||
0 lelong&0x8080ffff 0x0000021a application/x-arc uncompressed
|
||||
0 lelong&0x8080ffff 0x0000031a application/x-arc packed
|
||||
0 lelong&0x8080ffff 0x0000041a application/x-arc squeezed
|
||||
0 lelong&0x8080ffff 0x0000061a application/x-arc crunched
|
||||
|
||||
0 leshort 0xea60 application/octet-stream x-arj
|
||||
|
||||
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
|
||||
2 string -lh0- application/x-lharc lh0
|
||||
2 string -lh1- application/x-lharc lh1
|
||||
2 string -lz4- application/x-lharc lz4
|
||||
2 string -lz5- application/x-lharc lz5
|
||||
# [never seen any but the last; -lh4- reported in comp.compression:]
|
||||
2 string -lzs- application/x-lha lzs
|
||||
2 string -lh\ - application/x-lha lh
|
||||
2 string -lhd- application/x-lha lhd
|
||||
2 string -lh2- application/x-lha lh2
|
||||
2 string -lh3- application/x-lha lh3
|
||||
2 string -lh4- application/x-lha lh4
|
||||
2 string -lh5- application/x-lha lh5
|
||||
2 string -lh6- application/x-lha lh6
|
||||
2 string -lh7- application/x-lha lh7
|
||||
# Shell archives
|
||||
10 string #\ This\ is\ a\ shell\ archive application/octet-stream x-shell
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# frame: file(1) magic for FrameMaker files
|
||||
#
|
||||
# This stuff came on a FrameMaker demo tape, most of which is
|
||||
# copyright, but this file is "published" as witness the following:
|
||||
#
|
||||
0 string \<MakerFile application/x-frame
|
||||
0 string \<MIFFile application/x-frame
|
||||
0 string \<MakerDictionary application/x-frame
|
||||
0 string \<MakerScreenFon application/x-frame
|
||||
0 string \<MML application/x-frame
|
||||
0 string \<Book application/x-frame
|
||||
0 string \<Maker application/x-frame
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# html: file(1) magic for HTML (HyperText Markup Language) docs
|
||||
#
|
||||
# from Daniel Quinlan <quinlan@yggdrasil.com>
|
||||
# modified by Lachlan Andrew <lha@users.sourceforge.net> to
|
||||
# match leading whitespace, but still work with old versions
|
||||
# of file(1) which don't recognise the /cb options
|
||||
#
|
||||
0 string \<HEAD text/html
|
||||
0 string \<head text/html
|
||||
0 string \<TITLE text/html
|
||||
0 string \<title text/html
|
||||
0 string \<HTML text/html
|
||||
0 string \<html text/html
|
||||
0 string \<!-- text/html
|
||||
0 string \<H1 text/html
|
||||
0 string \<h1 text/html
|
||||
0 string \<!DOCTYPE\ HTML text/html
|
||||
0 string \<!doctype\ HTML text/html
|
||||
0 string \<!doctype\ html text/html
|
||||
0 string \<!DOCTYPE\ NETSCAPE-Bookmark text/html
|
||||
0 string/cb \ <head text/html
|
||||
0 string/cb \ <html text/html
|
||||
0 string/cb \ <title text/html
|
||||
0 string/cb \ <!doctype\ html text/html
|
||||
0 string \<!\ text/html
|
||||
|
||||
# Extensible markup language (XML), a subset of SGML
|
||||
# from Marc Prud'hommeaux (marc@apocalypse.org)
|
||||
0 string \<?xml text/xml
|
||||
0 string/cb \ \<?xml text/xml
|
||||
|
||||
# SGML, mostly from rph@sq
|
||||
0 string \<!doctype text/sgml
|
||||
0 string \<!subdoc text/sgml
|
||||
0 string/cb \ \<!doctype text/sgml
|
||||
0 string/cb \ \<!subdoc text/sgml
|
||||
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# images: file(1) magic for image formats (see also "c-lang" for XPM bitmaps)
|
||||
#
|
||||
# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
|
||||
# additions by janl@ifi.uio.no as well as others. Jan also suggested
|
||||
# merging several one- and two-line files into here.
|
||||
#
|
||||
# XXX - byte order for GIF and TIFF fields?
|
||||
# [GRR: TIFF allows both byte orders; GIF is probably little-endian]
|
||||
#
|
||||
|
||||
# [GRR: what the hell is this doing in here?]
|
||||
#0 string xbtoa btoa'd file
|
||||
|
||||
# PBMPLUS
|
||||
# PBM file
|
||||
0 string P1 image/x-portable-bitmap 7bit
|
||||
# PGM file
|
||||
0 string P2 image/x-portable-greymap 7bit
|
||||
# PPM file
|
||||
0 string P3 image/x-portable-pixmap 7bit
|
||||
# PBM "rawbits" file
|
||||
0 string P4 image/x-portable-bitmap
|
||||
# PGM "rawbits" file
|
||||
0 string P5 image/x-portable-greymap
|
||||
# PPM "rawbits" file
|
||||
0 string P6 image/x-portable-pixmap
|
||||
|
||||
# NIFF (Navy Interchange File Format, a modification of TIFF)
|
||||
# [GRR: this *must* go before TIFF]
|
||||
0 string IIN1 image/x-niff
|
||||
|
||||
# TIFF and friends
|
||||
# TIFF file, big-endian
|
||||
0 string MM image/tiff
|
||||
# TIFF file, little-endian
|
||||
0 string II image/tiff
|
||||
|
||||
# possible GIF replacements; none yet released!
|
||||
# (Greg Roelofs, newt@uchicago.edu)
|
||||
#
|
||||
# GRR 950115: this was mine ("Zip GIF"):
|
||||
# ZIF image (GIF+deflate alpha)
|
||||
0 string GIF94z image/unknown
|
||||
#
|
||||
# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better):
|
||||
# FGF image (GIF+deflate beta)
|
||||
0 string FGF95a image/unknown
|
||||
#
|
||||
# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal
|
||||
# (best; not yet implemented):
|
||||
# PBF image (deflate compression)
|
||||
0 string PBF image/unknown
|
||||
|
||||
# GIF
|
||||
0 string GIF image/gif
|
||||
|
||||
# JPEG images
|
||||
0 beshort 0xffd8 image/jpeg
|
||||
0 string \377\330\377\340 image/jpeg
|
||||
0 string \377\330\377\341 image/jpeg
|
||||
0 string \377\330\377\356 image/jpeg
|
||||
|
||||
|
||||
# PC bitmaps (OS/2, Windoze BMP files) (Greg Roelofs, newt@uchicago.edu)
|
||||
0 string BM image/bmp
|
||||
#>14 byte 12 (OS/2 1.x format)
|
||||
#>14 byte 64 (OS/2 2.x format)
|
||||
#>14 byte 40 (Windows 3.x format)
|
||||
#0 string IC icon
|
||||
#0 string PI pointer
|
||||
#0 string CI color icon
|
||||
#0 string CP color pointer
|
||||
#0 string BA bitmap array
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# lisp: file(1) magic for lisp programs
|
||||
#
|
||||
# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
|
||||
0 string ;; text/plain 8bit
|
||||
# Emacs 18 - this is always correct, but not very magical.
|
||||
0 string \012( application/x-elc
|
||||
# Emacs 19
|
||||
0 string ;ELC\023\000\000\000 application/x-elc
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# mail.news: file(1) magic for mail and news
|
||||
#
|
||||
# There are tests in ascmagic.c to cope with mail and news.
|
||||
0 string Relay-Version: message/rfc822 7bit
|
||||
0 string #!\ rnews message/rfc822 7bit
|
||||
0 string N#!\ rnews message/rfc822 7bit
|
||||
0 string Forward\ to message/rfc822 7bit
|
||||
0 string Pipe\ to message/rfc822 7bit
|
||||
0 string Return-Path: message/rfc822 7bit
|
||||
0 string Path: message/news 8bit
|
||||
0 string Xref: message/news 8bit
|
||||
0 string From: message/rfc822 7bit
|
||||
0 string Article message/news 8bit
|
||||
#------------------------------------------------------------------------------
|
||||
# msword: file(1) magic for MS Word files
|
||||
#
|
||||
# Contributor claims:
|
||||
# Reverse-engineered MS Word magic numbers
|
||||
# Some of these also occur in PowerPoint. -- lha@users.sourceforge.net
|
||||
|
||||
0 string \376\067\0\043 application/msword
|
||||
0 string \320\317\021\340\241\261 application/msword
|
||||
0 string \333\245-\0\0\0 application/msword
|
||||
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# printer: file(1) magic for printer-formatted files
|
||||
#
|
||||
|
||||
# PostScript
|
||||
0 string %! application/postscript
|
||||
0 string \004%! application/postscript
|
||||
0 string \033%-12345X%!PS application/postscript
|
||||
|
||||
# Acrobat
|
||||
# (due to clamen@cs.cmu.edu)
|
||||
0 string %PDF- application/pdf
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# sc: file(1) magic for "sc" spreadsheet
|
||||
#
|
||||
38 string Spreadsheet application/x-sc
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# tex: file(1) magic for TeX files
|
||||
#
|
||||
# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
|
||||
#
|
||||
# From <conklin@talisman.kaleida.com>
|
||||
|
||||
# Although we may know the offset of certain text fields in TeX DVI
|
||||
# and font files, we can't use them reliably because they are not
|
||||
# zero terminated. [but we do anyway, christos]
|
||||
0 string \367\002 application/x-dvi
|
||||
#0 string \367\203 TeX generic font data
|
||||
#0 string \367\131 TeX packed font data
|
||||
#0 string \367\312 TeX virtual font data
|
||||
#0 string This\ is\ TeX, TeX transcript text
|
||||
#0 string This\ is\ METAFONT, METAFONT transcript text
|
||||
|
||||
# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
|
||||
#0 string \\input\ texinfo Texinfo source text
|
||||
#0 string This\ is\ Info\ file GNU Info text
|
||||
|
||||
# correct TeX magic for Linux (and maybe more)
|
||||
# from Peter Tobias (tobias@server.et-inf.fho-emden.de)
|
||||
#
|
||||
0 leshort 0x02f7 application/x-dvi
|
||||
|
||||
# RTF - Rich Text Format
|
||||
0 string {\\rtf text/rtf
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# animation: file(1) magic for animation/movie formats
|
||||
#
|
||||
# animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8)
|
||||
# MPEG file
|
||||
0 belong 0x000001b3 video/mpeg
|
||||
0 belong 0x000001ba video/mpeg
|
||||
# FLI animation format
|
||||
0 leshort 0xAF11 video/fli
|
||||
# FLC animation format
|
||||
0 leshort 0xAF12 video/flc
|
||||
# AVI
|
||||
>8 string AVI\ video/avi
|
||||
#
|
||||
# SGI and Apple formats
|
||||
#
|
||||
0 string MOVI video/sgi
|
||||
4 string moov video/quicktime moov
|
||||
4 string mdat video/quicktime mdat
|
||||
# The contributor claims:
|
||||
# I couldn't find a real magic number for these, however, this
|
||||
# -appears- to work. Note that it might catch other files, too,
|
||||
# so BE CAREFUL!
|
||||
#
|
||||
# Note that title and author appear in the two 20-byte chunks
|
||||
# at decimal offsets 2 and 22, respectively, but they are XOR'ed with
|
||||
# 255 (hex FF)! DL format SUCKS BIG ROCKS.
|
||||
#
|
||||
# DL file version 1, medium format (160x100, 4 images/screen)
|
||||
0 byte 1 video/unknown
|
||||
0 byte 2 video/unknown
|
||||
#
|
||||
# Databases
|
||||
#
|
||||
# GDBM magic numbers
|
||||
# Will be maintained as part of the GDBM distribution in the future.
|
||||
# <downsj@teeny.org>
|
||||
0 belong 0x13579ace application/x-gdbm
|
||||
0 lelong 0x13579ace application/x-gdbm
|
||||
0 string GDBM application/x-gdbm
|
||||
#
|
||||
0 belong 0x061561 application/x-dbm
|
||||
#
|
||||
# Executables
|
||||
#
|
||||
0 string \177ELF
|
||||
>4 byte 0
|
||||
>4 byte 1
|
||||
>4 byte 2
|
||||
>5 byte 0
|
||||
>5 byte 1
|
||||
>>16 leshort 0
|
||||
>>16 leshort 1 application/x-object
|
||||
>>16 leshort 2 application/x-executable
|
||||
>>16 leshort 3 application/x-sharedlib
|
||||
>>16 leshort 4 application/x-coredump
|
||||
#
|
||||
# DOS
|
||||
0 string MZ application/x-dosexec
|
||||
#
|
||||
# KDE
|
||||
0 string [KDE\ Desktop\ Entry] application/x-kdelnk
|
||||
0 string \#\ KDE\ Config\ File application/x-kdelnk
|
||||
# xmcd database file for kscd
|
||||
0 string \#\ xmcd text/xmcd
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# pkgadd: file(1) magic for SysV R4 PKG Datastreams
|
||||
#
|
||||
0 string #\ PaCkAgE\ DaTaStReAm application/x-svr4-package
|
||||
|
||||
#PNG Image Format
|
||||
0 string \x89PNG image/png
|
||||
0 string \0x89PNG image/png
|
||||
38
htdig/cookies.txt
Normal file
38
htdig/cookies.txt
Normal file
@@ -0,0 +1,38 @@
|
||||
#
|
||||
# Example of input file for cookies for ht://Dig and ht://Check
|
||||
#
|
||||
# Copyright (c) 1999-2004 Comune di Prato - Prato - Italy
|
||||
# Copyright (c) 1995-2004 The ht://Dig Group <www.htdig.org>
|
||||
# Author: Gabriele Bartolini - Prato - Italy <angusgb@users.sourceforge.net>
|
||||
#
|
||||
# For copyright details, see the file COPYING in your distribution
|
||||
# or the GNU General Public License version 2 or later
|
||||
# <http://www.gnu.org/copyleft/gpl.html>
|
||||
#
|
||||
# This file must be located through the 'cookies_input_file' directive, and
|
||||
# its purpose is to pre-load cookies into ht://Check and ht://Dig for use during a
|
||||
# crawl. Each line contains one name-value pair. Lines beginning with '#'
|
||||
# or empty ones are ignored.
|
||||
#
|
||||
# This information was taken from: http://www.cookiecentral.com/faq/#3.5
|
||||
#
|
||||
# Each line represents a single piece of stored information.
|
||||
# A tab is inserted between each of the fields. From left-to-right,
|
||||
# here is what each field represents:
|
||||
#
|
||||
# domain The domain that created AND that can read the variable.
|
||||
# flag A TRUE/FALSE value indicating if all machines within a given
|
||||
# domain can access the variable. This value is IGNORED.
|
||||
# path The path within the domain that the variable is valid for.
|
||||
# secure A TRUE/FALSE value indicating if a secure connection with the
|
||||
# domain is needed to access the variable. IGNORED.
|
||||
# expiration The UNIX time that the variable will expire on. UNIX time is
|
||||
# defined as the number of seconds since the epoch (Jan 1, 1970 00:00:00 GMT).
|
||||
# If you want to issue a session cookie, just set this field
|
||||
# value to '0'.
|
||||
# name The name of the variable.
|
||||
# value The value of the variable.
|
||||
#
|
||||
# For instance, a cookies.txt file may have an entry that looks like this:
|
||||
#
|
||||
# .netscape.com TRUE / FALSE 946684799 NETSCAPE_ID 100103
|
||||
190
htdig/htdig.conf
Normal file
190
htdig/htdig.conf
Normal file
@@ -0,0 +1,190 @@
|
||||
#
|
||||
# Example config file for ht://Dig.
|
||||
#
|
||||
# This configuration file is used by all the programs that make up ht://Dig.
|
||||
# Please refer to the attribute reference manual for more details on what
|
||||
# can be put into this file. (http://www.htdig.org/confindex.html)
|
||||
# Note that most attributes have very reasonable default values so you
|
||||
# really only have to add attributes here if you want to change the defaults.
|
||||
#
|
||||
# What follows are some of the common attributes you might want to change.
|
||||
#
|
||||
|
||||
#
|
||||
# Specify where the database files need to go. Make sure that there is
|
||||
# plenty of free disk space available for the databases. They can get
|
||||
# pretty big.
|
||||
#
|
||||
database_dir: /var/lib/htdig/db
|
||||
|
||||
#
|
||||
# This specifies the URL where the robot (htdig) will start. You can specify
|
||||
# multiple URLs here. Just separate them by some whitespace.
|
||||
# The example here will cause the ht://Dig homepage and related pages to be
|
||||
# indexed.
|
||||
# You could also index all the URLs in a file like so:
|
||||
# start_url: `${common_dir}/start.url`
|
||||
#
|
||||
start_url: http://www.htdig.org/
|
||||
|
||||
#
|
||||
# This attribute limits the scope of the indexing process. The default is to
|
||||
# set it to the same as the start_url above. This way only pages that are on
|
||||
# the sites specified in the start_url attribute will be indexed and it will
|
||||
# reject any URLs that go outside of those sites.
|
||||
#
|
||||
# Keep in mind that the value for this attribute is just a list of string
|
||||
# patterns. As long as URLs contain at least one of the patterns it will be
|
||||
# seen as part of the scope of the index.
|
||||
#
|
||||
limit_urls_to: ${start_url}
|
||||
|
||||
#
|
||||
# This attribute is used for compressing the database. The default is to
|
||||
# set it to the same as the limit_urls_to above, plus some common endings.
|
||||
#
|
||||
# Keep in mind that this list should be short. If your start_url is a very
|
||||
# long list of URLs, it may be wise to replace it with something like
|
||||
# http://www. or comment this out and use the compiled-in default.
|
||||
#
|
||||
common_url_parts: ${limit_urls_to} .html .htm .shtml
|
||||
|
||||
#
|
||||
# If there are particular pages that you definitely do NOT want to index, you
|
||||
# can use the exclude_urls attribute. The value is a list of string patterns.
|
||||
# If a URL matches any of the patterns, it will NOT be indexed. This is
|
||||
# useful to exclude things like virtual web trees or database accesses. By
|
||||
# default, all CGI URLs will be excluded. (Note that the /cgi-bin/ convention
|
||||
# may not work on your web server. Check the path prefix used on your web
|
||||
# server.)
|
||||
#
|
||||
exclude_urls: /cgi-bin/ .cgi
|
||||
|
||||
#
|
||||
# Since ht://Dig does not (and cannot) parse every document type, this
|
||||
# attribute is a list of strings (extensions) that will be ignored during
|
||||
# indexing. These are *only* checked at the end of a URL, whereas
|
||||
# exclude_urls patterns are matched anywhere.
|
||||
#
|
||||
# Also keep in mind that while other attributes allow regex, these must be
|
||||
# actual strings.
|
||||
#
|
||||
bad_extensions: .wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif \
|
||||
.jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css
|
||||
|
||||
#
|
||||
# The string htdig will send in every request to identify the robot. Change
|
||||
# this to your email address.
|
||||
#
|
||||
maintainer: unconfigured@htdig.searchengine.maintainer
|
||||
|
||||
#
|
||||
# The excerpts that are displayed in long results rely on stored information
|
||||
# in the index databases. The compiled default only stores 512 characters of
|
||||
# text from each document (this excludes any HTML markup...) If you plan on
|
||||
# using the excerpts you probably want to make this larger. The only concern
|
||||
# here is that more disk space is going to be needed to store the additional
|
||||
# information. Since disk space is cheap (! :-)) you might want to set this
|
||||
# to a value so that a large percentage of the documents that you are going
|
||||
# to be indexing are stored completely in the database. At SDSU we found
|
||||
# that by setting this value to about 50k the index would get 97% of all
|
||||
# documents completely and only 3% was cut off at 50k. You probably want to
|
||||
# experiment with this value.
|
||||
# Note that if you want to set this value low, you probably want to set the
|
||||
# excerpt_show_top attribute to true so that the top excerpt_length characters
|
||||
# of the document are always shown.
|
||||
#
|
||||
max_head_length: 10000
|
||||
|
||||
#
|
||||
# To limit network connections, ht://Dig will only pull up to a certain limit
|
||||
# of bytes. This prevents the indexing from dying because the server keeps
|
||||
# sending information. However, several FAQs happen because people have files
|
||||
# bigger than the default limit of 100KB. This sets the default a bit higher.
|
||||
# (see <http://www.htdig.org/FAQ.html> for more)
|
||||
#
|
||||
max_doc_size: 200000
|
||||
|
||||
#
|
||||
# Most people expect some sort of excerpt in results. By default, if the
|
||||
# search words aren't found in context in the stored excerpt, htsearch shows
|
||||
# the text defined in the no_excerpt_text attribute:
|
||||
# (None of the search words were found in the top of this document.)
|
||||
# This attribute instead will show the top of the excerpt.
|
||||
#
|
||||
no_excerpt_show_top: true
|
||||
|
||||
#
|
||||
# Depending on your needs, you might want to enable some of the fuzzy search
|
||||
# algorithms. There are several to choose from and you can use them in any
|
||||
# combination you feel comfortable with. Each algorithm will get a weight
|
||||
# assigned to it so that in combinations of algorithms, certain algorithms get
|
||||
# preference over others. Note that the weights only affect the ranking of
|
||||
# the results, not the actual searching.
|
||||
# The available algorithms are:
|
||||
# accents
|
||||
# exact
|
||||
# endings
|
||||
# metaphone
|
||||
# prefix
|
||||
# regex
|
||||
# soundex
|
||||
# speling [sic]
|
||||
# substring
|
||||
# synonyms
|
||||
# By default only the "exact" algorithm is used with weight 1.
|
||||
# Note that if you are going to use the endings, metaphone, soundex, accents,
|
||||
# or synonyms algorithms, you will need to run htfuzzy to generate
|
||||
# the databases they use.
|
||||
#
|
||||
search_algorithm: exact:1 synonyms:0.5 endings:0.1
|
||||
|
||||
#
|
||||
# The following are the templates used in the builtin search results
|
||||
# The default is to use compiled versions of these files, which produces
|
||||
# slightly faster results. However, uncommenting these lines makes it
|
||||
# very easy to change the format of search results.
|
||||
# See <http://www.htdig.org/hts_templates.html> for more details.
|
||||
#
|
||||
# template_map: Long long ${common_dir}/long.html \
|
||||
# Short short ${common_dir}/short.html
|
||||
# template_name: long
|
||||
|
||||
#
|
||||
# The following are used to change the text for the page index.
|
||||
# The defaults are just boring text numbers. These images spice
|
||||
# up the result pages quite a bit. (Feel free to do whatever, though)
|
||||
#
|
||||
next_page_text: <img src="/htdig/buttonr.gif" border="0" align="middle" width="30" height="30" alt="next">
|
||||
no_next_page_text:
|
||||
prev_page_text: <img src="/htdig/buttonl.gif" border="0" align="middle" width="30" height="30" alt="prev">
|
||||
no_prev_page_text:
|
||||
page_number_text: '<img src="/htdig/button1.gif" border="0" align="middle" width="30" height="30" alt="1">' \
|
||||
'<img src="/htdig/button2.gif" border="0" align="middle" width="30" height="30" alt="2">' \
|
||||
'<img src="/htdig/button3.gif" border="0" align="middle" width="30" height="30" alt="3">' \
|
||||
'<img src="/htdig/button4.gif" border="0" align="middle" width="30" height="30" alt="4">' \
|
||||
'<img src="/htdig/button5.gif" border="0" align="middle" width="30" height="30" alt="5">' \
|
||||
'<img src="/htdig/button6.gif" border="0" align="middle" width="30" height="30" alt="6">' \
|
||||
'<img src="/htdig/button7.gif" border="0" align="middle" width="30" height="30" alt="7">' \
|
||||
'<img src="/htdig/button8.gif" border="0" align="middle" width="30" height="30" alt="8">' \
|
||||
'<img src="/htdig/button9.gif" border="0" align="middle" width="30" height="30" alt="9">' \
|
||||
'<img src="/htdig/button10.gif" border="0" align="middle" width="30" height="30" alt="10">'
|
||||
#
|
||||
# To make the current page stand out, we will put a border around the
|
||||
# image for that page.
|
||||
#
|
||||
no_page_number_text: '<img src="/htdig/button1.gif" border="2" align="middle" width="30" height="30" alt="1">' \
|
||||
'<img src="/htdig/button2.gif" border="2" align="middle" width="30" height="30" alt="2">' \
|
||||
'<img src="/htdig/button3.gif" border="2" align="middle" width="30" height="30" alt="3">' \
|
||||
'<img src="/htdig/button4.gif" border="2" align="middle" width="30" height="30" alt="4">' \
|
||||
'<img src="/htdig/button5.gif" border="2" align="middle" width="30" height="30" alt="5">' \
|
||||
'<img src="/htdig/button6.gif" border="2" align="middle" width="30" height="30" alt="6">' \
|
||||
'<img src="/htdig/button7.gif" border="2" align="middle" width="30" height="30" alt="7">' \
|
||||
'<img src="/htdig/button8.gif" border="2" align="middle" width="30" height="30" alt="8">' \
|
||||
'<img src="/htdig/button9.gif" border="2" align="middle" width="30" height="30" alt="9">' \
|
||||
'<img src="/htdig/button10.gif" border="2" align="middle" width="30" height="30" alt="10">'
|
||||
|
||||
# local variables:
|
||||
# mode: text
|
||||
# eval: (if (eq window-system 'x) (progn (setq font-lock-keywords (list '("^#.*" . font-lock-keyword-face) '("^[a-zA-Z][^ :]+" . font-lock-function-name-face) '("[+$]*:" . font-lock-comment-face) )) (font-lock-mode)))
|
||||
# end:
|
||||
279
htdig/mime.types
Normal file
279
htdig/mime.types
Normal file
@@ -0,0 +1,279 @@
|
||||
# This is the default mime.types file from the Apache web server distribution
|
||||
|
||||
# This file controls what Internet media types are sent to the client for
|
||||
# a given file extension. Sending the correct media type to the client
|
||||
# is important so they know how to handle the content of the file.
|
||||
# Extra types can either be added here or by using an AddType directive
|
||||
# in your config files. For more information about Internet media types,
|
||||
# please read RFC 2045, 2046, 2047, 2048, and 2077. The Internet media type
|
||||
# registry is at <ftp://ftp.iana.org/in-notes/iana/assignments/media-types/>.
|
||||
|
||||
# MIME type Extension
|
||||
application/EDI-Consent
|
||||
application/EDI-X12
|
||||
application/EDIFACT
|
||||
application/activemessage
|
||||
application/andrew-inset ez
|
||||
application/applefile
|
||||
application/atomicmail
|
||||
application/cals-1840
|
||||
application/commonground
|
||||
application/cybercash
|
||||
application/dca-rft
|
||||
application/dec-dx
|
||||
application/eshop
|
||||
application/hyperstudio
|
||||
application/iges
|
||||
application/mac-binhex40 hqx
|
||||
application/mac-compactpro cpt
|
||||
application/macwriteii
|
||||
application/marc
|
||||
application/mathematica
|
||||
application/msword doc
|
||||
application/news-message-id
|
||||
application/news-transmission
|
||||
application/octet-stream bin dms lha lzh exe class
|
||||
application/oda oda
|
||||
application/pdf pdf
|
||||
application/pgp-encrypted
|
||||
application/pgp-keys
|
||||
application/pgp-signature
|
||||
application/pkcs10
|
||||
application/pkcs7-mime
|
||||
application/pkcs7-signature
|
||||
application/postscript ai eps ps
|
||||
application/prs.alvestrand.titrax-sheet
|
||||
application/prs.cww
|
||||
application/prs.nprend
|
||||
application/remote-printing
|
||||
application/riscos
|
||||
application/rtf rtf
|
||||
application/set-payment
|
||||
application/set-payment-initiation
|
||||
application/set-registration
|
||||
application/set-registration-initiation
|
||||
application/sgml
|
||||
application/sgml-open-catalog
|
||||
application/slate
|
||||
application/smil smi smil
|
||||
application/vemmi
|
||||
application/vnd.3M.Post-it-Notes
|
||||
application/vnd.FloGraphIt
|
||||
application/vnd.acucobol
|
||||
application/vnd.anser-web-certificate-issue-initiation
|
||||
application/vnd.anser-web-funds-transfer-initiation
|
||||
application/vnd.audiograph
|
||||
application/vnd.businessobjects
|
||||
application/vnd.claymore
|
||||
application/vnd.comsocaller
|
||||
application/vnd.dna
|
||||
application/vnd.dxr
|
||||
application/vnd.ecdis-update
|
||||
application/vnd.ecowin.chart
|
||||
application/vnd.ecowin.filerequest
|
||||
application/vnd.ecowin.fileupdate
|
||||
application/vnd.ecowin.series
|
||||
application/vnd.ecowin.seriesrequest
|
||||
application/vnd.ecowin.seriesupdate
|
||||
application/vnd.enliven
|
||||
application/vnd.epson.salt
|
||||
application/vnd.fdf
|
||||
application/vnd.ffsns
|
||||
application/vnd.framemaker
|
||||
application/vnd.fujitsu.oasys
|
||||
application/vnd.fujitsu.oasys2
|
||||
application/vnd.fujitsu.oasys3
|
||||
application/vnd.fujitsu.oasysgp
|
||||
application/vnd.fujitsu.oasysprs
|
||||
application/vnd.fujixerox.docuworks
|
||||
application/vnd.hp-HPGL
|
||||
application/vnd.hp-PCL
|
||||
application/vnd.hp-PCLXL
|
||||
application/vnd.hp-hps
|
||||
application/vnd.ibm.MiniPay
|
||||
application/vnd.ibm.modcap
|
||||
application/vnd.intercon.formnet
|
||||
application/vnd.intertrust.digibox
|
||||
application/vnd.intertrust.nncp
|
||||
application/vnd.is-xpr
|
||||
application/vnd.japannet-directory-service
|
||||
application/vnd.japannet-jpnstore-wakeup
|
||||
application/vnd.japannet-payment-wakeup
|
||||
application/vnd.japannet-registration
|
||||
application/vnd.japannet-registration-wakeup
|
||||
application/vnd.japannet-setstore-wakeup
|
||||
application/vnd.japannet-verification
|
||||
application/vnd.japannet-verification-wakeup
|
||||
application/vnd.koan
|
||||
application/vnd.lotus-1-2-3
|
||||
application/vnd.lotus-approach
|
||||
application/vnd.lotus-freelance
|
||||
application/vnd.lotus-organizer
|
||||
application/vnd.lotus-screencam
|
||||
application/vnd.lotus-wordpro
|
||||
application/vnd.meridian-slingshot
|
||||
application/vnd.mif mif
|
||||
application/vnd.minisoft-hp3000-save
|
||||
application/vnd.mitsubishi.misty-guard.trustweb
|
||||
application/vnd.ms-artgalry
|
||||
application/vnd.ms-asf
|
||||
application/vnd.ms-excel
|
||||
application/vnd.ms-powerpoint ppt
|
||||
application/vnd.ms-project
|
||||
application/vnd.ms-tnef
|
||||
application/vnd.ms-works
|
||||
application/vnd.music-niff
|
||||
application/vnd.musician
|
||||
application/vnd.netfpx
|
||||
application/vnd.noblenet-directory
|
||||
application/vnd.noblenet-sealer
|
||||
application/vnd.noblenet-web
|
||||
application/vnd.novadigm.EDM
|
||||
application/vnd.novadigm.EDX
|
||||
application/vnd.novadigm.EXT
|
||||
application/vnd.osa.netdeploy
|
||||
application/vnd.powerbuilder6
|
||||
application/vnd.powerbuilder6-s
|
||||
application/vnd.rapid
|
||||
application/vnd.seemail
|
||||
application/vnd.shana.informed.formtemplate
|
||||
application/vnd.shana.informed.interchange
|
||||
application/vnd.shana.informed.package
|
||||
application/vnd.street-stream
|
||||
application/vnd.sun.xml.calc sxc
|
||||
application/vnd.sun.xml.draw sxd
|
||||
application/vnd.sun.xml.impress sxi
|
||||
application/vnd.sun.xml.writer sxw
|
||||
application/vnd.svd
|
||||
application/vnd.swiftview-ics
|
||||
application/vnd.truedoc
|
||||
application/vnd.visio
|
||||
application/vnd.webturbo
|
||||
application/vnd.wrq-hp3000-labelled
|
||||
application/vnd.wt.stf
|
||||
application/vnd.xara
|
||||
application/vnd.yellowriver-custom-menu
|
||||
application/wita
|
||||
application/wordperfect5.1
|
||||
application/x-bcpio bcpio
|
||||
application/x-cdlink vcd
|
||||
application/x-chess-pgn pgn
|
||||
application/x-compress
|
||||
application/x-cpio cpio
|
||||
application/x-csh csh
|
||||
application/x-director dcr dir dxr
|
||||
application/x-dvi dvi
|
||||
application/x-futuresplash spl
|
||||
application/x-gtar gtar
|
||||
application/x-gzip
|
||||
application/x-hdf hdf
|
||||
application/x-javascript js
|
||||
application/x-koan skp skd skt skm
|
||||
application/x-latex latex
|
||||
application/x-netcdf nc cdf
|
||||
application/x-sh sh
|
||||
application/x-shar shar
|
||||
application/x-shockwave-flash swf
|
||||
application/x-stuffit sit
|
||||
application/x-sv4cpio sv4cpio
|
||||
application/x-sv4crc sv4crc
|
||||
application/x-tar tar
|
||||
application/x-tcl tcl
|
||||
application/x-tex tex
|
||||
application/x-texinfo texinfo texi
|
||||
application/x-troff t tr roff
|
||||
application/x-troff-man man
|
||||
application/x-troff-me me
|
||||
application/x-troff-ms ms
|
||||
application/x-ustar ustar
|
||||
application/x-wais-source src
|
||||
application/x400-bp
|
||||
application/xml
|
||||
application/zip zip
|
||||
audio/32kadpcm
|
||||
audio/basic au snd
|
||||
audio/midi mid midi kar
|
||||
audio/mpeg mpga mp2 mp3
|
||||
audio/vnd.qcelp
|
||||
audio/x-aiff aif aiff aifc
|
||||
audio/x-pn-realaudio ram rm
|
||||
audio/x-pn-realaudio-plugin rpm
|
||||
audio/x-realaudio ra
|
||||
audio/x-wav wav
|
||||
chemical/x-pdb pdb xyz
|
||||
image/cgm
|
||||
image/g3fax
|
||||
image/gif gif
|
||||
image/ief ief
|
||||
image/jpeg jpeg jpg jpe
|
||||
image/naplps
|
||||
image/png png
|
||||
image/prs.btif
|
||||
image/tiff tiff tif
|
||||
image/vnd.dwg
|
||||
image/vnd.dxf
|
||||
image/vnd.fpx
|
||||
image/vnd.net-fpx
|
||||
image/vnd.svf
|
||||
image/vnd.xiff
|
||||
image/x-cmu-raster ras
|
||||
image/x-portable-anymap pnm
|
||||
image/x-portable-bitmap pbm
|
||||
image/x-portable-graymap pgm
|
||||
image/x-portable-pixmap ppm
|
||||
image/x-rgb rgb
|
||||
image/x-xbitmap xbm
|
||||
image/x-xpixmap xpm
|
||||
image/x-xwindowdump xwd
|
||||
message/delivery-status
|
||||
message/disposition-notification
|
||||
message/external-body
|
||||
message/http
|
||||
message/news
|
||||
message/partial
|
||||
message/rfc822
|
||||
model/iges igs iges
|
||||
model/mesh msh mesh silo
|
||||
model/vnd.dwf
|
||||
model/vrml wrl vrml
|
||||
multipart/alternative
|
||||
multipart/appledouble
|
||||
multipart/byteranges
|
||||
multipart/digest
|
||||
multipart/encrypted
|
||||
multipart/form-data
|
||||
multipart/header-set
|
||||
multipart/mixed
|
||||
multipart/parallel
|
||||
multipart/related
|
||||
multipart/report
|
||||
multipart/signed
|
||||
multipart/voice-message
|
||||
text/css css
|
||||
text/directory
|
||||
text/enriched
|
||||
text/plain asc txt
|
||||
text/prs.lines.tag
|
||||
text/rfc822-headers
|
||||
text/richtext rtx
|
||||
text/rtf rtf
|
||||
text/sgml sgml sgm
|
||||
text/tab-separated-values tsv
|
||||
text/uri-list
|
||||
text/vnd.abc
|
||||
text/vnd.flatland.3dml
|
||||
text/vnd.fmi.flexstor
|
||||
text/vnd.in3d.3dml
|
||||
text/vnd.in3d.spot
|
||||
text/vnd.latex-z
|
||||
text/x-setext etx
|
||||
text/xml xml
|
||||
video/mpeg mpeg mpg mpe
|
||||
video/quicktime qt mov
|
||||
video/vnd.motorola.video
|
||||
video/vnd.motorola.videop
|
||||
video/vnd.vivo
|
||||
video/x-msvideo avi
|
||||
video/x-sgi-movie movie
|
||||
x-conference/x-cooltalk ice
|
||||
text/html html htm
|
||||
Reference in New Issue
Block a user