Here is an (almost) complete mapping of file extensions to MIME types, in JSON format.
You can look up a type by its extension, for example: MIME["ppt"], MIME["docx"], etc.
{
"x3d": "application/vnd.hzn-3d-crossword",
"3gp": "video/3gpp",
"3g2": "video/3gpp2",
"mseq": "application/vnd.mseq",
"pwn": "application/vnd.3m.post-it-notes",
"plb": "application/vnd.3gpp.pic-bw-large",
"psb": "application/vnd.3gpp.pic-bw-small",
"pvb": "application/vnd.3gpp.pic-bw-var",
"tcap": "application/vnd.3gpp2.tcap",
"7z": "application/x-7z-compressed",
"abw": "application/x-abiword",
"ace": "application/x-ace-compressed",
"acc": "application/vnd.americandynamics.acc",
"acu": "application/vnd.acucobol",
"atc": "application/vnd.acucorp",
"adp": "audio/adpcm",
"aab": "application/x-authorware-bin",
"aam": "application/x-authorware-map",
"aas": "application/x-authorware-seg",
"air": "application/vnd.adobe.air-application-installer-package+zip",
"swf": "application/x-shockwave-flash",
"fxp": "application/vnd.adobe.fxp",
"pdf": "application/pdf",
"ppd": "application/vnd.cups-ppd",
"dir": "application/x-director",
"xdp": "application/vnd.adobe.xdp+xml",
"xfdf": "application/vnd.adobe.xfdf",
"aac": "audio/x-aac",
"ahead": "application/vnd.ahead.space",
"azf": "application/vnd.airzip.filesecure.azf",
"azs": "application/vnd.airzip.filesecure.azs",
"azw": "application/vnd.amazon.ebook",
"ami": "application/vnd.amiga.ami",
"ez": "application/andrew-inset",
"apk": "application/vnd.android.package-archive",
"cii": "application/vnd.anser-web-certificate-issue-initiation",
"fti": "application/vnd.anser-web-funds-transfer-initiation",
"atx": "application/vnd.antix.game-component",
"dmg": "application/x-apple-diskimage",
"mpkg": "application/vnd.apple.installer+xml",
"aw": "application/applixware",
"mp3": "audio/mpeg",
"les": "application/vnd.hhe.lesson-player",
"swi": "application/vnd.aristanetworks.swi",
"s": "text/x-asm",
"atomcat": "application/atomcat+xml",
"atomsvc": "application/atomsvc+xml",
"atom": "application/atom+xml",
"ac": "application/pkix-attr-cert",
"aif": "audio/x-aiff",
"avi": "video/x-msvideo",
"aep": "application/vnd.audiograph",
"dxf": "image/vnd.dxf",
"dwf": "model/vnd.dwf",
"par": "text/plain-bas",
"bcpio": "application/x-bcpio",
"bin": "application/octet-stream",
"bmp": "image/bmp",
"torrent": "application/x-bittorrent",
"cod": "application/vnd.rim.cod",
"mpm": "application/vnd.blueice.multipass",
"bmi": "application/vnd.bmi",
"sh": "application/x-sh",
"btif": "image/prs.btif",
"rep": "application/vnd.businessobjects",
"bz": "application/x-bzip",
"bz2": "application/x-bzip2",
"csh": "application/x-csh",
"c": "text/x-c",
"cdxml": "application/vnd.chemdraw+xml",
"css": "text/css",
"cdx": "chemical/x-cdx",
"cml": "chemical/x-cml",
"csml": "chemical/x-csml",
"cdbcmsg": "application/vnd.contact.cmsg",
"cla": "application/vnd.claymore",
"c4g": "application/vnd.clonk.c4group",
"sub": "image/vnd.dvb.subtitle",
"cdmia": "application/cdmi-capability",
"cdmic": "application/cdmi-container",
"cdmid": "application/cdmi-domain",
"cdmio": "application/cdmi-object",
"cdmiq": "application/cdmi-queue",
"c11amc": "application/vnd.cluetrust.cartomobile-config",
"c11amz": "application/vnd.cluetrust.cartomobile-config-pkg",
"ras": "image/x-cmu-raster",
"dae": "model/vnd.collada+xml",
"csv": "text/csv",
"cpt": "application/mac-compactpro",
"wmlc": "application/vnd.wap.wmlc",
"cgm": "image/cgm",
"ice": "x-conference/x-cooltalk",
"cmx": "image/x-cmx",
"xar": "application/vnd.xara",
"cmc": "application/vnd.cosmocaller",
"cpio": "application/x-cpio",
"clkx": "application/vnd.crick.clicker",
"clkk": "application/vnd.crick.clicker.keyboard",
"clkp": "application/vnd.crick.clicker.palette",
"clkt": "application/vnd.crick.clicker.template",
"clkw": "application/vnd.crick.clicker.wordbank",
"wbs": "application/vnd.criticaltools.wbs+xml",
"cryptonote": "application/vnd.rig.cryptonote",
"cif": "chemical/x-cif",
"cmdf": "chemical/x-cmdf",
"cu": "application/cu-seeme",
"cww": "application/prs.cww",
"curl": "text/vnd.curl",
"dcurl": "text/vnd.curl.dcurl",
"mcurl": "text/vnd.curl.mcurl",
"scurl": "text/vnd.curl.scurl",
"car": "application/vnd.curl.car",
"pcurl": "application/vnd.curl.pcurl",
"cmp": "application/vnd.yellowriver-custom-menu",
"dssc": "application/dssc+der",
"xdssc": "application/dssc+xml",
"deb": "application/x-debian-package",
"uva": "audio/vnd.dece.audio",
"uvi": "image/vnd.dece.graphic",
"uvh": "video/vnd.dece.hd",
"uvm": "video/vnd.dece.mobile",
"uvu": "video/vnd.uvvu.mp4",
"uvp": "video/vnd.dece.pd",
"uvs": "video/vnd.dece.sd",
"uvv": "video/vnd.dece.video",
"dvi": "application/x-dvi",
"seed": "application/vnd.fdsn.seed",
"dtb": "application/x-dtbook+xml",
"res": "application/x-dtbresource+xml",
"ait": "application/vnd.dvb.ait",
"svc": "application/vnd.dvb.service",
"eol": "audio/vnd.digital-winds",
"djvu": "image/vnd.djvu",
"dtd": "application/xml-dtd",
"mlp": "application/vnd.dolby.mlp",
"wad": "application/x-doom",
"dpg": "application/vnd.dpgraph",
"dra": "audio/vnd.dra",
"dfac": "application/vnd.dreamfactory",
"dts": "audio/vnd.dts",
"dtshd": "audio/vnd.dts.hd",
"dwg": "image/vnd.dwg",
"geo": "application/vnd.dynageo",
"es": "application/ecmascript",
"mag": "application/vnd.ecowin.chart",
"mmr": "image/vnd.fujixerox.edmics-mmr",
"rlc": "image/vnd.fujixerox.edmics-rlc",
"exi": "application/exi",
"mgz": "application/vnd.proteus.magazine",
"epub": "application/epub+zip",
"eml": "message/rfc822",
"nml": "application/vnd.enliven",
"xpr": "application/vnd.is-xpr",
"xif": "image/vnd.xiff",
"xfdl": "application/vnd.xfdl",
"emma": "application/emma+xml",
"ez2": "application/vnd.ezpix-album",
"ez3": "application/vnd.ezpix-package",
"fst": "image/vnd.fst",
"fvt": "video/vnd.fvt",
"fbs": "image/vnd.fastbidsheet",
"fe_launch": "application/vnd.denovo.fcselayout-link",
"f4v": "video/x-f4v",
"flv": "video/x-flv",
"fpx": "image/vnd.fpx",
"npx": "image/vnd.net-fpx",
"flx": "text/vnd.fmi.flexstor",
"fli": "video/x-fli",
"ftc": "application/vnd.fluxtime.clip",
"fdf": "application/vnd.fdf",
"f": "text/x-fortran",
"mif": "application/vnd.mif",
"fm": "application/vnd.framemaker",
"fh": "image/x-freehand",
"fsc": "application/vnd.fsc.weblaunch",
"fnc": "application/vnd.frogans.fnc",
"ltf": "application/vnd.frogans.ltf",
"ddd": "application/vnd.fujixerox.ddd",
"xdw": "application/vnd.fujixerox.docuworks",
"xbd": "application/vnd.fujixerox.docuworks.binder",
"oas": "application/vnd.fujitsu.oasys",
"oa2": "application/vnd.fujitsu.oasys2",
"oa3": "application/vnd.fujitsu.oasys3",
"fg5": "application/vnd.fujitsu.oasysgp",
"bh2": "application/vnd.fujitsu.oasysprs",
"spl": "application/x-futuresplash",
"fzs": "application/vnd.fuzzysheet",
"g3": "image/g3fax",
"gmx": "application/vnd.gmx",
"gtw": "model/vnd.gtw",
"txd": "application/vnd.genomatix.tuxedo",
"ggb": "application/vnd.geogebra.file",
"ggt": "application/vnd.geogebra.tool",
"gdl": "model/vnd.gdl",
"gex": "application/vnd.geometry-explorer",
"gxt": "application/vnd.geonext",
"g2w": "application/vnd.geoplan",
"g3w": "application/vnd.geospace",
"gsf": "application/x-font-ghostscript",
"bdf": "application/x-font-bdf",
"gtar": "application/x-gtar",
"texinfo": "application/x-texinfo",
"gnumeric": "application/x-gnumeric",
"kml": "application/vnd.google-earth.kml+xml",
"kmz": "application/vnd.google-earth.kmz",
"gqf": "application/vnd.grafeq",
"gif": "image/gif",
"gv": "text/vnd.graphviz",
"gac": "application/vnd.groove-account",
"ghf": "application/vnd.groove-help",
"gim": "application/vnd.groove-identity-message",
"grv": "application/vnd.groove-injector",
"gtm": "application/vnd.groove-tool-message",
"tpl": "application/vnd.groove-tool-template",
"vcg": "application/vnd.groove-vcard",
"h261": "video/h261",
"h263": "video/h263",
"h264": "video/h264",
"hpid": "application/vnd.hp-hpid",
"hps": "application/vnd.hp-hps",
"hdf": "application/x-hdf",
"rip": "audio/vnd.rip",
"hbci": "application/vnd.hbci",
"jlt": "application/vnd.hp-jlyt",
"pcl": "application/vnd.hp-pcl",
"hpgl": "application/vnd.hp-hpgl",
"hvs": "application/vnd.yamaha.hv-script",
"hvd": "application/vnd.yamaha.hv-dic",
"hvp": "application/vnd.yamaha.hv-voice",
"sfd-hdstx": "application/vnd.hydrostatix.sof-data",
"stk": "application/hyperstudio",
"hal": "application/vnd.hal+xml",
"html": "text/html",
"irm": "application/vnd.ibm.rights-management",
"sc": "application/vnd.ibm.secure-container",
"ics": "text/calendar",
"icc": "application/vnd.iccprofile",
"ico": "image/x-icon",
"igl": "application/vnd.igloader",
"ief": "image/ief",
"ivp": "application/vnd.immervision-ivp",
"ivu": "application/vnd.immervision-ivu",
"rif": "application/reginfo+xml",
"3dml": "text/vnd.in3d.3dml",
"spot": "text/vnd.in3d.spot",
"igs": "model/iges",
"i2g": "application/vnd.intergeo",
"cdy": "application/vnd.cinderella",
"xpw": "application/vnd.intercon.formnet",
"fcs": "application/vnd.isac.fcs",
"ipfix": "application/ipfix",
"cer": "application/pkix-cert",
"pki": "application/pkixcmp",
"crl": "application/pkix-crl",
"pkipath": "application/pkix-pkipath",
"igm": "application/vnd.insors.igm",
"rcprofile": "application/vnd.ipunplugged.rcprofile",
"irp": "application/vnd.irepository.package+xml",
"jad": "text/vnd.sun.j2me.app-descriptor",
"jar": "application/java-archive",
"class": "application/java-vm",
"jnlp": "application/x-java-jnlp-file",
"ser": "application/java-serialized-object",
"java": "text/x-java-source",
"js": "application/javascript",
"json": "application/json",
"joda": "application/vnd.joost.joda-archive",
"jpm": "video/jpm",
"jpeg": "image/jpeg",
"jpg": "image/jpeg",
"pjpeg": "image/pjpeg",
"jpgv": "video/jpeg",
"ktz": "application/vnd.kahootz",
"mmd": "application/vnd.chipnuts.karaoke-mmd",
"karbon": "application/vnd.kde.karbon",
"chrt": "application/vnd.kde.kchart",
"kfo": "application/vnd.kde.kformula",
"flw": "application/vnd.kde.kivio",
"kon": "application/vnd.kde.kontour",
"kpr": "application/vnd.kde.kpresenter",
"ksp": "application/vnd.kde.kspread",
"kwd": "application/vnd.kde.kword",
"htke": "application/vnd.kenameaapp",
"kia": "application/vnd.kidspiration",
"kne": "application/vnd.kinar",
"sse": "application/vnd.kodak-descriptor",
"lasxml": "application/vnd.las.las+xml",
"latex": "application/x-latex",
"lbd": "application/vnd.llamagraphics.life-balance.desktop",
"lbe": "application/vnd.llamagraphics.life-balance.exchange+xml",
"jam": "application/vnd.jam",
"123": "application/vnd.lotus-1-2-3",
"apr": "application/vnd.lotus-approach",
"pre": "application/vnd.lotus-freelance",
"nsf": "application/vnd.lotus-notes",
"org": "application/vnd.lotus-organizer",
"scm": "application/vnd.lotus-screencam",
"lwp": "application/vnd.lotus-wordpro",
"lvp": "audio/vnd.lucent.voice",
"m3u": "audio/x-mpegurl",
"m4v": "video/x-m4v",
"hqx": "application/mac-binhex40",
"portpkg": "application/vnd.macports.portpkg",
"mgp": "application/vnd.osgeo.mapguide.package",
"mrc": "application/marc",
"mrcx": "application/marcxml+xml",
"mxf": "application/mxf",
"nbp": "application/vnd.wolfram.player",
"ma": "application/mathematica",
"mathml": "application/mathml+xml",
"mbox": "application/mbox",
"mc1": "application/vnd.medcalcdata",
"mscml": "application/mediaservercontrol+xml",
"cdkey": "application/vnd.mediastation.cdkey",
"mwf": "application/vnd.mfer",
"mfm": "application/vnd.mfmp",
"msh": "model/mesh",
"mads": "application/mads+xml",
"mets": "application/mets+xml",
"mods": "application/mods+xml",
"meta4": "application/metalink4+xml",
"mcd": "application/vnd.mcd",
"flo": "application/vnd.micrografx.flo",
"igx": "application/vnd.micrografx.igx",
"es3": "application/vnd.eszigno3+xml",
"mdb": "application/x-msaccess",
"asf": "video/x-ms-asf",
"exe": "application/x-msdownload",
"cil": "application/vnd.ms-artgalry",
"cab": "application/vnd.ms-cab-compressed",
"ims": "application/vnd.ms-ims",
"application": "application/x-ms-application",
"clp": "application/x-msclip",
"mdi": "image/vnd.ms-modi",
"eot": "application/vnd.ms-fontobject",
"xls": "application/vnd.ms-excel",
"xlam": "application/vnd.ms-excel.addin.macroenabled.12",
"xlsb": "application/vnd.ms-excel.sheet.binary.macroenabled.12",
"xltm": "application/vnd.ms-excel.template.macroenabled.12",
"xlsm": "application/vnd.ms-excel.sheet.macroenabled.12",
"chm": "application/vnd.ms-htmlhelp",
"crd": "application/x-mscardfile",
"lrm": "application/vnd.ms-lrm",
"mvb": "application/x-msmediaview",
"mny": "application/x-msmoney",
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"sldx": "application/vnd.openxmlformats-officedocument.presentationml.slide",
"ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
"potx": "application/vnd.openxmlformats-officedocument.presentationml.template",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
"obd": "application/x-msbinder",
"thmx": "application/vnd.ms-officetheme",
"onetoc": "application/onenote",
"pya": "audio/vnd.ms-playready.media.pya",
"pyv": "video/vnd.ms-playready.media.pyv",
"ppt": "application/vnd.ms-powerpoint",
"ppam": "application/vnd.ms-powerpoint.addin.macroenabled.12",
"sldm": "application/vnd.ms-powerpoint.slide.macroenabled.12",
"pptm": "application/vnd.ms-powerpoint.presentation.macroenabled.12",
"ppsm": "application/vnd.ms-powerpoint.slideshow.macroenabled.12",
"potm": "application/vnd.ms-powerpoint.template.macroenabled.12",
"mpp": "application/vnd.ms-project",
"pub": "application/x-mspublisher",
"scd": "application/x-msschedule",
"xap": "application/x-silverlight-app",
"stl": "application/vnd.ms-pki.stl",
"cat": "application/vnd.ms-pki.seccat",
"vsd": "application/vnd.visio",
"vsdx": "application/vnd.visio2013",
"wm": "video/x-ms-wm",
"wma": "audio/x-ms-wma",
"wax": "audio/x-ms-wax",
"wmx": "video/x-ms-wmx",
"wmd": "application/x-ms-wmd",
"wpl": "application/vnd.ms-wpl",
"wmz": "application/x-ms-wmz",
"wmv": "video/x-ms-wmv",
"wvx": "video/x-ms-wvx",
"wmf": "application/x-msmetafile",
"trm": "application/x-msterminal",
"doc": "application/msword",
"docm": "application/vnd.ms-word.document.macroenabled.12",
"dotm": "application/vnd.ms-word.template.macroenabled.12",
"wri": "application/x-mswrite",
"wps": "application/vnd.ms-works",
"xbap": "application/x-ms-xbap",
"xps": "application/vnd.ms-xpsdocument",
"mid": "audio/midi",
"mpy": "application/vnd.ibm.minipay",
"afp": "application/vnd.ibm.modcap",
"rms": "application/vnd.jcp.javame.midlet-rms",
"tmo": "application/vnd.tmobile-livetv",
"prc": "application/x-mobipocket-ebook",
"mbk": "application/vnd.mobius.mbk",
"dis": "application/vnd.mobius.dis",
"plc": "application/vnd.mobius.plc",
"mqy": "application/vnd.mobius.mqy",
"msl": "application/vnd.mobius.msl",
"txf": "application/vnd.mobius.txf",
"daf": "application/vnd.mobius.daf",
"fly": "text/vnd.fly",
"mpc": "application/vnd.mophun.certificate",
"mpn": "application/vnd.mophun.application",
"mj2": "video/mj2",
"mpga": "audio/mpeg",
"mxu": "video/vnd.mpegurl",
"mpeg": "video/mpeg",
"m21": "application/mp21",
"mp4a": "audio/mp4",
"mp4": "video/mp4",
"m3u8": "application/vnd.apple.mpegurl",
"mus": "application/vnd.musician",
"msty": "application/vnd.muvee.style",
"mxml": "application/xv+xml",
"ngdat": "application/vnd.nokia.n-gage.data",
"n-gage": "application/vnd.nokia.n-gage.symbian.install",
"ncx": "application/x-dtbncx+xml",
"nc": "application/x-netcdf",
"nlu": "application/vnd.neurolanguage.nlu",
"dna": "application/vnd.dna",
"nnd": "application/vnd.noblenet-directory",
"nns": "application/vnd.noblenet-sealer",
"nnw": "application/vnd.noblenet-web",
"rpst": "application/vnd.nokia.radio-preset",
"rpss": "application/vnd.nokia.radio-presets",
"n3": "text/n3",
"edm": "application/vnd.novadigm.edm",
"edx": "application/vnd.novadigm.edx",
"ext": "application/vnd.novadigm.ext",
"gph": "application/vnd.flographit",
"ecelp4800": "audio/vnd.nuera.ecelp4800",
"ecelp7470": "audio/vnd.nuera.ecelp7470",
"ecelp9600": "audio/vnd.nuera.ecelp9600",
"oda": "application/oda",
"ogx": "application/ogg",
"oga": "audio/ogg",
"ogv": "video/ogg",
"dd2": "application/vnd.oma.dd2+xml",
"oth": "application/vnd.oasis.opendocument.text-web",
"opf": "application/oebps-package+xml",
"qbo": "application/vnd.intu.qbo",
"oxt": "application/vnd.openofficeorg.extension",
"osf": "application/vnd.yamaha.openscoreformat",
"weba": "audio/webm",
"webm": "video/webm",
"odc": "application/vnd.oasis.opendocument.chart",
"otc": "application/vnd.oasis.opendocument.chart-template",
"odb": "application/vnd.oasis.opendocument.database",
"odf": "application/vnd.oasis.opendocument.formula",
"odft": "application/vnd.oasis.opendocument.formula-template",
"odg": "application/vnd.oasis.opendocument.graphics",
"otg": "application/vnd.oasis.opendocument.graphics-template",
"odi": "application/vnd.oasis.opendocument.image",
"oti": "application/vnd.oasis.opendocument.image-template",
"odp": "application/vnd.oasis.opendocument.presentation",
"otp": "application/vnd.oasis.opendocument.presentation-template",
"ods": "application/vnd.oasis.opendocument.spreadsheet",
"ots": "application/vnd.oasis.opendocument.spreadsheet-template",
"odt": "application/vnd.oasis.opendocument.text",
"odm": "application/vnd.oasis.opendocument.text-master",
"ott": "application/vnd.oasis.opendocument.text-template",
"ktx": "image/ktx",
"sxc": "application/vnd.sun.xml.calc",
"stc": "application/vnd.sun.xml.calc.template",
"sxd": "application/vnd.sun.xml.draw",
"std": "application/vnd.sun.xml.draw.template",
"sxi": "application/vnd.sun.xml.impress",
"sti": "application/vnd.sun.xml.impress.template",
"sxm": "application/vnd.sun.xml.math",
"sxw": "application/vnd.sun.xml.writer",
"sxg": "application/vnd.sun.xml.writer.global",
"stw": "application/vnd.sun.xml.writer.template",
"otf": "application/x-font-otf",
"osfpvg": "application/vnd.yamaha.openscoreformat.osfpvg+xml",
"dp": "application/vnd.osgi.dp",
"pdb": "application/vnd.palm",
"p": "text/x-pascal",
"paw": "application/vnd.pawaafile",
"pclxl": "application/vnd.hp-pclxl",
"efif": "application/vnd.picsel",
"pcx": "image/x-pcx",
"psd": "image/vnd.adobe.photoshop",
"prf": "application/pics-rules",
"pic": "image/x-pict",
"chat": "application/x-chat",
"p10": "application/pkcs10",
"p12": "application/x-pkcs12",
"p7m": "application/pkcs7-mime",
"p7s": "application/pkcs7-signature",
"p7r": "application/x-pkcs7-certreqresp",
"p7b": "application/x-pkcs7-certificates",
"p8": "application/pkcs8",
"plf": "application/vnd.pocketlearn",
"pnm": "image/x-portable-anymap",
"pbm": "image/x-portable-bitmap",
"pcf": "application/x-font-pcf",
"pfr": "application/font-tdpfr",
"pgn": "application/x-chess-pgn",
"pgm": "image/x-portable-graymap",
"png": "image/png",
"ppm": "image/x-portable-pixmap",
"pskcxml": "application/pskc+xml",
"pml": "application/vnd.ctc-posml",
"ai": "application/postscript",
"pfa": "application/x-font-type1",
"pbd": "application/vnd.powerbuilder6",
"pgp": "application/pgp-signature",
"box": "application/vnd.previewsystems.box",
"ptid": "application/vnd.pvi.ptid1",
"pls": "application/pls+xml",
"str": "application/vnd.pg.format",
"ei6": "application/vnd.pg.osasli",
"dsc": "text/prs.lines.tag",
"psf": "application/x-font-linux-psf",
"qps": "application/vnd.publishare-delta-tree",
"wg": "application/vnd.pmi.widget",
"qxd": "application/vnd.quark.quarkxpress",
"esf": "application/vnd.epson.esf",
"msf": "application/vnd.epson.msf",
"ssf": "application/vnd.epson.ssf",
"qam": "application/vnd.epson.quickanime",
"qfx": "application/vnd.intu.qfx",
"qt": "video/quicktime",
"rar": "application/x-rar-compressed",
"ram": "audio/x-pn-realaudio",
"rmp": "audio/x-pn-realaudio-plugin",
"rsd": "application/rsd+xml",
"rm": "application/vnd.rn-realmedia",
"bed": "application/vnd.realvnc.bed",
"mxl": "application/vnd.recordare.musicxml",
"musicxml": "application/vnd.recordare.musicxml+xml",
"rnc": "application/relax-ng-compact-syntax",
"rdz": "application/vnd.data-vision.rdz",
"rdf": "application/rdf+xml",
"rp9": "application/vnd.cloanto.rp9",
"jisp": "application/vnd.jisp",
"rtf": "application/rtf",
"rtx": "text/richtext",
"link66": "application/vnd.route66.link66+xml",
"rss": "application/rss+xml",
"shf": "application/shf+xml",
"st": "application/vnd.sailingtracker.track",
"svg": "image/svg+xml",
"sus": "application/vnd.sus-calendar",
"sru": "application/sru+xml",
"setpay": "application/set-payment-initiation",
"setreg": "application/set-registration-initiation",
"sema": "application/vnd.sema",
"semd": "application/vnd.semd",
"semf": "application/vnd.semf",
"see": "application/vnd.seemail",
"snf": "application/x-font-snf",
"spq": "application/scvp-vp-request",
"spp": "application/scvp-vp-response",
"scq": "application/scvp-cv-request",
"scs": "application/scvp-cv-response",
"sdp": "application/sdp",
"etx": "text/x-setext",
"movie": "video/x-sgi-movie",
"ifm": "application/vnd.shana.informed.formdata",
"itp": "application/vnd.shana.informed.formtemplate",
"iif": "application/vnd.shana.informed.interchange",
"ipk": "application/vnd.shana.informed.package",
"tfi": "application/thraud+xml",
"shar": "application/x-shar",
"rgb": "image/x-rgb",
"slt": "application/vnd.epson.salt",
"aso": "application/vnd.accpac.simply.aso",
"imp": "application/vnd.accpac.simply.imp",
"twd": "application/vnd.simtech-mindmapper",
"csp": "application/vnd.commonspace",
"saf": "application/vnd.yamaha.smaf-audio",
"mmf": "application/vnd.smaf",
"spf": "application/vnd.yamaha.smaf-phrase",
"teacher": "application/vnd.smart.teacher",
"svd": "application/vnd.svd",
"rq": "application/sparql-query",
"srx": "application/sparql-results+xml",
"gram": "application/srgs",
"grxml": "application/srgs+xml",
"ssml": "application/ssml+xml",
"skp": "application/vnd.koan",
"sgml": "text/sgml",
"sdc": "application/vnd.stardivision.calc",
"sda": "application/vnd.stardivision.draw",
"sdd": "application/vnd.stardivision.impress",
"smf": "application/vnd.stardivision.math",
"sdw": "application/vnd.stardivision.writer",
"sgl": "application/vnd.stardivision.writer-global",
"sm": "application/vnd.stepmania.stepchart",
"sit": "application/x-stuffit",
"sitx": "application/x-stuffitx",
"sdkm": "application/vnd.solent.sdkm+xml",
"xo": "application/vnd.olpc-sugar",
"au": "audio/basic",
"wqd": "application/vnd.wqd",
"sis": "application/vnd.symbian.install",
"smi": "application/smil+xml",
"xsm": "application/vnd.syncml+xml",
"bdm": "application/vnd.syncml.dm+wbxml",
"xdm": "application/vnd.syncml.dm+xml",
"sv4cpio": "application/x-sv4cpio",
"sv4crc": "application/x-sv4crc",
"sbml": "application/sbml+xml",
"tsv": "text/tab-separated-values",
"tiff": "image/tiff",
"tao": "application/vnd.tao.intent-module-archive",
"tar": "application/x-tar",
"tcl": "application/x-tcl",
"tex": "application/x-tex",
"tfm": "application/x-tex-tfm",
"tei": "application/tei+xml",
"txt": "text/plain",
"dxp": "application/vnd.spotfire.dxp",
"sfs": "application/vnd.spotfire.sfs",
"tsd": "application/timestamped-data",
"tpt": "application/vnd.trid.tpt",
"mxs": "application/vnd.triscape.mxs",
"t": "text/troff",
"tra": "application/vnd.trueapp",
"ttf": "application/x-font-ttf",
"ttl": "text/turtle",
"umj": "application/vnd.umajin",
"uoml": "application/vnd.uoml+xml",
"unityweb": "application/vnd.unity",
"ufd": "application/vnd.ufdl",
"uri": "text/uri-list",
"utz": "application/vnd.uiq.theme",
"ustar": "application/x-ustar",
"uu": "text/x-uuencode",
"vcs": "text/x-vcalendar",
"vcf": "text/x-vcard",
"vcd": "application/x-cdlink",
"vsf": "application/vnd.vsf",
"wrl": "model/vrml",
"vcx": "application/vnd.vcx",
"mts": "model/vnd.mts",
"vtu": "model/vnd.vtu",
"vis": "application/vnd.visionary",
"viv": "video/vnd.vivo",
"ccxml": "application/ccxml+xml",
"vxml": "application/voicexml+xml",
"src": "application/x-wais-source",
"wbxml": "application/vnd.wap.wbxml",
"wbmp": "image/vnd.wap.wbmp",
"wav": "audio/x-wav",
"davmount": "application/davmount+xml",
"woff": "application/x-font-woff",
"wspolicy": "application/wspolicy+xml",
"webp": "image/webp",
"wtb": "application/vnd.webturbo",
"wgt": "application/widget",
"hlp": "application/winhlp",
"wml": "text/vnd.wap.wml",
"wmls": "text/vnd.wap.wmlscript",
"wmlsc": "application/vnd.wap.wmlscriptc",
"wpd": "application/vnd.wordperfect",
"stf": "application/vnd.wt.stf",
"wsdl": "application/wsdl+xml",
"xbm": "image/x-xbitmap",
"xpm": "image/x-xpixmap",
"xwd": "image/x-xwindowdump",
"der": "application/x-x509-ca-cert",
"fig": "application/x-xfig",
"xhtml": "application/xhtml+xml",
"xml": "application/xml",
"xdf": "application/xcap-diff+xml",
"xenc": "application/xenc+xml",
"xer": "application/patch-ops-error+xml",
"rl": "application/resource-lists+xml",
"rs": "application/rls-services+xml",
"rld": "application/resource-lists-diff+xml",
"xslt": "application/xslt+xml",
"xop": "application/xop+xml",
"xpi": "application/x-xpinstall",
"xspf": "application/xspf+xml",
"xul": "application/vnd.mozilla.xul+xml",
"xyz": "chemical/x-xyz",
"yaml": "text/yaml",
"yang": "application/yang",
"yin": "application/yin+xml",
"zir": "application/vnd.zul",
"zip": "application/zip",
"zmm": "application/vnd.handheld-entertainment+xml",
"zaz": "application/vnd.zzazz.deck+xml"
}
Edit:
Following @Octo Poulos's suggestions:
- Separated the jpeg and jpg MIME types into two different keys
- Updated the MIME type of the mp4 format to video/mp4
This topic lists the most common MIME types with corresponding document types, ordered by their common extensions.
The following two important MIME types are the default types:
text/plain
is the default value for textual files. A textual file should be human-readable and must not contain binary data.
application/octet-stream
is the default value for all other cases. An unknown file type should use this type. Browsers are particularly careful when manipulating these files to protect users from software vulnerabilities and possible dangerous behavior.
IANA is the official registry of MIME media types and maintains a list of all the official MIME types. This table lists important MIME types for the Web:
Extension | Kind of document | MIME Type |
---|---|---|
.aac |
AAC audio | audio/aac |
.abw |
AbiWord document | application/x-abiword |
.arc |
Archive document (multiple files embedded) | application/x-freearc |
.avif |
AVIF image | image/avif |
.avi |
AVI: Audio Video Interleave | video/x-msvideo |
.azw |
Amazon Kindle eBook format | application/vnd.amazon.ebook |
.bin |
Any kind of binary data | application/octet-stream |
.bmp |
Windows OS/2 Bitmap Graphics | image/bmp |
.bz |
BZip archive | application/x-bzip |
.bz2 |
BZip2 archive | application/x-bzip2 |
.cda |
CD audio | application/x-cdf |
.csh |
C-Shell script | application/x-csh |
.css |
Cascading Style Sheets (CSS) | text/css |
.csv |
Comma-separated values (CSV) | text/csv |
.doc |
Microsoft Word | application/msword |
.docx |
Microsoft Word (OpenXML) | application/vnd.openxmlformats-officedocument.wordprocessingml.document |
.eot |
MS Embedded OpenType fonts | application/vnd.ms-fontobject |
.epub |
Electronic publication (EPUB) | application/epub+zip |
.gz |
GZip Compressed Archive | application/gzip |
.gif |
Graphics Interchange Format (GIF) | image/gif |
.htm , .html |
HyperText Markup Language (HTML) | text/html |
.ico |
Icon format | image/vnd.microsoft.icon |
.ics |
iCalendar format | text/calendar |
.jar |
Java Archive (JAR) | application/java-archive |
.jpeg , .jpg |
JPEG images | image/jpeg |
.js |
JavaScript | text/javascript (Specifications: HTML and RFC 9239) |
.json |
JSON format | application/json |
.jsonld |
JSON-LD format | application/ld+json |
.mid , .midi |
Musical Instrument Digital Interface (MIDI) | audio/midi , audio/x-midi |
.mjs |
JavaScript module | text/javascript |
.mp3 |
MP3 audio | audio/mpeg |
.mp4 |
MP4 video | video/mp4 |
.mpeg |
MPEG Video | video/mpeg |
.mpkg |
Apple Installer Package | application/vnd.apple.installer+xml |
.odp |
OpenDocument presentation document | application/vnd.oasis.opendocument.presentation |
.ods |
OpenDocument spreadsheet document | application/vnd.oasis.opendocument.spreadsheet |
.odt |
OpenDocument text document | application/vnd.oasis.opendocument.text |
.oga |
OGG audio | audio/ogg |
.ogv |
OGG video | video/ogg |
.ogx |
OGG | application/ogg |
.opus |
Opus audio | audio/opus |
.otf |
OpenType font | font/otf |
.png |
Portable Network Graphics | image/png |
.pdf |
Adobe Portable Document Format (PDF) | application/pdf |
.php |
Hypertext Preprocessor (Personal Home Page) | application/x-httpd-php |
.ppt |
Microsoft PowerPoint | application/vnd.ms-powerpoint |
.pptx |
Microsoft PowerPoint (OpenXML) | application/vnd.openxmlformats-officedocument.presentationml.presentation |
.rar |
RAR archive | application/vnd.rar |
.rtf |
Rich Text Format (RTF) | application/rtf |
.sh |
Bourne shell script | application/x-sh |
.svg |
Scalable Vector Graphics (SVG) | image/svg+xml |
.tar |
Tape Archive (TAR) | application/x-tar |
.tif , .tiff |
Tagged Image File Format (TIFF) | image/tiff |
.ts |
MPEG transport stream | video/mp2t |
.ttf |
TrueType Font | font/ttf |
.txt |
Text, (generally ASCII or ISO 8859-n) | text/plain |
.vsd |
Microsoft Visio | application/vnd.visio |
.wav |
Waveform Audio Format | audio/wav |
.weba |
WEBM audio | audio/webm |
.webm |
WEBM video | video/webm |
.webp |
WEBP image | image/webp |
.woff |
Web Open Font Format (WOFF) | font/woff |
.woff2 |
Web Open Font Format 2 (WOFF2) | font/woff2 |
.xhtml |
XHTML | application/xhtml+xml |
.xls |
Microsoft Excel | application/vnd.ms-excel |
.xlsx |
Microsoft Excel (OpenXML) | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet |
.xml |
XML | application/xml is recommended as of RFC 7303 (section 4.1), but text/xml is still used sometimes. You can assign a specific MIME type to a file with .xml extension depending on how its contents are meant to be interpreted. For instance, an Atom feed is application/atom+xml , but application/xml serves as a valid default. |
.xul |
XUL | application/vnd.mozilla.xul+xml |
.zip |
ZIP archive | application/zip |
.3gp |
3GPP audio/video container | video/3gpp ; audio/3gpp if it doesn’t contain video |
.3g2 |
3GPP2 audio/video container | video/3gpp2 ; audio/3gpp2 if it doesn’t contain video |
.7z |
7-zip archive | application/x-7z-compressed |
This article describes the MIME types and corresponding file extensions of Microsoft Office documents. It is very useful for document analysis, and it lets you easily define the ContentType for Microsoft Office documents in ASP.NET applications. You can also use these details to customize MIME types in the IIS server configuration.
For Microsoft Office Excel, you can define the content type as in this example.
Aspx page
<%response.ContentType="application/vnd.ms-excel"%>
C#
Response.ContentType = "application/vnd.ms-excel";
The following table lists the MIME types and file extensions that are associated with the Microsoft Office documents.
Extension | MIME Type |
.doc | application/msword |
.dot | application/msword |
.docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document |
.dotx | application/vnd.openxmlformats-officedocument.wordprocessingml.template |
.docm | application/vnd.ms-word.document.macroEnabled.12 |
.dotm | application/vnd.ms-word.template.macroEnabled.12 |
.xls | application/vnd.ms-excel |
.xlt | application/vnd.ms-excel |
.xla | application/vnd.ms-excel |
.xlsx | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet |
.xltx | application/vnd.openxmlformats-officedocument.spreadsheetml.template |
.xlsm | application/vnd.ms-excel.sheet.macroEnabled.12 |
.xltm | application/vnd.ms-excel.template.macroEnabled.12 |
.xlam | application/vnd.ms-excel.addin.macroEnabled.12 |
.xlsb | application/vnd.ms-excel.sheet.binary.macroEnabled.12 |
.ppt | application/vnd.ms-powerpoint |
.pot | application/vnd.ms-powerpoint |
.pps | application/vnd.ms-powerpoint |
.ppa | application/vnd.ms-powerpoint |
.pptx | application/vnd.openxmlformats-officedocument.presentationml.presentation |
.potx | application/vnd.openxmlformats-officedocument.presentationml.template |
.ppsx | application/vnd.openxmlformats-officedocument.presentationml.slideshow |
.ppam | application/vnd.ms-powerpoint.addin.macroEnabled.12 |
.pptm | application/vnd.ms-powerpoint.presentation.macroEnabled.12 |
.potm | application/vnd.ms-powerpoint.template.macroEnabled.12 |
.ppsm | application/vnd.ms-powerpoint.slideshow.macroEnabled.12 |
Thanks,
Morgan
Software Developer
This is a list of the MIME Types of all well known and lesser known file formats and kinds of documents from the Microsoft Office family.
You can find the MIME Types in the table ordered by the corresponding application Microsoft Word, Excel, Power Point and Access as well as the extension of the file.
Microsoft Word
Extension | MIME-Type |
DOC | application/msword |
DOCX | application/vnd.openxmlformats-officedocument.wordprocessingml.document |
DOT | application/msword |
DOTX | application/vnd.openxmlformats-officedocument.wordprocessingml.template |
DOCM | application/vnd.ms-word.document.macroEnabled.12 |
DOTM | application/vnd.ms-word.template.macroEnabled.12 |
DOCM | application/vnd.ms-word.document.macroEnabled.12 |
DOTM | application/vnd.ms-word.template.macroEnabled.12 |
WORD | application/msword |
W6W | application/msword |
Microsoft Excel
Extension | MIME-Type |
XLS | application/msexcel |
XLSX | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet |
XLT | application/msexcel |
XLTX | application/vnd.openxmlformats-officedocument.spreadsheetml.template |
XLA | application/msexcel |
XLW | application/msexcel |
XLSM | application/vnd.ms-excel.sheet.macroEnabled.12 |
XLSB | application/vnd.ms-excel.sheet.binary.macroEnabled.12 |
XLTM | application/vnd.ms-excel.template.macroEnabled.12 |
XLAM | application/vnd.ms-excel.addin.macroEnabled.12 |
Microsoft PowerPoint
Extension | MIME-Type |
PPT | application/mspowerpoint |
PPTX | application/vnd.openxmlformats-officedocument.presentationml.presentation |
POT | application/mspowerpoint |
POTX | application/vnd.openxmlformats-officedocument.presentationml.template |
PPS | application/mspowerpoint |
PPSX | application/vnd.openxmlformats-officedocument.presentationml.slideshow |
PPA | application/mspowerpoint |
PPAM | application/vnd.ms-powerpoint.addin.macroEnabled.12 |
PPTM | application/vnd.ms-powerpoint.presentation.macroEnabled.12 |
PPSM | application/vnd.ms-powerpoint.slideshow.macroEnabled.12 |
POTM | application/vnd.ms-powerpoint.template.macroEnabled.12 |
Microsoft Access
Extension | MIME-Type |
MDB | application/msaccess |
ACCDA | application/msaccess |
ACCDB | application/msaccess |
ACCDE | application/msaccess |
ACCDR | application/msaccess |
ACCDT | application/msaccess |
ADE | application/msaccess |
ADP | application/msaccess |
ADN | application/msaccess |
MDE | application/msaccess |
MDF | application/msaccess |
MDN | application/msaccess |
MDT | application/msaccess |
MDW | application/msaccess |
Other
Extension | MIME-Type |
CALCX | application/vnd.ms-office.calx |
HLP (Microsoft Windows Help) | application/x-winhelp |
MPP (Microsoft Project) | application/msproject |
THMX | application/vnd.ms-officetheme |
WRI (Microsoft Write) | application/mswrite |
I hope this list helps you. If I have forgotten something, just leave a comment.
This article introduces the concept of using content types to manage your content.
Note: To create or manage a site content type on a site, you must have at least Design permissions to that site.
In this article
-
What is a content type?
-
Attributes of content types
-
Kinds of content types
-
About attribute inheritance
-
How content types support content management
What is a content type?
In the course of a single project, a business might produce several different kinds of content, for example, proposals, legal contracts, statements of work, and product design specifications. A business might want to collect and maintain different kinds of metadata about each kind of content. Metadata might include an account number, project number, or project manager, for example. Although documents might be stored together because they are related to a single project, they can be created, used, shared, and retained in different ways.
Content types enable organizations to organize, manage, and handle content in a consistent way across a site collection. By defining content types for specific kinds of documents or information products, an organization can ensure that content is managed in a consistent way. Content types can be seen as templates that you apply to a list or library and you can apply multiple templates to a list or library so that they can contain multiple item or document types.
Top of Page
Attributes of content types
Content types can be defined for documents, list items, or folders. Each content type can specify:
-
The columns (metadata) that you want to assign to items of this type.
-
The document template that you want to apply to new items (available for document content types only).
-
Custom New, Edit, and Display forms.
-
The workflows for items of the content type.
-
Custom solutions or features.
Kinds of content types
There are two kinds of content types that you can create or use depending on whether you want to use them in lists and libraries across multiple sites in a site collection, or in a specific list or library on a single site.
-
Site content types
Content types are first defined for a site in the Site Content Type Gallery. Content types that are defined at the site level are called site content types. Site content types are available for use in any subsites of the site for which they have been defined. For example, if a site content type is defined for the top-level site in a site collection, it becomes available for use in lists and libraries across all sites in that site collection.
-
List content types
Site content types can be added individually to lists or libraries and customized for use in those lists or libraries. When a site content type is added to a list or library, it is called a list content type. List content types are children of the site content types from which they are created. List content types can make your document libraries and lists more flexible, because a single list or library can contain multiple item types or document types, each of which can have unique metadata, policies, or behaviors. In order for a list or library to contain multiple item or document types, you must configure the list or library to allow multiple content types. The New command in that list or library then lists the types available for that list or library.
Top of Page
About attribute inheritance
You never create content types from scratch. Instead, you are provided with a default set of content types which you can use as-is or customize. Content types are organized into a hierarchy that allows one content type to inherit its characteristics from another. This structure enables you to handle whole categories of documents consistently across your organization. The tables below describe each base content type and their related group and parent content type.
Content type |
Description |
Parent content type |
System |
All content types inherit from the System content type. This content type is sealed and cannot be edited. |
Group: Document Content Types
Content type |
Description |
Parent content type |
Document |
Create a document. |
Item |
XSL Style |
Create an XSL style. |
Document |
Picture |
Upload an image or photograph. |
Document |
Master Page |
Create a master page. |
Document |
Basic Page |
Create a basic page. |
Document |
Web Part Page |
Create a Web Part page. |
Document |
Form |
Create a form that can be filled out. |
Document |
Link to a document |
Create a link to a document in a different location. |
Document |
Dublin Core Columns |
Used for the Dublin Core metadata element set. Dublin Core Columns are an industry standard metadata definition for documents. For more information, see Dublin Core Metadata Initiative. |
Document |
Group: List Content Types
Content types |
Description |
Parent content type |
Event |
Create a new meeting, deadline or other event. |
Item |
Schedule+Resource |
Schedule and reserve a resource. |
Event |
Reservations |
Reserve a resource, such as a meeting room. |
Item |
Schedule |
Create a new appointment. |
Item |
Issue |
Track an issue or problem. |
Item |
Comment |
Create a new blog comment. |
Item |
Item |
Create a new list item. |
System |
East Asia Contact |
Store information about a business or personal contact. |
Item |
Contact |
Store information about a business or personal contact. |
Item |
Message |
Create a new message. |
Item |
Task |
Track a work item that you or your team needs to complete. |
Item |
Post |
Create a new blog post. |
Item |
Announcement |
Create a new news item, status or other short piece of information. |
Item |
Link |
Create a new link to a Web page or other resource. |
Item |
Group: Group Work Content Types
Content type |
Description |
Parent content type |
Circulation |
Add a circulation. |
Item |
Holiday |
Add a new holiday. |
Item |
Word |
Add a new word to the list. |
Item |
Official Notice |
Add a new official notice. |
Item |
Phone Call Memo |
Add a new phone call memo. |
Item |
Resource |
Add a new resource. |
Item |
Resource Group |
Add a new resource group. |
Item |
Timecard |
Add new timecard data |
Item |
Users |
Add new users to the list. |
Item |
What’s New Notification |
Add a new What’s New notification |
Item |
Group: Folder Content Types
Content type |
Description |
Parent content type |
Discussion |
Create a new discussion topic. |
Folder |
Folder |
Create a new folder. |
Item |
Summary Task |
Group and describe related tasks that you or your team needs to complete. |
Folder |
When you define a new custom site content type in the Site Content Type Gallery for a site, you start by selecting an existing parent site content type in the Site Content Type Gallery as your starting point. The new site content type that you create inherits all of the attributes of its parent site content type, such as its document template, read-only setting, workflows, and columns. After you create this new site content type, you can change any of these attributes.
The following diagram helps show how content types inherit attributes from their parent content types.
1. This site content type is derived from the System content type.
2. These site content types are based on the Document content type. Any changes that you make to these content types will not affect their parent content type (Document).
3. This site content type is based on the Project Plan content type. Any changes you make to this content type will not affect its parent content type (Project Plan).
4. These list content types are based on the Project X and Sign-off Sheet content types. Any changes that you make to a list content type apply only to the instance of that content type that has been added to the list or library.
If you customize a child type with attributes that the parent doesn’t have, such as additional columns, those customizations aren’t written to the parent type. In other words, changes to inherited attributes can cascade downward from parent to child, but never upward.
The same rules apply when you create a list content type. For example, if you customize a list type with attributes that the parent type doesn’t have, such as additional columns, those customizations don’t affect the parent type. Also, remember you can customize a list type only for the list or library to which it was added, and when you change attributes in the parent type, your changes can overwrite the attributes in the child list type.
Top of Page
How content types support content management
By defining site content types, you can ensure that whole categories of documents are handled consistently across your organization. For example, all customer deliverable documents in an organization may require a specific set of metadata, such as account number, project number, and project manager. You can help ensure that account and project numbers are associated with the customer deliverable documents in your organization by creating a Customer Deliverable site content type that has required columns for these items of metadata.
As an example of how content types can help you manage your content, the image below shows the new Customer Deliverable content type as an available option when you create a new document in the Customer Deliverable document library.
All customer deliverable document types in the site collection that inherit from this site content type will then require users to specify information for these columns. If you need to track additional metadata that is related to these customer deliverable documents, you can add another required column to the Customer Deliverable site content type. Then you can update all child list content types that inherit from this site content type, adding the new column to all customer deliverable documents.
Applying consistent metadata across multiple document libraries is only one benefit of using content types to manage your content. Content types also enable you to associate the same capabilities or attributes to content across your site.
If you have a list or library that is set up to allow multiple content types, you can add content types to this list or library from the group of site content types that are available for your site. One of the key advantages of content types for lists and libraries is that they make it possible for a single list or library to contain multiple item and document types, each of which may have unique metadata, policies, or behaviors.
Top of Page
- Remove From My Forums
-
Question
-
Hi All,
I have a requirement to create a custom content type for a Word template. The requirement is as follows:
- When a user creates a new document from a document library, by default it should have the Site name, Date and Time.
Can you please suggest how I can achieve this?
Mohammed
Answers
-
Hi Mohammed,
You have to create a new Document Template (.dotx) as per your requirement and attach the same with your document library.
To attach a Document template, Select Document Library — > Library Settings — > Advanced Settings — > Set your Document template.
MCTS
My Blog
Twitter @jaggavivek-
Marked as answer by
Tuesday, May 8, 2012 2:02 AM
-
Marked as answer by
-
Word has these things called quick parts, little pieces of info that you insert into the document and they are data driven from sharepoint. I think you can get the document library url from the field FileRef which you’d pass into the word document using the Label field like so http://sharepoint.microsoft.com/blogs/getthepoint/lists/posts/post.aspx?id=46
http://social.msdn.microsoft.com/Forums/en-US/sharepointecm/thread/67157526-f30f-4b44-97ed-131ba294ab60
Please mark my response as an answer if appropriate.
Learn.SharePoint.com-
Marked as answer by
Xue-Mei Chang-MSFT
Tuesday, May 8, 2012 2:02 AM
-
Marked as answer by
Как я разбирал docx с помощью XSLT
Задача обработки документов в формате docx, а также таблиц xlsx и презентаций pptx является весьма нетривиальной. В этой статье расскажу как научиться парсить, создавать и обрабатывать такие документы используя только XSLT и ZIP архиватор.
Зачем?
docx — самый популярный формат документов, поэтому задача отдавать информацию пользователю в этом формате всегда может возникнуть. Один из вариантов решения этой проблемы — использование готовой библиотеки, может не подходить по ряду причин:
- библиотеки может просто не существовать
- в проекте не нужен ещё один чёрный ящик
- ограничения библиотеки по платформам и т.п.
- проблемы с лицензированием
- скорость работы
Поэтому в этой статье будем использовать только самые базовые инструменты для работы с docx документом.
Структура docx
Для начала разоберёмся с тем, что собой представляет docx документ. docx это zip архив который физически содержит 2 типа файлов:
- xml файлы с расширениями xml и rels
- медиа файлы (изображения и т.п.)
А логически — 3 вида элементов:
- Типы (Content Types) — список типов медиа файлов (например png) встречающихся в документе и типов частей документов (например документ, верхний колонтитул).
- Части (Parts) — отдельные части документа, для нашего документа это document.xml, сюда входят как xml документы так и медиа файлы.
- Связи (Relationships) идентифицируют части документа для ссылок (например связь между разделом документа и колонтитулом), а также тут определены внешние части (например гиперссылки).
Они подробно описаны в стандарте ECMA-376: Office Open XML File Formats, основная часть которого — PDF документ на 5000 страниц, и ещё 2000 страниц бонусного контента.
Минимальный docx
Простейший docx после распаковки выглядит следующим образом
Давайте посмотрим из чего он состоит.
[Content_Types].xml
Находится в корне документа и перечисляет MIME типы содержимого документа:
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"> <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/> <Default Extension="xml" ContentType="application/xml"/> <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/> </Types>
_rels/.rels
Главный список связей документа. В данном случае определена всего одна связь — сопоставление с идентификатором rId1 и файлом word/document.xml — основным телом документа.
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"> <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/> </Relationships>
word/document.xml
Основное содержимое документа.
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 wp14"> <w:body> <w:p w:rsidR="005F670F" w:rsidRDefault="005F79F5"> <w:r> <w:t>Test</w:t> </w:r> <w:bookmarkStart w:id="0" w:name="_GoBack"/> <w:bookmarkEnd w:id="0"/> </w:p> <w:sectPr w:rsidR="005F670F"> <w:pgSz w:w="12240" w:h="15840"/> <w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="720" w:footer="720" w:gutter="0"/> <w:cols w:space="720"/> <w:docGrid w:linePitch="360"/> </w:sectPr> </w:body> </w:document>
Здесь:
- <w:document> — сам документ
- <w:body> — тело документа
- <w:p> — параграф
- <w:r> — run (фрагмент) текста
- <w:t> — сам текст
- <w:sectPr> — описание страницы
Если открыть этот документ в текстовом редакторе, то увидим документ из одного слова Test
.
word/_rels/document.xml.rels
Здесь содержится список связей части word/document.xml. Название файла связей создаётся из названия части документа, к которой он относится, и добавления к нему расширения rels. Папка с файлом связей называется _rels и находится на том же уровне, что и часть, к которой он относится. Так как связей в word/document.xml никаких нет, то и в файле пусто:
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"> </Relationships>
Даже если связей нет, этот файл должен существовать.
docx и Microsoft Word
docx созданный с помощью Microsoft Word, да в принципе и с помощью любого другого редактора имеет несколько дополнительных файлов.
Вот что в них содержится:
- docProps/core.xml — основные метаданные документа согласно Open Packaging Conventions и Dublin Core [1], [2].
- docProps/app.xml — общая информация о документе: количество страниц, слов, символов, название приложения в котором был создан документ и т.п.
- word/settings.xml — настройки относящиеся к текущему документу.
- word/styles.xml — стили применимые к документу. Отделяют данные от представления.
- word/webSettings.xml — настройки отображения HTML частей документа и настройки того, как конвертировать документ в HTML.
- word/fontTable.xml — список шрифтов используемых в документе.
- word/theme1.xml — тема (состоит из цветовой схемы, шрифтов и форматирования).
В сложных документах частей может быть гораздо больше.
Реверс-инжиниринг docx
Итак, первоначальная задача — узнать как какой-либо фрагмент документа хранится в xml, чтобы потом создавать (или парсить) подобные документы самостоятельно. Для этого нам понадобятся:
- Архиватор zip
- Библиотека для форматирования XML (Word выдаёт XML без отступов, одной строкой)
- Средство для просмотра diff между файлами, я буду использовать git и TortoiseGit
Инструменты
- Под Windows: zip, unzip, libxml2, git, TortoiseGit
- Под Linux:
apt-get install zip unzip libxml2 libxml2-utils git
Также понадобятся скрипты для автоматического (раз)архивирования и форматирования XML.
Использование под Windows:
- unpack file dir — распаковывает документ file в папку dir и форматирует xml
- pack dir file — запаковывает папку dir в документ file
Использование под Linux аналогично, только ./unpack.sh
вместо unpack
, а pack
становится ./pack
.
Использование
Поиск изменений происходит следующим образом:
- Создаём пустой docx файл в редакторе.
- Распаковываем его с помощью unpack в новую папку.
- Коммитим новую папку.
- Добавляем в файл из п. 1. изучаемый элемент (гиперссылку, таблицу и т.д.).
- Распаковываем изменённый файл в уже существующую папку.
- Изучаем diff, убирая ненужные изменения (перестановки связей, порядок пространств имён и т.п.).
- Запаковываем папку и проверяем что получившийся файл открывается.
- Коммитим изменённую папку.
Пример 1. Выделение текста жирным
Посмотрим на практике, как найти тег который определяет форматирование текста жирным шрифтом.
- Создаём документ bold.docx с обычным (не жирным) текстом Test.
- Распаковываем его: unpack bold.docx bold.
- Коммитим результат.
- Выделяем текст Test жирным.
- Распаковываем: unpack bold.docx bold.
- Изначально diff выглядел следующим образом:
Рассмотрим его подробно:
docProps/app.xml
@@ -1,9 +1,9 @@ - <TotalTime>0</TotalTime> + <TotalTime>1</TotalTime>
Изменение времени нам не нужно.
docProps/core.xml
@@ -4,9 +4,9 @@ - <cp:revision>1</cp:revision> + <cp:revision>2</cp:revision> <dcterms:created xsi:type="dcterms:W3CDTF">2017-02-07T19:37:00Z</dcterms:created> - <dcterms:modified xsi:type="dcterms:W3CDTF">2017-02-07T19:37:00Z</dcterms:modified> + <dcterms:modified xsi:type="dcterms:W3CDTF">2017-02-08T10:01:00Z</dcterms:modified>
Изменение версии документа и даты модификации нас также не интересует.
word/document.xml
```diff
@@ -1,24 +1,26 @@
-
+
+
-
+
+
Test
-
+
```
Изменения в w:rsidR не интересны — это внутренняя информация для Microsoft Word. Ключевое изменение тут в параграфе с Test. Видимо, элемент <w:b/> и делает текст жирным. Оставляем это изменение и отменяем остальные.
word/settings.xml
@@ -1,8 +1,9 @@ + <w:proofState w:spelling="clean"/> @@ -17,10 +18,11 @@ + <w:rsid w:val="00F752CF"/>
Также не содержит ничего относящегося к жирному тексту. Отменяем.
7. Запаковываем папку с одним изменением (добавлением <w:b/>) и проверяем, что документ открывается и показывает то, что ожидалось.
8. Коммитим изменение.
Пример 2. Нижний колонтитул
Теперь разберём пример посложнее — добавление нижнего колонтитула.
Вот первоначальный коммит. Добавляем нижний колонтитул с текстом 123 и распаковываем документ. Такой diff получается первоначально:
Сразу же исключаем изменения в docProps/app.xml и docProps/core.xml — там то же самое, что и в первом примере.
[Content_Types].xml
@@ -4,10 +4,13 @@ <Default Extension="xml" ContentType="application/xml"/> <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/> + <Override PartName="/word/footnotes.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"/> + <Override PartName="/word/endnotes.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml"/> + <Override PartName="/word/footer1.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
footer явно выглядит как то, что нам нужно, но что делать с footnotes и endnotes? Являются ли они обязательными при добавлении нижнего колонтитула или их создали заодно? Ответить на этот вопрос не всегда просто, вот основные пути:
- Посмотреть, связаны ли изменения друг с другом
- Экспериментировать
- Ну а если совсем не понятно что происходит:
Идём пока что дальше.
word/_rels/document.xml.rels
Изначально diff выглядит вот так:
```diff
@@ -1,8 +1,11 @@
+
+
-
-
+
+
+
```
Видно, что часть изменений связана с тем, что Word изменил порядок связей, уберём их:
```diff
@@ -3,6 +3,9 @@
+
+
+
```
Опять появляются footer, footnotes, endnotes. Все они связаны с основным документом, перейдём к нему:
word/document.xml
@@ -15,10 +15,11 @@ </w:r> <w:bookmarkStart w:id="0" w:name="_GoBack"/> <w:bookmarkEnd w:id="0"/> </w:p> <w:sectPr w:rsidR="0076695C" w:rsidRPr="00290C70"> + <w:footerReference w:type="default" r:id="rId6"/> <w:pgSz w:w="11906" w:h="16838"/> <w:pgMar w:top="1134" w:right="850" w:bottom="1134" w:left="1701" w:header="708" w:footer="708" w:gutter="0"/> <w:cols w:space="708"/> <w:docGrid w:linePitch="360"/> </w:sectPr>
Редкий случай когда есть только нужные изменения. Видна явная ссылка на footer из sectPr. А так как ссылок в документе на footnotes и endnotes нет, то можно предположить что они нам не понадобятся.
word/settings.xml
@@ -1,19 +1,30 @@ <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <w:settings xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:sl="http://schemas.openxmlformats.org/schemaLibrary/2006/main" mc:Ignorable="w14 w15"> <w:zoom w:percent="100"/> + <w:proofState w:spelling="clean"/> <w:defaultTabStop w:val="708"/> <w:characterSpacingControl w:val="doNotCompress"/> + <w:footnotePr> + <w:footnote w:id="-1"/> + <w:footnote w:id="0"/> + </w:footnotePr> + <w:endnotePr> + <w:endnote w:id="-1"/> + <w:endnote w:id="0"/> + </w:endnotePr> <w:compat> <w:compatSetting w:name="compatibilityMode" w:uri="http://schemas.microsoft.com/office/word" w:val="15"/> <w:compatSetting w:name="overrideTableStyleFontSizeAndJustification" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/> <w:compatSetting w:name="enableOpenTypeFeatures" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/> <w:compatSetting w:name="doNotFlipMirrorIndents" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/> <w:compatSetting w:name="differentiateMultirowTableHeaders" w:uri="http://schemas.microsoft.com/office/word" w:val="1"/> </w:compat> <w:rsids> <w:rsidRoot w:val="00290C70"/> + <w:rsid w:val="000A7B7B"/> + <w:rsid w:val="001B0DE6"/>
А вот и появились ссылки на footnotes, endnotes добавляющие их в документ.
word/styles.xml
«`diff
@@ -480,6 +480,50 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
«`
Изменения в стилях нас интересуют только если мы ищем как поменять стиль. В данном случае это изменение можно убрать.
word/footer1.xml
Посмотрим теперь собственно на сам нижний колонтитул (часть пространств имён опущена для читабельности, но в документе они должны быть):
<w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"> <w:p w:rsidR="000A7B7B" w:rsidRDefault="000A7B7B"> <w:pPr> <w:pStyle w:val="a6"/> </w:pPr> <w:r> <w:t>123</w:t> </w:r> </w:p> </w:ftr>
Тут виден текст 123. Единственное, что надо исправить — убрать ссылку на <w:pStyle w:val="a6"/>
.
В результате анализа всех изменений делаем следующие предположения:
- footnotes и endnotes не нужны
- В [Content_Types].xml надо добавить footer
- В word/_rels/document.xml.rels надо добавить ссылку на footer
- В word/document.xml в тег <w:sectPr> надо добавить <w:footerReference>
Уменьшаем diff до этого набора изменений:
Затем запаковываем документ и открываем его.
Если всё сделано правильно, то документ откроется и в нём будет нижний колонтитул с текстом 123. А вот и итоговый коммит.
Таким образом процесс поиска изменений сводится к поиску минимального набора изменений, достаточного для достижения заданного результата.
Практика
Найдя интересующее нас изменение, логично перейти к следующему этапу, это может быть что-либо из:
- Создания docx
- Парсинг docx
- Преобразования docx
Тут нам потребуются знания XSLT и XPath.
Давайте напишем достаточно простое преобразование — замену или добавление нижнего колонтитула в существующий документ. Писать я буду на языке Caché ObjectScript, но даже если вы не знаете — не беда. В основном будем вызывать XSLT и архиватор. Ничего более. Итак, приступим.
Алгоритм
Алгоритм выглядит следующим образом:
- Распаковываем документ
- Добавляем наш нижний колонтитул
- Прописываем ссылку на него в [Content_Types].xml и word/_rels/document.xml.rels
- В word/document.xml в тег <w:sectPr> добавляем тег <w:footerReference> или заменяем в нём ссылку на наш нижний колонтитул.
- Запаковываем документ
Приступим.
Распаковка
В Caché ObjectScript есть возможность выполнять команды ОС с помощью функции $zf(-1, oscommand). Вызовем unzip для распаковки документа с помощью обёртки над $zf(-1):
/// Command-line template: %3 is the unzip executable, %1 the archive to
/// extract and %2 the destination directory.
Parameter UNZIP = "%3 %1 -d %2";

/// Extract the archive <var>source</var> into the directory <var>targetDir</var>.
/// The command line is produced by substituting the arguments into the UNZIP
/// template and is then handed to ..execute() together with a timeout of 100.
/// Returns whatever status ..execute() reports.
ClassMethod executeUnzip(source, targetDir) As %Status
{
    set command = $$$FormatText(..#UNZIP, source, targetDir, ..getUnzip())
    quit ..execute(command, 100)
}
Создаём файл нижнего колонтитула
На вход поступает текст нижнего колонтитула, запишем его в файл in.xml:
В XSLT (файл — footer.xsl) будем создавать нижний колонтитул с текстом из тега xml (часть пространств имён опущена, вот полный список):
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://schemas.openxmlformats.org/package/2006/relationships" version="1.0"> <xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/> <xsl:template match="/"> <w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"> <w:p> <w:r> <w:rPr> <w:lang w:val="en-US"/> </w:rPr> <w:t> <xsl:value-of select="//xml/text()"/> </w:t> </w:r> </w:p> </w:ftr> </xsl:template> </xsl:stylesheet>
Теперь вызовем XSLT преобразователь:
do ##class(%XML.XSLT.Transformer).TransformFile("in.xml", "footer.xsl", "footer0.xml")
В результате получится файл нижнего колонтитула footer0.xml
:
<w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"> <w:p> <w:r> <w:rPr> <w:lang w:val="en-US"/> </w:rPr> <w:t>TEST</w:t> </w:r> </w:p> </w:ftr>
Добавляем ссылку на колонтитул в список связей основного документа
Ссылки с идентификатором rId0
как правило не существует. Впрочем можно использовать XPath для получения идентификатора которого точно не существует.
Добавляем ссылку на footer0.xml
c идентификатором rId0 в word/_rels/document.xml.rels
:
```xml
<xsl:template match="/*">
<xsl:copy>
<xsl:copy-of select="$new"/>
<xsl:copy-of select="@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
```
</spoiler>
#### Прописываем ссылки в документе
Далее надо в каждый тег `<w:sectPr>` добавить тег `<w:footerReference>` или заменить в нём ссылку на наш нижний колонтитул. [Оказалось](https://msdn.microsoft.com/en-us/library/documentformat.openxml.wordprocessing.footerreference(v=office.14).aspx), что у каждого тега `<w:sectPr>` может быть 3 тега `<w:footerReference>` - для первой страницы, четных страниц и всего остального:
<spoiler title="XSLT">
```xml
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
version="1.0">
<xsl:output method="xml" omit-xml-declaration="yes" indent="yes" />
<xsl:template match="//@* | //node()">
<xsl:copy>
<xsl:apply-templates select="@*"/>
<xsl:apply-templates select="node()"/>
</xsl:copy>
</xsl:template>
<xsl:template match="//w:sectPr">
<xsl:element name="{name()}" namespace="{namespace-uri()}">
<xsl:copy-of select="./namespace::*"/>
<xsl:apply-templates select="@*"/>
<xsl:copy-of select="./*[local-name() != 'footerReference']"/>
<w:footerReference w:type="default" r:id="rId0"/>
<w:footerReference w:type="first" r:id="rId0"/>
<w:footerReference w:type="even" r:id="rId0"/>
</xsl:element>
</xsl:template>
</xsl:stylesheet>
Добавляем колонтитул в [Content_Types].xml
Добавляем в [Content_Types].xml
информацию о том, что /word/footer0.xml
имеет тип application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml
:
```xml
<xsl:template match="/*">
<xsl:copy>
<xsl:copy-of select="@* | node()"/>
<xsl:copy-of select="$new"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
```
</spoiler>
#### В результате
Весь код [опубликован](https://github.com/intersystems-ru/Converter/blob/master/Converter/Footer.cls.xml). Работает он так:
```cos
do ##class(Converter.Footer).modifyFooter("in.docx", "out.docx", "TEST")
Где:
- in.docx
— исходный документ
- out.docx
— выходящий документ
- TEST
— текст, который добавляется в нижний колонтитул
Выводы
Используя только XSLT и ZIP можно успешно работать с документами docx, таблицами xlsx и презентациями pptx.
Открытые вопросы
- Изначально хотел использовать 7z вместо zip/unzip, т. к. это одна утилита и она более распространена на Windows. Однако я столкнулся с такой проблемой, что документы, запакованные 7z под Linux, не открываются в Microsoft Office. Я попробовал достаточно много вариантов вызова, однако положительного результата добиться не удалось.
- Ищу XSD со схемами ECMA-376 версии 5 и комментариями. XSD версии 5 без комментариев доступен к загрузке на сайте ECMA, но без комментариев в нём сложно разобраться. XSD версии 2 с комментариями доступен к загрузке.
Ссылки
- ECMA-376
- Описание docx
- Подробная статья про docx
- Репозиторий со скриптами
- Репозиторий с преобразователем нижнего колонтитула
With approximately one billion people using Microsoft Office, the DOCX format is the most popular de facto standard for exchanging document files between offices. Its closest competitor — the ODT format — is only supported by Open/LibreOffice and some open source products, making it far from standard. The PDF format is not a competitor because PDFs can’t be edited and they don’t contain a full document structure, so they can only take limited local changes like watermarks, signatures, and the like. This is why most business documents are created in the DOCX format; there’s no good alternative to replace it.
While DOCX is a complex format, you may want to parse it manually for simpler tasks such as indexing, converting to TXT and making other small modifications. I’d like to give you enough information on DOCX internals so you don’t have to reference the ECMA specifications, a massive 5,000 page manual.
The best way to understand the format is to create a simple one-word document with MSWord and observe how editing the document changes the underlying XML. You’ll face some cases where the DOCX doesn’t format properly in MS Word and you don’t know why, or come across instances when it’s not evident how to generate the desired formatting. Seeing and understanding exactly what’s going on in the XML will help that.
I worked for about a year on a collaborative DOCX editor, CollabOffice, and I want to share some of that knowledge with the developer community. In this article I will explain the DOCX file structure, summarising information that is scattered over the internet. This article is an intermediary between the huge, complex ECMA specification and the simple internet tutorials currently available. You can find the files that accompany this article in the toptal-docx
project on my github account.
A Simple DOCX file
A DOCX file is a ZIP archive of XML files. If you create a new, empty Microsoft Word document, write a single word ‘Test’ inside and unzip its contents, you will see the following file structure:
Even though we’ve created a simple document, the save process in Microsoft Word has generated default themes, document properties, font tables, and so on, in XML format.
All the files inside a DOCX are XML files, even those with the «.rels» extension.
To start, let us remove the unused stuff and focus on document.xml
, which contains the main text elements. When you delete a file, make sure you have deleted all the relationship references to it from the other XML files. Here is a code-diff example on how I’ve cleared dependencies to app.xml and core.xml. If you have any unresolved/missing references, MSWord will consider the file broken.
Here’s the structure of our simplified, minimal DOCX document (and here’s the project on github):
Let’s break it down by file from here, from the top:
_rels/.rels
This defines the reference that tells MS Word where to look for the document contents. In this case, it references word/document.xml
:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
Target="word/document.xml"/>
</Relationships>
_rels/document.xml.rels
This file defines references to resources, such as images, embedded in the document content. Our simple document has no embedded resources, so the relationship tag is empty:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
</Relationships>
[Content_Types].xml
[Content_Types].xml
contains information about the types of media inside the document. Since we only have text content, it’s pretty simple:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml"
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>
document.xml
Finally, here is the main XML with the document’s text content. I have removed some of namespace declarations for clarity, but you can find the full version of the file in the github project. In that file you’ll find that some of the namespace references in the document are unused, but you shouldn’t delete them because MS Word needs them.
Here’s our simplified example:
<w:document>
<w:body>
<w:p w:rsidR="005F670F" w:rsidRDefault="005F79F5">
<w:r><w:t>Test</w:t></w:r>
</w:p>
<w:sectPr w:rsidR="005F670F">
<w:pgSz w:w="12240" w:h="15840"/>
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="720" w:footer="720"
w:gutter="0"/>
<w:cols w:space="720"/>
<w:docGrid w:linePitch="360"/>
</w:sectPr>
</w:body>
</w:document>
The main node <w:document>
represents the document itself, <w:body>
contains paragraphs, and nested within <w:body>
are page dimensions defined by <w:sectPr>
.
<w:rsidR>
is an attribute that you can ignore; it’s used by MS Word internals.
Let’s take a look at a more complex document with three paragraphs. I have highlighted the XML with the same colors on the screenshot from Microsoft Word, so you can see the correlation:
<w:p w:rsidR="0081206C" w:rsidRDefault="00E10CAE"> <w:r> <w:t xml:space="preserve">This is our example first paragraph. It's default is left aligned, and now I'd like to introduce</w:t> </w:r> <w:r> <w:rPr> <w:rFonts w:ascii="Arial" w:hAnsi="Arial" w:cs="Arial"/> <w:color w:val="000000"/> </w:rPr> <w:t>some bold</w:t> </w:r> <w:r> <w:rPr> <w:rFonts w:ascii="Arial" w:hAnsi="Arial" w:cs="Arial"/> <w:b/> <w:color w:val="000000"/> </w:rPr> <w:t xml:space="preserve"> text</w:t> </w:r> <w:r> <w:rPr> <w:rFonts w:ascii="Arial" w:hAnsi="Arial" w:cs="Arial"/> <w:color w:val="000000"/> </w:rPr> <w:t xml:space="preserve">, </w:t> </w:r> <w:proofErr w:type="gramStart"/> <w:r> <w:t xml:space="preserve">and also change the</w:t> </w:r> <w:r w:rsidRPr="00E10CAE"> <w:rPr><w:rFonts w:ascii="Impact" w:hAnsi="Impact"/> </w:rPr> <w:t>font style</w:t> </w:r> <w:r> <w:rPr> <w:rFonts w:ascii="Impact" w:hAnsi="Impact"/> </w:rPr> <w:t xml:space="preserve"> </w:t> </w:r> <w:r> <w:t>to 'Impact'.</w:t></w:r> </w:p> <w:p w:rsidR="00E10CAE" w:rsidRDefault="00E10CAE"> <w:r> <w:t>This is new paragraph.</w:t> </w:r></w:p> <w:p w:rsidR="00E10CAE" w:rsidRPr="00E10CAE" w:rsidRDefault="00E10CAE"> <w:r> <w:t>This is one more paragraph, a bit longer.</w:t> </w:r> </w:p>
Paragraph Structure
A simple document consists of paragraphs, a paragraph consists of runs (a series of text with the same font, color, etc), and runs consist of characters (such as <w:t>
).<w:t>
tags may have several characters inside, and there might be a few in the same run.
Again, we can ignore <w:rsidR>
.
Text properties
Basic text properties are font, size, color, style, and so on. There are about 40 tags that specify text appearance. As you can see in our three paragraph example, each run has its own properties inside <w:rPr>
, specifying <w:color>
, <w:rFonts>
and boldness <w:b>
.
An important thing to note is that properties make a distinction between the two groups of characters, normal and complex script (Arabic, for instance), and that the properties have a different tag depending on which type of character it’s affecting.
Most normal script property tags have a matching complex script tag with an added “Cs” suffix specifying the property is for complex scripts. For example: <w:i>
(italic) becomes <w:iCs>
, and the bold tag for normal script, <w:b>
, becomes <w:bCs>
for complex script.
Styles
There’s an entire toolbar in Microsoft Word dedicated to styles: normal, no spacing, heading 1, heading 2, title, and so on. These styles are stored in /word/styles.xml
(note: in the first step in our simple example, we removed this XML from DOCX. Make a new DOCX to see this).
Once you have text defined as a style, you will find reference to this style inside the paragraph properties tag, <w:pPr>
. Here’s an example where I’ve defined my text with the style Heading 1:
<w:p>
<w:pPr>
<w:pStyle w:val="Heading1"/>
</w:pPr>
<w:r>
<w:t>My heading 1</w:t>
</w:r>
</w:p>
and here is the style itself from styles.xml
:
<w:style w:type="paragraph" w:styleId="Heading1">
<w:name w:val="heading 1"/>
<w:basedOn w:val="Normal"/>
<w:next w:val="Normal"/>
<w:link w:val="Heading1Char"/>
<w:uiPriority w:val="9"/>
<w:qFormat/>
<w:rsid w:val="002F7F18"/>
<w:pPr>
<w:keepNext/>
<w:keepLines/>
<w:spacing w:before="480" w:after="0"/>
<w:outlineLvl w:val="0"/>
</w:pPr>
<w:rPr>
<w:rFonts w:asciiTheme="majorHAnsi" w:eastAsiaTheme="majorEastAsia" w:hAnsiTheme="majorHAnsi"
w:cstheme="majorBidi"/>
<w:b/>
<w:bCs/>
<w:color w:val="365F91" w:themeColor="accent1" w:themeShade="BF"/>
<w:sz w:val="28"/>
<w:szCs w:val="28"/>
</w:rPr>
</w:style>
The <w:style/w:rPr/w:b>
xpath specifies that the font is bold, and <w:style/w:rPr/w:color>
indicates the font color. <w:basedOn>
instructs MSWord to use “Normal” style for any missing properties.
Property Inheritance
Text properties are inherited. A run has its own properties (w:p/w:r/w:rPr/*
), but it also inherits properties from paragraph (w:p/w:pPr/*
), and both can reference style properties from the /word/styles.xml
.
<w:r>
<w:rPr>
<w:rStyle w:val="DefaultParagraphFont"/>
<w:sz w:val="16"/>
</w:rPr>
<w:tab/>
</w:r>
Paragraphs and runs start with default properties: w:styles/w:docDefaults/w:rPrDefault/*
and w:styles/w:docDefaults/w:pPrDefault/*
. To get the end result of a character’s properties you should:
- Use default run/paragraph properties
- Append run/paragraph style properties
- Append local run/paragraph properties
- Append result run properties over paragraph properties
When I say “append” B to A, I mean to iterate through all B properties and override all A’s properties, leaving all non-intersecting properties as-is.
One more place where default properties may be located is in the <w:style>
tag with w:type="paragraph"
and w:default="1"
. Note, that characters themselves inside a run never have a default style, so <w:style w:type="character" w:default="1">
doesn’t actually affect any text.
1554402290400-dbb29eef3ba6035df7ad726dfc99b2af.png)
Characters in a run can inherit from its paragraph and both can inherit from styles.xml.
Toggle properties
Some of the properties are “toggle” properties, such as <w:b>
(bold) or <w:i>
(italic); these attributes behave like an XOR operator.
This means if the parent style is bold and a child run is bold, the result will be regular, non-bold text.
You have to do lots of testing and reverse-engineering to handle toggle attributes correctly. Take a look at paragraph 17.7.3 of ECMA-376 Open XML specification to get the formal, detailed rules for toggle properties.
Toggle properties are the most complex for a layouter to handle correctly.
Fonts
Fonts follow the same common rules as other text attributes, but font property default values are specified in a separate theme file, referenced under word/_rels/document.xml.rels
like this:
<Relationship Id="rId7" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" Target="theme/theme1.xml"/>
Based on the above reference, the default font name will be found in word/theme/theme1.xml
, inside a <a:theme>
tag, a:themeElements/a:fontScheme/a:majorFont
or a:minorFont
tag.
The default font size is 10 unless the w:docDefaults/w:rPrDefault
tag is missing, then it is size 11.
Text alignment
Text alignment is specified by a <w:jc>
tag with four w:val
modes available: "left"
, "center"
, "right"
and "both"
.
"left"
is the default mode; text is started at the left of paragraph rectangle (usually the page width). (This paragraph is aligned to the left, which is standard.)
"center"
mode, predictably, centers all characters inside the page width. (Again, this paragraph exemplifies centered alignment.)
In "right"
mode, paragraph text is aligned to the right margin. (Notice how this text is aligned to the right side.)
"both"
mode puts extra spacing between words so that lines get wider and occupy the full paragraph width, with the exception of the last line which is left aligned. (This paragraph is a demonstration of that.)
Images
DOCX supports two sorts of images: inline and floating.
Inline images appear inside a paragraph along with the other characters, <w:drawing>
is used instead of using <w:t>
(text). You can find image ID with the following xpath syntax:
w:drawing/wp:inline/a:graphic/a:graphicData/pic:pic/pic:blipFill/a:blip/@r:embed
The image ID is used to look up the filename in the word/_rels/document.xml.rels
file, and it should point to gif/jpeg file inside word/media subfolder. (See the github project’s word/_rels/document.xml.rels
file, where you can see the image ID.)
Floating images are placed relative to paragraphs with text flowing around them. (Here’s the github project sample document with a floating image.)
Floating images use <wp:anchor>
instead of <wp:inline> (both are placed inside <w:drawing>)
, so if you delete any text inside <w:p>
, be careful with the anchors if you don’t want the images removed.
MS Word’s image options refer to image alignment as «text wrapping mode».
Tables
XML tags for tables are similar to HTML table markup: <w:tbl> is the same as <table>, <w:tr> matches with <tr>, etc.
<w:tbl>
, the table itself, has table properties <w:tblPr>
, and each column property is presented by <w:gridCol>
inside <w:tblGrid>
. Rows follow one by one as <w:tr>
tags and each row should have same number of columns as specified in <w:tblGrid>
:
<w:tbl>
<w:tblPr>
<w:tblW w:w="5000" w:type="pct" />
</w:tblPr>
<w:tblGrid><w:gridCol/><w:gridCol/></w:tblGrid>
<w:tr>
<w:tc><w:p><w:r><w:t>left</w:t></w:r></w:p></w:tc>
<w:tc><w:p><w:r><w:t>right</w:t></w:r></w:p></w:tc>
</w:tr>
</w:tbl>
Width for table columns can be specified in the <w:tblW>
tag, but if you don’t define it MS Word will use its internal algorithms to find the optimal width of columns for the smallest effective table size.
Units
Many XML attributes inside DOCX specify sizes or distances. While they’re integers inside the XML, they all have different units so some conversion is necessary. The topic is a complicated one, so I’d recommend this article by Lars Corneliussen on units in DOCX files. The table he presents is useful, though with a small misprint: inches should be pt/72, not pt*72.
Here’s a cheat sheet:
COMMON DOCX XML UNIT CONVERSIONS | ||||||
20th of a point | Points dxa/20 |
Inches pt/72 |
Centimeters in*2,54 |
Font half size pt/144 |
EMU in*914400 |
|
Example | 11906 | 595.3 | 8,27… | 21.00086… | 4,135 | 7562088 |
Tags using this | pgSz/pgMar/w:spacing | w:sz | wp:extent, a:ext |
Tips for Implementing a Layouter
If you want to convert a DOCX file (to PDF, for instance), draw it on canvas, or count number of pages, you’ll have to implement a layouter. A layouter is an algorithm for calculating character positions from a DOCX file.
This is a complex task if you need 100 percent fidelity rendering. The amount of time needed to implement a good layouter is measured in man-years, but if you only need a simple, limited one, it can be done relatively quickly.
A layouter fills a parent rectangle, which is usually a rectangle of the page. It adds words from a run one by one. When the current line overflows, it starts a new one. If the paragraph is too high for the parent rectangle, it’s wrapped to the next page.
Here are some important things to keep in mind if you decide to implement a layouter:
- The layouter should take care of text alignment and text floating over images
- It should be capable of handling nested objects, such as nested tables
- If you want to provide full support for such images, you’ll have to implement a layouter with at least two passes, the first step collects floating images’ positions and the second fills empty space with text characters.
- Be aware of indentations and spacings. Each paragraph has spacing before and after, and these numbers are specified by the
w:spacing
tag. Vertical spacing is specified by w:after
and w:before
tags. Note that line spacing is specified by w:line
, but this is not the size of the line as one may expect. To get the size of the line, take the current font height, multiply by w:line
and divide by 12.
- DOCX files contain no information about pagination. You won’t find the number of pages in the document unless you calculate how much space you need for each line to ascertain the number of pages. If you need to find exact coordinates of each character on the page, be sure to take into account all spacings, indentations and sizes.
- If you implement a full-featured DOCX layouter that handles tables, note the special cases when tables span multiple pages. A cell which causes a page overflow also affects other cells.
- Creating an optimal algorithm for calculating a table columns’ width is a challenging math problem and word processors and layouters usually use some suboptimal implementations. I propose using the algorithm from W3C HTML table documentation as a first approximation. I haven’t found a description of the algorithm used by MS Word, and Microsoft has fine-tuned the algorithm over time so different versions of Word may lay out tables slightly differently.
If something is unclear: reverse-engineer the XML!
When it’s not obvious how this or that XML tag works inside MS Word, there are two main approaches to figuring it out:
- Create the desired content step-by-step. Start with a simple docx file. Save each step to its own file, as in
1.docx
, 2.docx
, for example. Unzip each of them and use a visual diff tool for folder comparison to see which tags appear after your changes. (For a commercial option, try Araxis Merge, or for a free option, WinMerge.)
- If you generate a DOCX file that MS Word doesn’t like, work backwards. Simplify your XML step by step. At some point you will learn which change MS Word found incorrect.
DOCX is quite complex, isn’t it?
It is complex, and Microsoft’s license forbids using MS Word on the server side for processing DOCX– this is pretty standard for commercial products. Microsoft has, however, provided the XSLT file to handle most DOCX tags, but it won’t give you 100 percent or even 99 percent fidelity. Processes such as text wrapping over images are not supported, but you will be able to support the majority of documents. (If you don’t need complexity, consider using Markdown as an alternative.)
If you have a sufficient budget (there is no free DOCX rendering engine), you may want to use commercial products such as Aspose or docx4j. The most popular free solution is LibreOffice for converting between DOCX and other formats, including PDF. Unfortunately, LibreOffice contains many small bugs during conversion, and since it’s a sophisticated, open-source C++ product, it’s slow and difficult to fix fidelity issues.
Alternatively, if you find DOCX layouting too complicated to implement yourself, you can also convert it to HTML and use a browser to render it. You can also consider one of Toptal’s freelance XML developers.
DOCX Resources for further reading
- ECMA DOCX specification
- OpenXML library for DOCX manipulation from C#. It doesn’t contain information on layouting or rendering code, but offers a class hierarchy matching each possible XML node in DOCX.
- You can always search or ask on stackoverflow with keywords like docx4j, OpenXML and docx; there are people in the community who are knowledgeable.
MIME (Multipurpose Internet Mail Extensions) is an internet standard that is used to identify the types of content found in various files. These types can include applications, sounds, video, text, and many others.
MIME Types Explained
In a way, they are much like the file extensions you’re familiar with on your desktop or laptop. For example, the .doc extension that is used for Word documents, the .exe for executable windows files, and .xls that is found on Excel files are all file extensions you are undoubtedly familiar with for the files on your computer.
MIME types are defined in HTML by the type attribute on links, objects, and script and style tags.
In this article, we will list the various MIME types for applications, sounds, images, mail messages, text files, video files, and virtual world files. You can use this article as a handy catalog of all of these files should you need these MIME types in the future.
The MIME type for HTML is:
text/html
Applications and Their MIME Types
Here’s a list of applications, their MIME types, and their file extensions.
Application | MIME Type | File Extension |
---|---|---|
Corel Envoy | application/envoy | evy |
fractal image file | application/fractals | fif |
Windows print spool file | application/futuresplash | spl |
HTML application | application/hta | hta |
Atari ST Program | application/internet-property-stream | acx |
BinHex encoded file | application/mac-binhex40 | hqx |
Word document | application/msword | doc |
Word document template | application/msword | dot |
application/octet-stream | * | |
binary disk image | application/octet-stream | bin |
Java class file | application/octet-stream | class |
Disk Masher image | application/octet-stream | dms |
executable file | application/octet-stream | exe |
LHARC compressed archive | application/octet-stream | lha |
LZH compressed file | application/octet-stream | lzh |
CALS raster image | application/oda | oda |
ActiveX script | application/olescript | axs |
Acrobat file | application/pdf | |
Outlook profile file | application/pics-rules | prf |
certificate request file | application/pkcs10 | p10 |
certificate revocation list file | application/pkix-crl | crl |
Adobe Illustrator file | application/postscript | ai |
postscript file | application/postscript | eps |
postscript file | application/postscript | ps |
rich text format file | application/rtf | rtf |
set payment initiation | application/set-payment-initiation | setpay |
set registration initiation | application/set-registration-initiation | setreg |
Excel Add-in file | application/vnd.ms-excel | xla |
Excel chart | application/vnd.ms-excel | xlc |
Excel macro | application/vnd.ms-excel | xlm |
Excel spreadsheet | application/vnd.ms-excel | xls |
Excel template | application/vnd.ms-excel | xlt |
Excel workspace | application/vnd.ms-excel | xlw |
Outlook mail message | application/vnd.ms-outlook | msg |
serialized certificate store file | application/vnd.ms-pkicertstore | sst |
Windows catalog file | application/vnd.ms-pkiseccat | cat |
stereolithography file | application/vnd.ms-pkistl | stl |
PowerPoint template | application/vnd.ms-powerpoint | pot |
PowerPoint slide show | application/vnd.ms-powerpoint | pps |
PowerPoint presentation | application/vnd.ms-powerpoint | ppt |
Microsoft Project file | application/vnd.ms-project | mpp |
WordPerfect macro | application/vnd.ms-works | wcm |
Microsoft Works database | application/vnd.ms-works | wdb |
Microsoft Works spreadsheet | application/vnd.ms-works | wks |
Microsoft Works word processor document | application/vnd.ms-works | wps |
Windows help file | application/winhlp | hlp |
binary CPIO archive | application/x-bcpio | bcpio |
computable document format file | application/x-cdf | cdf |
Unix compressed file | application/x-compress | z |
gzipped tar file | application/x-compressed | tgz |
Unix CPIO archive | application/x-cpio | cpio |
Photoshop custom shapes file | application/x-csh | csh |
Kodak RAW image file | application/x-director | dcr |
Adobe Director movie | application/x-director | dir |
Macromedia Director movie | application/x-director | dxr |
device independent format file | application/x-dvi | dvi |
Gnu tar archive | application/x-gtar | gtar |
Gnu zipped archive | application/x-gzip | gz |
hierarchical data format file | application/x-hdf | hdf |
internet settings file | application/x-internet-signup | ins |
IIS internet service provider settings | application/x-internet-signup | isp |
ARC+ architectural file | application/x-iphone | iii |
JavaScript file | application/x-javascript | js |
LaTeX document | application/x-latex | latex |
Microsoft Access database | application/x-msaccess | mdb |
Windows CardSpace file | application/x-mscardfile | crd |
CrazyTalk clip file | application/x-msclip | clp |
dynamic link library | application/x-msdownload | dll |
Microsoft media viewer file | application/x-msmediaview | m13 |
Steuer2001 file | application/x-msmediaview | m14 |
multimedia viewer book source file | application/x-msmediaview | mvb |
Windows meta file | application/x-msmetafile | wmf |
Microsoft Money file | application/x-msmoney | mny |
Microsoft Publisher file | application/x-mspublisher | pub |
Turbo Tax tax schedule list | application/x-msschedule | scd |
FTR media file | application/x-msterminal | trm |
Microsoft Write file | application/x-mswrite | wri |
computable document format file | application/x-netcdf | cdf |
Mastercam numerical control file | application/x-netcdf | nc |
MSX computers archive format | application/x-perfmon | pma |
performance monitor counter file | application/x-perfmon | pmc |
process monitor log file | application/x-perfmon | pml |
Avid persistent media record file | application/x-perfmon | pmr |
Pegasus Mail draft stored message | application/x-perfmon | pmw |
personal information exchange file | application/x-pkcs12 | p12 |
PKCS #12 certificate file | application/x-pkcs12 | pfx |
PKCS #7 certificate file | application/x-pkcs7-certificates | p7b |
software publisher certificate file | application/x-pkcs7-certificates | spc |
certificate request response file | application/x-pkcs7-certreqresp | p7r |
PKCS #7 certificate file | application/x-pkcs7-mime | p7c |
digitally encrypted message | application/x-pkcs7-mime | p7m |
digitally signed email message | application/x-pkcs7-signature | p7s |
Bash shell script | application/x-sh | sh |
Unix shar archive | application/x-shar | shar |
Flash file | application/x-shockwave-flash | swf |
Stuffit archive file | application/x-stuffit | sit |
system 5 release 4 CPIO file | application/x-sv4cpio | sv4cpio |
system 5 release 4 CPIO checksum data | application/x-sv4crc | sv4crc |
consolidated Unix file archive | application/x-tar | tar |
Tcl script | application/x-tcl | tcl |
LaTeX source document | application/x-tex | tex |
LaTeX info document | application/x-texinfo | texi |
LaTeX info document | application/x-texinfo | texinfo |
unformatted manual page | application/x-troff | roff |
Turing source code file | application/x-troff | t |
TomeRaider 2 ebook file | application/x-troff | tr |
Unix manual | application/x-troff-man | man |
readme text file | application/x-troff-me | me |
3ds Max script file | application/x-troff-ms | ms |
uniform standard tape archive format file | application/x-ustar | ustar |
source code | application/x-wais-source | src |
internet security certificate | application/x-x509-ca-cert | cer |
security certificate | application/x-x509-ca-cert | crt |
DER certificate file | application/x-x509-ca-cert | der |
public key security object | application/vnd.ms-pkipko | pko |
zipped file | application/zip | zip |
Sound Files and Their MIME Types
Here’s a list of sound files, their MIME types, and their file extensions.
Application | MIME Type | File Extension |
---|---|---|
audio file | audio/basic | au |
sound file | audio/basic | snd |
midi file | audio/mid | mid |
media processing server studio | audio/mid | rmi |
MP3 file | audio/mpeg | mp3 |
audio interchange file format | audio/x-aiff | aif |
compressed audio interchange file | audio/x-aiff | aifc |
audio interchange file format | audio/x-aiff | aiff |
media playlist file | audio/x-mpegurl | m3u |
Real Audio file | audio/x-pn-realaudio | ra |
Real Audio metadata file | audio/x-pn-realaudio | ram |
WAVE audio file | audio/x-wav | wav |
Image Files and Their MIME Types
Here’s a list of image files, their MIME types, and their file extensions.
Application | MIME Type | File Extension |
---|---|---|
Bitmap | image/bmp | bmp |
compiled source code | image/cis-cod | cod |
graphic interchange format | image/gif | gif |
image file | image/ief | ief |
JPEG image | image/jpeg | jpe |
JPEG image | image/jpeg | jpeg |
JPEG image | image/jpeg | jpg |
JPEG file interchange format | image/pjpeg | jfif |
scalable vector graphic | image/svg+xml | svg |
TIF image | image/tiff | tif |
TIF image | image/tiff | tiff |
Sun raster graphic | image/x-cmu-raster | ras |
Corel metafile exchange image file | image/x-cmx | cmx |
icon | image/x-icon | ico |
portable any map image | image/x-portable-anymap | pnm |
portable bitmap image | image/x-portable-bitmap | pbm |
portable graymap image | image/x-portable-graymap | pgm |
portable pixmap image | image/x-portable-pixmap | ppm |
RGB bitmap | image/x-rgb | rgb |
X11 bitmap | image/x-xbitmap | xbm |
X11 pixmap | image/x-xpixmap | xpm |
X-Windows dump image | image/x-xwindowdump | xwd |
Mail Message Files and Their MIME Types
Here’s a list of mail message files, their MIME types, and their file extensions.
Application | MIME Type | File Extension |
---|---|---|
MHTML web archive | message/rfc822 | mht |
MIME HTML file | message/rfc822 | mhtml |
Windows Live Mail newsgroup file | message/rfc822 | nws |
Text Files and Their MIME Types
Here’s a list of text files, their MIME types, and their file extensions.
Application | MIME Type | File Extension |
---|---|---|
Cascading Style Sheet | text/css | css |
H.323 internet telephony file | text/h323 | 323 |
HTML file | text/html | htm |
HTML file | text/html | html |
Exchange streaming media file | text/html | stm |
NetMeeting user location service file | text/iuls | uls |
BASIC source code file | text/plain | bas |
C/C++ source code file | text/plain | c |
C/C++/Objective C header file | text/plain | h |
text file | text/plain | txt |
rich text file | text/richtext | rtx |
Scitex continuous tone file | text/scriptlet | sct |
tab separated values file | text/tab-separated-values | tsv |
hypertext template file | text/webviewhtml | htt |
HTML component file | text/x-component | htc |
TeX font encoding file | text/x-setext | etx |
vCard file | text/x-vcard | vcf |
Video Files and Their MIME Types
Here’s a list of video files, their MIME types, and their file extensions.
Application | MIME Type | File Extension |
---|---|---|
MPEG-2 audio file | video/mpeg | mp2 |
MPEG-2 audio file | video/mpeg | mpa |
MPEG movie file | video/mpeg | mpe |
MPEG movie file | video/mpeg | mpeg |
MPEG movie file | video/mpeg | mpg |
MPEG-2 video stream | video/mpeg | mpv2 |
MPEG-4 | video/mp4 | mp4 |
Apple QuickTime movie | video/quicktime | mov |
Apple QuickTime movie | video/quicktime | qt |
Logos library system file | video/x-la-asf | lsf |
streaming media shortcut | video/x-la-asf | lsx |
advanced systems format file | video/x-ms-asf | asf |
ActionScript remote document | video/x-ms-asf | asr |
Microsoft ASF redirector file | video/x-ms-asf | asx |
audio video interleave file | video/x-msvideo | avi |
Apple QuickTime movie | video/x-sgi-movie | movie |
Virtual World Files and Their MIME Types
Here’s a list of virtual world files, their MIME types, and their file extensions.
Application | MIME Type | File Extension |
---|---|---|
Flare decompiled ActionScript file | x-world/x-vrml | flr |
VRML file | x-world/x-vrml | vrml |
VRML world | x-world/x-vrml | wrl |
compressed VRML world | x-world/x-vrml | wrz |
3ds Max XML animation file | x-world/x-vrml | xaf |
Reality Lab 3D image file | x-world/x-vrml | xof |