CLAM Formats

class clam.common.formats.AlpinoXMLFormat(file, **kwargs)
attributes = {}
mimetype = 'text/xml'
name = 'Alpino XML'
scheme = ''
class clam.common.formats.BinaryDataFormat(file, **kwargs)
attributes = {}
mimetype = 'application/octet-stream'
name = 'Application-specific Binary Data'
class clam.common.formats.CSVFormat(file, **kwargs)
attributes = {'encoding': True, 'language': False}
mimetype = 'text/csv'
name = 'Comma separated file'
class clam.common.formats.DCOIFormat(file, **kwargs)
attributes = {}
mimetype = 'text/xml'
name = 'DCOI format'
scheme = ''
class clam.common.formats.DjVuFormat(file, **kwargs)
attributes = {}
mimetype = 'image/x-djvu'
name = 'DjVu format'
class clam.common.formats.ExampleFormat(file, **kwargs)

This is an example format; please inspect its source code if you want to create custom formats.

allowcustomattributes = True
httpheaders()

HTTP headers to output for this format. Yields (key,value) tuples.

loadinlinemetadata()

If there is metadata IN the actual file, this method should extract it and assign it to this object. It is called automatically from the constructor. Note that the file (CLAMFile) is accessible through self.file.

mimetype = 'text/plain'
saveinlinemetadata()

If there is metadata that should be IN the actual file, this method can store it. Note that the file (CLAMFile) is accessible through self.file.

scheme = None
validate()

Add your validation logic here; this method should return True or False.

class clam.common.formats.FoLiAXMLFormat(file, **kwargs)
attributes = {}
mimetype = 'text/xml'
name = 'FoLiA XML'
scheme = ''
class clam.common.formats.GifImageFormat(file, **kwargs)
attributes = {}
mimetype = 'image/gif'
name = 'Gif Image'
class clam.common.formats.HTMLFormat(file, **kwargs)

HTML Format Definition. This format has one required attribute: encoding

attributes = {'encoding': True, 'language': False}
httpheaders()

HTTP headers to output for this format. Yields (key,value) tuples.

mimetype = 'text/html'
class clam.common.formats.JpegImageFormat(file, **kwargs)
attributes = {}
mimetype = 'image/jpeg'
name = 'Jpeg Image'
class clam.common.formats.KBXMLFormat(file, **kwargs)
mimetype = 'text/xml'
name = 'Koninklijke Bibliotheek XML-formaat'
scheme = ''
class clam.common.formats.MP3AudioFormat(file, **kwargs)
attributes = {}
mimetype = 'audio/mpeg'
name = 'MP3 Audio File'
class clam.common.formats.MSWordFormat(file, **kwargs)
attributes = {}
mimetype = 'application/msword'
name = 'Microsoft Word format'
scheme = ''
class clam.common.formats.MpegVideoFormat(file, **kwargs)
attributes = {}
mimetype = 'video/mpeg'
name = 'Mpeg Video'
class clam.common.formats.OggAudioFormat(file, **kwargs)
attributes = {}
mimetype = 'audio/ogg'
name = 'Ogg Audio File'
class clam.common.formats.OggVideoFormat(file, **kwargs)
attributes = {}
mimetype = 'audio/ogg'
name = 'Ogg Video File'
class clam.common.formats.OpenDocumentTextFormat(file, **kwargs)
attributes = {}
mimetype = 'application/vnd.oasis.opendocument.text'
name = 'Open Document Text Format'
class clam.common.formats.PDFFormat(file, **kwargs)
attributes = {}
mimetype = 'application/pdf'
name = 'PDF'
class clam.common.formats.PlainTextFormat(file, **kwargs)

Plain Text Format Definition. This format has one required attribute: encoding

attributes = {'encoding': True, 'language': False}
httpheaders()

HTTP headers to output for this format. Yields (key,value) tuples.

mimetype = 'text/plain'
class clam.common.formats.PngImageFormat(file, **kwargs)
attributes = {}
mimetype = 'image/png'
name = 'PNG Image'
class clam.common.formats.TICCLShadowOutputXML(file, **kwargs)
mimetype = 'text/xml'
name = 'Ticcl Shadow Output'
scheme = ''
class clam.common.formats.TICCLVariantOutputXML(file, **kwargs)
mimetype = 'text/xml'
name = 'Ticcl Variant Output'
scheme = ''
class clam.common.formats.TadpoleFormat(file, **kwargs)
attributes = {'tokenisation': 'yes', 'morphologicalanalysis': ['yes', 'no'], 'postagging': ['yes', 'no'], 'mwudetection': ['yes', 'no'], 'parsing': ['yes', 'no'], 'lemmatisation': ['yes', 'no']}
mimetype = 'text/plain'
name = 'Tadpole Columned Output Format'
class clam.common.formats.TiffImageFormat(file, **kwargs)
attributes = {}
mimetype = 'image/tiff'
name = 'Tiff Image'
class clam.common.formats.UndefinedXMLFormat(file, **kwargs)
mimetype = 'text/xml'
name = 'Undefined XML Format'
scheme = ''
class clam.common.formats.WaveAudioFormat(file, **kwargs)
attributes = {}
mimetype = 'audio/wav'
name = 'Wave Audio File'
class clam.common.formats.XMLStyleSheet(file, **kwargs)
attributes = {}
mimetype = 'application/xslt+xml'
name = 'XML Stylesheet'
class clam.common.formats.ZIPFormat(file, **kwargs)
attributes = {}
mimetype = 'application/zip'
name = 'ZIP Archive'