
    khm                         d Z ddlZddlZddlmZ ddlmZ dZ G d d      Z G d d	e      Z	 G d
 de	      Z
 G d de	      Z G d de      Z ede
fdefdefg      ZddZd Zd Zy)a9  File formats for training and testing data.

Includes a registry of valid file formats. New file formats can be added to the
registry like so: ::

    from textblob import formats


    class PipeDelimitedFormat(formats.DelimitedFormat):
        delimiter = "|"


    formats.register("psv", PipeDelimitedFormat)

Once a format has been registered, classifiers will be able to read data files with
that format. ::

    from textblob.classifiers import NaiveBayesAnalyzer

    with open("training_data.psv", "r") as fp:
        cl = NaiveBayesAnalyzer(fp, format="psv")
    N)OrderedDict)is_filelikezutf-8c                   ,    e Zd ZdZd Zd Zed        Zy)
BaseFormata  Interface for format classes. Individual formats can decide on the
    composition and meaning of ``**kwargs``.

    :param File fp: A file-like object.

    .. versionchanged:: 0.9.0
        Constructor receives a file pointer rather than a file path.
    c                      y N selffpkwargss      L/opt/mcp/mcp-sentiment/venv/lib/python3.12/site-packages/textblob/formats.py__init__zBaseFormat.__init__+   s        c                     t        d      )(Return an iterable object from the data.z&Must implement a "to_iterable" method.NotImplementedErrorr   s    r   to_iterablezBaseFormat.to_iterable.   s    !"JKKr   c                     t        d      )zDetect the file format given a filename.
        Return True if a stream is this file format.

        .. versionchanged:: 0.9.0
            Changed from a static method to a class method.
        z'Must implement a "detect" class method.r   clsstreams     r   detectzBaseFormat.detect2   s     ""KLLr   N__name__
__module____qualname____doc__r   r   classmethodr   r	   r   r   r   r   !   s(    L M Mr   r   c                   0    e Zd ZdZdZd Zd Zed        Zy)DelimitedFormatz%A general character-delimited format.,c                     t        j                  | |fi | t        j                  || j                        }|D cg c]  }| c}| _        y c c}w )N)	delimiter)r   r   csvreaderr&   data)r   r   r   r(   rows        r   r   zDelimitedFormat.__init__B   s@    D"//B$..9$*+SS+	+s   	Ac                     | j                   S )r   )r)   r   s    r   r   zDelimitedFormat.to_iterableG   s    yyr   c                     	 t        j                         j                  || j                         y# t         j                  t
        f$ r Y yw xY w)zReturn True if stream is valid.)
delimitersTF)r'   Sniffersniffr&   Error	TypeErrorr   s     r   r   zDelimitedFormat.detectK   sB    	KKM3==A		9% 		s   /2 AAN)	r   r   r   r    r&   r   r   r!   r   r	   r   r   r#   r#   =   s(    /I,
  r   r#   c                       e Zd ZdZdZy)CSVzCSV format. Assumes each row is of the form ``text,label``.
    ::

        Today is a good day,pos
        I hate this car.,pos
    r$   Nr   r   r   r    r&   r	   r   r   r3   r3   U   s     Ir   r3   c                       e Zd ZdZdZy)TSVz;TSV format. Assumes each row is of the form ``text	label``.	Nr4   r	   r   r   r6   r6   `   s
    FIr   r6   c                   ,    e Zd ZdZd Zd Zed        Zy)JSONa  JSON format.

    Assumes that JSON is formatted as an array of objects with ``text`` and
    ``label`` properties.
    ::

        [
            {"text": "Today is a good day.", "label": "pos"},
            {"text": "I hate this car.", "label": "neg"},
        ]
    c                 f    t        j                  | |fi | t        j                  |      | _        y r   )r   r   jsonloaddictr
   s      r   r   zJSON.__init__s   s&    D"//IIbM	r   c                 P    | j                   D cg c]  }|d   |d   f c}S c c}w )z-Return an iterable object from the JSON data.textlabel)r=   )r   ds     r   r   zJSON.to_iterablew   s'    15;A6AgJ';;;s   #c                 N    	 t        j                  |       y# t        $ r Y yw xY w)z$Return True if stream is valid JSON.TF)r;   loads
ValueErrorr   s     r   r   zJSON.detect{   s(    	JJv 		s    	$$Nr   r	   r   r   r9   r9   f   s%    
"<  r   r9   r'   r;   tsvc                     t        |       syt        j                         D ]H  }|j                  | j	                  |            r| j                  d       |c S | j                  d       J y)zAttempt to detect a file's format, trying each of the supported
    formats. Return the format class that was detected. If no format is
    detected, return ``None``.
    Nr   )r   	_registryvaluesr   readseek)r   max_readFormats      r   r   r      s[    
 r?""$ ==*+GGAJM

	
 r   c                      t         S )z*Return a dictionary of registered formats.)rG   r	   r   r   get_registryrN      s    r   c                     |t               | <   y)zRegister a new format.

    :param str name: The name that will be used to refer to the format, e.g. 'csv'
    :param type format_class: The format class to register.
    N)rN   )nameformat_classs     r   registerrR      s     (LN4r   )i   )r    r'   r;   collectionsr   textblob.utilsr   DEFAULT_ENCODINGr   r#   r3   r6   r9   rG   r   rN   rR   r	   r   r   <module>rV      s   .   # & M M8j 0/ / : > 				
(r   