
    kh9k                        d Z ddlZddlZddlmZ ddlZddlmZmZm	Z	m
Z
mZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z' ejP                  jR                  Z*e+e,fZ-d Z. G d de+      Z/ G d de0      Z1ddZ2d Z3 G d dee      Z4 G d de4      Z5 G d de4      Z6 G d d      Z7y) a  Wrappers for various units of text, including the main
:class:`TextBlob <textblob.blob.TextBlob>`, :class:`Word <textblob.blob.Word>`,
and :class:`WordList <textblob.blob.WordList>` classes.
Example usage: ::

    >>> from textblob import TextBlob
    >>> b = TextBlob("Simple is better than complex.")
    >>> b.tags
    [(u'Simple', u'NN'), (u'is', u'VBZ'), (u'better', u'JJR'), (u'than', u'IN'), (u'complex', u'NN')]
    >>> b.noun_phrases
    WordList([u'simple'])
    >>> b.words
    WordList([u'Simple', u'is', u'better', u'than', u'complex'])
    >>> b.sentiment
    (0.06666666666666667, 0.41904761904761906)
    >>> b.words[0].synsets()[0]
    Synset('simple.n.01')

.. versionchanged:: 0.8.0
    These classes are now imported from ``textblob`` rather than ``text.blob``.
    N)defaultdict)BaseNPExtractor
BaseParserBaseSentimentAnalyzer
BaseTaggerBaseTokenizer)cached_propertyrequires_nltk_corpus)suggest)	pluralize)singularize)BlobComparableMixinStringlikeMixin)FastNPExtractor)PatternParser)PatternAnalyzer)
NLTKTagger)WordTokenizersent_tokenizeword_tokenize)PUNCTUATION_REGEX
lowerstripc                     | dv rt         j                  S | dv rt         j                  S | dv rt         j                  S | dv rt         j                  S y)z.Converts a Penn corpus tag into a Wordnet tag.)NNNNSNNPNNPS)JJJJRJJS)VBVBDVBGVBNVBPVBZ)RBRBRRBSN)_wordnetNOUNADJVERBADV)tags    I/opt/mcp/mcp-sentiment/venv/lib/python3.12/site-packages/textblob/blob.py_penn_to_wordnetr1   7   sQ    
**}}
""||
77}}
""||    c                       e Zd ZdZd fd	ZddZd Zd Zd Zd Z	d Z
d	 Zeed
               Zedd       Zej"                  j$                  j'                         Zej"                  j(                  j+                         Zej"                  j,                  j/                  d      ZefdZed        Zed        ZddZddZ xZS )Wordz`A simple word representation. Includes methods for inflection,
    and WordNet integration.
    c                 $    t         |   | |      S )zReturn a new instance of the class. It is necessary to override
        this method in order to handle the extra pos_tag argument in the
        constructor.
        )super__new__)clsstringpos_tag	__class__s      r0   r7   zWord.__new__I   s    
 wsF++r2   c                      || _         || _        y N)r9   r:   )selfr9   r:   s      r0   __init__zWord.__init__P   s    r2   c                 ,    t        | j                        S r=   )reprr9   r>   s    r0   __repr__zWord.__repr__T   s    DKK  r2   c                     | j                   S r=   )r9   rB   s    r0   __str__zWord.__str__W   s    {{r2   c                 >    t        t        | j                              S )z4Return the singular version of the word as a string.)r4   _singularizer9   rB   s    r0   r   zWord.singularizeZ   s    L-..r2   c                 >    t        t        | j                              S )z2Return the plural version of the word as a string.)r4   
_pluralizer9   rB   s    r0   r   zWord.pluralize^   s    Jt{{+,,r2   c                 ,    t        | j                        S )a  Return a list of (word, confidence) tuples of spelling corrections.

        Based on: Peter Norvig, "How to Write a Spelling Corrector"
        (http://norvig.com/spell-correct.html) as implemented in the pattern
        library.

        .. versionadded:: 0.6.0
        )r   r9   rB   s    r0   
spellcheckzWord.spellcheckb   s     t{{##r2   c                 @    t        | j                         d   d         S )zCorrect the spelling of the word. Returns the word with the highest
        confidence using the spelling corrector.

        .. versionadded:: 0.6.0
        r   )r4   rK   rB   s    r0   correctzWord.correctm   s      DOO%a(+,,r2   c                 :    | j                  | j                        S )z>Return the lemma of this word using Wordnet's morphy function.pos)	lemmatizer:   rB   s    r0   lemmaz
Word.lemmau   s     ~~$,,~//r2   c                     |t         j                  }n.|t         j                  j                         v r|}nt	        |      }t
        j                  j                         }|j                  | j                  |      S )zReturn the lemma for a word using WordNet's morphy function.

        :param pos: Part of speech to filter upon. If `None`, defaults to
            ``_wordnet.NOUN``.

        .. versionadded:: 0.8.1
        )
r*   r+   _FILEMAPkeysr1   nltkstemWordNetLemmatizerrQ   r9   )r>   rP   r/   
lemmatizers       r0   rQ   zWord.lemmatize{   sa     ;--CH%%**,,C"3'CYY002
##DKK55r2   englishc                 8    |j                  | j                        S )zmStem a word using various NLTK stemmers. (Default: Porter Stemmer)

        .. versionadded:: 0.12.0
        )rW   r9   )r>   stemmers     r0   rW   z	Word.stem   s    
 ||DKK((r2   c                 &    | j                  d      S )ztThe list of Synset objects for this Word.

        :rtype: list of Synsets

        .. versionadded:: 0.7.0
        NrO   )get_synsetsrB   s    r0   synsetszWord.synsets   s     D))r2   c                 &    | j                  d      S )zThe list of definitions for this word. Each definition corresponds
        to a synset.

        .. versionadded:: 0.7.0
        NrO   )definerB   s    r0   definitionszWord.definitions   s     {{t{$$r2   c                 B    t         j                  | j                  |      S )a   Return a list of Synset objects for this word.

        :param pos: A part-of-speech tag to filter upon. If ``None``, all
            synsets for all parts of speech will be loaded.

        :rtype: list of Synsets

        .. versionadded:: 0.7.0
        )r*   r_   r9   )r>   rP   s     r0   r^   zWord.get_synsets   s     S11r2   c                 h    | j                  |      D cg c]  }|j                          c}S c c}w )a;  Return a list of definitions for this word. Each definition
        corresponds to a synset for this word.

        :param pos: A part-of-speech tag to filter upon. If ``None``, definitions
            for all parts of speech will be loaded.
        :rtype: List of strings

        .. versionadded:: 0.7.0
        rO   )r^   
definition)r>   rP   syns      r0   ra   zWord.define   s-     -1,<,<,<,EFS FFFs   /r=   )__name__
__module____qualname____doc__r7   r?   rC   rE   r   r   rK   rM   r	   r
   rR   rQ   rV   rW   porterPorterStemmer	lancasterLancasterStemmersnowballSnowballStemmerr_   rb   r^   ra   __classcell__r;   s   @r0   r4   r4   D   s    ,!/-	$- 0  0 6 6" II$$224Myy**;;=ii((88CO ) ) * * % %
2
Gr2   r4   c                        e Zd ZdZ fdZ fdZ fdZ fdZ fdZ fdZ	d fd	Z
 fd	Zd
 Zd Zd Zd Zd Zd Zd Z xZS )WordListz A list-like collection of words.c                 \    t         |   |D cg c]  }t        |       c}       yc c}w )z[Initialize a WordList. Takes a collection of strings as
        its only argument.
        N)r6   r?   r4   )r>   
collectionwr;   s      r0   r?   zWordList.__init__   s$     	:6a$q'676s   )c                      t         |          S )z-Returns a string representation for printing.)r6   rC   )r>   r;   s    r0   rE   zWordList.__str__   s    w!!r2   c                 X    | j                   j                  }| dt        |           dS )z.Returns a string representation for debugging.())r;   rg   r6   rC   )r>   
class_namer;   s     r0   rC   zWordList.__repr__   s.    ^^,,
Quw/12!44r2   c                 h    t         |   |      }t        |t              r| j	                  |      S |S )z$Returns a string at the given index.)r6   __getitem__
isinstanceslicer;   )r>   keyitemr;   s      r0   r~   zWordList.__getitem__   s0    w"3'c5!>>$''Kr2   c                 B    | j                  t        | 	  ||            S r=   )r;   r6   __getslice__)r>   ijr;   s      r0   r   zWordList.__getslice__   s    ~~eg21a899r2   c                 z    t        |t              rt        |   |t	        |             yt        |   ||       y)zPlaces object at given index, replacing existing item. If the object
        is a string, inserts a :class:`Word <Word>` object.
        N)r   
basestringr6   __setitem__r4   )r>   indexobjr;   s      r0   r   zWordList.__setitem__   s1     c:&GtCy1Gs+r2   c                     |s@ | D cg c]  }|j                          c}j                  |j                         g|i |S t        |   |g|i |S c c}w )zGet the count of a word or phrase `s` within this WordList.

        :param strg: The string to count.
        :param case_sensitive: A boolean, whether or not the search is case-sensitive.
        )lowercountr6   )r>   strgcase_sensitiveargskwargswordr;   s         r0   r   zWordList.count   s[     8T2TDJJL288WWPVWWw}T3D3F33 3s   Ac                 v    t        |t              rt        |   t	        |             yt        |   |       y)zkAppend an object to end. If the object is a string, appends a
        :class:`Word <Word>` object.
        N)r   r   r6   appendr4   )r>   r   r;   s     r0   r   zWordList.append   s+     c:&GN49%GN3r2   c                 4    |D ]  }| j                  |        y)zExtend WordList by appending elements from ``iterable``. If an element
        is a string, appends a :class:`Word <Word>` object.
        N)r   )r>   iterablees      r0   extendzWordList.extend   s      	AKKN	r2   c                 f    | j                  | D cg c]  }|j                          c}      S c c}w )z1Return a new WordList with each word upper-cased.)r;   upperr>   r   s     r0   r   zWordList.upper  %    ~~=tzz|=>>=   .c                 f    | j                  | D cg c]  }|j                          c}      S c c}w )z1Return a new WordList with each word lower-cased.)r;   r   r   s     r0   r   zWordList.lower  r   r   c                 f    | j                  | D cg c]  }|j                          c}      S c c}w )z8Return the single version of each word in this WordList.)r;   r   r   s     r0   r   zWordList.singularize  s(    ~~dCdt//1CDDCr   c                 f    | j                  | D cg c]  }|j                          c}      S c c}w )z8Return the plural version of each word in this WordList.)r;   r   r   s     r0   r   zWordList.pluralize  &    ~~DADt~~/ABBAr   c                 f    | j                  | D cg c]  }|j                          c}      S c c}w )z/Return the lemma of each word in this WordList.)r;   rQ   r   s     r0   rQ   zWordList.lemmatize  r   r   c           
      j    | j                  | D cg c]  } |j                  |i | c}      S c c}w )z/Return the stem for each word in this WordList.)r;   rW   )r>   r   r   r   s       r0   rW   zWordList.stem  s/    ~~dKdytyy$9&9KLLKs   0)F)rg   rh   ri   rj   r?   rE   rC   r~   r   r   r   r   r   r   r   r   r   rQ   rW   rq   rr   s   @r0   rt   rt      sU    *8"5
:,4 ??ECCMr2   rt   c                 j    |r|n|j                   }| t        | |      st        | d|       | xs |S )aj  Validates a parameter passed to __init__. Makes sure that obj is
    the correct class. Return obj if it's not None or falls back to default

    :param obj: The object passed in.
    :param name: The name of the parameter.
    :param base_class: The class that obj must inherit from.
    :param default: The default object to fall back upon if obj is None.
    z must be an instance of )rg   r   
ValueError)r   name
base_classdefaultbase_class_names        r0   _validated_paramr      sC     *9oj>Q>QO
z#z:D6!9/9JKLL>'r2   c                    t        |dt        t        j                  j                  j
                  ft        j                  d      | _        t        |dt        t        j                        | _	        t        |dt        t        j                        | _        t        |dt        t        j                        | _        t        |dt        t        j                        | _        || _        y	)
z;Common initialization between BaseBlob and Blobber classes.	tokenizerr   )r   r   r   np_extractor)r   r   
pos_taggeranalyzerparserN)r   r   rV   tokenizeapi
TokenizerIBaseBlobr   r   r   r   r   r   r   r   r   
classifier)r   r   r   r   r   r   r   s          r0   _initialize_modelsr   /  s    
 %!4==#4#4#?#?@""'CM ("%%	C &L*h.A.ACN $*3X5F5FCL "&(JPCJCNr2   c                   l   e Zd ZdZ e       Z e       Z e       Z	 e
       Z e       Z	 	 	 	 	 	 	 ddZed        Zed        ZddZddZd Zed	        Zed
        Zed        Zed        Zed        Zed        ZeZed        Zed        ZddZd Zd Z d Z!d Z"d Z#de$jJ                  fdZ&y)r   aZ  An abstract base class that all textblob classes will inherit from.
    Includes words, POS tag, NP, and word count properties. Also includes
    basic dunder and string methods for making objects like Python strings.

    :param text: A string.
    :param tokenizer: (optional) A tokenizer instance. If ``None``,
        defaults to :class:`WordTokenizer() <textblob.tokenizers.WordTokenizer>`.
    :param np_extractor: (optional) An NPExtractor instance. If ``None``,
        defaults to :class:`FastNPExtractor() <textblob.en.np_extractors.FastNPExtractor>`.
    :param pos_tagger: (optional) A Tagger instance. If ``None``,
        defaults to :class:`NLTKTagger <textblob.en.taggers.NLTKTagger>`.
    :param analyzer: (optional) A sentiment analyzer. If ``None``,
        defaults to :class:`PatternAnalyzer <textblob.en.sentiments.PatternAnalyzer>`.
    :param parser: A parser. If ``None``, defaults to
        :class:`PatternParser <textblob.en.parsers.PatternParser>`.
    :param classifier: A classifier.

    .. versionchanged:: 0.6.0
        ``clean_html`` parameter deprecated, as it was in NLTK.
    Nc	           	          t        |t              st        dt        |             |rt	        d      |x| _        | _        t        | j
                  d      | _        t        | ||||||       y )NzEThe `text` argument passed to `__init__(text)` must be a string, not z^clean_html has been deprecated. To remove HTML markup, use BeautifulSoup's get_text() functionT)all)
r   r   	TypeErrortypeNotImplementedErrorrawr9   r   strippedr   )	r>   textr   r   r   r   r   r   
clean_htmls	            r0   r?   zBaseBlob.__init__g  s~     $
+))-d6  %& 
 "&%4;"4886)Zx	
r2   c                 B    t        t        | j                  d            S zReturn a list of word tokens. This excludes punctuation characters.
        If you want to include punctuation characters, access the ``tokens``
        property.

        :returns: A :class:`WordList <WordList>` of word tokens.
        F)include_puncrt   r   r   rB   s    r0   wordszBaseBlob.words       dhhUCDDr2   c                 ^    t        | j                  j                  | j                              S )zReturn a list of tokens, using this blob's tokenizer object
        (defaults to :class:`WordTokenizer <textblob.tokenizers.WordTokenizer>`).
        )rt   r   r   r   rB   s    r0   tokenszBaseBlob.tokens  s"    
 //9::r2   c                 j    ||n| j                   }t        |j                  | j                              S )zReturn a list of tokens, using ``tokenizer``.

        :param tokenizer: (optional) A tokenizer object. If None, defaults to
            this blob's default tokenizer.
        )r   rt   r   r   )r>   r   ts      r0   r   zBaseBlob.tokenize  s,     #.IDNN

488,--r2   c                 X    ||n| j                   }|j                  | j                        S )zParse the text.

        :param parser: (optional) A parser instance. If ``None``, defaults to
            this blob's default parser.

        .. versionadded:: 0.6.0
        )r   parser   )r>   r   ps      r0   r   zBaseBlob.parse  s'     (Fdkkwwtxx  r2   c                 z    | j                   t        d      | j                   j                  | j                        S )z2Classify the blob using the blob's ``classifier``.z-This blob has no classifier. Train one first!)r   	NameErrorclassifyr   rB   s    r0   r   zBaseBlob.classify  s1    ??"KLL''11r2   c                 L    | j                   j                  | j                        S )aF  Return a tuple of form (polarity, subjectivity ) where polarity
        is a float within the range [-1.0, 1.0] and subjectivity is a float
        within the range [0.0, 1.0] where 0.0 is very objective and 1.0 is
        very subjective.

        :rtype: namedtuple of the form ``Sentiment(polarity, subjectivity)``
        r   analyzer   rB   s    r0   	sentimentzBaseBlob.sentiment  s     }}$$TXX..r2   c                 P    | j                   j                  | j                  d      S )a  Return a tuple of form (polarity, subjectivity, assessments ) where
        polarity is a float within the range [-1.0, 1.0], subjectivity is a
        float within the range [0.0, 1.0] where 0.0 is very objective and 1.0
        is very subjective, and assessments is a list of polarity and
        subjectivity scores for the assessed tokens.

        :rtype: namedtuple of the form ``Sentiment(polarity, subjectivity,
        assessments)``
        T)keep_assessmentsr   rB   s    r0   sentiment_assessmentszBaseBlob.sentiment_assessments  s"     }}$$TXX$EEr2   c                 N    t               j                  | j                        d   S )zaReturn the polarity score as a float within the range [-1.0, 1.0]

        :rtype: float
        r   r   r   r   rB   s    r0   polarityzBaseBlob.polarity  s!      ((2155r2   c                 N    t               j                  | j                        d   S )zReturn the subjectivity score as a float within the range [0.0, 1.0]
        where 0.0 is very objective and 1.0 is very subjective.

        :rtype: float
           r   rB   s    r0   subjectivityzBaseBlob.subjectivity  s!      ((2155r2   c                     t        | j                  j                  | j                        D cg c].  }t	        |      dkD  r|j                         j                         0 c}      S c c}w )z-Returns a list of noun phrases for this blob.r   )rt   r   extractr   lenstripr   )r>   phrases     r0   noun_phraseszBaseBlob.noun_phrases  s]      #//77Av;? $$&
 	
s   3A'c           	         t        | t              r;| j                  D cg c]  }|j                   c}D cg c]  }|D ]  }|  c}}S | j                  j                  |       D cg c]D  \  }}t        j                  t        |            s!t        t        |      |      t        |      fF c}}S c c}w c c}}w c c}}w )aa  Returns an list of tuples of the form (word, POS tag).

        Example:
        ::

            [
                ("At", "IN"),
                ("eight", "CD"),
                ("o'clock", "JJ"),
                ("on", "IN"),
                ("Thursday", "NNP"),
                ("morning", "NN"),
            ]

        :rtype: list of tuples
        )r:   )
r   TextBlob	sentencespos_tagsr   r/   r   matchstrr4   )r>   ssublistvalr   r   s         r0   r   zBaseBlob.pos_tags  s    $ dH% 59NNCq

C"     $2248D!(..s1v6 c$i+SV4 	  Ds   B7B<*A	Cc                     t        t              }| j                  D cg c]  }t        |       }}|D ]  }||xx   dz  cc<    |S c c}w )z,Dictionary of word frequencies in this text.r   )r   intr   r   )r>   countsr   stripped_wordss       r0   word_countszBaseBlob.word_counts  sR     S!7;zzBt*T*BB" 	D4LAL	 Cs   Ac                 `    t        t              }| j                  D ]  }||xx   dz  cc<    |S )z3Dictionary of noun phrase frequencies in this text.r   )r   r   r   )r>   r   r   s      r0   	np_countszBaseBlob.np_counts  s6     S!'' 	 F6NaN	 r2   c           	          |dk  rg S t        t        | j                        |z
  dz         D cg c]  }t        | j                  |||z           }}|S c c}w )zReturn a list of n-grams (tuples of n successive words) for this
        blob.

        :rtype: List of :class:`WordLists <WordList>`
        r   r   )ranger   r   rt   )r>   nr   gramss       r0   ngramszBaseBlob.ngrams  sb     6I5:3tzz?Q;NQR;R5S
01HTZZAE*+
 
 
s   "Ac                     t         j                  j                  | j                  d      }d |D        }dj	                  |      }| j                  |      S )zAttempt to correct the spelling of a blob.

        .. versionadded:: 0.6.0

        :rtype: :class:`BaseBlob <BaseBlob>`
        z\w+|[^\w\s]|\sc              3   N   K   | ]  }t        |      j                           y wr=   )r4   rM   ).0rw   s     r0   	<genexpr>z#BaseBlob.correct.<locals>.<genexpr>*  s     71T!W__&7s   #% )rV   r   regexp_tokenizer   joinr;   )r>   r   	correctedrets       r0   rM   zBaseBlob.correct!  sF     ..txx9JK77	ggi ~~c""r2   c                     | j                   S )zXKey used by ComparableMixin to implement all rich comparison
        operators.
        r   rB   s    r0   _cmpkeyzBaseBlob._cmpkey.  s     xxr2   c                     | j                   S )z8Key used by StringlikeMixin to implement string methods.r  rB   s    r0   _strkeyzBaseBlob._strkey4  s    xxr2   c                 4    t        | j                               S r=   )hashr  rB   s    r0   __hash__zBaseBlob.__hash__8  s    DLLN##r2   c                    t        |t              r| j                  | j                  |z         S t        |t              r(| j                  | j                  |j                  z         S t        d| j                  j                   d      )zConcatenates two text objects the same way Python strings are
        concatenated.

        Arguments:
        - `other`: a string or a text object
        z#Operands must be either strings or z objects)r   r   r;   r   r   r   rg   )r>   others     r0   __add__zBaseBlob.__add__;  sp     eZ(>>$((U"233x(>>$((UYY"6775dnn6M6M5NhW r2   c                 T    t        | j                         j                  ||            S )zBehaves like the built-in str.split() except returns a
        WordList.

        :rtype: :class:`WordList <WordList>`
        )rt   r  split)r>   sepmaxsplits      r0   r  zBaseBlob.splitK  s"     ,,S(;<<r2   )NNNNNNFr=   )   )'rg   rh   ri   rj   r   r   r   r   r   r   r   r   r   r   r?   r	   r   r   r   r   r   r   r   r   r   r   r   tagsr   r   r   rM   r  r  r
  r  sysmaxsizer   r2   r0   r   r   K  sX   * #$LJI H_F
 
8 E E ; ;.	!2 / / 
F 
F 6 6 6 6 
 
  < D   #$  s{{ =r2   r   c                   l    e Zd ZdZed        Zed        Zed        Zed        Z	d Z
ed        Zd Zy	)
r   a?  A general text block, meant for larger bodies of text (esp. those
    containing sentences). Inherits from :class:`BaseBlob <BaseBlob>`.

    :param str text: A string.
    :param tokenizer: (optional) A tokenizer instance. If ``None``, defaults to
        :class:`WordTokenizer() <textblob.tokenizers.WordTokenizer>`.
    :param np_extractor: (optional) An NPExtractor instance. If ``None``,
        defaults to :class:`FastNPExtractor() <textblob.en.np_extractors.FastNPExtractor>`.
    :param pos_tagger: (optional) A Tagger instance. If ``None``, defaults to
        :class:`NLTKTagger <textblob.en.taggers.NLTKTagger>`.
    :param analyzer: (optional) A sentiment analyzer. If ``None``, defaults to
        :class:`PatternAnalyzer <textblob.en.sentiments.PatternAnalyzer>`.
    :param classifier: (optional) A classifier.
    c                 "    | j                         S )z4Return list of :class:`Sentence <Sentence>` objects.)_create_sentence_objectsrB   s    r0   r   zTextBlob.sentencesd  s     ,,..r2   c                 B    t        t        | j                  d            S r   r   rB   s    r0   r   zTextBlob.wordsi  r   r2   c                 T    | j                   D cg c]  }|j                   c}S c c}w )z/List of strings, the raw sentences in the blob.)r   r   r>   sentences     r0   raw_sentenceszTextBlob.raw_sentencess  s      .2^^<<<<   %c                 T    | j                   D cg c]  }|j                   c}S c c}w )z6Returns a list of each sentence's dict representation.)r   dictr  s     r0   
serializedzTextBlob.serializedx  s      /3nn=(===r  c                 H    t        j                  | j                  g|i |S )zReturn a json representation (str) of this blob.
        Takes the same arguments as json.dumps.

        .. versionadded:: 0.5.1
        )jsondumpsr"  )r>   r   r   s      r0   to_jsonzTextBlob.to_json}  s!     zz$//;D;F;;r2   c                 "    | j                         S )zThe json representation of this blob.

        .. versionchanged:: 0.5.1
            Made ``json`` a property instead of a method to restore backwards
            compatibility that was broken after version 0.4.0.
        )r&  rB   s    r0   r$  zTextBlob.json  s     ||~r2   c                 x   g }t        | j                        }d}|D ]  }| j                  j                  ||      }|t        |      z  }|t        |      z   }t	        |||| j
                  | j                  | j                  | j                  | j                  | j                  	      }|j                  |        |S )z5Returns a list of Sentence objects from the raw text.r   )start_index	end_indexr   r   r   r   r   r   )r   r   r   r   Sentencer   r   r   r   r   r   r   )r>   sentence_objectsr   
char_indexsentr)  r*  r   s           r0   r  z!TextBlob._create_sentence_objects  s    !$((+	
 	'D ((..z:K#d)#J#c$i/I'#..!..??{{??
A ##A&%	'&  r2   N)rg   rh   ri   rj   r	   r   r   propertyr  r"  r&  r$  r  r  r2   r0   r   r   T  sw     / / E E = = > ><   r2   r   c                   4     e Zd ZdZd fd	Zed        Z xZS )r+  a  A sentence within a TextBlob. Inherits from :class:`BaseBlob <BaseBlob>`.

    :param sentence: A string, the raw sentence.
    :param start_index: An int, the index where this sentence begins
                        in a TextBlob. If not given, defaults to 0.
    :param end_index: An int, the index where this sentence ends in
                        a TextBlob. If not given, defaults to the
                        length of the sentence - 1.
    c                     t        |   |g|i | |x| _        | _        |xs t	        |      dz
  x| _        | _        y )Nr   )r6   r?   startr)  r   endr*  )r>   r  r)  r*  r   r   r;   s         r0   r?   zSentence.__init__  sE    3D3F3(33
T%$-$BX1BB4>r2   c                     | j                   | j                  | j                  | j                  | j                  | j
                  | j                  dS )z)The dict representation of this sentence.r   r)  r*  r   r   r   r   r5  rB   s    r0   r!  zSentence.dict  sE     88++ -- --
 	
r2   )r   N)rg   rh   ri   rj   r?   r/  r!  rq   rr   s   @r0   r+  r+    s"    C 

 

r2   r+  c                   z    e Zd ZdZ e       Z e       Z e       Z	 e
       Z e       Z	 	 	 	 	 	 ddZd Zd ZeZy)Blobbera:  A factory for TextBlobs that all share the same tagger,
    tokenizer, parser, classifier, and np_extractor.

    Usage:

        >>> from textblob import Blobber
        >>> from textblob.taggers import NLTKTagger
        >>> from textblob.tokenizers import SentenceTokenizer
        >>> tb = Blobber(pos_tagger=NLTKTagger(), tokenizer=SentenceTokenizer())
        >>> blob1 = tb("This is one blob.")
        >>> blob2 = tb("This blob has the same tagger and tokenizer.")
        >>> blob1.pos_tagger is blob2.pos_tagger
        True

    :param tokenizer: (optional) A tokenizer instance. If ``None``,
        defaults to :class:`WordTokenizer() <textblob.tokenizers.WordTokenizer>`.
    :param np_extractor: (optional) An NPExtractor instance. If ``None``,
        defaults to :class:`FastNPExtractor() <textblob.en.np_extractors.FastNPExtractor>`.
    :param pos_tagger: (optional) A Tagger instance. If ``None``,
        defaults to :class:`NLTKTagger <textblob.en.taggers.NLTKTagger>`.
    :param analyzer: (optional) A sentiment analyzer. If ``None``,
        defaults to :class:`PatternAnalyzer <textblob.en.sentiments.PatternAnalyzer>`.
    :param parser: A parser. If ``None``, defaults to
        :class:`PatternParser <textblob.en.parsers.PatternParser>`.
    :param classifier: A classifier.

    .. versionadded:: 0.4.0
    Nc           	      &    t        | ||||||       y r=   )r   )r>   r   r   r   r   r   r   s          r0   r?   zBlobber.__init__  s     	)Zx	
r2   c           	          t        || j                  | j                  | j                  | j                  | j
                  | j                        S )zReturn a new TextBlob object with this Blobber's ``np_extractor``,
        ``pos_tagger``, ``tokenizer``, ``analyzer``, and ``classifier``.

        :returns: A new :class:`TextBlob <TextBlob>`.
        )r   r   r   r   r   r   )r   r   r   r   r   r   r   )r>   r   s     r0   __call__zBlobber.__call__  s@     nn**]];;
 	
r2   c                    | j                   r#| j                   j                  j                  dz   nd}d| j                  j                  j                   d| j                  j                  j                   d| j
                  j                  j                   d| j                  j                  j                   d| j                  j                  j                   d| d	S )
Nz()NonezBlobber(tokenizer=z(), pos_tagger=z(), np_extractor=z(), analyzer=z(), parser=z(), classifier=r{   )r   r;   rg   r   r   r   r   r   )r>   classifier_names     r0   rC   zBlobber.__repr__  s    9=DOO%%..5f 	 !!9!9!B!B C D//33<<= > --77@@A B//889 :kk++445 6)*!-	
r2   )NNNNNN)rg   rh   ri   rj   r   r   r   r   r   r   r   r   r   r   r?   r:  rC   rE   r  r2   r0   r7  r7    sX    : #$LJI H_F 

 
 Gr2   r7  r=   )8rj   r$  r  collectionsr   rV   textblob.baser   r   r   r   r   textblob.decoratorsr	   r
   textblob.enr   textblob.inflectr   rI   r   rG   textblob.mixinsr   r   textblob.np_extractorsr   textblob.parsersr   textblob.sentimentsr   textblob.taggersr   textblob.tokenizersr   r   r   textblob.utilsr   r   corpuswordnetr*   r   bytesr   r1   r4   listrt   r   r   r   r   r+  r7  r  r2   r0   <module>rN     s   ,  
 #   F  4 8 @ 2 * / ' K K 8 ;;5\

G3 GDWMt WMt 8F= 3 F=RS x S l
x 
@N Nr2   