Merge pull request #97 from NAL-i5K/testpy3_rebase

Testpy3 rebase
NAL-i5K · Nov 1, 2019 · 80ea9ab · 80ea9ab
2 parents 28f126e + 8ab71a6
commit 80ea9ab
Show file tree

Hide file tree

Showing 23 changed files with 244 additions and 252 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -4,10 +4,11 @@ matrix:
   include:
   - os: linux
     language: python
-    python: 2.7
+    python: 3.7
     install:
-      - pip install .
       - pip install coveralls coverage codecov
+      - pip install .
+
     script:
       - coverage run -a ./gff3tool/bin/gff3_QC.py -g example_file/example.gff3 -f example_file/reference.fa -o error.txt
       - coverage run -a ./gff3tool/bin/gff3_fix.py -qc_r error.txt -g example_file/example.gff3 -og corrected.gff3
@@ -21,19 +22,18 @@ matrix:
 
   - os: linux
     language: python
-    python: 2.7
-    install:
-      - pip install --no-cache-dir gff3tool
-  - os: osx
-    language: generic
+    python: 3.7
     install:
-      - pip2 install .
+      - pip3 install --no-cache-dir gff3tool
   - os: osx
     language: generic
+    python: 3.7
     install:
-      - pip2 install --no-cache-dir gff3tool
+      - pip3 install .
+      - pip3 install --no-cache-dir gff3tool
   - os: linux
     language: ruby
+    python: 3.7
     rvm: 2.2
     before_script: gem install awesome_bot
     script: awesome_bot --allow-redirect --allow-dupe README.md docs/*.md # only this instance will check the documents
@@ -53,4 +53,3 @@ after_success:
   - coverage report
   - coveralls
   - codecov
-
diff --git a/docs/FAQ.md b/docs/FAQ.md
@@ -37,7 +37,7 @@ In addition, if your input gff file contains a feature that has two or more pare
 
 ## Q: Which codons are considered for translation? (Generate biological sequences from a GFF3 file: gff3_to_fasta.py)
 
-Translation from 64 combinations of [standard codons]( https://en.wikipedia.org/wiki/DNA_codon_table) (Only standard codons and universal stop codons are considered.)
+Translation from 64 combinations of [standard codons](https://en.wikipedia.org/wiki/DNA_codon_table) (Only standard codons and universal stop codons are considered.)
 
 ## Q: Why does gff3_merge.py sometimes reject auto-assigned replace tags when the reference model has multiple isoforms? (Merge 2 GFF3 files: gff3_merge.py)
 

diff --git a/docs/gff3_to_fasta.md b/docs/gff3_to_fasta.md
@@ -15,8 +15,8 @@ Extract sequences from specific regions of genome based on gff file.
     - **`pep`**: Translated peptide sequences (translation based on cds regions) for each record in the [`FASTA`] output. CDS features need to be included in the gff file.
     - **`user_defined`**: Specify parent and child features for fasta extraction via the -u argument, format [parent feature type] [child feature type].(e.g. `-st user_defined -u miRNA exon`)
 * **`translator` method for universal translation**: The `translator` method is feasible for
-    - translation from 64 combitions of [standard codons](https://en.wikipedia.org/wiki/DNA_codon_table) (Only standard codons and universal stop condons are considered.)
-    - translation from [codons with IUB Depiction](https://en.wikipedia.org/wiki/DNA_codon_table)
+    - translation from 64 combinations of [standard codons](https://web.archive.org/web/20161201123943/https://www-bimas.cit.nih.gov/molbio/translate/codes.html) (Only standard codons and universal stop codons are considered.)
+    - translation from [codons with IUB Depiction](https://web.archive.org/web/20161201123943/https://www-bimas.cit.nih.gov/molbio/translate/codes.html)
     - translation from mRNA (U contained) or CDS (T, instead of U contained)
 
 ## Usage

diff --git a/gff3tool/bin/gff3_QC.py b/gff3tool/bin/gff3_QC.py
@@ -1,4 +1,4 @@
-#! /usr/local/bin/python2.7
+#! /usr/local/bin/python3
 import sys
 import logging
 from gff3tool.lib.gff3 import Gff3
@@ -110,26 +110,26 @@ def script_main():
         error_set.extend(cmd)
     if args.output:
         logger_stderr.info('Print QC report at {0:s}'.format(args.output))
-        report_fh = open(args.output, 'wb')
+        report_fh = open(args.output, 'w')
     else:
         logger_stderr.info('Print QC report at {0:s}'.format('report.txt'))
-        report_fh = open('report.txt', 'wb')
+        report_fh = open('report.txt', 'w')
 
     if args.statistic:
         logger_stderr.info('Print QC statistic report at {0:s}'.format(args.statistic))
-        statistic_fh = open(args.statistic, 'wb')
+        statistic_fh = open(args.statistic, 'w')
     else:
         logger_stderr.info('Print QC statistic report at {0:s}'.format('statistic.txt'))
-        statistic_fh = open('statistic.txt', 'wb')
+        statistic_fh = open('statistic.txt', 'w')
     report_fh.write('Line_num\tError_code\tError_tag\n')
-    for e in sorted(error_set):
+    for e in sorted(error_set, key=lambda x: sorted(x.keys())):
         tag = '[{0:s}]'.format(e['eTag'])
         report_fh.write('{0:s}\t{1:s}\t{2:s}\n'.format(str(e['line_num']), str(e['eCode']), str(tag)))
     #statistic_file
     error_counts = dict()
     ERROR_INFO=ERROR.INFO
     statistic_fh.write('Error_code\tNumber_of_problematic_models\tError_tag\n')
-    for s in sorted(error_set):
+    for s in sorted(error_set, key=lambda x: sorted(x.keys())):
         if s['eCode'] not in error_counts:
             error_counts[s['eCode']]= {'count':0,'etag':ERROR_INFO[s['eCode']]}
         error_counts[s['eCode']]['count'] += 1   

diff --git a/gff3tool/bin/gff3_fix.py b/gff3tool/bin/gff3_fix.py
@@ -1,4 +1,4 @@
-#! /usr/local/bin/python2.7
+#! /usr/local/bin/python3
 import sys
 import re
 import logging
@@ -23,7 +23,7 @@ def script_main():
     parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=dedent("""\
 
     Testing environment:
-    1. Python 2.7
+    1. Python 3.*
 
     Input:
     1. Error report: Error report from gff3_QC.py. Specify the file name with the -qc_r or --qc_report argument;

diff --git a/gff3tool/bin/gff3_merge.py b/gff3tool/bin/gff3_merge.py
@@ -1,4 +1,4 @@
-#! /usr/local/bin/python2.7
+#! /usr/local/bin/python3
 import sys
 import re
 import logging
@@ -19,24 +19,24 @@ def check_replace(gff, user_defined1=None):
     for line in gff.lines:
         if not user_defined1:
             try:
-                if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent'):
+                if line['line_type'] == 'feature' and 'Parent' not in line['attributes']:
                    roots.append(line)
             except KeyError:
                 print('WARNING  [Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))
         else:
             if line['type'] in u_type:
                 try:
-                    if not line['attributes'].has_key('replace'):
+                    if 'replace' not in line['attributes']:
                         error_lines.append(line)
                 except KeyError:
                     print('WARNING  [Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))
 
-    #roots = [line for line in gff.lines if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent')]
+    #roots = [line for line in gff.lines if line['line_type'] == 'feature' and 'Parent' not in line['attributes']]
 
     for root in roots:
         children = root['children']
         for child in children:
-            if not child['attributes'].has_key('replace'):
+            if 'replace' not in child['attributes']:
                 error_lines.append(child)
 
     if len(error_lines):
@@ -115,7 +115,7 @@ def script_main():
     Merge two gff files of the same genome into one.
 
     Testing enviroment:
-    1. Python 2.7
+    1. Python 3.*
 
     Inputs:
     1. GFF3 file 1: Gff with annotations modified relative to the original gff (e.g. output from the Apollo program), specify the file name with the -g1 argument
@@ -146,7 +146,6 @@ def script_main():
     parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
 
     args = parser.parse_args()
-
     if args.gff_file1:
         logger_stderr.info('Checking Update GFF3 file (%s)...', args.gff_file1)
     elif not sys.stdin.isatty(): # if STDIN connected to pipe or file
@@ -220,9 +219,9 @@ def script_main():
         sys.exit(0)
     if args.report_file:
         logger_stderr.info('Writing validation report (%s)...\n', args.report_file)
-        report_fh = open(args.report_file, 'wb')
+        report_fh = open(args.report_file, 'w')
     else:
-        report_fh = open('merge_report.txt', 'wb')
+        report_fh = open('merge_report.txt', 'w')
 
     if not args.output_gff:
         args.output_gff='merged.gff'

diff --git a/gff3tool/bin/gff3_sort.py b/gff3tool/bin/gff3_sort.py
@@ -1,4 +1,4 @@
-#! /usr/local/bin/python2.7
+#! /usr/local/bin/python3
 """
 Changelog:
     * v0.0.2
@@ -37,7 +37,7 @@ def PositionSort(linelist,reference):
                 print ('Error')
                 sys.exit(1)
             # 'seq2id': a dictionary mapping sequence number to their features
-            if seq2id.has_key(seqnum):
+            if seqnum in seq2id:
                 seq2id[seqnum].append(str(line['line_raw']))
             else:
                 seq2id[seqnum] = [str(line['line_raw'])]
@@ -53,7 +53,7 @@ def PositionSort(linelist,reference):
                 print('ERROR [SeqID] SeqID does not end with a number. \n\t\t- Line {0:s}: {1:s} \n Adding argument -r like " gff3_sort -g example_file/example.gff3 -og example-sorted.gff3 -r " can handle this situation.'.format(str(line['line_index']+1),line['line_raw']))
                 sys.exit(1)
             # 'seq2id': a dictionary mapping sequence number to their features
-            if seq2id.has_key(seqnum):
+            if seqnum in seq2id:
                 seq2id[seqnum].append(str(line['line_raw']))
             else:
                 seq2id[seqnum] = [str(line['line_raw'])]
@@ -165,7 +165,7 @@ def TypeSort(line_list, sorting_order, reverse=False):
         lineindex = line['start'] if reverse==False else line['end']
         id2line[str(line['line_raw'])] = line
         try:
-            if sorting_order.has_key(line['type']):
+            if line['type'] in sorting_order:
                 id2index[str(line['line_raw'])] = [lineindex, sorting_order[line['type']] if reverse==False else (-sorting_order[line['type']])]
             else:
                 id2index[str(line['line_raw'])] = [lineindex, 99 if reverse==False else -99]
@@ -184,7 +184,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
     gff3 = Gff3(gff_file=gff, logger=logger_null)
 
     if output:
-        report = open(output, 'wb')
+        report = open(output, 'w')
     else:
         report = sys.stdout
 
@@ -196,13 +196,13 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
     gff3_linenum_Set = set()
 
     for line in gff3.lines:
-       if line['line_type'] == 'feature':
-           gff3_linenum_Set.add(line['line_index'])
-       try:
-           if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent') and len(line['attributes']) != 0:
-               roots.append(line)
-       except:
-           logger.warning('[Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))
+        if line['line_type'] == 'feature':
+            gff3_linenum_Set.add(line['line_index'])
+        try:
+            if line['line_type'] == 'feature' and not 'Parent' in line['attributes'] and len(line['attributes']) != 0:
+                roots.append(line)
+        except:
+            logger.warning('[Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))
     #roots = [line for line in gff3.lines if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent')]
 
     # Sort the root-level features based on the order of the genomic sequences
@@ -249,7 +249,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
                 gchildgroup = {}
                 # Visit every third-level feature, and collect a dictionary of 'type' to 'features'
                 for grandchild in grandchildren: # Visit each third-level feature
-                    if gchildgroup.has_key(str(grandchild['type'])):
+                    if str(grandchild['type']) in gchildgroup:
                         gchildgroup[str(grandchild['type'])].append(grandchild)
                     else:
                         gchildgroup[str(grandchild['type'])] = []
@@ -273,7 +273,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
                     if StrandSort(exons):
                         exons_sorted = StrandSort(exons)
                         for exon in exons_sorted:
-                            if exon['attributes'].has_key('Parent'):
+                            if 'Parent' in exon['attributes']:
                                 if isinstance(exon['attributes']['Parent'], list) and len(exon['attributes']['Parent']) > 1:
                                     gff3_linenum_Set.discard(exon['line_index'])
                                     report.write(TwoParent(child['attributes']['ID'],exon))
@@ -289,7 +289,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
                     if StrandSort(cdss):
                         cdss_sorted = StrandSort(cdss)
                         for cds in cdss_sorted:
-                            if cds['attributes'].has_key('Parent'):
+                            if 'Parent' in cds['attributes']:
                                 if isinstance(cds['attributes']['Parent'], list) and len(cds['attributes']['Parent']) > 1:
                                     gff3_linenum_Set.discard(cds['line_index'])
                                     report.write(TwoParent(child['attributes']['ID'],cds))
@@ -303,7 +303,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
                 if len(others):
                     if PositionSort(others,reference):
                         for other in others:
-                            if other['attributes'].has_key('Parent'):
+                            if 'Parent' in other['attributes']:
                                 if isinstance(other['attributes']['Parent'], list) and len(other['attributes']['Parent']) > 1:
                                     gff3_linenum_Set.discard(other['line_index'])
                                     report.write(TwoParent(child['attributes']['ID'],other))